1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
// Copyright (c) The Diem Core Contributors
// SPDX-License-Identifier: Apache-2.0

use crate::{pool::Entry, SYMBOL_POOL};
use serde::{de::Deserialize, ser::Serialize};
use std::{borrow::Cow, cmp::Ordering, fmt, num::NonZeroU64, ops::Deref};

/// A small, copyable handle to a string held in the global symbol pool.
///
/// A `Symbol` is not the string data itself: it only records the (non-zero)
/// address of the pool entry that holds the text. Because the handle is just
/// an integer, it can implement `Copy` and other traits that owning string
/// types cannot.
///
/// Strings are added to the global cache as `Symbol`s are created from them.
/// Creating a `Symbol` from text that is already cached yields a handle equal
/// to the existing one.
///
/// ```
///# use crate::move_symbol_pool::Symbol;
/// let s1 = Symbol::from("hi"); // "hi" is stored in the global cache
/// let s2 = Symbol::from("hi"); // "hi" is already stored, cache does not grow
/// assert_eq!(s1, s2);
/// ```
///
/// Call [`as_str()`] to read the string a `Symbol` stands for. `Symbol` also
/// implements [`Display`], so it can be printed like an ordinary string,
/// which is convenient with crates that print strings to a terminal, such as
/// codespan.
///
/// ```
///# use crate::move_symbol_pool::Symbol;
/// let message = format!("{} {}",
///     Symbol::from("hello").as_str(),
///     Symbol::from("world"));
/// assert_eq!(message, "hello world");
/// ```
///
/// Equality and hashing are derived, so they operate on the stored address
/// rather than on the characters of the string.
///
/// [`as_str()`]: crate::Symbol::as_str
/// [`Display`]: std::fmt::Display
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct Symbol(
    // Address of the pool entry, stored as an integer; never zero.
    NonZeroU64,
);

impl Symbol {
    pub fn as_str(&self) -> &str {
        self.as_ref()
    }
}

impl<'a> From<Cow<'a, str>> for Symbol {
    fn from(s: Cow<'a, str>) -> Self {
        let mut pool = SYMBOL_POOL.lock().expect("could not acquire lock on pool");
        let address = pool.insert(s).as_ptr() as u64;
        Symbol(NonZeroU64::new(address).expect("address of symbol cannot be null"))
    }
}

impl From<&str> for Symbol {
    /// Creates a `Symbol` from a borrowed string slice.
    ///
    /// The slice is wrapped in `Cow::Borrowed` and passed to the
    /// `From<Cow<str>>` conversion.
    fn from(s: &str) -> Self {
        let borrowed: Cow<'_, str> = Cow::Borrowed(s);
        borrowed.into()
    }
}

impl From<String> for Symbol {
    /// Creates a `Symbol` from an owned `String`.
    ///
    /// The string is moved into `Cow::Owned` and passed to the
    /// `From<Cow<str>>` conversion.
    fn from(s: String) -> Self {
        let owned: Cow<'static, str> = Cow::Owned(s);
        owned.into()
    }
}

impl Deref for Symbol {
    type Target = str;

    /// Resolves the handle to the text of the pool entry it points at.
    fn deref(&self) -> &str {
        // Turn the stored integer back into the entry pointer it came from.
        let entry_ptr = self.0.get() as *const Entry;
        // SAFETY: the only place in this file that builds a `Symbol` is
        // `From<Cow<str>>`, which stores the non-null address returned by
        // `pool.insert(..).as_ptr()`.
        // NOTE(review): soundness also assumes the pool never frees or moves
        // an entry once inserted — confirm against the pool module.
        let pooled: &Entry = unsafe { &*entry_ptr };
        &pooled.string
    }
}

impl AsRef<str> for Symbol {
    /// Borrows the string this `Symbol` stands for.
    fn as_ref(&self) -> &str {
        // Explicit form of the deref coercion: `*self` is the `Symbol`,
        // `**self` is the `str` reached through its `Deref` impl.
        &**self
    }
}

impl fmt::Display for Symbol {
    /// Formats the symbol exactly as its underlying string would be
    /// displayed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Name the trait explicitly so it is clear that `str`'s `Display`
        // impl (not its `Debug` impl) is being used.
        fmt::Display::fmt(self.as_str(), f)
    }
}

impl fmt::Debug for Symbol {
    /// Formats the symbol using the `Debug` representation of its underlying
    /// string, rather than printing the stored address.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Name the trait explicitly so it is clear that `str`'s `Debug` impl
        // (not its `Display` impl) is being used.
        fmt::Debug::fmt(self.as_str(), f)
    }
}

impl Ord for Symbol {
    /// Orders symbols by the strings they stand for.
    ///
    /// Two symbols holding the same address compare as equal without their
    /// strings being read.
    fn cmp(&self, other: &Symbol) -> Ordering {
        // Different addresses: fall back to comparing the actual text.
        if self.0 != other.0 {
            return self.as_str().cmp(other.as_str());
        }
        Ordering::Equal
    }
}

impl PartialOrd for Symbol {
    /// Delegates to the total order defined by `Ord`, so the result is
    /// always `Some`.
    fn partial_cmp(&self, other: &Symbol) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl Serialize for Symbol {
    /// Serializes the symbol as the string it stands for; the stored address
    /// is never written out.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // Same call that `str`'s own `Serialize` impl makes.
        serializer.serialize_str(self.as_str())
    }
}

impl<'de> Deserialize<'de> for Symbol {
    /// Deserializes a `String` and converts it into a `Symbol` through the
    /// `From<String>` conversion. Any deserializer error is returned
    /// unchanged.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        String::deserialize(deserializer).map(Symbol::from)
    }
}

#[cfg(test)]
mod tests {
    use crate::Symbol;
    use std::mem::size_of;

    /// A `Symbol` is used throughout the Move codebase, so it must stay
    /// small. Any change that makes this assertion fail should be
    /// scrutinized before it is accepted.
    #[test]
    fn test_size() {
        assert_eq!(size_of::<Symbol>(), size_of::<u64>());
    }

    /// Distinct strings must be given distinct pool addresses.
    #[test]
    fn test_from_different_strings_have_different_addresses() {
        let first = Symbol::from("hi");
        let second = Symbol::from("hello");
        assert_ne!(first.0, second.0);
    }

    /// Creating a symbol twice from the same text must yield the same pool
    /// address both times.
    #[test]
    fn test_from_identical_strings_have_the_same_address() {
        let original = Symbol::from("bonjour");
        let repeat = Symbol::from("bonjour");
        assert_eq!(original.0, repeat.0);
    }
}