1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
// Copyright (c) The Diem Core Contributors
// SPDX-License-Identifier: Apache-2.0

//! An identifier is the name of an entity (module, resource, function, etc) in Move.
//!
//! A valid identifier consists of an ASCII string which satisfies any of the conditions:
//!
//! * The first character is a letter and the remaining characters are letters, digits or
//!   underscores.
//! * The first character is an underscore, and there is at least one further letter, digit or
//!   underscore.
//!
//! The spec for allowed identifiers is similar to Rust's spec
//! ([as of version 1.38](https://doc.rust-lang.org/1.38.0/reference/identifiers.html)).
//!
//! Allowed identifiers are currently restricted to ASCII due to unresolved issues with Unicode
//! normalization. See [Rust issue #55467](https://github.com/rust-lang/rust/issues/55467) and the
//! associated RFC for some discussion. Unicode identifiers may eventually be supported once these
//! issues are worked out.
//!
//! This module only determines allowed identifiers at the bytecode level. Move source code will
//! likely be more restrictive than even this, with a "raw identifier" escape hatch similar to
//! Rust's `r#` identifiers.
//!
//! Among other things, identifiers are used to:
//! * specify keys for lookups in storage
//! * do cross-module lookups while executing transactions

use anyhow::{bail, Result};
#[cfg(any(test, feature = "fuzzing"))]
use proptest::prelude::*;
use ref_cast::RefCast;
use serde::{Deserialize, Serialize};
use std::{borrow::Borrow, fmt, ops::Deref};

/// Return true if this character can appear in a Move identifier.
///
/// Note: there are stricter restrictions on whether a character can begin a Move
/// identifier--only alphabetic characters are allowed here.
#[inline]
pub const fn is_valid_identifier_char(c: char) -> bool {
    matches!(c, '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')
}

/// Returns `true` if all bytes in `b` after the offset `start_offset` are valid
/// ASCII identifier characters.
const fn all_bytes_valid(b: &[u8], start_offset: usize) -> bool {
    let mut i = start_offset;
    // TODO(philiphayes): use for loop instead of while loop when it's stable in const fn's.
    while i < b.len() {
        if !is_valid_identifier_char(b[i] as char) {
            return false;
        }
        i += 1;
    }
    true
}

/// Describes what identifiers are allowed.
///
/// For now this is deliberately restrictive -- we would like to evolve this in the future.
// TODO: "<SELF>" is coded as an exception. It should be removed once CompiledScript goes away.
// Note: needs to be pub as it's used in the `ident_str!` macro.
pub const fn is_valid(s: &str) -> bool {
    // Rust const fn's don't currently support slicing or indexing &str's, so we
    // have to operate on the underlying byte slice. This is not a problem as
    // valid identifiers are (currently) ASCII-only.
    let b = s.as_bytes();
    match b {
        b"<SELF>" => true,
        [b'a'..=b'z', ..] | [b'A'..=b'Z', ..] => all_bytes_valid(b, 1),
        [b'_', ..] if b.len() > 1 => all_bytes_valid(b, 1),
        _ => false,
    }
}

/// A regex describing what identifiers are allowed. Used for proptests.
// TODO: "<SELF>" is coded as an exception. It should be removed once CompiledScript goes away.
#[cfg(any(test, feature = "fuzzing"))]
#[allow(dead_code)]
pub(crate) static ALLOWED_IDENTIFIERS: &str =
    r"(?:[a-zA-Z][a-zA-Z0-9_]*)|(?:_[a-zA-Z0-9_]+)|(?:<SELF>)";
#[cfg(any(test, feature = "fuzzing"))]
pub(crate) static ALLOWED_NO_SELF_IDENTIFIERS: &str =
    r"(?:[a-zA-Z][a-zA-Z0-9_]*)|(?:_[a-zA-Z0-9_]+)";

/// An owned identifier.
///
/// For more details, see the module level documentation.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, Deserialize)]
pub struct Identifier(Box<str>);
// An identifier cannot be mutated so use Box<str> instead of String -- it is 1 word smaller.

impl Identifier {
    /// Creates a new `Identifier` instance.
    pub fn new(s: impl Into<Box<str>>) -> Result<Self> {
        let s = s.into();
        if Self::is_valid(&s) {
            Ok(Self(s))
        } else {
            bail!("Invalid identifier '{}'", s);
        }
    }

    /// Returns true if this string is a valid identifier.
    pub fn is_valid(s: impl AsRef<str>) -> bool {
        is_valid(s.as_ref())
    }

    /// Returns if this identifier is "<SELF>".
    /// TODO: remove once we fully separate CompiledScript & CompiledModule.
    pub fn is_self(&self) -> bool {
        &*self.0 == "<SELF>"
    }

    /// Converts a vector of bytes to an `Identifier`.
    pub fn from_utf8(vec: Vec<u8>) -> Result<Self> {
        let s = String::from_utf8(vec)?;
        Self::new(s)
    }

    /// Creates a borrowed version of `self`.
    pub fn as_ident_str(&self) -> &IdentStr {
        self
    }

    /// Converts this `Identifier` into a `String`.
    ///
    /// This is not implemented as a `From` trait to discourage automatic conversions -- these
    /// conversions should not typically happen.
    pub fn into_string(self) -> String {
        self.0.into()
    }

    /// Converts this `Identifier` into a UTF-8-encoded byte sequence.
    pub fn into_bytes(self) -> Vec<u8> {
        self.into_string().into_bytes()
    }
}

impl From<&IdentStr> for Identifier {
    fn from(ident_str: &IdentStr) -> Self {
        ident_str.to_owned()
    }
}

impl AsRef<IdentStr> for Identifier {
    fn as_ref(&self) -> &IdentStr {
        self
    }
}

impl Deref for Identifier {
    type Target = IdentStr;

    fn deref(&self) -> &IdentStr {
        // Identifier and IdentStr maintain the same invariants, so it is safe to
        // convert.
        IdentStr::ref_cast(&self.0)
    }
}

impl fmt::Display for Identifier {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", &self.0)
    }
}

/// A borrowed identifier.
///
/// For more details, see the module level documentation.
#[derive(Debug, Eq, Hash, Ord, PartialEq, PartialOrd, RefCast)]
#[repr(transparent)]
pub struct IdentStr(str);

impl IdentStr {
    pub fn new(s: &str) -> Result<&IdentStr> {
        if Self::is_valid(s) {
            Ok(IdentStr::ref_cast(s))
        } else {
            bail!("Invalid identifier '{}'", s);
        }
    }

    /// Returns true if this string is a valid identifier.
    pub fn is_valid(s: impl AsRef<str>) -> bool {
        is_valid(s.as_ref())
    }

    /// Returns the length of `self` in bytes.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` if `self` has a length of zero bytes.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Converts `self` to a `&str`.
    ///
    /// This is not implemented as a `From` trait to discourage automatic conversions -- these
    /// conversions should not typically happen.
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Converts `self` to a byte slice.
    pub fn as_bytes(&self) -> &[u8] {
        self.0.as_bytes()
    }
}

impl Borrow<IdentStr> for Identifier {
    fn borrow(&self) -> &IdentStr {
        self
    }
}

impl ToOwned for IdentStr {
    type Owned = Identifier;

    fn to_owned(&self) -> Identifier {
        Identifier(self.0.into())
    }
}

impl fmt::Display for IdentStr {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", &self.0)
    }
}

#[cfg(any(test, feature = "fuzzing"))]
impl Arbitrary for Identifier {
    type Parameters = ();
    type Strategy = BoxedStrategy<Self>;

    fn arbitrary_with((): ()) -> Self::Strategy {
        ALLOWED_NO_SELF_IDENTIFIERS
            .prop_map(|s| {
                // Identifier::new will verify that generated identifiers are correct.
                Identifier::new(s).unwrap()
            })
            .boxed()
    }
}

// const assert that IdentStr impls RefCast<From = str>
// This assertion is what guarantees the unsafe transmute is safe.
const _: fn() = || {
    fn assert_impl_all<T: ?Sized + ::ref_cast::RefCast<From = str>>() {}
    assert_impl_all::<IdentStr>();
};

/// `ident_str!` is a compile-time validated macro that constructs a
/// `&'static IdentStr` from a const `&'static str`.
///
/// ### Example
///
/// Creating a valid static or const [`IdentStr`]:
///
/// ```rust
/// use move_core_types::{ident_str, identifier::IdentStr};
/// const VALID_IDENT: &'static IdentStr = ident_str!("MyCoolIdentifier");
///
/// const THING_NAME: &'static str = "thing_name";
/// const THING_IDENT: &'static IdentStr = ident_str!(THING_NAME);
/// ```
///
/// In contrast, creating an invalid [`IdentStr`] will fail at compile time:
///
/// ```rust,compile_fail
/// use move_core_types::{ident_str, identifier::IdentStr};
/// const INVALID_IDENT: &'static IdentStr = ident_str!("123Foo"); // Fails to compile!
/// ```
// TODO(philiphayes): this should really be an associated const fn like `IdentStr::new`;
// unfortunately, both unsafe-reborrow and unsafe-transmute don't currently work
// inside const fn's. Only unsafe-transmute works inside static const-blocks
// (but not const-fn's).
#[macro_export]
macro_rules! ident_str {
    ($ident:expr) => {{
        // Only static strings allowed.
        let s: &'static str = $ident;

        // Only valid identifier strings are allowed.
        // Note: Work-around hack to print an error message in a const block.
        let is_valid = $crate::identifier::is_valid(s);
        ["String is not a valid Move identifier"][!is_valid as usize];

        // SAFETY: the following transmute is safe because
        // (1) it's equivalent to the unsafe-reborrow inside IdentStr::ref_cast()
        //     (which we can't use b/c it's not const).
        // (2) we've just asserted that IdentStr impls RefCast<From = str>, which
        //     already guarantees the transmute is safe (RefCast checks that
        //     IdentStr(str) is #[repr(transparent)]).
        // (3) both in and out lifetimes are 'static, so we're not widening the lifetime.
        // (4) we've just asserted that the IdentStr passes the is_valid check.
        //
        // Note: this lint is unjustified and no longer checked. See issue:
        // https://github.com/rust-lang/rust-clippy/issues/6372
        #[allow(clippy::transmute_ptr_to_ptr)]
        unsafe {
            ::std::mem::transmute::<&'static str, &'static $crate::identifier::IdentStr>(s)
        }
    }};
}