Files
addr2line
adler
aho_corasick
arrayvec
atty
backtrace
bitflags
camino
cargo_metadata
cargo_nextest
cargo_platform
cfg_expr
cfg_if
chrono
clap
clap_derive
color_eyre
config
crossbeam_channel
crossbeam_deque
crossbeam_epoch
crossbeam_utils
ctrlc
datatest_stable
debug_ignore
duct
either
enable_ansi_support
env_logger
eyre
fixedbitset
gimli
guppy
guppy_workspace_hack
hashbrown
humantime
humantime_serde
indent_write
indenter
indexmap
is_ci
itertools
itoa
lazy_static
lexical_core
libc
log
memchr
memoffset
miniz_oxide
nested
nextest_metadata
nextest_runner
nix
nom
num_cpus
num_integer
num_traits
object
once_cell
os_pipe
os_str_bytes
owo_colors
pathdiff
petgraph
proc_macro2
proc_macro_error
proc_macro_error_attr
quick_junit
quick_xml
quote
rayon
rayon_core
regex
regex_syntax
rustc_demangle
ryu
same_file
scopeguard
semver
serde
serde_derive
serde_json
shared_child
shellwords
smallvec
static_assertions
strip_ansi_escapes
strsim
structopt
structopt_derive
supports_color
syn
target_lexicon
target_spec
termcolor
textwrap
time
toml
twox_hash
unicode_xid
utf8parse
vte
vte_generate_state_changes
walkdir
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
/// A heuristic frequency based detection of rare bytes for substring search.
///
/// This detector attempts to pick out two bytes in a needle that are predicted
/// to occur least frequently. The purpose is to use these bytes to implement
/// fast candidate search using vectorized code.
///
/// A set of offsets is only computed for needles of length 2 or greater.
/// Smaller needles should be special cased by the substring search algorithm
/// in use. (e.g., Use memchr for single byte needles.)
///
/// Note that we use `u8` to represent the offsets of the rare bytes in a
/// needle to reduce space usage. This means that rare byte occurring after the
/// first 255 bytes in a needle will never be used.
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct RareNeedleBytes {
    /// The leftmost offset of the rarest byte in the needle, according to
    /// pre-computed frequency analysis. The "leftmost offset" means that
    /// rare1i <= i for all i where needle[i] == needle[rare1i].
    rare1i: u8,
    /// The leftmost offset of the second rarest byte in the needle, according
    /// to pre-computed frequency analysis. The "leftmost offset" means that
    /// rare2i <= i for all i where needle[i] == needle[rare2i].
    ///
    /// The second rarest byte is used as a type of guard for quickly detecting
    /// a mismatch if the first byte matches. This is a hedge against
    /// pathological cases where the pre-computed frequency analysis may be
    /// off. (But of course, does not prevent *all* pathological cases.)
    ///
    /// In general, rare1i != rare2i by construction, although there is no hard
    /// requirement that they be different. However, since the case of a single
    /// byte needle is handled specially by memchr itself, rare2i generally
    /// always should be different from rare1i since it would otherwise be
    /// ineffective as a guard.
    rare2i: u8,
}

impl RareNeedleBytes {
    /// Create a new pair of rare needle bytes with the given offsets. This is
    /// only used in tests for generating input data.
    #[cfg(all(test, feature = "std"))]
    pub(crate) fn new(rare1i: u8, rare2i: u8) -> RareNeedleBytes {
        RareNeedleBytes { rare1i, rare2i }
    }

    /// Detect the leftmost offsets of the two rarest bytes in the given
    /// needle.
    pub(crate) fn forward(needle: &[u8]) -> RareNeedleBytes {
        if needle.len() <= 1 || needle.len() > core::u8::MAX as usize {
            // For needles bigger than u8::MAX, our offsets aren't big enough.
            // (We make our offsets small to reduce stack copying.)
            // If you have a use case for it, please file an issue. In that
            // case, we should probably just adjust the routine below to pick
            // some rare bytes from the first 255 bytes of the needle.
            //
            // Also note that for needles of size 0 or 1, they are special
            // cased in Two-Way.
            //
            // TODO: Benchmar this.
            return RareNeedleBytes { rare1i: 0, rare2i: 0 };
        }

        // Find the rarest two bytes. We make them distinct by construction.
        let (mut rare1, mut rare1i) = (needle[0], 0);
        let (mut rare2, mut rare2i) = (needle[1], 1);
        if rank(rare2) < rank(rare1) {
            core::mem::swap(&mut rare1, &mut rare2);
            core::mem::swap(&mut rare1i, &mut rare2i);
        }
        for (i, &b) in needle.iter().enumerate().skip(2) {
            if rank(b) < rank(rare1) {
                rare2 = rare1;
                rare2i = rare1i;
                rare1 = b;
                rare1i = i as u8;
            } else if b != rare1 && rank(b) < rank(rare2) {
                rare2 = b;
                rare2i = i as u8;
            }
        }
        // While not strictly required, we really don't want these to be
        // equivalent. If they were, it would reduce the effectiveness of
        // candidate searching using these rare bytes by increasing the rate of
        // false positives.
        assert_ne!(rare1i, rare2i);
        RareNeedleBytes { rare1i, rare2i }
    }

    /// Return the rare bytes in the given needle in the forward direction.
    /// The needle given must be the same one given to the RareNeedleBytes
    /// constructor.
    pub(crate) fn as_rare_bytes(&self, needle: &[u8]) -> (u8, u8) {
        (needle[self.rare1i as usize], needle[self.rare2i as usize])
    }

    /// Return the rare offsets such that the first offset is always <= to the
    /// second offset. This is useful when the caller doesn't care whether
    /// rare1 is rarer than rare2, but just wants to ensure that they are
    /// ordered with respect to one another.
    #[cfg(memchr_runtime_simd)]
    pub(crate) fn as_rare_ordered_usize(&self) -> (usize, usize) {
        let (rare1i, rare2i) = self.as_rare_ordered_u8();
        (rare1i as usize, rare2i as usize)
    }

    /// Like as_rare_ordered_usize, but returns the offsets as their native
    /// u8 values.
    #[cfg(memchr_runtime_simd)]
    pub(crate) fn as_rare_ordered_u8(&self) -> (u8, u8) {
        if self.rare1i <= self.rare2i {
            (self.rare1i, self.rare2i)
        } else {
            (self.rare2i, self.rare1i)
        }
    }

    /// Return the rare offsets as usize values in the order in which they were
    /// constructed. rare1, for example, is constructed as the "rarer" byte,
    /// and thus, callers may want to treat it differently from rare2.
    pub(crate) fn as_rare_usize(&self) -> (usize, usize) {
        (self.rare1i as usize, self.rare2i as usize)
    }

    /// Return the byte frequency rank of each byte. The higher the rank, the
    /// more frequency the byte is predicted to be. The needle given must be
    /// the same one given to the RareNeedleBytes constructor.
    pub(crate) fn as_ranks(&self, needle: &[u8]) -> (usize, usize) {
        let (b1, b2) = self.as_rare_bytes(needle);
        (rank(b1), rank(b2))
    }
}

/// Return the heuristical frequency rank of the given byte. A lower rank
/// means the byte is believed to occur less frequently.
fn rank(b: u8) -> usize {
    crate::memmem::byte_frequencies::BYTE_FREQUENCIES[b as usize] as usize
}