Skip to main content

cranelift_reader/
lexer.rs

1//! Lexical analysis for .clif files.
2
3use crate::error::Location;
4use cranelift_codegen::ir::types;
5use cranelift_codegen::ir::{Block, Value};
6use std::str::CharIndices;
7use std::u16;
8
/// A Token returned from the `Lexer`.
///
/// Some variants may contain references to the original source text, so the `Token` has the same
/// lifetime as the source.
///
/// Numbered entity variants carry the decimal number that followed the entity prefix in the
/// source text (for example `ss3` becomes `StackSlot(3)`).
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Token<'a> {
    Comment(&'a str),       // `; text`: rest of the line, including the leading ';'
    LPar,                   // '('
    RPar,                   // ')'
    LBrace,                 // '{'
    RBrace,                 // '}'
    LBracket,               // '['
    RBracket,               // ']'
    LAngle,                 // '<'
    RAngle,                 // '>'
    Minus,                  // '-' (when not followed by something numeric)
    Plus,                   // '+' (when not followed by something numeric)
    Multiply,               // '*'
    Comma,                  // ','
    Dot,                    // '.'
    Colon,                  // ':'
    Equal,                  // '='
    Bang,                   // '!'
    At,                     // '@' (when not followed by a hexadecimal digit)
    Arrow,                  // '->'
    Float(&'a str),         // Floating point immediate
    Integer(&'a str),       // Integer immediate
    Type(types::Type),      // i32, f32, i32x4, ...
    DynamicType(u32),       // dt5
    Value(Value),           // v12, v7
    Block(Block),           // block3
    Cold,                   // cold (flag on block)
    StackSlot(u32),         // ss3
    DynamicStackSlot(u32),  // dss4
    GlobalValue(u32),       // gv3
    MemoryType(u32),        // mt0
    Constant(u32),          // const2
    FuncRef(u32),           // fn2
    SigRef(u32),            // sig2
    UserRef(u32),           // u345
    UserNameRef(u32),       // userextname345
    ExceptionTableRef(u32), // extable123
    ExceptionTag(u32),      // tag123
    AliasRegion(u32),       // region0
    TryCallRet(u32),        // ret123
    TryCallExn(u32),        // exn123
    Name(&'a str),          // %9arbitrary_alphanum, %x3, %0, %function ... (text after the '%')
    String(&'a str),        // "arbitrary quoted string with no escape" ... (text between the quotes)
    HexSequence(&'a str),   // #89AF (text after the '#')
    Identifier(&'a str),    // Unrecognized identifier (opcode, enumerator, ...)
    SourceLoc(&'a str),     // @00c7 (text after the '@')
}
61
/// A `Token` with an associated location.
#[derive(Debug, PartialEq, Eq)]
pub struct LocatedToken<'a> {
    /// The token that was scanned.
    pub token: Token<'a>,
    /// Where the token was found; the lexer records the location at which the token starts.
    pub location: Location,
}
68
69/// Wrap up a `Token` with the given location.
70fn token(token: Token, loc: Location) -> Result<LocatedToken, LocatedError> {
71    Ok(LocatedToken {
72        token,
73        location: loc,
74    })
75}
76
/// An error from the lexical analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexError {
    /// A character that cannot start any token was found, or a quoted string was still open
    /// when the end of the source was reached.
    InvalidChar,
}
82
/// A `LexError` with an associated Location.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LocatedError {
    /// The kind of lexical error.
    pub error: LexError,
    /// Where in the source the error was detected.
    pub location: Location,
}
89
90/// Wrap up a `LexError` with the given location.
91fn error<'a>(error: LexError, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
92    Err(LocatedError {
93        error,
94        location: loc,
95    })
96}
97
/// Count the ASCII decimal digits (`0`-`9`) found at the very end of `s`.
fn trailing_digits(s: &str) -> usize {
    // Walking the raw bytes from the back is sufficient: ASCII digits are single bytes and can
    // never be part of a multi-byte UTF-8 sequence.
    s.bytes().rev().take_while(u8::is_ascii_digit).count()
}
107
108/// Pre-parse a supposed entity name by splitting it into two parts: A head of lowercase ASCII
109/// letters and numeric tail.
110pub fn split_entity_name(name: &str) -> Option<(&str, u32)> {
111    let (head, tail) = name.split_at(name.len() - trailing_digits(name));
112    if tail.len() > 1 && tail.starts_with('0') {
113        None
114    } else {
115        tail.parse().ok().map(|n| (head, n))
116    }
117}
118
/// Lexical analysis.
///
/// A `Lexer` reads text from a `&str` and provides a sequence of tokens.
///
/// It also keeps track of the current line number for error reporting.
pub struct Lexer<'a> {
    // Complete source being processed.
    source: &'a str,

    // Iterator into `source`.
    chars: CharIndices<'a>,

    // Next character to be processed, or `None` at the end.
    lookahead: Option<char>,

    // Byte index into `source` of the lookahead character (`source.len()` at the end).
    pos: usize,

    // Current line number. Starts at 1 and is bumped whenever the lexer advances past a '\n'.
    line_number: usize,
}
141
142impl<'a> Lexer<'a> {
143    pub fn new(s: &'a str) -> Self {
144        let mut lex = Self {
145            source: s,
146            chars: s.char_indices(),
147            lookahead: None,
148            pos: 0,
149            line_number: 1,
150        };
151        // Advance to the first char.
152        lex.next_ch();
153        lex
154    }
155
156    // Advance to the next character.
157    // Return the next lookahead character, or None when the end is encountered.
158    // Always update cur_ch to reflect
159    fn next_ch(&mut self) -> Option<char> {
160        if self.lookahead == Some('\n') {
161            self.line_number += 1;
162        }
163        match self.chars.next() {
164            Some((idx, ch)) => {
165                self.pos = idx;
166                self.lookahead = Some(ch);
167            }
168            None => {
169                self.pos = self.source.len();
170                self.lookahead = None;
171            }
172        }
173        self.lookahead
174    }
175
176    // Get the location corresponding to `lookahead`.
177    fn loc(&self) -> Location {
178        Location {
179            line_number: self.line_number,
180        }
181    }
182
183    // Starting from `lookahead`, are we looking at `prefix`?
184    fn looking_at(&self, prefix: &str) -> bool {
185        self.source[self.pos..].starts_with(prefix)
186    }
187
188    // Starting from `lookahead`, are we looking at a number?
189    fn looking_at_numeric(&self) -> bool {
190        if let Some(c) = self.lookahead {
191            match c {
192                '0'..='9' => return true,
193                '-' => return true,
194                '+' => return true,
195                '.' => return true,
196                _ => {}
197            }
198            if self.looking_at("NaN") || self.looking_at("Inf") || self.looking_at("sNaN") {
199                return true;
200            }
201        }
202        false
203    }
204
205    // Scan a single-char token.
206    fn scan_char(&mut self, tok: Token<'a>) -> Result<LocatedToken<'a>, LocatedError> {
207        assert_ne!(self.lookahead, None);
208        let loc = self.loc();
209        self.next_ch();
210        token(tok, loc)
211    }
212
213    // Scan a multi-char token.
214    fn scan_chars(
215        &mut self,
216        count: usize,
217        tok: Token<'a>,
218    ) -> Result<LocatedToken<'a>, LocatedError> {
219        let loc = self.loc();
220        for _ in 0..count {
221            assert_ne!(self.lookahead, None);
222            self.next_ch();
223        }
224        token(tok, loc)
225    }
226
227    /// Get the rest of the current line.
228    /// The next token returned by `next()` will be from the following lines.
229    pub fn rest_of_line(&mut self) -> &'a str {
230        let begin = self.pos;
231        loop {
232            match self.next_ch() {
233                None | Some('\n') => return &self.source[begin..self.pos],
234                _ => {}
235            }
236        }
237    }
238
239    // Scan a comment extending to the end of the current line.
240    fn scan_comment(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
241        let loc = self.loc();
242        let text = self.rest_of_line();
243        token(Token::Comment(text), loc)
244    }
245
246    // Scan a number token which can represent either an integer or floating point number.
247    //
248    // Accept the following forms:
249    //
250    // - `10`: Integer
251    // - `-10`: Integer
252    // - `0xff_00`: Integer
253    // - `0.0`: Float
254    // - `0x1.f`: Float
255    // - `-0x2.4`: Float
256    // - `0x0.4p-34`: Float
257    //
258    // This function does not filter out all invalid numbers. It depends in the context-sensitive
259    // decoding of the text for that. For example, the number of allowed digits in an `Ieee32` and
260    // an `Ieee64` constant are different.
261    fn scan_number(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
262        let begin = self.pos;
263        let loc = self.loc();
264        let mut is_float = false;
265
266        // Skip a leading sign.
267        match self.lookahead {
268            Some('-') => {
269                self.next_ch();
270                if !self.looking_at_numeric() {
271                    // If the next characters won't parse as a number, we return Token::Minus
272                    return token(Token::Minus, loc);
273                }
274            }
275            Some('+') => {
276                self.next_ch();
277                if !self.looking_at_numeric() {
278                    // If the next characters won't parse as a number, we return Token::Plus
279                    return token(Token::Plus, loc);
280                }
281            }
282            _ => {}
283        }
284
285        // Check for NaNs with payloads.
286        if self.looking_at("NaN:") || self.looking_at("sNaN:") {
287            // Skip the `NaN:` prefix, the loop below won't accept it.
288            // We expect a hexadecimal number to follow the colon.
289            while self.next_ch() != Some(':') {}
290            is_float = true;
291        } else if self.looking_at("NaN") || self.looking_at("Inf") {
292            // This is Inf or a default quiet NaN.
293            is_float = true;
294        }
295
296        // Look for the end of this number. Detect the radix point if there is one.
297        loop {
298            match self.next_ch() {
299                Some('-') | Some('_') => {}
300                Some('.') => is_float = true,
301                Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
302                _ => break,
303            }
304        }
305        let text = &self.source[begin..self.pos];
306        if is_float {
307            token(Token::Float(text), loc)
308        } else {
309            token(Token::Integer(text), loc)
310        }
311    }
312
313    // Scan a 'word', which is an identifier-like sequence of characters beginning with '_' or an
314    // alphabetic char, followed by zero or more alphanumeric or '_' characters.
315    fn scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
316        let begin = self.pos;
317        let loc = self.loc();
318
319        assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_ascii_alphabetic());
320        loop {
321            match self.next_ch() {
322                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
323                _ => break,
324            }
325        }
326        let text = &self.source[begin..self.pos];
327
328        // Look for numbered well-known entities like block15, v45, ...
329        token(
330            split_entity_name(text)
331                .and_then(|(prefix, number)| {
332                    Self::numbered_entity(prefix, number)
333                        .or_else(|| Self::value_type(text, prefix, number))
334                })
335                .unwrap_or_else(|| match text {
336                    "cold" => Token::Cold,
337                    _ => Token::Identifier(text),
338                }),
339            loc,
340        )
341    }
342
343    // If prefix is a well-known entity prefix and suffix is a valid entity number, return the
344    // decoded token.
345    fn numbered_entity(prefix: &str, number: u32) -> Option<Token<'a>> {
346        match prefix {
347            "v" => Value::with_number(number).map(Token::Value),
348            "block" => Block::with_number(number).map(Token::Block),
349            "ss" => Some(Token::StackSlot(number)),
350            "dss" => Some(Token::DynamicStackSlot(number)),
351            "dt" => Some(Token::DynamicType(number)),
352            "gv" => Some(Token::GlobalValue(number)),
353            "mt" => Some(Token::MemoryType(number)),
354            "const" => Some(Token::Constant(number)),
355            "fn" => Some(Token::FuncRef(number)),
356            "sig" => Some(Token::SigRef(number)),
357            "u" => Some(Token::UserRef(number)),
358            "userextname" => Some(Token::UserNameRef(number)),
359            "extable" => Some(Token::ExceptionTableRef(number)),
360            "tag" => Some(Token::ExceptionTag(number)),
361            "region" => Some(Token::AliasRegion(number)),
362            "ret" => Some(Token::TryCallRet(number)),
363            "exn" => Some(Token::TryCallExn(number)),
364            _ => None,
365        }
366    }
367
368    // Recognize a scalar or vector type.
369    fn value_type(text: &str, prefix: &str, number: u32) -> Option<Token<'a>> {
370        let is_vector = prefix.ends_with('x');
371        let scalar = if is_vector {
372            &prefix[0..prefix.len() - 1]
373        } else {
374            text
375        };
376        let base_type = match scalar {
377            "i8" => types::I8,
378            "i16" => types::I16,
379            "i32" => types::I32,
380            "i64" => types::I64,
381            "i128" => types::I128,
382            "f16" => types::F16,
383            "f32" => types::F32,
384            "f64" => types::F64,
385            "f128" => types::F128,
386            _ => return None,
387        };
388        if is_vector {
389            if number <= u32::from(u16::MAX) {
390                base_type.by(number).map(Token::Type)
391            } else {
392                None
393            }
394        } else {
395            Some(Token::Type(base_type))
396        }
397    }
398
399    fn scan_name(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
400        let loc = self.loc();
401        let begin = self.pos + 1;
402
403        assert_eq!(self.lookahead, Some('%'));
404
405        loop {
406            match self.next_ch() {
407                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
408                _ => break,
409            }
410        }
411
412        let end = self.pos;
413        token(Token::Name(&self.source[begin..end]), loc)
414    }
415
416    /// Scan for a multi-line quoted string with no escape character.
417    fn scan_string(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
418        let loc = self.loc();
419        let begin = self.pos + 1;
420
421        assert_eq!(self.lookahead, Some('"'));
422
423        while let Some(c) = self.next_ch() {
424            if c == '"' {
425                break;
426            }
427        }
428
429        let end = self.pos;
430        if self.lookahead != Some('"') {
431            return error(LexError::InvalidChar, self.loc());
432        }
433        self.next_ch();
434        token(Token::String(&self.source[begin..end]), loc)
435    }
436
437    fn scan_hex_sequence(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
438        let loc = self.loc();
439        let begin = self.pos + 1;
440
441        assert_eq!(self.lookahead, Some('#'));
442
443        while let Some(c) = self.next_ch() {
444            if !char::is_digit(c, 16) {
445                break;
446            }
447        }
448
449        let end = self.pos;
450        token(Token::HexSequence(&self.source[begin..end]), loc)
451    }
452
453    /// Given that we've consumed an `@` character, are we looking at a source
454    /// location?
455    fn looking_at_srcloc(&self) -> bool {
456        match self.lookahead {
457            Some(c) => char::is_digit(c, 16),
458            _ => false,
459        }
460    }
461
462    fn scan_srcloc(&mut self, pos: usize, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
463        let begin = pos + 1;
464        while let Some(c) = self.next_ch() {
465            if !char::is_digit(c, 16) {
466                break;
467            }
468        }
469
470        let end = self.pos;
471        token(Token::SourceLoc(&self.source[begin..end]), loc)
472    }
473
474    /// Get the next token or a lexical error.
475    ///
476    /// Return None when the end of the source is encountered.
477    pub fn next(&mut self) -> Option<Result<LocatedToken<'a>, LocatedError>> {
478        loop {
479            let loc = self.loc();
480            return match self.lookahead {
481                None => None,
482                Some(';') => Some(self.scan_comment()),
483                Some('(') => Some(self.scan_char(Token::LPar)),
484                Some(')') => Some(self.scan_char(Token::RPar)),
485                Some('{') => Some(self.scan_char(Token::LBrace)),
486                Some('}') => Some(self.scan_char(Token::RBrace)),
487                Some('[') => Some(self.scan_char(Token::LBracket)),
488                Some(']') => Some(self.scan_char(Token::RBracket)),
489                Some('<') => Some(self.scan_char(Token::LAngle)),
490                Some('>') => Some(self.scan_char(Token::RAngle)),
491                Some(',') => Some(self.scan_char(Token::Comma)),
492                Some('.') => Some(self.scan_char(Token::Dot)),
493                Some(':') => Some(self.scan_char(Token::Colon)),
494                Some('=') => Some(self.scan_char(Token::Equal)),
495                Some('!') => Some(self.scan_char(Token::Bang)),
496                Some('+') => Some(self.scan_number()),
497                Some('*') => Some(self.scan_char(Token::Multiply)),
498                Some('-') => {
499                    if self.looking_at("->") {
500                        Some(self.scan_chars(2, Token::Arrow))
501                    } else {
502                        Some(self.scan_number())
503                    }
504                }
505                Some('0'..='9') => Some(self.scan_number()),
506                Some('a'..='z') | Some('A'..='Z') => {
507                    if self.looking_at("NaN") || self.looking_at("Inf") {
508                        Some(self.scan_number())
509                    } else {
510                        Some(self.scan_word())
511                    }
512                }
513                Some('%') => Some(self.scan_name()),
514                Some('"') => Some(self.scan_string()),
515                Some('#') => Some(self.scan_hex_sequence()),
516                Some('@') => {
517                    let pos = self.pos;
518                    let loc = self.loc();
519                    self.next_ch();
520                    if self.looking_at_srcloc() {
521                        Some(self.scan_srcloc(pos, loc))
522                    } else {
523                        Some(token(Token::At, loc))
524                    }
525                }
526                // all ascii whitespace
527                Some(' ') | Some('\x09'..='\x0d') => {
528                    self.next_ch();
529                    continue;
530                }
531                _ => {
532                    // Skip invalid char, return error.
533                    self.next_ch();
534                    Some(error(LexError::InvalidChar, loc))
535                }
536            };
537        }
538    }
539}
540
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn digits() {
        assert_eq!(trailing_digits(""), 0);
        assert_eq!(trailing_digits("x"), 0);
        assert_eq!(trailing_digits("0x"), 0);
        assert_eq!(trailing_digits("x1"), 1);
        assert_eq!(trailing_digits("1x1"), 1);
        assert_eq!(trailing_digits("1x01"), 2);
    }

    #[test]
    fn entity_name() {
        assert_eq!(split_entity_name(""), None);
        assert_eq!(split_entity_name("x"), None);
        assert_eq!(split_entity_name("x+"), None);
        assert_eq!(split_entity_name("x+1"), Some(("x+", 1)));
        assert_eq!(split_entity_name("x-1"), Some(("x-", 1)));
        assert_eq!(split_entity_name("1"), Some(("", 1)));
        assert_eq!(split_entity_name("x1"), Some(("x", 1)));
        assert_eq!(split_entity_name("xy0"), Some(("xy", 0)));
        // Reject this non-canonical form.
        assert_eq!(split_entity_name("inst01"), None);
    }

    // Expected lexer output for a successfully scanned `token` starting on `line`.
    fn token<'a>(token: Token<'a>, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        Some(super::token(token, Location { line_number: line }))
    }

    // Expected lexer output for the lexical `error` detected on `line`.
    fn error<'a>(error: LexError, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        Some(super::error(error, Location { line_number: line }))
    }

    #[test]
    fn make_lexer() {
        let mut l1 = Lexer::new("");
        let mut l2 = Lexer::new(" ");
        let mut l3 = Lexer::new("\n ");

        assert_eq!(l1.next(), None);
        assert_eq!(l2.next(), None);
        assert_eq!(l3.next(), None);
    }

    #[test]
    fn lex_comment() {
        let mut lex = Lexer::new("; hello");
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), None);

        lex = Lexer::new("\n  ;hello\n;foo");
        assert_eq!(lex.next(), token(Token::Comment(";hello"), 2));
        assert_eq!(lex.next(), token(Token::Comment(";foo"), 3));
        assert_eq!(lex.next(), None);

        // Scan a comment after an invalid char.
        let mut lex = Lexer::new("$; hello");
        assert_eq!(lex.next(), error(LexError::InvalidChar, 1));
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_chars() {
        let mut lex = Lexer::new("(); hello\n = :{, }.");
        assert_eq!(lex.next(), token(Token::LPar, 1));
        assert_eq!(lex.next(), token(Token::RPar, 1));
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), token(Token::Equal, 2));
        assert_eq!(lex.next(), token(Token::Colon, 2));
        assert_eq!(lex.next(), token(Token::LBrace, 2));
        assert_eq!(lex.next(), token(Token::Comma, 2));
        assert_eq!(lex.next(), token(Token::RBrace, 2));
        assert_eq!(lex.next(), token(Token::Dot, 2));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_operators() {
        // A sign that is not followed by something numeric is an operator of its own.
        let mut lex = Lexer::new("-> - + * < > [ ] ! -5");
        assert_eq!(lex.next(), token(Token::Arrow, 1));
        assert_eq!(lex.next(), token(Token::Minus, 1));
        assert_eq!(lex.next(), token(Token::Plus, 1));
        assert_eq!(lex.next(), token(Token::Multiply, 1));
        assert_eq!(lex.next(), token(Token::LAngle, 1));
        assert_eq!(lex.next(), token(Token::RAngle, 1));
        assert_eq!(lex.next(), token(Token::LBracket, 1));
        assert_eq!(lex.next(), token(Token::RBracket, 1));
        assert_eq!(lex.next(), token(Token::Bang, 1));
        assert_eq!(lex.next(), token(Token::Integer("-5"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_numbers() {
        let mut lex = Lexer::new(" 0 2_000 -1,0xf -0x0 0.0 0x0.4p-34 NaN +5");
        assert_eq!(lex.next(), token(Token::Integer("0"), 1));
        assert_eq!(lex.next(), token(Token::Integer("2_000"), 1));
        assert_eq!(lex.next(), token(Token::Integer("-1"), 1));
        assert_eq!(lex.next(), token(Token::Comma, 1));
        assert_eq!(lex.next(), token(Token::Integer("0xf"), 1));
        assert_eq!(lex.next(), token(Token::Integer("-0x0"), 1));
        assert_eq!(lex.next(), token(Token::Float("0.0"), 1));
        assert_eq!(lex.next(), token(Token::Float("0x0.4p-34"), 1));
        assert_eq!(lex.next(), token(Token::Float("NaN"), 1));
        assert_eq!(lex.next(), token(Token::Integer("+5"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_special_floats() {
        let mut lex = Lexer::new("+Inf -NaN +NaN:0x1 -sNaN:0x1 Inf");
        assert_eq!(lex.next(), token(Token::Float("+Inf"), 1));
        assert_eq!(lex.next(), token(Token::Float("-NaN"), 1));
        assert_eq!(lex.next(), token(Token::Float("+NaN:0x1"), 1));
        assert_eq!(lex.next(), token(Token::Float("-sNaN:0x1"), 1));
        assert_eq!(lex.next(), token(Token::Float("Inf"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_identifiers() {
        let mut lex = Lexer::new(
            "v0 v00 vx01 block1234567890 block5234567890 v1x vx1 vxvx4 \
             function0 function i8 i32x4 f32x5 f16 f128",
        );
        assert_eq!(
            lex.next(),
            token(Token::Value(Value::with_number(0).unwrap()), 1)
        );
        assert_eq!(lex.next(), token(Token::Identifier("v00"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vx01"), 1));
        assert_eq!(
            lex.next(),
            token(Token::Block(Block::with_number(1234567890).unwrap()), 1)
        );
        assert_eq!(lex.next(), token(Token::Identifier("block5234567890"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("v1x"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vx1"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vxvx4"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("function0"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("function"), 1));
        assert_eq!(lex.next(), token(Token::Type(types::I8), 1));
        assert_eq!(lex.next(), token(Token::Type(types::I32X4), 1));
        assert_eq!(lex.next(), token(Token::Identifier("f32x5"), 1));
        assert_eq!(lex.next(), token(Token::Type(types::F16), 1));
        assert_eq!(lex.next(), token(Token::Type(types::F128), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_entities() {
        let mut lex = Lexer::new(
            "ss3 dss4 dt5 gv3 mt0 const2 fn2 sig2 userextname345 extable1 tag123 region0 \
             ret1 exn2 ex123",
        );
        assert_eq!(lex.next(), token(Token::StackSlot(3), 1));
        assert_eq!(lex.next(), token(Token::DynamicStackSlot(4), 1));
        assert_eq!(lex.next(), token(Token::DynamicType(5), 1));
        assert_eq!(lex.next(), token(Token::GlobalValue(3), 1));
        assert_eq!(lex.next(), token(Token::MemoryType(0), 1));
        assert_eq!(lex.next(), token(Token::Constant(2), 1));
        assert_eq!(lex.next(), token(Token::FuncRef(2), 1));
        assert_eq!(lex.next(), token(Token::SigRef(2), 1));
        assert_eq!(lex.next(), token(Token::UserNameRef(345), 1));
        assert_eq!(lex.next(), token(Token::ExceptionTableRef(1), 1));
        assert_eq!(lex.next(), token(Token::ExceptionTag(123), 1));
        assert_eq!(lex.next(), token(Token::AliasRegion(0), 1));
        assert_eq!(lex.next(), token(Token::TryCallRet(1), 1));
        assert_eq!(lex.next(), token(Token::TryCallExn(2), 1));
        // `ex` is not an entity prefix; exception tables are spelled `extable`.
        assert_eq!(lex.next(), token(Token::Identifier("ex123"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_cold() {
        let mut lex = Lexer::new("cold cold1 colder");
        assert_eq!(lex.next(), token(Token::Cold, 1));
        assert_eq!(lex.next(), token(Token::Identifier("cold1"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("colder"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_hex_sequences() {
        let mut lex = Lexer::new("#0 #DEADbeef123 #789");

        assert_eq!(lex.next(), token(Token::HexSequence("0"), 1));
        assert_eq!(lex.next(), token(Token::HexSequence("DEADbeef123"), 1));
        assert_eq!(lex.next(), token(Token::HexSequence("789"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_srcloc() {
        // An '@' is a source location only when a hexadecimal digit follows it directly.
        let mut lex = Lexer::new("@00c7 @ @g");

        assert_eq!(lex.next(), token(Token::SourceLoc("00c7"), 1));
        assert_eq!(lex.next(), token(Token::At, 1));
        assert_eq!(lex.next(), token(Token::At, 1));
        assert_eq!(lex.next(), token(Token::Identifier("g"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_names() {
        let mut lex = Lexer::new("%0 %x3 %function %123_abc %ss0 %v3 %block11 %const42 %_");

        assert_eq!(lex.next(), token(Token::Name("0"), 1));
        assert_eq!(lex.next(), token(Token::Name("x3"), 1));
        assert_eq!(lex.next(), token(Token::Name("function"), 1));
        assert_eq!(lex.next(), token(Token::Name("123_abc"), 1));
        assert_eq!(lex.next(), token(Token::Name("ss0"), 1));
        assert_eq!(lex.next(), token(Token::Name("v3"), 1));
        assert_eq!(lex.next(), token(Token::Name("block11"), 1));
        assert_eq!(lex.next(), token(Token::Name("const42"), 1));
        assert_eq!(lex.next(), token(Token::Name("_"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_strings() {
        let mut lex = Lexer::new(
            r#"""  "0" "x3""function" "123 abc" "\" "start
                    and end on
                    different lines" "#,
        );

        assert_eq!(lex.next(), token(Token::String(""), 1));
        assert_eq!(lex.next(), token(Token::String("0"), 1));
        assert_eq!(lex.next(), token(Token::String("x3"), 1));
        assert_eq!(lex.next(), token(Token::String("function"), 1));
        assert_eq!(lex.next(), token(Token::String("123 abc"), 1));
        assert_eq!(lex.next(), token(Token::String(r#"\"#), 1));
        assert_eq!(
            lex.next(),
            token(
                Token::String(
                    r#"start
                    and end on
                    different lines"#
                ),
                1
            )
        );
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_unterminated_string() {
        // The error is reported where the source ends, not where the string started.
        let mut lex = Lexer::new("\"abc\ndef");

        assert_eq!(lex.next(), error(LexError::InvalidChar, 2));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_userrefs() {
        let mut lex = Lexer::new("u0 u1 u234567890 u9:8765");

        assert_eq!(lex.next(), token(Token::UserRef(0), 1));
        assert_eq!(lex.next(), token(Token::UserRef(1), 1));
        assert_eq!(lex.next(), token(Token::UserRef(234567890), 1));
        assert_eq!(lex.next(), token(Token::UserRef(9), 1));
        assert_eq!(lex.next(), token(Token::Colon, 1));
        assert_eq!(lex.next(), token(Token::Integer("8765"), 1));
        assert_eq!(lex.next(), None);
    }
}