cranelift_reader/
lexer.rs

1//! Lexical analysis for .clif files.
2
3use crate::error::Location;
4use cranelift_codegen::ir::types;
5use cranelift_codegen::ir::{Block, Value};
6use std::str::CharIndices;
7use std::u16;
8
/// A Token returned from the `Lexer`.
///
/// Some variants may contain references to the original source text, so the `Token` has the same
/// lifetime as the source.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Token<'a> {
    /// A `;`-to-end-of-line comment, including the leading `;`.
    Comment(&'a str),
    LPar,                   // '('
    RPar,                   // ')'
    LBrace,                 // '{'
    RBrace,                 // '}'
    LBracket,               // '['
    RBracket,               // ']'
    LAngle,                 // '<'
    RAngle,                 // '>'
    Minus,                  // '-'
    Plus,                   // '+'
    Multiply,               // '*'
    Comma,                  // ','
    Dot,                    // '.'
    Colon,                  // ':'
    Equal,                  // '='
    Bang,                   // '!'
    At,                     // '@'
    Arrow,                  // '->'
    Float(&'a str),         // Floating point immediate
    Integer(&'a str),       // Integer immediate
    Type(types::Type),      // i32, f32, i32x4, ...
    DynamicType(u32),       // dt5
    Value(Value),           // v12, v7
    Block(Block),           // block3
    Cold,                   // cold (flag on block)
    StackSlot(u32),         // ss3
    DynamicStackSlot(u32),  // dss4
    GlobalValue(u32),       // gv3
    MemoryType(u32),        // mt0
    Constant(u32),          // const2
    FuncRef(u32),           // fn2
    SigRef(u32),            // sig2
    UserRef(u32),           // u345
    UserNameRef(u32),       // userextname345
    ExceptionTableRef(u32), // ex123
    ExceptionTag(u32),      // tag123
    TryCallRet(u32),        // ret123
    TryCallExn(u32),        // exn123
    Name(&'a str),          // %9arbitrary_alphanum, %x3, %0, %function ...
    String(&'a str),        // "arbitrary quoted string with no escape" ...
    HexSequence(&'a str),   // #89AF
    Identifier(&'a str),    // Unrecognized identifier (opcode, enumerator, ...)
    SourceLoc(&'a str),     // @00c7
}
60
/// A `Token` with an associated location.
#[derive(Debug, PartialEq, Eq)]
pub struct LocatedToken<'a> {
    /// The token itself.
    pub token: Token<'a>,
    /// Where in the source the token was scanned (line number).
    pub location: Location,
}
67
68/// Wrap up a `Token` with the given location.
69fn token(token: Token, loc: Location) -> Result<LocatedToken, LocatedError> {
70    Ok(LocatedToken {
71        token,
72        location: loc,
73    })
74}
75
/// An error from the lexical analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexError {
    /// A character that cannot start or continue any token was encountered.
    InvalidChar,
}
81
/// A `LexError` with an associated Location.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LocatedError {
    /// The error itself.
    pub error: LexError,
    /// Where in the source the error occurred (line number).
    pub location: Location,
}
88
89/// Wrap up a `LexError` with the given location.
90fn error<'a>(error: LexError, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
91    Err(LocatedError {
92        error,
93        location: loc,
94    })
95}
96
/// Get the number of decimal digits at the end of `s`.
///
/// Only ASCII digits (`0`–`9`) are counted, so it is safe and faster to scan
/// the raw bytes in reverse rather than decoding characters.
fn trailing_digits(s: &str) -> usize {
    // Idiomatic ASCII-digit test instead of a manual `b'0' <= b && b <= b'9'`
    // range comparison; the behavior is identical.
    s.as_bytes()
        .iter()
        .rev()
        .take_while(|b| b.is_ascii_digit())
        .count()
}
106
/// Pre-parse a supposed entity name by splitting it into two parts: A head of lowercase ASCII
/// letters and numeric tail.
///
/// Returns `None` when there is no numeric tail, when the tail has a
/// non-canonical leading zero, or when it does not fit in a `u32`.
pub fn split_entity_name(name: &str) -> Option<(&str, u32)> {
    // Count the run of ASCII digits at the end of `name` (inlined equivalent
    // of `trailing_digits`).
    let digits = name
        .bytes()
        .rev()
        .take_while(|b| b.is_ascii_digit())
        .count();
    let (head, tail) = name.split_at(name.len() - digits);
    if tail.starts_with('0') && tail.len() > 1 {
        // Reject non-canonical forms such as `inst01`.
        return None;
    }
    // An empty or overflowing tail fails to parse and yields `None`.
    let number: u32 = tail.parse().ok()?;
    Some((head, number))
}
117
/// Lexical analysis.
///
/// A `Lexer` reads text from a `&str` and provides a sequence of tokens.
///
/// Also keep track of a line number for error reporting.
///
pub struct Lexer<'a> {
    // Complete source being processed.
    source: &'a str,

    // Iterator into `source`.
    chars: CharIndices<'a>,

    // Next character to be processed, or `None` at the end.
    lookahead: Option<char>,

    // Byte index into `source` of the lookahead character, or `source.len()`
    // once the end has been reached.
    pos: usize,

    // Current line number (1-based) of the lookahead character.
    line_number: usize,
}
140
impl<'a> Lexer<'a> {
    /// Create a new `Lexer` over the source text `s`, primed on the first
    /// character.
    pub fn new(s: &'a str) -> Self {
        let mut lex = Self {
            source: s,
            chars: s.char_indices(),
            lookahead: None,
            pos: 0,
            line_number: 1,
        };
        // Advance to the first char.
        lex.next_ch();
        lex
    }

    // Advance to the next character.
    // Return the next lookahead character, or None when the end is encountered.
    // Always update `pos` and `lookahead` to reflect the new position.
    fn next_ch(&mut self) -> Option<char> {
        // Count the newline we are stepping past, so `loc()` reports the line
        // of the *new* lookahead character.
        if self.lookahead == Some('\n') {
            self.line_number += 1;
        }
        match self.chars.next() {
            Some((idx, ch)) => {
                self.pos = idx;
                self.lookahead = Some(ch);
            }
            None => {
                // At the end: park `pos` one past the last byte so slices
                // `&source[begin..self.pos]` include the final token.
                self.pos = self.source.len();
                self.lookahead = None;
            }
        }
        self.lookahead
    }

    // Get the location corresponding to `lookahead`.
    fn loc(&self) -> Location {
        Location {
            line_number: self.line_number,
        }
    }

    // Starting from `lookahead`, are we looking at `prefix`?
    fn looking_at(&self, prefix: &str) -> bool {
        self.source[self.pos..].starts_with(prefix)
    }

    // Starting from `lookahead`, are we looking at a number?
    fn looking_at_numeric(&self) -> bool {
        if let Some(c) = self.lookahead {
            match c {
                '0'..='9' => return true,
                '-' => return true,
                '+' => return true,
                '.' => return true,
                _ => {}
            }
            // Named float constants also count as numeric.
            if self.looking_at("NaN") || self.looking_at("Inf") || self.looking_at("sNaN") {
                return true;
            }
        }
        false
    }

    // Scan a single-char token.
    fn scan_char(&mut self, tok: Token<'a>) -> Result<LocatedToken<'a>, LocatedError> {
        assert_ne!(self.lookahead, None);
        let loc = self.loc();
        self.next_ch();
        token(tok, loc)
    }

    // Scan a multi-char token, consuming exactly `count` characters.
    fn scan_chars(
        &mut self,
        count: usize,
        tok: Token<'a>,
    ) -> Result<LocatedToken<'a>, LocatedError> {
        let loc = self.loc();
        for _ in 0..count {
            assert_ne!(self.lookahead, None);
            self.next_ch();
        }
        token(tok, loc)
    }

    /// Get the rest of the current line.
    /// The next token returned by `next()` will be from the following lines.
    pub fn rest_of_line(&mut self) -> &'a str {
        let begin = self.pos;
        loop {
            match self.next_ch() {
                // Stop at end-of-input or a newline. The newline itself (if
                // any) is left as the lookahead and excluded from the slice.
                None | Some('\n') => return &self.source[begin..self.pos],
                _ => {}
            }
        }
    }

    // Scan a comment extending to the end of the current line.
    fn scan_comment(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
        let loc = self.loc();
        let text = self.rest_of_line();
        token(Token::Comment(text), loc)
    }

    // Scan a number token which can represent either an integer or floating point number.
    //
    // Accept the following forms:
    //
    // - `10`: Integer
    // - `-10`: Integer
    // - `0xff_00`: Integer
    // - `0.0`: Float
    // - `0x1.f`: Float
    // - `-0x2.4`: Float
    // - `0x0.4p-34`: Float
    //
    // This function does not filter out all invalid numbers. It depends in the context-sensitive
    // decoding of the text for that. For example, the number of allowed digits in an `Ieee32` and
    // an `Ieee64` constant are different.
    fn scan_number(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
        let begin = self.pos;
        let loc = self.loc();
        let mut is_float = false;

        // Skip a leading sign.
        match self.lookahead {
            Some('-') => {
                self.next_ch();
                if !self.looking_at_numeric() {
                    // If the next characters won't parse as a number, we return Token::Minus
                    return token(Token::Minus, loc);
                }
            }
            Some('+') => {
                self.next_ch();
                if !self.looking_at_numeric() {
                    // If the next characters won't parse as a number, we return Token::Plus
                    return token(Token::Plus, loc);
                }
            }
            _ => {}
        }

        // Check for NaNs with payloads.
        if self.looking_at("NaN:") || self.looking_at("sNaN:") {
            // Skip the `NaN:` prefix, the loop below won't accept it.
            // We expect a hexadecimal number to follow the colon.
            // Advance until the ':' becomes the lookahead; the scanning loop
            // below then steps past it and consumes the payload digits.
            while self.next_ch() != Some(':') {}
            is_float = true;
        } else if self.looking_at("NaN") || self.looking_at("Inf") {
            // This is Inf or a default quiet NaN.
            is_float = true;
        }

        // Look for the end of this number. Detect the radix point if there is one.
        loop {
            match self.next_ch() {
                Some('-') | Some('_') => {}
                Some('.') => is_float = true,
                Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                _ => break,
            }
        }
        let text = &self.source[begin..self.pos];
        if is_float {
            token(Token::Float(text), loc)
        } else {
            token(Token::Integer(text), loc)
        }
    }

    // Scan a 'word', which is an identifier-like sequence of characters beginning with '_' or an
    // alphabetic char, followed by zero or more alphanumeric or '_' characters.
    fn scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
        let begin = self.pos;
        let loc = self.loc();

        assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_ascii_alphabetic());
        loop {
            match self.next_ch() {
                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                _ => break,
            }
        }
        let text = &self.source[begin..self.pos];

        // Look for numbered well-known entities like block15, v45, ...
        // A word that is neither a numbered entity, a type, nor `cold` falls
        // through to `Token::Identifier`.
        token(
            split_entity_name(text)
                .and_then(|(prefix, number)| {
                    Self::numbered_entity(prefix, number)
                        .or_else(|| Self::value_type(text, prefix, number))
                })
                .unwrap_or_else(|| match text {
                    "cold" => Token::Cold,
                    _ => Token::Identifier(text),
                }),
            loc,
        )
    }

    // If prefix is a well-known entity prefix and suffix is a valid entity number, return the
    // decoded token.
    fn numbered_entity(prefix: &str, number: u32) -> Option<Token<'a>> {
        match prefix {
            "v" => Value::with_number(number).map(Token::Value),
            "block" => Block::with_number(number).map(Token::Block),
            "ss" => Some(Token::StackSlot(number)),
            "dss" => Some(Token::DynamicStackSlot(number)),
            "dt" => Some(Token::DynamicType(number)),
            "gv" => Some(Token::GlobalValue(number)),
            "mt" => Some(Token::MemoryType(number)),
            "const" => Some(Token::Constant(number)),
            "fn" => Some(Token::FuncRef(number)),
            "sig" => Some(Token::SigRef(number)),
            "u" => Some(Token::UserRef(number)),
            "userextname" => Some(Token::UserNameRef(number)),
            "extable" => Some(Token::ExceptionTableRef(number)),
            "tag" => Some(Token::ExceptionTag(number)),
            "ret" => Some(Token::TryCallRet(number)),
            "exn" => Some(Token::TryCallExn(number)),
            _ => None,
        }
    }

    // Recognize a scalar or vector type.
    //
    // For a vector like `i32x4`, `split_entity_name` has already split the
    // word into `prefix = "i32x"` and `number = 4` (the lane count).
    fn value_type(text: &str, prefix: &str, number: u32) -> Option<Token<'a>> {
        let is_vector = prefix.ends_with('x');
        let scalar = if is_vector {
            &prefix[0..prefix.len() - 1]
        } else {
            text
        };
        let base_type = match scalar {
            "i8" => types::I8,
            "i16" => types::I16,
            "i32" => types::I32,
            "i64" => types::I64,
            "i128" => types::I128,
            "f16" => types::F16,
            "f32" => types::F32,
            "f64" => types::F64,
            "f128" => types::F128,
            _ => return None,
        };
        if is_vector {
            // Lane counts are stored as u16, so larger counts cannot form a type.
            if number <= u32::from(u16::MAX) {
                base_type.by(number).map(Token::Type)
            } else {
                None
            }
        } else {
            Some(Token::Type(base_type))
        }
    }

    // Scan a `%name`; the leading '%' is not part of the returned text.
    fn scan_name(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
        let loc = self.loc();
        let begin = self.pos + 1;

        assert_eq!(self.lookahead, Some('%'));

        loop {
            match self.next_ch() {
                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                _ => break,
            }
        }

        let end = self.pos;
        token(Token::Name(&self.source[begin..end]), loc)
    }

    /// Scan for a multi-line quoted string with no escape character.
    fn scan_string(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
        let loc = self.loc();
        // Skip the opening '"'; it is not part of the string contents.
        let begin = self.pos + 1;

        assert_eq!(self.lookahead, Some('"'));

        while let Some(c) = self.next_ch() {
            if c == '"' {
                break;
            }
        }

        let end = self.pos;
        // An unterminated string (end of input before a closing '"') is an error.
        if self.lookahead != Some('"') {
            return error(LexError::InvalidChar, self.loc());
        }
        self.next_ch();
        token(Token::String(&self.source[begin..end]), loc)
    }

    // Scan a `#hex` sequence; the leading '#' is not part of the returned text.
    fn scan_hex_sequence(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
        let loc = self.loc();
        let begin = self.pos + 1;

        assert_eq!(self.lookahead, Some('#'));

        while let Some(c) = self.next_ch() {
            if !char::is_digit(c, 16) {
                break;
            }
        }

        let end = self.pos;
        token(Token::HexSequence(&self.source[begin..end]), loc)
    }

    /// Given that we've consumed an `@` character, are we looking at a source
    /// location?
    fn looking_at_srcloc(&self) -> bool {
        match self.lookahead {
            Some(c) => char::is_digit(c, 16),
            _ => false,
        }
    }

    // Scan the hex digits of a `@00c7` source location. `pos` is the index of
    // the already-consumed '@', which is excluded from the returned text.
    fn scan_srcloc(&mut self, pos: usize, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
        let begin = pos + 1;
        while let Some(c) = self.next_ch() {
            if !char::is_digit(c, 16) {
                break;
            }
        }

        let end = self.pos;
        token(Token::SourceLoc(&self.source[begin..end]), loc)
    }

    /// Get the next token or a lexical error.
    ///
    /// Return None when the end of the source is encountered.
    pub fn next(&mut self) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        loop {
            let loc = self.loc();
            return match self.lookahead {
                None => None,
                Some(';') => Some(self.scan_comment()),
                Some('(') => Some(self.scan_char(Token::LPar)),
                Some(')') => Some(self.scan_char(Token::RPar)),
                Some('{') => Some(self.scan_char(Token::LBrace)),
                Some('}') => Some(self.scan_char(Token::RBrace)),
                Some('[') => Some(self.scan_char(Token::LBracket)),
                Some(']') => Some(self.scan_char(Token::RBracket)),
                Some('<') => Some(self.scan_char(Token::LAngle)),
                Some('>') => Some(self.scan_char(Token::RAngle)),
                Some(',') => Some(self.scan_char(Token::Comma)),
                Some('.') => Some(self.scan_char(Token::Dot)),
                Some(':') => Some(self.scan_char(Token::Colon)),
                Some('=') => Some(self.scan_char(Token::Equal)),
                Some('!') => Some(self.scan_char(Token::Bang)),
                // '+' and '-' may start a signed number; `scan_number` falls
                // back to `Token::Plus` / `Token::Minus` when they don't.
                Some('+') => Some(self.scan_number()),
                Some('*') => Some(self.scan_char(Token::Multiply)),
                Some('-') => {
                    if self.looking_at("->") {
                        Some(self.scan_chars(2, Token::Arrow))
                    } else {
                        Some(self.scan_number())
                    }
                }
                Some('0'..='9') => Some(self.scan_number()),
                Some('a'..='z') | Some('A'..='Z') => {
                    if self.looking_at("NaN") || self.looking_at("Inf") {
                        Some(self.scan_number())
                    } else {
                        Some(self.scan_word())
                    }
                }
                Some('%') => Some(self.scan_name()),
                Some('"') => Some(self.scan_string()),
                Some('#') => Some(self.scan_hex_sequence()),
                Some('@') => {
                    let pos = self.pos;
                    let loc = self.loc();
                    self.next_ch();
                    if self.looking_at_srcloc() {
                        Some(self.scan_srcloc(pos, loc))
                    } else {
                        Some(token(Token::At, loc))
                    }
                }
                // all ascii whitespace
                Some(' ') | Some('\x09'..='\x0d') => {
                    self.next_ch();
                    continue;
                }
                _ => {
                    // Skip invalid char, return error.
                    self.next_ch();
                    Some(error(LexError::InvalidChar, loc))
                }
            };
        }
    }
}
538
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn digits() {
        assert_eq!(trailing_digits(""), 0);
        assert_eq!(trailing_digits("x"), 0);
        assert_eq!(trailing_digits("0x"), 0);
        assert_eq!(trailing_digits("x1"), 1);
        assert_eq!(trailing_digits("1x1"), 1);
        assert_eq!(trailing_digits("1x01"), 2);
    }

    #[test]
    fn entity_name() {
        assert_eq!(split_entity_name(""), None);
        assert_eq!(split_entity_name("x"), None);
        assert_eq!(split_entity_name("x+"), None);
        // The head is not validated here; the parser checks it later.
        assert_eq!(split_entity_name("x+1"), Some(("x+", 1)));
        assert_eq!(split_entity_name("x-1"), Some(("x-", 1)));
        assert_eq!(split_entity_name("1"), Some(("", 1)));
        assert_eq!(split_entity_name("x1"), Some(("x", 1)));
        assert_eq!(split_entity_name("xy0"), Some(("xy", 0)));
        // Reject this non-canonical form.
        assert_eq!(split_entity_name("inst01"), None);
    }

    // Shorthand: build the expected `Ok` lexer result for a token on `line`.
    fn token<'a>(token: Token<'a>, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        Some(super::token(token, Location { line_number: line }))
    }

    // Shorthand: build the expected `Err` lexer result for an error on `line`.
    fn error<'a>(error: LexError, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        Some(super::error(error, Location { line_number: line }))
    }

    #[test]
    fn make_lexer() {
        let mut l1 = Lexer::new("");
        let mut l2 = Lexer::new(" ");
        let mut l3 = Lexer::new("\n ");

        assert_eq!(l1.next(), None);
        assert_eq!(l2.next(), None);
        assert_eq!(l3.next(), None);
    }

    #[test]
    fn lex_comment() {
        let mut lex = Lexer::new("; hello");
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), None);

        lex = Lexer::new("\n  ;hello\n;foo");
        assert_eq!(lex.next(), token(Token::Comment(";hello"), 2));
        assert_eq!(lex.next(), token(Token::Comment(";foo"), 3));
        assert_eq!(lex.next(), None);

        // Scan a comment after an invalid char.
        let mut lex = Lexer::new("$; hello");
        assert_eq!(lex.next(), error(LexError::InvalidChar, 1));
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_chars() {
        let mut lex = Lexer::new("(); hello\n = :{, }.");
        assert_eq!(lex.next(), token(Token::LPar, 1));
        assert_eq!(lex.next(), token(Token::RPar, 1));
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), token(Token::Equal, 2));
        assert_eq!(lex.next(), token(Token::Colon, 2));
        assert_eq!(lex.next(), token(Token::LBrace, 2));
        assert_eq!(lex.next(), token(Token::Comma, 2));
        assert_eq!(lex.next(), token(Token::RBrace, 2));
        assert_eq!(lex.next(), token(Token::Dot, 2));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_numbers() {
        let mut lex = Lexer::new(" 0 2_000 -1,0xf -0x0 0.0 0x0.4p-34 NaN +5");
        assert_eq!(lex.next(), token(Token::Integer("0"), 1));
        assert_eq!(lex.next(), token(Token::Integer("2_000"), 1));
        assert_eq!(lex.next(), token(Token::Integer("-1"), 1));
        assert_eq!(lex.next(), token(Token::Comma, 1));
        assert_eq!(lex.next(), token(Token::Integer("0xf"), 1));
        assert_eq!(lex.next(), token(Token::Integer("-0x0"), 1));
        assert_eq!(lex.next(), token(Token::Float("0.0"), 1));
        assert_eq!(lex.next(), token(Token::Float("0x0.4p-34"), 1));
        assert_eq!(lex.next(), token(Token::Float("NaN"), 1));
        assert_eq!(lex.next(), token(Token::Integer("+5"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_identifiers() {
        let mut lex = Lexer::new(
            "v0 v00 vx01 block1234567890 block5234567890 v1x vx1 vxvx4 \
             function0 function i8 i32x4 f32x5 f16 f128",
        );
        assert_eq!(
            lex.next(),
            token(Token::Value(Value::with_number(0).unwrap()), 1)
        );
        // Leading zeros are non-canonical, so these stay identifiers.
        assert_eq!(lex.next(), token(Token::Identifier("v00"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vx01"), 1));
        assert_eq!(
            lex.next(),
            token(Token::Block(Block::with_number(1234567890).unwrap()), 1)
        );
        // 5234567890 overflows u32, so this is not a block reference.
        assert_eq!(lex.next(), token(Token::Identifier("block5234567890"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("v1x"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vx1"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vxvx4"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("function0"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("function"), 1));
        assert_eq!(lex.next(), token(Token::Type(types::I8), 1));
        assert_eq!(lex.next(), token(Token::Type(types::I32X4), 1));
        assert_eq!(lex.next(), token(Token::Identifier("f32x5"), 1));
        assert_eq!(lex.next(), token(Token::Type(types::F16), 1));
        assert_eq!(lex.next(), token(Token::Type(types::F128), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_hex_sequences() {
        let mut lex = Lexer::new("#0 #DEADbeef123 #789");

        assert_eq!(lex.next(), token(Token::HexSequence("0"), 1));
        assert_eq!(lex.next(), token(Token::HexSequence("DEADbeef123"), 1));
        assert_eq!(lex.next(), token(Token::HexSequence("789"), 1));
    }

    #[test]
    fn lex_names() {
        let mut lex = Lexer::new("%0 %x3 %function %123_abc %ss0 %v3 %block11 %const42 %_");

        assert_eq!(lex.next(), token(Token::Name("0"), 1));
        assert_eq!(lex.next(), token(Token::Name("x3"), 1));
        assert_eq!(lex.next(), token(Token::Name("function"), 1));
        assert_eq!(lex.next(), token(Token::Name("123_abc"), 1));
        assert_eq!(lex.next(), token(Token::Name("ss0"), 1));
        assert_eq!(lex.next(), token(Token::Name("v3"), 1));
        assert_eq!(lex.next(), token(Token::Name("block11"), 1));
        assert_eq!(lex.next(), token(Token::Name("const42"), 1));
        assert_eq!(lex.next(), token(Token::Name("_"), 1));
    }

    #[test]
    fn lex_strings() {
        let mut lex = Lexer::new(
            r#"""  "0" "x3""function" "123 abc" "\" "start
                    and end on
                    different lines" "#,
        );

        assert_eq!(lex.next(), token(Token::String(""), 1));
        assert_eq!(lex.next(), token(Token::String("0"), 1));
        assert_eq!(lex.next(), token(Token::String("x3"), 1));
        assert_eq!(lex.next(), token(Token::String("function"), 1));
        assert_eq!(lex.next(), token(Token::String("123 abc"), 1));
        // There is no escape character, so a lone backslash is literal.
        assert_eq!(lex.next(), token(Token::String(r#"\"#), 1));
        assert_eq!(
            lex.next(),
            token(
                Token::String(
                    r#"start
                    and end on
                    different lines"#
                ),
                1
            )
        );
    }

    #[test]
    fn lex_userrefs() {
        let mut lex = Lexer::new("u0 u1 u234567890 u9:8765");

        assert_eq!(lex.next(), token(Token::UserRef(0), 1));
        assert_eq!(lex.next(), token(Token::UserRef(1), 1));
        assert_eq!(lex.next(), token(Token::UserRef(234567890), 1));
        assert_eq!(lex.next(), token(Token::UserRef(9), 1));
        assert_eq!(lex.next(), token(Token::Colon, 1));
        assert_eq!(lex.next(), token(Token::Integer("8765"), 1));
        assert_eq!(lex.next(), None);
    }
}