cranelift_reader/lexer.rs

//! Lexical analysis for .clif files.

use crate::error::Location;
use cranelift_codegen::ir::types;
use cranelift_codegen::ir::{Block, Value};
use std::str::CharIndices;
use std::u16;

/// A Token returned from the `Lexer`.
///
/// Some variants may contain references to the original source text, so the `Token` has the same
/// lifetime as the source.
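///
/// For example, lexing the text `v12` yields a `Token::Value`, while an unrecognized word such
/// as `iadd` becomes `Token::Identifier("iadd")` and is left for the parser to interpret.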
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Token<'a> {
    Comment(&'a str),
    LPar,                   // '('
    RPar,                   // ')'
    LBrace,                 // '{'
    RBrace,                 // '}'
    LBracket,               // '['
    RBracket,               // ']'
    Minus,                  // '-'
    Plus,                   // '+'
    Multiply,               // '*'
    Comma,                  // ','
    Dot,                    // '.'
    Colon,                  // ':'
    Equal,                  // '='
    Bang,                   // '!'
    At,                     // '@'
    Arrow,                  // '->'
    Float(&'a str),         // Floating point immediate
    Integer(&'a str),       // Integer immediate
    Type(types::Type),      // i32, f32, i32x4, ...
    DynamicType(u32),       // dt5
    Value(Value),           // v12, v7
    Block(Block),           // block3
    Cold,                   // cold (flag on block)
    StackSlot(u32),         // ss3
    DynamicStackSlot(u32),  // dss4
    GlobalValue(u32),       // gv3
    MemoryType(u32),        // mt0
    Constant(u32),          // const2
    FuncRef(u32),           // fn2
    SigRef(u32),            // sig2
    UserRef(u32),           // u345
    UserNameRef(u32),       // userextname345
    ExceptionTableRef(u32), // extable123
    ExceptionTag(u32),      // tag123
    TryCallRet(u32),        // ret123
    TryCallExn(u32),        // exn123
    Name(&'a str),          // %9arbitrary_alphanum, %x3, %0, %function ...
    String(&'a str),        // "arbitrary quoted string with no escape" ...
    HexSequence(&'a str),   // #89AF
    Identifier(&'a str),    // Unrecognized identifier (opcode, enumerator, ...)
    SourceLoc(&'a str),     // @00c7
}

/// A `Token` with an associated location.
#[derive(Debug, PartialEq, Eq)]
pub struct LocatedToken<'a> {
    pub token: Token<'a>,
    pub location: Location,
}

/// Wrap up a `Token` with the given location.
fn token(token: Token, loc: Location) -> Result<LocatedToken, LocatedError> {
    Ok(LocatedToken {
        token,
        location: loc,
    })
}

/// An error from the lexical analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexError {
    InvalidChar,
}

/// A `LexError` with an associated Location.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LocatedError {
    pub error: LexError,
    pub location: Location,
}

/// Wrap up a `LexError` with the given location.
fn error<'a>(error: LexError, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
    Err(LocatedError {
        error,
        location: loc,
    })
}

/// Get the number of decimal digits at the end of `s`.
fn trailing_digits(s: &str) -> usize {
    // It's faster to iterate backwards over bytes, and we're only counting ASCII digits.
    s.as_bytes()
        .iter()
        .rev()
        .take_while(|&&b| b'0' <= b && b <= b'9')
        .count()
}

/// Pre-parse a supposed entity name by splitting it into two parts: a head of lowercase ASCII
/// letters and a numeric tail.
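///
/// For example (an illustrative sketch mirroring the unit tests below):
///
/// ```text
/// split_entity_name("v12")    == Some(("v", 12))
/// split_entity_name("block0") == Some(("block", 0))
/// split_entity_name("v012")   == None   // non-canonical: leading zero in the tail
/// ```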
pub fn split_entity_name(name: &str) -> Option<(&str, u32)> {
    let (head, tail) = name.split_at(name.len() - trailing_digits(name));
    if tail.len() > 1 && tail.starts_with('0') {
        None
    } else {
        tail.parse().ok().map(|n| (head, n))
    }
}

/// Lexical analysis.
///
/// A `Lexer` reads text from a `&str` and provides a sequence of tokens.
///
/// It also keeps track of the current line number for error reporting.
///
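/// A typical driving loop (an illustrative sketch, shown as plain text rather than a doctest):
///
/// ```text
/// let mut lex = Lexer::new("v1 = iadd v2, v3");
/// while let Some(result) = lex.next() {
///     match result {
///         Ok(LocatedToken { token, location }) => { /* hand the token to the parser */ }
///         Err(LocatedError { error, location }) => { /* report the lexical error */ }
///     }
/// }
/// ```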
pub struct Lexer<'a> {
    // Complete source being processed.
    source: &'a str,

    // Iterator into `source`.
    chars: CharIndices<'a>,

    // Next character to be processed, or `None` at the end.
    lookahead: Option<char>,

    // Index into `source` of lookahead character.
    pos: usize,

    // Current line number.
    line_number: usize,
}

impl<'a> Lexer<'a> {
    pub fn new(s: &'a str) -> Self {
        let mut lex = Self {
            source: s,
            chars: s.char_indices(),
            lookahead: None,
            pos: 0,
            line_number: 1,
        };
        // Advance to the first char.
        lex.next_ch();
        lex
    }

    // Advance to the next character.
    // Return the next lookahead character, or None when the end is encountered.
    // Always update `lookahead` and `pos` to reflect the new position.
    fn next_ch(&mut self) -> Option<char> {
        if self.lookahead == Some('\n') {
            self.line_number += 1;
        }
        match self.chars.next() {
            Some((idx, ch)) => {
                self.pos = idx;
                self.lookahead = Some(ch);
            }
            None => {
                self.pos = self.source.len();
                self.lookahead = None;
            }
        }
        self.lookahead
    }

    // Get the location corresponding to `lookahead`.
    fn loc(&self) -> Location {
        Location {
            line_number: self.line_number,
        }
    }

    // Starting from `lookahead`, are we looking at `prefix`?
    fn looking_at(&self, prefix: &str) -> bool {
        self.source[self.pos..].starts_with(prefix)
    }

    // Starting from `lookahead`, are we looking at a number?
    fn looking_at_numeric(&self) -> bool {
        if let Some(c) = self.lookahead {
            match c {
                '0'..='9' => return true,
                '-' => return true,
                '+' => return true,
                '.' => return true,
                _ => {}
            }
            if self.looking_at("NaN") || self.looking_at("Inf") || self.looking_at("sNaN") {
                return true;
            }
        }
        false
    }

    // Scan a single-char token.
    fn scan_char(&mut self, tok: Token<'a>) -> Result<LocatedToken<'a>, LocatedError> {
        assert_ne!(self.lookahead, None);
        let loc = self.loc();
        self.next_ch();
        token(tok, loc)
    }

    // Scan a multi-char token.
    fn scan_chars(
        &mut self,
        count: usize,
        tok: Token<'a>,
    ) -> Result<LocatedToken<'a>, LocatedError> {
        let loc = self.loc();
        for _ in 0..count {
            assert_ne!(self.lookahead, None);
            self.next_ch();
        }
        token(tok, loc)
    }

    /// Get the rest of the current line.
    /// The next token returned by `next()` will be from the following lines.
    pub fn rest_of_line(&mut self) -> &'a str {
        let begin = self.pos;
        loop {
            match self.next_ch() {
                None | Some('\n') => return &self.source[begin..self.pos],
                _ => {}
            }
        }
    }

    // Scan a comment extending to the end of the current line.
    fn scan_comment(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
        let loc = self.loc();
        let text = self.rest_of_line();
        token(Token::Comment(text), loc)
    }

    // Scan a number token which can represent either an integer or floating point number.
    //
    // Accept the following forms:
    //
    // - `10`: Integer
    // - `-10`: Integer
    // - `0xff_00`: Integer
    // - `0.0`: Float
    // - `0x1.f`: Float
    // - `-0x2.4`: Float
    // - `0x0.4p-34`: Float
    //
    // This function does not filter out all invalid numbers. It relies on the context-sensitive
    // decoding of the text for that. For example, an `Ieee32` and an `Ieee64` constant allow
    // different numbers of digits.
    fn scan_number(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
        let begin = self.pos;
        let loc = self.loc();
        let mut is_float = false;

        // Skip a leading sign.
        match self.lookahead {
            Some('-') => {
                self.next_ch();
                if !self.looking_at_numeric() {
                    // If the next characters won't parse as a number, we return Token::Minus
                    return token(Token::Minus, loc);
                }
            }
            Some('+') => {
                self.next_ch();
                if !self.looking_at_numeric() {
                    // If the next characters won't parse as a number, we return Token::Plus
                    return token(Token::Plus, loc);
                }
            }
            _ => {}
        }

        // Check for NaNs with payloads.
        if self.looking_at("NaN:") || self.looking_at("sNaN:") {
            // Skip the `NaN:` prefix; the loop below won't accept the ':'.
            // We expect a hexadecimal number to follow the colon.
            while self.next_ch() != Some(':') {}
            is_float = true;
        } else if self.looking_at("NaN") || self.looking_at("Inf") {
            // This is Inf or a default quiet NaN.
            is_float = true;
        }

        // Look for the end of this number. Detect the radix point if there is one.
        loop {
            match self.next_ch() {
                Some('-') | Some('_') => {}
                Some('.') => is_float = true,
                Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                _ => break,
            }
        }
        let text = &self.source[begin..self.pos];
        if is_float {
            token(Token::Float(text), loc)
        } else {
            token(Token::Integer(text), loc)
        }
    }

    // Scan a 'word', which is an identifier-like sequence of characters beginning with '_' or an
    // alphabetic char, followed by zero or more alphanumeric or '_' characters.
    fn scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
        let begin = self.pos;
        let loc = self.loc();

        assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_ascii_alphabetic());
        loop {
            match self.next_ch() {
                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                _ => break,
            }
        }
        let text = &self.source[begin..self.pos];

        // Look for numbered well-known entities like block15, v45, ...
        token(
            split_entity_name(text)
                .and_then(|(prefix, number)| {
                    Self::numbered_entity(prefix, number)
                        .or_else(|| Self::value_type(text, prefix, number))
                })
                .unwrap_or_else(|| match text {
                    "cold" => Token::Cold,
                    _ => Token::Identifier(text),
                }),
            loc,
        )
    }

    // If `prefix` is a well-known entity prefix and `number` is a valid entity number, return the
    // decoded token.
    fn numbered_entity(prefix: &str, number: u32) -> Option<Token<'a>> {
        match prefix {
            "v" => Value::with_number(number).map(Token::Value),
            "block" => Block::with_number(number).map(Token::Block),
            "ss" => Some(Token::StackSlot(number)),
            "dss" => Some(Token::DynamicStackSlot(number)),
            "dt" => Some(Token::DynamicType(number)),
            "gv" => Some(Token::GlobalValue(number)),
            "mt" => Some(Token::MemoryType(number)),
            "const" => Some(Token::Constant(number)),
            "fn" => Some(Token::FuncRef(number)),
            "sig" => Some(Token::SigRef(number)),
            "u" => Some(Token::UserRef(number)),
            "userextname" => Some(Token::UserNameRef(number)),
            "extable" => Some(Token::ExceptionTableRef(number)),
            "tag" => Some(Token::ExceptionTag(number)),
            "ret" => Some(Token::TryCallRet(number)),
            "exn" => Some(Token::TryCallExn(number)),
            _ => None,
        }
    }

    // Recognize a scalar or vector type.
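    //
    // For example (illustrative): `text = "f32"` maps directly to the scalar type F32, while
    // `text = "i32x4"` arrives here as `prefix = "i32x"`, `number = 4` and becomes I32X4.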
    fn value_type(text: &str, prefix: &str, number: u32) -> Option<Token<'a>> {
        let is_vector = prefix.ends_with('x');
        let scalar = if is_vector {
            &prefix[0..prefix.len() - 1]
        } else {
            text
        };
        let base_type = match scalar {
            "i8" => types::I8,
            "i16" => types::I16,
            "i32" => types::I32,
            "i64" => types::I64,
            "i128" => types::I128,
            "f16" => types::F16,
            "f32" => types::F32,
            "f64" => types::F64,
            "f128" => types::F128,
            _ => return None,
        };
        if is_vector {
            if number <= u32::from(u16::MAX) {
                base_type.by(number).map(Token::Type)
            } else {
                None
            }
        } else {
            Some(Token::Type(base_type))
        }
    }

    fn scan_name(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
        let loc = self.loc();
        let begin = self.pos + 1;

        assert_eq!(self.lookahead, Some('%'));

        loop {
            match self.next_ch() {
                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                _ => break,
            }
        }

        let end = self.pos;
        token(Token::Name(&self.source[begin..end]), loc)
    }

    /// Scan for a multi-line quoted string with no escape character.
    fn scan_string(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
        let loc = self.loc();
        let begin = self.pos + 1;

        assert_eq!(self.lookahead, Some('"'));

        while let Some(c) = self.next_ch() {
            if c == '"' {
                break;
            }
        }

        let end = self.pos;
        if self.lookahead != Some('"') {
            return error(LexError::InvalidChar, self.loc());
        }
        self.next_ch();
        token(Token::String(&self.source[begin..end]), loc)
    }

    fn scan_hex_sequence(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
        let loc = self.loc();
        let begin = self.pos + 1;

        assert_eq!(self.lookahead, Some('#'));

        while let Some(c) = self.next_ch() {
            if !char::is_digit(c, 16) {
                break;
            }
        }

        let end = self.pos;
        token(Token::HexSequence(&self.source[begin..end]), loc)
    }

    /// Given that we've consumed an `@` character, are we looking at a source
    /// location?
    fn looking_at_srcloc(&self) -> bool {
        match self.lookahead {
            Some(c) => char::is_digit(c, 16),
            _ => false,
        }
    }

    fn scan_srcloc(&mut self, pos: usize, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
        let begin = pos + 1;
        while let Some(c) = self.next_ch() {
            if !char::is_digit(c, 16) {
                break;
            }
        }

        let end = self.pos;
        token(Token::SourceLoc(&self.source[begin..end]), loc)
    }

    /// Get the next token or a lexical error.
    ///
    /// Return None when the end of the source is encountered.
    pub fn next(&mut self) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        loop {
            let loc = self.loc();
            return match self.lookahead {
                None => None,
                Some(';') => Some(self.scan_comment()),
                Some('(') => Some(self.scan_char(Token::LPar)),
                Some(')') => Some(self.scan_char(Token::RPar)),
                Some('{') => Some(self.scan_char(Token::LBrace)),
                Some('}') => Some(self.scan_char(Token::RBrace)),
                Some('[') => Some(self.scan_char(Token::LBracket)),
                Some(']') => Some(self.scan_char(Token::RBracket)),
                Some(',') => Some(self.scan_char(Token::Comma)),
                Some('.') => Some(self.scan_char(Token::Dot)),
                Some(':') => Some(self.scan_char(Token::Colon)),
                Some('=') => Some(self.scan_char(Token::Equal)),
                Some('!') => Some(self.scan_char(Token::Bang)),
                Some('+') => Some(self.scan_number()),
                Some('*') => Some(self.scan_char(Token::Multiply)),
                Some('-') => {
                    if self.looking_at("->") {
                        Some(self.scan_chars(2, Token::Arrow))
                    } else {
                        Some(self.scan_number())
                    }
                }
                Some('0'..='9') => Some(self.scan_number()),
                Some('a'..='z') | Some('A'..='Z') => {
                    if self.looking_at("NaN") || self.looking_at("Inf") {
                        Some(self.scan_number())
                    } else {
                        Some(self.scan_word())
                    }
                }
                Some('%') => Some(self.scan_name()),
                Some('"') => Some(self.scan_string()),
                Some('#') => Some(self.scan_hex_sequence()),
                Some('@') => {
                    let pos = self.pos;
                    let loc = self.loc();
                    self.next_ch();
                    if self.looking_at_srcloc() {
                        Some(self.scan_srcloc(pos, loc))
                    } else {
                        Some(token(Token::At, loc))
                    }
                }
                // all ascii whitespace
                Some(' ') | Some('\x09'..='\x0d') => {
                    self.next_ch();
                    continue;
                }
                _ => {
                    // Skip invalid char, return error.
                    self.next_ch();
                    Some(error(LexError::InvalidChar, loc))
                }
            };
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn digits() {
        assert_eq!(trailing_digits(""), 0);
        assert_eq!(trailing_digits("x"), 0);
        assert_eq!(trailing_digits("0x"), 0);
        assert_eq!(trailing_digits("x1"), 1);
        assert_eq!(trailing_digits("1x1"), 1);
        assert_eq!(trailing_digits("1x01"), 2);
    }

    #[test]
    fn entity_name() {
        assert_eq!(split_entity_name(""), None);
        assert_eq!(split_entity_name("x"), None);
        assert_eq!(split_entity_name("x+"), None);
        assert_eq!(split_entity_name("x+1"), Some(("x+", 1)));
        assert_eq!(split_entity_name("x-1"), Some(("x-", 1)));
        assert_eq!(split_entity_name("1"), Some(("", 1)));
        assert_eq!(split_entity_name("x1"), Some(("x", 1)));
        assert_eq!(split_entity_name("xy0"), Some(("xy", 0)));
        // Reject this non-canonical form.
        assert_eq!(split_entity_name("inst01"), None);
    }

    fn token<'a>(token: Token<'a>, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        Some(super::token(token, Location { line_number: line }))
    }

    fn error<'a>(error: LexError, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        Some(super::error(error, Location { line_number: line }))
    }

    #[test]
    fn make_lexer() {
        let mut l1 = Lexer::new("");
        let mut l2 = Lexer::new(" ");
        let mut l3 = Lexer::new("\n ");

        assert_eq!(l1.next(), None);
        assert_eq!(l2.next(), None);
        assert_eq!(l3.next(), None);
    }

    #[test]
    fn lex_comment() {
        let mut lex = Lexer::new("; hello");
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), None);

        lex = Lexer::new("\n  ;hello\n;foo");
        assert_eq!(lex.next(), token(Token::Comment(";hello"), 2));
        assert_eq!(lex.next(), token(Token::Comment(";foo"), 3));
        assert_eq!(lex.next(), None);

        // Scan a comment after an invalid char.
        let mut lex = Lexer::new("$; hello");
        assert_eq!(lex.next(), error(LexError::InvalidChar, 1));
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_chars() {
        let mut lex = Lexer::new("(); hello\n = :{, }.");
        assert_eq!(lex.next(), token(Token::LPar, 1));
        assert_eq!(lex.next(), token(Token::RPar, 1));
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), token(Token::Equal, 2));
        assert_eq!(lex.next(), token(Token::Colon, 2));
        assert_eq!(lex.next(), token(Token::LBrace, 2));
        assert_eq!(lex.next(), token(Token::Comma, 2));
        assert_eq!(lex.next(), token(Token::RBrace, 2));
        assert_eq!(lex.next(), token(Token::Dot, 2));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_numbers() {
        let mut lex = Lexer::new(" 0 2_000 -1,0xf -0x0 0.0 0x0.4p-34 NaN +5");
        assert_eq!(lex.next(), token(Token::Integer("0"), 1));
        assert_eq!(lex.next(), token(Token::Integer("2_000"), 1));
        assert_eq!(lex.next(), token(Token::Integer("-1"), 1));
        assert_eq!(lex.next(), token(Token::Comma, 1));
        assert_eq!(lex.next(), token(Token::Integer("0xf"), 1));
        assert_eq!(lex.next(), token(Token::Integer("-0x0"), 1));
        assert_eq!(lex.next(), token(Token::Float("0.0"), 1));
        assert_eq!(lex.next(), token(Token::Float("0x0.4p-34"), 1));
        assert_eq!(lex.next(), token(Token::Float("NaN"), 1));
        assert_eq!(lex.next(), token(Token::Integer("+5"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_identifiers() {
        let mut lex = Lexer::new(
            "v0 v00 vx01 block1234567890 block5234567890 v1x vx1 vxvx4 \
             function0 function i8 i32x4 f32x5 f16 f128",
        );
        assert_eq!(
            lex.next(),
            token(Token::Value(Value::with_number(0).unwrap()), 1)
        );
        assert_eq!(lex.next(), token(Token::Identifier("v00"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vx01"), 1));
        assert_eq!(
            lex.next(),
            token(Token::Block(Block::with_number(1234567890).unwrap()), 1)
        );
        assert_eq!(lex.next(), token(Token::Identifier("block5234567890"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("v1x"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vx1"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vxvx4"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("function0"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("function"), 1));
        assert_eq!(lex.next(), token(Token::Type(types::I8), 1));
        assert_eq!(lex.next(), token(Token::Type(types::I32X4), 1));
        assert_eq!(lex.next(), token(Token::Identifier("f32x5"), 1));
        assert_eq!(lex.next(), token(Token::Type(types::F16), 1));
        assert_eq!(lex.next(), token(Token::Type(types::F128), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_hex_sequences() {
        let mut lex = Lexer::new("#0 #DEADbeef123 #789");

        assert_eq!(lex.next(), token(Token::HexSequence("0"), 1));
        assert_eq!(lex.next(), token(Token::HexSequence("DEADbeef123"), 1));
        assert_eq!(lex.next(), token(Token::HexSequence("789"), 1));
    }

    #[test]
    fn lex_names() {
        let mut lex = Lexer::new("%0 %x3 %function %123_abc %ss0 %v3 %block11 %const42 %_");

        assert_eq!(lex.next(), token(Token::Name("0"), 1));
        assert_eq!(lex.next(), token(Token::Name("x3"), 1));
        assert_eq!(lex.next(), token(Token::Name("function"), 1));
        assert_eq!(lex.next(), token(Token::Name("123_abc"), 1));
        assert_eq!(lex.next(), token(Token::Name("ss0"), 1));
        assert_eq!(lex.next(), token(Token::Name("v3"), 1));
        assert_eq!(lex.next(), token(Token::Name("block11"), 1));
        assert_eq!(lex.next(), token(Token::Name("const42"), 1));
        assert_eq!(lex.next(), token(Token::Name("_"), 1));
    }

    #[test]
    fn lex_strings() {
        let mut lex = Lexer::new(
            r#"""  "0" "x3""function" "123 abc" "\" "start
                    and end on
                    different lines" "#,
        );

        assert_eq!(lex.next(), token(Token::String(""), 1));
        assert_eq!(lex.next(), token(Token::String("0"), 1));
        assert_eq!(lex.next(), token(Token::String("x3"), 1));
        assert_eq!(lex.next(), token(Token::String("function"), 1));
        assert_eq!(lex.next(), token(Token::String("123 abc"), 1));
        assert_eq!(lex.next(), token(Token::String(r#"\"#), 1));
        assert_eq!(
            lex.next(),
            token(
                Token::String(
                    r#"start
                    and end on
                    different lines"#
                ),
                1
            )
        );
    }

    #[test]
    fn lex_userrefs() {
        let mut lex = Lexer::new("u0 u1 u234567890 u9:8765");

        assert_eq!(lex.next(), token(Token::UserRef(0), 1));
        assert_eq!(lex.next(), token(Token::UserRef(1), 1));
        assert_eq!(lex.next(), token(Token::UserRef(234567890), 1));
        assert_eq!(lex.next(), token(Token::UserRef(9), 1));
        assert_eq!(lex.next(), token(Token::Colon, 1));
        assert_eq!(lex.next(), token(Token::Integer("8765"), 1));
        assert_eq!(lex.next(), None);
    }
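
    // Additional coverage sketch for the remaining numbered entities; the expected tokens follow
    // directly from the prefix table in `numbered_entity` above.
    #[test]
    fn lex_other_entities() {
        let mut lex = Lexer::new(
            "ss3 dss4 gv3 mt0 const2 fn2 sig2 userextname345 \
             extable1 tag2 ret3 exn4 dt5",
        );

        assert_eq!(lex.next(), token(Token::StackSlot(3), 1));
        assert_eq!(lex.next(), token(Token::DynamicStackSlot(4), 1));
        assert_eq!(lex.next(), token(Token::GlobalValue(3), 1));
        assert_eq!(lex.next(), token(Token::MemoryType(0), 1));
        assert_eq!(lex.next(), token(Token::Constant(2), 1));
        assert_eq!(lex.next(), token(Token::FuncRef(2), 1));
        assert_eq!(lex.next(), token(Token::SigRef(2), 1));
        assert_eq!(lex.next(), token(Token::UserNameRef(345), 1));
        assert_eq!(lex.next(), token(Token::ExceptionTableRef(1), 1));
        assert_eq!(lex.next(), token(Token::ExceptionTag(2), 1));
        assert_eq!(lex.next(), token(Token::TryCallRet(3), 1));
        assert_eq!(lex.next(), token(Token::TryCallExn(4), 1));
        assert_eq!(lex.next(), token(Token::DynamicType(5), 1));
        assert_eq!(lex.next(), None);
    }

    // Sketch of source-location scanning: `@` followed by hex digits becomes `Token::SourceLoc`,
    // while a bare `@` is returned as `Token::At`.
    #[test]
    fn lex_srclocs() {
        let mut lex = Lexer::new("@00c7 @ 1");

        assert_eq!(lex.next(), token(Token::SourceLoc("00c7"), 1));
        assert_eq!(lex.next(), token(Token::At, 1));
        assert_eq!(lex.next(), token(Token::Integer("1"), 1));
        assert_eq!(lex.next(), None);
    }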
}