// cranelift_reader/lexer.rs

1//! Lexical analysis for .clif files.
2
3use crate::error::Location;
4use cranelift_codegen::ir::types;
5use cranelift_codegen::ir::{Block, Value};
6use std::str::CharIndices;
7use std::u16;
8
/// A Token returned from the `Lexer`.
///
/// Some variants may contains references to the original source text, so the `Token` has the same
/// lifetime as the source.
///
/// Numeric immediates (`Float`, `Integer`) carry the original source text rather than a decoded
/// value, since decoding is context-sensitive (see `Lexer::scan_number`).
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Token<'a> {
    /// A comment extending to the end of a line, including the leading `;`.
    Comment(&'a str),
    LPar,                  // '('
    RPar,                  // ')'
    LBrace,                // '{'
    RBrace,                // '}'
    LBracket,              // '['
    RBracket,              // ']'
    Minus,                 // '-'
    Plus,                  // '+'
    Multiply,              // '*'
    Comma,                 // ','
    Dot,                   // '.'
    Colon,                 // ':'
    Equal,                 // '='
    Bang,                  // '!'
    At,                    // '@'
    Arrow,                 // '->'
    Float(&'a str),        // Floating point immediate
    Integer(&'a str),      // Integer immediate
    Type(types::Type),     // i32, f32, i32x4, ...
    DynamicType(u32),      // dt5
    Value(Value),          // v12, v7
    Block(Block),          // block3
    Cold,                  // cold (flag on block)
    StackSlot(u32),        // ss3
    DynamicStackSlot(u32), // dss4
    GlobalValue(u32),      // gv3
    MemoryType(u32),       // mt0
    Constant(u32),         // const2
    FuncRef(u32),          // fn2
    SigRef(u32),           // sig2
    UserRef(u32),          // u345
    UserNameRef(u32),      // userextname345
    Name(&'a str),         // %9arbitrary_alphanum, %x3, %0, %function ...
    String(&'a str),       // "arbitrary quoted string with no escape" ...
    HexSequence(&'a str),  // #89AF
    Identifier(&'a str),   // Unrecognized identifier (opcode, enumerator, ...)
    SourceLoc(&'a str),    // @00c7
}
54
/// A `Token` with an associated location.
#[derive(Debug, PartialEq, Eq)]
pub struct LocatedToken<'a> {
    /// The token itself.
    pub token: Token<'a>,
    /// The source location (line number) where the token starts.
    pub location: Location,
}
61
62/// Wrap up a `Token` with the given location.
63fn token(token: Token, loc: Location) -> Result<LocatedToken, LocatedError> {
64    Ok(LocatedToken {
65        token,
66        location: loc,
67    })
68}
69
/// An error from the lexical analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexError {
    /// A character that cannot start or continue any token was encountered.
    InvalidChar,
}
75
/// A `LexError` with an associated Location.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LocatedError {
    /// The error itself.
    pub error: LexError,
    /// The source location (line number) where the error occurred.
    pub location: Location,
}
82
83/// Wrap up a `LexError` with the given location.
84fn error<'a>(error: LexError, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
85    Err(LocatedError {
86        error,
87        location: loc,
88    })
89}
90
/// Get the number of decimal digits at the end of `s`.
///
/// Iterates backwards over the raw bytes — only ASCII digits are counted, so
/// full UTF-8 decoding is unnecessary and this is faster.
fn trailing_digits(s: &str) -> usize {
    // `is_ascii_digit` replaces the manual `b'0' <= b && b <= b'9'` range check.
    s.bytes().rev().take_while(|b| b.is_ascii_digit()).count()
}
100
101/// Pre-parse a supposed entity name by splitting it into two parts: A head of lowercase ASCII
102/// letters and numeric tail.
103pub fn split_entity_name(name: &str) -> Option<(&str, u32)> {
104    let (head, tail) = name.split_at(name.len() - trailing_digits(name));
105    if tail.len() > 1 && tail.starts_with('0') {
106        None
107    } else {
108        tail.parse().ok().map(|n| (head, n))
109    }
110}
111
/// Lexical analysis.
///
/// A `Lexer` reads text from a `&str` and provides a sequence of tokens.
///
/// Also keep track of a line number for error reporting.
///
pub struct Lexer<'a> {
    // Complete source being processed.
    source: &'a str,

    // Iterator into `source`.
    chars: CharIndices<'a>,

    // Next character to be processed, or `None` at the end.
    lookahead: Option<char>,

    // Byte index into `source` of the lookahead character, or `source.len()` at the end.
    pos: usize,

    // Current line number, starting at 1.
    line_number: usize,
}
134
135impl<'a> Lexer<'a> {
136    pub fn new(s: &'a str) -> Self {
137        let mut lex = Self {
138            source: s,
139            chars: s.char_indices(),
140            lookahead: None,
141            pos: 0,
142            line_number: 1,
143        };
144        // Advance to the first char.
145        lex.next_ch();
146        lex
147    }
148
149    // Advance to the next character.
150    // Return the next lookahead character, or None when the end is encountered.
151    // Always update cur_ch to reflect
152    fn next_ch(&mut self) -> Option<char> {
153        if self.lookahead == Some('\n') {
154            self.line_number += 1;
155        }
156        match self.chars.next() {
157            Some((idx, ch)) => {
158                self.pos = idx;
159                self.lookahead = Some(ch);
160            }
161            None => {
162                self.pos = self.source.len();
163                self.lookahead = None;
164            }
165        }
166        self.lookahead
167    }
168
169    // Get the location corresponding to `lookahead`.
170    fn loc(&self) -> Location {
171        Location {
172            line_number: self.line_number,
173        }
174    }
175
176    // Starting from `lookahead`, are we looking at `prefix`?
177    fn looking_at(&self, prefix: &str) -> bool {
178        self.source[self.pos..].starts_with(prefix)
179    }
180
181    // Starting from `lookahead`, are we looking at a number?
182    fn looking_at_numeric(&self) -> bool {
183        if let Some(c) = self.lookahead {
184            match c {
185                '0'..='9' => return true,
186                '-' => return true,
187                '+' => return true,
188                '.' => return true,
189                _ => {}
190            }
191            if self.looking_at("NaN") || self.looking_at("Inf") || self.looking_at("sNaN") {
192                return true;
193            }
194        }
195        false
196    }
197
198    // Scan a single-char token.
199    fn scan_char(&mut self, tok: Token<'a>) -> Result<LocatedToken<'a>, LocatedError> {
200        assert_ne!(self.lookahead, None);
201        let loc = self.loc();
202        self.next_ch();
203        token(tok, loc)
204    }
205
206    // Scan a multi-char token.
207    fn scan_chars(
208        &mut self,
209        count: usize,
210        tok: Token<'a>,
211    ) -> Result<LocatedToken<'a>, LocatedError> {
212        let loc = self.loc();
213        for _ in 0..count {
214            assert_ne!(self.lookahead, None);
215            self.next_ch();
216        }
217        token(tok, loc)
218    }
219
220    /// Get the rest of the current line.
221    /// The next token returned by `next()` will be from the following lines.
222    pub fn rest_of_line(&mut self) -> &'a str {
223        let begin = self.pos;
224        loop {
225            match self.next_ch() {
226                None | Some('\n') => return &self.source[begin..self.pos],
227                _ => {}
228            }
229        }
230    }
231
232    // Scan a comment extending to the end of the current line.
233    fn scan_comment(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
234        let loc = self.loc();
235        let text = self.rest_of_line();
236        token(Token::Comment(text), loc)
237    }
238
    // Scan a number token which can represent either an integer or floating point number.
    //
    // Accept the following forms:
    //
    // - `10`: Integer
    // - `-10`: Integer
    // - `0xff_00`: Integer
    // - `0.0`: Float
    // - `0x1.f`: Float
    // - `-0x2.4`: Float
    // - `0x0.4p-34`: Float
    //
    // This function does not filter out all invalid numbers. It depends in the context-sensitive
    // decoding of the text for that. For example, the number of allowed digits in an `Ieee32` and
    // an `Ieee64` constant are different.
    fn scan_number(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
        let begin = self.pos;
        let loc = self.loc();
        let mut is_float = false;

        // Skip a leading sign.
        match self.lookahead {
            Some('-') => {
                self.next_ch();
                if !self.looking_at_numeric() {
                    // If the next characters won't parse as a number, we return Token::Minus
                    return token(Token::Minus, loc);
                }
            }
            Some('+') => {
                self.next_ch();
                if !self.looking_at_numeric() {
                    // If the next characters won't parse as a number, we return Token::Plus
                    return token(Token::Plus, loc);
                }
            }
            _ => {}
        }

        // Check for NaNs with payloads.
        if self.looking_at("NaN:") || self.looking_at("sNaN:") {
            // Skip the `NaN:` prefix, the loop below won't accept it.
            // We expect a hexadecimal number to follow the colon.
            // The colon is guaranteed present by the `looking_at` check above,
            // so this loop terminates.
            while self.next_ch() != Some(':') {}
            is_float = true;
        } else if self.looking_at("NaN") || self.looking_at("Inf") {
            // This is Inf or a default quiet NaN.
            is_float = true;
        }

        // Look for the end of this number. Detect the radix point if there is one.
        loop {
            match self.next_ch() {
                // `-` is accepted mid-number for exponents like `0x0.4p-34`;
                // `_` is a digit separator as in `2_000`.
                Some('-') | Some('_') => {}
                Some('.') => is_float = true,
                // Letters cover hex digits and the `x`/`p` markers.
                Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                _ => break,
            }
        }
        let text = &self.source[begin..self.pos];
        if is_float {
            token(Token::Float(text), loc)
        } else {
            token(Token::Integer(text), loc)
        }
    }
305
306    // Scan a 'word', which is an identifier-like sequence of characters beginning with '_' or an
307    // alphabetic char, followed by zero or more alphanumeric or '_' characters.
308    fn scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
309        let begin = self.pos;
310        let loc = self.loc();
311
312        assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_ascii_alphabetic());
313        loop {
314            match self.next_ch() {
315                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
316                _ => break,
317            }
318        }
319        let text = &self.source[begin..self.pos];
320
321        // Look for numbered well-known entities like block15, v45, ...
322        token(
323            split_entity_name(text)
324                .and_then(|(prefix, number)| {
325                    Self::numbered_entity(prefix, number)
326                        .or_else(|| Self::value_type(text, prefix, number))
327                })
328                .unwrap_or_else(|| match text {
329                    "cold" => Token::Cold,
330                    _ => Token::Identifier(text),
331                }),
332            loc,
333        )
334    }
335
336    // If prefix is a well-known entity prefix and suffix is a valid entity number, return the
337    // decoded token.
338    fn numbered_entity(prefix: &str, number: u32) -> Option<Token<'a>> {
339        match prefix {
340            "v" => Value::with_number(number).map(Token::Value),
341            "block" => Block::with_number(number).map(Token::Block),
342            "ss" => Some(Token::StackSlot(number)),
343            "dss" => Some(Token::DynamicStackSlot(number)),
344            "dt" => Some(Token::DynamicType(number)),
345            "gv" => Some(Token::GlobalValue(number)),
346            "mt" => Some(Token::MemoryType(number)),
347            "const" => Some(Token::Constant(number)),
348            "fn" => Some(Token::FuncRef(number)),
349            "sig" => Some(Token::SigRef(number)),
350            "u" => Some(Token::UserRef(number)),
351            "userextname" => Some(Token::UserNameRef(number)),
352            _ => None,
353        }
354    }
355
356    // Recognize a scalar or vector type.
357    fn value_type(text: &str, prefix: &str, number: u32) -> Option<Token<'a>> {
358        let is_vector = prefix.ends_with('x');
359        let scalar = if is_vector {
360            &prefix[0..prefix.len() - 1]
361        } else {
362            text
363        };
364        let base_type = match scalar {
365            "i8" => types::I8,
366            "i16" => types::I16,
367            "i32" => types::I32,
368            "i64" => types::I64,
369            "i128" => types::I128,
370            "f16" => types::F16,
371            "f32" => types::F32,
372            "f64" => types::F64,
373            "f128" => types::F128,
374            _ => return None,
375        };
376        if is_vector {
377            if number <= u32::from(u16::MAX) {
378                base_type.by(number).map(Token::Type)
379            } else {
380                None
381            }
382        } else {
383            Some(Token::Type(base_type))
384        }
385    }
386
387    fn scan_name(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
388        let loc = self.loc();
389        let begin = self.pos + 1;
390
391        assert_eq!(self.lookahead, Some('%'));
392
393        loop {
394            match self.next_ch() {
395                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
396                _ => break,
397            }
398        }
399
400        let end = self.pos;
401        token(Token::Name(&self.source[begin..end]), loc)
402    }
403
404    /// Scan for a multi-line quoted string with no escape character.
405    fn scan_string(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
406        let loc = self.loc();
407        let begin = self.pos + 1;
408
409        assert_eq!(self.lookahead, Some('"'));
410
411        while let Some(c) = self.next_ch() {
412            if c == '"' {
413                break;
414            }
415        }
416
417        let end = self.pos;
418        if self.lookahead != Some('"') {
419            return error(LexError::InvalidChar, self.loc());
420        }
421        self.next_ch();
422        token(Token::String(&self.source[begin..end]), loc)
423    }
424
425    fn scan_hex_sequence(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
426        let loc = self.loc();
427        let begin = self.pos + 1;
428
429        assert_eq!(self.lookahead, Some('#'));
430
431        while let Some(c) = self.next_ch() {
432            if !char::is_digit(c, 16) {
433                break;
434            }
435        }
436
437        let end = self.pos;
438        token(Token::HexSequence(&self.source[begin..end]), loc)
439    }
440
441    /// Given that we've consumed an `@` character, are we looking at a source
442    /// location?
443    fn looking_at_srcloc(&self) -> bool {
444        match self.lookahead {
445            Some(c) => char::is_digit(c, 16),
446            _ => false,
447        }
448    }
449
450    fn scan_srcloc(&mut self, pos: usize, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
451        let begin = pos + 1;
452        while let Some(c) = self.next_ch() {
453            if !char::is_digit(c, 16) {
454                break;
455            }
456        }
457
458        let end = self.pos;
459        token(Token::SourceLoc(&self.source[begin..end]), loc)
460    }
461
    /// Get the next token or a lexical error.
    ///
    /// Return None when the end of the source is encountered.
    pub fn next(&mut self) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        // Loop only to skip whitespace; every other arm returns.
        loop {
            let loc = self.loc();
            return match self.lookahead {
                None => None,
                Some(';') => Some(self.scan_comment()),
                Some('(') => Some(self.scan_char(Token::LPar)),
                Some(')') => Some(self.scan_char(Token::RPar)),
                Some('{') => Some(self.scan_char(Token::LBrace)),
                Some('}') => Some(self.scan_char(Token::RBrace)),
                Some('[') => Some(self.scan_char(Token::LBracket)),
                Some(']') => Some(self.scan_char(Token::RBracket)),
                Some(',') => Some(self.scan_char(Token::Comma)),
                Some('.') => Some(self.scan_char(Token::Dot)),
                Some(':') => Some(self.scan_char(Token::Colon)),
                Some('=') => Some(self.scan_char(Token::Equal)),
                Some('!') => Some(self.scan_char(Token::Bang)),
                // `scan_number` falls back to `Token::Plus`/`Token::Minus` when
                // the sign is not followed by something numeric.
                Some('+') => Some(self.scan_number()),
                Some('*') => Some(self.scan_char(Token::Multiply)),
                Some('-') => {
                    if self.looking_at("->") {
                        Some(self.scan_chars(2, Token::Arrow))
                    } else {
                        Some(self.scan_number())
                    }
                }
                Some('0'..='9') => Some(self.scan_number()),
                Some('a'..='z') | Some('A'..='Z') => {
                    // `NaN`/`Inf` are numbers despite starting with a letter.
                    if self.looking_at("NaN") || self.looking_at("Inf") {
                        Some(self.scan_number())
                    } else {
                        Some(self.scan_word())
                    }
                }
                Some('%') => Some(self.scan_name()),
                Some('"') => Some(self.scan_string()),
                Some('#') => Some(self.scan_hex_sequence()),
                Some('@') => {
                    // A bare `@` is `Token::At`; `@` followed by a hex digit
                    // starts a source location.
                    let pos = self.pos;
                    let loc = self.loc();
                    self.next_ch();
                    if self.looking_at_srcloc() {
                        Some(self.scan_srcloc(pos, loc))
                    } else {
                        Some(token(Token::At, loc))
                    }
                }
                // all ascii whitespace
                Some(' ') | Some('\x09'..='\x0d') => {
                    self.next_ch();
                    continue;
                }
                _ => {
                    // Skip invalid char, return error.
                    self.next_ch();
                    Some(error(LexError::InvalidChar, loc))
                }
            };
        }
    }
525}
526
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn digits() {
        assert_eq!(trailing_digits(""), 0);
        assert_eq!(trailing_digits("x"), 0);
        assert_eq!(trailing_digits("0x"), 0);
        assert_eq!(trailing_digits("x1"), 1);
        assert_eq!(trailing_digits("1x1"), 1);
        assert_eq!(trailing_digits("1x01"), 2);
    }

    #[test]
    fn entity_name() {
        assert_eq!(split_entity_name(""), None);
        assert_eq!(split_entity_name("x"), None);
        assert_eq!(split_entity_name("x+"), None);
        assert_eq!(split_entity_name("x+1"), Some(("x+", 1)));
        assert_eq!(split_entity_name("x-1"), Some(("x-", 1)));
        assert_eq!(split_entity_name("1"), Some(("", 1)));
        assert_eq!(split_entity_name("x1"), Some(("x", 1)));
        assert_eq!(split_entity_name("xy0"), Some(("xy", 0)));
        // Reject this non-canonical form.
        assert_eq!(split_entity_name("inst01"), None);
    }

    // Helper shadowing `super::token` so expected values can be written concisely.
    fn token<'a>(token: Token<'a>, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        Some(super::token(token, Location { line_number: line }))
    }

    // Helper shadowing `super::error` so expected errors can be written concisely.
    fn error<'a>(error: LexError, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        Some(super::error(error, Location { line_number: line }))
    }

    #[test]
    fn make_lexer() {
        // Empty and whitespace-only sources yield no tokens.
        let mut l1 = Lexer::new("");
        let mut l2 = Lexer::new(" ");
        let mut l3 = Lexer::new("\n ");

        assert_eq!(l1.next(), None);
        assert_eq!(l2.next(), None);
        assert_eq!(l3.next(), None);
    }

    #[test]
    fn lex_comment() {
        let mut lex = Lexer::new("; hello");
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), None);

        lex = Lexer::new("\n  ;hello\n;foo");
        assert_eq!(lex.next(), token(Token::Comment(";hello"), 2));
        assert_eq!(lex.next(), token(Token::Comment(";foo"), 3));
        assert_eq!(lex.next(), None);

        // Scan a comment after an invalid char.
        let mut lex = Lexer::new("$; hello");
        assert_eq!(lex.next(), error(LexError::InvalidChar, 1));
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_chars() {
        let mut lex = Lexer::new("(); hello\n = :{, }.");
        assert_eq!(lex.next(), token(Token::LPar, 1));
        assert_eq!(lex.next(), token(Token::RPar, 1));
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), token(Token::Equal, 2));
        assert_eq!(lex.next(), token(Token::Colon, 2));
        assert_eq!(lex.next(), token(Token::LBrace, 2));
        assert_eq!(lex.next(), token(Token::Comma, 2));
        assert_eq!(lex.next(), token(Token::RBrace, 2));
        assert_eq!(lex.next(), token(Token::Dot, 2));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_numbers() {
        let mut lex = Lexer::new(" 0 2_000 -1,0xf -0x0 0.0 0x0.4p-34 NaN +5");
        assert_eq!(lex.next(), token(Token::Integer("0"), 1));
        assert_eq!(lex.next(), token(Token::Integer("2_000"), 1));
        assert_eq!(lex.next(), token(Token::Integer("-1"), 1));
        assert_eq!(lex.next(), token(Token::Comma, 1));
        assert_eq!(lex.next(), token(Token::Integer("0xf"), 1));
        assert_eq!(lex.next(), token(Token::Integer("-0x0"), 1));
        assert_eq!(lex.next(), token(Token::Float("0.0"), 1));
        assert_eq!(lex.next(), token(Token::Float("0x0.4p-34"), 1));
        assert_eq!(lex.next(), token(Token::Float("NaN"), 1));
        assert_eq!(lex.next(), token(Token::Integer("+5"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_identifiers() {
        let mut lex = Lexer::new(
            "v0 v00 vx01 block1234567890 block5234567890 v1x vx1 vxvx4 \
             function0 function i8 i32x4 f32x5 f16 f128",
        );
        assert_eq!(
            lex.next(),
            token(Token::Value(Value::with_number(0).unwrap()), 1)
        );
        // Leading-zero numeric tails are non-canonical, so these stay identifiers.
        assert_eq!(lex.next(), token(Token::Identifier("v00"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vx01"), 1));
        assert_eq!(
            lex.next(),
            token(Token::Block(Block::with_number(1234567890).unwrap()), 1)
        );
        assert_eq!(lex.next(), token(Token::Identifier("block5234567890"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("v1x"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vx1"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vxvx4"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("function0"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("function"), 1));
        assert_eq!(lex.next(), token(Token::Type(types::I8), 1));
        assert_eq!(lex.next(), token(Token::Type(types::I32X4), 1));
        // `f32x5` is not a valid vector type, so it lexes as an identifier.
        assert_eq!(lex.next(), token(Token::Identifier("f32x5"), 1));
        assert_eq!(lex.next(), token(Token::Type(types::F16), 1));
        assert_eq!(lex.next(), token(Token::Type(types::F128), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_hex_sequences() {
        let mut lex = Lexer::new("#0 #DEADbeef123 #789");

        assert_eq!(lex.next(), token(Token::HexSequence("0"), 1));
        assert_eq!(lex.next(), token(Token::HexSequence("DEADbeef123"), 1));
        assert_eq!(lex.next(), token(Token::HexSequence("789"), 1));
    }

    #[test]
    fn lex_names() {
        let mut lex = Lexer::new("%0 %x3 %function %123_abc %ss0 %v3 %block11 %const42 %_");

        assert_eq!(lex.next(), token(Token::Name("0"), 1));
        assert_eq!(lex.next(), token(Token::Name("x3"), 1));
        assert_eq!(lex.next(), token(Token::Name("function"), 1));
        assert_eq!(lex.next(), token(Token::Name("123_abc"), 1));
        assert_eq!(lex.next(), token(Token::Name("ss0"), 1));
        assert_eq!(lex.next(), token(Token::Name("v3"), 1));
        assert_eq!(lex.next(), token(Token::Name("block11"), 1));
        assert_eq!(lex.next(), token(Token::Name("const42"), 1));
        assert_eq!(lex.next(), token(Token::Name("_"), 1));
    }

    #[test]
    fn lex_strings() {
        let mut lex = Lexer::new(
            r#"""  "0" "x3""function" "123 abc" "\" "start
                    and end on
                    different lines" "#,
        );

        assert_eq!(lex.next(), token(Token::String(""), 1));
        assert_eq!(lex.next(), token(Token::String("0"), 1));
        assert_eq!(lex.next(), token(Token::String("x3"), 1));
        assert_eq!(lex.next(), token(Token::String("function"), 1));
        assert_eq!(lex.next(), token(Token::String("123 abc"), 1));
        // There is no escape character, so `\` is an ordinary string byte.
        assert_eq!(lex.next(), token(Token::String(r#"\"#), 1));
        // The reported location is where the string starts, even if it spans lines.
        assert_eq!(
            lex.next(),
            token(
                Token::String(
                    r#"start
                    and end on
                    different lines"#
                ),
                1
            )
        );
    }

    #[test]
    fn lex_userrefs() {
        let mut lex = Lexer::new("u0 u1 u234567890 u9:8765");

        assert_eq!(lex.next(), token(Token::UserRef(0), 1));
        assert_eq!(lex.next(), token(Token::UserRef(1), 1));
        assert_eq!(lex.next(), token(Token::UserRef(234567890), 1));
        assert_eq!(lex.next(), token(Token::UserRef(9), 1));
        assert_eq!(lex.next(), token(Token::Colon, 1));
        assert_eq!(lex.next(), token(Token::Integer("8765"), 1));
        assert_eq!(lex.next(), None);
    }
}
716}