1
/*!re2c
2
re2c:encoding:utf8 = 1;
3
re2c:encoding-policy = substitute;
4
5
re2c:define:YYCTYPE = u8;
6
// XXX: "inefficient"! YYPEEK below bounds-checks `cursor < len` on every peek; see https://re2c.org/manual/manual_rust.html#custom-checks
7
re2c:define:YYPEEK = "if cursor < len { *bx.get_unchecked(cursor) } else { 0 }";
8
re2c:define:YYSKIP = "cursor += 1;";
9
re2c:define:YYBACKUP = "marker = cursor;";
10
re2c:define:YYRESTORE = "cursor = marker;";
11
re2c:define:YYBACKUPCTX = "ctxmarker = cursor;";
12
re2c:define:YYRESTORECTX = "cursor = ctxmarker;";
13
re2c:yyfill:enable = 0;
14
re2c:indent:string = ' ';
15
re2c:indent:top = 1;
16
17
re2c:tags = 1;
18
re2c:define:YYSTAGP = "@@{tag} = cursor;";
19
20
re2c:define:YYGETCONDITION = "cond";
21
re2c:define:YYSETCONDITION = "cond = @@;";
22
*/
23
use std::collections::VecDeque;
24
25
use crate::Token;
26
27
/*!conditions:re2c*/
28
29
pub fn lex(input: &str) -> (VecDeque<Token>, Option<usize>) {
30
let bx = input.as_bytes();
31
let len = bx.len();
32
33
let mut cursor = 0;
34
let mut marker = 0;
35
let mut cond = YYC_INIT;
36
37
let mut tx = VecDeque::new();
38
39
/*!svars:re2c format = '#[allow(unused_mut)]\nlet mut @@;\n'; */
40
/*!stags:re2c format = 'let mut @@ = std::usize::MAX;'; */
41
42
'lex: loop {/*!re2c
43
<INIT> '[' { tx.push_back(Token::LeftBracket); continue 'lex; }
44
<INIT> ']' { tx.push_back(Token::RightBracket); continue 'lex; }
45
<INIT> '(' { tx.push_back(Token::LeftParenthesis); continue 'lex; }
46
<INIT> ')' { tx.push_back(Token::RightParenthesis); continue 'lex; }
47
<INIT> ',' { tx.push_back(Token::Comma); continue 'lex; }
48
49
<INIT> @t1 [A-Za-z] [A-Za-z]* {
50
tx.push_back(Token::Symbol(unsafe { std::str::from_utf8_unchecked(&bx[t1..cursor]) }.to_string()));
51
continue 'lex;
52
}
53
54
<INIT> @t1 [-]? [0-9]+ ( [.] [0-9]* ( [eE] [-]? [0-9]+ )? )? {
55
tx.push_back(Token::Number(unsafe { std::str::from_utf8_unchecked(&bx[t1..cursor]) }.to_string()));
56
continue 'lex;
57
}
58
<INIT> @t1 [-]? [.] [0-9]* ( [eE] [-]? [0-9]+ )? {
59
tx.push_back(Token::Number(unsafe { std::str::from_utf8_unchecked(&bx[t1..cursor]) }.to_string()));
60
continue 'lex;
61
}
62
63
<INIT> '"' => STR { tx.push_back(Token::StringOpen); continue 'lex; }
64
65
66
<INIT> [\x00] {
67
return (tx, None);
68
}
69
70
<INIT> [ \t\r\n] { continue 'lex; }
71
72
<STR> '"' => INIT { tx.push_back(Token::StringClose); continue 'lex; }
73
<STR> @t1 [^\x00"]+ {
74
tx.push_back(Token::StringLiteralContent(unsafe { std::str::from_utf8_unchecked(&bx[t1..cursor]) }.to_string()));
75
continue 'lex;
76
}
77
<STR> [\\] @t1 [^\x00] {
78
tx.push_back(Token::StringLiteralContent(unsafe { std::str::from_utf8_unchecked(&bx[t1..cursor]) }.to_string()));
79
continue 'lex;
80
}
81
82
<*> * { return (tx, Some(cursor - 1)); }
83
*/}
84
}
85