-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtokenizer.rb
75 lines (60 loc) · 1.53 KB
/
tokenizer.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
require 'strscan'
# Splits source text into tokens: parentheses, the lambda character (U+03BB),
# identifiers, `:=` and `.`. Comments (`#` to end of line) and whitespace are
# skipped and never returned.
#
# Every token is an Array of the form [type, text, line, column], where line
# and column are the 1-based position of the token's first character.
class Tokenizer
  # Patterns for the tokens handed back to the caller, tried in this order
  # at the current scan position.
  TOKEN_PATTERNS = [
    [/\(/, :LPAREN],
    [/\)/, :RPAREN],
    [/\u03BB/, :LAMBDA],
    [/[0-9a-zA-Z_]+/, :IDENT],
    [/:=/, :ASSIGN],
    [/\./, :PERIOD]
  ].freeze

  # @param source [String] the text to tokenize
  def initialize(source)
    @s = StringScanner.new(source)
    @peek = []
    @current_line = 1
    @current_column = 1
  end

  # Looks ahead without consuming anything.
  #
  # @param n [Integer] zero-based look-ahead distance (0 is the next token)
  # @return [Array, nil] the token n positions ahead, or nil past end of input
  # @raise [RuntimeError] if unrecognised input is reached while filling the buffer
  def peek(n = 0)
    @peek << _next_token until @peek.size > n
    @peek[n]
  end

  # Consumes and returns the next token.
  #
  # @return [Array, nil] the next token, or nil at end of input
  # @raise [RuntimeError] if the input at the current position matches no token
  def next_token
    return @peek.shift unless @peek.empty?

    _next_token
  end

  private

  # Reads the next significant token straight from the scanner.
  #
  # This must never go through #next_token: that method drains the look-ahead
  # buffer, so calling it while #peek is refilling the buffer would pop and
  # re-append already-buffered tokens in the wrong order.
  def _next_token
    # Skip any run of comments and whitespace iteratively so that long runs of
    # comment lines cannot exhaust the stack. The comment pattern stops at the
    # end of the line; the newline (if any) is consumed by the whitespace
    # pattern, which lets a comment on the last line end at end of input.
    loop do
      break unless scan(/#[^\n]*/, :COMMENT) || scan(/\s+/, :WHITESPACE)
    end
    return nil if @s.eos?

    TOKEN_PATTERNS.each do |regexp, token_name|
      token = scan(regexp, token_name)
      return token if token
    end

    raise "unexpected token at [#{@current_line}, #{@current_column}]"
  end

  # Tries to match regexp at the scan position. On success, advances the
  # line/column counters past the matched text and returns the token, stamped
  # with the position where the match started; otherwise returns nil.
  def scan(regexp, token_name)
    match = @s.scan(regexp)
    return nil unless match

    token = [token_name, match, @current_line, @current_column]
    lines, chars = char_offsets(match)
    if lines.positive?
      @current_line += lines
      @current_column = chars + 1
    else
      @current_column += chars
    end
    token
  end

  # Returns [newline_count, chars_after_last_newline] for str. When str has no
  # newline the second element is the full length of str.
  def char_offsets(str)
    return [0, 0] unless str

    newlines = str.count("\n")
    return [0, str.length] if newlines.zero?

    [newlines, str.length - str.rindex("\n") - 1]
  end
end