-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Expand file tree
/
Copy pathreconstruct.py
More file actions
129 lines (100 loc) · 4.13 KB
/
reconstruct.py
File metadata and controls
129 lines (100 loc) · 4.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from collections import defaultdict
from .tree import Tree
from .visitors import Transformer_InPlace
from .common import ParserConf, PatternStr
from .lexer import Token
from .parsers import earley, resolve_ambig
from .grammar import Rule, Terminal, NonTerminal
def is_discarded_terminal(t):
return t.is_term and t.filter_out
def is_iter_empty(i):
try:
_ = next(i)
return False
except StopIteration:
return True
class WriteTokensTransformer(Transformer_InPlace):
def __init__(self, tokens):
self.tokens = tokens
def __default__(self, data, children, meta):
# if not isinstance(t, MatchTree):
# return t
if not getattr(meta, 'match_tree', False):
return Tree(data, children)
iter_args = iter(children)
to_write = []
for sym in meta.orig_expansion:
if is_discarded_terminal(sym):
t = self.tokens[sym.name]
assert isinstance(t.pattern, PatternStr)
to_write.append(t.pattern.value)
else:
x = next(iter_args)
if isinstance(x, list):
to_write += x
else:
if isinstance(x, Token):
assert Terminal(x.type) == sym, x
else:
assert NonTerminal(x.data) == sym, (sym, x)
to_write.append(x)
assert is_iter_empty(iter_args)
return to_write
class MatchTree(Tree):
pass
class MakeMatchTree:
def __init__(self, name, expansion):
self.name = name
self.expansion = expansion
def __call__(self, args):
t = MatchTree(self.name, args)
t.meta.match_tree = True
t.meta.orig_expansion = self.expansion
return t
class Reconstructor:
def __init__(self, parser):
# XXX TODO calling compile twice returns different results!
tokens, rules, _grammar_extra = parser.grammar.compile()
self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})
self.rules = list(self._build_recons_rules(rules))
def _build_recons_rules(self, rules):
expand1s = {r.origin for r in rules if r.options and r.options.expand1}
aliases = defaultdict(list)
for r in rules:
if r.alias:
aliases[r.origin].append( r.alias )
rule_names = {r.origin for r in rules}
nonterminals = {sym for sym in rule_names
if sym.name.startswith('_') or sym in expand1s or sym in aliases }
for r in rules:
recons_exp = [sym if sym in nonterminals else Terminal(sym.name)
for sym in r.expansion if not is_discarded_terminal(sym)]
# Skip self-recursive constructs
if recons_exp == [r.origin]:
continue
sym = NonTerminal(r.alias) if r.alias else r.origin
yield Rule(sym, recons_exp, MakeMatchTree(sym.name, r.expansion))
for origin, rule_aliases in aliases.items():
for alias in rule_aliases:
yield Rule(origin, [Terminal(alias)], MakeMatchTree(origin.name, [NonTerminal(alias)]))
yield Rule(origin, [Terminal(origin.name)], MakeMatchTree(origin.name, [origin]))
def _match(self, term, token):
if isinstance(token, Tree):
return Terminal(token.data) == term
elif isinstance(token, Token):
return term == Terminal(token.type)
assert False
def _reconstruct(self, tree):
# TODO: ambiguity?
parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match, resolve_ambiguity=resolve_ambig.standard_resolve_ambig)
unreduced_tree = parser.parse(tree.children) # find a full derivation
assert unreduced_tree.data == tree.data
res = self.write_tokens.transform(unreduced_tree)
for item in res:
if isinstance(item, Tree):
for x in self._reconstruct(item):
yield x
else:
yield item
def reconstruct(self, tree):
return ''.join(self._reconstruct(tree))