Castle: The best Real-Time/Embedded/HighTech language EVER. Attempt 2
Revision | 8a2d8be44debb6113b2229ef5cdccb567c5a2a74 (tree) |
---|---|
Zeit | 2022-02-12 02:26:37 |
Autor | Albert Mietus < albert AT mietus DOT nl > |
Committer | Albert Mietus < albert AT mietus DOT nl > |
We can now parse alternatives (OrderedChoice)
@@ -13,13 +13,14 @@ | ||
13 | 13 | """With this MixIn PEG-classes get the ``.value`` property""" |
14 | 14 | |
15 | 15 | def __init__(self, *, value=None, **kwargs): |
16 | + logger.debug(f'{self._typeName(self)}.MixIn_value_attribute:: value:=' + | |
17 | + ('[[' +', '.join(f'{v}:{type(v).__name__}' for v in value) + ']]') if isinstance(value, list) else f's>>{value}<<') | |
16 | 18 | super().__init__(**kwargs) |
17 | - logger.debug(f'{self._typeName(self)}:: value:={value}:{self._typeName(value)}') | |
18 | 19 | self._value=value |
19 | 20 | |
20 | 21 | @property |
21 | 22 | def value(self): |
22 | - logger.debug(f'{self._typeName(self)}:: @property={self._value}') | |
23 | + logger.debug(f'{self._typeName(self)}:: @value={self._value}') | |
23 | 24 | return self._value |
24 | 25 | |
25 | 26 |
@@ -27,17 +28,21 @@ | ||
27 | 28 | """With this MixIn PEG-classes get the ``.expr`` property""" |
28 | 29 | |
29 | 30 | def __init__(self, *, expr=None, **kwargs): |
31 | + logger.debug(f'{self._typeName(self)}.MixIn_expr_attribute:: expr:={self._valType(expr)}') | |
30 | 32 | super().__init__(**kwargs) |
31 | 33 | self._expr = expr |
32 | 34 | |
33 | 35 | @property |
34 | 36 | def expr(self): |
37 | + logger.debug(f'{self._typeName(self)}:: @expr={self._expr}') | |
35 | 38 | return self._expr |
36 | 39 | |
37 | 40 | |
38 | 41 | class MixIn_children_tuple: |
39 | 42 | """With this MixIn PEG-class get the ``.children`` property; and sequence-alike methods""" |
40 | 43 | def __init__(self, *, children, **kwargs): |
44 | + logger.debug(f'{self._typeName(self)}.MixIn_children_tuple:: children[{len(children)}]:=' + | |
45 | + ('[[' +', '.join(f'{c}:{type(c).__name__}' for c in children) + ']]') if isinstance(children, list) else f's>>{children}<<') | |
41 | 46 | super().__init__(**kwargs) |
42 | 47 | self._childeren = tuple(children) |
43 | 48 |
@@ -125,8 +130,14 @@ | ||
125 | 130 | def __len__(self): return len(self._value) |
126 | 131 | def __getitem__(self, n): return self._value[n] |
127 | 132 | |
133 | + def __str__(self): # mostly for debugging | |
134 | + return "Seq{{" + " ; ".join(f"{c}" for c in self._value) + "}}" # XXX ToDo: _value -> children | |
128 | 135 | |
129 | -class OrderedChoice(Expression):pass # It a an set of alternatives | |
136 | +class OrderedChoice(MixIn_children_tuple, Expression): # A | B | C | ... the order is relevant | |
137 | + """OC: A _tuple_ of alternative expressions""" | |
138 | + | |
139 | + def __str__(self): # mostly for debugging | |
140 | + return "OC{{" + " | ".join(f"{c}" for c in self._childeren) + "}}" | |
130 | 141 | |
131 | 142 | class Optional(Quantity):pass |
132 | 143 | class ZeroOrMore(Quantity):pass |
@@ -4,14 +4,17 @@ | ||
4 | 4 | |
5 | 5 | def peg_grammar(): return rules, EOF |
6 | 6 | def rules(): return OneOrMore(rule) |
7 | -def rule(): return rule_name, '<-', expressions, ";" | |
7 | +def rule(): return rule_name, '<-', expression, ";" | |
8 | 8 | |
9 | -def expressions(): return ( OneOrMore(single_expr), Optional( '|' , expressions ) ) | |
10 | -def single_expr(): return ( [ rule_crossref, term, group, predicate ], op_quantity) | |
9 | +def expression(): return expressions, op_alternative | |
10 | +def expressions(): return OneOrMore(single_expr) | |
11 | +def single_expr(): return [ rule_crossref, term, group, predicate ], op_quantity | |
11 | 12 | |
13 | +def op_alternative(): return Optional( '|' , expression ) | |
12 | 14 | def op_quantity(): return Optional([ '?' , '*' , '+' , '#' ]) |
15 | + | |
13 | 16 | def term(): return [ str_term, regex_term ] |
14 | -def group(): return '(', expressions, ')' | |
17 | +def group(): return '(', expression, ')' | |
15 | 18 | def predicate(): return ['&','!'], single_expr |
16 | 19 | |
17 | 20 | def str_term(): return [ (S3, str_no_s3, S3), |
@@ -17,9 +17,11 @@ | ||
17 | 17 | #NO_VISITOR_NEEDED: visit_term |
18 | 18 | #NO_VISITOR_NEEDED: visit_re_no_slash |
19 | 19 | #NO_VISITOR_NEEDED: visit_group |
20 | -#NO_VISITOR_NEEDED: visit_op_quantity (before: vist_expr_quantity) | |
20 | +#NO_VISITOR_NEEDED: visit_op_quantity -- handle in visit_single_expr | |
21 | 21 | |
22 | 22 | class PegVisitor(arpeggio.PTNodeVisitor): |
23 | + def _logstr_node_children(self, node, children): | |
24 | + return f'>>{node}<< children[{len(children)}] >>' + ", ".join(f'{c}:{type(c).__name__}' for c in children) + '<<' | |
23 | 25 | |
24 | 26 | def visit_str_term(self, node, children): |
25 | 27 | return peg.StrTerm(value=node[1], parse_tree=node) |
@@ -33,7 +35,7 @@ | ||
33 | 35 | def visit_rule_crossref(self, node, children): |
34 | 36 | return peg.ID(name=str(node), parse_tree=node) |
35 | 37 | |
36 | - def visit_rule(self, node, children): # Name '<-' expressions ';' | |
38 | + def visit_rule(self, node, children): # Name '<-' expression ';' | |
37 | 39 | return peg.Rule(name=children[0],expr=children[1], parse_tree=node) |
38 | 40 | |
39 | 41 |
@@ -62,11 +64,32 @@ | ||
62 | 64 | raise NotImplementedError("visit_single_expr, len>2") # XXX -- Is this possible? |
63 | 65 | |
64 | 66 | |
65 | - def visit_expressions(self, node, children): # OneOrMore(single_expr), Optional( '|' , expressions ) | |
66 | - logger.debug(f'visit_expressions:: >>{node}<< #children={len(children)} children={children}:{type(children)}') | |
67 | + def visit_expression(self, node, children): # ( expressions, op_alternatives ) | |
68 | + logger.debug('visit_expression::' + self._logstr_node_children(node, children)) | |
69 | + if len(children) == 1: #Only expressions | |
70 | + return children[0] | |
71 | + elif len(children) == 2: # So, having with alternatives | |
72 | + # The 1st kid is a peg.Sequence ``expr`` | |
73 | + # The 2nd kid can be a | |
74 | + # - peg.Sequence `` | expr ``, OR | |
75 | + # - peg.OrderedChoice `` | expr | expr`` | |
76 | + # In all cased a (single) OrderedChoice with a list of alternatives should be returned. | |
77 | +# return peg.OrderedChoice(children = ((children[0].value,) + | |
78 | +# children[1]._childeren if isinstance(children[1], peg.OrderedChoice) else children[1].value), #XXX HACK | |
79 | +# parse_tree=node) | |
80 | + if isinstance(children[1], peg.OrderedChoice): | |
81 | + alternatives = [children[0]] + [alt for alt in children[1]] | |
82 | + else: | |
83 | + alternatives = children | |
84 | + return peg.OrderedChoice(children = alternatives, parse_tree=node) | |
85 | + else: | |
86 | + raise NotImplementedError("visit_expression, len>2") | |
87 | + | |
88 | + | |
89 | + def visit_expressions(self, node, children): # OneOrMore(single_expr) | |
90 | + logger.debug(f'visit_expressions::{self._logstr_node_children(node, children)}') | |
67 | 91 | return peg.Sequence(value=children, parse_tree=node) |
68 | 92 | |
69 | - raise NotImplementedError("visit_expressions, len>1 :: peg.OrderedChoice") | |
70 | 93 | |
71 | 94 | def visit_predicate(self, node, children): |
72 | 95 | token_2_predicate = {'&': peg.AndPredicate, |
@@ -0,0 +1,65 @@ | ||
1 | +import pytest | |
2 | +import logging;logger = logging.getLogger(__name__) | |
3 | + | |
4 | +from castle.readers.parser import grammar | |
5 | + | |
6 | +import arpeggio | |
7 | + | |
8 | +R, S, X = grammar.regex_term.__name__, grammar.str_term.__name__, grammar.rule_crossref.__name__ # shortcut in grammar | |
9 | +P = grammar.predicate.__name__ | |
10 | +G = grammar.group.__name__ | |
11 | + | |
12 | +def parse_expression(txt, pattern=None): | |
13 | + parser = arpeggio.ParserPython(grammar.expression) | |
14 | + parse_tree = parser.parse(txt) | |
15 | + logger.debug("\nPARSE-TREE\n" + parse_tree.tree_str()+'\n') | |
16 | + | |
17 | + assert parse_tree.position_end == len(txt) , f"Not parsed whole input; Only: >>{txt[parse_tree.position: parse_tree.position_end]}<<; Not: >>{txt[parse_tree.position_end:]}<<." | |
18 | + assert parse_tree.rule_name == "expression" | |
19 | + | |
20 | + if pattern: validate_pattern(parse_tree, pattern=pattern) | |
21 | + | |
22 | + return parse_tree | |
23 | + | |
24 | +def validate_pattern(pt, pattern=None): | |
25 | + expressions = pt[0] | |
26 | + assert len(expressions) == len(pattern), f"Not correct number-of-element" | |
27 | + | |
28 | + for p, s in zip(pattern, expressions): | |
29 | + if p is None: continue | |
30 | + if p == X: | |
31 | + assert s[0].rule_name == p | |
32 | + elif p in (S,R): | |
33 | + assert s[0][0].rule_name == p # S => T => str/regex | |
34 | + elif isinstance(p, tuple): # Group: '(' ... ')' | |
35 | + assert s[0].rule_name == G | |
36 | + validate_pattern(s[0][1:-1][0], pattern=p) # G=>E=> | |
37 | + elif p == P: | |
38 | + assert False, "To Do: Predicate" | |
39 | + else: | |
40 | + assert False, "To Do: More" | |
41 | + | |
42 | + | |
43 | +def test_simple_1(): parse_expression(r"abc", pattern=[X]) | |
44 | +def test_simple_2(): parse_expression(r'A Bc', pattern=[X, X]) | |
45 | + | |
46 | +def test_string_1(): parse_expression(r"'abc'", pattern=[S]) | |
47 | +def test_regexp_1(): parse_expression(r"/re/", pattern=[R]) | |
48 | + | |
49 | +def test_mix(): parse_expression(r'/regex/ "string" crossref crossref', pattern=[R,S, X, X]) | |
50 | + | |
51 | +def test_sub(): parse_expression(r'( A B )', pattern=[(X, X)]) | |
52 | +def test_mix_nosub(): parse_expression(r'/regex/ "string" ( A B ) crossref', pattern=[R,S, None, X]) | |
53 | +def test_mix_sub(): parse_expression(r'/regex/ "string" ( A B ) crossref', pattern=[R,S, (X, X), X]) | |
54 | + | |
55 | +def test_sub_sub(): parse_expression(r'level0 ( level1_1 (level2a level2b ) level1_2) level0', pattern=[X, (X, (X,X), X), X]) | |
56 | + | |
57 | + | |
58 | +def test_bug1(): parse_expression(r"""( rule_crossref | term | group | predicate ) ( '?' | '*' | '+' | '#' )?""") | |
59 | +def test_bug1a(): parse_expression(r"""( rule_crossref | term | group | predicate )""") | |
60 | +def test_bug1a1(): parse_expression(r"""A | B | C | D""") | |
61 | +def test_bug1a2(): parse_expression(r"""(A | B | C | D)""") | |
62 | +def test_bug1b(): parse_expression(r"""( rule_crossref | term | group | predicate ) ( '?' | '*' | '+' | '#' )""") | |
63 | +def test_bug1c(): parse_expression(r"""( '?' | '*' | '+' | '#' )""") | |
64 | + | |
65 | + |
@@ -1,59 +0,0 @@ | ||
1 | -import pytest | |
2 | -import logging;logger = logging.getLogger(__name__) | |
3 | - | |
4 | -from castle.readers.parser import grammar | |
5 | - | |
6 | -import arpeggio | |
7 | - | |
8 | -R, S, X = grammar.regex_term.__name__, grammar.str_term.__name__, grammar.rule_crossref.__name__ # shortcut in grammar | |
9 | -P = grammar.predicate.__name__ | |
10 | -G = grammar.group.__name__ | |
11 | - | |
12 | -def parse_expressions(txt, pattern=None): | |
13 | - parser = arpeggio.ParserPython(grammar.expressions) | |
14 | - parse_tree = parser.parse(txt) | |
15 | - logger.info("\nPARSE-TREE\n" + parse_tree.tree_str()+'\n') | |
16 | - | |
17 | - assert parse_tree.position_end == len(txt) , f"Not parsed whole input; Only: >>{txt[parse_tree.position: parse_tree.position_end]}<<; Not: >>{txt[parse_tree.position_end:]}<<." | |
18 | - assert parse_tree.rule_name == "expressions" | |
19 | - | |
20 | - if pattern: validate_pattern(parse_tree, pattern=pattern) | |
21 | - | |
22 | - return parse_tree | |
23 | - | |
24 | -def validate_pattern(pt, pattern=None): | |
25 | - assert len(pt) == len(pattern), f"Not correct number-of-element" | |
26 | - | |
27 | - for p, s in zip(pattern, pt): # E <- S* (| E)? | |
28 | - if p is None: continue | |
29 | - if p == X: | |
30 | - assert s[0].rule_name == p | |
31 | - elif p in (S,R): | |
32 | - assert s[0][0].rule_name == p # S => T => str/regex | |
33 | - elif isinstance(p, tuple): # Group: '(' ... ')' | |
34 | - assert s[0].rule_name == G | |
35 | - validate_pattern(s[0][1:-1][0], pattern=p) # G=>E=> | |
36 | - elif p == P: | |
37 | - assert False, "To Do: Predicate" | |
38 | - else: | |
39 | - assert False, "To Do: More" | |
40 | - | |
41 | - | |
42 | -def test_simple_1(): parse_expressions(r"abc", pattern=[X]) | |
43 | -def test_simple_2(): parse_expressions(r'A Bc', pattern=[X, X]) | |
44 | - | |
45 | -def test_string_1(): parse_expressions(r"'abc'", pattern=[S]) | |
46 | -def test_regexp_1(): parse_expressions(r"/re/", pattern=[R]) | |
47 | - | |
48 | -def test_mix(): parse_expressions(r'/regex/ "string" crossref crossref', pattern=[R,S, X, X]) | |
49 | - | |
50 | -def test_sub(): parse_expressions(r'( A B )', pattern=[(X, X)]) | |
51 | -def test_mix_nosub(): parse_expressions(r'/regex/ "string" ( A B ) crossref', pattern=[R,S, None, X]) | |
52 | -def test_mix_sub(): parse_expressions(r'/regex/ "string" ( A B ) crossref', pattern=[R,S, (X, X), X]) | |
53 | - | |
54 | -def test_sub_sub(): parse_expressions(r'level0 ( level1_1 (level2a level2b ) level1_2) level0', pattern=[X, (X, (X,X), X), X]) | |
55 | - | |
56 | - | |
57 | - | |
58 | - | |
59 | - |
@@ -9,22 +9,22 @@ | ||
9 | 9 | def parse_rule(txt, pattern=None): |
10 | 10 | parser = arpeggio.ParserPython(grammar.rule) |
11 | 11 | tree = parser.parse(txt) |
12 | - logger.info(f'\nTREE\n{tree.tree_str()}') | |
12 | + logger.debug(f'\nTREE\n{tree.tree_str()}') | |
13 | 13 | |
14 | 14 | assert tree.position_end == len(txt) , f"Not parsed whole input; Only: >>{txt[tree.position: tree.position_end]}<<; Not: >>{txt[tree.position_end:]}<<." |
15 | 15 | assert len(tree) == 4, "A rule should have length=4; ..." |
16 | 16 | assert tree[0].rule_name == "rule_name", " at [0], the name of the rule" |
17 | 17 | assert str(tree[1]) == '<-', " then a arrow" |
18 | - assert tree[2].rule_name == "expressions", " at [2] an ordered_choice" | |
18 | + assert tree[2].rule_name == "expression", " at [2] an expression" | |
19 | 19 | assert str(tree[3]) == ';', " and the the closing ':'" |
20 | 20 | |
21 | 21 | return tree |
22 | 22 | |
23 | 23 | |
24 | -def test_simple(): parse_rule(r"R <- A B C ;") | |
24 | +def test_simple(): parse_rule(r"R <- A B C ;") | |
25 | 25 | def test_OC(): parse_rule(r"Alts <- This | That | Or So ;") |
26 | 26 | |
27 | -def test_rule_rule(): parse_rule(r"""RULE <- RULE_NAME '<-' ORDERED_CHOICE ';' ;""") | |
27 | +def test_rule_rule(): parse_rule(r"""RULE <- RULE_NAME '<-' ORDERED_CHOICE ';' ;""") | |
28 | 28 | def test_expression_rule(): parse_rule(r""" |
29 | 29 | expression <- regex_term |
30 | 30 | | rule_crossref |
@@ -1,6 +1,5 @@ | ||
1 | 1 | """Test that a sequence of expressions is an Expression() |
2 | - | |
3 | - Note: the value of Expression() is a list-subclass; which is fine. But use it as list!!""" | |
2 | +""" | |
4 | 3 | |
5 | 4 | import pytest |
6 | 5 | import logging; logger = logging.getLogger(__name__) |
@@ -23,17 +22,24 @@ | ||
23 | 22 | ast = parse(txt, grammar.single_expr) |
24 | 23 | |
25 | 24 | |
26 | -def test_seq_of_two_as_expressions(): | |
25 | +def test_seq_of_two_as_expression(): | |
27 | 26 | txt = "A B" |
28 | - ast = parse(txt, grammar.expressions) | |
27 | + ast = parse(txt, grammar.expression) | |
29 | 28 | |
30 | 29 | assert_Seq(ast, 2, ids=('A', 'B')) |
31 | 30 | assert isinstance(ast.value, list), "It will be an `arpeggio.SemanticActionResult` which is a subclass of list" |
32 | 31 | |
32 | +def test_seq_of_three_as_expression(): | |
33 | + txt = "A B C" | |
34 | + ast = parse(txt, grammar.expression) | |
35 | + | |
36 | + assert_Seq(ast, 3, ids=('A', 'B', 'C')) | |
37 | + assert isinstance(ast.value, list), "It will be an `arpeggio.SemanticActionResult` which is a subclass of list" | |
38 | + | |
33 | 39 | |
34 | 40 | def test_seq_of_three_with_quantification(): |
35 | 41 | txt = "A? B+ C*" |
36 | - ast = parse(txt, grammar.expressions) | |
42 | + ast = parse(txt, grammar.expression) | |
37 | 43 | |
38 | 44 | assert_Seq(ast, 3) |
39 | 45 |
@@ -46,7 +52,31 @@ | ||
46 | 52 | assert_ID(ast[2].expr, 'C'), "The 3th one is a 'C'" |
47 | 53 | |
48 | 54 | |
55 | +def assert_OC(ast, length_pattern): | |
56 | + assert isinstance(ast, peg.OrderedChoice) | |
57 | + assert isinstance(ast, peg.Expression), "An OrderedChoice is also a Expression" | |
58 | + assert len(ast) == len(length_pattern), "Not the correct number of alternatives" | |
59 | + for i, (alt, l) in enumerate(zip(ast, length_pattern)): | |
60 | + if l is not None: | |
61 | + assert len(alt) == l, f'The {i}th alternative does not match the specified length -- {alt}' | |
62 | + logger.debug(f'OC-alt[{i}] ==> {alt}') | |
63 | + | |
49 | 64 | def test_OrderedChoice_of_two_alternatives(): |
50 | 65 | txt = "A | B" |
51 | - ast = parse(txt, grammar.expressions) | |
52 | - assert False # XXX ToBoDone | |
66 | + ast = parse(txt, grammar.expression) | |
67 | + logger.debug(f"OC.2:: {ast}") | |
68 | + assert_OC(ast, length_pattern=[1,1]) | |
69 | + | |
70 | +def test_OrderedChoice_of_three_alternatives(): | |
71 | + txt = "A | B | C" | |
72 | + ast = parse(txt, grammar.expression) | |
73 | + logger.debug(f"OC.3:: {ast}") | |
74 | + assert_OC(ast, length_pattern=[1,1,1]) | |
75 | + | |
76 | + | |
77 | +def test_OrderedChoice_of_long_alternatives(): | |
78 | + txt = "A | b1 b2 | C" | |
79 | + ast = parse(txt, grammar.expression) | |
80 | + logger.debug(f"OC.long:: {ast}") | |
81 | + | |
82 | + assert_OC(ast, length_pattern=[1,2,1]) |