Skip to content

Commit 2e5a280

Browse files
committed
Implement context sensitive grammar conversion
1 parent 3a81964 commit 2e5a280

10 files changed

Lines changed: 249 additions & 106 deletions

src/notebook/math/grammars/context_sensitive.py

Lines changed: 66 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4,66 +4,86 @@
44
from ...support.coderefs import collector
55
from .epsilon_rules import is_epsilon_rule
66
from .exceptions import IncompatibleGrammarError
7-
from .symbols import Terminal
7+
from .symbols import Terminal, new_non_terminal
8+
from .length_increasing import is_essentially_length_increasing_grammar
9+
from .grammar import GrammarRule, Grammar, GrammarSchema
810

11+
from .symbols import NonTerminal
912

10-
if TYPE_CHECKING:
11-
from .grammar import Grammar, GrammarSchema
12-
from .symbols import NonTerminal
1313

14+
def is_context_sensitive_rule(rule: GrammarRule) -> bool:
    """Check whether a rule has the context-sensitive shape αAβ → αγβ with a nonempty γ.

    Every position of the left side that holds a nonterminal is tried as the rewritten symbol A.
    The symbols before it must reappear as a prefix of the right side, and the symbols after it
    must reappear as a suffix of the right side that starts strictly after that prefix.
    """
    src, dest = rule.src, rule.dest

    for pivot, symbol in enumerate(src):
        if isinstance(symbol, NonTerminal) and src[:pivot] == dest[:pivot]:
            right_context = src[pivot + 1:]

            # Cut points range from len(dest) down to pivot + 1, so the part of the right side
            # that replaces the pivot symbol is never empty.
            if any(right_context == dest[cut:] for cut in range(len(dest), pivot, -1)):
                return True

    return False
1724

1825

19-
def is_essentially_length_increasing_grammar(grammar: Grammar) -> bool:
26+
def is_context_sensitive_grammar(grammar: Grammar) -> bool:
    """Check that every rule is context-sensitive, allowing an ε-rule whose left side is the start symbol."""
    for rule in grammar.schema.rules:
        # An ε-rule is tolerated only when its left side is exactly the start symbol.
        if is_epsilon_rule(rule) and rule.src == [grammar.start]:
            continue

        if not is_context_sensitive_rule(rule):
            return False

    return True
2431

2532

26-
def _iter_all_derivations_one_step(schema: GrammarSchema, current: Iterable[Sequence[Terminal | NonTerminal]]) -> Iterable[Sequence[Terminal | NonTerminal]]:
27-
for derivation in current:
28-
for rule in schema.rules:
29-
if is_epsilon_rule(rule):
30-
continue
33+
@collector.ref('alg:length_increasing_to_context_sensitive')
def length_increasing_to_context_sensitive(grammar: Grammar) -> Grammar:
    """Build a grammar with single-symbol rewriting rules from an essentially length-increasing one.

    Every terminal first receives a stand-in nonterminal together with a rule rewriting the
    stand-in to that terminal. ε-rules are copied unchanged. Every other rule
    ``X1 ... Xn → Y1 ... Ym`` (with terminals replaced by their stand-ins) is replaced by two
    chains of rules that each rewrite a single symbol:

    1. ``X1 ... Xn`` is turned, left to right, into fresh nonterminals ``Z1 ... Zn``; the last
       step additionally appends ``Y(n+1) ... Ym``.
    2. ``Z1 ... Zn`` is turned, left to right, into ``Y1 ... Yn``.

    Args:
        grammar: The grammar to convert.

    Returns:
        The newly built schema, instantiated with the start symbol of ``grammar``.

    Raises:
        IncompatibleGrammarError: If ``grammar`` is not essentially length-increasing.
    """
    if not is_essentially_length_increasing_grammar(grammar):
        raise IncompatibleGrammarError('Expected an essentially length-increasing grammar')

    # Tracks every nonterminal in use so far; it is handed to `new_non_terminal` on each call.
    # Presumably that helper uses it to avoid name clashes — confirm against its definition.
    non_terminals = list(grammar.schema.get_non_terminals())
    terminal_map = dict[Terminal, NonTerminal]()
    current_schema = GrammarSchema()

    # Give every terminal a stand-in nonterminal and a rule `stand-in → terminal`.
    for sym in grammar.schema.get_terminals():
        active_non_terminal = new_non_terminal(sym.value, non_terminals)
        current_schema.rules.append(
            GrammarRule(
                src=[active_non_terminal],
                dest=[sym]
            )
        )

        terminal_map[sym] = active_non_terminal
        non_terminals.append(active_non_terminal)

    # We iterate over rules from the original grammar and modify the result at every step
    for rule in grammar.schema.rules:
        if is_epsilon_rule(rule):
            current_schema.rules.append(rule)
            continue

        modified_src = [terminal_map[sym] if isinstance(sym, Terminal) else sym for sym in rule.src]
        modified_dest = [terminal_map[sym] if isinstance(sym, Terminal) else sym for sym in rule.dest]
        # The part of the right side that extends beyond the length of the left side.
        dest_tail = modified_dest[len(rule.src):]
        new_non_terminals = list[NonTerminal]()

        # Phase 1: replace the left side position by position with fresh nonterminals.
        # The rule for the last position also introduces `dest_tail`.
        for i, sym in enumerate(rule.src):
            tail = modified_src[i + 1:] if i + 1 < len(rule.src) else dest_tail
            active_non_terminal = new_non_terminal(sym.value, non_terminals)
            current_schema.rules.append(
                GrammarRule(
                    src=[*new_non_terminals, *modified_src[i:]],
                    dest=[*new_non_terminals, active_non_terminal, *tail]
                )
            )

            new_non_terminals.append(active_non_terminal)
            non_terminals.append(active_non_terminal)

        # Phase 2: replace the fresh nonterminals position by position with the symbols of the
        # (terminal-free) right side.
        for i in range(len(new_non_terminals)):
            current_schema.rules.append(
                GrammarRule(
                    src=[*modified_dest[:i], *new_non_terminals[i:], *dest_tail],
                    dest=[*modified_dest[:i + 1], *new_non_terminals[i + 1:], *dest_tail],
                )
            )

    return current_schema.instantiate(grammar.start)

src/notebook/math/grammars/epsilon_rules.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from typing import TYPE_CHECKING
22

3+
from ...support.coderefs import collector
34
from .exceptions import IncompatibleRuleError
45
from .grammar import Grammar, GrammarRule, GrammarSchema
56
from .symbols import NonTerminal, Terminal, new_non_terminal
@@ -61,6 +62,7 @@ def iter_rules_without_nullables(nullable: Collection[NonTerminal], dest: Sequen
6162
yield part
6263

6364

65+
@collector.ref('alg:epsilon_rule_removal')
6466
def remove_epsilon_rules(grammar: Grammar) -> Grammar:
6567
nullable = identify_nullable_non_terminals(grammar)
6668
new_start = new_non_terminal(
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
from collections.abc import Iterable, Sequence
2+
from typing import TYPE_CHECKING, cast
3+
4+
from ...support.coderefs import collector
5+
from .epsilon_rules import is_epsilon_rule
6+
from .exceptions import IncompatibleGrammarError
7+
from .symbols import Terminal
8+
9+
10+
if TYPE_CHECKING:
11+
from .grammar import Grammar, GrammarSchema
12+
from .symbols import NonTerminal
13+
14+
15+
def is_length_increasing_grammar(grammar: Grammar) -> bool:
    """Check that no rule of the grammar has a right side shorter than its left side."""
    for rule in grammar.schema.rules:
        if len(rule.src) > len(rule.dest):
            return False

    return True
17+
18+
19+
def is_essentially_length_increasing_grammar(grammar: Grammar) -> bool:
    """Check that no rule shortens its left side, except for an ε-rule on the start symbol."""
    for rule in grammar.schema.rules:
        # The only tolerated exception: an ε-rule whose left side is exactly the start symbol.
        if is_epsilon_rule(rule) and rule.src == [grammar.start]:
            continue

        if len(rule.src) > len(rule.dest):
            return False

    return True
24+
25+
26+
def _iter_all_derivations_one_step(schema: GrammarSchema, current: Iterable[Sequence[Terminal | NonTerminal]]) -> Iterable[Sequence[Terminal | NonTerminal]]:
    """Yield every string obtained from a string in `current` by one application of a non-ε rule.

    A rule is applied at every position where its left side occurs, so the same result may be
    yielded more than once.
    """
    for sentential_form in current:
        for rule in schema.rules:
            if is_epsilon_rule(rule):
                continue

            width = len(rule.src)

            for start in range(len(sentential_form) - width + 1):
                end = start + width

                if rule.src == sentential_form[start:end]:
                    yield [*sentential_form[:start], *rule.dest, *sentential_form[end:]]
35+
36+
37+
def iter_derivations(grammar: Grammar, max_derivation_length: int) -> Iterable[Sequence[Terminal]]:
    """Yield the terminal-only strings reachable from the start symbol in a bounded number of steps.

    An empty string is yielded first for every ε-rule of the grammar. Afterwards, all strings
    derivable in exactly 1, 2, ..., `max_derivation_length` rule applications are computed step
    by step, and those consisting only of terminals are yielded. Duplicates are not filtered.

    Since this is a generator, the compatibility check runs when iteration starts rather than
    when the function is called.

    Args:
        grammar: The grammar to derive from.
        max_derivation_length: The maximal number of rule applications to explore.

    Raises:
        IncompatibleGrammarError: If `grammar` is not essentially length-increasing.
    """
    if not is_essentially_length_increasing_grammar(grammar):
        raise IncompatibleGrammarError('Expected an essentially length-increasing grammar')

    for rule in grammar.schema.rules:
        if is_epsilon_rule(rule):
            yield []

    derivable: Sequence[Sequence[Terminal | NonTerminal]] = [[grammar.start]]

    for _ in range(max_derivation_length):
        derivable = list(_iter_all_derivations_one_step(grammar.schema, derivable))

        if not derivable:
            # No rule applies to any remaining string, so all further steps would be empty too.
            # Stopping here matters because callers may pass a very large step bound.
            return

        for derivation in derivable:
            if all(isinstance(sym, Terminal) for sym in derivation):
                yield cast(Sequence[Terminal], derivation)
53+
54+
55+
@collector.ref('alg:context_sensitive_string_membership')
def naive_membership(grammar: Grammar, string: str) -> bool:
    """Decide by exhaustive search whether `string` is derivable in `grammar`.

    Derivations are enumerated up to a step bound computed from the number of symbols of the
    grammar and the length of `string`; the concatenated values of each terminal-only derivation
    are compared with `string`.

    Raises:
        IncompatibleGrammarError: If `grammar` is not essentially length-increasing.
    """
    if not is_essentially_length_increasing_grammar(grammar):
        raise IncompatibleGrammarError('Expected an essentially length-increasing grammar')

    symbol_count = len(grammar.schema.get_non_terminals()) + len(grammar.schema.get_terminals())
    target_length = len(string)

    if symbol_count > 1:
        # Closed form of the geometric sum 1 + m + ... + m^n for m symbols and target length n.
        step_bound = (1 - symbol_count ** (target_length + 1)) // (1 - symbol_count)
    else:
        step_bound = target_length

    return any(
        ''.join(sym.value for sym in derivation) == string
        for derivation in iter_derivations(grammar, step_bound)
    )
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
from .parser import parse_grammar_rule_line, parse_grammar_schema, parse_nonterminal, parse_terminal
1+
from .parser import parse_grammar_schema, parse_nonterminal, parse_terminal

src/notebook/math/grammars/parsing/parser.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def parse_rule_dest(self, context: GrammarSymbolRunContext) -> Iterable[Terminal
158158
if is_epsilon_rule == is_non_epsilon_rule:
159159
raise context.annotate_context_error('The right side of a rule must contain terminals and nonterminals and at most a single ε')
160160

161-
def iter_rules_on_line(self) -> Iterable[GrammarRule]:
161+
def _iter_rules_on_line(self) -> Iterable[GrammarRule]:
162162
head = self._skip_spaces()
163163

164164
if not head:
@@ -192,7 +192,10 @@ def iter_rules_on_line(self) -> Iterable[GrammarRule]:
192192
if head.kind == 'LINE_BREAK':
193193
return
194194

195-
if not head or head.kind != 'PIPE':
195+
if head is None:
196+
return
197+
198+
if head.kind != 'PIPE':
196199
break
197200

198201
while (head := self.peek()) and head.kind != 'LINE_BREAK':
@@ -201,15 +204,15 @@ def iter_rules_on_line(self) -> Iterable[GrammarRule]:
201204
context.close_at_previous_token()
202205
raise context.annotate_context_error('The right side of a rule must contain a pipe between runs of terminals, nonterminals and ε')
203206

204-
def iter_rules(self) -> Iterable[GrammarRule]:
207+
def _iter_rules(self) -> Iterable[GrammarRule]:
    """Yield the rules of all remaining lines, skipping empty lines before and after each one."""
    self._skip_empty_lines()

    while True:
        # Stop once `peek` reports nothing further to read.
        if not self.peek():
            return

        yield from self._iter_rules_on_line()
        self._skip_empty_lines()
210213

211214
def parse_schema(self) -> GrammarSchema:
212-
rules = list(self.iter_rules())
215+
rules = list(self._iter_rules())
213216

214217
if len(rules) == 0:
215218
raise ParserError('Expected at least one grammar rule')
@@ -224,13 +227,6 @@ def parse_grammar_schema(source: str) -> GrammarSchema:
224227
return parser.parse_schema()
225228

226229

227-
def parse_grammar_rule_line(source: str) -> GrammarSchema:
228-
tokens = tokenize_grammar(source)
229-
230-
with GrammarParser(source, tokens) as parser:
231-
return GrammarSchema(list(parser.iter_rules_on_line()))
232-
233-
234230
def parse_terminal(source: str) -> Terminal:
235231
tokens = tokenize_grammar(source)
236232

src/notebook/math/grammars/parsing/test_parser.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from ....support.pytest import pytest_parametrize_kwargs, pytest_parametrize_lists
77
from ..grammar import GrammarRule, GrammarSchema
88
from ..symbols import NonTerminal, Terminal
9-
from .parser import parse_grammar_rule_line, parse_grammar_schema, parse_nonterminal, parse_terminal
9+
from .parser import parse_grammar_schema, parse_nonterminal, parse_terminal
1010

1111

1212
@pytest_parametrize_lists(
@@ -168,14 +168,14 @@ def test_parsing_nested_nonterminal() -> None:
168168

169169
def test_parsing_empty_rule() -> None:
    """Parsing an empty source as a schema must fail because it contains no rules."""
    expected_message = 'Expected at least one grammar rule'

    with pytest.raises(ParserError) as excinfo:
        parse_grammar_schema('')

    assert str(excinfo.value) == expected_message
174174

175175

176176
def test_parsing_rule_with_empty_left_side() -> None:
177177
with pytest.raises(ParserError) as excinfo:
178-
parse_grammar_rule_line('→ ε')
178+
parse_grammar_schema('→ ε')
179179

180180
assert str(excinfo.value) == 'The left side of a rule must be nonempty'
181181
assert excinfo.value.__notes__[0] == dedent('''\
Lines changed: 32 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,48 @@
1-
from textwrap import dedent
21
from typing import TYPE_CHECKING
32

43
from ...support.pytest import pytest_parametrize_lists
5-
from .context_sensitive import is_essentially_length_increasing_grammar, is_length_increasing_grammar, naive_membership
4+
from .context_sensitive import (
5+
is_context_sensitive_grammar,
6+
is_context_sensitive_rule,
7+
length_increasing_to_context_sensitive,
8+
)
69
from .parsing import parse_grammar_schema
7-
from .symbols import NonTerminal
810

911

1012
if TYPE_CHECKING:
1113
from .conftest import GrammarFixture
1214

1315

14-
def test_is_length_increasing_grammar_an(an: GrammarFixture) -> None:
15-
assert not is_length_increasing_grammar(an.grammar)
16-
assert is_essentially_length_increasing_grammar(an.grammar)
16+
@pytest_parametrize_lists(
    rule=[
        '<S> → <S>',
        '<S> → "a"',
        '<S> → "a" "b" "c"',
        '"a" <B> "c" → "a" "b" "c"',
        '"a" <BC> "d" → "a" "b" "c" "d"',
    ],
)
def test_is_context_sensitive_rule_success(rule: str) -> None:
    """Each rule rewrites one nonterminal into a nonempty string while keeping its context."""
    schema = parse_grammar_schema(rule)
    first_rule = schema.rules[0]
    assert is_context_sensitive_rule(first_rule)
1728

1829

19-
def test_is_length_increasing_grammar_binary(binary: GrammarFixture) -> None:
20-
assert is_length_increasing_grammar(binary.grammar)
30+
@pytest_parametrize_lists(
    rule=[
        '<S> → ε',
        '"a" <B> "c" → "a" "c"',
        '"a" <BC> "d" → "b" "c"',
    ],
)
def test_is_context_sensitive_rule_failure(rule: str) -> None:
    """Each rule either erases the rewritten nonterminal or does not keep its context."""
    schema = parse_grammar_schema(rule)
    first_rule = schema.rules[0]
    assert not is_context_sensitive_rule(first_rule)
2140

2241

23-
def test_is_length_increasing_grammar_anbncn(anbncn: GrammarFixture) -> None:
24-
assert is_essentially_length_increasing_grammar(anbncn.grammar)
42+
def test_is_context_sensitive_grammar_anbncn(anbncn: GrammarFixture) -> None:
    """The grammar of the `anbncn` fixture is expected not to pass the context-sensitivity check."""
    grammar = anbncn.grammar
    assert not is_context_sensitive_grammar(grammar)
2544

2645

27-
def test_is_length_increasing_grammar_failure() -> None:
28-
schema = parse_grammar_schema(
29-
dedent('''\
30-
<S> → "a" <S> "b"
31-
"a" <S> "b" → <S>
32-
''',
33-
),
34-
)
35-
36-
grammar = schema.instantiate(NonTerminal('S'))
37-
assert not is_essentially_length_increasing_grammar(grammar)
38-
39-
40-
def test_naive_membership_an(an: GrammarFixture) -> None:
41-
for string in an.whitelist:
42-
assert naive_membership(an.grammar, string)
43-
44-
for string in an.blacklist:
45-
assert not naive_membership(an.grammar, string)
46-
47-
48-
def test_naive_membership_anbn(anbn: GrammarFixture) -> None:
49-
for string in anbn.whitelist:
50-
assert naive_membership(anbn.grammar, string)
51-
52-
for string in anbn.blacklist:
53-
assert not naive_membership(anbn.grammar, string)
46+
def test_length_increasing_to_context_sensitive_anbncn(anbncn: GrammarFixture) -> None:
    """Converting the grammar of the `anbncn` fixture must yield a context-sensitive grammar."""
    converted = length_increasing_to_context_sensitive(anbncn.grammar)
    assert is_context_sensitive_grammar(converted)

0 commit comments

Comments
 (0)