Motivation:
For parsing a grammar in a custom parser, I need to discern between bracket literals, which are surrounded by apostrophes and brackets on the grammar level, which are normal brackets. The goal is to return a list of all top-level matches that are found. A match is represented as a 4-tuple of the range that the opening and closing sequences span.
For a motivating example, evaluating the expression id '(' [FArgs] ')' ['::' FunType] '{' VarDecl* Stmt+ '}' with a mapping of {'(': ')', '[': ']', '{': '}', "'('": "')'", "'['": "']'", "'{'": "'}'"} should yield the following list:
def get_top_level_matching_pairs(expression: str, mapping: dict[str, str]) \
-> list[tuple[int, int, int, int]]:
"""
Returns all top-level matches of opening sequences to closing sequences.
Each match is represented as a 4-tuple of the range that the opening and closing sequences span.
>>>get_top_level_matching_pairs("(a) op (b) cl", {'(': ')', 'op': 'cl'})
[(0, 1, 2, 3), (4, 6, 11, 13)]
"""
def head_starts_with_one_from(match_targets: Union[KeysView[str], ValuesView[str]]) -> Optional[str]:
# Check whether the expression, from index i, starts with one of the provided keys or values.
# Return the first match found, none otherwise.
return next(filter(lambda m: expression.startswith(m, i), match_targets), None)
res = []
queue = [] # functions as a stack to keep track of the opened pairs.
start_index = None
start_match = None
i = 0
while i < len(expression):
if open := head_starts_with_one_from(mapping.keys()):
if start_index is None:
start_index = i
start_match = open
queue.append(mapping[open]) # Store the closing counterpart for easy comparisons
i += len(open)
continue
if close := head_starts_with_one_from(mapping.values()):
try:
if (stack_head := queue.pop()) == close:
if not queue: # This closing token closes a top-level opening sequence, so add the result
res.append((start_index, start_index + len(start_match), i, i + len(close)))
start_index = None
start_match = None
i += len(close)
continue
# raise mismatched opening and closing characters.
except IndexError:
# raise closing sequence without an opening.
i += 1
return res