Skip to main content
Clarified how nesting should work.
Source Link

The problem I'm solving is a more complex version of pairing up opening and closing brackets.
Instead of matching only on ([{}]), I additionally need to match arbitrary opening sequences of arbitrary length to closing sequences of arbitrary length, such as '(' which is mapped to ')'.
While only the top-level matches should be returned, the matches should still account for the inner ones. This means that with a simple parenthesis mapping, ((f)) should match only on the first opening bracket and the final closing bracket.

The problem I'm solving is a more complex version of pairing up opening and closing brackets.
Instead of matching only on ([{}]), I additionally need to match arbitrary opening sequences of arbitrary length to closing sequences of arbitrary length, such as '(' which is mapped to ')'.

The problem I'm solving is a more complex version of pairing up opening and closing brackets.
Instead of matching only on ([{}]), I additionally need to match arbitrary opening sequences of arbitrary length to closing sequences of arbitrary length, such as '(' which is mapped to ')'.
While only the top-level matches should be returned, the matches should still account for the inner ones. This means that with a simple parenthesis mapping, ((f)) should match only on the first opening bracket and the final closing bracket.

Tweeted twitter.com/StackCodeReview/status/1367037101938790406
Clarified the need for the stack_head variable in the error handling
Source Link
def get_top_level_matching_pairs(expression: str, mapping: dict[str, str]) \
        -> list[tuple[int, int, int, int]]:
    """
    Returns all top-level matches of opening sequences to closing sequences.
    Each match is represented as a 4-tuple of the range that the opening and closing sequences span.

    >>>get_top_level_matching_pairs("(a) op (b) cl", {'(': ')', 'op': 'cl'})
    [(0, 1, 2, 3), (4, 6, 11, 13)]
    """
    def head_starts_with_one_from(match_targets: Union[KeysView[str], ValuesView[str]]) -> Optional[str]:
        # Check whether the expression, from index i, starts with one of the provided keys or values.
        # Return the first match found, none otherwise.
        return next(filter(lambda m: expression.startswith(m, i), match_targets), None)
    res = []
    queue = []  # functions as a stack to keep track of the opened pairs.
    start_index = None
    start_match = None
    i = 0
    while i < len(expression):
        if open := head_starts_with_one_from(mapping.keys()):
            if start_index is None:
                start_index = i
                start_match = open
            queue.append(mapping[open])  # Store the closing counterpart for easy comparisons
            i += len(open)
            continue
        if close := head_starts_with_one_from(mapping.values()):
            try:
                if (stack_head := queue.pop()) == close:
                    if not queue:  # This closing token closes a top-level opening sequence, so add the result
                        res.append((start_index, start_index + len(start_match), i, i + len(close)))
                        start_index = None
                        start_match = None
                    i += len(close)
                    continue
                # raise mismatched opening and closing characters.

            except IndexError:
                # raise closing sequence without an opening. (uses stack_head variable)
        i += 1
    return res
def get_top_level_matching_pairs(expression: str, mapping: dict[str, str]) \
        -> list[tuple[int, int, int, int]]:
    """
    Returns all top-level matches of opening sequences to closing sequences.
    Each match is represented as a 4-tuple of the range that the opening and closing sequences span.

    >>>get_top_level_matching_pairs("(a) op (b) cl", {'(': ')', 'op': 'cl'})
    [(0, 1, 2, 3), (4, 6, 11, 13)]
    """
    def head_starts_with_one_from(match_targets: Union[KeysView[str], ValuesView[str]]) -> Optional[str]:
        # Check whether the expression, from index i, starts with one of the provided keys or values.
        # Return the first match found, none otherwise.
        return next(filter(lambda m: expression.startswith(m, i), match_targets), None)
    res = []
    queue = []  # functions as a stack to keep track of the opened pairs.
    start_index = None
    start_match = None
    i = 0
    while i < len(expression):
        if open := head_starts_with_one_from(mapping.keys()):
            if start_index is None:
                start_index = i
                start_match = open
            queue.append(mapping[open])  # Store the closing counterpart for easy comparisons
            i += len(open)
            continue
        if close := head_starts_with_one_from(mapping.values()):
            try:
                if (stack_head := queue.pop()) == close:
                    if not queue:  # This closing token closes a top-level opening sequence, so add the result
                        res.append((start_index, start_index + len(start_match), i, i + len(close)))
                        start_index = None
                        start_match = None
                    i += len(close)
                    continue
                # raise mismatched opening and closing characters.

            except IndexError:
                # raise closing sequence without an opening.
        i += 1
    return res
def get_top_level_matching_pairs(expression: str, mapping: dict[str, str]) \
        -> list[tuple[int, int, int, int]]:
    """
    Returns all top-level matches of opening sequences to closing sequences.
    Each match is represented as a 4-tuple of the range that the opening and closing sequences span.

    >>>get_top_level_matching_pairs("(a) op (b) cl", {'(': ')', 'op': 'cl'})
    [(0, 1, 2, 3), (4, 6, 11, 13)]
    """
    def head_starts_with_one_from(match_targets: Union[KeysView[str], ValuesView[str]]) -> Optional[str]:
        # Check whether the expression, from index i, starts with one of the provided keys or values.
        # Return the first match found, none otherwise.
        return next(filter(lambda m: expression.startswith(m, i), match_targets), None)
    res = []
    queue = []  # functions as a stack to keep track of the opened pairs.
    start_index = None
    start_match = None
    i = 0
    while i < len(expression):
        if open := head_starts_with_one_from(mapping.keys()):
            if start_index is None:
                start_index = i
                start_match = open
            queue.append(mapping[open])  # Store the closing counterpart for easy comparisons
            i += len(open)
            continue
        if close := head_starts_with_one_from(mapping.values()):
            try:
                if (stack_head := queue.pop()) == close:
                    if not queue:  # This closing token closes a top-level opening sequence, so add the result
                        res.append((start_index, start_index + len(start_match), i, i + len(close)))
                        start_index = None
                        start_match = None
                    i += len(close)
                    continue
                # raise mismatched opening and closing characters.

            except IndexError:
                # raise closing sequence without an opening. (uses stack_head variable)
        i += 1
    return res
Fixed docstring example and added the balanced-delimiters tag and added a mapping to the motivating example
Source Link

Motivation:
For parsing a grammar in a custom parser, I need to discern between bracket literals, which are surrounded by apostrophes and brackets on the grammar level, which are normal brackets. The goal is to return a list of all top-level matches that are found. A match is represented as a 4-tuple of the range that the opening and closing sequences span.
For a motivating example, evaluating the expression id '(' [FArgs] ')' ['::' FunType] '{' VarDecl* Stmt+ '}' with a mapping of {'(': ')', '[': ']', '{': '}', "'('": "')'", "'['": "']'", "'{'": "'}'"} should yield the following list:

def get_top_level_matching_pairs(expression: str, mapping: dict[str, str]) \
        -> list[tuple[int, int, int, int]]:
    """
    Returns all top-level matches of opening sequences to closing sequences.
    Each match is represented as a 4-tuple of the range that the opening and closing sequences span.

    >>>get_top_level_matching_pairs("(a) op (b) cl", {'(': ')', 'op': 'cl'})
    [(0, 1, 2, 3), (4, 6, 11, 13)]
    """
    def head_starts_with_one_from(match_targets: Union[KeysView[str], ValuesView[str]]) -> Optional[str]:
        # Check whether the expression, from index i, starts with one of the provided keys or values.
        # Return the first match found, none otherwise.
        return next(filter(lambda m: expression.startswith(m, i), match_targets), None)
    res = []
    queue = []  # functions as a stack to keep track of the opened pairs.
    start_index = None
    start_match = None
    i = 0
    while i < len(expression):
        if open := head_starts_with_one_from(mapping.keys()):
            if start_index is None:
                start_index = i
                start_match = open
            queue.append(mapping[open])  # Store the closing counterpart for easy comparisons
            i += len(open)
            continue
        if close := head_starts_with_one_from(mapping.values()):
            try:
                if (stack_head := queue.pop()) == close:
                    if not queue:  # This closing token closes a top-level opening sequence, so add the result
                        res.append((start_index, start_index + len(start_match), i, i + len(close)))
                        start_index = None
                        start_match = None
                    i += len(close)
                    continue
                # raise mismatched opening and closing characters.

            except IndexError:
                # raise closing sequence without an opening.
        i += 1
    return res

Motivation:
For parsing a grammar in a custom parser, I need to discern between bracket literals, which are surrounded by apostrophes and brackets on the grammar level, which are normal brackets. The goal is to return a list of all top-level matches that are found. A match is represented as a 4-tuple of the range that the opening and closing sequences span.
For a motivating example, evaluating id '(' [FArgs] ')' ['::' FunType] '{' VarDecl* Stmt+ '}' should yield the following list:

def get_top_level_matching_pairs(expression: str, mapping: dict[str, str]) \
        -> list[tuple[int, int, int, int]]:
    """
    Returns all top-level matches of opening sequences to closing sequences.
    Each match is represented as a 4-tuple of the range that the opening and closing sequences span.

    >>>get_top_level_matching_pairs("(a) op b cl", {'(': ')', 'op': 'cl'})
    [(0, 1, 2, 3), (4, 6, 11, 13)]
    """
    def head_starts_with_one_from(match_targets: Union[KeysView[str], ValuesView[str]]) -> Optional[str]:
        # Check whether the expression, from index i, starts with one of the provided keys or values.
        # Return the first match found, none otherwise.
        return next(filter(lambda m: expression.startswith(m, i), match_targets), None)
    res = []
    queue = []  # functions as a stack to keep track of the opened pairs.
    start_index = None
    start_match = None
    i = 0
    while i < len(expression):
        if open := head_starts_with_one_from(mapping.keys()):
            if start_index is None:
                start_index = i
                start_match = open
            queue.append(mapping[open])  # Store the closing counterpart for easy comparisons
            i += len(open)
            continue
        if close := head_starts_with_one_from(mapping.values()):
            try:
                if (stack_head := queue.pop()) == close:
                    if not queue:  # This closing token closes a top-level opening sequence, so add the result
                        res.append((start_index, start_index + len(start_match), i, i + len(close)))
                        start_index = None
                        start_match = None
                    i += len(close)
                    continue
                # raise mismatched opening and closing characters.

            except IndexError:
                # raise closing sequence without an opening.
        i += 1
    return res

Motivation:
For parsing a grammar in a custom parser, I need to discern between bracket literals, which are surrounded by apostrophes and brackets on the grammar level, which are normal brackets. The goal is to return a list of all top-level matches that are found. A match is represented as a 4-tuple of the range that the opening and closing sequences span.
For a motivating example, evaluating the expression id '(' [FArgs] ')' ['::' FunType] '{' VarDecl* Stmt+ '}' with a mapping of {'(': ')', '[': ']', '{': '}', "'('": "')'", "'['": "']'", "'{'": "'}'"} should yield the following list:

def get_top_level_matching_pairs(expression: str, mapping: dict[str, str]) \
        -> list[tuple[int, int, int, int]]:
    """
    Returns all top-level matches of opening sequences to closing sequences.
    Each match is represented as a 4-tuple of the range that the opening and closing sequences span.

    >>>get_top_level_matching_pairs("(a) op (b) cl", {'(': ')', 'op': 'cl'})
    [(0, 1, 2, 3), (4, 6, 11, 13)]
    """
    def head_starts_with_one_from(match_targets: Union[KeysView[str], ValuesView[str]]) -> Optional[str]:
        # Check whether the expression, from index i, starts with one of the provided keys or values.
        # Return the first match found, none otherwise.
        return next(filter(lambda m: expression.startswith(m, i), match_targets), None)
    res = []
    queue = []  # functions as a stack to keep track of the opened pairs.
    start_index = None
    start_match = None
    i = 0
    while i < len(expression):
        if open := head_starts_with_one_from(mapping.keys()):
            if start_index is None:
                start_index = i
                start_match = open
            queue.append(mapping[open])  # Store the closing counterpart for easy comparisons
            i += len(open)
            continue
        if close := head_starts_with_one_from(mapping.values()):
            try:
                if (stack_head := queue.pop()) == close:
                    if not queue:  # This closing token closes a top-level opening sequence, so add the result
                        res.append((start_index, start_index + len(start_match), i, i + len(close)))
                        start_index = None
                        start_match = None
                    i += len(close)
                    continue
                # raise mismatched opening and closing characters.

            except IndexError:
                # raise closing sequence without an opening.
        i += 1
    return res
Source Link
Loading