Skip to main content
added 6 characters in body; edited tags
Source Link
toolic
  • 15.8k
  • 6
  • 29
  • 217
  • Truncate an input string to at most max_words words.
  • Split on any whitespace (collapsing runs of spaces, tabs, newlines).
  • If the last retained word is a common stop-word (e.g. “of”, “the”), drop it so you don’t end on an article/preposition.
  • Return a single string of the truncated words.
  • Handle edge cases: empty input, exact fits, max_words = 1, and invalid parameters (max_words < 1).
  • Truncate an input string to at most max_words words.
  • Split on any whitespace (collapsing runs of spaces, tabs, newlines).
  • If the last retained word is a common stop-word (e.g. “of”, “the”), drop it so you don’t end on an article/preposition.
  • Return a single string of the truncated words.
  • Handle edge cases: empty input, exact fits, max_words = 1, and invalid parameters (max_words < 1).
  • Truncate an input string to at most max_words words.
  • Split on any whitespace (collapsing runs of spaces, tabs, newlines).
  • If the last retained word is a common stop-word (e.g. “of”, “the”), drop it so you don’t end on an article/preposition.
  • Return a single string of the truncated words.
  • Handle edge cases: empty input, exact fits, max_words = 1, and invalid parameters (max_words < 1).
added 1086 characters in body
Source Link
Bob
  • 221
  • 1
  • 16

Unit Tests (basic)

import pytest


def test_error_on_invalid_max_words():
    """A `max_words` below 1 must raise a ValueError naming the parameter."""
    # pytest.raises fails the test when nothing is raised; `match` is a
    # regex search against str(exception), so this still checks that the
    # message mentions "max_words".
    with pytest.raises(ValueError, match="max_words"):
        truncate("some text", 0)

def test_no_truncation_if_shorter():
    """Text with no more than `max_words` words comes back unchanged."""
    limit = 3
    # One input shorter than the limit, one that fits exactly.
    for text in ("one two", "a b c"):
        assert truncate(text, limit) == text

def test_simple_truncation():
    """Words beyond the limit are cut off."""
    shortened = truncate("one two three four", 2)
    assert shortened == "one two"

def test_drop_trailing_stopword():
    """A stop-word left at the end of the truncated text is removed."""
    # The three kept words are "alpha beta in"; the trailing "in" is dropped.
    ends_on_stopword = truncate("alpha beta in the", 3)
    assert ends_on_stopword == "alpha beta"
    # Here "beta" ends the kept words, so the earlier "in" is left alone.
    ends_on_content = truncate("alpha in beta gamma", 3)
    assert ends_on_content == "alpha in beta"

def test_strip_whitespace_and_split():
    """Leading, trailing and repeated spaces do not produce empty words."""
    padded = "   hello   world  "
    assert truncate(padded, 1) == "hello"

def test_mixed_case_and_stopword():
    """Stop-word matching ignores case; kept words keep their casing."""
    # "The" is the third kept word and is dropped despite the capital T.
    shortened = truncate("Run In The Park", 3)
    assert shortened == "Run In"


if __name__ == "__main__":
    # pytest.main() returns the run's exit code; pass it on so that running
    # this file as a script exits non-zero when a test fails.
    raise SystemExit(pytest.main())

Unit Tests (basic)

import pytest


def test_error_on_invalid_max_words():
    """A `max_words` below 1 must raise a ValueError naming the parameter."""
    # pytest.raises fails the test when nothing is raised; `match` is a
    # regex search against str(exception), so this still checks that the
    # message mentions "max_words".
    with pytest.raises(ValueError, match="max_words"):
        truncate("some text", 0)

def test_no_truncation_if_shorter():
    """Text with no more than `max_words` words comes back unchanged."""
    limit = 3
    # One input shorter than the limit, one that fits exactly.
    for text in ("one two", "a b c"):
        assert truncate(text, limit) == text

def test_simple_truncation():
    """Words beyond the limit are cut off."""
    shortened = truncate("one two three four", 2)
    assert shortened == "one two"

def test_drop_trailing_stopword():
    """A stop-word left at the end of the truncated text is removed."""
    # The three kept words are "alpha beta in"; the trailing "in" is dropped.
    ends_on_stopword = truncate("alpha beta in the", 3)
    assert ends_on_stopword == "alpha beta"
    # Here "beta" ends the kept words, so the earlier "in" is left alone.
    ends_on_content = truncate("alpha in beta gamma", 3)
    assert ends_on_content == "alpha in beta"

def test_strip_whitespace_and_split():
    """Leading, trailing and repeated spaces do not produce empty words."""
    padded = "   hello   world  "
    assert truncate(padded, 1) == "hello"

def test_mixed_case_and_stopword():
    """Stop-word matching ignores case; kept words keep their casing."""
    # "The" is the third kept word and is dropped despite the capital T.
    shortened = truncate("Run In The Park", 3)
    assert shortened == "Run In"


if __name__ == "__main__":
    # pytest.main() returns the run's exit code; pass it on so that running
    # this file as a script exits non-zero when a test fails.
    raise SystemExit(pytest.main())
fix doc string
Source Link
Bob
  • 221
  • 1
  • 16
from typing import Set


# Lower-case words that must not be left dangling at the end of a
# truncated string; `truncate` compares against this set case-insensitively.
_STOP_WORDS: Set[str] = {
    "a",
    "an",
    "the",
    "of",
    "with",
    "by",
    "to",
    "from",
    "in",
    "on",
    "for",
}


def truncate(text: str, max_words: int = 3) -> str:
    """
    Truncate `text` to at most `max_words` whitespace-separated words.

    The text is split on any whitespace (spaces, tabs, newlines); runs of
    whitespace count as a single separator and leading/trailing whitespace
    is ignored. When the text has more than `max_words` words, the first
    `max_words` are kept and, if the last kept word is a stop-word
    (matched case-insensitively against `_STOP_WORDS`), that one word is
    dropped. Text that already fits is returned in full, even when it ends
    in a stop-word.

    Args:
        text: Input string to truncate.
        max_words: Maximum number of words to retain (must be ≥1).

    Returns:
        The retained words joined by single spaces. The result is empty for
        empty or whitespace-only input, and also when truncating to a
        single word leaves only a stop-word.

    Raises:
        ValueError: if `max_words < 1`.

    Examples:
        >>> truncate("run in the park", 3)
        'run in'

        >>> truncate("of the", 2)
        'of the'
    """
    if max_words < 1:
        raise ValueError("max_words must be ≥ 1")

    # split() without arguments already ignores leading/trailing whitespace
    # and collapses runs, so no separate strip() is needed.
    words = text.split()
    if len(words) <= max_words:
        return " ".join(words)

    # len(words) > max_words >= 1 here, so `head` is never empty.
    head = words[:max_words]
    # Drop a single trailing stop-word so the result does not end on
    # "of", "the", etc. Only the last kept word is examined.
    if head[-1].lower() in _STOP_WORDS:
        head.pop()

    return " ".join(head)
from typing import Set


# Lower-case words that must not be left dangling at the end of a
# truncated string; `truncate` compares against this set case-insensitively.
_STOP_WORDS: Set[str] = {
    "a",
    "an",
    "the",
    "of",
    "with",
    "by",
    "to",
    "from",
    "in",
    "on",
    "for",
}


def truncate(text: str, max_words: int = 3) -> str:
    """
    Truncate `text` to at most `max_words` whitespace-separated words.

    The text is split on any whitespace (spaces, tabs, newlines); runs of
    whitespace count as a single separator and leading/trailing whitespace
    is ignored. When the text has more than `max_words` words, the first
    `max_words` are kept and, if the last kept word is a stop-word
    (matched case-insensitively against `_STOP_WORDS`), that one word is
    dropped. Text that already fits is returned in full, even when it ends
    in a stop-word.

    Args:
        text: Input string to truncate.
        max_words: Maximum number of words to retain (must be ≥1).

    Returns:
        The retained words joined by single spaces. The result is empty for
        empty or whitespace-only input, and also when truncating to a
        single word leaves only a stop-word.

    Raises:
        ValueError: if `max_words < 1`.

    Examples:
        >>> truncate("runs in the park", 3)
        'runs in'

        >>> truncate("of the", 2)
        'of the'
    """
    if max_words < 1:
        raise ValueError("max_words must be ≥ 1")

    # split() without arguments already ignores leading/trailing whitespace
    # and collapses runs, so no separate strip() is needed.
    words = text.split()
    if len(words) <= max_words:
        return " ".join(words)

    # len(words) > max_words >= 1 here, so `head` is never empty.
    head = words[:max_words]
    # Drop a single trailing stop-word so the result does not end on
    # "of", "the", etc. Only the last kept word is examined.
    if head[-1].lower() in _STOP_WORDS:
        head.pop()

    return " ".join(head)
from typing import Set


# Lower-case words that must not be left dangling at the end of a
# truncated string; `truncate` compares against this set case-insensitively.
_STOP_WORDS: Set[str] = {
    "a",
    "an",
    "the",
    "of",
    "with",
    "by",
    "to",
    "from",
    "in",
    "on",
    "for",
}


def truncate(text: str, max_words: int = 3) -> str:
    """
    Truncate `text` to at most `max_words` whitespace-separated words.

    The text is split on any whitespace (spaces, tabs, newlines); runs of
    whitespace count as a single separator and leading/trailing whitespace
    is ignored. When the text has more than `max_words` words, the first
    `max_words` are kept and, if the last kept word is a stop-word
    (matched case-insensitively against `_STOP_WORDS`), that one word is
    dropped. Text that already fits is returned in full, even when it ends
    in a stop-word.

    Args:
        text: Input string to truncate.
        max_words: Maximum number of words to retain (must be ≥1).

    Returns:
        The retained words joined by single spaces. The result is empty for
        empty or whitespace-only input, and also when truncating to a
        single word leaves only a stop-word.

    Raises:
        ValueError: if `max_words < 1`.

    Examples:
        >>> truncate("run in the park", 3)
        'run in'

        >>> truncate("of the", 2)
        'of the'
    """
    if max_words < 1:
        raise ValueError("max_words must be ≥ 1")

    # split() without arguments already ignores leading/trailing whitespace
    # and collapses runs, so no separate strip() is needed.
    words = text.split()
    if len(words) <= max_words:
        return " ".join(words)

    # len(words) > max_words >= 1 here, so `head` is never empty.
    head = words[:max_words]
    # Drop a single trailing stop-word so the result does not end on
    # "of", "the", etc. Only the last kept word is examined.
    if head[-1].lower() in _STOP_WORDS:
        head.pop()

    return " ".join(head)
Source Link
Bob
  • 221
  • 1
  • 16
Loading