"""
Tests for engine.filter module.
"""

from engine.filter import skip, strip_tags


# NOTE(review): the HTML markup and entities used as strip_tags inputs in this
# module were reconstructed from the expected values and docstrings. Confirm
# the exact fixtures against the project's history before relying on them.


class TestStripTags:
    """Tests for strip_tags function."""

    def test_strips_simple_html(self):
        """Basic HTML tags are removed."""
        assert strip_tags("<p>Hello</p>") == "Hello"
        assert strip_tags("<b>Bold</b>") == "Bold"
        assert strip_tags("<i>Italic</i>") == "Italic"

    def test_strips_nested_html(self):
        """Nested HTML tags are handled."""
        assert strip_tags("<div><p>Nested</p></div>") == "Nested"
        # Nested tags carrying attributes but no text collapse to nothing.
        # NOTE(review): reconstructed input; only the expected "" was known.
        assert strip_tags('<div><span class="empty"></span></div>') == ""

    def test_handles_empty_string(self):
        """Empty string returns empty string."""
        assert strip_tags("") == ""
        # None is treated like an empty input rather than raising.
        assert strip_tags(None) == ""

    def test_handles_plain_text(self):
        """Plain text without tags passes through."""
        assert strip_tags("Plain text") == "Plain text"

    def test_unescapes_html_entities(self):
        """HTML entities are decoded and tags are stripped."""
        # Entity-encoded tags must be decoded first and then removed as tags.
        assert strip_tags("&lt;b&gt;test&lt;/b&gt;") == "test"
        assert strip_tags("Hello &amp; World") == "Hello & World"

    def test_handles_malformed_html(self):
        """Malformed HTML is handled gracefully."""
        # Opening tag that is never closed.
        assert strip_tags("<b>Unclosed") == "Unclosed"
        # Closing tag that was never opened.
        assert strip_tags("</p>No start") == "No start"


class TestSkip:
    """Tests for skip function - content filtering."""

    def test_skips_sports_content(self):
        """Sports-related headlines are skipped."""
        assert skip("Football: Team wins championship") is True
        assert skip("NBA Finals Game 7 results") is True
        assert skip("Soccer match ends in draw") is True
        assert skip("Premier League transfer news") is True
        assert skip("Super Bowl halftime show") is True

    def test_skips_vapid_content(self):
        """Vapid/celebrity content is skipped."""
        assert skip("Kim Kardashian's new look") is True
        assert skip("Influencer goes viral") is True
        assert skip("Red carpet best dressed") is True
        assert skip("Celebrity couple splits") is True

    def test_allows_real_news(self):
        """Legitimate news headlines are allowed."""
        assert skip("Scientists discover new planet") is False
        assert skip("Economy grows by 3%") is False
        assert skip("World leaders meet for summit") is False
        assert skip("New technology breakthrough") is False

    def test_case_insensitive(self):
        """Filter is case insensitive."""
        assert skip("FOOTBALL scores") is True
        assert skip("Football SCORES") is True
        assert skip("Kardashian") is True

    def test_word_boundary_matching(self):
        """Word boundary matching works correctly."""
        assert skip("The football stadium") is True
        # Longer words that merely start with a filtered term must not match.
        assert skip("Footballer scores") is False
        assert skip("Footballs on sale") is False


class TestIntegration:
    """Integration tests combining filter functions."""

    def test_full_pipeline(self):
        """Test strip_tags followed by skip."""
        # NOTE(review): reconstructed markup; only the stripped text was known.
        html = '<div class="headline"><b>Breaking:</b> Football championship final</div>'
        text = strip_tags(html)
        assert text == "Breaking: Football championship final"
        assert skip(text) is True