Add basic tests for extract_text
This commit is contained in:
parent
51be747103
commit
60e4913e43
2 changed files with 17 additions and 2 deletions
|
@@ -2,7 +2,7 @@ from django.test import SimpleTestCase
|
||||||
|
|
||||||
from .embed import YouTubeLiteEmbedFinder
|
from .embed import YouTubeLiteEmbedFinder
|
||||||
from .models import BasePage
|
from .models import BasePage
|
||||||
from .utils import get_page_models, get_table_of_contents
|
from .utils import extract_text, get_page_models, get_table_of_contents
|
||||||
|
|
||||||
|
|
||||||
class BasePageTestCase(SimpleTestCase):
|
class BasePageTestCase(SimpleTestCase):
|
||||||
|
@@ -92,3 +92,15 @@ class TableOfContentsTestCase(SimpleTestCase):
|
||||||
sub_entry = first_entry.children[1]
|
sub_entry = first_entry.children[1]
|
||||||
self.assertEqual(len(sub_entry.children), 1)
|
self.assertEqual(len(sub_entry.children), 1)
|
||||||
self.assertEqual([entry.title for entry in sub_entry.children], ["2.2.1"])
|
self.assertEqual([entry.title for entry in sub_entry.children], ["2.2.1"])
|
||||||
|
|
||||||
|
|
||||||
|
class ExtractTextTestCase(SimpleTestCase):
|
||||||
|
def test_extracts_text(self) -> None:
|
||||||
|
self.assertEqual(extract_text("<p><b>Hello</b> there!</p>"), "Hello there!")
|
||||||
|
self.assertEqual(
|
||||||
|
extract_text("<p>Paragraph 1</p>\n<p>Paragraph 2</p>"),
|
||||||
|
"Paragraph 1 Paragraph 2",
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_plain_text(self) -> None:
|
||||||
|
self.assertEqual(extract_text("Hello there!"), "Hello there!")
|
||||||
|
|
|
@@ -78,7 +78,10 @@ def extract_text(html: str) -> str:
|
||||||
"""
|
"""
|
||||||
Get the plain text of some HTML.
|
Get the plain text of some HTML.
|
||||||
"""
|
"""
|
||||||
return " ".join(BeautifulSoup(html, "lxml").find_all(text=True))
|
lines = (
|
||||||
|
text.strip(" \n") for text in BeautifulSoup(html, "lxml").find_all(text=True)
|
||||||
|
)
|
||||||
|
return " ".join(line for line in lines if line)
|
||||||
|
|
||||||
|
|
||||||
def truncate_string(text: str, words: int) -> str:
|
def truncate_string(text: str, words: int) -> str:
|
||||||
|
|
Loading…
Reference in a new issue