diff --git a/requirements.txt b/requirements.txt
index 0158bd8..b5abbac 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,6 @@ whitenoise[brotli]==6.4.0
 Pygments==2.14.0
 beautifulsoup4==4.9.3
 lxml==4.9.1
-more-itertools==9.0.0
 requests==2.28.1
 wagtail-generic-chooser==0.5
 django-rq==2.7.0
diff --git a/website/common/utils.py b/website/common/utils.py
index b702bdf..e6789ab 100644
--- a/website/common/utils.py
+++ b/website/common/utils.py
@@ -1,13 +1,12 @@
 from dataclasses import dataclass
 from itertools import islice, pairwise
-from typing import Type
+from typing import Iterable, Type
 
 from bs4 import BeautifulSoup, SoupStrainer
 from django.conf import settings
 from django.http.request import HttpRequest
-from django.utils.text import slugify, smart_split
+from django.utils.text import re_words, slugify
 from django_cache_decorator import django_cache_decorator
-from more_itertools import ilen
 from wagtail.models import Page, Site
 from wagtail.models import get_page_models as get_wagtail_page_models
 
@@ -66,11 +65,21 @@ def show_toolbar_callback(request: HttpRequest) -> bool:
     return settings.DEBUG
 
 
+def split_words(text: str) -> Iterable[str]:
+    """
+    Yield each non-blank, whitespace-stripped piece obtained by splitting the text with re_words
+    """
+    for word in re_words.split(text):
+        # Strip once and reuse the result; falsy and whitespace-only pieces are skipped
+        if word and (stripped := word.strip()):
+            yield stripped
+
+
 def count_words(text: str) -> int:
     """
     Count the number of words in the text, without duplicating the item in memory
     """
-    return ilen(smart_split(text))
+    return sum(1 for _ in split_words(text))
 
 
 def extract_text(html: str) -> str:
@@ -83,7 +92,7 @@ def extract_text(html: str) -> str:
 
 
 def truncate_string(text: str, words: int) -> str:
-    return " ".join(islice(smart_split(text), words))
+    return " ".join(islice(split_words(text), words))
 
 
 def heading_id(heading: str) -> str: