diff --git a/website/common/models.py b/website/common/models.py index ff373c7..e8c1be5 100644 --- a/website/common/models.py +++ b/website/common/models.py @@ -11,7 +11,7 @@ from django.http.response import Http404, HttpResponse, HttpResponseBadRequest from django.shortcuts import redirect from django.template.defaultfilters import pluralize from django.utils.functional import cached_property, classproperty -from django.utils.text import slugify +from django.utils.text import Truncator, slugify from wagtail.admin.panels import FieldPanel, MultiFieldPanel from wagtail.contrib.routable_page.models import RoutablePageMixin, route from wagtail.contrib.settings.models import BaseGenericSetting, register_setting @@ -31,12 +31,10 @@ from .serializers import PaginationSerializer from .streamfield import add_heading_anchors, get_blocks, get_content_html from .utils import ( TocEntry, - count_words, extract_text, get_site_title, get_table_of_contents, get_url_mime_type, - truncate_string, ) @@ -141,16 +139,11 @@ class BaseContentPage(BasePage, MetadataMixin): @cached_property def word_count(self) -> int: - return count_words(self.plain_text) + return len(self.plain_text.split()) @cached_property def summary(self) -> str: - summary = truncate_string(self.plain_text, 50) - - if summary and summary != self.plain_text and not summary.endswith("."): - summary += "…" - - return summary + return Truncator(self.plain_text).words(50) @cached_property def body_html(self) -> str: diff --git a/website/common/tests/test_utils.py b/website/common/tests/test_utils.py index eb899eb..a3fc08c 100644 --- a/website/common/tests/test_utils.py +++ b/website/common/tests/test_utils.py @@ -3,7 +3,6 @@ from django.test import SimpleTestCase from wagtail.rich_text import features as richtext_feature_registry from website.common.utils import ( - count_words, extract_text, get_table_of_contents, heading_id, @@ -97,13 +96,6 @@ class ExtractTextTestCase(SimpleTestCase): self.assertEqual(extract_text("Hello there!"), "Hello there!") -class CountWordsTestCase(SimpleTestCase): - def test_counts_words(self) -> None: - self.assertEqual(count_words("a b c"), 3) - self.assertEqual(count_words("Correct Horse Battery Staple"), 4) - self.assertEqual(count_words("Hello there! How are you?"), 5) - - class RichTextFeaturesTestCase(SimpleTestCase): def test_features_exist(self) -> None: for editor, editor_config in settings.WAGTAILADMIN_RICH_TEXT_EDITORS.items(): diff --git a/website/common/utils.py b/website/common/utils.py index 51c8ded..cfe4c6a 100644 --- a/website/common/utils.py +++ b/website/common/utils.py @@ -1,13 +1,13 @@ from dataclasses import dataclass -from itertools import islice, pairwise -from typing import Iterable, Optional, Type +from itertools import pairwise +from typing import Optional, Type import requests from bs4 import BeautifulSoup, SoupStrainer from django.conf import settings from django.db import models from django.http.request import HttpRequest -from django.utils.text import re_words, slugify +from django.utils.text import slugify from django_cache_decorator import django_cache_decorator from wagtail.models import Page, Site from wagtail.models import get_page_models as get_wagtail_page_models @@ -69,19 +69,6 @@ def show_toolbar_callback(request: HttpRequest) -> bool: return settings.DEBUG -def split_words(text: str) -> Iterable[str]: - for word in re_words.split(text): - if word and word.strip(): - yield word.strip() - - -def count_words(text: str) -> int: - """ - Count the number of words in the text, without duplicating the item in memory - """ - return len(list(split_words(text))) - - def extract_text(html: str) -> str: """ Get the plain text of some HTML. @@ -91,10 +78,6 @@ def extract_text(html: str) -> str: ) -def truncate_string(text: str, words: int) -> str: - return " ".join(islice(split_words(text), words)) - - def heading_id(heading: str) -> str: """ Convert a heading into an identifier which is valid for a HTML id attribute