Simplify summary and word count implementations

This commit is contained in:
Jake Howard 2024-04-01 19:42:59 +01:00
parent fe43b9c683
commit 3a8e6182ad
Signed by: jake
GPG key ID: 57AFB45680EDD477
3 changed files with 6 additions and 38 deletions

View file

@ -11,7 +11,7 @@ from django.http.response import Http404, HttpResponse, HttpResponseBadRequest
from django.shortcuts import redirect from django.shortcuts import redirect
from django.template.defaultfilters import pluralize from django.template.defaultfilters import pluralize
from django.utils.functional import cached_property, classproperty from django.utils.functional import cached_property, classproperty
from django.utils.text import slugify from django.utils.text import Truncator, slugify
from wagtail.admin.panels import FieldPanel, MultiFieldPanel from wagtail.admin.panels import FieldPanel, MultiFieldPanel
from wagtail.contrib.routable_page.models import RoutablePageMixin, route from wagtail.contrib.routable_page.models import RoutablePageMixin, route
from wagtail.contrib.settings.models import BaseGenericSetting, register_setting from wagtail.contrib.settings.models import BaseGenericSetting, register_setting
@ -31,12 +31,10 @@ from .serializers import PaginationSerializer
from .streamfield import add_heading_anchors, get_blocks, get_content_html from .streamfield import add_heading_anchors, get_blocks, get_content_html
from .utils import ( from .utils import (
TocEntry, TocEntry,
count_words,
extract_text, extract_text,
get_site_title, get_site_title,
get_table_of_contents, get_table_of_contents,
get_url_mime_type, get_url_mime_type,
truncate_string,
) )
@ -141,16 +139,11 @@ class BaseContentPage(BasePage, MetadataMixin):
@cached_property @cached_property
def word_count(self) -> int: def word_count(self) -> int:
return count_words(self.plain_text) return len(self.plain_text.split())
@cached_property @cached_property
def summary(self) -> str: def summary(self) -> str:
summary = truncate_string(self.plain_text, 50) return Truncator(self.plain_text).words(50)
if summary and summary != self.plain_text and not summary.endswith("."):
summary += ""
return summary
@cached_property @cached_property
def body_html(self) -> str: def body_html(self) -> str:

View file

@ -3,7 +3,6 @@ from django.test import SimpleTestCase
from wagtail.rich_text import features as richtext_feature_registry from wagtail.rich_text import features as richtext_feature_registry
from website.common.utils import ( from website.common.utils import (
count_words,
extract_text, extract_text,
get_table_of_contents, get_table_of_contents,
heading_id, heading_id,
@ -97,13 +96,6 @@ class ExtractTextTestCase(SimpleTestCase):
self.assertEqual(extract_text("Hello there!"), "Hello there!") self.assertEqual(extract_text("Hello there!"), "Hello there!")
class CountWordsTestCase(SimpleTestCase):
    """Checks `count_words` against a few short phrases."""

    def test_counts_words(self) -> None:
        # Each pair is (input text, expected number of words); checked in order.
        expectations = (
            ("a b c", 3),
            ("Correct Horse Battery Staple", 4),
            ("Hello there! How are you?", 5),
        )
        for phrase, expected_count in expectations:
            self.assertEqual(count_words(phrase), expected_count)
class RichTextFeaturesTestCase(SimpleTestCase): class RichTextFeaturesTestCase(SimpleTestCase):
def test_features_exist(self) -> None: def test_features_exist(self) -> None:
for editor, editor_config in settings.WAGTAILADMIN_RICH_TEXT_EDITORS.items(): for editor, editor_config in settings.WAGTAILADMIN_RICH_TEXT_EDITORS.items():

View file

@ -1,13 +1,13 @@
from dataclasses import dataclass from dataclasses import dataclass
from itertools import islice, pairwise from itertools import pairwise
from typing import Iterable, Optional, Type from typing import Optional, Type
import requests import requests
from bs4 import BeautifulSoup, SoupStrainer from bs4 import BeautifulSoup, SoupStrainer
from django.conf import settings from django.conf import settings
from django.db import models from django.db import models
from django.http.request import HttpRequest from django.http.request import HttpRequest
from django.utils.text import re_words, slugify from django.utils.text import slugify
from django_cache_decorator import django_cache_decorator from django_cache_decorator import django_cache_decorator
from wagtail.models import Page, Site from wagtail.models import Page, Site
from wagtail.models import get_page_models as get_wagtail_page_models from wagtail.models import get_page_models as get_wagtail_page_models
@ -69,19 +69,6 @@ def show_toolbar_callback(request: HttpRequest) -> bool:
return settings.DEBUG return settings.DEBUG
def split_words(text: str) -> Iterable[str]:
    """
    Lazily yield the pieces of `text` produced by splitting it with Django's
    `re_words` pattern, skipping pieces that are empty or whitespace-only.

    Every yielded piece has its surrounding whitespace removed.
    """
    for fragment in re_words.split(text):
        # The split may hand back falsy entries (e.g. empty strings); drop them.
        if not fragment:
            continue
        trimmed = fragment.strip()
        if trimmed:
            yield trimmed
def count_words(text: str) -> int:
    """
    Count the number of words in the text, without duplicating the item in memory

    The words are whatever `split_words` yields for `text`; they are tallied
    one at a time rather than being collected into a list first.
    """
    # Consume the generator directly: `len(list(...))` would build a throwaway
    # list holding every word just to measure its length.
    return sum(1 for _ in split_words(text))
def extract_text(html: str) -> str: def extract_text(html: str) -> str:
""" """
Get the plain text of some HTML. Get the plain text of some HTML.
@ -91,10 +78,6 @@ def extract_text(html: str) -> str:
) )
def truncate_string(text: str, words: int) -> str:
    """
    Return at most the first `words` words of `text`, joined by single spaces.

    Words are taken from `split_words(text)`, in order.
    """
    leading_words = islice(split_words(text), words)
    return " ".join(leading_words)
def heading_id(heading: str) -> str: def heading_id(heading: str) -> str:
""" """
Convert a heading into an identifier which is valid for a HTML id attribute Convert a heading into an identifier which is valid for a HTML id attribute