Simplify summary and word count implementations

This commit is contained in:
Jake Howard 2024-04-01 19:42:59 +01:00
parent fe43b9c683
commit 3a8e6182ad
Signed by: jake
GPG key ID: 57AFB45680EDD477
3 changed files with 6 additions and 38 deletions

View file

@ -11,7 +11,7 @@ from django.http.response import Http404, HttpResponse, HttpResponseBadRequest
from django.shortcuts import redirect
from django.template.defaultfilters import pluralize
from django.utils.functional import cached_property, classproperty
from django.utils.text import slugify
from django.utils.text import Truncator, slugify
from wagtail.admin.panels import FieldPanel, MultiFieldPanel
from wagtail.contrib.routable_page.models import RoutablePageMixin, route
from wagtail.contrib.settings.models import BaseGenericSetting, register_setting
@ -31,12 +31,10 @@ from .serializers import PaginationSerializer
from .streamfield import add_heading_anchors, get_blocks, get_content_html
from .utils import (
TocEntry,
count_words,
extract_text,
get_site_title,
get_table_of_contents,
get_url_mime_type,
truncate_string,
)
@ -141,16 +139,11 @@ class BaseContentPage(BasePage, MetadataMixin):
@cached_property
def word_count(self) -> int:
return count_words(self.plain_text)
return len(self.plain_text.split())
@cached_property
def summary(self) -> str:
summary = truncate_string(self.plain_text, 50)
if summary and summary != self.plain_text and not summary.endswith("."):
summary += "…"
return summary
return Truncator(self.plain_text).words(50)
@cached_property
def body_html(self) -> str:

View file

@ -3,7 +3,6 @@ from django.test import SimpleTestCase
from wagtail.rich_text import features as richtext_feature_registry
from website.common.utils import (
count_words,
extract_text,
get_table_of_contents,
heading_id,
@ -97,13 +96,6 @@ class ExtractTextTestCase(SimpleTestCase):
self.assertEqual(extract_text("Hello there!"), "Hello there!")
class CountWordsTestCase(SimpleTestCase):
def test_counts_words(self) -> None:
self.assertEqual(count_words("a b c"), 3)
self.assertEqual(count_words("Correct Horse Battery Staple"), 4)
self.assertEqual(count_words("Hello there! How are you?"), 5)
class RichTextFeaturesTestCase(SimpleTestCase):
def test_features_exist(self) -> None:
for editor, editor_config in settings.WAGTAILADMIN_RICH_TEXT_EDITORS.items():

View file

@ -1,13 +1,13 @@
from dataclasses import dataclass
from itertools import islice, pairwise
from typing import Iterable, Optional, Type
from itertools import pairwise
from typing import Optional, Type
import requests
from bs4 import BeautifulSoup, SoupStrainer
from django.conf import settings
from django.db import models
from django.http.request import HttpRequest
from django.utils.text import re_words, slugify
from django.utils.text import slugify
from django_cache_decorator import django_cache_decorator
from wagtail.models import Page, Site
from wagtail.models import get_page_models as get_wagtail_page_models
@ -69,19 +69,6 @@ def show_toolbar_callback(request: HttpRequest) -> bool:
return settings.DEBUG
def split_words(text: str) -> Iterable[str]:
for word in re_words.split(text):
if word and word.strip():
yield word.strip()
def count_words(text: str) -> int:
"""
Count the number of words in the text, without duplicating the item in memory
"""
return len(list(split_words(text)))
def extract_text(html: str) -> str:
"""
Get the plain text of some HTML.
@ -91,10 +78,6 @@ def extract_text(html: str) -> str:
)
def truncate_string(text: str, words: int) -> str:
return " ".join(islice(split_words(text), words))
def heading_id(heading: str) -> str:
"""
Convert a heading into an identifier which is valid for a HTML id attribute