Fix length calculation for posts

`smart_split` isn't quite what's needed for splitting words.
This commit is contained in:
Jake Howard 2023-03-10 16:08:03 +00:00
parent f903dfcc8b
commit 83bc0d2ffb
Signed by: jake
GPG Key ID: 57AFB45680EDD477
2 changed files with 10 additions and 6 deletions

View File

@@ -5,7 +5,6 @@ whitenoise[brotli]==6.4.0
Pygments==2.14.0
beautifulsoup4==4.9.3
lxml==4.9.1
more-itertools==9.0.0
requests==2.28.1
wagtail-generic-chooser==0.5
django-rq==2.7.0

View File

@@ -1,13 +1,12 @@
from dataclasses import dataclass
from itertools import islice, pairwise
from typing import Type
from typing import Iterable, Type
from bs4 import BeautifulSoup, SoupStrainer
from django.conf import settings
from django.http.request import HttpRequest
from django.utils.text import slugify, smart_split
from django.utils.text import re_words, slugify
from django_cache_decorator import django_cache_decorator
from more_itertools import ilen
from wagtail.models import Page, Site
from wagtail.models import get_page_models as get_wagtail_page_models
@@ -66,11 +65,17 @@ def show_toolbar_callback(request: HttpRequest) -> bool:
return settings.DEBUG
def split_words(text: str) -> Iterable[str]:
    """
    Yield each non-blank piece of ``text``, stripped of surrounding whitespace.

    The text is cut up with ``re_words.split``; pieces that are ``None``,
    empty, or made up only of whitespace are dropped.
    """
    for piece in re_words.split(text):
        # NOTE(review): `re.split` emits ``None`` for a capture group that took
        # no part in a match - presumably what this falsy check guards against;
        # confirm against Django's `re_words` pattern.
        if not piece:
            continue
        # Strip once and reuse the result for both the test and the yield.
        stripped = piece.strip()
        if stripped:
            yield stripped
def count_words(text: str) -> int:
    """
    Count the number of words in the text.

    Words are whatever ``split_words`` yields. They are tallied one at a time,
    so no intermediate list of words is built just to measure its length.
    """
    return sum(1 for _ in split_words(text))
def extract_text(html: str) -> str:
@@ -83,7 +88,7 @@ def extract_text(html: str) -> str:
def truncate_string(text: str, words: int) -> str:
    """
    Return at most the first ``words`` words of ``text``, joined by single spaces.

    Words are taken from ``split_words``, which strips each one, so the original
    spacing between words is not preserved.
    """
    return " ".join(islice(split_words(text), words))
def heading_id(heading: str) -> str: