From 299a6342cc4b5fecde84059e437ed9aab72474d0 Mon Sep 17 00:00:00 2001
From: Jake Howard
Date: Sun, 26 Jun 2022 19:25:30 +0100
Subject: [PATCH] Add basic word count and reading time

---
 website/common/models.py      | 10 +++++++---
 website/common/streamfield.py | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/website/common/models.py b/website/common/models.py
index f4e50b4..a6deb52 100644
--- a/website/common/models.py
+++ b/website/common/models.py
@@ -1,3 +1,4 @@
+import math
 from typing import Any
 
 from django.db import models
@@ -8,7 +9,7 @@ from wagtail.fields import StreamField
 from wagtail.images import get_image_model_string
 from wagtail.models import Page
 
-from .streamfield import get_blocks
+from .streamfield import get_blocks, get_word_count
 from .utils import TocEntry, get_table_of_contents
 
 
@@ -53,11 +54,14 @@ class BaseContentMixin(models.Model):
 
     @cached_property
     def reading_time(self) -> int:
-        return 4
+        """
+        https://help.medium.com/hc/en-us/articles/214991667-Read-time
+        """
+        return int(math.ceil(self.word_count / 265))
 
     @cached_property
     def word_count(self) -> int:
-        return 1600
+        return get_word_count(self.body)
 
 
 class ContentPage(BasePage, BaseContentMixin):  # type: ignore[misc]
diff --git a/website/common/streamfield.py b/website/common/streamfield.py
index 13f65e7..0b91bc6 100644
--- a/website/common/streamfield.py
+++ b/website/common/streamfield.py
@@ -1,13 +1,18 @@
+from typing import Iterator
+
 from django.utils import lorem_ipsum
-from django.utils.html import format_html_join
+from django.utils.html import format_html_join, strip_tags
+from django.utils.text import smart_split
 from wagtail import blocks
 from wagtail.embeds.blocks import EmbedBlock
 
+IGNORE_WORDCOUNT_BLOCKS = (blocks.RawHTMLBlock, EmbedBlock)
+
 
 class LoremBlock(blocks.StructBlock):
     paragraphs = blocks.IntegerBlock(min_value=1)
 
-    def render(self, value: dict, context: dict) -> str:
+    def render(self, value: dict, context: dict | None = None) -> str:
         return format_html_join(
             "\n\n",
             "<p>{}</p>",
@@ -26,3 +31,26 @@ def get_blocks() -> list[tuple[str, blocks.BaseBlock]]:
         ("lorem", LoremBlock()),
         ("html", blocks.RawHTMLBlock()),
     ]
+
+
+def get_plain_text(value: blocks.StreamValue) -> Iterator[str]:
+    """
+    Yield the string form of each block in the stream with HTML tags
+    stripped, skipping blocks listed in IGNORE_WORDCOUNT_BLOCKS.
+    """
+    for block in value:
+        # `block_type` is the child's name (a str), so it can never match a
+        # block class; the block definition itself is exposed as `.block`.
+        if isinstance(block.block, IGNORE_WORDCOUNT_BLOCKS):
+            continue
+        yield strip_tags(str(block))
+
+
+def get_word_count(value: blocks.StreamValue) -> int:
+    """
+    Return the number of smart_split tokens across the stream's plain text.
+    """
+    count = 0
+    for chunk in get_plain_text(value):
+        count += len(list(smart_split(chunk)))
+    return count