Optimise getting content HTML by only parsing the necessary tags

This commit is contained in:
Jake Howard 2022-07-04 18:55:18 +01:00
parent 66ccd52b15
commit ccb481726c
Signed by: jake
GPG key ID: 57AFB45680EDD477
2 changed files with 12 additions and 14 deletions

View file

@@ -75,7 +75,7 @@ class BaseContentMixin(models.Model):
@cached_property @cached_property
def content_html(self) -> str: def content_html(self) -> str:
return get_content_html(self.body) return get_content_html(self.body_html)
@cached_property @cached_property
def plain_text(self) -> str: def plain_text(self) -> str:

View file

@@ -1,7 +1,6 @@
from itertools import product from itertools import product
from typing import Iterable
from bs4 import BeautifulSoup from bs4 import BeautifulSoup, SoupStrainer
from django.utils import lorem_ipsum from django.utils import lorem_ipsum
from django.utils.html import format_html_join from django.utils.html import format_html_join
from django.utils.text import slugify from django.utils.text import slugify
@@ -78,20 +77,19 @@ def get_blocks() -> list[tuple[str, blocks.BaseBlock]]:
] ]
def get_content_blocks(value: blocks.StreamValue) -> Iterable[blocks.BaseBlock]: def get_content_html(html: str) -> str:
for block in value:
if not isinstance(block.block_type, IGNORE_PLAINTEXT_BLOCKS):
yield block
def get_content_html(value: blocks.StreamValue) -> str:
""" """
Get the HTML of just original content (eg not embeds etc) Get the HTML of just original content (eg not embeds etc)
""" """
html = "" block_classes = [
for block in get_content_blocks(value): f"block-{block_name}"
html += str(block) for block_name, block in get_blocks()
return html if not isinstance(block, IGNORE_PLAINTEXT_BLOCKS)
]
return str(
BeautifulSoup(html, "lxml", parse_only=SoupStrainer(class_=block_classes))
)
def add_heading_anchors(html: str) -> str: def add_heading_anchors(html: str) -> str: