website/website/common/streamfield.py
Jake Howard 9bc5ccbbdc
Don't include the first tangent in the content HTML
This prevents it being used as part of the summary, which in the case of updates looks strange.
2023-04-11 14:05:46 +01:00

135 lines
3.8 KiB
Python

from itertools import product
from bs4 import BeautifulSoup, SoupStrainer
from django.utils import lorem_ipsum
from django.utils.html import format_html_join
from wagtail import blocks
from wagtail.contrib.typed_table_block.blocks import TypedTableBlock
from wagtail.embeds.blocks import EmbedBlock
from wagtail.images.blocks import ImageChooserBlock
from website.common.utils import HEADER_TAGS, heading_id
from website.contrib.code_block.blocks import CodeBlock
from website.contrib.mermaid_block.blocks import MermaidBlock
class LoremBlock(blocks.StructBlock):
    """
    Placeholder block which renders a number of lorem ipsum paragraphs.
    """

    # How many paragraphs to generate (at least 1).
    paragraphs = blocks.IntegerBlock(min_value=1)

    def render(self, value: dict, context: dict | None = None) -> str:
        """
        Render the requested number of paragraphs, each wrapped in ``<p>``.

        ``value["paragraphs"]`` is the paragraph count. ``context`` is
        accepted for signature compatibility but is not used here.
        """
        generated = lorem_ipsum.paragraphs(value["paragraphs"])
        # format_html_join expects one tuple of format arguments per row.
        rows = ((text,) for text in generated)
        return format_html_join("\n\n", "<p>{}</p>", rows)

    class Meta:
        label = "Lorem Ipsum"
        icon = "openquote"
class ImageCaptionBlock(blocks.StructBlock):
    """
    An image with an optional rich text caption.
    """

    image = ImageChooserBlock()
    # The caption may be left empty; it uses the "plain" editor.
    caption = blocks.RichTextBlock(editor="plain", required=False)

    class Meta:
        template = "common/blocks/image-caption.html"
        label = "Image with caption"
        icon = "image"
class TangentBlock(blocks.StructBlock):
    """
    A named aside ("tangent") made up of a short name and rich text content.
    """

    # Name of the tangent, limited to 64 characters.
    name = blocks.CharBlock(max_length=64)
    # Body of the tangent, edited with the "simple" editor.
    content = blocks.RichTextBlock(editor="simple")

    class Meta:
        template = "common/blocks/tangent.html"
        label = "Tangent"
        icon = "comment"
class IFrameBlock(blocks.StructBlock):
    """
    A URL rendered through the iframe template, with an optional caption.
    """

    url = blocks.URLBlock()
    # The caption may be left empty; it uses the "plain" editor.
    caption = blocks.RichTextBlock(editor="plain", required=False)

    class Meta:
        template = "common/blocks/iframe.html"
        label = "IFrame"
        icon = "link-external"
# Block types which get_content_html() leaves out when collecting the
# "original content" of a page.
IGNORE_PLAINTEXT_BLOCKS = (
    blocks.RawHTMLBlock,
    EmbedBlock,
    ImageCaptionBlock,
    CodeBlock,
    MermaidBlock,
    IFrameBlock,
)

# Block types whose headings add_heading_anchors() does not decorate:
# everything ignored above, plus the lorem ipsum block.
IGNORE_HEADING_BLOCKS = IGNORE_PLAINTEXT_BLOCKS + (LoremBlock,)
def get_blocks() -> list[tuple[str, blocks.BaseBlock]]:
    """
    Build the ``(name, block)`` pairs available to stream fields.

    A fresh set of block instances is created on every call. The blocks are
    instantiated in the same order as they appear in the returned list.
    """
    stream_blocks: list[tuple[str, blocks.BaseBlock]] = [
        ("embed", EmbedBlock()),
        ("rich_text", blocks.RichTextBlock()),
        ("lorem", LoremBlock()),
        ("html", blocks.RawHTMLBlock()),
        ("image", ImageCaptionBlock()),
        ("code", CodeBlock()),
        ("tangent", TangentBlock()),
        ("mermaid", MermaidBlock()),
    ]

    # Column types which can be used inside a table.
    table_columns = [
        ("rich_text", blocks.RichTextBlock(editor="plain")),
        ("numeric", blocks.FloatBlock()),
        ("text", blocks.CharBlock()),
    ]
    stream_blocks.append(("table", TypedTableBlock(table_columns)))

    stream_blocks.append(("iframe", IFrameBlock()))
    return stream_blocks
def get_content_html(html: str) -> str:
    """
    Return the HTML for just the original content (eg not embeds etc).

    Only elements carrying the ``block-<name>`` class of a block which is not
    listed in ``IGNORE_PLAINTEXT_BLOCKS`` are parsed. If the first such
    ``div`` is a tangent, it is dropped from the result as well.
    """
    content_classes = []
    for block_name, block in get_blocks():
        if isinstance(block, IGNORE_PLAINTEXT_BLOCKS):
            continue
        content_classes.append(f"block-{block_name}")

    # Restrict parsing to the content blocks, discarding everything else.
    only_content = SoupStrainer(class_=content_classes)
    soup = BeautifulSoup(html, "lxml", parse_only=only_content)

    leading_block = soup.find("div", class_=content_classes)
    if leading_block is None:
        return str(soup)

    # A tangent in first position is removed from the returned content.
    if leading_block.attrs["class"][0].endswith("tangent"):
        leading_block.extract()

    return str(soup)
def add_heading_anchors(html: str) -> str:
    """
    Prepend a ``#`` anchor link to each heading inside eligible blocks.

    Headings are matched by the tags in ``HEADER_TAGS``, within any block
    which is not listed in ``IGNORE_HEADING_BLOCKS``. Each anchor's ``id``
    and ``href`` come from ``heading_id`` applied to the heading text.
    Returns the serialised document after modification.
    """
    eligible_names = [
        name
        for name, block in get_blocks()
        if not isinstance(block, IGNORE_HEADING_BLOCKS)
    ]
    targets: list[str] = [
        f".block-{block_name} {header_tag}"
        for header_tag, block_name in product(HEADER_TAGS, eligible_names)
    ]
    selector = ", ".join(targets)

    soup = BeautifulSoup(html, "lxml")
    for heading in soup.select(selector):
        slug = heading_id(heading.text)
        anchor = soup.new_tag(
            "a",
            attrs={"href": "#" + slug, "id": slug, "class": "heading-anchor"},
        )
        anchor.string = "#"
        # Place the anchor before the heading's existing content.
        heading.insert(0, anchor)

    return str(soup)