Jake Howard
9bc5ccbbdc
This prevents it being used as part of the summary, which in the case of updates looks strange.
135 lines
3.8 KiB
Python
135 lines
3.8 KiB
Python
from itertools import product
|
|
|
|
from bs4 import BeautifulSoup, SoupStrainer
|
|
from django.utils import lorem_ipsum
|
|
from django.utils.html import format_html_join
|
|
from wagtail import blocks
|
|
from wagtail.contrib.typed_table_block.blocks import TypedTableBlock
|
|
from wagtail.embeds.blocks import EmbedBlock
|
|
from wagtail.images.blocks import ImageChooserBlock
|
|
|
|
from website.common.utils import HEADER_TAGS, heading_id
|
|
from website.contrib.code_block.blocks import CodeBlock
|
|
from website.contrib.mermaid_block.blocks import MermaidBlock
|
|
|
|
|
|
class LoremBlock(blocks.StructBlock):
    """
    Placeholder block that renders a chosen number of lorem ipsum paragraphs.
    """

    # How many paragraphs to generate; at least one is required.
    paragraphs = blocks.IntegerBlock(min_value=1)

    def render(self, value: dict, context: dict | None = None) -> str:
        """
        Render the block as `<p>` elements separated by blank lines.

        `value["paragraphs"]` is the number of paragraphs requested from
        `lorem_ipsum.paragraphs`. `context` is accepted but not used.
        """
        generated = lorem_ipsum.paragraphs(value["paragraphs"])
        return format_html_join(
            "\n\n",
            "<p>{}</p>",
            ((text,) for text in generated),
        )

    class Meta:
        label = "Lorem Ipsum"
        icon = "openquote"
|
|
|
|
|
|
class ImageCaptionBlock(blocks.StructBlock):
    """
    A chosen image with an optional rich-text caption.
    """

    image = ImageChooserBlock()
    # The caption may be left empty; it uses the "plain" rich-text editor.
    caption = blocks.RichTextBlock(editor="plain", required=False)

    class Meta:
        template = "common/blocks/image-caption.html"
        label = "Image with caption"
        icon = "image"
|
|
|
|
|
|
class TangentBlock(blocks.StructBlock):
    """
    A named tangent: a short name plus rich-text content.
    """

    # Names are capped at 64 characters.
    name = blocks.CharBlock(max_length=64)
    # The body uses the "simple" rich-text editor.
    content = blocks.RichTextBlock(editor="simple")

    class Meta:
        template = "common/blocks/tangent.html"
        label = "Tangent"
        icon = "comment"
|
|
|
|
|
|
class IFrameBlock(blocks.StructBlock):
    """
    A URL with an optional rich-text caption, rendered by the iframe template.
    """

    url = blocks.URLBlock()
    # The caption may be left empty; it uses the "plain" rich-text editor.
    caption = blocks.RichTextBlock(editor="plain", required=False)

    class Meta:
        template = "common/blocks/iframe.html"
        label = "IFrame"
        icon = "link-external"
|
|
|
|
|
|
# Block types that get_content_html() leaves out when it collects the
# content-only HTML.
IGNORE_PLAINTEXT_BLOCKS = (
    blocks.RawHTMLBlock,
    EmbedBlock,
    ImageCaptionBlock,
    CodeBlock,
    MermaidBlock,
    IFrameBlock,
)

# Block types whose headings add_heading_anchors() does not touch: all of
# the types listed above, plus the lorem ipsum block.
IGNORE_HEADING_BLOCKS = IGNORE_PLAINTEXT_BLOCKS + (LoremBlock,)
|
|
|
|
|
|
def get_blocks() -> list[tuple[str, blocks.BaseBlock]]:
    """
    Build the `(name, block)` pairs available to stream content.

    Every call creates fresh block instances. The names are the same ones
    used to build the `block-<name>` CSS classes elsewhere in this module.
    """
    available = [
        ("embed", EmbedBlock()),
        ("rich_text", blocks.RichTextBlock()),
        ("lorem", LoremBlock()),
        ("html", blocks.RawHTMLBlock()),
        ("image", ImageCaptionBlock()),
        ("code", CodeBlock()),
        ("tangent", TangentBlock()),
        ("mermaid", MermaidBlock()),
    ]

    # Column types a table may use: plain rich text, a float, or a string.
    table_columns = [
        ("rich_text", blocks.RichTextBlock(editor="plain")),
        ("numeric", blocks.FloatBlock()),
        ("text", blocks.CharBlock()),
    ]
    available.append(("table", TypedTableBlock(table_columns)))

    available.append(("iframe", IFrameBlock()))
    return available
|
|
|
|
|
|
def get_content_html(html: str) -> str:
    """
    Reduce rendered HTML to the markup of content blocks only.

    Only elements carrying the `block-<name>` class of a block type that is
    not in `IGNORE_PLAINTEXT_BLOCKS` are parsed; everything else (embeds,
    images, code, ...) is dropped. If the first remaining block `div` is a
    tangent, it is removed as well.
    """
    content_classes = []
    for name, block in get_blocks():
        if isinstance(block, IGNORE_PLAINTEXT_BLOCKS):
            continue
        content_classes.append(f"block-{name}")

    # Restrict parsing to the content blocks rather than filtering afterwards.
    strainer = SoupStrainer(class_=content_classes)
    soup = BeautifulSoup(html, "lxml", parse_only=strainer)

    leading_block = soup.find("div", class_=content_classes)
    if not leading_block:
        return str(soup)

    # A tangent at the very start is dropped -- presumably so that text taken
    # from the beginning of the content does not open with it.
    if leading_block.attrs["class"][0].endswith("tangent"):
        leading_block.extract()

    return str(soup)
|
|
|
|
|
|
def add_heading_anchors(html: str) -> str:
    """
    Prepend a `#` anchor link to each heading inside eligible blocks.

    Headings are matched by combining every tag in `HEADER_TAGS` with every
    block name whose block type is not in `IGNORE_HEADING_BLOCKS`. Each
    anchor links to, and carries as its id, the `heading_id` of the heading's
    text. Returns the serialised document.
    """
    eligible_names = [
        name
        for name, block in get_blocks()
        if not isinstance(block, IGNORE_HEADING_BLOCKS)
    ]
    selector = ", ".join(
        f".block-{name} {header_tag}"
        for header_tag, name in product(HEADER_TAGS, eligible_names)
    )

    soup = BeautifulSoup(html, "lxml")
    for heading in soup.select(selector):
        slug = heading_id(heading.get_text())

        anchor = soup.new_tag("a", href=f"#{slug}", id=slug)
        anchor["class"] = "heading-anchor"
        anchor.string = "#"

        # Put the anchor first so it precedes the heading's own text.
        heading.insert(0, anchor)

    return str(soup)
|