2022-07-03 23:10:57 +01:00
|
|
|
from itertools import product
|
2022-06-26 19:25:30 +01:00
|
|
|
|
2022-07-04 18:55:18 +01:00
|
|
|
from bs4 import BeautifulSoup, SoupStrainer
|
2022-06-26 18:37:04 +01:00
|
|
|
from django.utils import lorem_ipsum
|
2022-06-30 21:32:47 +01:00
|
|
|
from django.utils.html import format_html_join
|
2022-06-26 18:37:04 +01:00
|
|
|
from wagtail import blocks
|
2022-08-19 13:48:45 +01:00
|
|
|
from wagtail.contrib.typed_table_block.blocks import TypedTableBlock
|
2022-06-26 18:37:04 +01:00
|
|
|
from wagtail.embeds.blocks import EmbedBlock
|
2022-06-27 20:40:55 +01:00
|
|
|
from wagtail.images.blocks import ImageChooserBlock
|
2022-06-26 18:37:04 +01:00
|
|
|
|
2022-09-23 15:35:32 +01:00
|
|
|
from website.common.utils import HEADER_TAGS, heading_id
|
2022-06-27 23:29:55 +01:00
|
|
|
from website.contrib.code_block.blocks import CodeBlock
|
2022-07-15 09:56:22 +01:00
|
|
|
from website.contrib.mermaid_block.blocks import MermaidBlock
|
2022-06-27 23:29:55 +01:00
|
|
|
|
2022-06-26 18:37:04 +01:00
|
|
|
|
|
|
|
class LoremBlock(blocks.StructBlock):
    """Struct block that renders a requested number of lorem ipsum paragraphs."""

    # How many placeholder paragraphs to emit; the block enforces a minimum of 1.
    paragraphs = blocks.IntegerBlock(min_value=1)

    def render(self, value: dict, context: dict | None = None) -> str:
        """Return the placeholder paragraphs as HTML.

        ``value["paragraphs"]`` is passed to ``lorem_ipsum.paragraphs``; each
        resulting paragraph is wrapped in ``<p>`` and the paragraphs are joined
        with a blank line. ``context`` is accepted but not used here.
        """
        paragraph_count = value["paragraphs"]
        # format_html_join expects one argument tuple per rendered fragment.
        fragments = ((text,) for text in lorem_ipsum.paragraphs(paragraph_count))
        return format_html_join("\n\n", "<p>{}</p>", fragments)

    class Meta:
        label = "Lorem Ipsum"
        icon = "openquote"
|
|
|
|
|
|
|
|
|
2022-06-27 20:40:55 +01:00
|
|
|
class ImageCaptionBlock(blocks.StructBlock):
    """Struct block pairing a chosen image with an optional rich-text caption."""

    image = ImageChooserBlock()
    # The caption is optional and uses the "plain" rich-text editor configuration.
    caption = blocks.RichTextBlock(required=False, editor="plain")

    class Meta:
        template = "common/blocks/image-caption.html"
        label = "Image with caption"
        icon = "image"
|
|
|
|
|
|
|
|
|
2022-07-05 09:03:45 +01:00
|
|
|
class TangentBlock(blocks.StructBlock):
    """Struct block holding a named aside: a short name plus rich-text content."""

    # Name of the tangent, limited to 64 characters.
    name = blocks.CharBlock(max_length=64)
    # Body of the tangent, edited with the "simple" rich-text editor configuration.
    content = blocks.RichTextBlock(editor="simple")

    class Meta:
        template = "common/blocks/tangent.html"
        label = "Tangent"
        icon = "comment"
|
|
|
|
|
|
|
|
|
2022-09-08 14:31:01 +01:00
|
|
|
class IFrameBlock(blocks.StructBlock):
    """Struct block pairing a URL with an optional rich-text caption."""

    url = blocks.URLBlock()
    # The caption is optional and uses the "plain" rich-text editor configuration.
    caption = blocks.RichTextBlock(required=False, editor="plain")

    class Meta:
        template = "common/blocks/iframe.html"
        label = "IFrame"
        icon = "link-external"
|
|
|
|
|
|
|
|
|
2022-07-29 09:09:35 +01:00
|
|
|
# Block types whose rendered output get_content_html() leaves out when it
# filters a page down to its "original content" blocks.
IGNORE_PLAINTEXT_BLOCKS = (
    blocks.RawHTMLBlock,
    EmbedBlock,
    ImageCaptionBlock,
    CodeBlock,
    MermaidBlock,
    IFrameBlock,
)
|
2022-07-03 23:10:57 +01:00
|
|
|
# Block types skipped by add_heading_anchors(): everything already ignored for
# plaintext, plus the lorem ipsum placeholder block.
IGNORE_HEADING_BLOCKS = IGNORE_PLAINTEXT_BLOCKS + (LoremBlock,)
|
|
|
|
|
|
|
|
|
2022-06-26 18:37:04 +01:00
|
|
|
def get_blocks() -> list[tuple[str, blocks.BaseBlock]]:
    """Return fresh ``(name, block)`` pairs for every available content block.

    New block instances are created on each call. The order of the returned
    list is fixed: embed, rich_text, lorem, html, image, code, tangent,
    mermaid, table, iframe.
    """
    stream_blocks: list[tuple[str, blocks.BaseBlock]] = [
        ("embed", EmbedBlock()),
        ("rich_text", blocks.RichTextBlock()),
        ("lorem", LoremBlock()),
        ("html", blocks.RawHTMLBlock()),
        ("image", ImageCaptionBlock()),
        ("code", CodeBlock()),
        ("tangent", TangentBlock()),
        ("mermaid", MermaidBlock()),
    ]

    # Table cells may hold plain rich text, a float, or a short string.
    table_cell_blocks = [
        ("rich_text", blocks.RichTextBlock(editor="plain")),
        ("numeric", blocks.FloatBlock()),
        ("text", blocks.CharBlock()),
    ]
    stream_blocks.append(("table", TypedTableBlock(table_cell_blocks)))

    stream_blocks.append(("iframe", IFrameBlock()))
    return stream_blocks
|
2022-06-26 19:25:30 +01:00
|
|
|
|
|
|
|
|
2022-07-04 18:55:18 +01:00
|
|
|
def get_content_html(html: str) -> str:
    """
    Get the HTML of just original content (eg not embeds etc)

    Only elements carrying a ``block-<name>`` class are parsed, where ``<name>``
    is a block from ``get_blocks()`` that is not an instance of one of the
    ``IGNORE_PLAINTEXT_BLOCKS`` types. The parsed result is returned as a string.
    """
    content_classes = []
    for name, block in get_blocks():
        if isinstance(block, IGNORE_PLAINTEXT_BLOCKS):
            continue
        content_classes.append(f"block-{name}")

    # Restrict parsing to the wanted block wrappers so everything else is dropped.
    content_only = SoupStrainer(class_=content_classes)
    parsed = BeautifulSoup(html, "lxml", parse_only=content_only)
    return str(parsed)
|
2022-07-03 23:10:57 +01:00
|
|
|
|
|
|
|
|
|
|
|
def add_heading_anchors(html: str) -> str:
    """Insert a ``#`` anchor link at the start of each heading in content blocks.

    Headings are the ``HEADER_TAGS`` elements found inside ``.block-<name>``
    wrappers, for every block from ``get_blocks()`` that is not an instance of
    one of the ``IGNORE_HEADING_BLOCKS`` types. Each anchor's ``href`` and ``id``
    come from ``heading_id`` applied to the heading's text. The whole parsed
    document is returned as a string.
    """
    heading_block_names = [
        name
        for name, block in get_blocks()
        if not isinstance(block, IGNORE_HEADING_BLOCKS)
    ]
    # One CSS selector per (header tag, block) pair, header tags varying slowest.
    selector = ", ".join(
        f".block-{name} {header_tag}"
        for header_tag in HEADER_TAGS
        for name in heading_block_names
    )

    soup = BeautifulSoup(html, "lxml")
    for heading in soup.select(selector):
        slug = heading_id(heading.text)
        link = soup.new_tag("a", href="#" + slug, id=slug)
        link.string = "#"
        link.attrs["class"] = "heading-anchor"
        # Prepend so the anchor comes before the heading's existing content.
        heading.insert(0, link)
    return str(soup)
|