Ensure heading ids are always valid ids

2022-09-23 15:35:32 +01:00 · 2022-09-23 15:35:32 +01:00 · e4476e1b2a
commit e4476e1b2a
parent d68be02780
5 changed files with 39 additions and 7 deletions
--- a/website/blog/models.py
+++ b/website/blog/models.py
@@ -37,7 +37,10 @@ class BlogPostListPage(BaseListingPage):
            reverse=True,
        )

-        return [TocEntry(post_month, post_month, 0, []) for post_month in post_months]
+        return [
+            TocEntry(post_month, "date-" + post_month, 0, [])
+            for post_month in post_months
+        ]

    def get_listing_pages(self) -> models.QuerySet:
        return prefetch_for_listing(
--- a/website/blog/templates/blog/blog_post_list_page.html
+++ b/website/blog/templates/blog/blog_post_list_page.html
@@ -4,7 +4,7 @@
  <section class="container">
    {% for page in listing_pages %}
      {% ifchanged %}
-        <h3 id="{{ page.date|date:'Y-m' }}" class="date-header">
+        <h3 id="date-{{ page.date|date:'Y-m' }}" class="date-header">
          <time datetime="{{ page.date|date:'Y-m' }}" title='{{ page.date|date:"F Y" }}'>
            {{ page.date|date:"Y-m" }}
          </time>
--- a/website/common/streamfield.py
+++ b/website/common/streamfield.py
@@ -3,13 +3,12 @@ from itertools import product
 from bs4 import BeautifulSoup, SoupStrainer
 from django.utils import lorem_ipsum
 from django.utils.html import format_html_join
-from django.utils.text import slugify
 from wagtail import blocks
 from wagtail.contrib.typed_table_block.blocks import TypedTableBlock
 from wagtail.embeds.blocks import EmbedBlock
 from wagtail.images.blocks import ImageChooserBlock

-from website.common.utils import HEADER_TAGS
+from website.common.utils import HEADER_TAGS, heading_id
 from website.contrib.code_block.blocks import CodeBlock
 from website.contrib.mermaid_block.blocks import MermaidBlock

@@ -121,7 +120,7 @@ def add_heading_anchors(html: str) -> str:

    soup = BeautifulSoup(html, "lxml")
    for tag in soup.select(", ".join(targets)):
-        slug = slugify(tag.text)
+        slug = heading_id(tag.text)
        anchor = soup.new_tag("a", href="#" + slug, id=slug)
        anchor.string = "#"
        anchor.attrs["class"] = "heading-anchor"
--- a/website/common/tests/test_utils.py
+++ b/website/common/tests/test_utils.py
@@ -3,7 +3,12 @@ from django.test import SimpleTestCase
 from wagtail.rich_text import features as richtext_feature_registry

 from website.common.embed import YouTubeLiteEmbedFinder
-from website.common.utils import count_words, extract_text, get_table_of_contents
+from website.common.utils import (
+    count_words,
+    extract_text,
+    get_table_of_contents,
+    heading_id,
+)


 class YouTubeLiteEmbedFinderTestCase(SimpleTestCase):
@@ -35,6 +40,7 @@ class TableOfContentsTestCase(SimpleTestCase):

        self.assertEqual(len(toc), 3)
        self.assertEqual([entry.title for entry in toc], ["2", "3", "4"])
+        self.assertEqual([entry.slug for entry in toc], ["ref-2", "ref-3", "ref-4"])

        first_entry = toc[0]
        self.assertEqual(len(first_entry.children), 3)
@@ -78,6 +84,10 @@ class TableOfContentsTestCase(SimpleTestCase):
        self.assertEqual(
            [entry.title for entry in first_entry.children], ["2.1", "2.2", "2.3"]
        )
+        self.assertEqual(
+            [entry.slug for entry in first_entry.children],
+            ["ref-21", "ref-22", "ref-23"],
+        )

        sub_entry = first_entry.children[1]
        self.assertEqual(len(sub_entry.children), 1)
@@ -111,3 +121,10 @@ class RichTextFeaturesTestCase(SimpleTestCase):
                    self.assertIsNotNone(
                        richtext_feature_registry.get_editor_plugin("draftail", feature)
                    )
+
+
+class HeadingIDTestCase(SimpleTestCase):
+    def test_headings(self) -> None:
+        # Digit-led slugs gain a "ref-" prefix; anything else is a plain slug.
+        expected = {"123": "ref-123", "test": "test", "Look, a title!": "look-a-title"}
+        self.assertEqual({h: heading_id(h) for h in expected}, expected)
--- a/website/common/utils.py
+++ b/website/common/utils.py
@@ -26,7 +26,7 @@ def get_table_of_contents(html: str) -> list[TocEntry]:
    soup = BeautifulSoup(html, "lxml", parse_only=SoupStrainer(HEADER_TAGS))

    heading_levels = [
-        TocEntry(tag.text, slugify(tag.text), int(tag.name[1]), []) for tag in soup
+        TocEntry(tag.text, heading_id(tag.text), int(tag.name[1]), []) for tag in soup
    ]

    # Abort if there are no headings
@@ -95,3 +95,16 @@ def prefetch_for_listing(queryset: PageQuerySet) -> PageQuerySet:
    different page models is a pain.
    """
    return queryset.select_related("hero_image", "hero_unsplash_photo")
+
+
+def heading_id(heading: str) -> str:
+    """
+    Convert a heading into an identifier which is valid for a HTML id attribute.
+
+    Returns "" if the heading is empty or has no sluggable characters (e.g.
+    "!!!"); a slug starting with a digit is prefixed with "ref-".
+    """
+    slug = slugify(heading) if heading else ""
+    if slug and slug[0].isdigit():  # "" here would make slug[0] an IndexError
+        return "ref-" + slug
+    return slug