Don't include the first tangent in the content HTML

This prevents the tangent from being used as part of the summary, which looks strange in the case of updates.
This commit is contained in:
Jake Howard 2023-04-11 14:05:46 +01:00
parent 68604e0438
commit 9bc5ccbbdc
Signed by: jake
GPG key ID: 57AFB45680EDD477

View file

@@ -106,9 +106,14 @@ def get_content_html(html: str) -> str:
if not isinstance(block, IGNORE_PLAINTEXT_BLOCKS) if not isinstance(block, IGNORE_PLAINTEXT_BLOCKS)
] ]
return str( soup = BeautifulSoup(html, "lxml", parse_only=SoupStrainer(class_=block_classes))
BeautifulSoup(html, "lxml", parse_only=SoupStrainer(class_=block_classes))
) # If the first block is a tangent, remove it
first_block = soup.find("div", class_=block_classes)
if first_block and first_block.attrs["class"][0].endswith("tangent"):
first_block.extract()
return str(soup)
def add_heading_anchors(html: str) -> str: def add_heading_anchors(html: str) -> str: