Support linking out to external posts

This commit is contained in:
Jake Howard 2024-05-29 23:30:17 +01:00
parent 2639d6eb1c
commit 36211e88f2
Signed by: jake
GPG key ID: 57AFB45680EDD477
14 changed files with 303 additions and 26 deletions

View file

@ -29,6 +29,7 @@ django-enforce-host==1.1.0
django-proxy==1.2.2 django-proxy==1.2.2
wagtail-lite-youtube-embed==0.1.0 wagtail-lite-youtube-embed==0.1.0
django-minify-html==1.7.1 django-minify-html==1.7.1
metadata-parser==0.12.1
# DRF OpenAPI dependencies # DRF OpenAPI dependencies
uritemplate uritemplate

View file

@ -18,6 +18,17 @@
.title { .title {
margin-bottom: 0; margin-bottom: 0;
// Lay the title link out as a row so its children (the title text and the
// optional icon) sit at opposite ends and are vertically centred.
a {
  display: flex;
  flex-direction: row;
  align-items: center;
  justify-content: space-between;

  // Icon rendered inside the link: half the size of the title text.
  i {
    font-size: 50%;
  }
}
} }
.content-details { .content-details {

View file

@ -1,3 +1,5 @@
import factory
from website.common.factories import BaseContentFactory, BaseListingFactory from website.common.factories import BaseContentFactory, BaseListingFactory
from . import models from . import models
@ -11,3 +13,11 @@ class BlogPostListPageFactory(BaseListingFactory):
class BlogPostPageFactory(BaseContentFactory): class BlogPostPageFactory(BaseContentFactory):
class Meta: class Meta:
model = models.BlogPostPage model = models.BlogPostPage
class ExternalBlogPostPageFactory(BaseContentFactory):
    """Factory for ``ExternalBlogPostPage`` with a generated ``external_url``."""

    class Meta:
        model = models.ExternalBlogPostPage
        # `subtitle` is kept out of the generated model kwargs
        # (presumably it is declared on `BaseContentFactory` - verify).
        exclude = ["subtitle"]

    external_url = factory.Faker("url")

View file

@ -0,0 +1,45 @@
# Generated by Django 5.0.4 on 2024-05-29 21:10
import django.db.models.deletion
import django.utils.timezone
import modelcluster.fields
import wagtailmetadata.models
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the ``ExternalBlogPostPage`` model (generated migration)."""

    dependencies = [
        ("blog", "0005_auto_20230602_1236"),
        ("wagtailcore", "0089_log_entry_data_json_null_to_object"),
    ]

    operations = [
        migrations.CreateModel(
            name="ExternalBlogPostPage",
            fields=[
                # Parent link to the base `wagtailcore.page` row; doubles as
                # the primary key.
                (
                    "page_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="wagtailcore.page",
                    ),
                ),
                ("external_url", models.URLField()),
                ("date", models.DateField(default=django.utils.timezone.now)),
                # Optional many-to-many relation to blog tag pages.
                (
                    "tags",
                    modelcluster.fields.ParentalManyToManyField(
                        blank=True, to="blog.blogposttagpage"
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
            bases=("wagtailcore.page", wagtailmetadata.models.MetadataMixin),
        ),
    ]

View file

@ -1,18 +1,26 @@
from typing import Any, Optional from typing import Any, Optional
from urllib.parse import urlsplit
from django.contrib.postgres.search import TrigramSimilarity from django.contrib.postgres.search import TrigramSimilarity
from django.db import models from django.db import models
from django.db.models.functions import Cast from django.db.models.functions import Cast, Coalesce
from django.http import HttpRequest, HttpResponse, HttpResponsePermanentRedirect
from django.utils import timezone from django.utils import timezone
from django.utils.functional import cached_property from django.utils.functional import cached_property
from metadata_parser import MetadataParser
from modelcluster.fields import ParentalManyToManyField from modelcluster.fields import ParentalManyToManyField
from wagtail.admin.panels import FieldPanel from wagtail.admin.panels import FieldPanel
from wagtail.models import PageQuerySet from wagtail.models import Page, PageQuerySet, Site
from wagtail.search import index from wagtail.search import index
from wagtailautocomplete.edit_handlers import AutocompletePanel from wagtailautocomplete.edit_handlers import AutocompletePanel
from website.common.models import BaseContentPage, BaseListingPage from website.common.models import BaseContentPage, BaseListingPage, BasePage
from website.common.utils import TocEntry from website.common.utils import (
TocEntry,
extend_query_params,
get_page_metadata,
get_url_mime_type,
)
from website.contrib.singleton_page.utils import SingletonPageCache from website.contrib.singleton_page.utils import SingletonPageCache
@ -23,6 +31,8 @@ class BlogPostListPage(BaseListingPage):
"blog.BlogPostTagListPage", "blog.BlogPostTagListPage",
"blog.BlogPostCollectionListPage", "blog.BlogPostCollectionListPage",
"blog.BlogPostCollectionPage", "blog.BlogPostCollectionPage",
"blog.BlogPostCollectionPage",
"blog.ExternalBlogPostPage",
] ]
@cached_property @cached_property
@ -31,9 +41,12 @@ class BlogPostListPage(BaseListingPage):
def get_listing_pages(self) -> models.QuerySet: def get_listing_pages(self) -> models.QuerySet:
return ( return (
BlogPostPage.objects.descendant_of(self) Page.objects.live()
.live()
.public() .public()
.annotate(date=Coalesce("blogpostpage__date", "externalblogpostpage__date"))
.descendant_of(self)
.type(BlogPostPage, ExternalBlogPostPage)
.specific()
.order_by("-date", "title") .order_by("-date", "title")
) )
@ -87,10 +100,6 @@ class BlogPostPage(BaseContentPage):
similar_posts = listing_pages.exclude(id=self.id).alias( similar_posts = listing_pages.exclude(id=self.id).alias(
title_similarity=TrigramSimilarity("title", self.title), title_similarity=TrigramSimilarity("title", self.title),
# If this page has no subtitle, ignore it as part of similarity
subtitle_similarity=TrigramSimilarity("subtitle", self.subtitle)
if self.subtitle
else models.Value(1),
) )
page_tags = list(self.tags.public().live().values_list("id", flat=True)) page_tags = list(self.tags.public().live().values_list("id", flat=True))
@ -109,7 +118,6 @@ class BlogPostPage(BaseContentPage):
similar_posts = similar_posts.annotate( similar_posts = similar_posts.annotate(
similarity=(models.F("tag_similarity") * 2) similarity=(models.F("tag_similarity") * 2)
+ (models.F("title_similarity") * 10) + (models.F("title_similarity") * 10)
+ (models.F("subtitle_similarity"))
).order_by("-similarity")[:3] ).order_by("-similarity")[:3]
return similar_posts return similar_posts
@ -137,7 +145,20 @@ class BlogPostTagPage(BaseListingPage):
def get_listing_pages(self) -> models.QuerySet: def get_listing_pages(self) -> models.QuerySet:
blog_list_page = BlogPostListPage.objects.get() blog_list_page = BlogPostListPage.objects.get()
return blog_list_page.get_listing_pages().filter(tags=self) listing_pages = blog_list_page.get_listing_pages()
blog_post_tags = list(
BlogPostPage.objects.filter(id__in=listing_pages, tags=self).values_list(
"id", flat=True
)
)
external_post_tags = list(
ExternalBlogPostPage.objects.filter(
id__in=listing_pages, tags=self
).values_list("id", flat=True)
)
return listing_pages.filter(
id__in=blog_post_tags + external_post_tags
).specific()
class BlogPostCollectionListPage(BaseListingPage): class BlogPostCollectionListPage(BaseListingPage):
@ -167,3 +188,101 @@ class BlogPostCollectionPage(BaseListingPage):
.public() .public()
.order_by("-date", "title") .order_by("-date", "title")
) )
class ExternalBlogPostPage(BaseContentPage):
    """
    A blog post whose content lives at an external URL.

    Only the URL, a date and tags are stored. The summary text and hero image
    are read from the external page's metadata, and serving the page answers
    with a permanent redirect to the external URL.
    """

    # No child pages; may only be created under a `BlogPostListPage`.
    subpage_types: list[Any] = []
    parent_page_types = [BlogPostListPage]
    # No preview modes are declared for this page type.
    preview_modes: list[Any] = []

    # Checked by the listing template to decide whether to show the
    # "external source" icon next to the title.
    is_external = True

    # Some `BaseContentPage` fields aren't relevant
    body = None
    subtitle = None
    hero_image = None
    hero_unsplash_photo = None

    external_url = models.URLField()
    tags = ParentalManyToManyField("blog.BlogPostTagPage", blank=True)
    date = models.DateField(default=timezone.now)

    content_panels = BasePage.content_panels + [FieldPanel("external_url")]

    promote_panels = BaseContentPage.promote_panels + [
        FieldPanel("date"),
        AutocompletePanel("tags"),
    ]

    search_fields = BaseContentPage.search_fields + [
        index.RelatedFields("tags", [index.SearchField("title", boost=1)]),
        index.SearchField("external_url"),
    ]

    @cached_property
    def tag_list_page_url(self) -> Optional[str]:
        """URL of the `BlogPostTagListPage`, looked up via `SingletonPageCache`."""
        return SingletonPageCache.get_url(BlogPostTagListPage)

    @cached_property
    def tags_list(self) -> models.QuerySet:
        """
        Use this to get a page's tags.

        Tags are ordered by slug and, when the relation yields a
        `PageQuerySet`, restricted to public, live tags.
        """
        tags = self.tags.order_by("slug")

        # In drafts, `django-modelcluster` doesn't support these filters
        if isinstance(tags, PageQuerySet):
            return tags.public().live()

        return tags

    @cached_property
    def metadata(self) -> MetadataParser:
        """
        Metadata for `external_url`, obtained through `get_page_metadata`.

        NOTE(review): nothing here handles errors raised while fetching or
        parsing the external page - confirm callers tolerate that.
        """
        return get_page_metadata(self.external_url)

    @cached_property
    def _body_html(self) -> str:
        """
        First "description" metadata value of the external page.

        Returns an empty string when the lookup raises `KeyError`,
        `IndexError` or `TypeError`.
        """
        try:
            return self.metadata.get_metadatas("description")[0]
        except (KeyError, IndexError, TypeError):
            return ""

    @cached_property
    def plain_text(self) -> str:
        # The metadata is already just text
        return self._body_html

    def hero_url(
        self, image_size: str, wagtail_image_spec_extra: Optional[str] = None
    ) -> Optional[str]:
        """
        First "image" metadata value of the external page, or `None`.

        `image_size` and `wagtail_image_spec_extra` are accepted but not used
        by this implementation.
        """
        try:
            return self.metadata.get_metadatas("image")[0]
        except (KeyError, IndexError, TypeError):
            return None

    @cached_property
    def hero_image_url(self) -> str:
        """Always an empty string for external posts."""
        return ""

    @cached_property
    def hero_image_alt(self) -> str:
        """Always an empty string for external posts."""
        return ""

    def get_meta_image_mime(self) -> Optional[str]:
        """
        MIME type of the hero image, as reported by `get_url_mime_type`.

        NOTE(review): `hero_url` can return `None`; confirm that
        `get_url_mime_type` accepts it.
        """
        return get_url_mime_type(self.hero_url(""))

    def get_url(
        self, request: HttpRequest | None = None, current_site: Site | None = None
    ) -> str:
        """
        Return the same value as `get_full_url`.

        `current_site` is accepted but not used.
        """
        return self.get_full_url(request)

    def get_full_url(self, request: HttpRequest | None = None) -> str:
        """
        Return `external_url` with a `utm_source` query parameter added.

        The parameter value is the network location (host) of the full URL
        the parent class computes for this page.
        """
        full_url = urlsplit(super().get_full_url(request))
        return extend_query_params(self.external_url, {"utm_source": full_url.netloc})

    def serve(self, request: HttpRequest, *args: tuple, **kwargs: dict) -> HttpResponse:
        """
        Send the user directly to the external page
        """
        return HttpResponsePermanentRedirect(self.get_full_url(request))

View file

@ -0,0 +1,16 @@
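{% comment %}
This template is not expected to be rendered, because requests for the page are
answered with a redirect to the external URL. It exists only as a fallback in
case the page is ever rendered through its template.
{% endcomment %}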
<!DOCTYPE html>
<html lang="en-GB">
<head>
<title>Redirecting...</title>
<link rel="canonical" href="{{ page.external_url }}" />
<meta charset="utf-8" />
<meta http-equiv="refresh" content="0; url={{ page.external_url }}" />
</head>
<body>
<p>Redirecting...</p>
</body>
</html>

View file

@ -3,7 +3,11 @@ from django.urls import reverse
from website.home.models import HomePage from website.home.models import HomePage
from .factories import BlogPostListPageFactory, BlogPostPageFactory from .factories import (
BlogPostListPageFactory,
BlogPostPageFactory,
ExternalBlogPostPageFactory,
)
class BlogPostPageTestCase(TestCase): class BlogPostPageTestCase(TestCase):
@ -69,14 +73,15 @@ class BlogPostListPageTestCase(TestCase):
BlogPostPageFactory(parent=cls.page) BlogPostPageFactory(parent=cls.page)
BlogPostPageFactory(parent=cls.page) BlogPostPageFactory(parent=cls.page)
ExternalBlogPostPageFactory(parent=cls.page, external_url="https://example.com")
def test_accessible(self) -> None: def test_accessible(self) -> None:
response = self.client.get(self.page.url) response = self.client.get(self.page.url)
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.context["listing_pages"]), 2) self.assertEqual(len(response.context["listing_pages"]), 3)
def test_queries(self) -> None: def test_queries(self) -> None:
with self.assertNumQueries(39): with self.assertNumQueries(43):
self.client.get(self.page.url) self.client.get(self.page.url)
def test_feed_accessible(self) -> None: def test_feed_accessible(self) -> None:
@ -84,3 +89,21 @@ class BlogPostListPageTestCase(TestCase):
self.assertRedirects( self.assertRedirects(
response, reverse("feed"), status_code=301, fetch_redirect_response=True response, reverse("feed"), status_code=301, fetch_redirect_response=True
) )
class ExternalBlogPostPageTestCase(TestCase):
    """Requesting an external blog post redirects to its external URL."""

    @classmethod
    def setUpTestData(cls) -> None:
        cls.home_page = HomePage.objects.get()
        cls.blog_post_list_page = BlogPostListPageFactory(parent=cls.home_page)
        cls.page = ExternalBlogPostPageFactory(parent=cls.blog_post_list_page)

    def test_redirects(self) -> None:
        with self.assertNumQueries(10):
            response = self.client.get(self.page.url)

        # The redirect target carries the requesting host as `utm_source`.
        expected_target = f"{self.page.external_url}?utm_source=localhost"
        self.assertRedirects(
            response,
            expected_target,
            status_code=301,
            fetch_redirect_response=False,
        )

View file

@ -1,7 +1,6 @@
from datetime import timedelta from datetime import timedelta
from math import ceil from math import ceil
from typing import Any, Optional from typing import Any, Optional
from urllib.parse import urlencode
from django.core.paginator import EmptyPage, Paginator from django.core.paginator import EmptyPage, Paginator
from django.core.paginator import Page as PaginatorPage from django.core.paginator import Page as PaginatorPage
@ -31,6 +30,7 @@ from .serializers import PaginationSerializer
from .streamfield import add_heading_anchors, get_blocks, get_content_html from .streamfield import add_heading_anchors, get_blocks, get_content_html
from .utils import ( from .utils import (
TocEntry, TocEntry,
extend_query_params,
extract_text, extract_text,
get_site_title, get_site_title,
get_table_of_contents, get_table_of_contents,
@ -181,6 +181,7 @@ class BaseContentPage(BasePage, MetadataMixin):
for size, width in UNSPLASH_SIZES.items() for size, width in UNSPLASH_SIZES.items()
} }
@cached_property
def hero_image_url(self) -> Optional[str]: def hero_image_url(self) -> Optional[str]:
return self.hero_url("regular") return self.hero_url("regular")
@ -286,10 +287,7 @@ class BaseListingPage(RoutablePageMixin, BaseContentPage):
url = super().get_meta_url() url = super().get_meta_url()
if not query_data: return extend_query_params(url, query_data)
return url
return url + "?" + urlencode(query_data)
@route(r"^feed/$") @route(r"^feed/$")
def feed(self, request: HttpRequest) -> HttpResponse: def feed(self, request: HttpRequest) -> HttpResponse:

View file

@ -16,7 +16,10 @@
{% include "common/breadcrumbs.html" with parents=page.get_parent_pages %} {% include "common/breadcrumbs.html" with parents=page.get_parent_pages %}
{% endif %} {% endif %}
<h2 class="title is-3"> <h2 class="title is-3">
<a href="{% pageurl page %}">{{ page.title }}</a> <a href="{% pageurl page %}">
{{ page.title }}
{% if page.is_external %}<i class="fa-solid fa-arrow-up-right-from-square" title="This page is from an external source"></i>{% endif %}
</a>
</h2> </h2>
{% include "common/content-details.html" %} {% include "common/content-details.html" %}
<p>{{ page.summary }}</p> <p>{{ page.summary }}</p>

View file

@ -3,6 +3,7 @@ from django.test import SimpleTestCase
from wagtail.rich_text import features as richtext_feature_registry from wagtail.rich_text import features as richtext_feature_registry
from website.common.utils import ( from website.common.utils import (
extend_query_params,
extract_text, extract_text,
get_table_of_contents, get_table_of_contents,
heading_id, heading_id,
@ -111,3 +112,25 @@ class HeadingIDTestCase(SimpleTestCase):
self.assertEqual(heading_id("123"), "ref-123") self.assertEqual(heading_id("123"), "ref-123")
self.assertEqual(heading_id("test"), "test") self.assertEqual(heading_id("test"), "test")
self.assertEqual(heading_id("Look, a title!"), "look-a-title") self.assertEqual(heading_id("Look, a title!"), "look-a-title")
class ExtendQueryParamsTestCase(SimpleTestCase):
    """Checks adding, overriding and removing query parameters on a URL."""

    def test_params(self) -> None:
        # (input URL, params to apply, expected URL)
        cases = [
            ("https://example.com", {"foo": "bar"}, "https://example.com?foo=bar"),
            (
                "https://example.com?foo=bar",
                {"bar": "foo"},
                "https://example.com?foo=bar&bar=foo",
            ),
            (
                "https://example.com?foo=baz",
                {"foo": "baz"},
                "https://example.com?foo=baz",
            ),
        ]
        for url, params, expected in cases:
            self.assertEqual(extend_query_params(url, params), expected)

    def test_removes_param(self) -> None:
        # A `None` value drops the parameter from the URL.
        stripped = extend_query_params("https://example.com?foo=bar", {"foo": None})
        self.assertEqual(stripped, "https://example.com")

View file

@ -1,14 +1,17 @@
from dataclasses import dataclass from dataclasses import dataclass
from itertools import pairwise from itertools import pairwise
from typing import Optional, Type from typing import Any, Optional, Type
from urllib.parse import urlsplit, urlunsplit
import requests import requests
from bs4 import BeautifulSoup, SoupStrainer from bs4 import BeautifulSoup, SoupStrainer
from django.conf import settings from django.conf import settings
from django.db import models from django.db import models
from django.http import QueryDict
from django.http.request import HttpRequest from django.http.request import HttpRequest
from django.utils.text import slugify from django.utils.text import slugify
from django_cache_decorator import django_cache_decorator from django_cache_decorator import django_cache_decorator
from metadata_parser import MetadataParser
from wagtail.models import Page, Site from wagtail.models import Page, Site
from wagtail.models import get_page_models as get_wagtail_page_models from wagtail.models import get_page_models as get_wagtail_page_models
@ -122,3 +125,21 @@ def get_ai_robots_txt() -> str:
return requests_session.get( return requests_session.get(
"https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/main/robots.txt" "https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/main/robots.txt"
).content.decode() ).content.decode()
@django_cache_decorator(time=6 * 60 * 60)
def get_page_metadata(url: str) -> MetadataParser:
    """
    Build a ``MetadataParser`` for ``url`` with ``search_head_only=True``.

    The call is wrapped in ``django_cache_decorator`` with ``time=21600``
    (six hours, assuming the decorator's ``time`` is in seconds).
    """
    parser = MetadataParser(url=url, search_head_only=True)
    return parser
def extend_query_params(url: str, params: dict[str, Any]) -> str:
    """
    Return ``url`` with its query string updated from ``params``.

    For each key in ``params``:

    * a value of ``None`` removes that parameter from the URL (a no-op when
      the URL does not contain it);
    * any other value is assigned to that key in the query string.

    The scheme, network location, path and fragment of ``url`` are passed
    through unchanged.
    """
    scheme, netloc, path, query, fragment = urlsplit(url)

    query_dict = QueryDict(query, mutable=True)

    for key, value in params.items():
        if value is None:
            # `pop` with a default instead of `del`: asking to remove a
            # parameter the URL never had must not raise `KeyError`.
            query_dict.pop(key, None)
        else:
            query_dict[key] = value

    return urlunsplit((scheme, netloc, path, query_dict.urlencode(), fragment))

View file

@ -23,7 +23,7 @@ from website.search.models import SearchPage
from .feed_generators import CustomFeed from .feed_generators import CustomFeed
from .models import BaseListingPage, BasePage from .models import BaseListingPage, BasePage
from .utils import get_ai_robots_txt from .utils import extend_query_params, get_ai_robots_txt
class Error404View(TemplateView): class Error404View(TemplateView):
@ -116,7 +116,9 @@ class AllPagesFeed(Feed):
return item.title return item.title
def item_link(self, item: BasePage) -> str: def item_link(self, item: BasePage) -> str:
return item.get_full_url(request=self.request) + "?utm_medium=rss" return extend_query_params(
item.get_full_url(request=self.request), {"utm_medium": "rss"}
)
def item_pubdate(self, item: BasePage) -> datetime: def item_pubdate(self, item: BasePage) -> datetime:
if item_date := getattr(item, "date", None): if item_date := getattr(item, "date", None):

View file

@ -7,7 +7,7 @@ from wagtail.search.utils import parse_query_string
from wagtail_favicon.models import FaviconSettings from wagtail_favicon.models import FaviconSettings
from wagtail_favicon.utils import get_rendition_url from wagtail_favicon.utils import get_rendition_url
from website.common.utils import get_or_none, get_site_title from website.common.utils import extend_query_params, get_or_none, get_site_title
from website.contrib.singleton_page.utils import SingletonPageCache from website.contrib.singleton_page.utils import SingletonPageCache
from .models import SearchPage from .models import SearchPage
@ -87,4 +87,4 @@ class GoView(RedirectView):
if slug_match := get_or_none(pages.filter(slug__iexact=query)): if slug_match := get_or_none(pages.filter(slug__iexact=query)):
return slug_match.get_url(request=self.request) return slug_match.get_url(request=self.request)
return f"{search_page_url}?{self.request.GET.urlencode()}" return extend_query_params(search_page_url, self.request.GET)

View file

@ -391,6 +391,11 @@ LOGGING = {
"level": "WARNING", "level": "WARNING",
"propagate": False, "propagate": False,
}, },
"metadata_parser": {
"handlers": ["console"],
"level": "CRITICAL",
"propagate": False,
},
}, },
} }