From 36211e88f2b74b27604189b77e18e4aec8e64af2 Mon Sep 17 00:00:00 2001 From: Jake Howard Date: Wed, 29 May 2024 23:30:17 +0100 Subject: [PATCH] Support linking out to external posts --- requirements.txt | 1 + static/src/scss/_listing.scss | 11 ++ website/blog/factories.py | 10 ++ .../migrations/0006_externalblogpostpage.py | 45 ++++++ website/blog/models.py | 143 ++++++++++++++++-- .../blog/external_blog_post_page.html | 16 ++ website/blog/tests.py | 29 +++- website/common/models.py | 8 +- .../common/templates/common/listing-item.html | 5 +- website/common/tests/test_utils.py | 23 +++ website/common/utils.py | 23 ++- website/common/views.py | 6 +- website/search/views.py | 4 +- website/settings.py | 5 + 14 files changed, 303 insertions(+), 26 deletions(-) create mode 100644 website/blog/migrations/0006_externalblogpostpage.py create mode 100644 website/blog/templates/blog/external_blog_post_page.html diff --git a/requirements.txt b/requirements.txt index 57b0954..89c6cb2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,6 +29,7 @@ django-enforce-host==1.1.0 django-proxy==1.2.2 wagtail-lite-youtube-embed==0.1.0 django-minify-html==1.7.1 +metadata-parser==0.12.1 # DRF OpenAPI dependencies uritemplate diff --git a/static/src/scss/_listing.scss b/static/src/scss/_listing.scss index 0afc4db..bf7baf9 100644 --- a/static/src/scss/_listing.scss +++ b/static/src/scss/_listing.scss @@ -18,6 +18,17 @@ .title { margin-bottom: 0; + + a { + display: flex; + flex-direction: row; + align-items: center; + justify-content: space-between; + + i { + font-size: 50%; + } + } } .content-details { diff --git a/website/blog/factories.py b/website/blog/factories.py index c1f4b7e..a7c95bb 100644 --- a/website/blog/factories.py +++ b/website/blog/factories.py @@ -1,3 +1,5 @@ +import factory + from website.common.factories import BaseContentFactory, BaseListingFactory from . import models @@ -11,3 +13,11 @@ class BlogPostListPageFactory(BaseListingFactory): class BlogPostPageFactory(BaseContentFactory): class Meta: model = models.BlogPostPage + + +class ExternalBlogPostPageFactory(BaseContentFactory): + external_url = factory.Faker("url") + + class Meta: + model = models.ExternalBlogPostPage + exclude = ["subtitle"] diff --git a/website/blog/migrations/0006_externalblogpostpage.py b/website/blog/migrations/0006_externalblogpostpage.py new file mode 100644 index 0000000..b44852f --- /dev/null +++ b/website/blog/migrations/0006_externalblogpostpage.py @@ -0,0 +1,45 @@ +# Generated by Django 5.0.4 on 2024-05-29 21:10 + +import django.db.models.deletion +import django.utils.timezone +import modelcluster.fields +import wagtailmetadata.models +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("blog", "0005_auto_20230602_1236"), + ("wagtailcore", "0089_log_entry_data_json_null_to_object"), + ] + + operations = [ + migrations.CreateModel( + name="ExternalBlogPostPage", + fields=[ + ( + "page_ptr", + models.OneToOneField( + auto_created=True, + on_delete=django.db.models.deletion.CASCADE, + parent_link=True, + primary_key=True, + serialize=False, + to="wagtailcore.page", + ), + ), + ("external_url", models.URLField()), + ("date", models.DateField(default=django.utils.timezone.now)), + ( + "tags", + modelcluster.fields.ParentalManyToManyField( + blank=True, to="blog.blogposttagpage" + ), + ), + ], + options={ + "abstract": False, + }, + bases=("wagtailcore.page", wagtailmetadata.models.MetadataMixin), + ), + ] diff --git a/website/blog/models.py b/website/blog/models.py index 4e41a86..1bd57c5 100644 --- a/website/blog/models.py +++ b/website/blog/models.py @@ -1,18 +1,26 @@ from typing import Any, Optional +from urllib.parse import urlsplit from django.contrib.postgres.search import TrigramSimilarity from django.db import models -from django.db.models.functions import Cast +from django.db.models.functions import Cast, Coalesce +from django.http import HttpRequest, HttpResponse, HttpResponsePermanentRedirect from django.utils import timezone from django.utils.functional import cached_property +from metadata_parser import MetadataParser from modelcluster.fields import ParentalManyToManyField from wagtail.admin.panels import FieldPanel -from wagtail.models import PageQuerySet +from wagtail.models import Page, PageQuerySet, Site from wagtail.search import index from wagtailautocomplete.edit_handlers import AutocompletePanel -from website.common.models import BaseContentPage, BaseListingPage -from website.common.utils import TocEntry +from website.common.models import BaseContentPage, BaseListingPage, BasePage +from website.common.utils import ( + TocEntry, + extend_query_params, + get_page_metadata, + get_url_mime_type, +) from website.contrib.singleton_page.utils import SingletonPageCache @@ -23,6 +31,8 @@ class BlogPostListPage(BaseListingPage): "blog.BlogPostTagListPage", "blog.BlogPostCollectionListPage", "blog.BlogPostCollectionPage", + "blog.BlogPostCollectionPage", + "blog.ExternalBlogPostPage", ] @cached_property @@ -31,9 +41,12 @@ class BlogPostListPage(BaseListingPage): def get_listing_pages(self) -> models.QuerySet: return ( - BlogPostPage.objects.descendant_of(self) - .live() + Page.objects.live() .public() + .annotate(date=Coalesce("blogpostpage__date", "externalblogpostpage__date")) + .descendant_of(self) + .type(BlogPostPage, ExternalBlogPostPage) + .specific() .order_by("-date", "title") ) @@ -87,10 +100,6 @@ class BlogPostPage(BaseContentPage): similar_posts = listing_pages.exclude(id=self.id).alias( title_similarity=TrigramSimilarity("title", self.title), - # If this page has no subtitle, ignore it as part of similarity - subtitle_similarity=TrigramSimilarity("subtitle", self.subtitle) - if self.subtitle - else models.Value(1), ) page_tags = list(self.tags.public().live().values_list("id", flat=True)) @@ -109,7 +118,6 @@ class BlogPostPage(BaseContentPage): similar_posts = similar_posts.annotate( similarity=(models.F("tag_similarity") * 2) + (models.F("title_similarity") * 10) - + (models.F("subtitle_similarity")) ).order_by("-similarity")[:3] return similar_posts @@ -137,7 +145,20 @@ class BlogPostTagPage(BaseListingPage): def get_listing_pages(self) -> models.QuerySet: blog_list_page = BlogPostListPage.objects.get() - return blog_list_page.get_listing_pages().filter(tags=self) + listing_pages = blog_list_page.get_listing_pages() + blog_post_tags = list( + BlogPostPage.objects.filter(id__in=listing_pages, tags=self).values_list( + "id", flat=True + ) + ) + external_post_tags = list( + ExternalBlogPostPage.objects.filter( + id__in=listing_pages, tags=self + ).values_list("id", flat=True) + ) + return listing_pages.filter( + id__in=blog_post_tags + external_post_tags + ).specific() class BlogPostCollectionListPage(BaseListingPage): @@ -167,3 +188,101 @@ class BlogPostCollectionPage(BaseListingPage): .public() .order_by("-date", "title") ) + + +class ExternalBlogPostPage(BaseContentPage): + subpage_types: list[Any] = [] + parent_page_types = [BlogPostListPage] + preview_modes: list[Any] = [] + + is_external = True + + # Some `BaseContentPage` fields aren't relevant + body = None + subtitle = None + hero_image = None + hero_unsplash_photo = None + + external_url = models.URLField() + + tags = ParentalManyToManyField("blog.BlogPostTagPage", blank=True) + date = models.DateField(default=timezone.now) + + content_panels = BasePage.content_panels + [FieldPanel("external_url")] + + promote_panels = BaseContentPage.promote_panels + [ + FieldPanel("date"), + AutocompletePanel("tags"), + ] + + search_fields = BaseContentPage.search_fields + [ + index.RelatedFields("tags", [index.SearchField("title", boost=1)]), + index.SearchField("external_url"), + ] + + @cached_property + def tag_list_page_url(self) -> Optional[str]: + return SingletonPageCache.get_url(BlogPostTagListPage) + + @cached_property + def tags_list(self) -> models.QuerySet: + """ + Use this to get a page's tags. + """ + tags = self.tags.order_by("slug") + + # In drafts, `django-modelcluster` doesn't support these filters + if isinstance(tags, PageQuerySet): + return tags.public().live() + + return tags + + @cached_property + def metadata(self) -> MetadataParser: + return get_page_metadata(self.external_url) + + @cached_property + def _body_html(self) -> str: + try: + return self.metadata.get_metadatas("description")[0] + except (KeyError, IndexError, TypeError): + return "" + + @cached_property + def plain_text(self) -> str: + # The metadata is already just text + return self._body_html + + def hero_url( + self, image_size: str, wagtail_image_spec_extra: Optional[str] = None + ) -> Optional[str]: + try: + return self.metadata.get_metadatas("image")[0] + except (KeyError, IndexError, TypeError): + return None + + @cached_property + def hero_image_url(self) -> str: + return "" + + @cached_property + def hero_image_alt(self) -> str: + return "" + + def get_meta_image_mime(self) -> Optional[str]: + return get_url_mime_type(self.hero_url("")) + + def get_url( + self, request: HttpRequest | None = None, current_site: Site | None = None + ) -> str: + return self.get_full_url(request) + + def get_full_url(self, request: HttpRequest | None = None) -> str: + full_url = urlsplit(super().get_full_url(request)) + return extend_query_params(self.external_url, {"utm_source": full_url.netloc}) + + def serve(self, request: HttpRequest, *args: tuple, **kwargs: dict) -> HttpResponse: + """ + Send the user directly to the external page + """ + return HttpResponsePermanentRedirect(self.get_full_url(request)) diff --git a/website/blog/templates/blog/external_blog_post_page.html b/website/blog/templates/blog/external_blog_post_page.html new file mode 100644 index 0000000..6876deb --- /dev/null +++ b/website/blog/templates/blog/external_blog_post_page.html @@ -0,0 +1,16 @@ +{% comment %} +This template is never used, but exists just in case. +{% endcomment %} + + + + + Redirecting... + + + + + +

Redirecting...

+ + diff --git a/website/blog/tests.py b/website/blog/tests.py index e4883bf..fa66c55 100644 --- a/website/blog/tests.py +++ b/website/blog/tests.py @@ -3,7 +3,11 @@ from django.urls import reverse from website.home.models import HomePage -from .factories import BlogPostListPageFactory, BlogPostPageFactory +from .factories import ( + BlogPostListPageFactory, + BlogPostPageFactory, + ExternalBlogPostPageFactory, +) class BlogPostPageTestCase(TestCase): @@ -69,14 +73,15 @@ class BlogPostListPageTestCase(TestCase): BlogPostPageFactory(parent=cls.page) BlogPostPageFactory(parent=cls.page) + ExternalBlogPostPageFactory(parent=cls.page, external_url="https://example.com") def test_accessible(self) -> None: response = self.client.get(self.page.url) self.assertEqual(response.status_code, 200) - self.assertEqual(len(response.context["listing_pages"]), 2) + self.assertEqual(len(response.context["listing_pages"]), 3) def test_queries(self) -> None: - with self.assertNumQueries(39): + with self.assertNumQueries(43): self.client.get(self.page.url) def test_feed_accessible(self) -> None: @@ -84,3 +89,21 @@ class BlogPostListPageTestCase(TestCase): self.assertRedirects( response, reverse("feed"), status_code=301, fetch_redirect_response=True ) + + +class ExternalBlogPostPageTestCase(TestCase): + @classmethod + def setUpTestData(cls) -> None: + cls.home_page = HomePage.objects.get() + cls.blog_post_list_page = BlogPostListPageFactory(parent=cls.home_page) + cls.page = ExternalBlogPostPageFactory(parent=cls.blog_post_list_page) + + def test_redirects(self) -> None: + with self.assertNumQueries(10): + response = self.client.get(self.page.url) + self.assertRedirects( + response, + self.page.external_url + "?utm_source=localhost", + status_code=301, + fetch_redirect_response=False, + ) diff --git a/website/common/models.py b/website/common/models.py index f560c06..26e4489 100644 --- a/website/common/models.py +++ b/website/common/models.py @@ -1,7 +1,6 @@ from datetime import timedelta from math import ceil from typing import Any, Optional -from urllib.parse import urlencode from django.core.paginator import EmptyPage, Paginator from django.core.paginator import Page as PaginatorPage @@ -31,6 +30,7 @@ from .serializers import PaginationSerializer from .streamfield import add_heading_anchors, get_blocks, get_content_html from .utils import ( TocEntry, + extend_query_params, extract_text, get_site_title, get_table_of_contents, @@ -181,6 +181,7 @@ class BaseContentPage(BasePage, MetadataMixin): for size, width in UNSPLASH_SIZES.items() } + @cached_property def hero_image_url(self) -> Optional[str]: return self.hero_url("regular") @@ -286,10 +287,7 @@ class BaseListingPage(RoutablePageMixin, BaseContentPage): url = super().get_meta_url() - if not query_data: - return url - - return url + "?" + urlencode(query_data) + return extend_query_params(url, query_data) @route(r"^feed/$") def feed(self, request: HttpRequest) -> HttpResponse: diff --git a/website/common/templates/common/listing-item.html b/website/common/templates/common/listing-item.html index ff88641..612a46c 100644 --- a/website/common/templates/common/listing-item.html +++ b/website/common/templates/common/listing-item.html @@ -16,7 +16,10 @@ {% include "common/breadcrumbs.html" with parents=page.get_parent_pages %} {% endif %}

- {{ page.title }} + + {{ page.title }} + {% if page.is_external %}{% endif %} +

{% include "common/content-details.html" %}

{{ page.summary }}

diff --git a/website/common/tests/test_utils.py b/website/common/tests/test_utils.py index a3fc08c..9ba7391 100644 --- a/website/common/tests/test_utils.py +++ b/website/common/tests/test_utils.py @@ -3,6 +3,7 @@ from django.test import SimpleTestCase from wagtail.rich_text import features as richtext_feature_registry from website.common.utils import ( + extend_query_params, extract_text, get_table_of_contents, heading_id, @@ -111,3 +112,25 @@ class HeadingIDTestCase(SimpleTestCase): self.assertEqual(heading_id("123"), "ref-123") self.assertEqual(heading_id("test"), "test") self.assertEqual(heading_id("Look, a title!"), "look-a-title") + + +class ExtendQueryParamsTestCase(SimpleTestCase): + def test_params(self) -> None: + self.assertEqual( + extend_query_params("https://example.com", {"foo": "bar"}), + "https://example.com?foo=bar", + ) + self.assertEqual( + extend_query_params("https://example.com?foo=bar", {"bar": "foo"}), + "https://example.com?foo=bar&bar=foo", + ) + self.assertEqual( + extend_query_params("https://example.com?foo=baz", {"foo": "baz"}), + "https://example.com?foo=baz", + ) + + def test_removes_param(self) -> None: + self.assertEqual( + extend_query_params("https://example.com?foo=bar", {"foo": None}), + "https://example.com", + ) diff --git a/website/common/utils.py b/website/common/utils.py index 4a8db0b..2e83fd9 100644 --- a/website/common/utils.py +++ b/website/common/utils.py @@ -1,14 +1,17 @@ from dataclasses import dataclass from itertools import pairwise -from typing import Optional, Type +from typing import Any, Optional, Type +from urllib.parse import urlsplit, urlunsplit import requests from bs4 import BeautifulSoup, SoupStrainer from django.conf import settings from django.db import models +from django.http import QueryDict from django.http.request import HttpRequest from django.utils.text import slugify from django_cache_decorator import django_cache_decorator +from metadata_parser import MetadataParser from wagtail.models import Page, Site from wagtail.models import get_page_models as get_wagtail_page_models @@ -122,3 +125,21 @@ def get_ai_robots_txt() -> str: return requests_session.get( "https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/main/robots.txt" ).content.decode() + + +@django_cache_decorator(time=21600) +def get_page_metadata(url: str) -> MetadataParser: + return MetadataParser(url=url, search_head_only=True) + + +def extend_query_params(url: str, params: dict[str, Any]) -> str: + scheme, netloc, path, query, fragment = urlsplit(url) + query_dict = QueryDict(query, mutable=True) + + for k, v in params.items(): + if v is None: + del query_dict[k] + else: + query_dict[k] = v + + return urlunsplit((scheme, netloc, path, query_dict.urlencode(), fragment)) diff --git a/website/common/views.py b/website/common/views.py index 8a36572..ce0514d 100644 --- a/website/common/views.py +++ b/website/common/views.py @@ -23,7 +23,7 @@ from website.search.models import SearchPage from .feed_generators import CustomFeed from .models import BaseListingPage, BasePage -from .utils import get_ai_robots_txt +from .utils import extend_query_params, get_ai_robots_txt class Error404View(TemplateView): @@ -116,7 +116,9 @@ class AllPagesFeed(Feed): return item.title def item_link(self, item: BasePage) -> str: - return item.get_full_url(request=self.request) + "?utm_medium=rss" + return extend_query_params( + item.get_full_url(request=self.request), {"utm_medium": "rss"} + ) def item_pubdate(self, item: BasePage) -> datetime: if item_date := getattr(item, "date", None): diff --git a/website/search/views.py b/website/search/views.py index 1cfa1da..7c4f915 100644 --- a/website/search/views.py +++ b/website/search/views.py @@ -7,7 +7,7 @@ from wagtail.search.utils import parse_query_string from wagtail_favicon.models import FaviconSettings from wagtail_favicon.utils import get_rendition_url -from website.common.utils import get_or_none, get_site_title +from website.common.utils import extend_query_params, get_or_none, get_site_title from website.contrib.singleton_page.utils import SingletonPageCache from .models import SearchPage @@ -87,4 +87,4 @@ class GoView(RedirectView): if slug_match := get_or_none(pages.filter(slug__iexact=query)): return slug_match.get_url(request=self.request) - return f"{search_page_url}?{self.request.GET.urlencode()}" + return extend_query_params(search_page_url, self.request.GET) diff --git a/website/settings.py b/website/settings.py index 7fcbdab..d74aa79 100644 --- a/website/settings.py +++ b/website/settings.py @@ -391,6 +391,11 @@ LOGGING = { "level": "WARNING", "propagate": False, }, + "metadata_parser": { + "handlers": ["console"], + "level": "CRITICAL", + "propagate": False, + }, }, }