Support linking out to external posts
This commit is contained in:
parent
2639d6eb1c
commit
36211e88f2
14 changed files with 303 additions and 26 deletions
|
@ -29,6 +29,7 @@ django-enforce-host==1.1.0
|
|||
django-proxy==1.2.2
|
||||
wagtail-lite-youtube-embed==0.1.0
|
||||
django-minify-html==1.7.1
|
||||
metadata-parser==0.12.1
|
||||
|
||||
# DRF OpenAPI dependencies
|
||||
uritemplate
|
||||
|
|
|
@ -18,6 +18,17 @@
|
|||
|
||||
.title {
  margin-bottom: 0;

  /* Lay the title link out as a row, vertically centred, with its
     children pushed to opposite ends of the line. */
  a {
    display: flex;
    flex-direction: row;
    align-items: center;
    justify-content: space-between;

    /* Icons inside the link render at half the inherited font size. */
    i {
      font-size: 50%;
    }
  }
}
|
||||
|
||||
.content-details {
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
import factory
|
||||
|
||||
from website.common.factories import BaseContentFactory, BaseListingFactory
|
||||
|
||||
from . import models
|
||||
|
@ -11,3 +13,11 @@ class BlogPostListPageFactory(BaseListingFactory):
|
|||
class BlogPostPageFactory(BaseContentFactory):
    """Factory that builds `BlogPostPage` instances."""

    class Meta:
        model = models.BlogPostPage
|
||||
|
||||
|
||||
class ExternalBlogPostPageFactory(BaseContentFactory):
    """Factory that builds `ExternalBlogPostPage` instances with a generated `external_url`."""

    # Faker's "url" provider supplies the value for each built instance.
    external_url = factory.Faker("url")

    class Meta:
        model = models.ExternalBlogPostPage
        # NOTE(review): presumably `subtitle` is declared on `BaseContentFactory`
        # and must not be passed to this model - confirm against the base factory.
        exclude = ["subtitle"]
|
||||
|
|
45
website/blog/migrations/0006_externalblogpostpage.py
Normal file
45
website/blog/migrations/0006_externalblogpostpage.py
Normal file
|
@ -0,0 +1,45 @@
|
|||
# Generated by Django 5.0.4 on 2024-05-29 21:10
|
||||
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
import modelcluster.fields
|
||||
import wagtailmetadata.models
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """
    Create the `ExternalBlogPostPage` model.

    The model is a child of `wagtailcore.page` (linked through the
    `page_ptr` parent link) with an `external_url`, a `date` and a
    many-to-many `tags` relation to `blog.blogposttagpage`.
    """

    dependencies = [
        ("blog", "0005_auto_20230602_1236"),
        ("wagtailcore", "0089_log_entry_data_json_null_to_object"),
    ]

    operations = [
        migrations.CreateModel(
            name="ExternalBlogPostPage",
            fields=[
                (
                    # Parent link doubling as the primary key
                    "page_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="wagtailcore.page",
                    ),
                ),
                ("external_url", models.URLField()),
                ("date", models.DateField(default=django.utils.timezone.now)),
                (
                    "tags",
                    modelcluster.fields.ParentalManyToManyField(
                        blank=True, to="blog.blogposttagpage"
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
            bases=("wagtailcore.page", wagtailmetadata.models.MetadataMixin),
        ),
    ]
|
|
@ -1,18 +1,26 @@
|
|||
from typing import Any, Optional
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from django.contrib.postgres.search import TrigramSimilarity
|
||||
from django.db import models
|
||||
from django.db.models.functions import Cast
|
||||
from django.db.models.functions import Cast, Coalesce
|
||||
from django.http import HttpRequest, HttpResponse, HttpResponsePermanentRedirect
|
||||
from django.utils import timezone
|
||||
from django.utils.functional import cached_property
|
||||
from metadata_parser import MetadataParser
|
||||
from modelcluster.fields import ParentalManyToManyField
|
||||
from wagtail.admin.panels import FieldPanel
|
||||
from wagtail.models import PageQuerySet
|
||||
from wagtail.models import Page, PageQuerySet, Site
|
||||
from wagtail.search import index
|
||||
from wagtailautocomplete.edit_handlers import AutocompletePanel
|
||||
|
||||
from website.common.models import BaseContentPage, BaseListingPage
|
||||
from website.common.utils import TocEntry
|
||||
from website.common.models import BaseContentPage, BaseListingPage, BasePage
|
||||
from website.common.utils import (
|
||||
TocEntry,
|
||||
extend_query_params,
|
||||
get_page_metadata,
|
||||
get_url_mime_type,
|
||||
)
|
||||
from website.contrib.singleton_page.utils import SingletonPageCache
|
||||
|
||||
|
||||
|
@ -23,6 +31,8 @@ class BlogPostListPage(BaseListingPage):
|
|||
"blog.BlogPostTagListPage",
|
||||
"blog.BlogPostCollectionListPage",
|
||||
"blog.BlogPostCollectionPage",
|
||||
"blog.BlogPostCollectionPage",
|
||||
"blog.ExternalBlogPostPage",
|
||||
]
|
||||
|
||||
@cached_property
|
||||
|
@ -31,9 +41,12 @@ class BlogPostListPage(BaseListingPage):
|
|||
|
||||
def get_listing_pages(self) -> models.QuerySet:
    """
    Return the live, public blog posts (internal and external) beneath this page.

    The query starts from the base `Page` model so both post types can be
    listed together. Each type stores its own `date`, so the two columns
    are coalesced into a single `date` annotation to order by.
    """
    return (
        Page.objects.live()
        .public()
        .annotate(date=Coalesce("blogpostpage__date", "externalblogpostpage__date"))
        .descendant_of(self)
        .type(BlogPostPage, ExternalBlogPostPage)
        .specific()
        .order_by("-date", "title")
    )
|
||||
|
||||
|
@ -87,10 +100,6 @@ class BlogPostPage(BaseContentPage):
|
|||
|
||||
similar_posts = listing_pages.exclude(id=self.id).alias(
|
||||
title_similarity=TrigramSimilarity("title", self.title),
|
||||
# If this page has no subtitle, ignore it as part of similarity
|
||||
subtitle_similarity=TrigramSimilarity("subtitle", self.subtitle)
|
||||
if self.subtitle
|
||||
else models.Value(1),
|
||||
)
|
||||
|
||||
page_tags = list(self.tags.public().live().values_list("id", flat=True))
|
||||
|
@ -109,7 +118,6 @@ class BlogPostPage(BaseContentPage):
|
|||
similar_posts = similar_posts.annotate(
|
||||
similarity=(models.F("tag_similarity") * 2)
|
||||
+ (models.F("title_similarity") * 10)
|
||||
+ (models.F("subtitle_similarity"))
|
||||
).order_by("-similarity")[:3]
|
||||
|
||||
return similar_posts
|
||||
|
@ -137,7 +145,20 @@ class BlogPostTagPage(BaseListingPage):
|
|||
|
||||
def get_listing_pages(self) -> models.QuerySet:
    """
    Return the blog list page's listing pages that carry this tag.

    `tags` is looked up separately on `BlogPostPage` and
    `ExternalBlogPostPage`, and the matching IDs from both are then used
    to narrow the combined listing queryset.
    """
    blog_list_page = BlogPostListPage.objects.get()
    listing_pages = blog_list_page.get_listing_pages()

    # IDs of the tagged posts of each concrete type
    blog_post_ids = list(
        BlogPostPage.objects.filter(id__in=listing_pages, tags=self).values_list(
            "id", flat=True
        )
    )
    external_post_ids = list(
        ExternalBlogPostPage.objects.filter(
            id__in=listing_pages, tags=self
        ).values_list("id", flat=True)
    )

    return listing_pages.filter(
        id__in=blog_post_ids + external_post_ids
    ).specific()
|
||||
|
||||
|
||||
class BlogPostCollectionListPage(BaseListingPage):
|
||||
|
@ -167,3 +188,101 @@ class BlogPostCollectionPage(BaseListingPage):
|
|||
.public()
|
||||
.order_by("-date", "title")
|
||||
)
|
||||
|
||||
|
||||
class ExternalBlogPostPage(BaseContentPage):
    """
    A blog post which lives on another site.

    Only the link, date and tags are stored. The description and image are
    read from the external page's metadata, and requests for the page are
    redirected to the external URL.
    """

    subpage_types: list[Any] = []
    parent_page_types = [BlogPostListPage]
    preview_modes: list[Any] = []

    # Checked by templates to mark the link as external
    is_external = True

    # Some `BaseContentPage` fields aren't relevant
    body = None
    subtitle = None
    hero_image = None
    hero_unsplash_photo = None

    external_url = models.URLField()

    tags = ParentalManyToManyField("blog.BlogPostTagPage", blank=True)
    date = models.DateField(default=timezone.now)

    content_panels = BasePage.content_panels + [FieldPanel("external_url")]

    promote_panels = BaseContentPage.promote_panels + [
        FieldPanel("date"),
        AutocompletePanel("tags"),
    ]

    search_fields = BaseContentPage.search_fields + [
        index.RelatedFields("tags", [index.SearchField("title", boost=1)]),
        index.SearchField("external_url"),
    ]

    @cached_property
    def tag_list_page_url(self) -> Optional[str]:
        """
        URL of the tag list page, as resolved by `SingletonPageCache`.
        """
        return SingletonPageCache.get_url(BlogPostTagListPage)

    @cached_property
    def tags_list(self) -> models.QuerySet:
        """
        Use this to get a page's tags, ordered by slug.
        """
        tags = self.tags.order_by("slug")

        # In drafts, `django-modelcluster` doesn't support these filters
        if isinstance(tags, PageQuerySet):
            return tags.public().live()

        return tags

    @cached_property
    def metadata(self) -> MetadataParser:
        """
        Metadata parser for the external page.
        """
        return get_page_metadata(self.external_url)

    @cached_property
    def _body_html(self) -> str:
        """
        The external page's first `description` metadata value, or an empty
        string when there isn't one.
        """
        try:
            return self.metadata.get_metadatas("description")[0]
        except (KeyError, IndexError, TypeError):
            return ""

    @cached_property
    def plain_text(self) -> str:
        # The metadata is already just text
        return self._body_html

    def hero_url(
        self, image_size: str, wagtail_image_spec_extra: Optional[str] = None
    ) -> Optional[str]:
        """
        Return the external page's first `image` metadata value, or `None`
        when there isn't one.

        Both arguments are accepted but unused: the image comes from the
        external page as-is.
        """
        try:
            return self.metadata.get_metadatas("image")[0]
        except (KeyError, IndexError, TypeError):
            return None

    @cached_property
    def hero_image_url(self) -> str:
        """
        Always an empty string for external posts.
        """
        return ""

    @cached_property
    def hero_image_alt(self) -> str:
        """
        Always an empty string for external posts.
        """
        return ""

    def get_meta_image_mime(self) -> Optional[str]:
        """
        Return the MIME type of the external page's image, or `None` when
        the page has no image.
        """
        image_url = self.hero_url("")

        # `hero_url` returns `None` when the metadata has no image, and
        # there's no MIME type to look up for a missing URL.
        if image_url is None:
            return None

        return get_url_mime_type(image_url)

    def get_url(
        self, request: HttpRequest | None = None, current_site: Site | None = None
    ) -> str:
        """
        Return the external URL rather than a site-relative path.

        `current_site` is accepted but unused.
        """
        return self.get_full_url(request)

    def get_full_url(self, request: HttpRequest | None = None) -> str:
        """
        Return the external URL, with `utm_source` set to the host of this
        page's own full URL.
        """
        full_url = urlsplit(super().get_full_url(request))
        return extend_query_params(self.external_url, {"utm_source": full_url.netloc})

    def serve(self, request: HttpRequest, *args: Any, **kwargs: Any) -> HttpResponse:
        """
        Send the user directly to the external page
        """
        return HttpResponsePermanentRedirect(self.get_full_url(request))
|
||||
|
|
16
website/blog/templates/blog/external_blog_post_page.html
Normal file
16
website/blog/templates/blog/external_blog_post_page.html
Normal file
|
@ -0,0 +1,16 @@
|
|||
{% comment %}
|
||||
This template is never used, but exists just in case.
|
||||
{% endcomment %}
|
||||
|
||||
<!DOCTYPE html>
|
||||
<html lang="en-GB">
|
||||
<head>
|
||||
<title>Redirecting...</title>
|
||||
<link rel="canonical" href="{{ page.external_url }}" />
|
||||
<meta charset="utf-8" />
|
||||
<meta http-equiv="refresh" content="0; url={{ page.external_url }}" />
|
||||
</head>
|
||||
<body>
|
||||
<p>Redirecting...</p>
|
||||
</body>
|
||||
</html>
|
|
@ -3,7 +3,11 @@ from django.urls import reverse
|
|||
|
||||
from website.home.models import HomePage
|
||||
|
||||
from .factories import BlogPostListPageFactory, BlogPostPageFactory
|
||||
from .factories import (
|
||||
BlogPostListPageFactory,
|
||||
BlogPostPageFactory,
|
||||
ExternalBlogPostPageFactory,
|
||||
)
|
||||
|
||||
|
||||
class BlogPostPageTestCase(TestCase):
|
||||
|
@ -69,14 +73,15 @@ class BlogPostListPageTestCase(TestCase):
|
|||
|
||||
BlogPostPageFactory(parent=cls.page)
|
||||
BlogPostPageFactory(parent=cls.page)
|
||||
ExternalBlogPostPageFactory(parent=cls.page, external_url="https://example.com")
|
||||
|
||||
def test_accessible(self) -> None:
    """The listing renders and includes every post created for it."""
    response = self.client.get(self.page.url)
    self.assertEqual(response.status_code, 200)
    # 2 blog posts + 1 external blog post
    self.assertEqual(len(response.context["listing_pages"]), 3)
|
||||
|
||||
def test_queries(self) -> None:
    """Rendering the listing takes a fixed number of queries."""
    with self.assertNumQueries(43):
        self.client.get(self.page.url)
|
||||
|
||||
def test_feed_accessible(self) -> None:
|
||||
|
@ -84,3 +89,21 @@ class BlogPostListPageTestCase(TestCase):
|
|||
self.assertRedirects(
|
||||
response, reverse("feed"), status_code=301, fetch_redirect_response=True
|
||||
)
|
||||
|
||||
|
||||
class ExternalBlogPostPageTestCase(TestCase):
    """Requests for an external blog post are redirected to its external URL."""

    @classmethod
    def setUpTestData(cls) -> None:
        home_page = HomePage.objects.get()
        list_page = BlogPostListPageFactory(parent=home_page)

        cls.home_page = home_page
        cls.blog_post_list_page = list_page
        cls.page = ExternalBlogPostPageFactory(parent=list_page)

    def test_redirects(self) -> None:
        # The redirect target carries the requesting host as `utm_source`
        expected_url = f"{self.page.external_url}?utm_source=localhost"

        with self.assertNumQueries(10):
            response = self.client.get(self.page.url)

        self.assertRedirects(
            response,
            expected_url,
            status_code=301,
            fetch_redirect_response=False,
        )
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
from datetime import timedelta
|
||||
from math import ceil
|
||||
from typing import Any, Optional
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from django.core.paginator import EmptyPage, Paginator
|
||||
from django.core.paginator import Page as PaginatorPage
|
||||
|
@ -31,6 +30,7 @@ from .serializers import PaginationSerializer
|
|||
from .streamfield import add_heading_anchors, get_blocks, get_content_html
|
||||
from .utils import (
|
||||
TocEntry,
|
||||
extend_query_params,
|
||||
extract_text,
|
||||
get_site_title,
|
||||
get_table_of_contents,
|
||||
|
@ -181,6 +181,7 @@ class BaseContentPage(BasePage, MetadataMixin):
|
|||
for size, width in UNSPLASH_SIZES.items()
|
||||
}
|
||||
|
||||
@cached_property
def hero_image_url(self) -> Optional[str]:
    """
    The hero URL at the "regular" size, as returned by `hero_url`.
    """
    return self.hero_url("regular")
|
||||
|
||||
|
@ -286,10 +287,7 @@ class BaseListingPage(RoutablePageMixin, BaseContentPage):
|
|||
|
||||
url = super().get_meta_url()
|
||||
|
||||
if not query_data:
|
||||
return url
|
||||
|
||||
return url + "?" + urlencode(query_data)
|
||||
return extend_query_params(url, query_data)
|
||||
|
||||
@route(r"^feed/$")
|
||||
def feed(self, request: HttpRequest) -> HttpResponse:
|
||||
|
|
|
@ -16,7 +16,10 @@
|
|||
{% include "common/breadcrumbs.html" with parents=page.get_parent_pages %}
|
||||
{% endif %}
|
||||
<h2 class="title is-3">
|
||||
<a href="{% pageurl page %}">{{ page.title }}</a>
|
||||
<a href="{% pageurl page %}">
|
||||
{{ page.title }}
|
||||
{% if page.is_external %}<i class="fa-solid fa-arrow-up-right-from-square" title="This page is from an external source"></i>{% endif %}
|
||||
</a>
|
||||
</h2>
|
||||
{% include "common/content-details.html" %}
|
||||
<p>{{ page.summary }}</p>
|
||||
|
|
|
@ -3,6 +3,7 @@ from django.test import SimpleTestCase
|
|||
from wagtail.rich_text import features as richtext_feature_registry
|
||||
|
||||
from website.common.utils import (
|
||||
extend_query_params,
|
||||
extract_text,
|
||||
get_table_of_contents,
|
||||
heading_id,
|
||||
|
@ -111,3 +112,25 @@ class HeadingIDTestCase(SimpleTestCase):
|
|||
self.assertEqual(heading_id("123"), "ref-123")
|
||||
self.assertEqual(heading_id("test"), "test")
|
||||
self.assertEqual(heading_id("Look, a title!"), "look-a-title")
|
||||
|
||||
|
||||
class ExtendQueryParamsTestCase(SimpleTestCase):
    """Covers adding, overwriting and removing parameters with `extend_query_params`."""

    def test_params(self) -> None:
        # (url, params to apply, expected URL)
        cases = [
            ("https://example.com", {"foo": "bar"}, "https://example.com?foo=bar"),
            (
                "https://example.com?foo=bar",
                {"bar": "foo"},
                "https://example.com?foo=bar&bar=foo",
            ),
            (
                "https://example.com?foo=baz",
                {"foo": "baz"},
                "https://example.com?foo=baz",
            ),
        ]

        for url, params, expected in cases:
            with self.subTest(url=url, params=params):
                self.assertEqual(extend_query_params(url, params), expected)

    def test_removes_param(self) -> None:
        # A `None` value drops the parameter entirely
        stripped = extend_query_params("https://example.com?foo=bar", {"foo": None})
        self.assertEqual(stripped, "https://example.com")
|
||||
|
|
|
@ -1,14 +1,17 @@
|
|||
from dataclasses import dataclass
|
||||
from itertools import pairwise
|
||||
from typing import Optional, Type
|
||||
from typing import Any, Optional, Type
|
||||
from urllib.parse import urlsplit, urlunsplit
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup, SoupStrainer
|
||||
from django.conf import settings
|
||||
from django.db import models
|
||||
from django.http import QueryDict
|
||||
from django.http.request import HttpRequest
|
||||
from django.utils.text import slugify
|
||||
from django_cache_decorator import django_cache_decorator
|
||||
from metadata_parser import MetadataParser
|
||||
from wagtail.models import Page, Site
|
||||
from wagtail.models import get_page_models as get_wagtail_page_models
|
||||
|
||||
|
@ -122,3 +125,21 @@ def get_ai_robots_txt() -> str:
|
|||
return requests_session.get(
|
||||
"https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/main/robots.txt"
|
||||
).content.decode()
|
||||
|
||||
|
||||
@django_cache_decorator(time=21600)
def get_page_metadata(url: str) -> MetadataParser:
    """
    Build a `MetadataParser` for the page at `url`.

    The result is cached by `django_cache_decorator` with `time=21600`
    (presumably seconds, i.e. 6 hours - confirm against the decorator).
    """
    return MetadataParser(url=url, search_head_only=True)
|
||||
|
||||
|
||||
def extend_query_params(url: str, params: dict[str, Any]) -> str:
    """
    Return `url` with its query string updated from `params`.

    Each key in `params` replaces any existing value for that key. A value
    of `None` removes the key instead; removing a key the URL doesn't have
    is a no-op. The scheme, host, path and fragment are carried over.
    """
    scheme, netloc, path, query, fragment = urlsplit(url)
    query_dict = QueryDict(query, mutable=True)

    for key, value in params.items():
        if value is None:
            # `pop` with a default rather than `del`, so asking to remove a
            # parameter which isn't present doesn't raise `KeyError`.
            query_dict.pop(key, None)
        else:
            query_dict[key] = value

    return urlunsplit((scheme, netloc, path, query_dict.urlencode(), fragment))
|
||||
|
|
|
@ -23,7 +23,7 @@ from website.search.models import SearchPage
|
|||
|
||||
from .feed_generators import CustomFeed
|
||||
from .models import BaseListingPage, BasePage
|
||||
from .utils import get_ai_robots_txt
|
||||
from .utils import extend_query_params, get_ai_robots_txt
|
||||
|
||||
|
||||
class Error404View(TemplateView):
|
||||
|
@ -116,7 +116,9 @@ class AllPagesFeed(Feed):
|
|||
return item.title
|
||||
|
||||
def item_link(self, item: BasePage) -> str:
    """
    Return the item's full URL tagged with `utm_medium=rss`.
    """
    # Merge the parameter in rather than appending "?utm_medium=rss": the
    # URL may already have a query string, which a second "?" would break.
    return extend_query_params(
        item.get_full_url(request=self.request), {"utm_medium": "rss"}
    )
|
||||
|
||||
def item_pubdate(self, item: BasePage) -> datetime:
|
||||
if item_date := getattr(item, "date", None):
|
||||
|
|
|
@ -7,7 +7,7 @@ from wagtail.search.utils import parse_query_string
|
|||
from wagtail_favicon.models import FaviconSettings
|
||||
from wagtail_favicon.utils import get_rendition_url
|
||||
|
||||
from website.common.utils import get_or_none, get_site_title
|
||||
from website.common.utils import extend_query_params, get_or_none, get_site_title
|
||||
from website.contrib.singleton_page.utils import SingletonPageCache
|
||||
|
||||
from .models import SearchPage
|
||||
|
@ -87,4 +87,4 @@ class GoView(RedirectView):
|
|||
if slug_match := get_or_none(pages.filter(slug__iexact=query)):
|
||||
return slug_match.get_url(request=self.request)
|
||||
|
||||
return f"{search_page_url}?{self.request.GET.urlencode()}"
|
||||
return extend_query_params(search_page_url, self.request.GET)
|
||||
|
|
|
@ -391,6 +391,11 @@ LOGGING = {
|
|||
"level": "WARNING",
|
||||
"propagate": False,
|
||||
},
|
||||
"metadata_parser": {
|
||||
"handlers": ["console"],
|
||||
"level": "CRITICAL",
|
||||
"propagate": False,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue