Compare commits

..

1 Commits

Author SHA1 Message Date
b2483a62db Update dependency django-permissions-policy to v4.19.0 2024-05-20 10:00:43 +01:00
15 changed files with 54 additions and 344 deletions

View File

@ -24,12 +24,11 @@ Wand==0.6.13
django3-cache-decorator==0.5.2
django-cors-headers==4.3.1
django-csp==3.7
django-permissions-policy==4.18.0
django-permissions-policy==4.19.0
django-enforce-host==1.1.0
django-proxy==1.2.2
wagtail-lite-youtube-embed==0.1.0
django-minify-html==1.7.1
metadata-parser==0.12.1
# DRF OpenAPI dependencies
uritemplate

View File

@ -18,17 +18,6 @@
.title {
margin-bottom: 0;
a {
display: flex;
flex-direction: row;
align-items: center;
justify-content: space-between;
i {
font-size: 50%;
}
}
}
.content-details {

View File

@ -1,5 +1,3 @@
import factory
from website.common.factories import BaseContentFactory, BaseListingFactory
from . import models
@ -13,11 +11,3 @@ class BlogPostListPageFactory(BaseListingFactory):
class BlogPostPageFactory(BaseContentFactory):
class Meta:
model = models.BlogPostPage
class ExternalBlogPostPageFactory(BaseContentFactory):
    """Test factory that builds `models.ExternalBlogPostPage` instances."""

    # Filled from Faker's "url" provider unless the caller passes a value.
    external_url = factory.Faker("url")

    class Meta:
        model = models.ExternalBlogPostPage
        # NOTE(review): presumably needed because the base factory declares a
        # `subtitle` that this model does not accept - confirm against
        # `BaseContentFactory`.
        exclude = ["subtitle"]

View File

@ -1,45 +0,0 @@
# Generated by Django 5.0.4 on 2024-05-29 21:10
import django.db.models.deletion
import django.utils.timezone
import modelcluster.fields
import wagtailmetadata.models
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the `ExternalBlogPostPage` model."""

    dependencies = [
        ("blog", "0005_auto_20230602_1236"),
        ("wagtailcore", "0089_log_entry_data_json_null_to_object"),
    ]

    operations = [
        migrations.CreateModel(
            name="ExternalBlogPostPage",
            fields=[
                # Parent link to the `wagtailcore.page` row; it is also this
                # model's primary key.
                (
                    "page_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="wagtailcore.page",
                    ),
                ),
                ("external_url", models.URLField()),
                ("date", models.DateField(default=django.utils.timezone.now)),
                # Optional (blank=True) many-to-many link to blog tag pages.
                (
                    "tags",
                    modelcluster.fields.ParentalManyToManyField(
                        blank=True, to="blog.blogposttagpage"
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
            bases=("wagtailcore.page", wagtailmetadata.models.MetadataMixin),
        ),
    ]

View File

@ -1,26 +1,18 @@
from typing import Any, Optional
from urllib.parse import urlsplit
from django.contrib.postgres.search import TrigramSimilarity
from django.db import models
from django.db.models.functions import Cast, Coalesce
from django.http import HttpRequest, HttpResponse, HttpResponsePermanentRedirect
from django.db.models.functions import Cast
from django.utils import timezone
from django.utils.functional import cached_property
from metadata_parser import MetadataParser
from modelcluster.fields import ParentalManyToManyField
from wagtail.admin.panels import FieldPanel
from wagtail.models import Page, PageQuerySet, Site
from wagtail.models import PageQuerySet
from wagtail.search import index
from wagtailautocomplete.edit_handlers import AutocompletePanel
from website.common.models import BaseContentPage, BaseListingPage, BasePage
from website.common.utils import (
TocEntry,
extend_query_params,
get_page_metadata,
get_url_mime_type,
)
from website.common.models import BaseContentPage, BaseListingPage
from website.common.utils import TocEntry
from website.contrib.singleton_page.utils import SingletonPageCache
@ -31,8 +23,6 @@ class BlogPostListPage(BaseListingPage):
"blog.BlogPostTagListPage",
"blog.BlogPostCollectionListPage",
"blog.BlogPostCollectionPage",
"blog.BlogPostCollectionPage",
"blog.ExternalBlogPostPage",
]
@cached_property
@ -41,12 +31,9 @@ class BlogPostListPage(BaseListingPage):
def get_listing_pages(self) -> models.QuerySet:
return (
Page.objects.live()
BlogPostPage.objects.descendant_of(self)
.live()
.public()
.annotate(date=Coalesce("blogpostpage__date", "externalblogpostpage__date"))
.descendant_of(self)
.type(BlogPostPage, ExternalBlogPostPage)
.specific()
.order_by("-date", "title")
)
@ -93,40 +80,36 @@ class BlogPostPage(BaseContentPage):
return SingletonPageCache.get_url(BlogPostListPage)
def get_similar_posts(self) -> models.QuerySet:
listing_pages = BlogPostListPage.objects.get().get_listing_pages()
try:
listing_pages = BlogPostListPage.objects.get().get_listing_pages()
except BlogPostListPage.DoesNotExist:
return BlogPostPage.objects.none()
similar_posts = listing_pages.exclude(id=self.id).alias(
title_similarity=TrigramSimilarity("title", self.title),
# If this page has no subtitle, ignore it as part of similarity
subtitle_similarity=TrigramSimilarity("subtitle", self.subtitle)
if self.subtitle
else models.Value(1),
)
page_tags = list(self.tags.public().live().values_list("id", flat=True))
# If this page has no tags, ignore it as part of similarity
divisor = len(page_tags) if page_tags else models.Value(1)
similar_posts = similar_posts.alias(
# If this page has no tags, ignore it as part of similarity
# NB: Cast to a float, because `COUNT` returns a `bigint`.
_blog_tag_similarity=Cast(
models.Count(
"blogpostpage__tags",
filter=models.Q(blogpostpage__tags__in=page_tags),
),
tag_similarity=Cast(
models.Count("tags", filter=models.Q(tags__in=page_tags)),
output_field=models.FloatField(),
)
/ divisor,
_external_tag_similarity=Cast(
models.Count(
"externalblogpostpage__tags",
filter=models.Q(externalblogpostpage__tags__in=page_tags),
),
output_field=models.FloatField(),
)
/ divisor,
tag_similarity=models.F("_blog_tag_similarity")
+ models.F("_external_tag_similarity"),
/ len(page_tags)
if page_tags
else models.Value(1)
)
similar_posts = similar_posts.annotate(
similarity=(models.F("tag_similarity") * 2)
+ (models.F("title_similarity") * 10)
+ (models.F("subtitle_similarity"))
).order_by("-similarity")[:3]
return similar_posts
@ -154,12 +137,7 @@ class BlogPostTagPage(BaseListingPage):
def get_listing_pages(self) -> models.QuerySet:
blog_list_page = BlogPostListPage.objects.get()
listing_pages = blog_list_page.get_listing_pages()
return listing_pages.filter(
models.Q(blogpostpage__tags=self)
| models.Q(externalblogpostpage__tags=self)
).distinct()
return blog_list_page.get_listing_pages().filter(tags=self)
class BlogPostCollectionListPage(BaseListingPage):
@ -189,101 +167,3 @@ class BlogPostCollectionPage(BaseListingPage):
.public()
.order_by("-date", "title")
)
class ExternalBlogPostPage(BaseContentPage):
    """
    A blog post that lives on another site.

    Only a link (`external_url`), a date and tags are stored. The summary
    text and hero image are read from the metadata of the linked page, and
    serving the page redirects permanently to the external URL.
    """

    subpage_types: list[Any] = []
    parent_page_types = [BlogPostListPage]
    preview_modes: list[Any] = []

    is_external = True

    # Some `BaseContentPage` fields aren't relevant
    body = None
    subtitle = None
    hero_image = None
    hero_unsplash_photo = None

    external_url = models.URLField()
    tags = ParentalManyToManyField("blog.BlogPostTagPage", blank=True)
    date = models.DateField(default=timezone.now)

    content_panels = BasePage.content_panels + [FieldPanel("external_url")]
    promote_panels = BaseContentPage.promote_panels + [
        FieldPanel("date"),
        AutocompletePanel("tags"),
    ]

    search_fields = BaseContentPage.search_fields + [
        index.RelatedFields("tags", [index.SearchField("title", boost=1)]),
        index.SearchField("external_url"),
    ]

    @cached_property
    def tag_list_page_url(self) -> Optional[str]:
        """Return the URL `SingletonPageCache` holds for the tag list page."""
        return SingletonPageCache.get_url(BlogPostTagListPage)

    @cached_property
    def tags_list(self) -> models.QuerySet:
        """
        Use this to get a page's tags.
        """
        tags = self.tags.order_by("slug")
        # In drafts, `django-modelcluster` doesn't support these filters
        if isinstance(tags, PageQuerySet):
            return tags.public().live()
        return tags

    @cached_property
    def metadata(self) -> MetadataParser:
        """Return the parsed metadata of the page at `external_url`."""
        return get_page_metadata(self.external_url)

    def _first_metadata(self, field: str) -> Optional[str]:
        """Return the first metadata value for `field`, or `None` if absent."""
        try:
            return self.metadata.get_metadatas(field)[0]
        except (KeyError, IndexError, TypeError):
            # NOTE(review): `TypeError` presumably covers `get_metadatas`
            # returning `None` for an unknown field - confirm against
            # metadata_parser.
            return None

    @cached_property
    def _body_html(self) -> str:
        """Return the linked page's description, or `""` when it has none."""
        description = self._first_metadata("description")
        return "" if description is None else description

    @cached_property
    def plain_text(self) -> str:
        # The metadata is already just text
        return self._body_html

    def hero_url(
        self, image_size: str, wagtail_image_spec_extra: Optional[str] = None
    ) -> Optional[str]:
        """
        Return the linked page's metadata image URL, or `None` if it has none.

        Both arguments are accepted for signature compatibility and ignored.
        """
        return self._first_metadata("image")

    @cached_property
    def hero_image_url(self) -> str:
        return ""

    @cached_property
    def hero_image_alt(self) -> str:
        return ""

    def get_meta_image_mime(self) -> Optional[str]:
        """Return the MIME type of the hero image, or `None` without one."""
        image_url = self.hero_url("")
        if image_url is None:
            # Nothing to inspect: don't hand `None` to the MIME-type helper.
            return None
        return get_url_mime_type(image_url)

    def get_url(
        self, request: HttpRequest | None = None, current_site: Site | None = None
    ) -> str:
        """Return the external URL; `current_site` is ignored."""
        return self.get_full_url(request)

    def get_full_url(self, request: HttpRequest | None = None) -> str:
        """
        Return `external_url` with a `utm_source` query parameter added.

        The parameter's value is the host part of the URL this page would
        otherwise have.
        """
        own_url = urlsplit(super().get_full_url(request))
        return extend_query_params(self.external_url, {"utm_source": own_url.netloc})

    def serve(self, request: HttpRequest, *args: tuple, **kwargs: dict) -> HttpResponse:
        """
        Send the user directly to the external page
        """
        return HttpResponsePermanentRedirect(self.get_full_url(request))

View File

@ -1,16 +0,0 @@
{% comment %}
This template is never used, but exists just in case.
{% endcomment %}
<!DOCTYPE html>
<html lang="en-GB">
<head>
<title>Redirecting...</title>
<link rel="canonical" href="{{ page.external_url }}" />
<meta charset="utf-8" />
<meta http-equiv="refresh" content="0; url={{ page.external_url }}" />
</head>
<body>
<p>Redirecting...</p>
</body>
</html>

View File

@ -3,11 +3,7 @@ from django.urls import reverse
from website.home.models import HomePage
from .factories import (
BlogPostListPageFactory,
BlogPostPageFactory,
ExternalBlogPostPageFactory,
)
from .factories import BlogPostListPageFactory, BlogPostPageFactory
class BlogPostPageTestCase(TestCase):
@ -73,15 +69,14 @@ class BlogPostListPageTestCase(TestCase):
BlogPostPageFactory(parent=cls.page)
BlogPostPageFactory(parent=cls.page)
ExternalBlogPostPageFactory(parent=cls.page, external_url="https://example.com")
def test_accessible(self) -> None:
response = self.client.get(self.page.url)
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.context["listing_pages"]), 3)
self.assertEqual(len(response.context["listing_pages"]), 2)
def test_queries(self) -> None:
with self.assertNumQueries(43):
with self.assertNumQueries(39):
self.client.get(self.page.url)
def test_feed_accessible(self) -> None:
@ -89,21 +84,3 @@ class BlogPostListPageTestCase(TestCase):
self.assertRedirects(
response, reverse("feed"), status_code=301, fetch_redirect_response=True
)
class ExternalBlogPostPageTestCase(TestCase):
    """Requesting an external blog post redirects to its external URL."""

    @classmethod
    def setUpTestData(cls) -> None:
        cls.home_page = HomePage.objects.get()
        cls.blog_post_list_page = BlogPostListPageFactory(parent=cls.home_page)
        cls.page = ExternalBlogPostPageFactory(parent=cls.blog_post_list_page)

    def test_redirects(self) -> None:
        # The redirect target carries the serving host as `utm_source`.
        expected_target = self.page.external_url + "?utm_source=localhost"

        with self.assertNumQueries(10):
            response = self.client.get(self.page.url)

        self.assertRedirects(
            response,
            expected_target,
            status_code=301,
            fetch_redirect_response=False,
        )

View File

@ -1,6 +1,7 @@
from datetime import timedelta
from math import ceil
from typing import Any, Optional
from urllib.parse import urlencode
from django.core.paginator import EmptyPage, Paginator
from django.core.paginator import Page as PaginatorPage
@ -30,7 +31,6 @@ from .serializers import PaginationSerializer
from .streamfield import add_heading_anchors, get_blocks, get_content_html
from .utils import (
TocEntry,
extend_query_params,
extract_text,
get_site_title,
get_table_of_contents,
@ -181,7 +181,6 @@ class BaseContentPage(BasePage, MetadataMixin):
for size, width in UNSPLASH_SIZES.items()
}
@cached_property
def hero_image_url(self) -> Optional[str]:
return self.hero_url("regular")
@ -287,7 +286,10 @@ class BaseListingPage(RoutablePageMixin, BaseContentPage):
url = super().get_meta_url()
return extend_query_params(url, query_data)
if not query_data:
return url
return url + "?" + urlencode(query_data)
@route(r"^feed/$")
def feed(self, request: HttpRequest) -> HttpResponse:
@ -297,11 +299,8 @@ class BaseListingPage(RoutablePageMixin, BaseContentPage):
def random(self, request: HttpRequest) -> HttpResponse:
page = self.get_listing_pages().order_by("?").first()
if page is None:
response = redirect(self.get_url(request=request), permanent=False)
else:
response = redirect(page.get_url(request=request), permanent=False)
response.headers["X-Robots-Tag"] = "noindex"
return response
return redirect(self.get_url(request=request), permanent=False)
return redirect(page.get_url(request=request), permanent=False)
class ListingPage(BaseListingPage):

View File

@ -16,10 +16,7 @@
{% include "common/breadcrumbs.html" with parents=page.get_parent_pages %}
{% endif %}
<h2 class="title is-3">
<a href="{% pageurl page %}">
{{ page.title }}
{% if page.is_external %}<i class="fa-solid fa-arrow-up-right-from-square" title="This page is from a external source"></i>{% endif %}
</a>
<a href="{% pageurl page %}">{{ page.title }}</a>
</h2>
{% include "common/content-details.html" %}
<p>{{ page.summary }}</p>

View File

@ -1,7 +1,11 @@
{% if SEO_INDEX %}
User-agent: *
{% if SEO_INDEX %}Allow: /{% else %}Disallow: /{% endif %}
# https://github.com/ai-robots-txt/ai.robots.txt
{{ ai_robots_txt }}
Allow: /
{% else %}
User-agent: *
Disallow: /
{% endif %}
Disallow: {% url "wagtailadmin_home" %}
Disallow: {% url "api:index" %}
Sitemap: {{ sitemap }}

View File

@ -3,7 +3,6 @@ from django.test import SimpleTestCase
from wagtail.rich_text import features as richtext_feature_registry
from website.common.utils import (
extend_query_params,
extract_text,
get_table_of_contents,
heading_id,
@ -112,25 +111,3 @@ class HeadingIDTestCase(SimpleTestCase):
self.assertEqual(heading_id("123"), "ref-123")
self.assertEqual(heading_id("test"), "test")
self.assertEqual(heading_id("Look, a title!"), "look-a-title")
class ExtendQueryParamsTestCase(SimpleTestCase):
    """Checks for `extend_query_params`."""

    def test_params(self) -> None:
        # (input URL, params to apply, expected URL)
        cases = [
            ("https://example.com", {"foo": "bar"}, "https://example.com?foo=bar"),
            (
                "https://example.com?foo=bar",
                {"bar": "foo"},
                "https://example.com?foo=bar&bar=foo",
            ),
            (
                "https://example.com?foo=baz",
                {"foo": "baz"},
                "https://example.com?foo=baz",
            ),
        ]
        for url, params, expected in cases:
            self.assertEqual(extend_query_params(url, params), expected)

    def test_removes_param(self) -> None:
        # A `None` value drops the parameter from the URL.
        stripped = extend_query_params("https://example.com?foo=bar", {"foo": None})
        self.assertEqual(stripped, "https://example.com")

View File

@ -1,17 +1,14 @@
from dataclasses import dataclass
from itertools import pairwise
from typing import Any, Optional, Type
from urllib.parse import urlsplit, urlunsplit
from typing import Optional, Type
import requests
from bs4 import BeautifulSoup, SoupStrainer
from django.conf import settings
from django.db import models
from django.http import QueryDict
from django.http.request import HttpRequest
from django.utils.text import slugify
from django_cache_decorator import django_cache_decorator
from metadata_parser import MetadataParser
from wagtail.models import Page, Site
from wagtail.models import get_page_models as get_wagtail_page_models
@ -115,31 +112,3 @@ def get_or_none(queryset: models.QuerySet) -> models.Model:
return queryset.get()
except (queryset.model.DoesNotExist, queryset.model.MultipleObjectsReturned):
return None
@django_cache_decorator(time=21600)
def get_ai_robots_txt() -> str:
    """
    Download the robots.txt published by the ai.robots.txt project.

    https://github.com/ai-robots-txt/ai.robots.txt

    Returns the decoded response body as text.
    """
    response = requests_session.get(
        "https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/main/robots.txt"
    )
    return response.content.decode()
@django_cache_decorator(time=21600)
def get_page_metadata(url: str) -> MetadataParser:
    """Build a `MetadataParser` for `url` with `search_head_only` enabled."""
    parser = MetadataParser(url=url, search_head_only=True)
    return parser
def extend_query_params(url: str, params: dict[str, Any]) -> str:
    """
    Return `url` with its query string updated from `params`.

    Each key in `params` replaces any existing value for that key. A value of
    `None` removes the key instead. The scheme, host, path and fragment of
    `url` are passed through unchanged.
    """
    scheme, netloc, path, query, fragment = urlsplit(url)
    query_dict = QueryDict(query, mutable=True)

    for key, value in params.items():
        if value is None:
            # `pop` rather than `del`: removing a parameter the URL never had
            # is a no-op, not a `KeyError`.
            query_dict.pop(key, None)
        else:
            query_dict[key] = value

    return urlunsplit((scheme, netloc, path, query_dict.urlencode(), fragment))

View File

@ -23,7 +23,6 @@ from website.search.models import SearchPage
from .feed_generators import CustomFeed
from .models import BaseListingPage, BasePage
from .utils import extend_query_params, get_ai_robots_txt
class Error404View(TemplateView):
@ -53,7 +52,6 @@ class RobotsView(TemplateView):
def get_context_data(self, **kwargs: dict) -> dict:
context = super().get_context_data(**kwargs)
context["sitemap"] = self.request.build_absolute_uri(reverse("sitemap"))
context["ai_robots_txt"] = get_ai_robots_txt()
return context
@ -116,9 +114,7 @@ class AllPagesFeed(Feed):
return item.title
def item_link(self, item: BasePage) -> str:
return extend_query_params(
item.get_full_url(request=self.request), {"utm_medium": "rss"}
)
return item.get_full_url(request=self.request) + "?utm_medium=rss"
def item_pubdate(self, item: BasePage) -> datetime:
if item_date := getattr(item, "date", None):

View File

@ -7,7 +7,7 @@ from wagtail.search.utils import parse_query_string
from wagtail_favicon.models import FaviconSettings
from wagtail_favicon.utils import get_rendition_url
from website.common.utils import extend_query_params, get_or_none, get_site_title
from website.common.utils import get_or_none, get_site_title
from website.contrib.singleton_page.utils import SingletonPageCache
from .models import SearchPage
@ -87,4 +87,4 @@ class GoView(RedirectView):
if slug_match := get_or_none(pages.filter(slug__iexact=query)):
return slug_match.get_url(request=self.request)
return extend_query_params(search_page_url, self.request.GET)
return f"{search_page_url}?{self.request.GET.urlencode()}"

View File

@ -320,15 +320,14 @@ if DEBUG:
INSTALLED_APPS.append("django_browser_reload")
MIDDLEWARE.append("django_browser_reload.middleware.BrowserReloadMiddleware")
if not TEST:
# Add django-debug-toolbar
INSTALLED_APPS.append("debug_toolbar")
MIDDLEWARE.append("debug_toolbar.middleware.DebugToolbarMiddleware")
DEBUG_TOOLBAR_CONFIG = {
"SHOW_TOOLBAR_CALLBACK": "website.common.utils.show_toolbar_callback",
"RESULTS_CACHE_SIZE": 5,
"SHOW_COLLAPSED": True,
}
# Add django-debug-toolbar
INSTALLED_APPS.append("debug_toolbar")
MIDDLEWARE.append("debug_toolbar.middleware.DebugToolbarMiddleware")
DEBUG_TOOLBAR_CONFIG = {
"SHOW_TOOLBAR_CALLBACK": "website.common.utils.show_toolbar_callback",
"RESULTS_CACHE_SIZE": 5,
"SHOW_COLLAPSED": True,
}
# Add Wagtail styleguide
INSTALLED_APPS.append("wagtail.contrib.styleguide")
@ -392,11 +391,6 @@ LOGGING = {
"level": "WARNING",
"propagate": False,
},
"metadata_parser": {
"handlers": ["console"],
"level": "CRITICAL",
"propagate": False,
},
},
}