From 6ab0ff3fa76935ee746c3883db81c47a3a09d265 Mon Sep 17 00:00:00 2001
From: Jake Howard
Date: Fri, 2 Jun 2023 15:33:15 +0100
Subject: [PATCH] Add similar content based on text Trigram similarity

---
 static/src/scss/_similar_content.scss         | 16 ++++++
 static/src/scss/base.scss                     |  1 +
 .../migrations/0005_auto_20230602_1236.py     | 13 +++++
 website/blog/models.py                        | 32 ++++++++++++
 .../blog/templates/blog/blog_post_page.html   | 18 +++++++
 website/blog/tests.py                         | 49 +++++++++++++++++++
 6 files changed, 129 insertions(+)
 create mode 100644 static/src/scss/_similar_content.scss
 create mode 100644 website/blog/migrations/0005_auto_20230602_1236.py
 create mode 100644 website/blog/tests.py

diff --git a/static/src/scss/_similar_content.scss b/static/src/scss/_similar_content.scss
new file mode 100644
index 0000000..d9f5af9
--- /dev/null
+++ b/static/src/scss/_similar_content.scss
@@ -0,0 +1,16 @@
+section#similar-content {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  margin-top: 2rem;
+
+  h2 {
+    color: inherit;
+  }
+
+  .media {
+    @include desktop {
+      transform: scale(85%);
+    }
+  }
+}
diff --git a/static/src/scss/base.scss b/static/src/scss/base.scss
index dba0456..b4dd01b 100644
--- a/static/src/scss/base.scss
+++ b/static/src/scss/base.scss
@@ -20,6 +20,7 @@
 @import "404";
 @import "password_required";
 @import "commento";
+@import "similar_content";
 
 html,
 body {
diff --git a/website/blog/migrations/0005_auto_20230602_1236.py b/website/blog/migrations/0005_auto_20230602_1236.py
new file mode 100644
index 0000000..3fc9246
--- /dev/null
+++ b/website/blog/migrations/0005_auto_20230602_1236.py
@@ -0,0 +1,13 @@
+# Generated by Django 4.1.9 on 2023-06-02 12:36
+
+from django.contrib.postgres.operations import TrigramExtension
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("blog", "0004_alter_blogpostcollectionlistpage_body_and_more"),
+    ]
+
+    operations = [TrigramExtension()]  # enables pg_trgm, needed by TrigramSimilarity
diff --git a/website/blog/models.py
b/website/blog/models.py
index f99d4e9..a8942f0 100644
--- a/website/blog/models.py
+++ b/website/blog/models.py
@@ -1,5 +1,6 @@
 from typing import Any, Optional, Type
 
+from django.contrib.postgres.search import TrigramSimilarity
 from django.db import models
 from django.utils import timezone
 from django.utils.functional import cached_property
@@ -66,6 +67,37 @@ class BlogPostPage(BaseContentPage):
     def tag_list_page_url(self) -> Optional[str]:
         return SingletonPageCache.get_url(BlogPostTagListPage)
 
+    def get_similar_posts(self) -> models.QuerySet:
+        """Return up to 3 other listed posts, most similar first."""
+        try:
+            listing_pages = BlogPostListPage.objects.get().get_listing_pages()
+        except BlogPostListPage.DoesNotExist:
+            return BlogPostPage.objects.none()
+
+        similar_posts = listing_pages.exclude(id=self.id).annotate(
+            title_similarity=TrigramSimilarity("title", self.title),
+            # If this page has no subtitle, ignore it as part of similarity
+            subtitle_similarity=TrigramSimilarity("subtitle", self.subtitle)
+            if self.subtitle
+            else models.Value(1),
+        )
+
+        page_tags = list(self.tags.values_list("id", flat=True))
+        similar_posts = similar_posts.annotate(
+            # If this page has no tags, ignore it as part of similarity.
+            # Divide by a float: integer division would truncate to 0 or 1.
+            tag_similarity=models.Count("tags", filter=models.Q(tags__in=page_tags))
+            / models.Value(len(page_tags), output_field=models.FloatField())
+            if page_tags
+            else models.Value(1)
+        )
+
+        return similar_posts.annotate(
+            similarity=(models.F("tag_similarity") * 2)
+            * (models.F("title_similarity") * 10)
+            * (models.F("subtitle_similarity"))
+        ).order_by("-similarity")[:3]
+
 
 class BlogPostTagListPage(BaseListingPage):
     max_count = 1
diff --git a/website/blog/templates/blog/blog_post_page.html b/website/blog/templates/blog/blog_post_page.html
index 1d82e45..be9a782 100644
--- a/website/blog/templates/blog/blog_post_page.html
+++ b/website/blog/templates/blog/blog_post_page.html
@@ -1 +1,19 @@
 {% extends "common/content_page.html" %}
+
+{% load cache util_tags %}
+
+{% block post_content %}
+  {{ block.super }}
+
+  {% cache
FRAGMENT_CACHE_TTL|jitter:FRAGMENT_CACHE_TTL_JITTER "similar-content" page.id request.is_preview %} +
+    <section id="similar-content">
+      <h2>Similar content</h2>
+
+      {% for page in page.get_similar_posts %}
+        {% block listing_item %}
+          {% include "common/listing-item.html" %}
+        {% endblock %}
+      {% endfor %}
+    </section>
+  {% endcache %}
+{% endblock %}
diff --git a/website/blog/tests.py b/website/blog/tests.py
new file mode 100644
index 0000000..4ec7c3b
--- /dev/null
+++ b/website/blog/tests.py
@@ -0,0 +1,49 @@
+from django.test import TestCase
+
+from website.home.models import HomePage
+
+from .factories import BlogPostListPageFactory, BlogPostPageFactory
+
+
+class BlogPostPageTestCase(TestCase):  # status and query-count checks for one post page
+    @classmethod
+    def setUpTestData(cls) -> None:
+        cls.home_page = HomePage.objects.get()  # .get() needs exactly one HomePage row
+        cls.blog_post_list_page = BlogPostListPageFactory(parent=cls.home_page)
+        cls.page = BlogPostPageFactory(parent=cls.blog_post_list_page)
+
+    def test_accessible(self) -> None:
+        response = self.client.get(self.page.url)
+        self.assertEqual(response.status_code, 200)
+
+    def test_queries(self) -> None:
+        with self.assertNumQueries(45):  # fails unless exactly 45 queries are run
+            self.client.get(self.page.url)
+
+
+class BlogPostListPageTestCase(TestCase):  # listing page with two child posts + feed
+    @classmethod
+    def setUpTestData(cls) -> None:
+        cls.home_page = HomePage.objects.get()  # .get() needs exactly one HomePage row
+        cls.page = BlogPostListPageFactory(parent=cls.home_page)
+
+        BlogPostPageFactory(parent=cls.page)
+        BlogPostPageFactory(parent=cls.page)
+
+    def test_accessible(self) -> None:
+        response = self.client.get(self.page.url)
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(len(response.context["listing_pages"]), 2)  # both child posts
+        self.assertContains(response, self.page.reverse_subpage("feed"))
+
+    def test_queries(self) -> None:
+        with self.assertNumQueries(44):  # fails unless exactly 44 queries are run
+            self.client.get(self.page.url)
+
+    def test_feed_accessible(self) -> None:
+        with self.assertNumQueries(12):  # fails unless exactly 12 queries are run
+            response = self.client.get(
+                self.page.url + self.page.reverse_subpage("feed")
+            )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response["Content-Type"], "application/rss+xml; charset=utf-8")