Add similar content based on text Trigram similarity

This commit is contained in:
Jake Howard 2023-06-02 15:33:15 +01:00
parent 5d4c095227
commit 6ab0ff3fa7
Signed by: jake
GPG Key ID: 57AFB45680EDD477
6 changed files with 129 additions and 0 deletions

View File

@ -0,0 +1,16 @@
// Layout for the "Similar content" section: a centred, vertical flex stack.
section#similar-content {
  margin-top: 2rem;

  // Stack children in a single column, centred on the cross axis.
  display: flex;
  flex-direction: column;
  align-items: center;

  // The heading takes its colour from the surrounding context.
  h2 {
    color: inherit;
  }

  // Render the media items at 85% size inside the `desktop` mixin's scope.
  .media {
    @include desktop {
      transform: scale(85%);
    }
  }
}

View File

@ -20,6 +20,7 @@
@import "404";
@import "password_required";
@import "commento";
@import "similar_content";
html,
body {

View File

@ -0,0 +1,13 @@
# Generated by Django 4.1.9 on 2023-06-02 12:36
from django.contrib.postgres.operations import TrigramExtension
from django.db import migrations
class Migration(migrations.Migration):
    """Run ``TrigramExtension`` so trigram similarity lookups are available.

    The blog's similar-content query relies on ``TrigramSimilarity``; this
    migration applies the matching PostgreSQL extension operation.
    """

    # Must run after the latest existing blog migration.
    dependencies = [
        ("blog", "0004_alter_blogpostcollectionlistpage_body_and_more"),
    ]

    operations = [
        TrigramExtension(),
    ]

View File

@ -1,5 +1,6 @@
from typing import Any, Optional, Type
from django.contrib.postgres.search import TrigramSimilarity
from django.db import models
from django.utils import timezone
from django.utils.functional import cached_property
@ -66,6 +67,37 @@ class BlogPostPage(BaseContentPage):
def tag_list_page_url(self) -> Optional[str]:
    """Return the URL that ``SingletonPageCache`` holds for ``BlogPostTagListPage``."""
    tag_list_url = SingletonPageCache.get_url(BlogPostTagListPage)
    return tag_list_url
def get_similar_posts(self) -> models.QuerySet:
    """
    Return up to three other listed posts, ordered most similar first.

    The ``similarity`` score is the product of three factors, all computed
    in the database:

    * ``title_similarity``: trigram similarity between the titles.
    * ``subtitle_similarity``: trigram similarity between the subtitles, or
      a neutral ``1`` when this post has no subtitle.
    * ``tag_similarity``: the fraction of this post's tags that the
      candidate also has, or a neutral ``1`` when this post has no tags.

    Returns an empty queryset when no ``BlogPostListPage`` exists.
    """
    # Function-scope import: ``Cast`` is only needed for the tag ratio below.
    from django.db.models.functions import Cast

    try:
        listing_pages = BlogPostListPage.objects.get().get_listing_pages()
    except BlogPostListPage.DoesNotExist:
        return BlogPostPage.objects.none()

    # If this page has no subtitle, ignore it as part of similarity
    if self.subtitle:
        subtitle_similarity = TrigramSimilarity("subtitle", self.subtitle)
    else:
        subtitle_similarity = models.Value(1)

    page_tags = list(self.tags.values_list("id", flat=True))
    if page_tags:
        shared_tag_count = models.Count(
            "tags", filter=models.Q(tags__in=page_tags)
        )
        # COUNT is an integer, and integer / integer truncates in PostgreSQL.
        # Without the cast every partial tag overlap becomes 0, which zeroes
        # the whole (multiplicative) similarity score.
        tag_similarity = Cast(
            shared_tag_count, output_field=models.FloatField()
        ) / len(page_tags)
    else:
        # If this page has no tags, ignore it as part of similarity
        tag_similarity = models.Value(1)

    similar_posts = (
        listing_pages.exclude(id=self.id)
        .annotate(
            title_similarity=TrigramSimilarity("title", self.title),
            subtitle_similarity=subtitle_similarity,
        )
        .annotate(tag_similarity=tag_similarity)
        .annotate(
            # The constant factors scale the score but do not affect ordering.
            similarity=(models.F("tag_similarity") * 2)
            * (models.F("title_similarity") * 10)
            * (models.F("subtitle_similarity"))
        )
        .order_by("-similarity")
    )

    # Only the three best matches are shown.
    return similar_posts[:3]
class BlogPostTagListPage(BaseListingPage):
max_count = 1

View File

@ -1 +1,19 @@
{% extends "common/content_page.html" %}
{% load cache util_tags %}
{% block post_content %}
{# Render the parent template's post content first, then append the similar-content section. #}
{{ block.super }}
{# The fragment is cached per page id and preview state; the TTL is passed through the `jitter` filter. #}
{% cache FRAGMENT_CACHE_TTL|jitter:FRAGMENT_CACHE_TTL_JITTER "similar-content" page.id request.is_preview %}
<section class="container similar-content" id="similar-content">
<h2 class="subtitle is-size-2">Similar content</h2>
{# The loop variable shadows the outer `page` inside the loop; presumably listing-item.html reads `page` from the context - confirm before renaming. #}
{% for page in page.get_similar_posts %}
{% block listing_item %}
{% include "common/listing-item.html" %}
{% endblock %}
{% endfor %}
</section>
{% endcache %}
{% endblock %}

49
website/blog/tests.py Normal file
View File

@ -0,0 +1,49 @@
from django.test import TestCase
from website.home.models import HomePage
from .factories import BlogPostListPageFactory, BlogPostPageFactory
class BlogPostPageTestCase(TestCase):
    """Smoke tests for requesting a single blog post page."""

    @classmethod
    def setUpTestData(cls) -> None:
        """Create one blog post under a blog list page under the home page."""
        home = HomePage.objects.get()
        listing = BlogPostListPageFactory(parent=home)

        cls.home_page = home
        cls.blog_post_list_page = listing
        cls.page = BlogPostPageFactory(parent=listing)

    def test_accessible(self) -> None:
        """The post page responds with HTTP 200."""
        status = self.client.get(self.page.url).status_code
        self.assertEqual(status, 200)

    def test_queries(self) -> None:
        """Requesting the post page issues exactly 45 queries."""
        # The URL lookup stays inside the context manager so any queries it
        # performs are counted, as in the request itself.
        with self.assertNumQueries(45):
            self.client.get(self.page.url)
class BlogPostListPageTestCase(TestCase):
    """Smoke tests for the blog post list page and its feed sub-page."""

    @classmethod
    def setUpTestData(cls) -> None:
        """Create a blog list page under the home page with two child posts."""
        home = HomePage.objects.get()
        listing = BlogPostListPageFactory(parent=home)

        cls.home_page = home
        cls.page = listing

        BlogPostPageFactory(parent=listing)
        BlogPostPageFactory(parent=listing)

    def test_accessible(self) -> None:
        """The list page returns 200, lists both posts and contains the feed path."""
        response = self.client.get(self.page.url)

        self.assertEqual(response.status_code, 200)
        listed = response.context["listing_pages"]
        self.assertEqual(len(listed), 2)
        self.assertContains(response, self.page.reverse_subpage("feed"))

    def test_queries(self) -> None:
        """Requesting the list page issues exactly 44 queries."""
        # The URL lookup stays inside the context manager so any queries it
        # performs are counted.
        with self.assertNumQueries(44):
            self.client.get(self.page.url)

    def test_feed_accessible(self) -> None:
        """The feed responds with 200 as RSS XML, within 12 queries."""
        with self.assertNumQueries(12):
            response = self.client.get(
                self.page.url + self.page.reverse_subpage("feed")
            )

        self.assertEqual(response.status_code, 200)
        content_type = response["Content-Type"]
        self.assertEqual(content_type, "application/rss+xml; charset=utf-8")