Add similar content based on text Trigram similarity
This commit is contained in:
parent
5d4c095227
commit
6ab0ff3fa7
6 changed files with 129 additions and 0 deletions
16
static/src/scss/_similar_content.scss
Normal file
16
static/src/scss/_similar_content.scss
Normal file
|
@ -0,0 +1,16 @@
|
|||
// Styles for the #similar-content section: a centred, vertically stacked list.
section#similar-content {
  display: flex;
  flex-direction: column;
  align-items: center;
  margin-top: 2rem;

  h2 {
    color: inherit;
  }

  .media {
    @include desktop {
      // Unitless factor instead of `85%`: CSS Transforms Level 1 only defines
      // scale() with <number> arguments, so 0.85 is the widely supported
      // spelling of the same 85% scale.
      transform: scale(0.85);
    }
  }
}
|
|
@ -20,6 +20,7 @@
|
|||
@import "404";
|
||||
@import "password_required";
|
||||
@import "commento";
|
||||
@import "similar_content";
|
||||
|
||||
html,
|
||||
body {
|
||||
|
|
13
website/blog/migrations/0005_auto_20230602_1236.py
Normal file
13
website/blog/migrations/0005_auto_20230602_1236.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Generated by Django 4.1.9 on 2023-06-02 12:36
|
||||
|
||||
from django.contrib.postgres.operations import TrigramExtension
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Run Django's ``TrigramExtension`` operation after blog migration 0004."""

    # Ordered after the previous migration of the "blog" app.
    dependencies = [
        (
            "blog",
            "0004_alter_blogpostcollectionlistpage_body_and_more",
        ),
    ]

    # Single operation: enable the PostgreSQL trigram extension.
    operations = [
        TrigramExtension(),
    ]
|
|
@ -1,5 +1,6 @@
|
|||
from typing import Any, Optional, Type
|
||||
|
||||
from django.contrib.postgres.search import TrigramSimilarity
|
||||
from django.db import models
|
||||
from django.utils import timezone
|
||||
from django.utils.functional import cached_property
|
||||
|
@ -66,6 +67,37 @@ class BlogPostPage(BaseContentPage):
|
|||
def tag_list_page_url(self) -> Optional[str]:
|
||||
return SingletonPageCache.get_url(BlogPostTagListPage)
|
||||
|
||||
def get_similar_posts(self) -> models.QuerySet:
    """
    Return up to three listed blog posts ranked by similarity to this one.

    Candidates are the pages returned by ``get_listing_pages()`` on the
    single ``BlogPostListPage``, excluding this page. Each candidate gets a
    ``similarity`` score which is the product of three factors:

    * ``title_similarity``: trigram similarity between the titles (x10).
    * ``subtitle_similarity``: trigram similarity between the subtitles.
    * ``tag_similarity``: fraction of this page's tags the candidate
      shares (x2).

    A factor this page cannot contribute to (no subtitle, no tags) is
    replaced with the neutral value ``1`` so it does not affect the product.

    Returns:
        A queryset sliced to the three highest-scoring posts, or an empty
        ``BlogPostPage`` queryset when no ``BlogPostListPage`` exists.
    """
    # Function-scope import so this method stays self-contained.
    from django.db.models.functions import Cast

    try:
        listing_pages = BlogPostListPage.objects.get().get_listing_pages()
    except BlogPostListPage.DoesNotExist:
        return BlogPostPage.objects.none()

    # If this page has no subtitle, ignore it as part of similarity
    if self.subtitle:
        subtitle_similarity = TrigramSimilarity("subtitle", self.subtitle)
    else:
        subtitle_similarity = models.Value(1)

    similar_posts = listing_pages.exclude(id=self.id).annotate(
        title_similarity=TrigramSimilarity("title", self.title),
        subtitle_similarity=subtitle_similarity,
    )

    page_tags = list(self.tags.values_list("id", flat=True))

    # If this page has no tags, ignore them as part of similarity
    if page_tags:
        # COUNT(...) is an integer, and integer / integer truncates in
        # PostgreSQL: without the cast every partial tag overlap would
        # collapse to 0 and zero out the whole similarity product.
        shared_tag_count = Cast(
            models.Count("tags", filter=models.Q(tags__in=page_tags)),
            output_field=models.FloatField(),
        )
        tag_similarity = shared_tag_count / float(len(page_tags))
    else:
        tag_similarity = models.Value(1)

    similar_posts = similar_posts.annotate(tag_similarity=tag_similarity)

    return similar_posts.annotate(
        similarity=(models.F("tag_similarity") * 2)
        * (models.F("title_similarity") * 10)
        * models.F("subtitle_similarity")
    ).order_by("-similarity")[:3]
|
||||
|
||||
|
||||
class BlogPostTagListPage(BaseListingPage):
|
||||
max_count = 1
|
||||
|
|
|
@ -1 +1,19 @@
|
|||
{% extends "common/content_page.html" %}

{% load cache util_tags %}

{% block post_content %}
  {{ block.super }}

  {% cache FRAGMENT_CACHE_TTL|jitter:FRAGMENT_CACHE_TTL_JITTER "similar-content" page.id request.is_preview %}
    {# Bind the queryset once so the emptiness check and the loop share one evaluation. #}
    {% with similar_posts=page.get_similar_posts %}
      {# Skip the whole section, heading included, when there is nothing to show. #}
      {% if similar_posts %}
        <section class="container similar-content" id="similar-content">
          <h2 class="subtitle is-size-2">Similar content</h2>

          {# The loop variable stays named "page"; presumably the included listing item reads it. #}
          {% for page in similar_posts %}
            {% block listing_item %}
              {% include "common/listing-item.html" %}
            {% endblock %}
          {% endfor %}
        </section>
      {% endif %}
    {% endwith %}
  {% endcache %}
{% endblock %}
|
||||
|
|
49
website/blog/tests.py
Normal file
49
website/blog/tests.py
Normal file
|
@ -0,0 +1,49 @@
|
|||
from django.test import TestCase
|
||||
|
||||
from website.home.models import HomePage
|
||||
|
||||
from .factories import BlogPostListPageFactory, BlogPostPageFactory
|
||||
|
||||
|
||||
class BlogPostPageTestCase(TestCase):
    """Request-level checks for a blog post page under a blog post list page."""

    @classmethod
    def setUpTestData(cls) -> None:
        """Create the page tree: home page -> blog post list -> blog post."""
        home = HomePage.objects.get()
        listing = BlogPostListPageFactory(parent=home)
        post = BlogPostPageFactory(parent=listing)

        cls.home_page = home
        cls.blog_post_list_page = listing
        cls.page = post

    def test_accessible(self) -> None:
        """The blog post page responds with HTTP 200."""
        resp = self.client.get(self.page.url)
        self.assertEqual(resp.status_code, 200)

    def test_queries(self) -> None:
        """Fetching the blog post page runs the expected number of queries."""
        expected_queries = 45
        # The URL is looked up inside the context manager so that any queries
        # it may issue remain part of the counted total.
        with self.assertNumQueries(expected_queries):
            self.client.get(self.page.url)
|
||||
|
||||
|
||||
class BlogPostListPageTestCase(TestCase):
    """Request-level checks for the blog post list page and its "feed" subpage."""

    @classmethod
    def setUpTestData(cls) -> None:
        """Create a blog post list page under the home page, with two posts."""
        home = HomePage.objects.get()
        listing = BlogPostListPageFactory(parent=home)

        cls.home_page = home
        cls.page = listing

        # Two child posts, matching the count asserted in test_accessible.
        for _ in range(2):
            BlogPostPageFactory(parent=listing)

    def test_accessible(self) -> None:
        """The listing responds with 200, lists both posts and links the feed."""
        resp = self.client.get(self.page.url)

        self.assertEqual(resp.status_code, 200)
        self.assertEqual(len(resp.context["listing_pages"]), 2)
        self.assertContains(resp, self.page.reverse_subpage("feed"))

    def test_queries(self) -> None:
        """Fetching the listing page runs the expected number of queries."""
        expected_queries = 44
        with self.assertNumQueries(expected_queries):
            self.client.get(self.page.url)

    def test_feed_accessible(self) -> None:
        """The feed responds with 200 and an RSS content type within 12 queries."""
        # The feed URL is built inside the context manager so that any queries
        # it may issue remain part of the counted total.
        with self.assertNumQueries(12):
            feed_url = self.page.url + self.page.reverse_subpage("feed")
            resp = self.client.get(feed_url)

        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp["Content-Type"], "application/rss+xml; charset=utf-8")
|
Loading…
Reference in a new issue