Fix pickle errors for metadata
This commit is contained in:
parent
d242f94024
commit
41a04af8dc
3 changed files with 23 additions and 6 deletions
|
@ -7,7 +7,7 @@ from django.db.models.functions import Cast, Coalesce
|
||||||
from django.http import HttpRequest, HttpResponse, HttpResponsePermanentRedirect
|
from django.http import HttpRequest, HttpResponse, HttpResponsePermanentRedirect
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
from django.utils.functional import cached_property
|
from django.utils.functional import cached_property
|
||||||
from metadata_parser import MetadataParser
|
from metadata_parser import ParsedResult
|
||||||
from modelcluster.fields import ParentalManyToManyField
|
from modelcluster.fields import ParentalManyToManyField
|
||||||
from wagtail.admin.panels import FieldPanel
|
from wagtail.admin.panels import FieldPanel
|
||||||
from wagtail.models import Page, PageQuerySet, Site
|
from wagtail.models import Page, PageQuerySet, Site
|
||||||
|
@ -239,7 +239,7 @@ class ExternalBlogPostPage(BaseContentPage):
|
||||||
return tags
|
return tags
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def metadata(self) -> MetadataParser:
|
def metadata(self) -> ParsedResult:
|
||||||
return get_page_metadata(self.external_url)
|
return get_page_metadata(self.external_url)
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
import pickle
|
||||||
|
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
|
|
||||||
|
@ -96,7 +98,9 @@ class ExternalBlogPostPageTestCase(TestCase):
|
||||||
def setUpTestData(cls) -> None:
|
def setUpTestData(cls) -> None:
|
||||||
cls.home_page = HomePage.objects.get()
|
cls.home_page = HomePage.objects.get()
|
||||||
cls.blog_post_list_page = BlogPostListPageFactory(parent=cls.home_page)
|
cls.blog_post_list_page = BlogPostListPageFactory(parent=cls.home_page)
|
||||||
cls.page = ExternalBlogPostPageFactory(parent=cls.blog_post_list_page)
|
cls.page = ExternalBlogPostPageFactory(
|
||||||
|
parent=cls.blog_post_list_page, external_url="https://example.com"
|
||||||
|
)
|
||||||
|
|
||||||
def test_redirects(self) -> None:
|
def test_redirects(self) -> None:
|
||||||
with self.assertNumQueries(10):
|
with self.assertNumQueries(10):
|
||||||
|
@ -107,3 +111,11 @@ class ExternalBlogPostPageTestCase(TestCase):
|
||||||
status_code=301,
|
status_code=301,
|
||||||
fetch_redirect_response=False,
|
fetch_redirect_response=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_metadata(self) -> None:
|
||||||
|
metadata = self.page.metadata
|
||||||
|
|
||||||
|
self.assertIsNone(metadata.soup)
|
||||||
|
|
||||||
|
# Confirm it can be pickled
|
||||||
|
pickle.dumps(metadata)
|
||||||
|
|
|
@ -11,7 +11,7 @@ from django.http import QueryDict
|
||||||
from django.http.request import HttpRequest
|
from django.http.request import HttpRequest
|
||||||
from django.utils.text import slugify
|
from django.utils.text import slugify
|
||||||
from django_cache_decorator import django_cache_decorator
|
from django_cache_decorator import django_cache_decorator
|
||||||
from metadata_parser import MetadataParser
|
from metadata_parser import MetadataParser, ParsedResult
|
||||||
from wagtail.models import Page, Site
|
from wagtail.models import Page, Site
|
||||||
from wagtail.models import get_page_models as get_wagtail_page_models
|
from wagtail.models import get_page_models as get_wagtail_page_models
|
||||||
|
|
||||||
|
@ -128,8 +128,13 @@ def get_ai_robots_txt() -> str:
|
||||||
|
|
||||||
|
|
||||||
@django_cache_decorator(time=21600)
|
@django_cache_decorator(time=21600)
|
||||||
def get_page_metadata(url: str) -> MetadataParser:
|
def get_page_metadata(url: str) -> ParsedResult:
|
||||||
return MetadataParser(url=url, search_head_only=True)
|
metadata = MetadataParser(url=url, search_head_only=True).parsed_result
|
||||||
|
|
||||||
|
# HACK: BeautifulSoup objects don't pickle cleanly, so drop the soup to keep the result cacheable
|
||||||
|
metadata.soup = None
|
||||||
|
|
||||||
|
return metadata
|
||||||
|
|
||||||
|
|
||||||
def extend_query_params(url: str, params: dict[str, Any]) -> str:
|
def extend_query_params(url: str, params: dict[str, Any]) -> str:
|
||||||
|
|
Loading…
Reference in a new issue