Fix pickle errors for metadata
This commit is contained in:
parent
d242f94024
commit
41a04af8dc
3 changed files with 23 additions and 6 deletions
|
@ -7,7 +7,7 @@ from django.db.models.functions import Cast, Coalesce
|
|||
from django.http import HttpRequest, HttpResponse, HttpResponsePermanentRedirect
|
||||
from django.utils import timezone
|
||||
from django.utils.functional import cached_property
|
||||
from metadata_parser import MetadataParser
|
||||
from metadata_parser import ParsedResult
|
||||
from modelcluster.fields import ParentalManyToManyField
|
||||
from wagtail.admin.panels import FieldPanel
|
||||
from wagtail.models import Page, PageQuerySet, Site
|
||||
|
@ -239,7 +239,7 @@ class ExternalBlogPostPage(BaseContentPage):
|
|||
return tags
|
||||
|
||||
@cached_property
|
||||
def metadata(self) -> MetadataParser:
|
||||
def metadata(self) -> ParsedResult:
|
||||
return get_page_metadata(self.external_url)
|
||||
|
||||
@cached_property
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
import pickle
|
||||
|
||||
from django.test import TestCase
|
||||
from django.urls import reverse
|
||||
|
||||
|
@ -96,7 +98,9 @@ class ExternalBlogPostPageTestCase(TestCase):
|
|||
def setUpTestData(cls) -> None:
|
||||
cls.home_page = HomePage.objects.get()
|
||||
cls.blog_post_list_page = BlogPostListPageFactory(parent=cls.home_page)
|
||||
cls.page = ExternalBlogPostPageFactory(parent=cls.blog_post_list_page)
|
||||
cls.page = ExternalBlogPostPageFactory(
|
||||
parent=cls.blog_post_list_page, external_url="https://example.com"
|
||||
)
|
||||
|
||||
def test_redirects(self) -> None:
|
||||
with self.assertNumQueries(10):
|
||||
|
@ -107,3 +111,11 @@ class ExternalBlogPostPageTestCase(TestCase):
|
|||
status_code=301,
|
||||
fetch_redirect_response=False,
|
||||
)
|
||||
|
||||
def test_metadata(self) -> None:
|
||||
metadata = self.page.metadata
|
||||
|
||||
self.assertIsNone(metadata.soup)
|
||||
|
||||
# Confirm the cached result can be pickled
|
||||
pickle.dumps(metadata)
|
||||
|
|
|
@ -11,7 +11,7 @@ from django.http import QueryDict
|
|||
from django.http.request import HttpRequest
|
||||
from django.utils.text import slugify
|
||||
from django_cache_decorator import django_cache_decorator
|
||||
from metadata_parser import MetadataParser
|
||||
from metadata_parser import MetadataParser, ParsedResult
|
||||
from wagtail.models import Page, Site
|
||||
from wagtail.models import get_page_models as get_wagtail_page_models
|
||||
|
||||
|
@ -128,8 +128,13 @@ def get_ai_robots_txt() -> str:
|
|||
|
||||
|
||||
@django_cache_decorator(time=21600)
|
||||
def get_page_metadata(url: str) -> MetadataParser:
|
||||
return MetadataParser(url=url, search_head_only=True)
|
||||
def get_page_metadata(url: str) -> ParsedResult:
|
||||
metadata = MetadataParser(url=url, search_head_only=True).parsed_result
|
||||
|
||||
# HACK: the BeautifulSoup tree doesn't pickle cleanly, so drop it to keep the result cacheable
|
||||
metadata.soup = None
|
||||
|
||||
return metadata
|
||||
|
||||
|
||||
def extend_query_params(url: str, params: dict[str, Any]) -> str:
|
||||
|
|
Loading…
Reference in a new issue