24 lines
661 B
Python
24 lines
661 B
Python
|
from bs4 import BeautifulSoup
|
||
|
from urllib.parse import urlsplit, urlunsplit, parse_qs, urlencode
|
||
|
|
||
|
|
||
|
def on_page_content(html, page, config, files):
    """Tag every external link in *html* with a ``ref=mysite.com`` parameter.

    The HTML is parsed, each ``<a href=...>`` whose URL has both a scheme and
    a network location is rewritten so its query string carries
    ``ref=mysite.com``, and the document is serialized back to a string.
    Links without a scheme or without a host (relative paths, fragment-only
    anchors, ``mailto:`` addresses, ...) are left untouched.

    Args:
        html: HTML markup of the page, as a string.
        page: Unused here; kept so the signature stays compatible with callers.
        config: Unused here; kept so the signature stays compatible with callers.
        files: Unused here; kept so the signature stays compatible with callers.

    Returns:
        The HTML re-serialized by BeautifulSoup, with external links updated.
    """
    soup = BeautifulSoup(html, "html.parser")

    for link in soup.find_all("a", href=True):
        parsed_url = urlsplit(link["href"])

        if not parsed_url.scheme or not parsed_url.netloc:
            # Not an external link
            continue

        # Stick a reference on external URLs.
        # keep_blank_values=True so parameters such as "?flag=" are not
        # silently dropped from the rewritten URL.
        query = parse_qs(parsed_url.query, keep_blank_values=True)
        # Overrides any "ref" the link already carried.
        query["ref"] = "mysite.com"

        # parse_qs maps each key to a *list* of values; doseq=True makes
        # urlencode emit one "key=value" pair per list item instead of
        # encoding the list's repr (e.g. "a=%5B%271%27%5D").
        parsed_url = parsed_url._replace(query=urlencode(query, doseq=True))

        link["href"] = urlunsplit(parsed_url)

    return str(soup)
|