23 lines
661 B
Python
23 lines
661 B
Python
from bs4 import BeautifulSoup
|
|
from urllib.parse import urlsplit, urlunsplit, parse_qs, urlencode
|
|
|
|
|
|
def on_page_content(html, page, config, files):
    """Append a ``ref=mysite.com`` query parameter to every external link.

    The signature matches a MkDocs ``on_page_content`` hook (presumably this
    file is registered as one -- confirm against the site configuration).

    Args:
        html: Rendered page HTML as a string.
        page: Unused here; accepted to satisfy the hook signature.
        config: Unused here; accepted to satisfy the hook signature.
        files: Unused here; accepted to satisfy the hook signature.

    Returns:
        The HTML re-serialized by BeautifulSoup, with each ``<a href>`` that
        has both a scheme and a network location rewritten to carry
        ``ref=mysite.com``. An existing ``ref`` parameter is overwritten.
    """
    soup = BeautifulSoup(html, "html.parser")

    for link in soup.find_all("a", href=True):
        try:
            parsed_url = urlsplit(link["href"])
        except ValueError:
            # Malformed href (e.g. unbalanced IPv6 brackets): leave the link
            # untouched rather than failing the whole page.
            continue

        if not parsed_url.scheme or not parsed_url.netloc:
            # Not an external link (relative URL, fragment, mailto:, ...)
            continue

        # Stick a reference on external URLs.
        # keep_blank_values=True so parameters such as "?a=" are not dropped.
        query = parse_qs(parsed_url.query, keep_blank_values=True)
        query["ref"] = ["mysite.com"]

        # parse_qs maps each key to a *list* of values; doseq=True makes
        # urlencode emit one "key=value" pair per list item instead of
        # encoding the list's repr (e.g. "a=%5B%271%27%5D").
        parsed_url = parsed_url._replace(query=urlencode(query, doseq=True))

        link["href"] = urlunsplit(parsed_url)

    return str(soup)
|