from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit

from bs4 import BeautifulSoup


def _tag_external_url(href):
    """Return *href* with ``ref=mysite.com`` set in its query string.

    Returns ``None`` when *href* is not an absolute URL (it lacks a scheme
    or a network location) or cannot be parsed, signalling that the link
    should be left untouched.
    """
    try:
        parts = urlsplit(href)
    except ValueError:
        # urlsplit rejects some malformed hrefs (e.g. unbalanced IPv6
        # brackets); one bad link should not abort rendering of the page.
        return None

    if not parts.scheme or not parts.netloc:
        # Not an external link (relative path, bare anchor, mailto:, ...).
        return None

    # parse_qs maps every key to a *list* of values. Blank values are kept
    # so parameters such as "?flag=" survive the round trip.
    params = parse_qs(parts.query, keep_blank_values=True)
    # Any existing "ref" parameter is replaced by ours.
    params["ref"] = "mysite.com"

    # doseq=True makes urlencode emit one key=value pair per list item
    # instead of URL-encoding the list's repr ("%5B%271%27%5D").
    return urlunsplit(parts._replace(query=urlencode(params, doseq=True)))


def on_page_content(html, page, config, files):
    """Add a ``ref=mysite.com`` query parameter to every external link.

    The HTML is parsed with BeautifulSoup's ``html.parser``; each
    ``<a href=...>`` whose URL has both a scheme and a network location
    gets its query string rewritten. All other links are left as they are.

    Args:
        html: HTML markup of the page, as a string.
        page: Unused; kept so the signature stays unchanged for callers.
        config: Unused; kept so the signature stays unchanged for callers.
        files: Unused; kept so the signature stays unchanged for callers.

    Returns:
        The document re-serialized by BeautifulSoup as a string, with the
        rewritten ``href`` attributes.
    """
    soup = BeautifulSoup(html, "html.parser")
    for link in soup.find_all("a", href=True):
        tagged = _tag_external_url(link["href"])
        if tagged is not None:
            link["href"] = tagged
    return str(soup)