"""Rank the pages of a JSON file by gzip-based similarity to each other."""

import gzip
import json


def _compressed_size(data: bytes) -> int:
    """Return the number of bytes *data* occupies after gzip compression."""
    return len(gzip.compress(data))


def main(path: str = "pages.json", top_n: int = 3) -> None:
    """Print, for every page, the other pages that compress best alongside it.

    The file at *path* must hold a JSON object mapping page titles to page
    text. For each page the function prints its gzip-compressed size, then
    the *top_n* other pages with the smallest "extra cost": the compressed
    size of this page's text followed by the other page's text, minus the
    compressed size of this page alone. A small extra cost means gzip could
    reuse much of the first page to encode the second.

    Args:
        path: Location of the JSON file to read.
        top_n: How many of the closest pages to print per page.

    Raises:
        OSError: If *path* cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        AttributeError: If the top-level value is not an object or a page
            value is not a string.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(path, encoding="utf-8") as f:
        pages = json.load(f)

    # Encode each page once up front instead of once per compared pair.
    # Concatenating the UTF-8 bytes is equivalent to encoding the
    # concatenated strings.
    encoded = {title: content.encode() for title, content in pages.items()}

    for title, content in encoded.items():
        print(f"Checking '{title}'")

        own_size = _compressed_size(content)
        print("\tCompressed size:", own_size)

        # Extra bytes needed to also store each other page after this one.
        extra_cost = {
            other_title: _compressed_size(content + other_content) - own_size
            for other_title, other_content in encoded.items()
            if other_title != title
        }

        # sorted() is stable, so ties keep the file's original page order.
        similar_pages = sorted(extra_cost.items(), key=lambda item: item[1])
        print("\t", similar_pages[:top_n])


if __name__ == "__main__":
    main()