28 lines
750 B
Python
28 lines
750 B
Python
import json
|
|
import gzip
|
|
|
|
|
|
def main(path="pages.json", top_n=3):
    """Print, for every page, the other pages that compress best alongside it.

    The file at *path* is loaded as a JSON object mapping page titles to
    page text. For each page the gzip-compressed size of its text is
    printed, then every other page is scored by how many extra compressed
    bytes are needed when its text is appended to the current page's text
    (a smaller number means the two texts share more content). The
    *top_n* lowest-scoring ``(title, extra_bytes)`` pairs are printed.

    Args:
        path: Location of the JSON file to read. Defaults to
            ``"pages.json"`` in the current working directory.
        top_n: How many of the closest pages to print per page.

    Returns:
        None. All results are written to standard output.

    Raises:
        OSError: If *path* cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Be explicit about UTF-8: the default for open() depends on the
    # platform locale, which breaks non-ASCII page text on some systems.
    with open(path, encoding="utf-8") as f:
        pages = json.load(f)

    # Encode every page once up front instead of once per comparison.
    encoded = {title: content.encode() for title, content in pages.items()}

    for title, data in encoded.items():
        print(f"Checking '{title}'")

        baseline_size = len(gzip.compress(data))
        print("\tCompressed size:", baseline_size)

        # Extra compressed bytes needed to append each other page; the
        # current page is skipped by filtering rather than copying the dict.
        extra_bytes = {
            other_title: len(gzip.compress(data + other_data)) - baseline_size
            for other_title, other_data in encoded.items()
            if other_title != title
        }

        # sorted() is stable, so ties keep the order of the input file.
        similar_pages = sorted(extra_bytes.items(), key=lambda item: item[1])

        print("\t", similar_pages[:top_n])
|
|
|
|
# Invoke main() only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
|