From 6f8aa6ee09accbb46db6927dac0e61ca64b7d143 Mon Sep 17 00:00:00 2001
From: Jake Howard
Date: Thu, 25 May 2017 20:34:41 +0100
Subject: [PATCH] Calculate wordcount better and make togglable

---
 md_pdf/assets/templates/footer-template.html |  4 +++-
 md_pdf/build/context.py                      |  4 +++-
 md_pdf/config/validate.py                    | 10 +++++++++-
 md_pdf/utils.py                              | 12 ++++++++++++
 test-files/mdp.yml                           |  1 +
 5 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/md_pdf/assets/templates/footer-template.html b/md_pdf/assets/templates/footer-template.html
index ceabfa5..2ea9fde 100644
--- a/md_pdf/assets/templates/footer-template.html
+++ b/md_pdf/assets/templates/footer-template.html
@@ -11,7 +11,9 @@
 Page of
-Total Words: {{ word_count }}
+{% if word_count %}
+Total Words: {{ word_count }}
+{% endif %}
diff --git a/md_pdf/build/context.py b/md_pdf/build/context.py
index e380d42..0a6d127 100644
--- a/md_pdf/build/context.py
+++ b/md_pdf/build/context.py
@@ -1,5 +1,6 @@
 from md_pdf.consts import TEMPLATES_DIR, STATIC_DIR
 from word_count import word_count
+from md_pdf.utils import get_plain_text
 
 
 EXTRA_CONTEXT = {
@@ -12,6 +13,7 @@ def get_context(config, content):
     context = config['context'].copy()
     context['title'] = config['title']
     context = dict(context, **EXTRA_CONTEXT, **{
-        'word_count': word_count(content)
     })
+    if config.get('show_word_count'):
+        context['word_count'] = word_count(get_plain_text(content))
     return context
diff --git a/md_pdf/config/validate.py b/md_pdf/config/validate.py
index d7e0199..c8dc41e 100644
--- a/md_pdf/config/validate.py
+++ b/md_pdf/config/validate.py
@@ -76,6 +76,13 @@ def validate_toc(config):
         raise ConfigValidationException("Table of contents key should be either true or false")
 
 
+def validate_wordcount(config):
+    if 'show_word_count' not in config:
+        return
+    if type(config['show_word_count']) != bool:
+        raise ConfigValidationException("Show word count key should be either true or false")
+
+
 def validate_config(config):
     logger.debug("Validating Config...")
     for validator in [
@@ -84,7 +91,8 @@ def validate_config(config):
         test_output,
         validate_bibliography,
         validate_context,
-        validate_toc
+        validate_toc,
+        validate_wordcount
     ]:
         validator(config)
     logger.debug("Config Ok!")
diff --git a/md_pdf/utils.py b/md_pdf/utils.py
index 69a0e0b..101e639 100644
--- a/md_pdf/utils.py
+++ b/md_pdf/utils.py
@@ -1,6 +1,7 @@
 import shutil
 import os
 import logging
+from bs4 import BeautifulSoup
 
 
 logger = logging.getLogger(__file__)
@@ -19,3 +20,14 @@ def safe_list_get(l, idx, default):
         return l[idx]
     except IndexError:
         return default
+
+
+def get_plain_text(content):
+    soup = BeautifulSoup(content, 'html.parser')
+    body = soup.find('body')
+    try:
+        body.find('h1', class_='references-title').extract()
+        body.find('div', class_='references').extract()
+    except AttributeError:
+        pass
+    return body.text
diff --git a/test-files/mdp.yml b/test-files/mdp.yml
index 661ce22..538845b 100644
--- a/test-files/mdp.yml
+++ b/test-files/mdp.yml
@@ -13,3 +13,4 @@ context:
   turnitin_number: 789123
 title: test title
 toc: true
+show_word_count: true