This repository has been archived on 2023-03-26. You can view files and clone it, but cannot push or open issues or pull requests.
md-pdf/md_pdf/utils.py

38 lines
794 B
Python
Raw Normal View History

2017-03-29 22:18:01 +01:00
import shutil
import os
2017-04-01 16:12:03 +01:00
import logging
from bs4 import BeautifulSoup
2017-06-09 23:07:30 +01:00
from typing import List
2017-04-01 16:12:03 +01:00
# Module-level logger shared by the helpers in this file. It is named after
# this file's path (__file__) rather than the module's dotted import name.
# NOTE(review): the usual convention is logging.getLogger(__name__); confirm
# whether the path-based logger name is intentional before changing it.
logger = logging.getLogger(__file__)
2017-03-29 22:18:01 +01:00
2017-06-09 23:07:30 +01:00
def remove_dir(dir: str):
    """Recursively delete the directory *dir* and everything inside it.

    A directory that does not exist is silently ignored, so the call is safe
    to repeat. Any other error raised by ``shutil.rmtree`` propagates to the
    caller.

    Args:
        dir: Path of the directory to remove.
    """
    logger.debug("Removing directory {}.".format(dir))
    try:
        # rmtree deletes the directory itself as well as its contents, so no
        # follow-up os.rmdir is needed (it would always fail with
        # FileNotFoundError once rmtree has succeeded).
        shutil.rmtree(dir)
    except FileNotFoundError:
        # Nothing to remove: treat a missing directory as already cleaned up.
        pass
2017-04-04 21:44:17 +01:00
2017-06-09 23:07:30 +01:00
def safe_list_get(l: List, idx: int, default):
    """Return ``l[idx]``, or *default* when that index is out of range.

    Only ``IndexError`` is handled; any other exception raised by the
    indexing operation propagates unchanged.

    Args:
        l: Sequence to index into.
        idx: Position to look up (negative indices count from the end).
        default: Value returned when ``l[idx]`` raises ``IndexError``.
    """
    try:
        item = l[idx]
    except IndexError:
        item = default
    return item
2017-06-09 23:07:30 +01:00
def get_plain_text(content: str) -> str:
    """Return the text of the HTML ``<body>`` in *content*, minus references.

    The markup is parsed with BeautifulSoup's ``html.parser``. The first
    ``<h1 class="references-title">`` and the first
    ``<div class="references">`` inside the body are removed before the text
    is extracted.

    Args:
        content: HTML markup to extract text from.

    Returns:
        The body's text with the references heading and section removed, or
        *content* unchanged when the markup contains no ``<body>`` element.
    """
    soup = BeautifulSoup(content, 'html.parser')
    body = soup.find('body')
    if body is None:
        return content
    # Remove each references element independently: a missing heading must
    # not prevent the references <div> from being stripped (and vice versa).
    for tag_name, css_class in (('h1', 'references-title'), ('div', 'references')):
        element = body.find(tag_name, class_=css_class)
        if element is not None:
            element.extract()
    return body.text