317 lines
9.1 KiB
Python
317 lines
9.1 KiB
Python
import csv
|
||
import os
|
||
import re
|
||
import shutil
|
||
import subprocess
|
||
import xml.etree.ElementTree as ET
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
|
||
import frontmatter
|
||
import pypandoc
|
||
from jinja2 import Environment, PackageLoader, select_autoescape
|
||
from slugify import slugify
|
||
|
||
# TODO: make newsletter URLs absolute to klank.school
|
||
# Jinja environment: templates are looked up in the "src" package and the
# autoescape policy is whatever select_autoescape() decides per template.
env = Environment(
    loader=PackageLoader("src"),
    autoescape=select_autoescape(),
)

# Where content is read from and where the generated site is written.
CONTENT_D = os.path.abspath("src/content")
OUTPUT_D = "dist"
OUT_ASSETS = "dist/assets"
SRC_ASSETS = "src/assets"

# Shared build state, filled in by the functions below.
documents = {}  # "meta", "inventory", one list per content folder, one page per top-level .md
now = datetime.now()  # build timestamp (naive, local time)
word_count = 0  # running total maintained by get_wordcount()
ins_count = 0  # running total of <ins> tags maintained by get_wordcount()
|
||
|
||
def imageSpread(params):
    """Render the image-spread snippet for one document of a collection.

    Used as the callback for the ``showImages`` shortcode.

    Args:
        params: Raw shortcode argument string, ``"<collection> <filename>"``.
            Tokens may be separated by any amount of whitespace; tokens
            after the second one are ignored.

    Returns:
        The HTML produced by ``snippets/spread-images.jinja``.

    Raises:
        IndexError: If fewer than two tokens are given, or no document in
            the collection has the requested ``filename``.
        KeyError: If the collection is not present in ``documents``.
    """
    # split() without an argument tolerates repeated spaces; split(" ")
    # would yield empty tokens and silently look up the wrong filename.
    tokens = params.split()
    if len(tokens) < 2:
        raise IndexError(
            f"showImages expects '<collection> <filename>', got {params!r}"
        )
    collection, filename = tokens[0], tokens[1]

    matches = [
        item for item in documents[collection] if item.get("filename") == filename
    ]
    if not matches:
        # Same exception type the bare ``matches[0]`` would raise, but with
        # enough context to find the offending shortcode.
        raise IndexError(
            f"showImages: no document with filename {filename!r} in {collection!r}"
        )

    template = env.select_template(["snippets/spread-images.jinja"])
    return template.render(documents=documents, content=matches[0], type=filename)
|
||
|
||
# shortcode callback that lists documents (invoked through the "shortcode" Jinja filter)
|
||
def listDocuments(params):
    """Render a listing of documents through the list-documents snippet.

    Used as the callback for the ``show`` shortcode.

    Args:
        params: Raw shortcode argument string, ``"<layout> <type>"``.
            Tokens may be separated by any amount of whitespace; tokens
            after the second one are ignored.

    Returns:
        The HTML produced by ``snippets/list-documents.jinja``, which
        receives ``documents``, ``layout`` and ``type``.

    Raises:
        IndexError: If fewer than two tokens are given.
    """
    # split() without an argument tolerates repeated spaces; split(" ")
    # would pass an empty string as ``type`` and render an empty listing.
    tokens = params.split()
    if len(tokens) < 2:
        raise IndexError(f"show expects '<layout> <type>', got {params!r}")
    layout, doc_type = tokens[0], tokens[1]

    template = env.select_template(["snippets/list-documents.jinja"])
    return template.render(documents=documents, layout=layout, type=doc_type)
|
||
|
||
|
||
# Source: https://github.com/gandreadis/markdown-word-count
|
||
def count_words_in_markdown(text):
    """Return an approximate count of the prose words in ``text``.

    Markup is stripped with a sequence of regular expressions (comments,
    footnote definitions, indented code, header IDs, images, HTML tags,
    punctuation-like characters, footnote references, enumerations) and
    the remaining whitespace-separated tokens are counted.

    Args:
        text: Markdown (or HTML-flavoured Markdown) source.

    Returns:
        The number of remaining words as an ``int``.
    """
    # Built by repetition so the width cannot be lost to whitespace
    # collapsing; the indented-code rule below needs at least 4 spaces.
    indent = " " * 4

    # Comments; DOTALL so a comment spanning several lines is removed whole.
    text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)
    # Tabs to spaces
    text = text.replace("\t", indent)
    # More than 1 space to 4 spaces
    text = re.sub(r" {2,}", indent, text)
    # Footnotes: lines starting with "[label]" not followed by "("
    text = re.sub(r"^\[[^]]*\][^(].*", "", text, flags=re.MULTILINE)
    # Indented blocks of code (list markers "-" / "*" are kept)
    text = re.sub(r"^( {4,}[^-*]).*", "", text, flags=re.MULTILINE)
    # Replace newlines with spaces for uniform handling
    text = text.replace("\n", " ")
    # Custom header IDs. The text is a single line by now, so the match
    # must stop at the first "}"; a greedy ".*" would delete everything
    # between the first "{#" and the last "}" of the document.
    text = re.sub(r"\{#[^}]*\}", "", text)
    # Remove images
    text = re.sub(r"!\[[^\]]*\]\([^)]*\)", "", text)
    # Remove HTML tags
    text = re.sub(r"</?[^>]*>", "", text)
    # Remove special characters
    text = re.sub(r"[#*`~\-–^=<>+|/:]", "", text)
    # Remove footnote references
    text = re.sub(r"\[[0-9]*\]", "", text)
    # Remove enumerations
    text = re.sub(r"[0-9#]*\.", "", text)

    return len(text.split())
|
||
|
||
|
||
# jinja filter for date formatting
|
||
def prettydate(value, format="%d/%m/%Y"):
    """Format a Unix timestamp as a date string (registered as a Jinja filter).

    Args:
        value: Seconds since the epoch; anything ``int()`` accepts.
        format: ``strftime`` pattern, day/month/year by default.

    Returns:
        The formatted date, interpreted in the local timezone.
    """
    seconds = int(value)
    moment = datetime.fromtimestamp(seconds)
    return moment.strftime(format)
|
||
|
||
|
||
def get_ins_count(html_string):
    """Count the opening ``<ins`` tags in an HTML string.

    Matching is case-insensitive and allows whitespace after ``<``;
    closing tags and longer tag names such as ``<insert>`` do not count.
    The module-level ``ins_count`` is not touched here.

    Args:
        html_string: HTML text to scan.

    Returns:
        The number of opening ``<ins`` tags found.
    """
    opening_ins = re.compile(r'<\s*ins\b', re.IGNORECASE)
    return sum(1 for _ in opening_ins.finditer(html_string))
|
||
|
||
# jinja filter to replace shortcodes in HTML
|
||
def shortcode_filter(value):
    """Expand ``{{ name args }}`` shortcodes found in ``value``.

    Known shortcode names are dispatched to their callback with the
    stripped argument string; unknown names are left in the text exactly
    as written.

    Args:
        value: Text (typically rendered HTML) that may contain shortcodes.

    Returns:
        ``value`` with every recognised shortcode replaced by the
        callback's return value.
    """
    handlers = {"show": listDocuments, "showImages": imageSpread}

    def expand(found):
        tag = found.group(1).strip()
        argument = found.group(2).strip()
        handler = handlers.get(tag)
        if handler is None:
            # Not one of ours: keep the original text untouched.
            return found.group(0)
        return handler(argument)

    return re.sub(r"{{\s*(\w+)\s+([^{}]+?)\s*}}", expand, value)
|
||
|
||
|
||
# Make the helpers available to templates as Jinja filters.
env.filters.update(
    shortcode=shortcode_filter,
    slugify=slugify,
    prettydate=prettydate,
)
|
||
|
||
|
||
# translate a single file into HTML
|
||
def render_single_file(path, dist, name=False):
    """Render one Markdown file to ``<dist>/<name>.html``.

    The template ``<name>.jinja`` is used when it exists, otherwise
    ``post.jinja``. The template receives ``documents``, the page data
    returned by ``get_page_data`` and ``name``.

    Args:
        path: Path of the Markdown source file.
        dist: Output directory; created if missing.
        name: Optional base name for the template lookup and the output
            file. Defaults to the stem of ``path``. (The argument used to
            be overwritten unconditionally, so passing it had no effect.)
    """
    name = name or Path(path).stem
    template = env.select_template([f"{name}.jinja", "post.jinja"])
    page = get_page_data(path)
    html = template.render(documents=documents, page=page, name=name)

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(dist, exist_ok=True)

    with open(f"{dist}/{name}.html", "w", encoding="utf-8") as output_file:
        output_file.write(html)
|
||
|
||
|
||
# find a pre-rendered page
|
||
def get_existing_page(path, slug):
    """Look up already-loaded page data for ``path`` in ``documents``.

    Args:
        path: Path of the Markdown source file.
        slug: Slug computed for that file.

    Returns:
        One of three shapes; callers treat any falsy value as "not loaded":

        * ``documents[slug]`` when ``slug`` is itself a key of ``documents``;
        * ``False`` when the file sits directly in the ``content`` folder;
        * otherwise a list of the entries in the file's collection whose
          ``slug`` matches. The list is empty when nothing matches or when
          the collection is unknown.
    """
    source = Path(path)
    folder = source.parent.name

    # A folder page (<collection>/<page>/index.md) belongs to the
    # collection two levels up, not to its own directory.
    if source.stem == "index" and folder != "content":
        folder = source.parent.parent.name

    if slug in documents:
        return documents[slug]

    if folder == "content":
        return False

    # .get(): a file in a deeper sub-folder has no collection of its own;
    # report "not found" instead of raising KeyError.
    return [item for item in documents.get(folder, []) if item.get("slug") == slug]
|
||
|
||
|
||
# compile markdown into cited HTML
|
||
def get_page_data(path):
    """Load a Markdown file and attach its rendered HTML.

    If ``get_existing_page`` already has data for the file, that is
    returned unchanged. Otherwise the front matter is loaded, ``slug``,
    ``filename`` and ``folder`` are stored on the page, ``has_passed`` is
    set when a ``start_datetime`` is present, and the content is converted
    to HTML by pandoc with citeproc and stored as ``page.body``.

    Args:
        path: Path of the Markdown source file.

    Returns:
        The loaded page, or whatever truthy value ``get_existing_page``
        returned for it.
    """
    source = Path(path)
    filename = source.stem
    folder = source.parent.name

    # Top-level pages are slugged by name only; nested ones include the folder.
    if folder == "content":
        slug = slugify(filename)
    else:
        slug = slugify(f"{folder}/{filename}")

    cached = get_existing_page(path, slug)
    if cached:
        return cached

    page = frontmatter.load(path)
    page["slug"] = slug
    page["filename"] = filename
    page["folder"] = folder

    if "start_datetime" in page:
        starts_at = datetime.fromtimestamp(page["start_datetime"])
        page["has_passed"] = starts_at < now

    content = page.content

    # Resolve include directives first with a Markdown -> Markdown pass.
    if ".include" in page.content:
        print("doing an include!")
        content = pypandoc.convert_text(
            page.content,
            to="md",
            format="md",
            extra_args=["--lua-filter=include-files.lua"],
        )

    citation_args = [
        "--citeproc",
        "--bibliography=library.bib",
        "--csl=harvard-cite-them-right.csl",
    ]
    page.body = pypandoc.convert_text(
        content,
        to="html",
        format="md",
        extra_args=citation_args,
    )

    return page
|
||
|
||
|
||
# Relabel a circuit SVG's width/height as millimetres and write it to the output folder
|
||
def save_circuit_svg(filepath, outpath, name):
    """Copy an SVG to ``outpath/name`` with its size relabelled in mm.

    The numeric ``width`` and ``height`` of the root element are kept but
    given an ``mm`` unit (``width="100"`` becomes ``width="100.0mm"``).

    Args:
        filepath: Path of the source SVG.
        outpath: Output directory; created if missing.
        name: File name to write inside ``outpath``.

    Raises:
        ValueError: If a size attribute is neither a plain number nor a
            number followed by ``px``.
    """
    tree = ET.parse(filepath)
    root = tree.getroot()

    for attribute in ("width", "height"):
        raw = root.get(attribute)
        if raw is None:
            # Leave a missing size alone; forcing it to "0.0mm" would
            # collapse the drawing to nothing.
            continue

        number = raw.strip()
        # Accept an explicit pixel unit ("100px") as well as a bare number.
        if number.lower().endswith("px"):
            number = number[:-2]

        root.set(attribute, f"{float(number)}mm")

    os.makedirs(outpath, exist_ok=True)

    tree.write(f"{outpath}/{name}")
|
||
|
||
|
||
# combine HTML & data with Jinja templates
|
||
def render_posts(path, output_path=OUTPUT_D):
    """Recursively build one content directory into the output tree.

    Everything is written to ``<output_path>/<stem of path>``:

    * ``.md`` files are rendered with ``render_single_file``;
    * sub-directories are processed recursively;
    * ``.svg`` files go through ``save_circuit_svg``;
    * image and audio files are copied unchanged.

    Other files are ignored. Entries are handled in sorted name order.

    Args:
        path: Directory to process.
        output_path: Parent directory of the generated folder.
    """
    source = Path(path)
    target = f"{output_path}/{source.stem}"
    media_suffixes = {".jpeg", ".jpg", ".mp3", ".png", ".webp"}

    for filename in sorted(os.listdir(path)):
        file_path = source / filename
        # Compare asset extensions case-insensitively so ".JPG", ".PNG",
        # ".SVG" and friends are all picked up, not just a listed few.
        suffix = file_path.suffix.lower()

        if file_path.suffix == ".md":
            render_single_file(file_path, target)
        elif file_path.is_dir():
            render_posts(file_path, target)
        elif suffix == ".svg":
            save_circuit_svg(file_path, target, filename)
        elif suffix in media_suffixes:
            os.makedirs(target, exist_ok=True)
            shutil.copyfile(file_path, f"{target}/{filename}")
|
||
|
||
|
||
# Pre-load before compiling
|
||
def preload_documents():
    """Fill ``documents`` with metadata and every page under CONTENT_D.

    ``documents["meta"]`` gets the formatted build time and the git commit
    count as ``version``. Each sub-directory of CONTENT_D becomes a list of
    pages (its ``.md`` files plus the ``index.md`` of each nested folder,
    in sorted name order); each top-level ``.md`` file becomes a single
    page stored under its stem.
    """
    commit_count = subprocess.check_output(["git", "rev-list", "--count", "HEAD"])
    documents["meta"] = {
        "now": now.strftime("%d %B %Y - %H:%M:%S"),
        "version": commit_count.decode("utf-8").strip(),
    }

    for entry in os.listdir(CONTENT_D):
        entry_path = os.path.join(CONTENT_D, entry)

        if not os.path.isdir(entry_path):
            # Stand-alone page directly inside the content folder.
            if Path(entry_path).suffix == ".md":
                documents[Path(entry_path).stem] = get_page_data(entry_path)
            continue

        collection = documents.setdefault(Path(entry_path).stem, [])

        for child in sorted(os.listdir(entry_path)):
            child_path = os.path.join(entry_path, child)
            if child.endswith(".md"):
                collection.append(get_page_data(child_path))
            elif os.path.isdir(child_path):
                # A folder page is represented by its index.md.
                collection.append(get_page_data(os.path.join(child_path, "index.md")))
|
||
|
||
|
||
def get_inventory():
    """Load the component inventory CSV into ``documents["inventory"]``.

    Each row becomes a dict keyed by the fixed column names below.

    NOTE(review): because the column names are passed explicitly, every
    row of the file is treated as data. If the CSV starts with a header
    row it will show up as the first entry; confirm against the file.
    """
    columns = (
        "ID",
        "Amount",
        "Name",
        "Value",
        "type",
        "Date",
        "Where",
        "Mounting type",
    )

    # Explicit UTF-8 keeps decoding independent of the platform locale;
    # newline="" is what the csv module expects so that line breaks inside
    # quoted fields are handed over untouched.
    with open(
        "src/content/component-inventory.csv", encoding="utf-8", newline=""
    ) as f:
        documents["inventory"] = list(csv.DictReader(f, fieldnames=columns))
|
||
|
||
|
||
def get_wordcount():
    """Add up word and ``<ins>`` counts across the thesis and its sections.

    Updates the module-level ``word_count`` and ``ins_count``, prints a
    line per counted page and stores the final word total in
    ``documents["meta"]["count"]``. The thesis page contributes words
    only; ``index`` pages are skipped except in the ``recipes`` section.
    """
    global word_count, ins_count

    word_count += count_words_in_markdown(documents["thesis"].body)

    for section in ("chapters", "components", "recipes"):
        keep_index_pages = section == "recipes"

        for c in documents[section]:
            if not keep_index_pages and c["filename"] == "index":
                continue

            count = count_words_in_markdown(c.body)
            icount = get_ins_count(c.body)
            word_count += count
            ins_count += icount
            print(f"{c['filename']} or {c['title']}: has {count} words and {icount} todos, totalling {word_count}")

    print(f"word count: { word_count} ")
    documents["meta"]["count"] = word_count
|
||
|
||
|
||
def main():
    """Build the whole site into the output directory.

    Loads the inventory and all documents, computes the word counts,
    renders every entry of CONTENT_D (directories recursively, top-level
    Markdown files individually, CSV files only reported), replaces the
    output assets folder with a fresh copy of the source assets and
    prints the totals.
    """
    print("....Start putting together a new document....")
    get_inventory()
    preload_documents()
    get_wordcount()

    for subdir in os.listdir(CONTENT_D):
        path = os.path.join(CONTENT_D, subdir)
        label = Path(path).name
        suffix = Path(path).suffix

        if os.path.isdir(path):
            print("Compile: an entire directory", label)
            render_posts(path)
            continue

        if suffix == ".md":
            print("Compile: single page", label)
            render_single_file(path, OUTPUT_D)
            continue

        if suffix == ".csv":
            print("Compile: not compiling ", label)

    # copytree needs a non-existent destination, so clear the old copy first.
    if os.path.exists(OUT_ASSETS):
        shutil.rmtree(OUT_ASSETS)
    shutil.copytree(SRC_ASSETS, OUT_ASSETS)

    print(f"total words: {word_count}")
    print(f"total ins left: {ins_count}")


# The build runs whenever this module is executed or imported.
main()
|