2025-04-20 19:59:35 +02:00

317 lines
9.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import csv
import os
import re
import shutil
import subprocess
import xml.etree.ElementTree as ET
from datetime import datetime
from pathlib import Path
import frontmatter
import pypandoc
from jinja2 import Environment, PackageLoader, select_autoescape
from slugify import slugify
# TODO: make newsletter URLs absolute to klank.school
# Jinja environment; templates are loaded from the "src" package.
env = Environment(loader=PackageLoader("src"), autoescape=select_autoescape())
# Absolute path of the content tree that gets compiled.
CONTENT_D = os.path.abspath("src/content")
# Directory the rendered site is written to.
OUTPUT_D = "dist"
# main() replaces OUT_ASSETS with a fresh copy of SRC_ASSETS.
OUT_ASSETS = "dist/assets"
SRC_ASSETS = "src/assets"
# Pre-loaded pages and metadata, filled by preload_documents() and
# get_inventory() and handed to every template.
documents = {}
# Build timestamp, taken once when the module is loaded.
now = datetime.now()
# Running totals accumulated by get_wordcount().
word_count = 0
ins_count = 0
def imageSpread(params):
    """Render the spread-images snippet for one pre-loaded document.

    ``params`` is a space-separated string: the first token is the key into
    ``documents`` and the second is the ``filename`` of the entry to render.
    The second token is also passed to the template as ``type``.
    """
    tokens = params.split(" ")
    wanted = tokens[1]
    print(wanted)

    # One pass: log every candidate filename and collect the matching entries.
    matches = []
    for entry in documents[tokens[0]]:
        entry_name = entry.get("filename")
        print(entry_name)
        if entry_name == wanted:
            matches.append(entry)
    print(matches)

    snippet = env.select_template(["snippets/spread-images.jinja"])
    return snippet.render(documents=documents, content=matches[0], type=wanted)
# jinja filter that can list documents
def listDocuments(params):
    """Render the list-documents snippet.

    ``params`` is a space-separated string whose first token is passed to the
    template as ``layout`` and whose second token is passed as ``type``.
    """
    tokens = params.split(" ")
    snippet = env.select_template(["snippets/list-documents.jinja"])
    layout, kind = tokens[0], tokens[1]
    return snippet.render(documents=documents, layout=layout, type=kind)
# Source: https://github.com/gandreadis/markdown-word-count
def count_words_in_markdown(text):
    """Return an approximate number of prose words in ``text``.

    Markup is stripped step by step (comments, footnote definitions, indented
    code, custom header IDs, images, HTML tags, syntax punctuation, footnote
    references, enumerations); what remains is split on whitespace and the
    number of tokens is returned.
    """
    # Comments; DOTALL lets "." cross line breaks so multi-line comments
    # are removed as well.
    text = re.sub(r"<!--(.*?)-->", "", text, flags=re.DOTALL)
    # Tabs to 4 spaces, so a tab-indented line is seen by the
    # indented-code pattern below.
    text = text.replace("\t", "    ")
    # More than 1 space to 4 spaces. Collapsing to a single space would make
    # the " {4,}" indented-code pattern below impossible to match.
    text = re.sub(r"[ ]{2,}", "    ", text)
    # Footnotes
    text = re.sub(r"^\[[^]]*\][^(].*", "", text, flags=re.MULTILINE)
    # Indented blocks of code
    text = re.sub(r"^( {4,}[^-*]).*", "", text, flags=re.MULTILINE)
    # Replace newlines with spaces for uniform handling
    text = text.replace("\n", " ")
    # Custom header IDs. The text is a single line at this point, so the match
    # must stop at the first "}"; a greedy ".*" would swallow everything
    # between the first "{#" and the last "}" of the document.
    text = re.sub(r"\{#[^}]*\}", "", text)
    # Remove images
    text = re.sub(r"!\[[^\]]*\]\([^)]*\)", "", text)
    # Remove HTML tags
    text = re.sub(r"</?[^>]*>", "", text)
    # Remove special characters
    text = re.sub(r"[#*`~\-^=<>+|/:]", "", text)
    # Remove footnote references
    text = re.sub(r"\[[0-9]*\]", "", text)
    # Remove enumerations
    text = re.sub(r"[0-9#]*\.", "", text)
    return len(text.split())
# jinja filter for date formatting
def prettydate(value, format="%d/%m/%Y"):
    """Format a Unix timestamp as a date string.

    ``value`` is converted with ``int()`` first, so numeric strings work too.
    ``format`` is a ``strftime`` pattern; the result is in local time.
    """
    moment = datetime.fromtimestamp(int(value))
    return moment.strftime(format)
def get_ins_count(html_string):
    """Return how many opening ``<ins`` tags occur in ``html_string``.

    Matching is case-insensitive and tolerates whitespace after ``<``.
    """
    opening_ins = re.compile(r'<\s*ins\b', re.IGNORECASE)
    return sum(1 for _ in opening_ins.finditer(html_string))
# jinja filter to replace shortcodes in HTML
def shortcode_filter(value):
    """Expand ``{{ name args }}`` shortcodes found in ``value``.

    ``show`` is handled by ``listDocuments`` and ``showImages`` by
    ``imageSpread``; each receives the stripped argument string. Shortcodes
    with any other name are left in the text untouched.
    """
    handlers = {"show": listDocuments, "showImages": imageSpread}

    def expand(found):
        tag = found.group(1).strip()
        argument = found.group(2).strip()
        handler = handlers.get(tag)
        if handler is None:
            # Unknown shortcode: keep the original text.
            return found.group(0)
        return handler(argument)

    return re.sub(r"{{\s*(\w+)\s+([^{}]+?)\s*}}", expand, value)
# Make the custom filters available to templates under these names.
env.filters.update(
    shortcode=shortcode_filter,
    slugify=slugify,
    prettydate=prettydate,
)
# translate a single file into HTML
def render_single_file(path, dist, name=False):
    """Render one markdown file to ``<dist>/<name>.html``.

    Args:
        path: markdown source file.
        dist: output directory; created if it does not exist yet.
        name: optional base name used for template lookup and for the output
            file. Defaults to the stem of ``path``.

    The template ``<name>.jinja`` is used when it exists, otherwise
    ``post.jinja``.
    """
    # Honour an explicit name; the argument used to be overwritten
    # unconditionally, which made it useless.
    name = name or Path(path).stem
    template = env.select_template([f"{name}.jinja", "post.jinja"])
    page = get_page_data(path)
    html = template.render(documents=documents, page=page, name=name)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dist, exist_ok=True)
    with open(f"{dist}/{name}.html", "w", encoding="utf-8") as output_file:
        output_file.write(html)
# find a pre-rendered page
def get_existing_page(path, slug):
    """Look up an already loaded page in ``documents``.

    Returns:
        ``documents[slug]`` when ``slug`` is itself a key of ``documents``;
        ``False`` for a file that sits directly in the ``content`` folder and
        has no such key; otherwise the list of entries of the file's section
        whose ``slug`` matches (empty, hence falsy, when nothing matches or
        the section was never pre-loaded).

    NOTE(review): the section lookup returns a list, while the other branch
    returns the page object itself; callers appear to use the result as a
    single page. Confirm which shape the templates expect.
    """
    source = Path(path)
    folder = os.path.basename(os.path.dirname(path))
    # An index.md inside a sub-folder belongs to the section one level up.
    if source.stem == "index" and folder != "content":
        folder = source.parent.parent.name
    if slug in documents:
        return documents[slug]
    if folder == "content":
        return False
    # .get() keeps a folder that was never pre-loaded from raising KeyError;
    # the empty result makes the caller load the file from disk instead.
    return [item for item in documents.get(folder, []) if item.get("slug") == slug]
# compile markdown into cited HTML
def get_page_data(path):
    """Load a markdown file and convert its content to HTML.

    A truthy result from ``get_existing_page`` is returned as is. Otherwise
    the file's front matter is loaded, ``slug``/``filename``/``folder`` (and
    ``has_passed`` when ``start_datetime`` is present) are added, and the
    pandoc-rendered HTML is stored on ``page.body``.
    """
    source = Path(path)
    filename = source.stem
    folder = source.parent.name
    if folder == "content":
        slug = slugify(filename)
    else:
        slug = slugify(f"{folder}/{filename}")

    prerendered = get_existing_page(path, slug)
    if prerendered:
        return prerendered

    page = frontmatter.load(path)
    page["slug"] = slug
    page["filename"] = filename
    page["folder"] = folder
    if "start_datetime" in page:
        starts_at = datetime.fromtimestamp(page["start_datetime"])
        page["has_passed"] = starts_at < now

    markdown = page.content
    if ".include" in markdown:
        # Resolve include directives with a markdown-to-markdown pass first.
        print("doing an include!")
        markdown = pypandoc.convert_text(
            markdown,
            to="md",
            format="md",
            extra_args=["--lua-filter=include-files.lua"],
        )

    citation_args = [
        "--citeproc",
        "--bibliography=library.bib",
        "--csl=harvard-cite-them-right.csl",
    ]
    page.body = pypandoc.convert_text(
        markdown,
        to="html",
        format="md",
        extra_args=citation_args,
    )
    return page
# Do stuff to the circuit's pcb
def save_circuit_svg(filepath, outpath, name):
    """Copy an SVG to ``<outpath>/<name>`` with its size relabelled in mm.

    The numeric ``width`` and ``height`` of the root element are kept but
    written with an ``mm`` suffix (``100`` becomes ``100.0mm``). An attribute
    that is missing, or that already carries a unit other than ``px``, is
    left untouched.

    Args:
        filepath: SVG file to read.
        outpath: output directory; created if needed.
        name: file name to write inside ``outpath``.
    """
    tree = ET.parse(filepath)
    root = tree.getroot()
    for attribute in ("width", "height"):
        _relabel_length_as_mm(root, attribute)
    os.makedirs(outpath, exist_ok=True)
    tree.write(f"{outpath}/{name}")


def _relabel_length_as_mm(root, attribute):
    """Rewrite a unitless or ``px`` length attribute of ``root`` as ``<n>mm``."""
    raw = root.get(attribute)
    if raw is None:
        # Writing "0.0mm" for a missing attribute would collapse the drawing.
        return
    text = raw.strip()
    if text.lower().endswith("px"):
        text = text[:-2]
    try:
        value = float(text)
    except ValueError:
        # Some other unit (e.g. "210mm"): keep the author's value.
        return
    root.set(attribute, f"{value}mm")
# combine HTML & data with Jinja templates
def render_posts(path, output_path=OUTPUT_D):
    """Render or copy everything below ``path`` into ``output_path``.

    Output goes to ``<output_path>/<stem of path>``. Markdown files are
    rendered, sub-directories are processed recursively, SVG files go through
    ``save_circuit_svg`` and the listed media types are copied as they are.
    Other files are ignored.
    """
    source_dir = Path(path)
    target_dir = f"{output_path}/{source_dir.stem}"
    copyable = {".jpeg", ".mp3", ".jpg", ".png", ".JPG", ".webp"}

    for filename in sorted(os.listdir(path)):
        entry = source_dir / filename
        suffix = entry.suffix
        if suffix == ".md":
            render_single_file(entry, target_dir)
            continue
        if entry.is_dir():
            render_posts(entry, target_dir)
            continue
        if suffix == ".svg":
            save_circuit_svg(entry, target_dir, filename)
            continue
        if suffix in copyable:
            os.makedirs(target_dir, exist_ok=True)
            shutil.copyfile(entry, f"{target_dir}/{filename}")
# Pre-load before compiling
def preload_documents():
    """Fill ``documents`` with build metadata and every page under CONTENT_D.

    ``documents["meta"]`` gets the build time and the git commit count. Each
    sub-directory of the content folder becomes a list of pages (markdown
    files plus the ``index.md`` of nested directories); each top-level
    markdown file is stored under its own stem.
    """
    rev_count = subprocess.check_output(["git", "rev-list", "--count", "HEAD"])
    documents["meta"] = {
        "now": now.strftime("%d %B %Y - %H:%M:%S"),
        "version": rev_count.decode("utf-8").strip(),
    }

    for entry in os.listdir(CONTENT_D):
        path = os.path.join(CONTENT_D, entry)
        if not os.path.isdir(path):
            # Top-level page: stored directly under its stem.
            if Path(path).suffix == ".md":
                documents[Path(path).stem] = get_page_data(path)
            continue

        section = documents.setdefault(Path(path).stem, [])
        for filename in sorted(os.listdir(path)):
            child = os.path.join(path, filename)
            if filename.endswith(".md"):
                section.append(get_page_data(child))
            elif os.path.isdir(child):
                # A nested directory is represented by its index.md.
                section.append(get_page_data(os.path.join(child, "index.md")))
def get_inventory():
    """Load the component inventory CSV into ``documents['inventory']``.

    Every row becomes a dict keyed by the fixed column names below.
    NOTE(review): because the field names are passed explicitly, a header row
    in the file (if there is one) is read as a regular entry. Confirm.
    """
    columns = ('ID', 'Amount', 'Name', 'Value', 'type', 'Date', 'Where', 'Mounting type')
    # newline="" is what the csv module asks for so that quoted fields with
    # embedded line breaks are parsed correctly; the explicit encoding matches
    # how this script writes its output instead of relying on the locale.
    with open("src/content/component-inventory.csv", newline="", encoding="utf-8") as f:
        documents['inventory'] = list(csv.DictReader(f, fieldnames=columns))
def get_wordcount():
    """Accumulate word and ``<ins>`` totals over the pre-loaded documents.

    Counts the thesis body plus every entry of the chapters, components and
    recipes sections; ``index`` entries are skipped except in recipes. The
    totals are kept in the module-level ``word_count`` / ``ins_count`` and the
    word total is also stored in ``documents["meta"]["count"]``.
    """
    global word_count, ins_count
    word_count += count_words_in_markdown(documents["thesis"].body)

    for section in ("chapters", "components", "recipes"):
        keep_index = section == "recipes"
        for doc in documents[section]:
            if not keep_index and doc["filename"] == "index":
                continue
            words = count_words_in_markdown(doc.body)
            todos = get_ins_count(doc.body)
            word_count += words
            ins_count += todos
            print(f"{doc['filename']} or {doc['title']}: has {words} words and {todos} todos, totalling {word_count}")

    print(f"word count: { word_count} ")
    documents["meta"]["count"] = word_count
def main():
    """Build the whole site: load data, render content, copy assets."""
    print("....Start putting together a new document....")
    get_inventory()
    preload_documents()
    get_wordcount()

    for entry in os.listdir(CONTENT_D):
        path = os.path.join(CONTENT_D, entry)
        label = Path(path).name
        suffix = Path(path).suffix
        if os.path.isdir(path):
            print("Compile: an entire directory", label)
            render_posts(path)
        elif suffix == ".md":
            print("Compile: single page", label)
            render_single_file(path, OUTPUT_D)
        elif suffix == ".csv":
            print("Compile: not compiling ", label)

    # Replace any previous asset copy with a fresh one.
    if os.path.exists(OUT_ASSETS):
        shutil.rmtree(OUT_ASSETS)
    shutil.copytree(SRC_ASSETS, OUT_ASSETS)

    print(f"total words: {word_count}")
    print(f"total ins left: {ins_count}")
# Run the build as soon as this module is loaded (executed or imported).
main()