317 lines
9.1 KiB
Python
Raw Normal View History

2025-02-15 10:19:38 +01:00
import csv
2025-04-13 10:24:24 +02:00
import os
2025-02-15 10:19:38 +01:00
import re
2025-04-13 10:24:24 +02:00
import shutil
import subprocess
import xml.etree.ElementTree as ET
2025-02-15 10:19:38 +01:00
from datetime import datetime
2025-04-13 10:24:24 +02:00
from pathlib import Path
2025-02-15 10:19:38 +01:00
import frontmatter
import pypandoc
2025-04-13 10:24:24 +02:00
from jinja2 import Environment, PackageLoader, select_autoescape
from slugify import slugify
2025-02-16 11:08:10 +01:00
# TODO: make newsletter URLs absolute to klank.school
2025-04-13 10:24:24 +02:00
# Jinja environment; templates are loaded from the "src" package.
env = Environment(
    loader=PackageLoader("src"),
    autoescape=select_autoescape(),
)

# Input and output locations.
CONTENT_D = os.path.abspath("src/content")
SRC_ASSETS = "src/assets"
OUTPUT_D = "dist"
OUT_ASSETS = "dist/assets"

# Build state shared by the functions below.
now = datetime.now()
documents = {}
word_count = 0
ins_count = 0
2025-02-15 10:19:38 +01:00
2025-04-19 12:23:57 +02:00
def imageSpread(params):
    """Render the image-spread snippet for one document (``showImages`` shortcode).

    Args:
        params: Space-separated shortcode arguments, ``"<collection> <filename>"``:
            a key of ``documents`` holding a list of pages, followed by the
            ``filename`` of the page to render.

    Returns:
        The HTML produced by ``snippets/spread-images.jinja``.

    Raises:
        IndexError: If fewer than two arguments are given, or no page in the
            collection has the requested filename.
        KeyError: If the collection is not a key of ``documents``.
    """
    param = params.split(" ")
    if len(param) < 2:
        raise IndexError(
            f"showImages expects '<collection> <filename>', got {params!r}"
        )
    collection, filename = param[0], param[1]

    # Only the first page with a matching filename is rendered.
    content = next(
        (item for item in documents[collection] if item.get("filename") == filename),
        None,
    )
    if content is None:
        # Same exception type as the former bare d[0], but with context.
        raise IndexError(
            f"showImages: no document with filename {filename!r} in {collection!r}"
        )

    template = env.select_template(["snippets/spread-images.jinja"])
    return template.render(documents=documents, content=content, type=filename)
2025-02-16 11:08:10 +01:00
# jinja filter that can list documents
def listDocuments(params):
    """Render the document-list snippet (``show`` shortcode).

    ``params`` is split on single spaces; the first token is handed to
    ``snippets/list-documents.jinja`` as ``layout`` and the second as ``type``.
    Returns the rendered HTML.
    """
    tokens = params.split(" ")
    snippet = env.select_template(["snippets/list-documents.jinja"])
    return snippet.render(documents=documents, layout=tokens[0], type=tokens[1])
2025-02-15 10:19:38 +01:00
2025-04-12 11:07:56 +02:00
# Source: https://github.com/gandreadis/markdown-word-count
def count_words_in_markdown(text):
    """Count the words in a Markdown string, ignoring markup.

    The text is stripped of comments, footnote definitions, indented code
    blocks, custom header IDs, images, HTML tags, common Markdown punctuation,
    footnote references and enumeration markers; what remains is split on
    whitespace.

    Args:
        text: The Markdown (or Markdown-like) source.

    Returns:
        The number of whitespace-separated tokens left after stripping.
    """
    # Comments (DOTALL so a comment spanning several lines is removed whole)
    text = re.sub(r"<!--(.*?)-->", "", text, flags=re.DOTALL)
    # Tabs to 4 spaces, so tab-indented code is caught by the code-block rule
    text = text.replace("\t", "    ")
    # More than 1 space to 4 spaces
    text = re.sub(r"[ ]{2,}", "    ", text)
    # Footnotes
    text = re.sub(r"^\[[^]]*\][^(].*", "", text, flags=re.MULTILINE)
    # Indented blocks of code (nested list items start with - or * and are kept)
    text = re.sub(r"^( {4,}[^-*]).*", "", text, flags=re.MULTILINE)
    # Replace newlines with spaces for uniform handling
    text = text.replace("\n", " ")
    # Custom header IDs; bounded by the closing brace because the text is now
    # one line and a greedy ".*" would swallow everything between two IDs
    text = re.sub(r"\{#[^}]*\}", "", text)
    # Remove images
    text = re.sub(r"!\[[^\]]*\]\([^)]*\)", "", text)
    # Remove HTML tags
    text = re.sub(r"</?[^>]*>", "", text)
    # Remove special characters
    text = re.sub(r"[#*`~\-^=<>+|/:]", "", text)
    # Remove footnote references
    text = re.sub(r"\[[0-9]*\]", "", text)
    # Remove enumerations
    text = re.sub(r"[0-9#]*\.", "", text)
    return len(text.split())
2025-03-11 22:04:07 +01:00
2025-04-13 09:03:57 +02:00
2025-04-13 10:24:24 +02:00
# jinja filter for date formatting
def prettydate(value, format="%d/%m/%Y"):
    """Jinja filter: format a Unix timestamp with ``strftime``.

    ``value`` is coerced with ``int()`` first, so numeric strings are accepted.
    """
    moment = datetime.fromtimestamp(int(value))
    return moment.strftime(format)
2025-04-20 19:59:35 +02:00
def get_ins_count(html_string):
    """Return how many opening ``<ins`` tags occur in ``html_string``.

    Matching is case-insensitive and tolerates whitespace after ``<``.
    """
    opening_ins = re.compile(r'<\s*ins\b', re.IGNORECASE)
    return sum(1 for _ in opening_ins.finditer(html_string))
2025-02-16 11:08:10 +01:00
# jinja filter to replace shortcodes in HTML
2025-02-15 10:19:38 +01:00
def shortcode_filter(value):
    """Jinja filter: expand ``{{ name params }}`` shortcodes found in ``value``.

    ``show`` is handled by ``listDocuments`` and ``showImages`` by
    ``imageSpread``; each receives the stripped parameter string and its return
    value replaces the shortcode. Unknown shortcode names are left untouched.
    """
    handlers = {"show": listDocuments, "showImages": imageSpread}

    def expand(found):
        handler = handlers.get(found.group(1).strip())
        if handler is None:
            # Not one of ours: keep the original text.
            return found.group(0)
        return handler(found.group(2).strip())

    return re.sub(r"{{\s*(\w+)\s+([^{}]+?)\s*}}", expand, value)
# Register the custom filters on the shared Jinja environment.
env.filters.update(
    {
        "shortcode": shortcode_filter,
        "slugify": slugify,
        "prettydate": prettydate,
    }
)
2025-04-13 09:03:57 +02:00
2025-04-13 10:24:24 +02:00
# translate a single file into HTML
def render_single_file(path, dist, name=False):
    """Render one Markdown file to ``<dist>/<name>.html``.

    Args:
        path: Path of the Markdown source file.
        dist: Output directory; created if it does not exist.
        name: Optional base name used for the template lookup and the output
            file. When falsy (the default) the stem of ``path`` is used.
    """
    # Honour an explicit name instead of always overwriting it with the stem.
    name = name or Path(path).stem

    # A template named after the page wins; otherwise fall back to post.jinja.
    template = env.select_template([f"{name}.jinja", "post.jinja"])
    page = get_page_data(path)
    html = template.render(documents=documents, page=page, name=name)

    os.makedirs(dist, exist_ok=True)
    with open(f"{dist}/{name}.html", "w", encoding="utf-8") as output_file:
        output_file.write(html)
2025-02-16 11:08:10 +01:00
# find a pre-rendered page
def get_existing_page(path, slug):
    """Look up a page that has already been loaded into ``documents``.

    Args:
        path: Path of the Markdown source file.
        slug: Slug computed for that file.

    Returns:
        ``documents[slug]`` when the slug itself is a key of ``documents``.
        ``False`` for a file directly inside the ``content`` folder that has no
        such key. Otherwise the list of entries of ``documents[<folder>]``
        whose ``slug`` matches; the list is empty (falsy) when nothing matches
        or when that folder has not been loaded.
    """
    source = Path(path)
    folder = source.parent.name

    # An index.md sits one level deeper, so its collection is the grandparent.
    if source.stem == "index" and folder != "content":
        folder = source.parent.parent.name

    if slug in documents:
        return documents[slug]

    if folder == "content":
        return False

    # .get(): a folder that was never preloaded means "nothing prerendered",
    # not a KeyError.
    # NOTE(review): this returns a list, not a single page — confirm callers
    # and templates expect that.
    return [item for item in documents.get(folder, []) if item.get("slug") == slug]
# compile markdown into cited HTML
2025-03-11 22:04:07 +01:00
def get_page_data(path):
    """Load a Markdown file and compile its content into cited HTML.

    Returns whatever ``get_existing_page`` yields when that is truthy.
    Otherwise the file is parsed with ``frontmatter``, annotated with ``slug``,
    ``filename`` and ``folder`` (plus ``has_passed`` when it has a
    ``start_datetime``), converted with pandoc, and returned with the HTML
    stored on ``page.body``.
    """
    source = Path(path)
    filename = source.stem
    folder = source.parent.name

    # Pages directly under "content" are slugged by name only.
    if folder == "content":
        slug = slugify(filename)
    else:
        slug = slugify(f"{folder}/{filename}")

    prerendered = get_existing_page(path, slug)
    if prerendered:
        return prerendered

    page = frontmatter.load(path)
    page["slug"] = slug
    page["filename"] = filename
    page["folder"] = folder
    if "start_datetime" in page:
        page["has_passed"] = datetime.fromtimestamp(page["start_datetime"]) < now

    markdown = page.content
    if ".include" in markdown:
        # Run the include filter first so citations in included files resolve.
        print("doing an include!")
        markdown = pypandoc.convert_text(
            markdown,
            to="md",
            format="md",
            extra_args=["--lua-filter=include-files.lua"],
        )

    citation_args = [
        "--citeproc",
        "--bibliography=library.bib",
        "--csl=harvard-cite-them-right.csl",
    ]
    page.body = pypandoc.convert_text(
        markdown,
        to="html",
        format="md",
        extra_args=citation_args,
    )
    return page
2025-04-13 09:03:57 +02:00
2025-04-13 10:24:24 +02:00
# Relabel a circuit SVG's width/height as millimetres and write it to the output folder
2025-02-16 11:08:10 +01:00
def save_circuit_svg(filepath, outpath, name):
    """Write a copy of an SVG whose width/height are relabelled as millimetres.

    The numeric part of the root element's ``width`` and ``height`` is kept and
    given an ``mm`` unit (``"120"`` or ``"120px"`` becomes ``"120.0mm"``).

    Args:
        filepath: Path of the source SVG.
        outpath: Output directory; created if it does not exist.
        name: File name of the copy inside ``outpath``.

    Raises:
        xml.etree.ElementTree.ParseError: If the source is not well-formed XML.
    """
    tree = ET.parse(filepath)
    root = tree.getroot()

    # A number, optionally followed by "px" or "mm".
    size = re.compile(r"\s*([+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)\s*(?:px|mm)?\s*")

    for attr in ("width", "height"):
        raw = root.get(attr)
        if raw is None:
            # Do not invent a zero size for an SVG that does not declare one.
            continue
        parsed = size.fullmatch(raw)
        if parsed is None:
            # Other units (%, cm, in, ...) cannot be relabelled; keep them.
            continue
        root.set(attr, f"{float(parsed.group(1))}mm")

    os.makedirs(outpath, exist_ok=True)
    tree.write(f"{outpath}/{name}")
# combine HTML & data with Jinja templates
def render_posts(path, output_path=OUTPUT_D):
    """Render every entry of a content directory into ``<output_path>/<stem>``.

    Entries are visited in sorted order: Markdown files are rendered,
    sub-directories are processed recursively, SVGs go through
    ``save_circuit_svg`` and the listed media types are copied unchanged.
    Anything else is ignored.
    """
    source_dir = Path(path)
    target_dir = f"{output_path}/{source_dir.stem}"
    copied_as_is = {".jpeg", ".mp3", ".jpg", ".png", ".JPG", ".webp"}

    for filename in sorted(os.listdir(path)):
        entry = source_dir / filename
        suffix = entry.suffix

        if suffix == ".md":
            render_single_file(entry, target_dir)
            continue
        if entry.is_dir():
            render_posts(entry, target_dir)
            continue
        if suffix == ".svg":
            save_circuit_svg(entry, target_dir, filename)
            continue
        if suffix in copied_as_is:
            os.makedirs(target_dir, exist_ok=True)
            shutil.copyfile(entry, f"{target_dir}/{filename}")
2025-02-16 11:08:10 +01:00
2025-04-13 09:03:57 +02:00
2025-04-13 10:24:24 +02:00
# Pre-load before compiling
2025-02-15 10:19:38 +01:00
def preload_documents():
    """Fill ``documents`` with build metadata and every page under CONTENT_D.

    ``documents["meta"]`` receives the build time and the git commit count.
    Each sub-directory becomes a list of pages keyed by the directory's stem
    (Markdown files, plus ``index.md`` of nested directories); each top-level
    Markdown file becomes a single entry keyed by its stem.
    """
    commit_count = subprocess.check_output(["git", "rev-list", "--count", "HEAD"])
    documents["meta"] = {
        "now": now.strftime("%d %B %Y - %H:%M:%S"),
        "version": commit_count.decode("utf-8").strip(),
    }

    for entry in os.listdir(CONTENT_D):
        path = os.path.join(CONTENT_D, entry)

        if not os.path.isdir(path):
            if Path(path).suffix == ".md":
                documents[Path(path).stem] = get_page_data(path)
            continue

        collection = documents.setdefault(Path(path).stem, [])
        for filename in sorted(os.listdir(path)):
            child = os.path.join(path, filename)
            if filename.endswith(".md"):
                collection.append(get_page_data(child))
            elif os.path.isdir(child):
                # A nested directory is represented by its index.md.
                collection.append(get_page_data(os.path.join(child, "index.md")))
2025-02-15 10:19:38 +01:00
def get_inventory():
    """Load the component inventory CSV into ``documents['inventory']``.

    Every row becomes a dict keyed by the fixed column names below.

    Raises:
        OSError: If ``src/content/component-inventory.csv`` cannot be opened.
    """
    fieldnames = (
        'ID', 'Amount', 'Name', 'Value', 'type', 'Date', 'Where', 'Mounting type',
    )
    # newline="" lets the csv module handle line endings and quoted newlines
    # itself; the explicit encoding avoids depending on the platform default.
    with open(
        "src/content/component-inventory.csv", newline="", encoding="utf-8"
    ) as f:
        # NOTE(review): fieldnames are given explicitly, so if the file has a
        # header row it is loaded as the first data row — confirm.
        documents['inventory'] = list(csv.DictReader(f, fieldnames=fieldnames))
2025-04-13 09:03:57 +02:00
def get_wordcount():
    """Add up word and ``<ins>`` counts over the thesis and its sections.

    Updates the module-level ``word_count`` and ``ins_count``, prints a line
    per counted page, and stores the word total in ``documents["meta"]["count"]``.
    """
    global word_count, ins_count

    word_count += count_words_in_markdown(documents["thesis"].body)

    for section in ("chapters", "components", "recipes"):
        for c in documents[section]:
            # Index pages are skipped everywhere except in recipes.
            if section != "recipes" and c["filename"] == "index":
                continue

            count = count_words_in_markdown(c.body)
            icount = get_ins_count(c.body)
            word_count += count
            ins_count += icount
            print(f"{c['filename']} or {c['title']}: has {count} words and {icount} todos, totalling {word_count}")

    print(f"word count: { word_count} ")
    documents["meta"]["count"] = word_count
2025-04-13 09:03:57 +02:00
2025-02-15 10:19:38 +01:00
def main():
    """Build the site: load data, render all content, then refresh the assets."""
    print("....Start putting together a new document....")

    get_inventory()
    preload_documents()
    get_wordcount()

    for entry in os.listdir(CONTENT_D):
        path = os.path.join(CONTENT_D, entry)
        label = Path(path).name

        if os.path.isdir(path):
            print("Compile: an entire directory", label)
            render_posts(path)
            continue

        suffix = Path(path).suffix
        if suffix == ".md":
            print("Compile: single page", label)
            render_single_file(path, OUTPUT_D)
        elif suffix == ".csv":
            print("Compile: not compiling ", label)

    # Replace the published assets with a fresh copy of the sources.
    if os.path.exists(OUT_ASSETS):
        shutil.rmtree(OUT_ASSETS)
    shutil.copytree(SRC_ASSETS, OUT_ASSETS)

    print(f"total words: {word_count}")
    print(f"total ins left: {ins_count}")
2025-04-12 11:07:56 +02:00
2025-04-13 09:03:57 +02:00
2025-02-15 10:19:38 +01:00
# Run the build; this call is unguarded, so it also runs when the file is imported.
main()