import csv
import os
import re
import shutil
import subprocess
import xml.etree.ElementTree as ET
from datetime import datetime
from pathlib import Path

import frontmatter
import pypandoc
from jinja2 import Environment, PackageLoader, select_autoescape
from slugify import slugify

# TODO make newsletter URLs absolute to klank.school

env = Environment(loader=PackageLoader("src"), autoescape=select_autoescape())

CONTENT_D = os.path.abspath("src/content")
OUTPUT_D = "dist"
OUT_ASSETS = "dist/assets"
SRC_ASSETS = "src/assets"

documents = {}
now = datetime.now()
word_count = 0
ins_count = 0

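# jinja shortcode callback that renders an image spread for a single document:
# param[0] is the collection name, param[1] the filename to look up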
def imageSpread(params):
    global documents
    param = params.split(" ")
    print(param[1])
    for item in documents[param[0]]:
        print(item.get("filename"))

    d = [item for item in documents[param[0]] if item.get("filename") == param[1]]
    print(d)
    template = env.select_template(["snippets/spread-images.jinja"])
    html = template.render(documents=documents, content=d[0], type=param[1])

    return html

# jinja filter that can list documents
def listDocuments(params):
    param = params.split(" ")
    template = env.select_template(["snippets/list-documents.jinja"])
    html = template.render(documents=documents, layout=param[0], type=param[1])

    return html

# Source: https://github.com/gandreadis/markdown-word-count
def count_words_in_markdown(text):
    # Comments
    text = re.sub(r"<!--(.*?)-->", "", text, flags=re.MULTILINE)
    # Tabs to spaces
    text = text.replace("\t", " ")
    # Collapse runs of spaces into a single space
    text = re.sub(r"[ ]{2,}", " ", text)
    # Footnotes
    text = re.sub(r"^\[[^]]*\][^(].*", "", text, flags=re.MULTILINE)
    # Indented blocks of code
    text = re.sub(r"^( {4,}[^-*]).*", "", text, flags=re.MULTILINE)
    # Replace newlines with spaces for uniform handling
    text = text.replace("\n", " ")
    # Custom header IDs
    text = re.sub(r"{#.*}", "", text)
    # Remove images
    text = re.sub(r"!\[[^\]]*\]\([^)]*\)", "", text)
    # Remove HTML tags
    text = re.sub(r"</?[^>]*>", "", text)
    # Remove special characters
    text = re.sub(r"[#*`~\-–^=<>+|/:]", "", text)
    # Remove footnote references
    text = re.sub(r"\[[0-9]*\]", "", text)
    # Remove enumerations
    text = re.sub(r"[0-9#]*\.", "", text)

    return len(text.split())

# jinja filter for date formatting
def prettydate(value, format="%d/%m/%Y"):
    return datetime.fromtimestamp(int(value)).strftime(format)

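# count the <ins> tags in a rendered HTML string; used below to tally how many
# inline todos are left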
def get_ins_count(html_string):
    global ins_count
    cnt = len(re.findall(r'<\s*ins\b', html_string, re.IGNORECASE))
    return cnt

# jinja filter to replace shortcodes in HTML
def shortcode_filter(value):
    shortcode_callbacks = {"show": listDocuments, "showImages": imageSpread}

    def shortcode_replacer(match):
        shortcode_name = match.group(1).strip()
        param = match.group(2).strip()

        if shortcode_name in shortcode_callbacks:
            return shortcode_callbacks[shortcode_name](param)

        return match.group(0)

    pattern = re.compile(r"{{\s*(\w+)\s+([^{}]+?)\s*}}")
    return pattern.sub(shortcode_replacer, value)

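# For reference, a shortcode in the content such as "{{ show list workshops }}"
# (the layout/collection names here are only illustrative) would call
# listDocuments("list workshops"), while "{{ showImages workshops intro }}"
# would call imageSpread("workshops intro"); unknown shortcodes are left as-is.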
env.filters["shortcode"] = shortcode_filter
env.filters["slugify"] = slugify
env.filters["prettydate"] = prettydate

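# In a template these filters can then be applied along the lines of
# (field and variable names are illustrative):
#   {{ page.title | slugify }}
#   {{ page.start_datetime | prettydate }}
#   {{ page.body | shortcode }}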
# translate a single file into HTML
def render_single_file(path, dist, name=False):
    if not name:
        name = Path(path).stem
    template = env.select_template([f"{name}.jinja", "post.jinja"])
    page = get_page_data(path)
    html = template.render(documents=documents, page=page, name=name)

    if not os.path.exists(dist):
        os.makedirs(dist)

    with open(f"{dist}/{name}.html", "w", encoding="utf-8") as output_file:
        output_file.write(html)

# find a pre-rendered page
def get_existing_page(path, slug):
    stem = Path(path).stem
    folder = os.path.basename(os.path.dirname(path))

    if stem == "index" and folder != "content":
        folder = Path(path).parent.parent.name

    if slug in documents:
        return documents[slug]

    if folder == "content":
        return False

    if folder in documents:
        return [item for item in documents[folder] if item.get("slug") == slug]

    return False

# compile markdown into cited HTML
def get_page_data(path):
    global word_count

    filename = Path(path).stem
    folder = Path(path).parent.name
    slug = slugify(filename) if folder == "content" else slugify(f"{folder}/{filename}")

    if prerendered := get_existing_page(path, slug):
        return prerendered

    page = frontmatter.load(path)
    page["slug"] = slug
    page["filename"] = filename
    page["folder"] = folder

    if "start_datetime" in page:
        page["has_passed"] = datetime.fromtimestamp(page["start_datetime"]) < now

    content = page.content

    if ".include" in page.content:
        print("doing an include!")
        content = pypandoc.convert_text(
            page.content,
            to="md",
            format="md",
            extra_args=["--lua-filter=include-files.lua"],
        )

    page.body = pypandoc.convert_text(
        content,
        to="html",
        format="md",
        extra_args=[
            "--citeproc",
            "--bibliography=library.bib",
            "--csl=harvard-cite-them-right.csl",
        ],
    )

    return page

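# Content files are markdown with YAML front matter, roughly like this
# (field names as read above; the values are made up):
#
#   ---
#   title: Some workshop
#   start_datetime: 1718445600
#   ---
#
#   Body text, optionally with citations like [@key2020] and shortcodes.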
# Rescale a circuit's PCB SVG: reinterpret its pixel dimensions as millimetres
def save_circuit_svg(filepath, outpath, name):
    tree = ET.parse(filepath)
    root = tree.getroot()

    # Extract current width/height (in pixels)
    width_px = float(root.get("width", 0))
    height_px = float(root.get("height", 0))

    # Set new width/height in mm
    root.set("width", f"{width_px}mm")
    root.set("height", f"{height_px}mm")

    os.makedirs(outpath, exist_ok=True)

    tree.write(f"{outpath}/{name}")

# combine HTML & data with Jinja templates
def render_posts(path, output_path=OUTPUT_D):
    name = Path(path).stem

    for filename in sorted(os.listdir(path)):
        file_path = Path(path) / filename

        if file_path.suffix == ".md":
            render_single_file(file_path, f"{output_path}/{name}")
        elif file_path.is_dir():
            render_posts(file_path, f"{output_path}/{name}")
        elif file_path.suffix == ".svg":
            save_circuit_svg(file_path, f"{output_path}/{name}", filename)
        elif file_path.suffix in {".jpeg", ".mp3", ".jpg", ".png", ".JPG", ".webp"}:
            os.makedirs(f"{output_path}/{name}", exist_ok=True)
            shutil.copyfile(file_path, f"{output_path}/{name}/{filename}")

# Pre-load before compiling
def preload_documents():
    global documents

    version = (
        subprocess.check_output(["git", "rev-list", "--count", "HEAD"])
        .decode("utf-8")
        .strip()
    )

    documents["meta"] = {"now": now.strftime("%d %B %Y - %H:%M:%S"), "version": version}

    for subdir in os.listdir(CONTENT_D):
        path = os.path.join(CONTENT_D, subdir)

        if os.path.isdir(path):
            name = Path(path).stem
            documents.setdefault(name, [])

            for filename in sorted(os.listdir(path)):
                cpath = os.path.join(path, filename)
                if filename.endswith(".md"):
                    documents[name].append(get_page_data(cpath))
                elif os.path.isdir(cpath):
                    documents[name].append(
                        get_page_data(os.path.join(cpath, "index.md"))
                    )

        elif Path(path).suffix == ".md":
            documents[Path(path).stem] = get_page_data(path)

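# read the component inventory CSV into documents['inventory'], one dict per row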
def get_inventory():
    global documents

    with open("src/content/component-inventory.csv") as f:
        documents['inventory'] = []

        for line in csv.DictReader(f, fieldnames=('ID', 'Amount', 'Name', 'Value', 'type', 'Date', 'Where', 'Mounting type')):
            documents['inventory'].append(line)

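# tally the word count of the thesis, chapters, components and recipes,
# plus the number of <ins> todos still left in them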
def get_wordcount():
    global word_count, ins_count

    word_count += count_words_in_markdown(documents["thesis"].body)

    for section in ["chapters", "components", "recipes"]:
        for c in documents[section]:
            if section == "recipes" or c["filename"] != "index":
                count = count_words_in_markdown(c.body)
                icount = get_ins_count(c.body)
                word_count += count
                ins_count += icount
                print(f"{c['filename']} or {c['title']}: has {count} words and {icount} todos, totalling {word_count}")

    print(f"word count: {word_count}")
    documents["meta"]["count"] = word_count

def main():
    print("....Start putting together a new document....")
    get_inventory()
    preload_documents()
    get_wordcount()

    for subdir in os.listdir(CONTENT_D):
        path = os.path.join(CONTENT_D, subdir)

        if os.path.isdir(path):
            print("Compile: an entire directory", Path(path).name)
            render_posts(path)
        elif Path(path).suffix == ".md":
            print("Compile: single page", Path(path).name)
            render_single_file(path, OUTPUT_D)
        elif Path(path).suffix in [".csv"]:
            print("Compile: not compiling", Path(path).name)

    if os.path.exists(OUT_ASSETS):
        shutil.rmtree(OUT_ASSETS)

    shutil.copytree(SRC_ASSETS, OUT_ASSETS)

    global ins_count
    print(f"total words: {word_count}")
    print(f"total ins left: {ins_count}")


if __name__ == "__main__":
    main()