2025-04-12 11:07:56 +02:00

295 lines
8.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import subprocess
import os
from pathlib import Path
import shutil
import csv
import re
from datetime import datetime
from jinja2 import Environment, PackageLoader, select_autoescape
import frontmatter
from slugify import slugify
import pypandoc
import xml.etree.ElementTree as ET
# TODO: make newsletter URLs absolute to klank.school
# Shared Jinja environment; templates are located through PackageLoader("src")
# and autoescaping follows select_autoescape()'s defaults.
env = Environment(
loader=PackageLoader("src"),
autoescape=select_autoescape()
)
# Absolute path of the content tree that main() and preload_documents() walk.
CONTENT_D = os.path.abspath("src/content")
# Root directory the rendered HTML is written to.
OUTPUT_D = "dist"
# Destination and source directories used by copy_assets().
OUT_ASSETS = "dist/assets"
SRC_ASSETS = "src/assets"
# Registry of loaded pages and data, filled by preload_documents() and
# get_inventory() and handed to every template render.
documents = {}
# Build timestamp: shown in documents["meta"] and compared against
# "start_datetime" in get_page_data().
now = datetime.now()
# Running word total, incremented by get_page_data() and printed by main().
word_count = 0
# Utils
def getParam(params, index):
    """Return ``params[index]``, or ``False`` when ``params`` is too short.

    Args:
        params: Any sized, indexable sequence.
        index: Position to read.

    Returns:
        The element at ``index`` if ``index < len(params)``, else ``False``.
    """
    if index < len(params):
        return params[index]
    return False
# jinja filter that can list documents
def listDocuments(params):
    """Render the ``snippets/list-documents.jinja`` snippet for a shortcode.

    Args:
        params: Raw shortcode argument string. The first whitespace-separated
            token is passed to the template as ``layout`` and the second as
            ``type``.

    Returns:
        The rendered HTML string.
    """
    # split() without an argument tolerates repeated spaces between tokens.
    args = params.split()
    template = env.select_template(["snippets/list-documents.jinja"])
    # getParam yields False for a missing token, so a one-argument shortcode
    # renders instead of raising IndexError.
    return template.render(
        documents=documents,
        layout=getParam(args, 0),
        type=getParam(args, 1),
    )
# jinja filter to make a slug out of a string
def slugify_filter(value):
    """Jinja filter: pass ``value`` through ``slugify`` and return the result."""
    slug = slugify(value)
    return slug
# Source: https://github.com/gandreadis/markdown-word-count
def count_words_in_markdown(text):
    """Return an approximate count of the prose words in ``text``.

    Markup that should not count as words (comments, footnote definitions,
    indented code, header IDs, images, HTML tags, punctuation-like symbols,
    footnote references and list numbering) is stripped first; the remainder
    is split on whitespace.

    Args:
        text: Markdown (or HTML-flavoured Markdown) source.

    Returns:
        Number of whitespace-separated tokens left after stripping.
    """
    # Comments. DOTALL lets "." cross line breaks so a comment spanning
    # several lines is removed as a whole.
    text = re.sub(r'<!--.*?-->', '', text, flags=re.DOTALL)
    # Tabs to 4 spaces, so tab-indented code is seen by the indented-code step
    text = text.replace('\t', '    ')
    # More than 1 space to 4 spaces
    text = re.sub(r'[ ]{2,}', '    ', text)
    # Footnotes
    text = re.sub(r'^\[[^]]*\][^(].*', '', text, flags=re.MULTILINE)
    # Indented blocks of code (list items starting with - or * are kept)
    text = re.sub(r'^( {4,}[^-*]).*', '', text, flags=re.MULTILINE)
    # Replace newlines with spaces for uniform handling
    text = text.replace('\n', ' ')
    # Custom header IDs. The text is a single line by now, so the match must
    # stop at the first "}" instead of swallowing everything up to the last one.
    text = re.sub(r'\{#[^}]*\}', '', text)
    # Remove images
    text = re.sub(r'!\[[^\]]*\]\([^)]*\)', '', text)
    # Remove HTML tags
    text = re.sub(r'</?[^>]*>', '', text)
    # Remove special characters
    text = re.sub(r'[#*`~\-^=<>+|/:]', '', text)
    # Remove footnote references
    text = re.sub(r'\[[0-9]*\]', '', text)
    # Remove enumerations
    text = re.sub(r'[0-9#]*\.', '', text)
    return len(text.split())
# jinja filter for date formatting
def prettydate(value, format='%d/%m/%Y'):
    """Format a Unix timestamp as a date string.

    Args:
        value: Timestamp in seconds; anything ``int()`` accepts.
        format: ``strftime`` pattern, ``'%d/%m/%Y'`` by default.

    Returns:
        The formatted local-time string.
    """
    seconds = int(value)
    moment = datetime.fromtimestamp(seconds)
    return moment.strftime(format)
# jinja filter to replace shortcodes in HTML
def shortcode_filter(value):
    """Expand ``{{ name arguments }}`` shortcodes inside ``value``.

    Each match whose name has a registered handler is replaced by the
    handler's return value for the stripped argument string; any other match
    is left exactly as written.

    Args:
        value: Text to scan.

    Returns:
        ``value`` with the known shortcodes substituted.
    """
    handlers = {
        "show": listDocuments
    }
    shortcode_re = re.compile(r"{{\s*(\w+)\s+([^{}]+?)\s*}}")

    def expand(found):
        tag = found.group(1).strip()
        argument = found.group(2).strip()
        handler = handlers.get(tag)
        if handler is None:
            # Unknown shortcode: keep the original text untouched.
            return found.group(0)
        return handler(argument)

    return shortcode_re.sub(expand, value)
# Register the custom filters on the shared environment under the names
# templates refer to them by.
env.filters["shortcode"] = shortcode_filter
env.filters["slugify"] = slugify_filter
env.filters["prettydate"] = prettydate
# translate a single file into HTML
def render_single_file(page, path, dist, name = False):
    """Render one page through its template and write ``<dist>/<name>.html``.

    Args:
        page: Page object handed to the template as ``page``.
        path: Source file path; its stem is the default output name.
        dist: Output directory, created if missing.
        name: Optional explicit output/template name. When falsy, the stem
            of ``path`` is used.
    """
    # An explicit name wins; otherwise fall back to the source file's stem.
    name = name or Path(path).stem
    # Prefer a template named after the page, else the generic post template.
    template = env.select_template([f"{name}.jinja", "post.jinja"])
    html = template.render(documents=documents, page=page, name=name)
    os.makedirs(dist, exist_ok=True)
    with open(f"{dist}/{name}.html", "w", encoding="utf-8") as output_file:
        output_file.write(html)
# find a pre-rendered page
def get_existing_page(path, slug):
    """Look up an already loaded page in ``documents``.

    Args:
        path: Path of the source file.
        slug: Slug computed for that file.

    Returns:
        The stored page when found, otherwise ``False``.
    """
    stem = Path(path).stem
    folder = os.path.basename(os.path.dirname(path))
    # An index file inside a sub-folder is registered under the folder one
    # level further up.
    if stem == "index" and folder != "content":
        folder = Path(path).parent.parent.name
    if slug in documents:
        return documents[slug]
    if folder == "content":
        return False
    # A folder that was never preloaded has no entry; treat that as
    # "not found" instead of raising KeyError.
    candidates = documents.get(folder)
    if not isinstance(candidates, list):
        return False
    for doc in candidates:
        if doc and doc["slug"] == slug:
            return doc
    return False
# build a slug including the folder
def get_slug(path, folder, filename):
    """Build the slug for a file, prefixed by its folder unless it is top-level.

    Args:
        path: Unused; kept for the existing call signature.
        folder: Name of the directory containing the file.
        filename: File name without extension.

    Returns:
        ``slugify(filename)`` for files directly in ``content``, otherwise
        ``slugify("<folder>/<filename>")``.
    """
    source = filename if folder == "content" else f"{folder}/{filename}"
    return slugify(source)
# compile markdown into cited HTML
def get_page_data(path):
    """Load a Markdown file, convert it to HTML and return the page object.

    A page already present in ``documents`` is returned as is. Otherwise the
    front matter is loaded, ``slug``/``filename``/``folder`` (and
    ``has_passed`` when ``start_datetime`` is set) are stored on the page,
    the content is run through pandoc with citation processing, the HTML is
    stored on ``page.body`` and the global ``word_count`` is increased.

    Args:
        path: Path of the Markdown source file.

    Returns:
        The previously loaded page, or the freshly loaded one.
    """
    global word_count

    source = Path(path)
    filename = source.stem
    folder = source.parent.name
    slug = get_slug(path, folder, filename)

    cached = get_existing_page(path, slug)
    if cached:
        return cached

    page = frontmatter.load(path)
    page["slug"] = slug
    page["filename"] = filename
    page["folder"] = folder
    if "start_datetime" in page:
        starts_at = datetime.fromtimestamp(page["start_datetime"])
        page["has_passed"] = starts_at < now

    markdown = page.content
    if "`include" in markdown:
        # First pass (md -> md) runs the include-files Lua filter.
        include_args = ["--lua-filter=include-files.lua"]
        markdown = pypandoc.convert_text(
            markdown,
            to='md',
            format='md',
            extra_args=include_args,
        )

    citation_args = [
        "--citeproc",
        "--bibliography=library.bib",
        "--csl=harvard-cite-them-right.csl",
    ]
    page.body = pypandoc.convert_text(
        markdown,
        to="html",
        format="md",
        extra_args=citation_args,
    )

    word_count = word_count + count_words_in_markdown(page.body)
    return page
# Re-label the width/height of a circuit's PCB SVG in millimetres and save a copy
def _svg_length_as_number(raw):
    """Return the number in a unitless or ``px`` SVG length, else ``None``."""
    if raw is None:
        return None
    text = str(raw).strip()
    if text.lower().endswith("px"):
        text = text[:-2]
    try:
        return float(text)
    except ValueError:
        # Lengths in another unit (mm, cm, %, ...) are not pixel values.
        return None


def save_circuit_svg(filepath, outpath, name):
    """Copy an SVG to ``<outpath>/<name>`` with its size re-labelled in mm.

    The numeric ``width``/``height`` of the root element (unitless or ``px``)
    are written back with an ``mm`` suffix. An attribute that is missing or
    carries a different unit is left untouched.

    Args:
        filepath: Source SVG file.
        outpath: Output directory, created if missing.
        name: Output file name.
    """
    tree = ET.parse(filepath)
    root = tree.getroot()
    for attribute in ("width", "height"):
        pixels = _svg_length_as_number(root.get(attribute))
        if pixels is not None:
            # Same number, now read as millimetres.
            root.set(attribute, f"{pixels}mm")
    os.makedirs(outpath, exist_ok=True)
    tree.write(f"{outpath}/{name}")
# combine HTML & data with Jinja templates
def render_posts(path, output_path=OUTPUT_D):
    """Recursively render or copy everything in a content directory.

    Output goes to ``<output_path>/<stem of path>``. Markdown files are
    rendered, sub-directories are processed recursively, SVG files go through
    ``save_circuit_svg`` and known media files are copied verbatim.

    Args:
        path: Directory to process.
        output_path: Parent of the output directory.
    """
    target = f"{output_path}/{Path(path).stem}"
    media_suffixes = {".jpeg", ".mp3", ".jpg", ".png"}

    for entry in sorted(os.listdir(path)):
        file_path = Path(path) / entry
        suffix = file_path.suffix

        if suffix == ".md":
            render_single_file(get_page_data(file_path), file_path, target)
            continue
        if file_path.is_dir():
            render_posts(file_path, target)
            continue
        if suffix == ".svg":
            save_circuit_svg(file_path, target, entry)
            continue
        if suffix in media_suffixes:
            os.makedirs(target, exist_ok = True)
            shutil.copyfile(file_path, f"{target}/{entry}")
# Pre-load before compiling
def preload_documents():
    """Fill ``documents`` with build metadata and every page under CONTENT_D.

    ``documents["meta"]`` gets the formatted build date and the git commit
    count. Each content sub-directory becomes a list of pages keyed by the
    directory name: its ``.md`` files plus the ``index.md`` of each nested
    directory. Each top-level ``.md`` file is stored under its stem.
    """
    version = subprocess.check_output(["git", "rev-list", "--count", "HEAD"]).decode("utf-8").strip()
    documents["meta"] = {"now": now.strftime("%d %B %Y"), "version": version}
    for subdir in os.listdir(CONTENT_D):
        path = os.path.join(CONTENT_D, subdir)
        if os.path.isdir(path):
            # Key by the full directory name so it matches the basename
            # lookup in get_existing_page even when the name contains a dot.
            name = Path(path).name
            pages = documents.setdefault(name, [])
            for filename in sorted(os.listdir(path)):
                cpath = os.path.join(path, filename)
                if filename.endswith(".md"):
                    pages.append(get_page_data(cpath))
                elif os.path.isdir(cpath):
                    index_path = os.path.join(cpath, "index.md")
                    # A nested directory without an index page (e.g. one that
                    # only holds media) has nothing to preload.
                    if os.path.isfile(index_path):
                        pages.append(get_page_data(index_path))
        elif Path(path).suffix == '.md':
            documents[Path(path).stem] = get_page_data(path)
def copy_assets():
    """Replace OUT_ASSETS with a fresh copy of SRC_ASSETS."""
    already_built = os.path.exists(OUT_ASSETS)
    if already_built:
        # copytree needs a non-existent destination, so clear the old copy.
        shutil.rmtree(OUT_ASSETS)
    shutil.copytree(SRC_ASSETS, OUT_ASSETS)
def get_inventory():
    """Load the component inventory CSV into ``documents['inventory']``.

    Every row of the file becomes a dict keyed by the fixed column names
    below; the file's first row is therefore read as data too.
    """
    columns = ('ID', 'Amount', 'Name', 'Value', 'type', 'Date', 'Where','Mounting type')
    # newline="" lets the csv module handle line endings inside quoted fields;
    # the explicit encoding keeps the result independent of the locale.
    with open("src/content/component-inventory.csv", newline="", encoding="utf-8") as f:
        documents['inventory'] = list(csv.DictReader(f, fieldnames=columns))
def main():
    """Build the site: load data, render all content, then copy the assets.

    The inventory and all pages are loaded first. Every entry in CONTENT_D
    is then compiled: directories recursively, top-level ``.md`` files as
    single pages, ``.csv`` files are only reported. Finally the assets are
    copied and the total word count is printed.
    """
    print("....Start putting together a new document....")
    get_inventory()
    preload_documents()
    for subdir in os.listdir(CONTENT_D):
        path = os.path.join(CONTENT_D, subdir)
        suffix = Path(path).suffix
        if os.path.isdir(path):
            print("Compile: an entire directory", Path(path).name)
            render_posts(path)
        elif suffix == '.md':
            print("Compile: single page", Path(path).name)
            render_single_file(get_page_data(path), path, OUTPUT_D)
        elif suffix in [".csv"]:
            print("Compile: not compiling ", Path(path).name)
    copy_assets()
    print(f"total words: {word_count}")


# Only build when executed as a script, not when the module is imported.
if __name__ == "__main__":
    main()