"""Build script: render the Markdown under ``src/content`` into HTML in ``dist``.

Pages are loaded with python-frontmatter, converted to HTML with pandoc
(including citation processing) and rendered through Jinja templates loaded
from the ``src`` package. Images, audio and SVG files found next to the
content are copied to the output tree, and ``src/assets`` is mirrored to
``dist/assets``.
"""
import csv
import os
import re
import shutil
import subprocess
import xml.etree.ElementTree as ET
from datetime import datetime
from pathlib import Path

import frontmatter
import pypandoc
from jinja2 import Environment, PackageLoader, select_autoescape
from slugify import slugify

# TODO make newsletter URLs absolute to klank.school

env = Environment(loader=PackageLoader("src"), autoescape=select_autoescape())

CONTENT_D = os.path.abspath("src/content")
OUTPUT_D = "dist"
OUT_ASSETS = "dist/assets"
SRC_ASSETS = "src/assets"

# Shared build state. ``documents`` maps a folder name to a list of pages, a
# top-level page stem to a single page, plus the "meta" and "inventory" entries.
documents = {}
now = datetime.now()
word_count = 0
ins_count = 0

# Matches ``{{ name arguments }}`` shortcodes inside rendered HTML.
_SHORTCODE_PATTERN = re.compile(r"\{\{\s*(\w+)\s+([^{}]+?)\s*\}\}")

# File suffixes (lower-case) that are copied verbatim next to the rendered pages.
_ASSET_SUFFIXES = {".jpeg", ".jpg", ".png", ".webp", ".mp3"}


# shortcode callback that renders an image spread for one document
def imageSpread(params):
    """Render ``snippets/spread-images.jinja`` for a single preloaded document.

    Args:
        params: Shortcode argument string ``"<collection> <filename>"``;
            ``collection`` is a key of ``documents`` and ``filename`` is
            compared with each page's ``filename`` field.

    Returns:
        The rendered HTML snippet.

    Raises:
        KeyError: if ``collection`` is not a key of ``documents``.
        IndexError: if ``params`` has fewer than two parts, or no page in the
            collection has the requested filename.
    """
    param = params.split(" ")
    collection, filename = param[0], param[1]
    matches = [
        item for item in documents[collection] if item.get("filename") == filename
    ]
    if not matches:
        # Same exception type as the bare ``matches[0]`` would raise, but with
        # enough context to find the offending shortcode.
        raise IndexError(
            f"showImages: no document with filename {filename!r} in {collection!r}"
        )
    template = env.select_template(["snippets/spread-images.jinja"])
    return template.render(documents=documents, content=matches[0], type=filename)


# jinja filter that can list documents
def listDocuments(params):
    """Render ``snippets/list-documents.jinja``.

    Args:
        params: Shortcode argument string ``"<layout> <type>"``; both parts
            are passed to the template unchanged.

    Returns:
        The rendered HTML snippet.

    Raises:
        IndexError: if ``params`` has fewer than two space-separated parts.
    """
    param = params.split(" ")
    template = env.select_template(["snippets/list-documents.jinja"])
    return template.render(documents=documents, layout=param[0], type=param[1])


# Source: https://github.com/gandreadis/markdown-word-count
def count_words_in_markdown(text):
    """Return an approximate word count of ``text`` with markup stripped.

    The text is cleaned with a fixed sequence of substitutions (comments,
    footnotes, indented code, header IDs, images, HTML tags, punctuation,
    footnote references, enumerations) and then split on whitespace.

    Args:
        text: Markdown or HTML text.

    Returns:
        The number of whitespace-separated tokens left after cleaning.
    """
    # Comments (DOTALL so comments spanning several lines are removed too)
    text = re.sub(r"<!--(.*?)-->", "", text, flags=re.DOTALL)
    # Tabs to spaces (four, so tab-indented code is caught by the step below)
    text = text.replace("\t", "    ")
    # More than 1 space to 4 spaces
    text = re.sub(r"[ ]{2,}", "    ", text)
    # Footnotes
    text = re.sub(r"^\[[^]]*\][^(].*", "", text, flags=re.MULTILINE)
    # Indented blocks of code
    text = re.sub(r"^( {4,}[^-*]).*", "", text, flags=re.MULTILINE)
    # Replace newlines with spaces for uniform handling
    text = text.replace("\n", " ")
    # Custom header IDs. The text is a single line at this point, so the match
    # is bounded to one ``{#...}`` group instead of a greedy ``.*`` that would
    # swallow everything between the first and the last header ID.
    text = re.sub(r"\{#[^}]*\}", "", text)
    # Remove images
    text = re.sub(r"!\[[^\]]*\]\([^)]*\)", "", text)
    # Remove HTML tags
    text = re.sub(r"</?[^>]*>", "", text)
    # Remove special characters
    text = re.sub(r"[#*`~\-–^=<>+|/:]", "", text)
    # Remove footnote references
    text = re.sub(r"\[[0-9]*\]", "", text)
    # Remove enumerations
    text = re.sub(r"[0-9#]*\.", "", text)
    return len(text.split())


# jinja filter for date formatting
def prettydate(value, format="%d/%m/%Y"):
    """Format a timestamp with ``strftime``.

    Args:
        value: Anything ``int()`` accepts; passed to
            ``datetime.fromtimestamp`` (local time).
        format: ``strftime`` format string.

    Returns:
        The formatted date string.
    """
    return datetime.fromtimestamp(int(value)).strftime(format)


def get_ins_count(html_string):
    """Return the number of opening ``<ins`` tags in ``html_string`` (case-insensitive)."""
    return len(re.findall(r"<\s*ins\b", html_string, re.IGNORECASE))


# jinja filter to replace shortcodes in HTML
def shortcode_filter(value):
    """Expand ``{{ name args }}`` shortcodes in ``value``.

    ``show`` is handled by ``listDocuments`` and ``showImages`` by
    ``imageSpread``; shortcodes with any other name are left untouched.

    Args:
        value: Text that may contain shortcodes.

    Returns:
        ``value`` with every known shortcode replaced by its rendered HTML.
    """
    shortcode_callbacks = {"show": listDocuments, "showImages": imageSpread}

    def shortcode_replacer(match):
        shortcode_name = match.group(1).strip()
        param = match.group(2).strip()
        callback = shortcode_callbacks.get(shortcode_name)
        if callback is None:
            # Unknown shortcode: keep the original text.
            return match.group(0)
        return callback(param)

    return _SHORTCODE_PATTERN.sub(shortcode_replacer, value)


env.filters["shortcode"] = shortcode_filter
env.filters["slugify"] = slugify
env.filters["prettydate"] = prettydate


# translate a single file into HTML
def render_single_file(path, dist, name=False):
    """Render one Markdown file to ``<dist>/<name>.html``.

    The template ``<name>.jinja`` is used when it exists, ``post.jinja``
    otherwise.

    Args:
        path: Path of the Markdown source file.
        dist: Output directory; created when missing.
        name: Output/template base name. Defaults to the stem of ``path``.
    """
    # Honour an explicit name; previously the argument was always overwritten.
    name = name or Path(path).stem
    template = env.select_template([f"{name}.jinja", "post.jinja"])
    page = get_page_data(path)
    html = template.render(documents=documents, page=page, name=name)
    os.makedirs(dist, exist_ok=True)
    with open(f"{dist}/{name}.html", "w", encoding="utf-8") as output_file:
        output_file.write(html)


# find a pre-rendered page
def get_existing_page(path, slug):
    """Look up an already loaded page in ``documents``.

    Args:
        path: Path of the Markdown source file.
        slug: Slug computed for that file.

    Returns:
        * ``documents[slug]`` when ``slug`` is itself a key of ``documents``;
        * ``False`` when the file sits directly in a folder named ``content``;
        * otherwise the list of pages in the file's collection whose ``slug``
          matches. The list is empty (falsy) when nothing matches or the
          collection was never preloaded.

        NOTE(review): the collection branch returns a list while the first
        branch returns a single page; this shape is kept because templates
        outside this file may rely on it.
    """
    stem = Path(path).stem
    folder = os.path.basename(os.path.dirname(path))
    if stem == "index" and folder != "content":
        # ``<collection>/<page>/index.md`` is stored under its grandparent folder.
        folder = Path(path).parent.parent.name
    if slug in documents:
        return documents[slug]
    if folder == "content":
        return False
    # ``.get`` so a folder that was never preloaded means "not found" instead
    # of raising KeyError.
    return [item for item in documents.get(folder, []) if item.get("slug") == slug]


# compile markdown into cited HTML
def get_page_data(path):
    """Load a Markdown file and convert its content to HTML.

    An already loaded page is returned as-is (see ``get_existing_page``).
    Otherwise the front matter is parsed, ``slug``, ``filename`` and ``folder``
    are added to the metadata, and the content is converted with pandoc using
    ``--citeproc``, ``library.bib`` and ``harvard-cite-them-right.csl``. The
    resulting HTML is stored on the page as ``body``.

    Args:
        path: Path of the Markdown source file.

    Returns:
        The loaded page, or whatever ``get_existing_page`` returned when that
        value is truthy.
    """
    filename = Path(path).stem
    folder = Path(path).parent.name
    slug = slugify(filename) if folder == "content" else slugify(f"{folder}/{filename}")

    if prerendered := get_existing_page(path, slug):
        return prerendered

    page = frontmatter.load(path)
    page["slug"] = slug
    page["filename"] = filename
    page["folder"] = folder

    if "start_datetime" in page:
        # presumably an epoch timestamp in seconds - confirm against the content files
        page["has_passed"] = datetime.fromtimestamp(page["start_datetime"]) < now

    content = page.content
    if ".include" in page.content:
        print("doing an include!")
        # First pass: run the include-files Lua filter, Markdown to Markdown.
        content = pypandoc.convert_text(
            page.content,
            to="md",
            format="md",
            extra_args=["--lua-filter=include-files.lua"],
        )

    page.body = pypandoc.convert_text(
        content,
        to="html",
        format="md",
        extra_args=[
            "--citeproc",
            "--bibliography=library.bib",
            "--csl=harvard-cite-them-right.csl",
        ],
    )
    return page


# Do stuff to the circuit's pcb
def save_circuit_svg(filepath, outpath, name):
    """Copy an SVG to ``<outpath>/<name>`` with its size re-labelled in millimetres.

    The numeric ``width``/``height`` attributes of the root element are kept
    and the suffix ``mm`` is appended to them.

    Args:
        filepath: Source SVG file.
        outpath: Output directory; created when missing.
        name: Output file name.

    Raises:
        ValueError: if ``width`` or ``height`` is not a plain number
            (for example when it already carries a unit).
    """
    tree = ET.parse(filepath)
    root = tree.getroot()

    # Extract current width/height (in pixels)
    width_px = float(root.get("width", 0))
    height_px = float(root.get("height", 0))

    # Set new width/height in mm
    root.set("width", f"{width_px}mm")
    root.set("height", f"{height_px}mm")

    os.makedirs(outpath, exist_ok=True)
    tree.write(f"{outpath}/{name}")


# combine HTML & data with Jinja templates
def render_posts(path, output_path=OUTPUT_D):
    """Recursively render a content directory into ``<output_path>/<dirname>``.

    ``.md`` files are rendered to HTML, sub-directories are processed
    recursively, ``.svg`` files go through ``save_circuit_svg`` and image/audio
    files are copied unchanged. Other files are ignored.

    Args:
        path: Directory to process.
        output_path: Parent directory of the output folder.
    """
    name = Path(path).stem
    target = f"{output_path}/{name}"
    for filename in sorted(os.listdir(path)):
        file_path = Path(path) / filename
        suffix = file_path.suffix
        if suffix == ".md":
            render_single_file(file_path, target)
        elif file_path.is_dir():
            render_posts(file_path, target)
        elif suffix == ".svg":
            save_circuit_svg(file_path, target, filename)
        elif suffix.lower() in _ASSET_SUFFIXES:
            # Case-insensitive so ".PNG" or ".JPEG" are copied just like ".JPG".
            os.makedirs(target, exist_ok=True)
            shutil.copyfile(file_path, f"{target}/{filename}")


# Pre-load before compiling
def preload_documents():
    """Fill ``documents`` with build metadata and every page under ``CONTENT_D``.

    ``documents["meta"]`` gets the build time and the git commit count. Each
    sub-directory becomes a list of pages (its ``.md`` files plus the
    ``index.md`` of each nested directory); each top-level ``.md`` file is
    stored under its stem.

    Raises:
        subprocess.CalledProcessError: if the ``git rev-list`` call fails.
    """
    version = (
        subprocess.check_output(["git", "rev-list", "--count", "HEAD"])
        .decode("utf-8")
        .strip()
    )
    documents["meta"] = {"now": now.strftime("%d %B %Y - %H:%M:%S"), "version": version}

    for subdir in os.listdir(CONTENT_D):
        path = os.path.join(CONTENT_D, subdir)
        if os.path.isdir(path):
            name = Path(path).stem
            documents.setdefault(name, [])
            for filename in sorted(os.listdir(path)):
                cpath = os.path.join(path, filename)
                if filename.endswith(".md"):
                    documents[name].append(get_page_data(cpath))
                elif os.path.isdir(cpath):
                    documents[name].append(
                        get_page_data(os.path.join(cpath, "index.md"))
                    )
        elif Path(path).suffix == ".md":
            documents[Path(path).stem] = get_page_data(path)


def get_inventory():
    """Load ``component-inventory.csv`` into ``documents["inventory"]``.

    Every row of the file becomes a dict keyed by the fixed column names
    below; because the names are supplied explicitly, the first row of the
    file is treated as data too.
    """
    fieldnames = (
        "ID",
        "Amount",
        "Name",
        "Value",
        "type",
        "Date",
        "Where",
        "Mounting type",
    )
    inventory_path = os.path.join(CONTENT_D, "component-inventory.csv")
    # newline="" is what the csv module expects so quoted line breaks survive.
    with open(inventory_path, newline="", encoding="utf-8") as f:
        documents["inventory"] = list(csv.DictReader(f, fieldnames=fieldnames))


def get_wordcount():
    """Accumulate the word and ``<ins>`` counts and store the word total.

    Counts the rendered body of ``documents["thesis"]`` and of the pages in
    the ``chapters``, ``components`` and ``recipes`` collections (pages named
    ``index`` are skipped except in ``recipes``). Updates the module-level
    ``word_count`` and ``ins_count`` and sets ``documents["meta"]["count"]``.
    """
    global word_count, ins_count
    word_count += count_words_in_markdown(documents["thesis"].body)

    for section in ["chapters", "components", "recipes"]:
        for c in documents[section]:
            if section == "recipes" or c["filename"] != "index":
                count = count_words_in_markdown(c.body)
                icount = get_ins_count(c.body)
                word_count += count
                ins_count += icount
                # ``.get`` so a page without a title does not abort the build.
                print(
                    f"{c['filename']} or {c.get('title')}: has {count} words "
                    f"and {icount} todos, totalling {word_count}"
                )

    print(f"word count: {word_count} ")
    documents["meta"]["count"] = word_count


def main():
    """Run the full build: preload, count words, render content, copy assets."""
    print("....Start putting together a new document....")
    get_inventory()
    preload_documents()
    get_wordcount()

    for subdir in os.listdir(CONTENT_D):
        path = os.path.join(CONTENT_D, subdir)
        if os.path.isdir(path):
            print("Compile: an entire directory", Path(path).name)
            render_posts(path)
        elif Path(path).suffix == ".md":
            print("Compile: single page", Path(path).name)
            render_single_file(path, OUTPUT_D)
        elif Path(path).suffix in [".csv"]:
            print("Compile: not compiling ", Path(path).name)

    # Replace the output assets wholesale so files deleted from the source
    # tree do not linger in the output.
    if os.path.exists(OUT_ASSETS):
        shutil.rmtree(OUT_ASSETS)
    shutil.copytree(SRC_ASSETS, OUT_ASSETS)

    print(f"total words: {word_count}")
    print(f"total ins left: {ins_count}")


if __name__ == "__main__":
    main()