# field-guide-to-salvaging-sound/app.py

import csv
import os
import re
import shutil
import subprocess
import xml.etree.ElementTree as ET
from datetime import datetime
from pathlib import Path

import frontmatter
import pypandoc
from jinja2 import Environment, PackageLoader, select_autoescape
from slugify import slugify

# TODO: make newsletter URLs absolute to klank.school
# Jinja environment; templates are looked up through the "src" package loader.
env = Environment(loader=PackageLoader("src"), autoescape=select_autoescape())

# Absolute path of the content tree walked by preload_documents() and main().
CONTENT_D = os.path.abspath("src/content")
# Build output root and asset folders (relative to the working directory).
OUTPUT_D = "dist"
OUT_ASSETS = "dist/assets"
SRC_ASSETS = "src/assets"
# Shared registry of pre-loaded pages and metadata; passed to templates as
# `documents`.
documents = {}
# Build timestamp, captured once when the module is loaded.
now = datetime.now()
# Running totals updated by get_wordcount().
word_count = 0
ins_count = 0

def imageSpread(params):
    """Render the image-spread snippet for one pre-loaded document.

    Used as the callback for the ``showImages`` shortcode.

    Args:
        params: Whitespace-separated string. The first token is the key of a
            collection in ``documents``; the second is the ``filename`` of the
            entry inside that collection to render.

    Returns:
        The HTML produced by ``snippets/spread-images.jinja``.

    Raises:
        KeyError: If the collection is not present in ``documents``.
        IndexError: If the filename token is missing, or no entry in the
            collection has that filename.
    """
    # split() without an argument tolerates repeated spaces/newlines between
    # the two arguments; split(" ") would produce empty tokens there.
    tokens = params.split()
    if len(tokens) < 2:
        raise IndexError(
            f"showImages expects '<collection> <filename>', got {params!r}"
        )
    collection, filename = tokens[0], tokens[1]

    # First entry whose filename matches, or None when there is no match.
    entry = next(
        (item for item in documents[collection] if item.get("filename") == filename),
        None,
    )
    if entry is None:
        raise IndexError(
            f"showImages: no document named {filename!r} in {collection!r}"
        )

    template = env.select_template(["snippets/spread-images.jinja"])
    return template.render(documents=documents, content=entry, type=filename)

def listDocuments(params):
    """Render the document-list snippet.

    Used as the callback for the ``show`` shortcode.

    Args:
        params: Whitespace-separated string. The first token is handed to the
            template as ``layout``, the second as ``type``.

    Returns:
        The HTML produced by ``snippets/list-documents.jinja``.

    Raises:
        IndexError: If fewer than two tokens are supplied.
    """
    # split() without an argument tolerates repeated spaces/newlines between
    # the two arguments; split(" ") would produce empty tokens there.
    tokens = params.split()
    if len(tokens) < 2:
        raise IndexError(f"show expects '<layout> <type>', got {params!r}")
    layout, doc_type = tokens[0], tokens[1]

    template = env.select_template(["snippets/list-documents.jinja"])
    return template.render(documents=documents, layout=layout, type=doc_type)


# Source: https://github.com/gandreadis/markdown-word-count
def count_words_in_markdown(text):
    """Count the words in a Markdown string, ignoring markup.

    The text is stripped of comments, footnote definitions, indented code
    blocks, custom header IDs, images, HTML tags, a set of punctuation
    characters, footnote references and enumeration markers; what remains is
    split on whitespace and counted.

    Args:
        text: The Markdown source.

    Returns:
        The number of whitespace-separated tokens left after stripping.
    """
    # Comments. DOTALL lets a comment span several lines.
    text = re.sub(r"<!--(.*?)-->", "", text, flags=re.DOTALL)
    # Tabs to spaces
    text = text.replace("\t", "    ")
    # More than 1 space to 4 spaces
    text = re.sub(r"[ ]{2,}", "    ", text)
    # Footnotes
    text = re.sub(r"^\[[^]]*\][^(].*", "", text, flags=re.MULTILINE)
    # Indented blocks of code
    text = re.sub(r"^( {4,}[^-*]).*", "", text, flags=re.MULTILINE)
    # Replace newlines with spaces for uniform handling
    text = text.replace("\n", " ")
    # Custom header IDs. The whole document is a single line at this point,
    # so the match must stop at the first closing brace; a greedy ".*" would
    # swallow all text between the first and the last header ID.
    text = re.sub(r"\{#[^}]*\}", "", text)
    # Remove images
    text = re.sub(r"!\[[^\]]*\]\([^)]*\)", "", text)
    # Remove HTML tags
    text = re.sub(r"</?[^>]*>", "", text)
    # Remove special characters
    text = re.sub(r"[#*`~\-–^=<>+|/:]", "", text)
    # Remove footnote references
    text = re.sub(r"\[[0-9]*\]", "", text)
    # Remove enumerations
    text = re.sub(r"[0-9#]*\.", "", text)

    return len(text.split())


def prettydate(value, format="%d/%m/%Y"):
    """Jinja filter: format a Unix timestamp as a date string.

    Args:
        value: Timestamp in seconds; anything ``int()`` accepts.
        format: ``strftime`` pattern, day/month/year by default.

    Returns:
        The formatted local-time date string.
    """
    seconds = int(value)
    moment = datetime.fromtimestamp(seconds)
    return moment.strftime(format)


def get_ins_count(html_string):
    """Count the opening ``<ins`` tags in an HTML string.

    Matching is case-insensitive and allows whitespace between ``<`` and the
    tag name.

    Args:
        html_string: The HTML to scan.

    Returns:
        The number of opening ``<ins`` tags found.
    """
    opening_tags = re.finditer(r'<\s*ins\b', html_string, re.IGNORECASE)
    return sum(1 for _ in opening_tags)

def shortcode_filter(value):
    """Jinja filter: expand ``{{ name args }}`` shortcodes found in a string.

    Known shortcodes are replaced by the return value of their callback,
    which receives the stripped argument string. Unknown shortcodes are left
    exactly as they were written.

    Args:
        value: The text (typically rendered HTML) to scan.

    Returns:
        The text with every recognised shortcode substituted.
    """
    handlers = {"show": listDocuments, "showImages": imageSpread}

    def expand(match):
        handler = handlers.get(match.group(1).strip())
        if handler is None:
            # Not one of ours: keep the original text untouched.
            return match.group(0)
        return handler(match.group(2).strip())

    return re.sub(r"{{\s*(\w+)\s+([^{}]+?)\s*}}", expand, value)


# Register the custom filters on the shared environment so templates can use
# them by name.
env.filters["shortcode"] = shortcode_filter
env.filters["slugify"] = slugify
env.filters["prettydate"] = prettydate


def render_single_file(path, dist, name=False):
    """Render one Markdown file to ``<dist>/<name>.html``.

    The template ``<name>.jinja`` is used when it exists, otherwise
    ``post.jinja``.

    Args:
        path: Path of the Markdown source file.
        dist: Output directory; created when missing.
        name: Optional base name for the template lookup and the output
            file. Defaults to the stem of ``path``. (Previously this argument
            was accepted but always overwritten.)
    """
    name = name or Path(path).stem
    template = env.select_template([f"{name}.jinja", "post.jinja"])
    page = get_page_data(path)
    html = template.render(documents=documents, page=page, name=name)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dist, exist_ok=True)

    with open(f"{dist}/{name}.html", "w", encoding="utf-8") as output_file:
        output_file.write(html)


def get_existing_page(path, slug):
    """Look up an already loaded page in ``documents``.

    Args:
        path: Path of the Markdown source file.
        slug: Slug computed for that file.

    Returns:
        ``documents[slug]`` when the slug itself is a key of ``documents``;
        ``False`` when the file's lookup folder is ``content``; otherwise the
        list of entries in ``documents[<folder>]`` whose ``slug`` matches.
        That list is empty (falsy) when nothing matches or when the folder
        was never pre-loaded.
    """
    source = Path(path)
    folder = source.parent.name

    # An index.md inside a sub-folder is stored under the collection one
    # level up, so look there instead.
    if source.stem == "index" and folder != "content":
        folder = source.parent.parent.name

    if slug in documents:
        return documents[slug]

    if folder == "content":
        return False

    # .get() keeps an unknown folder a plain "not found" instead of a KeyError.
    return [item for item in documents.get(folder, []) if item.get("slug") == slug]


def get_page_data(path):
    """Load a Markdown file and compile it into cited HTML.

    Returns whatever ``get_existing_page`` finds when that is truthy.
    Otherwise the file's front matter is loaded, ``slug``, ``filename`` and
    ``folder`` are stored on the page, ``has_passed`` is set when a
    ``start_datetime`` is present, and the Markdown is converted to HTML by
    pandoc (with citeproc) and stored on ``page.body``.

    Args:
        path: Path of the Markdown source file.

    Returns:
        The previously loaded result, or the freshly loaded page object.
    """
    source = Path(path)
    filename = source.stem
    folder = source.parent.name

    # Top-level pages are slugged by filename only; everything else is
    # prefixed with its folder name.
    if folder == "content":
        slug = slugify(filename)
    else:
        slug = slugify(f"{folder}/{filename}")

    cached = get_existing_page(path, slug)
    if cached:
        return cached

    page = frontmatter.load(path)
    page["slug"] = slug
    page["filename"] = filename
    page["folder"] = folder

    if "start_datetime" in page:
        starts_at = datetime.fromtimestamp(page["start_datetime"])
        page["has_passed"] = starts_at < now

    markdown = page.content

    # Resolve include directives with a first Markdown-to-Markdown pass.
    if ".include" in markdown:
        print("doing an include!")
        markdown = pypandoc.convert_text(
            markdown,
            to="md",
            format="md",
            extra_args=["--lua-filter=include-files.lua"],
        )

    citation_args = [
        "--citeproc",
        "--bibliography=library.bib",
        "--csl=harvard-cite-them-right.csl",
    ]
    page.body = pypandoc.convert_text(
        markdown,
        to="html",
        format="md",
        extra_args=citation_args,
    )

    return page


def _svg_length_value(raw):
    """Return the numeric part of an SVG length such as ``"12"`` or ``"12.5mm"``."""
    try:
        return float(raw)
    except ValueError:
        # Tolerate a trailing unit ("px", "mm", ...) after the number.
        match = re.match(r"\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)", str(raw))
        if match is None:
            raise
        return float(match.group(1))


def save_circuit_svg(filepath, outpath, name):
    """Copy a circuit SVG to ``outpath``, relabelling its size in millimetres.

    The numeric ``width`` and ``height`` of the root element are kept and
    given an ``mm`` unit; the numbers are not rescaled. A missing attribute
    is treated as 0.

    Args:
        filepath: Path of the source SVG.
        outpath: Destination directory; created when missing.
        name: File name to write inside ``outpath``.

    Raises:
        ValueError: If ``width`` or ``height`` does not start with a number.
    """
    # Without registered prefixes ElementTree serialises every tag as
    # "ns0:svg", "ns0:path", ...; registering them keeps plain <svg> output.
    ET.register_namespace("", "http://www.w3.org/2000/svg")
    ET.register_namespace("xlink", "http://www.w3.org/1999/xlink")

    tree = ET.parse(filepath)
    root = tree.getroot()

    # Current width/height as bare numbers
    width = _svg_length_value(root.get("width", 0))
    height = _svg_length_value(root.get("height", 0))

    # Same numbers, now expressed in mm
    root.set("width", f"{width}mm")
    root.set("height", f"{height}mm")

    os.makedirs(outpath, exist_ok=True)

    tree.write(f"{outpath}/{name}")


def render_posts(path, output_path=OUTPUT_D):
    """Recursively build one content directory into the output tree.

    Everything is written to ``<output_path>/<stem of path>``. Markdown files
    are rendered, sub-directories are processed recursively, SVGs go through
    ``save_circuit_svg`` and media files are copied unchanged.

    Args:
        path: Content directory to process.
        output_path: Parent directory of the generated folder.
    """
    source_dir = Path(path)
    target_dir = f"{output_path}/{source_dir.stem}"
    media_suffixes = {".jpeg", ".jpg", ".mp3", ".png", ".webp"}

    for filename in sorted(os.listdir(source_dir)):
        file_path = source_dir / filename

        if file_path.suffix == ".md":
            render_single_file(file_path, target_dir)
        elif file_path.is_dir():
            render_posts(file_path, target_dir)
        elif file_path.suffix == ".svg":
            save_circuit_svg(file_path, target_dir, filename)
        # Compare case-insensitively so ".PNG", ".JPEG", ... are copied just
        # like ".JPG" already was.
        elif file_path.suffix.lower() in media_suffixes:
            os.makedirs(target_dir, exist_ok=True)
            shutil.copyfile(file_path, f"{target_dir}/{filename}")


def preload_documents():
    """Fill ``documents`` with build metadata and every page under CONTENT_D.

    ``documents["meta"]`` receives the build time and the git commit count.
    Each sub-directory of the content folder becomes a list of pages (one per
    Markdown file, plus the ``index.md`` of each nested directory); each
    top-level Markdown file is stored under its own stem.
    """
    commit_count = subprocess.check_output(["git", "rev-list", "--count", "HEAD"])
    version = commit_count.decode("utf-8").strip()

    documents["meta"] = {"now": now.strftime("%d %B %Y - %H:%M:%S"), "version": version}

    for entry in os.listdir(CONTENT_D):
        entry_path = os.path.join(CONTENT_D, entry)

        if not os.path.isdir(entry_path):
            # Stand-alone page directly inside the content folder.
            if Path(entry_path).suffix == ".md":
                documents[Path(entry_path).stem] = get_page_data(entry_path)
            continue

        collection = documents.setdefault(Path(entry_path).stem, [])

        for child in sorted(os.listdir(entry_path)):
            child_path = os.path.join(entry_path, child)

            if child.endswith(".md"):
                collection.append(get_page_data(child_path))
            elif os.path.isdir(child_path):
                # A nested folder is represented by its index.md.
                index_path = os.path.join(child_path, "index.md")
                collection.append(get_page_data(index_path))


def get_inventory():
    """Load the component inventory CSV into ``documents["inventory"]``.

    Every row becomes a dict keyed by the fixed column names below. Because
    the column names are supplied explicitly, the first row of the file is
    read as data as well.
    """
    columns = (
        "ID",
        "Amount",
        "Name",
        "Value",
        "type",
        "Date",
        "Where",
        "Mounting type",
    )

    # newline="" is what the csv module requires so that newlines inside
    # quoted fields are parsed correctly; the explicit encoding keeps the
    # result independent of the machine's locale.
    with open(
        "src/content/component-inventory.csv", newline="", encoding="utf-8"
    ) as f:
        documents["inventory"] = list(csv.DictReader(f, fieldnames=columns))


def get_wordcount():
    """Tally words and ``<ins>`` markers over the thesis and its sections.

    Adds the thesis body's word count to the global ``word_count``, then
    walks the chapters, components and recipes collections. ``index`` entries
    are skipped except in recipes. Each counted entry is logged, and the
    final word total is stored in ``documents["meta"]["count"]``.
    """
    global word_count, ins_count

    word_count += count_words_in_markdown(documents["thesis"].body)

    for section in ("chapters", "components", "recipes"):
        for doc in documents[section]:
            # Index pages only count inside the recipes section.
            if section != "recipes" and doc["filename"] == "index":
                continue

            words = count_words_in_markdown(doc.body)
            todos = get_ins_count(doc.body)
            word_count += words
            ins_count += todos
            print(f"{doc['filename']} or {doc['title']}: has {words} words and {todos} todos, totalling {word_count}")

    print(f"word count: {word_count} ")
    documents["meta"]["count"] = word_count


def main():
    """Run the whole build.

    Loads the inventory and all documents, computes the word count, renders
    every entry of the content folder, replaces the output assets folder with
    a fresh copy of the source assets and prints the totals.
    """
    global ins_count

    print("....Start putting together a new document....")
    get_inventory()
    preload_documents()
    get_wordcount()

    for entry in os.listdir(CONTENT_D):
        entry_path = os.path.join(CONTENT_D, entry)
        label = Path(entry_path).name
        suffix = Path(entry_path).suffix

        if os.path.isdir(entry_path):
            print("Compile: an entire directory", label)
            render_posts(entry_path)
        elif suffix == ".md":
            print("Compile: single page", label)
            render_single_file(entry_path, OUTPUT_D)
        elif suffix == ".csv":
            print("Compile: not compiling ", label)

    # Start from a clean assets folder so removed files do not linger.
    if os.path.exists(OUT_ASSETS):
        shutil.rmtree(OUT_ASSETS)
    shutil.copytree(SRC_ASSETS, OUT_ASSETS)

    print(f"total words: {word_count}")
    print(f"total ins left: {ins_count}")


# Run the build when this file is executed; there is no __main__ guard, so
# importing the module triggers the build as well.
main()