field-guide-to-salvaging-sound/app.py

import subprocess
import os
from pathlib import Path
import shutil
import csv
import re
from datetime import datetime

from jinja2 import Environment, PackageLoader, select_autoescape
import frontmatter
from slugify import slugify
import pypandoc

import xml.etree.ElementTree as ET

# TODO: make newsletter URLs absolute to klank.school
# Shared Jinja environment; templates are loaded from the "src" package.
env = Environment(
    loader=PackageLoader("src"),
    autoescape=select_autoescape()
)

# Markdown/CSV source tree, resolved to an absolute path at import time.
CONTENT_D = os.path.abspath("src/content")
# Root folder the generated HTML is written to.
OUTPUT_D = "dist"
# Static assets are copied from SRC_ASSETS to OUT_ASSETS by copy_assets().
OUT_ASSETS = "dist/assets"
SRC_ASSETS = "src/assets"
# Everything handed to the templates: filled by get_inventory() and
# preload_documents(), read by the render functions.
documents = {}
# Build timestamp, used for the "meta" entry and the has_passed flag.
now = datetime.now()
# Running total of words, incremented in get_page_data().
word_count = 0

# Utils
def getParam(params, index):
    """Return ``params[index]``, or ``False`` when ``index`` is past the end.

    ``params`` is any sized, indexable sequence; ``index`` is compared
    against ``len(params)`` before the lookup.
    """
    if len(params) > index:
        return params[index]

    return False


# jinja filter that can list documents
def listDocuments(params):
    """Render the ``snippets/list-documents.jinja`` snippet.

    ``params`` is a whitespace-separated argument string. Its first word is
    handed to the template as ``layout`` and its second word as ``type``.
    A missing word is passed as ``False`` (via ``getParam``) instead of
    raising ``IndexError``, so a one-word argument string no longer aborts
    the build.

    Returns the rendered HTML string.
    """
    # split() without a separator collapses runs of whitespace, so
    # "grid   events" does not produce empty words
    words = params.split()
    template = env.select_template(["snippets/list-documents.jinja"])

    return template.render(
        documents=documents,
        layout=getParam(words, 0),
        type=getParam(words, 1),
    )

# jinja filter to make a slug out of a string
def slugify_filter(value):
    """Return ``value`` converted to a slug by ``slugify``."""
    slug = slugify(value)
    return slug

# Source: https://github.com/gandreadis/markdown-word-count
def count_words_in_markdown(text):
    """Return an approximate word count for a markdown (or HTML) string.

    Markup that should not count as words is stripped step by step
    (comments, footnote definitions, indented code, header IDs, images,
    HTML tags, punctuation-like characters, footnote references and
    enumeration markers); the remainder is split on whitespace and counted.
    """

    # Comments; DOTALL so a comment spanning several lines is removed as a whole
    text = re.sub(r'<!--(.*?)-->', '', text, flags=re.DOTALL)
    # Tabs to spaces
    text = text.replace('\t', '    ')
    # More than 1 space to 4 spaces
    text = re.sub(r'[ ]{2,}', '    ', text)
    # Footnotes
    text = re.sub(r'^\[[^]]*\][^(].*', '', text, flags=re.MULTILINE)
    # Indented blocks of code
    text = re.sub(r'^( {4,}[^-*]).*', '', text, flags=re.MULTILINE)
    # Replace newlines with spaces for uniform handling
    text = text.replace('\n', ' ')
    # Custom header IDs; stop at the first closing brace, because the text is
    # a single line by now and a greedy match would swallow everything
    # between the first "{#" and the last "}" of the document
    text = re.sub(r'\{#[^}]*\}', '', text)
    # Remove images
    text = re.sub(r'!\[[^\]]*\]\([^)]*\)', '', text)
    # Remove HTML tags
    text = re.sub(r'</?[^>]*>', '', text)
    # Remove special characters
    text = re.sub(r'[#*`~\-–^=<>+|/:]', '', text)
    # Remove footnote references
    text = re.sub(r'\[[0-9]*\]', '', text)
    # Remove enumerations
    text = re.sub(r'[0-9#]*\.', '', text)

    return len(text.split())

# jinja filter for date formatting
def prettydate(value, format='%d/%m/%Y'):
    """Format a Unix timestamp as a date string.

    ``value`` is converted with ``int()`` and interpreted by
    ``datetime.fromtimestamp``; ``format`` is a ``strftime`` pattern.
    """
    seconds = int(value)
    moment = datetime.fromtimestamp(seconds)

    return moment.strftime(format)


# jinja filter to replace shortcodes in HTML
def shortcode_filter(value):
    """Expand ``{{ name arguments }}`` shortcodes found in ``value``.

    Each match whose name has a registered handler is replaced by the
    handler's return value, called with the stripped argument string.
    Matches with an unknown name are left exactly as they were.
    """
    handlers = {
        "show": listDocuments
    }
    shortcode_re = re.compile(r"{{\s*(\w+)\s+([^{}]+?)\s*}}")

    def expand(found):
        name = found.group(1).strip()
        argument = found.group(2).strip()
        handler = handlers.get(name)

        # unknown shortcode: keep the original text untouched
        if handler is None:
            return found.group(0)

        return handler(argument)

    return shortcode_re.sub(expand, value)


env.filters["shortcode"] = shortcode_filter
env.filters["slugify"] = slugify_filter
env.filters["prettydate"] = prettydate

# translate a single file into HTML
def render_single_file(page, path, dist, name = False):
    """Render ``page`` through a Jinja template and write it as HTML.

    The template ``<name>.jinja`` is used when it exists, otherwise
    ``post.jinja``. The result is written to ``<dist>/<name>.html``.

    ``name`` defaults to the stem of ``path``. An explicitly passed name is
    now honoured; previously it was always overwritten by the stem.
    """
    if not name:
        name = Path(path).stem

    template = env.select_template([f"{name}.jinja", "post.jinja"])
    html = template.render(documents=documents, page=page, name=name)

    # exist_ok avoids the separate exists() check
    os.makedirs(dist, exist_ok=True)

    with open(f"{dist}/{name}.html", "w", encoding="utf-8") as output_file:
        output_file.write(html)


# find a pre-rendered page
def get_existing_page(path, slug):
    """Return the already-compiled page for ``path``/``slug``, or ``False``.

    A page is looked up first as a top-level entry of ``documents`` keyed by
    ``slug``, then inside the list stored under the page's folder name. For
    an ``index`` file outside ``content`` the grandparent folder is used.

    A folder that has no list in ``documents`` (for example a nested
    directory that was never preloaded) yields ``False`` instead of raising
    ``KeyError``.
    """
    stem = Path(path).stem
    folder = os.path.basename(os.path.dirname(path))

    if stem == "index" and folder != "content":
        folder = Path(path).parent.parent.name

    if slug in documents:
        return documents[slug]

    if folder == "content":
        return False

    candidates = documents.get(folder)

    # only folder entries are lists of pages; anything else cannot hold a match
    if not isinstance(candidates, list):
        return False

    for doc in candidates:
        if doc and doc["slug"] == slug:
            return doc

    return False

# build a slug including the folder
def get_slug(path, folder, filename):
    """Return the slug for a file, prefixed with its folder unless that is ``content``.

    ``path`` is accepted but not used.
    """
    raw = filename if folder == "content" else f"{folder}/{filename}"

    return slugify(raw)

# compile markdown into cited HTML
def get_page_data(path):
    """Load a markdown file and return it as a page with rendered HTML.

    When ``get_existing_page`` already knows the page, that object is
    returned unchanged. Otherwise the front matter is loaded, ``slug``,
    ``filename`` and ``folder`` are stored on the page, the content is
    converted to HTML by pandoc (with citations) into ``page.body``, and the
    global ``word_count`` is increased by the word count of that body.
    """
    global word_count

    source = Path(path)
    filename = source.stem
    folder = source.parent.name
    slug = get_slug(path, folder, filename)

    # reuse a page that was compiled earlier in this run
    cached = get_existing_page(path, slug)
    if cached:
        return cached

    page = frontmatter.load(path)
    page["slug"] = slug
    page["filename"] = filename
    page["folder"] = folder

    if "start_datetime" in page:
        starts_at = datetime.fromtimestamp(page["start_datetime"])
        page["has_passed"] = starts_at < now

    markdown = page.content

    # extra markdown -> markdown pass through the include-files lua filter,
    # only for content that contains an include block
    if "`include" in markdown:
        markdown = pypandoc.convert_text(
            markdown,
            to='md',
            format='md',
            extra_args=["--lua-filter=include-files.lua"],
        )

    citation_args = [
        "--citeproc",
        "--bibliography=library.bib",
        "--csl=harvard-cite-them-right.csl",
    ]
    page.body = pypandoc.convert_text(
        markdown,
        to="html",
        format="md",
        extra_args=citation_args,
    )

    word_count += count_words_in_markdown(page.body)

    return page

# Do stuff to the circuit's pcb
def save_circuit_svg(filepath, outpath, name):
    """Copy an SVG to ``outpath/name`` with its size relabelled in millimetres.

    The numeric part of the root element's ``width`` and ``height`` is kept
    and the unit is replaced by ``mm`` (``"120"`` and ``"120px"`` both become
    ``"120.0mm"``). The number itself is not converted.

    An attribute that is missing, or whose value is not a number with an
    optional alphabetic unit (for example a percentage), is left untouched.
    Previously a missing attribute was written as ``0.0mm`` and a unit
    suffix raised ``ValueError``.

    ``outpath`` is created when it does not exist yet.
    """
    tree = ET.parse(filepath)
    root = tree.getroot()

    # a number, optionally followed by a unit made of letters
    size_re = re.compile(
        r"\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)\s*[A-Za-z]*\s*"
    )

    for attribute in ("width", "height"):
        raw = root.get(attribute)
        if raw is None:
            # no explicit size: forcing 0mm would collapse the drawing
            continue

        size = size_re.fullmatch(raw)
        if size is None:
            continue

        root.set(attribute, f"{float(size.group(1))}mm")

    os.makedirs(outpath, exist_ok=True)

    tree.write(f"{outpath}/{name}")


# combine HTML & data with Jinja templates
def render_posts(path, output_path=OUTPUT_D):
    """Render or copy every entry of directory ``path`` into the output tree.

    Entries are handled in sorted order and written below
    ``<output_path>/<stem of path>``: markdown files are rendered to HTML,
    sub-directories are processed recursively, SVG files go through
    ``save_circuit_svg`` and jpeg/jpg/png/mp3 files are copied as they are.
    Any other entry is ignored.
    """
    source_dir = Path(path)
    target_dir = f"{output_path}/{source_dir.stem}"
    media_suffixes = {".jpeg", ".mp3", ".jpg", ".png"}

    for filename in sorted(os.listdir(path)):
        entry = source_dir / filename
        suffix = entry.suffix

        if suffix == ".md":
            page = get_page_data(entry)
            render_single_file(page, entry, target_dir)
        elif entry.is_dir():
            render_posts(entry, target_dir)
        elif suffix == ".svg":
            save_circuit_svg(entry, target_dir, filename)
        elif suffix in media_suffixes:
            os.makedirs(target_dir, exist_ok = True)
            shutil.copyfile(entry, f"{target_dir}/{filename}")

# Pre-load before compiling
def preload_documents():
    """Compile all content up front and store it in ``documents``.

    ``documents["meta"]`` receives the build date and the git commit count.
    Each directory directly under ``CONTENT_D`` becomes a list of pages (its
    markdown files plus the ``index.md`` of each sub-directory), and each
    top-level markdown file becomes a single page keyed by its stem.

    A sub-directory without an ``index.md`` is skipped; previously it made
    the build fail with ``FileNotFoundError``.

    Raises whatever ``subprocess.check_output`` raises when the git command
    cannot be run.
    """
    global documents

    version = subprocess.check_output(["git", "rev-list", "--count", "HEAD"]).decode("utf-8").strip()

    documents["meta"] = {"now": now.strftime("%d %B %Y"), "version": version}

    for subdir in os.listdir(CONTENT_D):
        path = os.path.join(CONTENT_D, subdir)

        if os.path.isdir(path):
            name = Path(path).stem
            documents.setdefault(name, [])

            for filename in sorted(os.listdir(path)):
                cpath = os.path.join(path, filename)
                if filename.endswith(".md"):
                    documents[name].append(get_page_data(cpath))
                elif os.path.isdir(cpath):
                    index_path = os.path.join(cpath, "index.md")
                    # folders that only hold media have no page to preload
                    if os.path.isfile(index_path):
                        documents[name].append(get_page_data(index_path))

        elif Path(path).suffix == '.md':
            documents[Path(path).stem] = get_page_data(path)


def copy_assets():
    """Replace ``OUT_ASSETS`` with a fresh copy of ``SRC_ASSETS``."""
    source, target = SRC_ASSETS, OUT_ASSETS

    # remove the previous copy first so the new tree starts empty
    already_there = os.path.exists(target)
    if already_there:
        shutil.rmtree(target)

    shutil.copytree(source, target)


def get_inventory():
    """Load the component inventory CSV into ``documents['inventory']``.

    Every row becomes a dict keyed by the fixed column names below. Because
    the column names are passed explicitly, the first row of the file is
    returned as data as well.

    The file is opened with ``newline=""``, as the csv module requires for
    correct handling of quoted line breaks, and as UTF-8 to match the
    encoding used for the generated HTML.
    """
    global documents

    fieldnames = ('ID', 'Amount', 'Name', 'Value', 'type', 'Date', 'Where','Mounting type')

    with open("src/content/component-inventory.csv", newline="", encoding="utf-8") as f:
        documents['inventory'] = list(csv.DictReader(f, fieldnames=fieldnames))


def main():
    """Build the whole site: load data, render all content, copy assets.

    Directories under ``CONTENT_D`` are rendered recursively, top-level
    markdown files are rendered as single pages, and CSV files are only
    reported. The total word count is printed at the end.
    """
    print("....Start putting together a new document....")
    get_inventory()
    preload_documents()

    for subdir in os.listdir(CONTENT_D):
        path = os.path.join(CONTENT_D, subdir)
        entry = Path(path)
        label = entry.name

        if os.path.isdir(path):
            print("Compile: an entire directory", label)
            render_posts(path)
            continue

        suffix = entry.suffix
        if suffix == '.md':
            print("Compile: single page", label)
            page = get_page_data(path)
            render_single_file(page, path, OUTPUT_D)
        elif suffix in [".csv"]:
            print("Compile: not compiling ", label)

    copy_assets()

    print(f"total words: {word_count}")

# Start the build; there is no __main__ guard, so this also runs on import.
main()