allaroundhere.org/build.py

450 lines
13 KiB
Python

import argparse
import datetime
import hashlib
import shutil
import sys
import uuid
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List
from typing import NamedTuple
from typing import Optional
from typing import Sequence
from typing import Tuple
from typing import Union
import jinja2
import jsmin
import marshmallow as msh
import minify_html
import ruamel.yaml
# Module-wide YAML parser created with ruamel's "safe" loader type; main() uses
# it to read both the site config file and the individual post files.
yaml = ruamel.yaml.YAML(typ="safe")
def multi_replace(source: str, replacements: Sequence[Tuple[str, str]]) -> str:
    """Apply a series of substring replacements to a string, in order.

    Each ``(old, new)`` pair is applied to the result of the previous pair, so
    every replacement in the sequence contributes to the final value.

    :param source: Text to transform
    :param replacements: Ordered ``(old, new)`` pairs, each passed to ``str.replace``
    :returns: The transformed text; ``source`` itself when ``replacements`` is empty
    """
    # Start from the input so an empty replacement list is a no-op rather than
    # an unbound local, and chain on the running result so earlier
    # replacements are not discarded by later ones.
    replaced = source
    for old, new in replacements:
        replaced = replaced.replace(old, new)
    return replaced
class PathField(msh.fields.String):
    """String-based field whose loaded value is a resolved ``Path``."""

    def _deserialize(self, value, *args, **kwargs):
        """Convert the raw value to a ``Path`` with ``~`` expanded, then resolve it."""
        expanded = Path(value).expanduser()
        return expanded.resolve()
class BaseSchema(msh.Schema):
    """Schema base that converts loaded data into the schema's ``Container`` type.

    The hook below reads ``self.Container``, so every concrete subclass needs
    to provide a ``Container`` attribute that accepts the loaded fields as
    keyword arguments.
    """

    @msh.post_load
    def _make_dataclass(self, data: Dict[str, Any], *args, **kwargs):
        """Instantiate ``Container`` from the mapping of deserialized fields."""
        container_type = self.Container
        return container_type(**data)
class MediaSerializer(BaseSchema):
    """Schema for a single media entry belonging to a post."""

    @dataclass
    class Container:
        """Loaded media entry with helpers that build its image URLs.

        Each URL helper takes the site config and returns a URL built from the
        ``config.build.kodak`` settings when those are set; otherwise it
        returns ``link`` unchanged.
        """

        title: str
        link: str
        anchor: str
        content: Optional[str]

        def preload_url(self, config) -> str:
            """Return the URL for the ``preload`` rendition of this item."""
            kodak = config.build.kodak
            if not kodak:
                return self.link
            return f"{kodak.baseurl}image/{self.link}/{kodak.preload}.jpeg"

        def asset_url(self, config) -> str:
            """Return the URL for the ``asset`` rendition of this item."""
            kodak = config.build.kodak
            if not kodak:
                return self.link
            return f"{kodak.baseurl}image/{self.link}/{kodak.asset}.jpeg"

        def source_url(self, config) -> str:
            """Return the URL for the ``original`` image of this item."""
            kodak = config.build.kodak
            if not kodak:
                return self.link
            return f"{kodak.baseurl}image/{self.link}/original"

    title = msh.fields.String()
    link = msh.fields.String()
    anchor = msh.fields.String(allow_none=True, missing=None)
    content = msh.fields.String(allow_none=True, missing=None)

    @msh.post_load
    def _make_default_anchor(self, data, **kwargs):
        """Derive an anchor from the title when none was supplied.

        NOTE(review): this reads attributes off ``data``, so it assumes the
        inherited post-load hook has already turned the loaded dict into a
        ``Container`` -- confirm marshmallow runs the hooks in that order.
        """
        if data.anchor:
            return data
        # Spaces become dashes; the listed punctuation marks map to nothing
        substitutions = [(" ", "-"), ("?", ""), ("!", ""), (".", ""), (":", "")]
        data.anchor = multi_replace(data.title, substitutions)
        return data
class LinkSerializer(BaseSchema):
    """Schema for a link entry; nested under ``PostSerializer.links``."""

    @dataclass
    class Container:
        """Loaded link: an optional title, the target URL, and an icon string."""

        title: Optional[str]
        url: str
        icon: str

    url = msh.fields.URL()
    title = msh.fields.String(allow_none=True, missing=None)
    # Default icon when the entry does not specify one; presumably an icon-font
    # CSS class consumed by the templates -- confirm against the templates
    icon = msh.fields.String(missing="fas fa-external-link-square-alt")
class LocationSeralizer(BaseSchema):
    """Schema for a post's location; nested under ``PostSerializer.location``."""

    class Container(NamedTuple):
        """Loaded location: a display title and a URL."""

        title: str
        link: str

    title = msh.fields.String()
    link = msh.fields.URL()
class PostSerializer(BaseSchema):
    """Schema for a single post definition.

    Besides per-field validation, the schema rejects posts whose media items
    reuse the same anchor.
    """

    @dataclass
    class Container:
        """Loaded post data plus a helper for building the banner image URL."""

        title: str
        description: Optional[str]
        location: LocationSeralizer.Container
        date: datetime.date
        banner: Optional[str]
        slug: str
        links: Sequence[LinkSerializer.Container]
        media: Sequence[MediaSerializer.Container]

        def banner_url(self, config) -> Optional[str]:
            """Return the URL of the post's banner image.

            :param config: Site config; ``config.build.kodak`` selects whether a
                           Kodak image URL is built or the raw value is returned
            :returns: The banner URL, or the unset ``banner`` value (``None``)
                      when the post has no banner
            """
            # Without a banner there is nothing to link to; formatting it into
            # a Kodak URL would yield a bogus ".../image/None/..." address.
            if not self.banner:
                return self.banner
            kodak = config.build.kodak
            if kodak:
                return f"{kodak.baseurl}image/{self.banner}/{kodak.banner}.jpeg"
            return self.banner

    title = msh.fields.String()
    description = msh.fields.String(missing=None, allow_none=True)
    location = msh.fields.Nested(LocationSeralizer)
    # Raw passes the value through untouched; Container annotates it as a date
    date = msh.fields.Raw()
    banner = msh.fields.String(missing=None, allow_none=True)
    slug = msh.fields.String(
        validate=msh.validate.Regexp(r"^[a-z0-9][a-z0-9\-]+[a-z0-9]$")
    )
    # Pass the ``list`` callable rather than a list instance so every loaded
    # post gets its own fresh default instead of sharing one mutable object.
    links = msh.fields.List(msh.fields.Nested(LinkSerializer), missing=list)
    media = msh.fields.List(msh.fields.Nested(MediaSerializer), missing=list)

    @msh.validates_schema
    def _unique_anchors(self, data: Dict[str, Any], **kwargs):
        """Reject the post when two media items share the same anchor.

        :raises marshmallow.ValidationError: If any non-``None`` anchor appears
                                             more than once
        """
        seen = set()
        repeated = set()
        # Single pass: anything encountered a second time is a duplicate
        for item in data["media"]:
            if item.anchor is None:
                continue
            if item.anchor in seen:
                repeated.add(item.anchor)
            seen.add(item.anchor)
        if repeated:
            raise msh.ValidationError(
                f"Media anchors used multiple times: {repeated}"
            )
class ConfigBuildKodakSerializer(BaseSchema):
    """Schema for the optional ``kodak`` section of the build config.

    The loaded values are interpolated into image URLs by the ``*_url`` helpers
    on the media and post containers.
    """

    @dataclass
    class Container:
        """Loaded Kodak settings."""

        # The URL helpers append "image/..." directly to this value, so it
        # presumably needs to end with a slash -- confirm against real configs
        baseurl: str
        link_original: bool
        # Name segments placed before ".jpeg" in the asset, banner, and preload
        # image URLs respectively
        asset: str
        banner: str
        preload: str

    baseurl = msh.fields.URL()
    link_original = msh.fields.Boolean(missing=False)
    asset = msh.fields.String()
    banner = msh.fields.String()
    preload = msh.fields.String()
class ConfigBuildSerializer(BaseSchema):
    """Schema for the ``build`` section of the site config.

    Describes where the build reads its inputs (posts, static files, bundle
    sources, templates) and where it writes the generated site.
    """

    @dataclass
    class Container:
        """Loaded build settings."""

        generated: Path
        posts: Path
        static: Path
        bundle: Path
        templates: Path
        post_base: str
        # ``None`` when the config has no ``kodak`` section
        kodak: Optional[ConfigBuildKodakSerializer.Container]

    generated = PathField(missing=Path("publish"))
    posts = PathField(missing=Path("posts"))
    static = PathField(missing=Path("static"))
    bundle = PathField(missing=Path("bundle"))
    templates = PathField(missing=Path("templates"))
    # The pattern is anchored at both ends like the other patterns in this
    # file: the Regexp validator only matches from the start of the string, so
    # without the anchors any value with a valid leading character would pass.
    post_base = msh.fields.String(
        missing="explore", validate=msh.validate.Regexp(r"^[a-z0-9\-]+$")
    )
    kodak = msh.fields.Nested(ConfigBuildKodakSerializer, missing=None)
class ConfigSerializer(BaseSchema):
    """Schema for the top-level site config file."""

    @dataclass
    class Container:
        """Loaded site config."""

        domain: str
        https: bool
        baseurl: str
        title: str
        email: str
        description: str
        keywords: Sequence[str]
        social: Dict[str, str]
        build: ConfigBuildSerializer.Container

        @property
        def url(self) -> str:
            """Full site URL: scheme (chosen by ``https``), ``domain``, then ``baseurl``."""
            return f"http{'s' if self.https else ''}://{self.domain}{self.baseurl}"

    domain = msh.fields.String()
    https = msh.fields.Boolean(missing=True)
    baseurl = msh.fields.String()
    title = msh.fields.String()
    email = msh.fields.Email()
    description = msh.fields.String()
    # Keywords are restricted to lowercase alphanumeric strings
    keywords = msh.fields.List(
        msh.fields.String(validate=msh.validate.Regexp(r"^[a-z0-9]+$"))
    )
    # Mapping of a known social platform name to a URL. The default is the
    # ``dict`` callable rather than a dict instance so each loaded config gets
    # its own fresh mapping instead of sharing one mutable object.
    social = msh.fields.Dict(
        keys=msh.fields.String(
            validate=msh.validate.OneOf(
                ["instagram", "facebook", "twitter", "mastodon", "patreon"]
            )
        ),
        values=msh.fields.Url(),
        missing=dict,
    )
    build = msh.fields.Nested(ConfigBuildSerializer)
def get_args() -> argparse.Namespace:
    """Parse the build script's command line options.

    :returns: Namespace with ``config`` (defaults to ``config.yaml`` in the
              current working directory) and the boolean flags ``check`` and
              ``dev``
    """
    default_config = Path.cwd() / "config.yaml"
    switches = (
        ("--check", "Check the config without building"),
        ("--dev", "Run local development server"),
    )

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c", "--config", help="Path to the config file", default=default_config
    )
    for flag, description in switches:
        parser.add_argument(flag, action="store_true", help=description)
    return parser.parse_args()
def _hash_from_file(path: Union[str, Path]):
    """Compute the SHA-256 hash of a file's contents.

    .. note:: The file is read unbuffered into a single reusable 1 MiB buffer
              so that large image files can be hashed without loading them
              whole. The hashing code was adapted from here:

              https://stackoverflow.com/a/44873382/5361209

    :param path: Location of the file to hash
    :returns: The ``hashlib`` hash object after consuming the whole file
    """
    digest = hashlib.sha256()
    window = memoryview(bytearray(1024 * 1024))
    with Path(path).open("rb", buffering=0) as infile:
        while True:
            count = infile.readinto(window)  # type: ignore
            # A read of zero bytes marks the end of the file
            if count == 0:
                break
            digest.update(window[:count])
    return digest
def _copy_resource(path: Path, dest_dir: Path):
    """Copy a file, or every file below a directory, into a destination directory.

    A file lands directly in ``dest_dir`` (created on demand). A directory is
    walked recursively and its files land under ``dest_dir / path.name``.
    Anything that is neither a file nor a directory is ignored.

    :param path: File or directory to copy
    :param dest_dir: Directory that receives the copy
    """
    if path.is_file():
        dest_dir.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(path, dest_dir / path.name, follow_symlinks=True)
        return
    if not path.is_dir():
        return
    nested_dest = dest_dir / path.name
    for child in path.iterdir():
        _copy_resource(child, nested_dest)
def _write_template(env: jinja2.Environment, name: str, dest: Path, **kwargs):
    """Render a template, minify the result, and write it to a file.

    :param env: Jinja environment to load the template from
    :param name: Name of the template to render
    :param dest: File to write; missing parent directories are created
    :param kwargs: Variables made available to the template
    """
    # Create the whole directory chain so the call does not depend on some
    # earlier step having already created the intermediate directories.
    dest.parent.mkdir(parents=True, exist_ok=True)
    template = env.get_template(name).render(**kwargs)
    # NOTE(review): every template goes through the HTML minifier, including
    # the sitemap and robots.txt ones -- confirm it leaves those intact.
    minified = minify_html.minify(template, keep_comments=False)
    # Write UTF-8 explicitly (matching how the config is read) instead of
    # relying on the platform's default encoding.
    with dest.open("w", encoding="utf-8") as outfile:
        outfile.write(minified)
def _build_bundle(
    config: ConfigSerializer.Container, ftype: str, dest: str, sources: List[str]
) -> str:
    """Concatenate and minify source files into a content-addressed bundle.

    Sources are read from ``<bundle>/<ftype>/<source>.<ftype>`` and the bundle
    is written to ``<generated>/<ftype>/<slug>.<ftype>``, where the slug is
    ``dest`` followed by the first eight hex digits of the SHA-256 of the
    minified content.

    :param config: Site config supplying the bundle and output directories
    :param ftype: Bundle type, used (lowercased) as directory name and file
                  extension; ``js`` is minified with ``jsmin``, anything else
                  with ``minify_html``
    :param dest: Base name for the generated bundle
    :param sources: Names (without extension) of the files to bundle, in order
    :returns: The bundle slug (file name without extension)
    :raises ValueError: If one of the named source files does not exist
    """
    extension = ftype.lower()
    output_dir = config.build.generated / extension
    output_dir.mkdir(exist_ok=True, parents=True)

    content: List[str] = []
    for source in sources:
        source_path = config.build.bundle / extension / f"{source}.{extension}"
        try:
            content.append(source_path.read_text(encoding="utf-8"))
        except FileNotFoundError as err:
            raise ValueError(
                f"No {ftype.upper()} source file to bundle named '{source}'"
            ) from err

    combined = "\n\n".join(content)
    if extension == "js":
        minified = jsmin.jsmin(combined)
    else:
        minified = minify_html.minify(combined, keep_comments=False)

    # Name the bundle after its content so a changed bundle gets a new name
    digest = hashlib.sha256(minified.encode("utf-8")).hexdigest()
    slug = f"{dest}-{digest[:8]}"
    (output_dir / f"{slug}.{extension}").write_text(minified, encoding="utf-8")
    return slug
def _dev(
    cwd: Path,
    config: ConfigSerializer.Container,
    posts: Sequence[PostSerializer.Container],
):
    """Build the site with the config overridden for local development.

    The config is modified in place so that ``config.url`` points at
    ``http://localhost:5000/``, then a normal build is run. The code that would
    serve the generated site is currently commented out, so this function only
    builds.

    :param cwd: Directory that relative build paths are resolved against
    :param config: Site config; mutated in place
    :param posts: Posts to render
    """
    config.https = False
    config.domain = "localhost:5000"
    # The config field is named ``baseurl``; assigning any other attribute name
    # would leave the URL property unaffected.
    config.baseurl = "/"

    # server = http.server.HTTPServer(
    #     ("127.0.0.1", 5000),
    #     functools.partial(
    #         http.server.SimpleHTTPRequestHandler, directory=str(cwd / config.build.generated)
    #     ),
    # )

    _build(cwd, config, posts)

    # print(f"Serving dev site at {config.url}, press Ctrl+C to exit", file=sys.stderr)
    # try:
    #     server.serve_forever()
    # except KeyboardInterrupt:
    #     print("Stopping...", file=sys.stderr)
    #     server.shutdown()
def _build(
    cwd: Path,
    config: ConfigSerializer.Container,
    posts: Sequence[PostSerializer.Container],
):
    """Render the complete site into the configured output directory.

    Writes, in order: the index page, the sitemap, ``robots.txt``, a copy of
    the static directory contents, the post listing page, and one page per
    post. CSS and JS bundles are generated for the index, listing, and post
    pages along the way.

    :param cwd: Directory that relative build paths are resolved against
    :param config: Site config
    :param posts: Posts to render, in the order the listing should receive them
    """
    output = cwd / config.build.generated
    static = cwd / config.build.static
    print(f"Rebuilding static assets into {output}", file=sys.stderr)

    # NOTE(review): the template names used below end in ".j2"; confirm that
    # select_autoescape treats them as "html"/"xml" templates as intended.
    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(str(cwd / config.build.templates)),
        autoescape=jinja2.select_autoescape(["html", "xml"]),
    )

    today = datetime.datetime.utcnow()

    # The static directory is optional: list it once, up front, so that both
    # robots.txt generation and the copy step tolerate it being absent. Sorting
    # keeps the generated robots.txt independent of filesystem ordering.
    static_items = sorted(static.iterdir()) if static.exists() else []

    index_css_bundle = _build_bundle(config, "css", "index", ["common", "home"])
    index_js_bundle = _build_bundle(
        config, "js", "index", ["random-background", "preloader"]
    )
    _write_template(
        env,
        "index.html.j2",
        output / "index.html",
        config=config,
        today=today,
        css_bundle=index_css_bundle,
        js_bundle=index_js_bundle,
    )

    _write_template(
        env, "sitemap.xml.j2", output / "sitemap.xml", config=config, today=today
    )

    # Every top-level static directory is passed to the template as disallowed
    _write_template(
        env,
        "robots.txt.j2",
        output / "robots.txt",
        config=config,
        today=today,
        disallowed=[item.name for item in static_items if item.is_dir()],
    )

    for item in static_items:
        _copy_resource(item, output)

    explore_css_bundle = _build_bundle(config, "css", "explore", ["common", "explore"])
    explore_js_bundle = _build_bundle(
        config,
        "js",
        "explore",
        ["random-background", "preloader", "toggle-article-text-button"],
    )
    _write_template(
        env,
        "explore.html.j2",
        output / config.build.post_base / "index.html",
        config=config,
        today=today,
        posts=posts,
        css_bundle=explore_css_bundle,
        js_bundle=explore_js_bundle,
    )

    # All post pages share a single pair of bundles
    post_css_bundle = _build_bundle(config, "css", "post", ["common"])
    post_js_bundle = _build_bundle(config, "js", "post", ["preloader"])
    for post in posts:
        _write_template(
            env,
            "post.html.j2",
            output / config.build.post_base / post.slug / "index.html",
            config=config,
            today=today,
            post=post,
            css_bundle=post_css_bundle,
            js_bundle=post_js_bundle,
        )
def main():
    """Load the config and posts, validate them, and build the site.

    Every ``.yaml`` file in the configured posts directory is loaded as a post;
    a post without an explicit ``slug`` uses its file name (without extension).
    With ``--check`` the function stops after validation; with ``--dev`` the
    development build is run instead of the regular one.

    :returns: Process exit code (always ``0``; failures surface as exceptions)
    :raises marshmallow.ValidationError: If the config or a post is invalid, or
                                         if two posts share a slug
    """
    args = get_args()
    cwd = Path.cwd().resolve()

    with Path(args.config).resolve().open(encoding="utf-8") as infile:
        config = ConfigSerializer().load(yaml.load(infile))

    post_serializer = PostSerializer()
    posts = []
    # Directory listing order is filesystem-dependent; sorting it first makes
    # the final (stable) date sort deterministic for posts sharing a date.
    for item in sorted((cwd / config.build.posts).iterdir()):
        if item.suffix.lower() != ".yaml":
            continue
        # Read posts as UTF-8, the same way the config file is read
        with item.open(encoding="utf-8") as infile:
            raw = yaml.load(infile)
        raw.setdefault("slug", item.stem)
        posts.append(post_serializer.load(raw))

    slugs = [post.slug for post in posts]
    if len(set(slugs)) != len(slugs):
        raise msh.ValidationError("Duplicate post slugs found in config")

    if args.check:
        print("Config check successful!", file=sys.stderr)
        return 0

    # Newest posts first
    posts = sorted(posts, key=lambda item: item.date, reverse=True)

    if args.dev:
        _dev(cwd, config, posts)
    else:
        _build(cwd, config, posts)

    return 0
# Script entry point: run the build and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())