diff --git a/kodak/database/__init__.py b/kodak/database/__init__.py
index bbfc7a5..54cde94 100644
--- a/kodak/database/__init__.py
+++ b/kodak/database/__init__.py
@@ -8,6 +8,7 @@ import peewee
 from kodak import constants
 from kodak import exceptions
 from kodak.configuration import KodakConfig
+from kodak.database._shared import Checksum
 from kodak.database._shared import INTERFACE as interface
 from kodak.database._shared import KodakModel
 from kodak.database.access import AccessRecord
diff --git a/kodak/database/_shared.py b/kodak/database/_shared.py
index dcef188..036abfc 100644
--- a/kodak/database/_shared.py
+++ b/kodak/database/_shared.py
@@ -1,11 +1,14 @@
 import datetime
 import enum
+import hashlib
 import typing
 import uuid
 from pathlib import Path
+from typing import Callable
 from typing import Dict
 from typing import NamedTuple
 from typing import Type
+from typing import Union
 
 import peewee
 
@@ -31,6 +34,30 @@ class Checksum(NamedTuple):
         """Construct from a hashlib object"""
         return cls(algorithm=data.name, digest=data.hexdigest())
 
+    @classmethod
+    def from_path(cls, path: Union[str, Path], constructor: Callable = hashlib.sha256):
+        """Construct from a file path, generating the hash of the file
+
+        .. note:: This method attempts to _efficiently_ compute a hash of large image files. The
+                  hashing code was adapted from here:
+
+                  https://stackoverflow.com/a/44873382/5361209
+
+        :param path: Location of the file to hash; a plain string is converted to a ``Path``
+        :param constructor: Zero-argument callable returning a new hashlib-style hash object
+        :returns: Checksum built from the hasher's algorithm name and hex digest
+        """
+
+        hasher = constructor()
+        # Reuse a single 1 MiB buffer so the file is never loaded into memory all at once
+        view = memoryview(bytearray(1024 * 1024))
+        with Path(path).open("rb", buffering=0) as infile:
+            # readinto() returns the number of bytes read; the 0 sentinel stops at end of file
+            for chunk in iter(lambda: infile.readinto(view), 0):  # type: ignore
+                hasher.update(view[:chunk])
+
+        return cls.from_hash(hasher)
+
     def as_header(self) -> str:
         """Format the checksum for the Content-Digest HTTP header"""
         if self.algorithm.startswith("sha"):
diff --git a/kodak/database/image.py b/kodak/database/image.py
index 3a173e6..cc28fb3 100644
--- a/kodak/database/image.py
+++ b/kodak/database/image.py
@@ -1,4 +1,3 @@
-import hashlib
 import os
 from pathlib import Path
 
@@ -29,18 +28,7 @@ class ImageRecord(KodakModel):
         :param config: Populated application configuration object
         :param path: Full path to the image file to process. The file path provided is expected to
                      already be absolute, with all symlinks and aliases resolved.
-
-        .. note:: This method attempts to _efficiently_ compute a hash of large image files. The
-                  hashing code was adapted from here:
-
-                  https://stackoverflow.com/a/44873382/5361209
         """
-        hasher = hashlib.sha256()
-        view = memoryview(bytearray(1024 * 1024))
-        with path.open("rb", buffering=0) as infile:
-            for chunk in iter(lambda: infile.readinto(view), 0):  # type: ignore
-                hasher.update(view[:chunk])
-
         name = path.stem
         extension = path.suffix
 
@@ -56,7 +44,7 @@ class ImageRecord(KodakModel):
             )
 
         return cls(
-            name=name, source=path, format_=format_, checksum=Checksum.from_hash(hasher)
+            name=name, source=path, format_=format_, checksum=Checksum.from_path(path)
         )
 
     def create_link(self, config: configuration.KodakConfig) -> Path:
@@ -65,7 +53,8 @@ class ImageRecord(KodakModel):
         :param config: Populated application configuration object
         :returns: Path to the created symbolic link back to the source file
         """
-        link = Path(config.content_dir, self.name)
+        Path(config.content_dir, self.name).mkdir(exist_ok=True)
+        link = Path(config.content_dir, self.name, "original")
         try:
             link.symlink_to(self.source)
         except FileExistsError:
@@ -77,4 +66,4 @@ class ImageRecord(KodakModel):
 
         :param config: Populated application configuration object
         """
-        Path(config.content_dir, self.name).unlink(missing_ok=True)
+        Path(config.content_dir, self.name, "original").unlink(missing_ok=True)