1
0
mirror of https://github.com/enpaul/kodak.git synced 2024-11-14 18:46:50 +00:00

Generalize hashing function to checksum container

Fix symlink creation/removal on image record model
This commit is contained in:
Ethan Paul 2021-11-14 18:16:27 -05:00
parent ed46f8ab17
commit dc81600d0c
No known key found for this signature in database
GPG Key ID: D0E2CBF1245E92BF
3 changed files with 26 additions and 15 deletions

View File

@ -8,6 +8,7 @@ import peewee
from kodak import constants from kodak import constants
from kodak import exceptions from kodak import exceptions
from kodak.configuration import KodakConfig from kodak.configuration import KodakConfig
from kodak.database._shared import Checksum
from kodak.database._shared import INTERFACE as interface from kodak.database._shared import INTERFACE as interface
from kodak.database._shared import KodakModel from kodak.database._shared import KodakModel
from kodak.database.access import AccessRecord from kodak.database.access import AccessRecord

View File

@ -1,11 +1,14 @@
import datetime import datetime
import enum import enum
import hashlib
import typing import typing
import uuid import uuid
from pathlib import Path from pathlib import Path
from typing import Callable
from typing import Dict from typing import Dict
from typing import NamedTuple from typing import NamedTuple
from typing import Type from typing import Type
from typing import Union
import peewee import peewee
@ -31,6 +34,24 @@ class Checksum(NamedTuple):
"""Construct from a hashlib object""" """Construct from a hashlib object"""
return cls(algorithm=data.name, digest=data.hexdigest()) return cls(algorithm=data.name, digest=data.hexdigest())
@classmethod
def from_path(
    cls, path: Union[str, Path], constructor: Callable = hashlib.sha256
) -> "Checksum":
    """Construct from a file path, generating the hash of the file

    :param path: Location of the file to hash; may be given as either a string or a
                 ``Path`` object
    :param constructor: Zero-argument callable returning a hashlib-style hasher object
                        (must provide ``update()``, ``hexdigest()``, and ``name``).
                        Defaults to SHA-256.
    :returns: Checksum built from the hasher via ``from_hash``

    .. note:: This method attempts to _efficiently_ compute a hash of large image files. The
              hashing code was adapted from here:

              https://stackoverflow.com/a/44873382/5361209
    """
    hasher = constructor()
    # A single reusable 1 MiB buffer: each read overwrites it in place, so hashing a
    # large file never allocates more than this one chunk.
    view = memoryview(bytearray(1024 * 1024))

    # The signature accepts plain strings, so coerce to ``Path`` before calling
    # ``.open()`` (a ``str`` has no such method).
    with Path(path).open("rb", buffering=0) as infile:
        while True:
            count = infile.readinto(view)
            # Stop on end-of-file (0) and also on ``None``, which an unbuffered raw
            # stream may return when no data is available.
            if not count:
                break
            # Only hash the bytes actually read; the tail of the buffer is stale.
            hasher.update(view[:count])

    return cls.from_hash(hasher)
def as_header(self) -> str: def as_header(self) -> str:
"""Format the checksum for the Content-Digest HTTP header""" """Format the checksum for the Content-Digest HTTP header"""
if self.algorithm.startswith("sha"): if self.algorithm.startswith("sha"):

View File

@ -1,4 +1,3 @@
import hashlib
import os import os
from pathlib import Path from pathlib import Path
@ -29,18 +28,7 @@ class ImageRecord(KodakModel):
:param config: Populated application configuration object :param config: Populated application configuration object
:param path: Full path to the image file to process. The file path provided is expected to :param path: Full path to the image file to process. The file path provided is expected to
already be absolute, with all symlinks and aliases resolved. already be absolute, with all symlinks and aliases resolved.
.. note:: This method attempts to _efficiently_ compute a hash of large image files. The
hashing code was adapted from here:
https://stackoverflow.com/a/44873382/5361209
""" """
hasher = hashlib.sha256()
view = memoryview(bytearray(1024 * 1024))
with path.open("rb", buffering=0) as infile:
for chunk in iter(lambda: infile.readinto(view), 0): # type: ignore
hasher.update(view[:chunk])
name = path.stem name = path.stem
extension = path.suffix extension = path.suffix
@ -56,7 +44,7 @@ class ImageRecord(KodakModel):
) )
return cls( return cls(
name=name, source=path, format_=format_, checksum=Checksum.from_hash(hasher) name=name, source=path, format_=format_, checksum=Checksum.from_path(path)
) )
def create_link(self, config: configuration.KodakConfig) -> Path: def create_link(self, config: configuration.KodakConfig) -> Path:
@ -65,7 +53,8 @@ class ImageRecord(KodakModel):
:param config: Populated application configuration object :param config: Populated application configuration object
:returns: Path to the created symbolic link back to the source file :returns: Path to the created symbolic link back to the source file
""" """
link = Path(config.content_dir, self.name) Path(config.content_dir, self.name).mkdir(exist_ok=True)
link = Path(config.content_dir, self.name, "original")
try: try:
link.symlink_to(self.source) link.symlink_to(self.source)
except FileExistsError: except FileExistsError:
@ -77,4 +66,4 @@ class ImageRecord(KodakModel):
:param config: Populated application configuration object :param config: Populated application configuration object
""" """
Path(config.content_dir, self.name).unlink(missing_ok=True) Path(config.content_dir, self.name, "original").unlink(missing_ok=True)