1
0
mirror of https://github.com/enpaul/kodak.git synced 2024-11-14 10:36:55 +00:00

Generalize hashing function to checksum container

Fix symlink creation/removal on image record model
This commit is contained in:
Ethan Paul 2021-11-14 18:16:27 -05:00
parent ed46f8ab17
commit dc81600d0c
No known key found for this signature in database
GPG Key ID: D0E2CBF1245E92BF
3 changed files with 26 additions and 15 deletions

View File

@ -8,6 +8,7 @@ import peewee
from kodak import constants
from kodak import exceptions
from kodak.configuration import KodakConfig
from kodak.database._shared import Checksum
from kodak.database._shared import INTERFACE as interface
from kodak.database._shared import KodakModel
from kodak.database.access import AccessRecord

View File

@ -1,11 +1,14 @@
import datetime
import enum
import hashlib
import typing
import uuid
from pathlib import Path
from typing import Callable
from typing import Dict
from typing import NamedTuple
from typing import Type
from typing import Union
import peewee
@ -31,6 +34,24 @@ class Checksum(NamedTuple):
"""Construct from a hashlib object"""
return cls(algorithm=data.name, digest=data.hexdigest())
@classmethod
def from_path(
    cls, path: Union[str, Path], constructor: Callable = hashlib.sha256
) -> "Checksum":
    """Construct from a file path, generating the hash of the file

    :param path: Location of the file to hash, given either as a string or as a
                 ``Path`` object.
    :param constructor: Zero-argument callable returning a hashlib-style object (one
                        exposing ``update()``, ``hexdigest()``, and ``name``). Defaults
                        to ``hashlib.sha256``.
    :returns: Checksum built from the resulting hash object via ``from_hash``

    .. note:: This method attempts to _efficiently_ compute a hash of large image files. The
              hashing code was adapted from here:

              https://stackoverflow.com/a/44873382/5361209
    """
    hasher = constructor()
    # A single reusable 1 MiB buffer: every read lands in the same memory, so no
    # per-chunk bytes object is allocated.
    view = memoryview(bytearray(1024 * 1024))
    # The signature accepts plain strings, which have no ``open`` method, so the
    # argument is normalized to a ``Path`` before opening.
    with Path(path).open("rb", buffering=0) as infile:
        # ``readinto`` returns the number of bytes read; the sentinel ``0`` stops
        # iteration at end of file.
        for chunk in iter(lambda: infile.readinto(view), 0):  # type: ignore
            # Only the first ``chunk`` bytes of the buffer hold fresh data.
            hasher.update(view[:chunk])
    return cls.from_hash(hasher)
def as_header(self) -> str:
"""Format the checksum for the Content-Digest HTTP header"""
if self.algorithm.startswith("sha"):

View File

@ -1,4 +1,3 @@
import hashlib
import os
from pathlib import Path
@ -29,18 +28,7 @@ class ImageRecord(KodakModel):
:param config: Populated application configuration object
:param path: Full path to the image file to process. The file path provided is expected to
already be absolute, with all symlinks and aliases resolved.
.. note:: This method attempts to _efficiently_ compute a hash of large image files. The
hashing code was adapted from here:
https://stackoverflow.com/a/44873382/5361209
"""
hasher = hashlib.sha256()
view = memoryview(bytearray(1024 * 1024))
with path.open("rb", buffering=0) as infile:
for chunk in iter(lambda: infile.readinto(view), 0): # type: ignore
hasher.update(view[:chunk])
name = path.stem
extension = path.suffix
@ -56,7 +44,7 @@ class ImageRecord(KodakModel):
)
return cls(
name=name, source=path, format_=format_, checksum=Checksum.from_hash(hasher)
name=name, source=path, format_=format_, checksum=Checksum.from_path(path)
)
def create_link(self, config: configuration.KodakConfig) -> Path:
@ -65,7 +53,8 @@ class ImageRecord(KodakModel):
:param config: Populated application configuration object
:returns: Path to the created symbolic link back to the source file
"""
link = Path(config.content_dir, self.name)
Path(config.content_dir, self.name).mkdir(exist_ok=True)
link = Path(config.content_dir, self.name, "original")
try:
link.symlink_to(self.source)
except FileExistsError:
@ -77,4 +66,4 @@ class ImageRecord(KodakModel):
:param config: Populated application configuration object
"""
Path(config.content_dir, self.name).unlink(missing_ok=True)
Path(config.content_dir, self.name, "original").unlink(missing_ok=True)