Coverage for src/bob/pipelines/dataset/protocols/hashing.py: 70%
23 statements
« prev ^ index » next coverage.py v7.0.5, created at 2023-06-16 14:21 +0200
« prev ^ index » next coverage.py v7.0.5, created at 2023-06-16 14:21 +0200
1"""Hashing functionalities for verifying files and computing CRCs."""
4import hashlib
5import os
7from pathlib import Path
8from typing import Any, Callable, Union
def md5_hash(readable: Any, chunk_size: int = 65535) -> str:
    """Computes the md5 hash of any object with a read method.

    The content is consumed chunk by chunk so that large inputs never have to
    be held in memory at once.

    Parameters
    ----------
    readable
        An object exposing ``read(size)`` that returns ``bytes`` and returns
        ``b""`` once exhausted (e.g. a file opened in binary mode).
    chunk_size
        Number of bytes requested on each ``read`` call. Must not be 0.

    Returns
    -------
    str
        The hexadecimal MD5 digest of everything read from ``readable``.

    Raises
    ------
    ValueError
        If ``chunk_size`` is 0.
    """
    # A zero-sized read yields the empty sentinel straight away, which would
    # end the loop before any data is hashed and silently return the digest
    # of empty input.
    if chunk_size == 0:
        raise ValueError("chunk_size must not be 0")

    hasher = hashlib.md5()
    # Two-argument iter() keeps calling read() until it returns b"".
    for chunk in iter(lambda: readable.read(chunk_size), b""):
        hasher.update(chunk)
    return hasher.hexdigest()
def sha256_hash(readable: Any, chunk_size: int = 65535) -> str:
    """Computes the SHA256 hash of any object with a read method.

    The content is consumed chunk by chunk so that large inputs never have to
    be held in memory at once.

    Parameters
    ----------
    readable
        An object exposing ``read(size)`` that returns ``bytes`` and returns
        ``b""`` once exhausted (e.g. a file opened in binary mode).
    chunk_size
        Number of bytes requested on each ``read`` call. Must not be 0.

    Returns
    -------
    str
        The hexadecimal SHA256 digest of everything read from ``readable``.

    Raises
    ------
    ValueError
        If ``chunk_size`` is 0.
    """
    # A zero-sized read yields the empty sentinel straight away, which would
    # end the loop before any data is hashed and silently return the digest
    # of empty input.
    if chunk_size == 0:
        raise ValueError("chunk_size must not be 0")

    hasher = hashlib.sha256()
    # Two-argument iter() keeps calling read() until it returns b"".
    for chunk in iter(lambda: readable.read(chunk_size), b""):
        hasher.update(chunk)
    return hasher.hexdigest()
def verify_file(
    file_path: Union[str, os.PathLike],
    file_hash: str,
    hash_fct: Callable[[Any, int], str] = sha256_hash,
    full_match: bool = False,
) -> bool:
    """Returns True if the file computed hash corresponds to `file_hash`.

    For comfort, we allow ``file_hash`` to match with the first
    characters of the digest, allowing storing only e.g. the first 8
    char.

    Parameters
    ----------
    file_path
        The path to the file needing verification.
    file_hash
        The expected file hash digest. An empty string never matches.
    hash_fct
        A function taking an open binary file object and a chunk size, and
        returning the digest as a string. Defaults to SHA256.
    full_match
        If set to False, allows ``file_hash`` to match the first characters of
        the files digest (this allows storing e.g. 8 chars of a digest instead
        of the whole 64 characters of SHA256, and still matching.)

    Returns
    -------
    bool
        Whether the computed digest equals ``file_hash`` (``full_match``) or
        starts with a non-empty ``file_hash`` (otherwise).
    """
    file_path = Path(file_path)
    with file_path.open("rb") as f:
        digest = hash_fct(f, 65535)

    if full_match:
        return digest == file_hash
    # The empty string is a prefix of every digest; without this guard an
    # empty expected hash would make any file pass verification.
    return file_hash != "" and digest.startswith(file_hash)
def compute_crc(
    file_path: Union[str, os.PathLike],
    hash_fct: Callable[[Any, int], str] = sha256_hash,
) -> str:
    """Returns the checksum of a file as computed by ``hash_fct``.

    Despite the name, the value is whatever digest ``hash_fct`` produces
    (SHA256 by default), not a cyclic redundancy check.

    Parameters
    ----------
    file_path
        The path to the file to read.
    hash_fct
        A function taking an open binary file object and a chunk size, and
        returning the digest as a string.

    Returns
    -------
    str
        The digest returned by ``hash_fct`` for the file's content.
    """
    target = Path(file_path)
    with target.open("rb") as stream:
        checksum = hash_fct(stream, 65535)
        return checksum