"""For internal use only. It provides a slice-like file reader.""" import os from typing import Union try: # pylint: disable=no-name-in-module from multiprocessing import Lock except ImportError: from threading import Lock # type: ignore class FileBuffer: """A slice-able file reader""" def __init__(self, database: str) -> None: # pylint: disable=consider-using-with self._handle = open(database, "rb") self._size = os.fstat(self._handle.fileno()).st_size if not hasattr(os, "pread"): self._lock = Lock() def __getitem__(self, key: Union[slice, int]): if isinstance(key, slice): return self._read(key.stop - key.start, key.start) if isinstance(key, int): return self._read(1, key)[0] raise TypeError("Invalid argument type.") def rfind(self, needle: bytes, start: int) -> int: """Reverse find needle from start""" pos = self._read(self._size - start - 1, start).rfind(needle) if pos == -1: return pos return start + pos def size(self) -> int: """Size of file""" return self._size def close(self) -> None: """Close file""" self._handle.close() if hasattr(os, "pread"): def _read(self, buffersize: int, offset: int) -> bytes: """read that uses pread""" # pylint: disable=no-member return os.pread(self._handle.fileno(), buffersize, offset) else: def _read(self, buffersize: int, offset: int) -> bytes: """read with a lock This lock is necessary as after a fork, the different processes will share the same file table entry, even if we dup the fd, and as such the same offsets. There does not appear to be a way to duplicate the file table entry and we cannot re-open based on the original path as that file may have replaced with another or unlinked. """ with self._lock: self._handle.seek(offset) return self._handle.read(buffersize)