# Coverage report: src/bob/io/base/__init__.py, 84% of 114 statements covered
# (coverage.py v7.0.5, created at 2023-06-16 13:56 +0200)
# Import libraries of other lib packages
import logging
import os

import h5py
import imageio
import numpy as np

logger = logging.getLogger(__name__)

# Allow loading truncated files in case PIL is used
# https://github.com/kirumang/Pix2Pose/issues/2
from PIL import ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True


hdf5_extensions = [".hdf5", ".h5", ".hdf"]
image_extensions = [
    ".jpg",
    ".jpeg",
    ".png",
    ".bmp",
    ".gif",
    ".tif",
    ".tiff",
    ".pgm",
    ".pbm",
    ".pnm",
    ".ppm",
]


def _is_string(s):
    """Returns ``True`` if the given object is a string or bytes."""
    return isinstance(s, (bytes, str))


@np.deprecate(new_name="os.makedirs(directory, exist_ok=True)")
def create_directories_safe(directory, dryrun=False):
    """Creates a directory if it does not exist, with concurrent access
    support. This function will also create any parent directories that might
    be required. If the dryrun option is selected, it does not actually create
    the directory, but only prints the (Linux) command that would have been
    executed.

    **Parameters:**

    ``directory`` : str
      The directory that you want to create.

    ``dryrun`` : bool
      Only ``print`` the command to console, but do not execute it.
    """
    if dryrun:
        print("[dry-run] mkdir -p '%s'" % directory)
    else:
        os.makedirs(directory, exist_ok=True)
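

# A minimal usage sketch (not part of the original module): the path below is
# a hypothetical example. With dryrun=True the function only prints the
# command; the stdlib call named in the deprecation above is the direct
# replacement.
def _example_create_directories(path="/tmp/example_tree/a/b"):
    create_directories_safe(path, dryrun=True)  # prints the mkdir -p command only
    os.makedirs(path, exist_ok=True)  # recommended replacement; creates parents, tolerates existing dirs
    return os.path.isdir(path)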


def open_file(filename) -> np.ndarray:
    """Reads the content of a file.

    Parameters
    ----------
    ``filename`` : str
        The name of the file to open.
    """

    def check_gray(img):
        # Checking for gray-scale images stored with three identical channels
        if (
            img.ndim > 2
            and np.array_equal(img[:, :, 0], img[:, :, 1])
            and np.array_equal(img[:, :, 0], img[:, :, 2])
        ):
            img = img[:, :, 0]
        return img

    # get the extension
    extension = os.path.splitext(filename)[1].lower()

    if extension in hdf5_extensions:
        with h5py.File(filename, "r") as f:
            keys = list(f.keys())
            if len(keys) == 1:
                key = keys[0]
            else:
                key = "array"
                if key not in keys:
                    raise RuntimeError(
                        f"The file {filename} does not contain the key {key}"
                    )
            dataset = f[key]
            # if the data was saved as a string, load it back as a string
            string_dtype = h5py.check_string_dtype(dataset.dtype)
            if string_dtype is not None:
                dataset = dataset.asstr()
            return dataset[()]
    elif extension in image_extensions:
        from ..image import to_bob

        img = imageio.imread(filename)

        # PNGs may have a 4th (alpha) channel, which has to be ignored
        if img.ndim > 2:
            if extension == ".png":
                img = img[:, :, 0:3]
            img = check_gray(img)
        return img if img.ndim == 2 else to_bob(img)
    else:
        raise ValueError(f"Unknown file extension: {extension}")
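

# An illustrative sketch (not part of the original module) of how open_file
# resolves the dataset key in an HDF5 file: a file with a single dataset is
# read regardless of the dataset's name; with several datasets, one named
# "array" is required. The file path is a hypothetical example.
def _example_hdf5_key_resolution(path="/tmp/example_features.h5"):
    with h5py.File(path, "w") as f:
        f["features"] = np.zeros((2, 3))  # single dataset with an arbitrary name
    data = open_file(path)  # the lone "features" dataset is picked up
    assert data.shape == (2, 3)
    return data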


def write_file(filename, data, format="pillow") -> None:
    """Writes the contents of a :py:class:`numpy.ndarray` to a file.

    Parameters
    ----------
    ``filename`` : str
        The name of the file to write to.

    ``data`` : :py:class:`numpy.ndarray`
        The data to write to the file.

    ``format`` : str
        The format to use when writing the file. Defaults to ``"pillow"``,
        which has the best support for all image formats.
    """
    extension = os.path.splitext(filename)[1].lower()  # get the extension

    if extension in hdf5_extensions:
        with h5py.File(filename, "w") as f:
            f["array"] = data
    elif extension in image_extensions:
        # Pillow is the format with the best support for all image formats
        from ..image import to_matplotlib

        imageio.imwrite(filename, to_matplotlib(data), format=format)
    else:
        raise RuntimeError(f"Unknown file extension: {extension}")
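

# A minimal roundtrip sketch (not part of the original module): write_file
# stores arrays under the "array" key for HDF5 extensions, so open_file can
# read them back. The file path is a hypothetical example.
def _example_write_read_roundtrip(path="/tmp/example_data.hdf5"):
    data = np.arange(6, dtype=np.float64).reshape(2, 3)
    write_file(path, data)  # dispatches to the HDF5 branch based on ".hdf5"
    loaded = open_file(path)  # finds the single "array" dataset
    assert np.array_equal(loaded, data)
    return loaded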


def load(inputs) -> np.ndarray:
    """Loads the content of a file.

    Will take a filename (or an iterable of filenames) and put the content into a
    :py:class:`numpy.ndarray`.

    **Parameters:**

    ``inputs`` : various types

      This might represent several different entities:

      1. The name of a file (full path) from where to load the data. In this
         case, this assumes that the file contains an array and returns a
         loaded numpy ndarray.
      2. An iterable of filenames to be loaded in memory. In this case, this
         would assume that each file contains a single 1D sample or a set of
         1D samples, load them into memory and concatenate them into a single
         2D :py:class:`numpy.ndarray`, which is returned.

    **Returns:**

    ``data`` : :py:class:`numpy.ndarray`
      The data loaded from the given ``inputs``.
    """
    from collections.abc import Iterable

    if _is_string(inputs):
        if not os.path.exists(inputs):
            raise RuntimeError(f"`{inputs}' does not exist!")
        try:
            return open_file(inputs)
        except Exception as e:
            raise RuntimeError(f"Could not load `{inputs}'!") from e
    elif isinstance(inputs, Iterable):
        retval = []
        for obj in inputs:
            if _is_string(obj):
                retval.append(load(obj))
            else:
                raise TypeError(
                    "Iterable contains an object which is not a filename"
                )
        return np.vstack(retval)
    else:
        raise TypeError(
            "Unexpected input object. This function is expecting a filename, "
            "or an iterable of filenames."
        )


def save(array, filename, create_directories=False):
    """Saves the contents of an array-like object to file.

    Effectively, this is the same as opening a file with the mode flag set to ``'w'``
    (write with truncation) and calling ``file.write`` passing ``array`` as parameter.

    Parameters:

    ``array`` : array_like
      The array-like object to be saved in the file

    ``filename`` : str
      The name of the file where the contents will be saved

    ``create_directories`` : bool
      Automatically generate the directories if required (defaults to ``False``
      for compatibility reasons; this might change in the future to default to
      ``True``)
    """
    # create directory if not existent yet
    if create_directories:
        create_directories_safe(os.path.dirname(filename))

    # if array is a string, don't create a numpy array
    if not isinstance(array, str):
        # requires data is C-contiguous and aligned, will create a copy otherwise
        array = np.require(array, requirements=("C_CONTIGUOUS", "ALIGNED"))

    write_file(filename, array)


# Just to make it homogeneous with the C++ API
write = save
read = load
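

# A usage sketch (not part of the original module): save() normalizes the
# array layout and can create missing parent directories; read() is the alias
# defined above. The directory and file names are hypothetical examples.
def _example_save_and_read(path="/tmp/example_dir/nested/data.h5"):
    arr = np.ones((3, 2))[:, ::-1]  # a non-contiguous view; save() copies it to C order
    save(arr, path, create_directories=True)  # parent directories are created first
    loaded = read(path)
    assert np.array_equal(loaded, arr)
    return loaded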


# Keeps compatibility with the previously existing API
# open = File


def _generate_features(reader, paths, same_size=False):
    """Load and stack features in a memory efficient way. This function is
    meant to be used inside :py:func:`vstack_features`.

    Parameters
    ----------
    reader : ``collections.abc.Callable``
        See the documentation of :py:func:`vstack_features`.
    paths : ``collections.abc.Iterable``
        See the documentation of :py:func:`vstack_features`.
    same_size : :obj:`bool`, optional
        See the documentation of :py:func:`vstack_features`.

    Yields
    ------
    object
        The first object yielded is a tuple of the :py:class:`numpy.dtype` of
        the features and the shape of the first feature. The remaining objects
        are the actual feature values, returned in C order.
    """
    shape_determined = False
    for path in paths:
        feature = np.atleast_2d(reader(path))
        feature = np.ascontiguousarray(feature)
        if not shape_determined:
            shape_determined = True
            dtype = feature.dtype
            shape = list(feature.shape)
            yield (dtype, shape)
        else:
            # make sure all features have the same shape and dtype
            if same_size:
                assert shape == list(
                    feature.shape
                ), f"Expected feature shape of {shape}, got {feature.shape}"
            else:
                assert shape[1:] == list(
                    feature.shape[1:]
                ), f"Ignoring the first dimension, expected feature shape of {shape}, got {feature.shape}"
            assert dtype == feature.dtype

        if same_size:
            yield (feature.ravel(),)
        else:
            for feat in feature:
                yield (feat.ravel(),)
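

# An illustrative sketch (not part of the original module) of the generator
# protocol above: the first item yielded is (dtype, shape) metadata, and every
# following item is a 1-tuple holding one raveled row, which is exactly what
# np.fromiter consumes in vstack_features below.
def _example_generator_protocol():
    gen = _generate_features(lambda path: np.arange(4.0).reshape(2, 2), ["p1"])
    dtype, shape = next(gen)  # metadata: (dtype('float64'), [2, 2])
    rows = [row for (row,) in gen]  # two raveled rows, each of length 2
    assert dtype == np.float64 and shape == [2, 2] and len(rows) == 2
    return rows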


def vstack_features(reader, paths, same_size=False, dtype=None):
    """Stacks all features in a memory efficient way.

    Parameters
    ----------
    reader : ``collections.abc.Callable``
        The function to load the features. The function should only take one
        argument ``path`` and return the loaded features. Use
        :any:`functools.partial` to adapt your reader to this interface.
        The features returned by ``reader`` are expected to have the same
        :py:class:`numpy.dtype` and the same shape except for their first
        dimension. The first dimension should correspond to the number of
        samples.
    paths : ``collections.abc.Iterable``
        An iterable of paths to iterate on. Whatever is inside a path is given
        to ``reader``, so the entries do not necessarily need to be paths to
        actual files. If ``same_size`` is ``True``, ``len(paths)`` must be
        valid.
    same_size : :obj:`bool`, optional
        If ``True``, it assumes that arrays inside all the paths are the same
        shape. If you know the features are the same size in all paths, set
        this to ``True`` to improve the performance.
    dtype : :py:class:`numpy.dtype`, optional
        If provided, the data will be cast to this dtype.

    Returns
    -------
    numpy.ndarray
        The read features with the shape ``(n_samples, *features_shape[1:])``.

    Examples
    --------
    In its simplest form, this function is equivalent to calling
    ``numpy.vstack([reader(p) for p in paths])``.

    >>> import numpy
    >>> from bob.io.base import vstack_features
    >>> def reader(path):
    ...     # in each file, there are 5 samples and features are 2 dimensional.
    ...     return numpy.arange(10).reshape(5, 2)
    >>> paths = ['path1', 'path2']
    >>> all_features = vstack_features(reader, paths)
    >>> numpy.allclose(all_features, numpy.array(
    ...     [[0, 1],
    ...      [2, 3],
    ...      [4, 5],
    ...      [6, 7],
    ...      [8, 9],
    ...      [0, 1],
    ...      [2, 3],
    ...      [4, 5],
    ...      [6, 7],
    ...      [8, 9]]))
    True
    >>> all_features_with_more_memory = numpy.vstack([reader(p) for p in paths])
    >>> numpy.allclose(all_features, all_features_with_more_memory)
    True

    You can allocate the array at once to improve the performance if you know
    that all features in paths have the same shape and you know the total
    number of the paths:

    >>> all_features = vstack_features(reader, paths, same_size=True)
    >>> numpy.allclose(all_features, numpy.array(
    ...     [[0, 1],
    ...      [2, 3],
    ...      [4, 5],
    ...      [6, 7],
    ...      [8, 9],
    ...      [0, 1],
    ...      [2, 3],
    ...      [4, 5],
    ...      [6, 7],
    ...      [8, 9]]))
    True
    """
    iterable = _generate_features(reader, paths, same_size)
    data_dtype, shape = next(iterable)
    if dtype is None:
        dtype = data_dtype
    if same_size:
        # numpy black magic: https://stackoverflow.com/a/12473478/1286165
        field_dtype = [("", (dtype, (np.prod(shape),)))]
        total_size = len(paths)
        all_features = np.fromiter(iterable, field_dtype, total_size)
    else:
        field_dtype = [("", (dtype, (np.prod(shape[1:]),)))]
        all_features = np.fromiter(iterable, field_dtype)

    # go from a field array to a normal array
    all_features = all_features.view(dtype)
    # the shape is assumed to be (n_samples, ...); it can be (5, 2) or (5, 3, 4)
    shape = list(shape)
    shape[0] = -1
    return np.reshape(all_features, shape, order="C")
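

# An illustrative sketch (not part of the original module) of the structured
# dtype trick referenced above: np.fromiter only builds 1D arrays, but wrapping
# each row in a single-field dtype lets it consume whole rows at a time;
# viewing the result with the plain dtype then recovers the numeric layout.
def _example_fromiter_rows():
    rows = ((np.arange(3.0) + i,) for i in range(4))  # four rows of length 3
    field_dtype = [("", (np.float64, (3,)))]  # one anonymous field per row
    flat = np.fromiter(rows, field_dtype).view(np.float64)
    stacked = flat.reshape(-1, 3)  # back to the (n_samples, n_features) shape
    assert stacked.shape == (4, 3)
    return stacked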


# gets sphinx autodoc done right - don't remove it
__all__ = [_ for _ in dir() if not _.startswith("_")]