File: helper.py

package info (click to toggle)
pyexiftool 0.5.6-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 356 kB
  • sloc: python: 1,406; makefile: 5
file content (585 lines) | stat: -rw-r--r-- 22,942 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
# -*- coding: utf-8 -*-
#
# This file is part of PyExifTool.
#
# PyExifTool <http://github.com/sylikc/pyexiftool>
#
# Copyright 2019-2023 Kevin M (sylikc)
# Copyright 2012-2014 Sven Marnach
#
# Community contributors are listed in the CHANGELOG.md for the PRs
#
# PyExifTool is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the licence, or
# (at your option) any later version, or the BSD licence.
#
# PyExifTool is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING.GPL or COPYING.BSD for more details.


"""
This submodule contains the ``ExifToolHelper`` class, which makes the core ``ExifTool`` class easier, and safer to use.

.. note::
	:py:class:`exiftool.helper.ExifToolHelper` class of this submodule is available in the ``exiftool`` namespace as :py:class:`exiftool.ExifToolHelper`

"""

import re

from .exiftool import ExifTool
from .exceptions import ExifToolOutputEmptyError, ExifToolJSONInvalidError, ExifToolExecuteError, ExifToolTagNameError

# basestring makes no sense in Python 3, so renamed tuple to this const
TUPLE_STR_BYTES = (bytes, str)

from typing import Any, Union, Optional, List, Dict




# ======================================================================================================================


def _is_iterable(in_param: Any, ignore_str_bytes: bool = False) -> bool:
	"""
	Checks if this item is iterable, instead of using isinstance(list), anything iterable can be ok

	.. note::
		STRINGS ARE CONSIDERED ITERABLE by Python

		if you need to consider a code path for strings first, check that before checking if a parameter is iterable via this function

		or specify ``ignore_str_bytes=True``

	:param in_param: Something to check if iterable or not
	:param ignore_str_bytes: str/bytes are iterable.  But usually we don't want to check that.  set ``ignore_str_bytes`` to ``True`` to ignore strings on check
	"""

	if ignore_str_bytes and isinstance(in_param, TUPLE_STR_BYTES):
		return False

	# a different type of test of iterability, instead of using isinstance(list)
	# https://stackoverflow.com/questions/1952464/in-python-how-do-i-determine-if-an-object-is-iterable
	try:
		iterator = iter(in_param)
	except TypeError:
		return False

	return True



# ======================================================================================================================

class ExifToolHelper(ExifTool):
	"""
	This class extends the low-level :py:class:`exiftool.ExifTool` class with 'wrapper'/'helper' functionality

	It keeps low-level core functionality with the base class but extends helper functions in a separate class
	"""

	##########################################################################################
	#################################### OVERRIDE METHODS ####################################
	##########################################################################################

	# ----------------------------------------------------------------------------------------------------------------------
	def __init__(self, auto_start: bool = True, check_execute: bool = True, check_tag_names: bool = True, **kwargs) -> None:
		"""
		Initialise the parent class, then record the helper-specific switches.

		:param bool auto_start: Automatically start the exiftool process on the first command run, defaults to True
		:param bool check_execute: Check the exit status (return code) of all commands.  This catches some invalid commands passed to the exiftool subprocess, defaults to True.  See :py:attr:`check_execute` for more info.
		:param bool check_tag_names: Check the tag names given to methods which work directly with tag names.  This catches unintended uses and bugs, defaults to True.  See :py:attr:`check_tag_names` for more info.

		:param kwargs: Every other keyword argument is handed unchanged to the super-class constructor: :py:meth:`exiftool.ExifTool.__init__()`
		"""
		# the parent constructor runs first and receives only the keywords this class does not own
		super().__init__(**kwargs)

		# helper-level flags, each readable through the property of the same name
		self._check_tag_names: bool = check_tag_names
		self._check_execute: bool = check_execute
		self._auto_start: bool = auto_start


	# ----------------------------------------------------------------------------------------------------------------------
	def execute(self, *params: Any, **kwargs) -> Union[str, bytes]:
		"""
		Override of the :py:meth:`exiftool.ExifTool.execute()` method, with three additions:

		* Auto-starts the process if it is not running and :py:attr:`auto_start` == True
		* Applies str() to any parameter which is not already a str or bytes.  (This allows passing in objects like Path _without_ casting them first.)
		* Checks the exit status afterwards when :py:attr:`check_execute` == True

		:param params: Parameters forwarded to the parent ``execute()`` once converted
		:param kwargs: Forwarded to the parent ``execute()`` unchanged

		:return: The value returned by the parent ``execute()``

		:raises ExifToolExecuteError: If :py:attr:`check_execute` == True, and exit status was non-zero
		"""
		if self._auto_start and not self.running:
			self.run()

		# ExifTool.execute() would throw a TypeError for anything that is not str/bytes, so other objects are cast here
		#
		# this duck-types any given object to a string; it started out as Path() support,
		# but works for any object whose str() is something useful
		#
		# Thanks @jangop for the original single line contribution!
		#
		# TODO: this list copy could be expensive if the input is a very huge list.  Perhaps in the future have a flag that takes the lists in verbatim without any processing?
		converted: List[Union[str, bytes]] = []
		for arg in params:
			converted.append(arg if isinstance(arg, TUPLE_STR_BYTES) else str(arg))

		output: Union[str, bytes] = super().execute(*converted, **kwargs)

		# mirrors subprocess.run(check=True): a non-zero exit status becomes an exception
		if self._check_execute and self._last_status:
			raise ExifToolExecuteError(self._last_status, self._last_stdout, self._last_stderr, converted)

		return output

	# ----------------------------------------------------------------------------------------------------------------------
	def run(self) -> None:
		"""
		Override of the :py:meth:`exiftool.ExifTool.run()` method.

		Does nothing when the process is already running (so the 'ExifTool already running' warning is never triggered)
		"""
		# only delegate to the parent when there is actually something to start
		if not self.running:
			super().run()


	# ----------------------------------------------------------------------------------------------------------------------
	def terminate(self, **opts) -> None:
		"""
		Override of the :py:meth:`exiftool.ExifTool.terminate()` method.

		Does nothing when the process is not running (so the 'ExifTool not running' warning is never triggered)

		:param opts: handed unchanged to the parent call :py:meth:`exiftool.ExifTool.terminate()`
		"""
		# only delegate to the parent when there is actually something to stop
		if self.running:
			super().terminate(**opts)


	########################################################################################
	#################################### NEW PROPERTIES ####################################
	########################################################################################

	# ----------------------------------------------------------------------------------------------------------------------
	@property
	def auto_start(self) -> bool:
		"""
		Read-only property.  Reports the ``auto_start`` value that was passed into the constructor.

		(Making this read-write would serve little purpose, but being able to read it at runtime helps a caller detect the expected behavior.)

		:type: bool
		"""
		# no setter is defined for this property; the value is fixed at construction time
		return self._auto_start



	# ----------------------------------------------------------------------------------------------------------------------
	@property
	def check_execute(self) -> bool:
		"""
		Flag which turns checking of the exit status (return code) on execute on or off

		While enabled, :py:meth:`execute()` raises :py:exc:`exiftool.exceptions.ExifToolExecuteError` whenever a non-zero exit status is returned.

		.. warning::
			This property exists so callers can enable/disable error checking, but disabling ``check_execute`` is generally **NOT** recommended.

			**If disabled, exiftool will fail silently, and hard-to-catch bugs may arise.**

			That said, some use cases do want continue-on-error behavior.  (Example: dump all exif in a directory with files which don't all have the same tags, exiftool returns exit code 1 for unknown files, but results are valid for other files with those tags)

		:getter: Returns current setting
		:setter: Enable or Disable the check

			.. note::
				This setting can be changed at any time and only affects subsequent calls

		:type: bool
		"""
		return self._check_execute

	@check_execute.setter
	def check_execute(self, new_setting: bool) -> None:
		# stored as given; execute() reads the flag on every call
		self._check_execute = new_setting


	# ----------------------------------------------------------------------------------------------------------------------
	@property
	def check_tag_names(self) -> bool:
		"""
		Flag which turns checking of tag names on or off

		While enabled, :py:exc:`exiftool.exceptions.ExifToolTagNameError` is raised when an invalid tag name is detected.

		.. warning::
			ExifToolHelper only checks the validity of the Tag **NAME**, using a simple regex pattern.

			* It *does not* validate whether the tag name is actually valid on the file type(s) you're accessing.
			* It *does not* validate whether something you passed in that "looks like" a tag is actually an option
			* It does support a "#" at the end of the tag name to disable print conversion

			Please refer to the `ExifTool Tag Names`_ documentation for a complete list of valid tags recognized by ExifTool.

		.. warning::
			This property exists so callers can enable/disable tag name checking, but disabling ``check_tag_names`` is generally **NOT** recommended.

			**If disabled, you could accidentally edit a file when you meant to read it.**

			Example: ``get_tags("a.jpg", "tag=value")`` will call ``execute_json("-tag=value", "a.jpg")`` which will inadvertently write to a.jpg instead of reading it!

			That said, if PH's exiftool changes its tag name regex and tag names are being erroneously rejected because of this flag, disabling it can serve as a workaround (more importantly, if this is happening, please `file an issue`_!).

		:getter: Returns current setting
		:setter: Enable or Disable the check

			.. note::
				This setting can be changed at any time and only affects subsequent calls

		:type: bool


		.. _file an issue: https://github.com/sylikc/pyexiftool/issues
		.. _ExifTool Tag Names: https://exiftool.org/TagNames/
		"""
		return self._check_tag_names

	@check_tag_names.setter
	def check_tag_names(self, new_setting: bool) -> None:
		# stored as given; the tag-based helper methods read the flag on every call
		self._check_tag_names = new_setting


	# ----------------------------------------------------------------------------------------------------------------------






	#####################################################################################
	#################################### NEW METHODS ####################################
	#####################################################################################


	# all generic helper functions will follow a convention of
	# function(files to be worked on, ... , params=)


	# ----------------------------------------------------------------------------------------------------------------------
	def get_metadata(self, files: Union[str, List], params: Optional[Union[str, List]] = None) -> List:
		"""
		Return all metadata for the given files.

		.. note::

			This is a convenience method.

			It is implemented as a call to :py:meth:`get_tags()` with ``tags=None``

		:param files: Same meaning as the files parameter of :py:meth:`get_tags()`

		:param params: Optional parameters to send to *exiftool*
		:type params: list or None

		:return: The return value has the format described in the documentation of :py:meth:`get_tags()`.
		"""
		# None in the tags position asks get_tags() for every tag
		every_tag = None
		return self.get_tags(files, every_tag, params=params)


	# ----------------------------------------------------------------------------------------------------------------------
	def get_tags(self, files: Union[Any, List[Any]], tags: Optional[Union[str, List]], params: Optional[Union[str, List]] = None) -> List:
		"""
		Return only specified tags for the given files.

		:param files: File(s) to be worked on.

			* If a non-iterable is provided, it will get tags for a single item (str(non-iterable))
			* If an iterable is provided, the list is passed into :py:meth:`execute_json` verbatim.

			.. note::
				Any files/params which are not bytes/str will be casted to a str in :py:meth:`execute()`.

			.. warning::
				Currently, filenames are NOT checked for existence!  That is left up to the caller.

			.. warning::
				Wildcard strings are valid and passed verbatim to exiftool.

				However, exiftool's wildcard matching/globbing may be different than Python's matching/globbing,
				which may cause unexpected behavior if you're using one and comparing the result to the other.
				Read `ExifTool Common Mistakes - Over-use of Wildcards in File Names`_ for some related info.

		:type files: Any or List(Any) - see Note


		:param tags: Tag(s) to read.  If tags is None, or [], method will return all tags

			.. note::
				The tag names may include group names, as usual in the format ``<group>:<tag>``.

			.. note::
				Any iterable of tag names is accepted, including one-shot iterators such as generators.
				The iterable is copied into a list before use, so the caller's object is never modified.

		:type tags: str, list, or None


		:param params: Optional parameter(s) to send to *exiftool*
		:type params: Any, List[Any], or None


		:return: The format of the return value is the same as for :py:meth:`exiftool.ExifTool.execute_json()`.


		:raises ValueError: Invalid Parameter
		:raises TypeError: Invalid Parameter
		:raises ExifToolTagNameError: If :py:attr:`check_tag_names` == True, and a tag name fails the check
		:raises ExifToolExecuteError: If :py:attr:`check_execute` == True, and exit status was non-zero

		Any exception raised by :py:meth:`execute_json` (such as ``ExifToolOutputEmptyError`` or ``ExifToolJSONInvalidError``) propagates to the caller unchanged.


		.. _ExifTool Common Mistakes - Over-use of Wildcards in File Names: https://exiftool.org/mistakes.html#M2

		"""

		# files are validated first, so an empty 'files' is reported before any problem with 'tags'
		final_files: List = self.__class__._parse_arg_files(files)

		final_tags: List
		if tags is None:
			# all tags
			final_tags = []
		elif isinstance(tags, TUPLE_STR_BYTES):
			final_tags = [tags]
		elif _is_iterable(tags):
			# The tag list is walked twice below (name check, then argument building).
			# Copying it into a list keeps a one-shot iterator (e.g. a generator) from being
			# exhausted by the check, which would silently turn the call into "return ALL tags".
			final_tags = list(tags)
		else:
			raise TypeError(f"{self.__class__.__name__}.get_tags: argument 'tags' must be a str/bytes or a list")

		if self._check_tag_names:
			# run check if enabled
			self.__class__._check_tag_list(final_tags)

		exec_params: List = []

		# we extend an empty list to avoid modifying any referenced inputs
		if params:
			if _is_iterable(params, ignore_str_bytes=True):
				exec_params.extend(params)
			else:
				exec_params.append(params)

		# final_tags is always a list by this point, so there is no need to check for None
		exec_params.extend([f"-{t}" for t in final_tags])

		exec_params.extend(final_files)

		# no try/except here: every error from execute_json() is meant to reach the caller as-is
		return self.execute_json(*exec_params)


	# ----------------------------------------------------------------------------------------------------------------------
	def set_tags(self, files: Union[Any, List[Any]], tags: Dict, params: Optional[Union[str, List]] = None):
		"""
		Writes the values of the specified tags for the given file(s).

		:param files: File(s) to be worked on.

			* If a non-iterable is provided, the tags are set on that single item (str(non-iterable))
			* If an iterable is provided, its items are passed into :py:meth:`execute` verbatim.

			.. note::
				Any files/params which are not bytes/str will be casted to a str in :py:meth:`execute()`.

			.. warning::
				Currently, filenames are NOT checked for existence!  That is left up to the caller.

			.. warning::
				Wildcard strings are valid and passed verbatim to exiftool.

				However, exiftool's wildcard matching/globbing may be different than Python's matching/globbing,
				which may cause unexpected behavior if you're using one and comparing the result to the other.
				Read `ExifTool Common Mistakes - Over-use of Wildcards in File Names`_ for some related info.

		:type files: Any or List(Any) - see Note


		:param tags: Tag(s) to write.

			Dictionary keys = tags, values = tag values (str or list)

			* If a value is a list, one ``-key=item`` argument is emitted per item, in the order given
			* Any other value is emitted as a single ``-key=value`` argument

			.. note::
				The tag names may include group names, as usual in the format ``<group>:<tag>``.

			.. note::
				List values allow setting things like ``-Keywords=a -Keywords=b -Keywords=c`` by passing in ``tags={"Keywords": ['a', 'b', 'c']}``

		:type tags: dict


		:param params: Optional parameter(s) to send to *exiftool*
		:type params: str, list, or None


		:return: The format of the return value is the same as for :py:meth:`execute()`.


		:raises ValueError: Invalid Parameter
		:raises TypeError: Invalid Parameter
		:raises ExifToolExecuteError: If :py:attr:`check_execute` == True, and exit status was non-zero


		.. _ExifTool Common Mistakes - Over-use of Wildcards in File Names: https://exiftool.org/mistakes.html#M2

		"""
		final_files: List = self.__class__._parse_arg_files(files)

		# emptiness is tested before the type, so any falsy value (None, "", [], {}) is a ValueError
		if not tags:
			raise ValueError(f"{self.__class__.__name__}.set_tags: argument 'tags' cannot be empty")
		if not isinstance(tags, dict):
			raise TypeError(f"{self.__class__.__name__}.set_tags: argument 'tags' must be a dict")

		if self._check_tag_names:
			# only the dict keys (the tag names) are checked
			self.__class__._check_tag_list(list(tags))

		# a fresh list is built so that none of the caller's inputs are modified
		exec_params: List = []

		if params:
			exec_params.extend(params if _is_iterable(params, ignore_str_bytes=True) else [params])

		for tag, value in tags.items():
			# contributed by @daviddorme in https://github.com/sylikc/pyexiftool/issues/12#issuecomment-821879234
			# tags like Keywords need one directive per value:
			# > exiftool -Keywords=keyword1 -Keywords=keyword2 -Keywords=keyword3 file.jpg
			# a dict cannot hold duplicate keys, so a list value stands in for the repeated directives
			items = value if isinstance(value, list) else [value]
			exec_params.extend(f"-{tag}={item}" for item in items)

		exec_params.extend(final_files)

		# ExifToolExecuteError (last status non-zero) is left to propagate to the caller
		#TODO if execute returns data, then error?
		return self.execute(*exec_params)


	# ----------------------------------------------------------------------------------------------------------------------







	#########################################################################################
	#################################### PRIVATE METHODS ####################################
	#########################################################################################



	# ----------------------------------------------------------------------------------------------------------------------
	@staticmethod
	def _parse_arg_files(files: Union[str, List]) -> List:
		"""
		Shared handling of the ``files`` argument, used by most ExifToolHelper methods

		A developer extending this class can call it to process a files argument in the same way.

		:param files: File(s) to be worked on.
		:type files: str or list

		:return: ``[files]`` when ``files`` is a str/bytes or any non-iterable object; otherwise the ``files`` iterable itself, returned as-is (it is not copied)

		:raises ValueError: Files parameter is empty
		"""

		if not files:
			# the exiftool process would return an error anyway
			raise ValueError("ERROR: Argument 'files' cannot be empty")

		if _is_iterable(files, ignore_str_bytes=True):
			return files

		# a single item: either a str/bytes, or something that is not iterable at all
		return [files]


	# ----------------------------------------------------------------------------------------------------------------------
	@staticmethod
	def _check_tag_list(tags: List) -> None:
		"""
		Private method.  Checks the validity of every tag name in the list passed in.

		See the notes/warnings on the property :py:attr:`check_tag_names` to get a better understanding of what this is for and not for.

		:param list tags: List of tags to check

		:return: None if all checks passed.  (Think of it like an assert statement)

		:raises ExifToolTagNameError: On the first tag which does not match the pattern
		"""
		# If a specific exiftool version ever changes the match pattern, this method could check
		# self.version ... but then it can no longer be static, and it would need the underlying
		# exiftool process to be running in order to read self.version
		#
		# This is not done right now because the odds of the tag name format changing are very low, and requiring
		# exiftool to be running during this tag check could introduce unnecessary overhead at this time



		# According to the exiftool source code, the valid regex on tags is (/^([-\w*]+:)*([-\w*?]+)#?$/)
		# However, it appears that "-" may be allowed within a tag name/group (i.e. https://exiftool.org/TagNames/XMP.html Description tags)
		#
		# \w in Perl => https://perldoc.perl.org/perlrecharclass#Backslash-sequences
		# \w in Python => https://docs.python.org/3/library/re.html#regular-expression-syntax
		#
		# Perl vs Python's "\w" seem to mean slightly different things, so we write our own regex / matching algo
		#
		# NOTE(review): the upstream pattern quoted above also admits "?" inside the tag name, the pattern below does not -- confirm whether that is intentional


		# * make sure the first character is not a special one
		# * "#" can only appear at the end
		# * Tag:Tag:tag is not valid, but passes the simple regex (it's ok, this is not supposed to be a catch-all)... exiftool subprocess accepts it anyways, even if invalid.
		# * *wildcard* tags are permitted by exiftool
		tag_regex = r"[\w\*][\w\:\-\*]*(#|)"
		matcher = re.compile(tag_regex)

		for candidate in tags:
			# the whole tag must match, not just a prefix
			if matcher.fullmatch(candidate) is None:
				raise ExifToolTagNameError(candidate)

		# returns nothing, if no error was raised, the tags passed

		# considering making this...
		# * can't begin with -
		# * can't have "=" anywhere, and that's it...
		# there's a lot of variations which might make this code buggy for some edge use cases



	# ----------------------------------------------------------------------------------------------------------------------