File: clearml_logger.py

package info (click to toggle)
pytorch-ignite 0.5.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 11,712 kB
  • sloc: python: 46,874; sh: 376; makefile: 27
file content (991 lines) | stat: -rw-r--r-- 37,473 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
"""ClearML logger and its helper handlers."""

import os
import tempfile
import warnings
from collections import defaultdict
from datetime import datetime
from enum import Enum
from typing import Any, Callable, DefaultDict, List, Mapping, Optional, Tuple, Type, Union

from torch.optim import Optimizer

import ignite.distributed as idist
from ignite.engine import Engine, Events
from ignite.handlers.base_logger import (
    BaseLogger,
    BaseOptimizerParamsHandler,
    BaseOutputHandler,
    BaseWeightsHandler,
    BaseWeightsScalarHandler,
)
from ignite.handlers.checkpoint import DiskSaver
from ignite.handlers.utils import global_step_from_engine  # noqa

# Public names of this module. ``global_step_from_engine`` is not defined here: it is imported
# from ``ignite.handlers.utils`` above and listed so that it is re-exported from this module.
__all__ = [
    "ClearMLLogger",
    "ClearMLSaver",
    "OptimizerParamsHandler",
    "OutputHandler",
    "WeightsScalarHandler",
    "WeightsHistHandler",
    "GradsScalarHandler",
    "GradsHistHandler",
    "global_step_from_engine",
]


class ClearMLLogger(BaseLogger):
    """
    `ClearML <https://github.com/allegroai/clearml>`_ handler to log metrics, text, model/optimizer parameters,
    plots during training and validation.
    Also supports model checkpoints logging and upload to the storage solution of your choice (i.e. ClearML File server,
    S3 bucket etc.)

    .. code-block:: bash

        pip install clearml
        clearml-init

    Args:
        kwargs: Keyword arguments accepted from ``Task.init`` method.
            All arguments are optional. If a ClearML Task has already been created,
            kwargs will be ignored and the current ClearML Task will be used.

    Raises:
        ModuleNotFoundError: If ``clearml`` cannot be imported.

    Examples:
        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            # Create a logger

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Attach the logger to the trainer to log training loss at each iteration
            clearml_logger.attach_output_handler(
                trainer,
                event_name=Events.ITERATION_COMPLETED,
                tag="training",
                output_transform=lambda loss: {"loss": loss}
            )

            # Attach the logger to the evaluator on the training dataset and log NLL, Accuracy metrics after each epoch
            # We setup `global_step_transform=global_step_from_engine(trainer)` to take the epoch
            # of the `trainer` instead of `train_evaluator`.
            clearml_logger.attach_output_handler(
                train_evaluator,
                event_name=Events.EPOCH_COMPLETED,
                tag="training",
                metric_names=["nll", "accuracy"],
                global_step_transform=global_step_from_engine(trainer),
            )

            # Attach the logger to the evaluator on the validation dataset and log NLL, Accuracy metrics after
            # each epoch. We setup `global_step_transform=global_step_from_engine(trainer)` to take the epoch of the
            # `trainer` instead of `evaluator`.
            clearml_logger.attach_output_handler(
                evaluator,
                event_name=Events.EPOCH_COMPLETED,
                tag="validation",
                metric_names=["nll", "accuracy"],
                global_step_transform=global_step_from_engine(trainer),
            )

            # Attach the logger to the trainer to log optimizer's parameters, e.g. learning rate at each iteration
            clearml_logger.attach_opt_params_handler(
                trainer,
                event_name=Events.ITERATION_STARTED,
                optimizer=optimizer,
                param_name='lr'  # optional
            )

            # Attach the logger to the trainer to log model's weights norm after each iteration
            clearml_logger.attach(
                trainer,
                event_name=Events.ITERATION_COMPLETED,
                log_handler=WeightsScalarHandler(model)
            )

    """

    def __init__(self, **kwargs: Any):
        # clearml is imported here rather than at module level, so importing this module
        # does not require clearml; only creating a logger does.
        try:
            from clearml import Task
            from clearml.binding.frameworks.tensorflow_bind import WeightsGradientHistHelper
        except ImportError:
            raise ModuleNotFoundError(
                "This contrib module requires clearml to be installed. "
                "You may install clearml using: \n pip install clearml \n"
            )

        # These three keys are passed to ``Task.init`` explicitly below; everything else is forwarded as-is.
        experiment_kwargs = {k: v for k, v in kwargs.items() if k not in ("project_name", "task_name", "task_type")}

        if self.bypass_mode():
            # NOTE(review): the message names ClearMLSaver although it is emitted by ClearMLLogger;
            # the text is kept unchanged because callers may match on it.
            warnings.warn("ClearMLSaver: running in bypass mode")

        # Try to retrieve the current ClearML Task before trying to create a new one
        self._task = Task.current_task()

        if self._task is None:
            self._task = Task.init(
                project_name=kwargs.get("project_name"),
                task_name=kwargs.get("task_name"),
                task_type=kwargs.get("task_type", Task.TaskTypes.training),
                **experiment_kwargs,
            )

        self.clearml_logger = self._task.get_logger()

        # Helper used by the histogram handlers of this module (``logger.grad_helper.add_histogram``).
        self.grad_helper = WeightsGradientHistHelper(logger=self.clearml_logger, report_freq=1)

    @classmethod
    def set_bypass_mode(cls, bypass: bool) -> None:
        """
        Set ``clearml.Task`` to offline mode.
        Will bypass all outside communication, and will save all data and logs to a local session folder.
        Should only be used in "standalone mode", when there is no access to the *clearml-server*.

        Args:
            bypass: If ``True``, all outside communication is skipped.
                Data and logs will be stored in a local session folder.
                For more information, please refer to `ClearML docs
                <https://clear.ml/docs/latest/docs/clearml_sdk/task_sdk/#offline-mode>`_.
        """
        from clearml import Task

        # Stored on the class so that ``bypass_mode`` returns it instead of the environment default.
        setattr(cls, "_bypass", bypass)
        Task.set_offline(offline_mode=bypass)

    @classmethod
    def bypass_mode(cls) -> bool:
        """
        Returns the bypass mode state.

        Note:
            A non-empty `CI` environment variable will automatically set bypass_mode to ``True``
            unless overridden specifically with ``ClearMLLogger.set_bypass_mode(False)``.
            For more information, please refer to `ClearML docs
            <https://clear.ml/docs/latest/docs/clearml_sdk/task_sdk/#offline-mode>`_.

        Return:
            If True, ``clearml.Task`` is on offline mode, and all outside communication is skipped.
        """
        return getattr(cls, "_bypass", bool(os.environ.get("CI")))

    def __getattr__(self, attr: Any) -> Any:
        """
        Calls the corresponding method of ``clearml.Logger``.

        Args:
            attr: methods of the ``clearml.Logger`` class.

        Raises:
            AttributeError: If ``clearml_logger`` itself has not been set on this instance.
        """
        # ``__getattr__`` only runs when normal lookup fails. If ``clearml_logger`` is the missing
        # attribute (``__init__`` did not complete, or the instance was created without calling it),
        # delegating through ``self.clearml_logger`` would re-enter this method and recurse without end.
        if attr == "clearml_logger":
            raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'")
        return getattr(self.clearml_logger, attr)

    def get_task(self) -> Any:
        """
        Returns the task context that the logger is reporting.

        Return:
            The task picked up or created in ``__init__``, equivalent to ``clearml.Task.current_task()``.
        """
        return self._task

    def close(self) -> None:
        """Flush the underlying ClearML logger."""
        self.clearml_logger.flush()

    def _create_output_handler(self, *args: Any, **kwargs: Any) -> "OutputHandler":
        """Build this module's :class:`OutputHandler` from the given arguments."""
        return OutputHandler(*args, **kwargs)

    def _create_opt_params_handler(self, *args: Any, **kwargs: Any) -> "OptimizerParamsHandler":
        """Build this module's :class:`OptimizerParamsHandler` from the given arguments."""
        return OptimizerParamsHandler(*args, **kwargs)


class OutputHandler(BaseOutputHandler):
    """Helper handler to log engine's output and/or metrics

    Args:
        tag: common title for all produced plots. For example, "training"
        metric_names: list of metric names to plot or a string "all" to plot all available
            metrics.
        output_transform: output transform function to prepare `engine.state.output` as a number.
            For example, `output_transform = lambda output: output`
            This function can also return a dictionary, e.g `{"loss": loss1, "another_loss": loss2}` to label the plot
            with corresponding keys.
        global_step_transform: global step transform function to output a desired global step.
            Input of the function is `(engine, event_name)`. Output of function should be an integer.
            Default is None, global_step based on attached engine. If provided,
            uses function output as global_step. To setup global step from another engine, please use
            :meth:`~ignite.handlers.clearml_logger.global_step_from_engine`.
        state_attributes: list of attributes of the ``trainer.state`` to plot.

    Examples:
        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            # Create a logger

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Attach the logger to the evaluator on the validation dataset and log NLL, Accuracy metrics after
            # each epoch. We setup `global_step_transform=global_step_from_engine(trainer)` to take the epoch
            # of the `trainer`:
            clearml_logger.attach(
                evaluator,
                log_handler=OutputHandler(
                    tag="validation",
                    metric_names=["nll", "accuracy"],
                    global_step_transform=global_step_from_engine(trainer)
                ),
                event_name=Events.EPOCH_COMPLETED
            )
            # or equivalently
            clearml_logger.attach_output_handler(
                evaluator,
                event_name=Events.EPOCH_COMPLETED,
                tag="validation",
                metric_names=["nll", "accuracy"],
                global_step_transform=global_step_from_engine(trainer)
            )

        Another example, where model is evaluated every 500 iterations:

        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            @trainer.on(Events.ITERATION_COMPLETED(every=500))
            def evaluate(engine):
                evaluator.run(validation_set, max_epochs=1)

            # Create a logger

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            def global_step_transform(*args, **kwargs):
                return trainer.state.iteration

            # Attach the logger to the evaluator on the validation dataset and log NLL, Accuracy metrics after
            # every 500 iterations. Since evaluator engine does not have access to the training iteration, we
            # provide a global_step_transform to return the trainer.state.iteration for the global_step, each time
            # evaluator metrics are plotted on ClearML.

            clearml_logger.attach_output_handler(
                evaluator,
                event_name=Events.EPOCH_COMPLETED,
                tag="validation",
                metric_names=["nll", "accuracy"],
                global_step_transform=global_step_transform
            )

        Another example where the State Attributes ``trainer.state.alpha`` and ``trainer.state.beta``
        are also logged along with the NLL and Accuracy after each iteration:

        .. code-block:: python

            clearml_logger.attach(
                trainer,
                log_handler=OutputHandler(
                    tag="training",
                    metric_names=["nll", "accuracy"],
                    state_attributes=["alpha", "beta"],
                ),
                event_name=Events.ITERATION_COMPLETED
            )

        Example of `global_step_transform`

        .. code-block:: python

            def global_step_transform(engine, event_name):
                return engine.state.get_event_attrib_value(event_name)

    .. versionchanged:: 0.4.7
        accepts an optional list of `state_attributes`
    """

    def __init__(
        self,
        tag: str,
        metric_names: Optional[Union[str, List[str]]] = None,
        output_transform: Optional[Callable] = None,
        global_step_transform: Optional[Callable[[Engine, Union[str, Events]], int]] = None,
        state_attributes: Optional[List[str]] = None,
    ):
        super().__init__(tag, metric_names, output_transform, global_step_transform, state_attributes)

    def __call__(self, engine: Engine, logger: ClearMLLogger, event_name: Union[str, Events]) -> None:
        """Report the collected metrics and state attributes as ClearML scalars.

        Args:
            engine: engine whose output, metrics and state attributes are logged.
            logger: logger to report through; must be a :class:`ClearMLLogger`.
            event_name: event that triggered the call, forwarded to ``global_step_transform``.

        Raises:
            RuntimeError: if ``logger`` is not a :class:`ClearMLLogger`.
            TypeError: if ``global_step_transform`` does not return an ``int``.
        """
        if not isinstance(logger, ClearMLLogger):
            raise RuntimeError("Handler OutputHandler works only with ClearMLLogger")

        metrics = self._setup_output_metrics_state_attrs(engine)

        global_step = self.global_step_transform(engine, event_name)

        if not isinstance(global_step, int):
            raise TypeError(
                f"global_step must be int, got {type(global_step)}."
                " Please check the output of global_step_transform."
            )

        # Keys are sequences built by the base class (presumably ``(tag, name[, index])`` - TODO confirm).
        # Two items map to (title, series); three items fold the first two into the title.
        # Keys of any other length are not reported.
        for key, value in metrics.items():
            if len(key) == 2:
                logger.clearml_logger.report_scalar(title=key[0], series=key[1], iteration=global_step, value=value)
            elif len(key) == 3:
                logger.clearml_logger.report_scalar(
                    title=f"{key[0]}/{key[1]}", series=key[2], iteration=global_step, value=value
                )


class OptimizerParamsHandler(BaseOptimizerParamsHandler):
    """Helper handler that reports one optimizer parameter per parameter group.

    Args:
        optimizer: torch optimizer or any object with attribute ``param_groups``
            as a sequence.
        param_name: name of the parameter to read from every parameter group
        tag: common title for all produced plots. For example, "generator"

    Examples:
        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            # Create a logger

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Attach the logger to the trainer to log optimizer's parameters, e.g. learning rate at each iteration
            clearml_logger.attach(
                trainer,
                log_handler=OptimizerParamsHandler(optimizer),
                event_name=Events.ITERATION_STARTED
            )
            # or equivalently
            clearml_logger.attach_opt_params_handler(
                trainer,
                event_name=Events.ITERATION_STARTED,
                optimizer=optimizer
            )
    """

    def __init__(self, optimizer: Optimizer, param_name: str = "lr", tag: Optional[str] = None):
        super().__init__(optimizer, param_name, tag)

    def __call__(self, engine: Engine, logger: ClearMLLogger, event_name: Union[str, Events]) -> None:
        """Report ``param_name`` of every parameter group as a scalar series.

        The plot title is ``param_name`` (prefixed with ``"<tag>/"`` when a tag is set) and each
        parameter group is reported under its index as series name.

        Raises:
            RuntimeError: if ``logger`` is not a :class:`ClearMLLogger`.
        """
        if not isinstance(logger, ClearMLLogger):
            raise RuntimeError("Handler OptimizerParamsHandler works only with ClearMLLogger")

        step = engine.state.get_event_attrib_value(event_name)
        prefix = f"{self.tag}/" if self.tag else ""

        # All values are converted before anything is reported.
        group_values = [float(group[self.param_name]) for group in self.optimizer.param_groups]

        for index, group_value in enumerate(group_values):
            logger.clearml_logger.report_scalar(
                title=f"{prefix}{self.param_name}", series=str(index), value=group_value, iteration=step
            )


class WeightsScalarHandler(BaseWeightsScalarHandler):
    """Helper handler to log model's weights as scalars.
    Handler, upon construction, iterates over named parameters of the model and keeps
    references to the ones permitted by `whitelist`. Then at every call, applies
    reduction function to each parameter, produces a scalar and logs it.

    Args:
        model: model to log weights
        reduction: function to reduce parameters into scalar
        tag: common title for all produced plots. For example, "generator"
        whitelist: specific weights to log. Should be list of model's submodules
            or parameters names, or a callable which gets weight along with its name
            and determines if it should be logged. Names should be fully-qualified.
            For more information please refer to `PyTorch docs
            <https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.get_submodule>`_.
            If not given, all of model's weights are logged.

    Examples:
        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            # Create a logger

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Attach the logger to the trainer to log model's weights norm after each iteration
            clearml_logger.attach(
                trainer,
                event_name=Events.ITERATION_COMPLETED,
                log_handler=WeightsScalarHandler(model, reduction=torch.norm)
            )

        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Log only `fc` weights
            clearml_logger.attach(
                trainer,
                event_name=Events.ITERATION_COMPLETED,
                log_handler=WeightsScalarHandler(
                    model,
                    whitelist=['fc']
                )
            )

        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Log weights which have `bias` in their names
            def has_bias_in_name(n, p):
                return 'bias' in n

            clearml_logger.attach(
                trainer,
                event_name=Events.ITERATION_COMPLETED,
                log_handler=WeightsScalarHandler(model, whitelist=has_bias_in_name)
            )

    ..  versionchanged:: 0.4.9
        optional argument `whitelist` added.
    """

    def __call__(self, engine: Engine, logger: ClearMLLogger, event_name: Union[str, Events]) -> None:
        """Reduce every tracked weight to a scalar and report it.

        The parameter name is split at its first dot: the part before it goes into the plot
        title (together with the reduction's name), the remainder is the series name.

        Raises:
            RuntimeError: if ``logger`` is not a :class:`ClearMLLogger`.
        """
        if not isinstance(logger, ClearMLLogger):
            raise RuntimeError("Handler WeightsScalarHandler works only with ClearMLLogger")

        global_step = engine.state.get_event_attrib_value(event_name)
        tag_prefix = f"{self.tag}/" if self.tag else ""
        # Not every callable carries ``__name__`` (e.g. ``functools.partial`` objects or instances
        # defining ``__call__``); fall back to the callable's type name instead of failing.
        reduction_name = getattr(self.reduction, "__name__", type(self.reduction).__name__)
        for name, p in self.weights:
            title_name, _, series_name = name.partition(".")
            logger.clearml_logger.report_scalar(
                title=f"{tag_prefix}weights_{reduction_name}/{title_name}",
                series=series_name,
                value=self.reduction(p.data),
                iteration=global_step,
            )


class WeightsHistHandler(BaseWeightsHandler):
    """Helper handler that reports the model's weights as histograms.

    Args:
        model: model to log weights
        tag: common title for all produced plots. For example, 'generator'
        whitelist: specific weights to log. Should be list of model's submodules
            or parameters names, or a callable which gets weight along with its name
            and determines if it should be logged. Names should be fully-qualified.
            For more information please refer to `PyTorch docs
            <https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.get_submodule>`_.
            If not given, all of model's weights are logged.

    Examples:
        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            # Create a logger

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Attach the logger to the trainer to log model's weights histograms after each iteration
            clearml_logger.attach(
                trainer,
                event_name=Events.ITERATION_COMPLETED,
                log_handler=WeightsHistHandler(model)
            )

        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Log weights of `fc` layer
            weights = ['fc']

            # Attach the logger to the trainer to log weights histograms after each iteration
            clearml_logger.attach(
                trainer,
                event_name=Events.ITERATION_COMPLETED,
                log_handler=WeightsHistHandler(model, whitelist=weights)
            )

        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Log weights whose names include 'conv'.
            weight_selector = lambda name, p: 'conv' in name

            # Attach the logger to the trainer to log weights histograms after each iteration
            clearml_logger.attach(
                trainer,
                event_name=Events.ITERATION_COMPLETED,
                log_handler=WeightsHistHandler(model, whitelist=weight_selector)
            )

    ..  versionchanged:: 0.4.9
        optional argument `whitelist` added.
    """

    def __call__(self, engine: Engine, logger: ClearMLLogger, event_name: Union[str, Events]) -> None:
        """Send a histogram of every tracked weight through ``logger.grad_helper``.

        The parameter name is split at its first dot: the leading part goes into the plot
        title, the remainder becomes the series name.

        Raises:
            RuntimeError: if ``logger`` is not a :class:`ClearMLLogger`.
        """
        if not isinstance(logger, ClearMLLogger):
            raise RuntimeError("Handler 'WeightsHistHandler' works only with ClearMLLogger")

        step = engine.state.get_event_attrib_value(event_name)
        prefix = f"{self.tag}/" if self.tag else ""
        for qualified_name, param in self.weights:
            head, _, tail = qualified_name.partition(".")
            logger.grad_helper.add_histogram(
                title=f"{prefix}weights_{head}", series=tail, step=step, hist_data=param.data.cpu().numpy()
            )


class GradsScalarHandler(BaseWeightsScalarHandler):
    """Helper handler to log model's gradients as scalars.
    Handler, upon construction, iterates over named parameters of the model and keeps
    references to the ones permitted by the `whitelist`. Then at every call, applies
    reduction function to each parameter's gradient, produces a scalar and logs it.

    Args:
        model: model to log weights
        reduction: function to reduce parameters into scalar
        tag: common title for all produced plots. For example, "generator"
        whitelist: specific gradients to log. Should be list of model's submodules
            or parameters names, or a callable which gets weight along with its name
            and determines if its gradient should be logged. Names should be
            fully-qualified. For more information please refer to `PyTorch docs
            <https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.get_submodule>`_.
            If not given, all of model's gradients are logged.

    Examples:
        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            # Create a logger

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Attach the logger to the trainer to log model's gradients norm after each iteration
            clearml_logger.attach(
                trainer,
                event_name=Events.ITERATION_COMPLETED,
                log_handler=GradsScalarHandler(model, reduction=torch.norm)
            )

        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Log gradient of `base`
            clearml_logger.attach(
                trainer,
                event_name=Events.ITERATION_COMPLETED,
                log_handler=GradsScalarHandler(
                    model,
                    reduction=torch.norm,
                    whitelist=['base']
                )
            )

        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Log gradient of weights which belong to a `fc` layer
            def is_in_fc_layer(n, p):
                return 'fc' in n

            clearml_logger.attach(
                trainer,
                event_name=Events.ITERATION_COMPLETED,
                log_handler=GradsScalarHandler(model, whitelist=is_in_fc_layer)
            )

    ..  versionchanged:: 0.4.9
        optional argument `whitelist` added.
    """

    def __call__(self, engine: Engine, logger: ClearMLLogger, event_name: Union[str, Events]) -> None:
        """Reduce the gradient of every tracked parameter to a scalar and report it.

        Parameters whose ``grad`` is ``None`` are skipped. The parameter name is split at its
        first dot: the part before it goes into the plot title (together with the reduction's
        name), the remainder is the series name.

        Raises:
            RuntimeError: if ``logger`` is not a :class:`ClearMLLogger`.
        """
        if not isinstance(logger, ClearMLLogger):
            raise RuntimeError("Handler GradsScalarHandler works only with ClearMLLogger")

        global_step = engine.state.get_event_attrib_value(event_name)
        tag_prefix = f"{self.tag}/" if self.tag else ""
        # Not every callable carries ``__name__`` (e.g. ``functools.partial`` objects or instances
        # defining ``__call__``); fall back to the callable's type name instead of failing.
        reduction_name = getattr(self.reduction, "__name__", type(self.reduction).__name__)
        for name, p in self.weights:
            if p.grad is None:
                continue

            title_name, _, series_name = name.partition(".")
            logger.clearml_logger.report_scalar(
                title=f"{tag_prefix}grads_{reduction_name}/{title_name}",
                series=series_name,
                value=self.reduction(p.grad),
                iteration=global_step,
            )


class GradsHistHandler(BaseWeightsHandler):
    """Helper handler to log model's gradients as histograms.

    Args:
        model: model to log weights
        tag: common title for all produced plots. For example, 'generator'
        whitelist: specific gradients to log. Should be list of model's submodules
            or parameters names, or a callable which gets weight along with its name
            and determines if its gradient should be logged. Names should be
            fully-qualified. For more information please refer to `PyTorch docs
            <https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.get_submodule>`_.
            If not given, all of model's gradients are logged.

    Examples:
        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            # Create a logger

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Attach the logger to the trainer to log model's gradients histograms after each iteration
            clearml_logger.attach(
                trainer,
                event_name=Events.ITERATION_COMPLETED,
                log_handler=GradsHistHandler(model)
            )

        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Log gradient of `fc.bias`
            clearml_logger.attach(
                trainer,
                event_name=Events.ITERATION_COMPLETED,
                log_handler=GradsHistHandler(model, whitelist=['fc.bias'])
            )

        .. code-block:: python

            from ignite.handlers.clearml_logger import *

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            # Log gradient of weights which have shape (2, 1)
            def has_shape_2_1(n, p):
                return p.shape == (2,1)

            clearml_logger.attach(
                trainer,
                event_name=Events.ITERATION_COMPLETED,
                log_handler=GradsHistHandler(model, whitelist=has_shape_2_1)
            )

    ..  versionchanged:: 0.4.9
        optional argument `whitelist` added.
    """

    def __call__(self, engine: Engine, logger: ClearMLLogger, event_name: Union[str, Events]) -> None:
        """Send a histogram of the gradient of every tracked parameter through ``logger.grad_helper``.

        Parameters whose ``grad`` is ``None`` are skipped. The parameter name is split at its
        first dot: the leading part goes into the plot title, the remainder becomes the series name.

        Raises:
            RuntimeError: if ``logger`` is not a :class:`ClearMLLogger`.
        """
        if not isinstance(logger, ClearMLLogger):
            raise RuntimeError("Handler 'GradsHistHandler' works only with ClearMLLogger")

        global_step = engine.state.get_event_attrib_value(event_name)
        tag_prefix = f"{self.tag}/" if self.tag else ""
        for name, p in self.weights:
            if p.grad is None:
                continue

            title_name, _, series_name = name.partition(".")
            logger.grad_helper.add_histogram(
                title=f"{tag_prefix}grads_{title_name}",
                series=series_name,
                step=global_step,
                # ``detach()`` first: ``numpy()`` refuses tensors that require grad, which a gradient
                # does when it was produced with ``create_graph=True``. Values are unchanged.
                hist_data=p.grad.detach().cpu().numpy(),
            )


class ClearMLSaver(DiskSaver):
    """
    Handler that saves input checkpoint as ClearML artifacts

    Args:
        logger: An instance of :class:`~ignite.handlers.clearml_logger.ClearMLLogger`,
            ensuring a valid ClearML ``Task`` has been initialized. If not provided, and a ClearML Task
            has not been manually initialized, a runtime error will be raised.
        output_uri: The default location for output models and other artifacts uploaded by ClearML. For
            more information, see ``clearml.Task.init``.
        dirname: Directory path where the checkpoint will be saved. If not provided, a temporary
            directory will be created.
        args: Extra positional arguments forwarded to the parent ``DiskSaver`` constructor, after ``dirname``.
        kwargs: Extra keyword arguments forwarded to the parent ``DiskSaver`` constructor. ``atomic`` is set
            to ``False`` when it is not supplied.

    Examples:
        .. code-block:: python

            from ignite.handlers.clearml_logger import *
            from ignite.handlers import Checkpoint, global_step_from_engine

            clearml_logger = ClearMLLogger(
                project_name="pytorch-ignite-integration",
                task_name="cnn-mnist"
            )

            to_save = {"model": model}

            handler = Checkpoint(
                to_save,
                ClearMLSaver(),
                n_saved=1,
                score_function=lambda e: 123,
                score_name="acc",
                filename_prefix="best",
                global_step_transform=global_step_from_engine(trainer)
            )

            validation_evaluator.add_event_handler(Events.COMPLETED, handler)

    """

    def __init__(
        self,
        logger: Optional[ClearMLLogger] = None,
        output_uri: Optional[str] = None,
        dirname: Optional[str] = None,
        *args: Any,
        **kwargs: Any,
    ):
        self._setup_check_clearml(logger, output_uri)

        if not dirname:
            dirname = ""
            # Only rank 0 creates the temporary directory; in a distributed run its path is
            # then gathered so that every process uses the same location.
            if idist.get_rank() == 0:
                dirname = tempfile.mkdtemp(prefix=f"ignite_checkpoints_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_')}")
            if idist.get_world_size() > 1:
                dirname = idist.all_gather(dirname)[0]  # type: ignore[index, assignment]

            warnings.warn(f"ClearMLSaver created a temporary checkpoints directory: {dirname}")
            idist.barrier()

        # Let's set non-atomic tmp dir saving behaviour.
        # The default is injected only when no extra positional arguments are given: per the
        # documented DiskSaver signature the first of them fills `atomic`, so adding the keyword
        # as well would raise "got multiple values for argument 'atomic'".
        if not args and "atomic" not in kwargs:
            kwargs["atomic"] = False

        # Maps (dirname, basename) to the list of upload slots currently in use for that checkpoint.
        self._checkpoint_slots: DefaultDict[Union[str, Tuple[str, str]], List[Any]] = defaultdict(list)

        # `dirname` is passed positionally, ahead of `*args`. Passing it as a keyword together with
        # `*args` made the first extra positional argument collide with it and raise TypeError.
        super(ClearMLSaver, self).__init__(dirname, *args, **kwargs)

    @idist.one_rank_only()
    def _setup_check_clearml(self, logger: Optional[ClearMLLogger], output_uri: Optional[str]) -> None:
        """Validate the ClearML setup and bind the current ClearML task to ``self._task``.

        Args:
            logger: optional logger; it is only type-checked here, the task itself is looked up
                through ``Task.current_task()``.
            output_uri: if given, assigned to the task's ``output_uri``.

        Raises:
            ModuleNotFoundError: if neither ``clearml`` nor the legacy ``trains`` package can be imported.
            TypeError: if ``logger`` is provided but is not a :class:`ClearMLLogger`.
            RuntimeError: if no ClearML task is currently initialized.
        """
        try:
            from clearml import Task
        except ImportError:
            try:
                # Backwards-compatibility for legacy Trains SDK
                from trains import Task
            except ImportError:
                raise ModuleNotFoundError(
                    "This contrib module requires clearml to be installed. "
                    "You may install clearml using: \n pip install clearml \n"
                )

        if logger and not isinstance(logger, ClearMLLogger):
            raise TypeError("logger must be an instance of ClearMLLogger")

        self._task = Task.current_task()
        if not self._task:
            raise RuntimeError(
                "ClearMLSaver requires a ClearML Task to be initialized. "
                "Please use the `logger` argument or call `clearml.Task.init()`."
            )

        if output_uri:
            self._task.output_uri = output_uri

    class _CallbacksContext:
        """Per-save state shared by the pre/post callbacks registered on ``WeightsFileHandler``.

        Args:
            callback_type: enum whose ``save`` member identifies save actions.
            slots: list of upload slots for this checkpoint; a ``None`` entry marks a free slot.
            checkpoint_key: identifier of the checkpoint group, stored as a string.
            filename: name of the checkpoint file being saved.
            basename: prefix used to build the upload filename.
            metadata: optional checkpoint metadata written into the model comment.
        """

        def __init__(
            self,
            callback_type: Type[Enum],
            slots: List,
            checkpoint_key: str,
            filename: str,
            basename: str,
            metadata: Optional[Mapping] = None,
        ) -> None:
            self._callback_type = callback_type
            self._slots = slots
            self._checkpoint_key = str(checkpoint_key)
            self._filename = filename
            self._basename = basename
            self._metadata = metadata

        def pre_callback(self, action: str, model_info: Any) -> Any:
            """Assign an upload slot to the model and rename its upload file accordingly.

            Actions other than ``save`` are passed through untouched.
            """
            if action != self._callback_type.save:  # type: ignore[attr-defined]
                return model_info

            try:
                # Reuse the first freed slot, if there is one ...
                slot = self._slots.index(None)
                self._slots[slot] = model_info.upload_filename
            except ValueError:
                # ... otherwise grow the list and take the new last position.
                self._slots.append(model_info.upload_filename)
                slot = len(self._slots) - 1

            # The upload name is "<basename>_<slot><extension of the checkpoint filename>".
            model_info.upload_filename = f"{self._basename}_{slot}{os.path.splitext(self._filename)[1]}"
            model_info.local_model_id = f"{self._checkpoint_key}:{model_info.upload_filename}"
            return model_info

        def post_callback(self, action: str, model_info: Any) -> Any:
            """Set the model name and write the checkpoint metadata line into the model comment.

            Actions other than ``save`` are passed through untouched.
            """
            if action != self._callback_type.save:  # type: ignore[attr-defined]
                return model_info

            model_info.model.name = f"{model_info.task.name}: {self._filename}"
            prefix = "Checkpoint Metadata: "
            metadata_items = ", ".join(f"{k}={v}" for k, v in self._metadata.items()) if self._metadata else "none"
            metadata = f"{prefix}{metadata_items}"
            # Replace an existing metadata line in place; every other comment line is kept as is.
            comment = "\n".join(
                metadata if line.startswith(prefix) else line for line in (model_info.model.comment or "").split("\n")
            )
            # No metadata line was present: append one.
            if prefix not in comment:
                comment += "\n" + metadata
            model_info.model.comment = comment

            return model_info

    def __call__(self, checkpoint: Mapping, filename: str, metadata: Optional[Mapping] = None) -> None:
        """Save ``checkpoint`` through the parent saver with the ClearML callbacks installed.

        Args:
            checkpoint: the checkpoint mapping to save.
            filename: name of the checkpoint file.
            metadata: optional metadata; its ``"basename"`` entry names the upload slot group.
                When it is missing, a warning is issued and ``"checkpoint"`` is used.

        Raises:
            ModuleNotFoundError: if neither ``clearml`` nor the legacy ``trains`` package can be imported.
        """
        try:
            from clearml.binding.frameworks import WeightsFileHandler
        except ImportError:
            try:
                # Backwards-compatibility for legacy Trains SDK
                from trains.binding.frameworks import WeightsFileHandler
            except ImportError:
                raise ModuleNotFoundError(
                    "This contrib module requires clearml to be installed. "
                    "You may install clearml using: \n pip install clearml \n"
                )

        try:
            basename = metadata["basename"]  # type: ignore[index]
        except (TypeError, KeyError):
            # TypeError covers `metadata is None`, KeyError a mapping without "basename".
            warnings.warn("Checkpoint metadata missing or basename cannot be found")
            basename = "checkpoint"

        checkpoint_key = (str(self.dirname), basename)

        cb_context = self._CallbacksContext(
            callback_type=WeightsFileHandler.CallbackType,
            slots=self._checkpoint_slots[checkpoint_key],
            checkpoint_key=str(checkpoint_key),
            filename=filename,
            basename=basename,
            metadata=metadata,
        )

        pre_cb_id = WeightsFileHandler.add_pre_callback(cb_context.pre_callback)
        post_cb_id = WeightsFileHandler.add_post_callback(cb_context.post_callback)

        try:
            super(ClearMLSaver, self).__call__(checkpoint, filename, metadata)
        finally:
            # The callbacks are only meant for this save; always unregister them.
            WeightsFileHandler.remove_pre_callback(pre_cb_id)
            WeightsFileHandler.remove_post_callback(post_cb_id)

    @idist.one_rank_only()
    def get_local_copy(self, filename: str) -> Optional[str]:
        """Get artifact local copy.

        .. warning::

            In distributed configuration this method should be called on rank 0 process.

        Args:
            filename: artifact name.

        Returns:
             a local path to a downloaded copy of the artifact, or ``None`` if the task has no
             artifact with this name (a message is then reported through the task logger).
        """
        artifact = self._task.artifacts.get(filename)
        if artifact:
            return artifact.get_local_copy()
        self._task.get_logger().report_text(f"Can not find artifact {filename}")

        return None

    @idist.one_rank_only()
    def remove(self, filename: str) -> None:
        """Remove ``filename`` through the parent saver and free the upload slot recorded for it.

        Args:
            filename: name of the checkpoint file to remove.
        """
        super(ClearMLSaver, self).remove(filename)
        for slots in self._checkpoint_slots.values():
            try:
                # Mark the slot as free so that a later save can reuse its index.
                slots[slots.index(filename)] = None
            except ValueError:
                # Not recorded in this slot list; try the next one.
                pass
            else:
                break