File: huggingface.yaml

Package: pytorch-cuda 2.6.0+dfsg-7
skip:
  all:
    # Difficult to set up an accuracy test because .eval() is not supported
    - Reformer
    # Fails deepcopy
    - BlenderbotForConditionalGeneration
    - GPTNeoForCausalLM
    - GPTNeoForSequenceClassification
    # Fails with even batch size = 1
    - GPTJForCausalLM
    - GPTJForQuestionAnswering

  device:
    cpu: []

  control_flow:
    - AllenaiLongformerBase
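
A minimal sketch (not the benchmark suite's actual API) of how the skip lists above might be consulted before running a model; the should_skip helper and the cfg fragment are assumptions for illustration.

def should_skip(cfg, model_name, device, has_dynamic_control_flow=False):
    """Hypothetical check against the skip section of the config above."""
    skip = cfg.get("skip", {})
    if model_name in skip.get("all", []):
        return True
    if model_name in skip.get("device", {}).get(device, []):
        return True
    if has_dynamic_control_flow and model_name in skip.get("control_flow", []):
        return True
    return False

# Fragment mirroring the section above.
cfg = {"skip": {"all": ["Reformer"], "device": {"cpu": []},
                "control_flow": ["AllenaiLongformerBase"]}}
print(should_skip(cfg, "Reformer", "cuda"))          # True
print(should_skip(cfg, "BertForMaskedLM", "cuda"))   # False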

batch_size:
  # TODO - Fails even after fake tensors
  divisors:
    AlbertForMaskedLM: 2
    AlbertForQuestionAnswering: 2
    AllenaiLongformerBase: 2
    BartForCausalLM: 2
    BartForConditionalGeneration: 2
    BertForMaskedLM: 2
    BertForQuestionAnswering: 2
    BlenderbotForCausalLM: 8
    # BlenderbotForConditionalGeneration: 16
    BlenderbotSmallForCausalLM: 4
    BlenderbotSmallForConditionalGeneration: 2
    CamemBert: 2
    DebertaForMaskedLM: 4
    DebertaForQuestionAnswering: 2
    DebertaV2ForMaskedLM: 4
    DebertaV2ForQuestionAnswering: 8
    DistilBertForMaskedLM: 2
    DistilBertForQuestionAnswering: 2
    DistillGPT2: 2
    ElectraForCausalLM: 2
    ElectraForQuestionAnswering: 2
    GPT2ForSequenceClassification: 2
    # GPTJForCausalLM: 2
    # GPTJForQuestionAnswering: 2
    # GPTNeoForCausalLM: 32
    # GPTNeoForSequenceClassification: 2
    GoogleFnet: 2
    LayoutLMForMaskedLM: 2
    LayoutLMForSequenceClassification: 2
    M2M100ForConditionalGeneration: 4
    MBartForCausalLM: 2
    MBartForConditionalGeneration: 2
    MT5ForConditionalGeneration: 2
    MegatronBertForCausalLM: 4
    MegatronBertForQuestionAnswering: 2
    MobileBertForMaskedLM: 2
    MobileBertForQuestionAnswering: 2
    OPTForCausalLM: 2
    PLBartForCausalLM: 2
    PLBartForConditionalGeneration: 2
    PegasusForCausalLM: 4
    PegasusForConditionalGeneration: 2
    RobertaForCausalLM: 2
    RobertaForQuestionAnswering: 2
    Speech2Text2ForCausalLM: 4
    T5ForConditionalGeneration: 2
    T5Small: 2
    TrOCRForCausalLM: 2
    XGLMForCausalLM: 4
    XLNetLMHeadModel: 2
    YituTechConvBert: 2
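
A sketch of how the per-model divisors above might be used to scale a default benchmark batch size; effective_batch_size is a hypothetical helper, and the real harness may apply the divisor differently.

def effective_batch_size(cfg, model_name, default_batch_size):
    """Divide the default batch size by the model's divisor (assumed semantics)."""
    divisor = cfg.get("batch_size", {}).get("divisors", {}).get(model_name, 1)
    return max(default_batch_size // divisor, 1)

cfg = {"batch_size": {"divisors": {"BlenderbotForCausalLM": 8, "BertForMaskedLM": 2}}}
print(effective_batch_size(cfg, "BlenderbotForCausalLM", 64))  # 8
print(effective_batch_size(cfg, "UnlistedModel", 64))          # 64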


tolerance:
  higher_training:
    - MT5ForConditionalGeneration
    # AlbertForQuestionAnswering fails in CI on a GCP A100, but the error does
    # not seem harmful.
    - AlbertForQuestionAnswering

  higher_max_autotune_training:
    # DebertaForQuestionAnswering needs higher tolerance in Max-Autotune mode
    - DebertaForQuestionAnswering

  higher_inference:
    - GPT2ForSequenceClassification
    - RobertaForQuestionAnswering

  higher_inference_cpu:
    - LayoutLMForSequenceClassification
    - GPT2ForSequenceClassification

  cosine: []
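
A sketch of how membership in the tolerance lists above could be mapped to a numeric comparison threshold; the pick_tolerance helper and the 1e-2 / 1e-3 values are illustrative assumptions, not the suite's actual numbers.

def pick_tolerance(cfg, model_name, training, max_autotune=False, device="cuda"):
    """Return an assumed rtol based on the tolerance lists in the config above."""
    tol = cfg.get("tolerance", {})
    if training:
        if max_autotune and model_name in tol.get("higher_max_autotune_training", []):
            return 1e-2
        if model_name in tol.get("higher_training", []):
            return 1e-2
        return 1e-3
    if device == "cpu" and model_name in tol.get("higher_inference_cpu", []):
        return 1e-2
    if model_name in tol.get("higher_inference", []):
        return 1e-2
    return 1e-3

cfg = {"tolerance": {"higher_training": ["MT5ForConditionalGeneration"],
                     "higher_inference": ["GPT2ForSequenceClassification"],
                     "cosine": []}}
print(pick_tolerance(cfg, "MT5ForConditionalGeneration", training=True))  # 0.01
print(pick_tolerance(cfg, "BertForMaskedLM", training=False))             # 0.001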


accuracy:
  skip:
    large_models:
      # Models too large to hold eager, dynamo, and fp64 numbers simultaneously,
      # even on a 40 GB machine.
      - DebertaV2ForMaskedLM
      - BlenderbotForCausalLM

only_inference:
  # Fails with dynamo for train mode
  - M2M100ForConditionalGeneration

only_fp32:
  - GoogleFnet
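
Putting it together, a minimal sketch of loading this file with PyYAML and querying the remaining sections; yaml.safe_load is a real PyYAML call, but the access pattern and variable names are assumptions about how a harness might consume the config.

import yaml

# Assumes this config is available locally as huggingface.yaml.
with open("huggingface.yaml") as f:
    cfg = yaml.safe_load(f)

model = "M2M100ForConditionalGeneration"
# only_inference: training fails under dynamo, so restrict these models to inference runs.
inference_only = model in cfg.get("only_inference", [])
# only_fp32: models benchmarked in fp32 only (e.g. GoogleFnet).
fp32_only = model in cfg.get("only_fp32", [])
# accuracy.skip.large_models: too big to hold eager, dynamo, and fp64 numbers at once.
skip_accuracy = model in cfg.get("accuracy", {}).get("skip", {}).get("large_models", [])
print(inference_only, fp32_only, skip_accuracy)  # True False False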