1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703
|
"""Performance tests for large schemas and complex code generation scenarios.
These tests are designed to measure code generation performance with:
- Large number of models (500+)
- Duplicate/similar names requiring disambiguation
- Complex reference chains and circular references
- Deeply nested structures
- Multiple file inputs
- Large OpenAPI specifications
Tests are marked with @pytest.mark.perf for exclusion from regular CI runs.
Core tests are also marked with @pytest.mark.benchmark for CodSpeed integration.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from datamodel_code_generator import DataModelType, InputFileType, generate
# Directory of schema inputs used by the tests below: ``data/performance`` under
# the grandparent directory of this file.
PERFORMANCE_DATA_PATH: Path = Path(__file__).parent.parent / "data" / "performance"
@pytest.mark.perf
def test_perf_large_models(tmp_path: Path) -> None:
    """Performance test: Generate 500 models from a single schema.

    This tests the parser's ability to handle a large number of model definitions
    and the code generator's performance with many models.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "large_models.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    # Verify we generated all 500 models
    assert content.count("class Model") >= 500
@pytest.mark.perf
@pytest.mark.benchmark
def test_perf_large_models_pydantic_v2(tmp_path: Path) -> None:
    """Performance test: Generate 500 Pydantic v2 models.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "large_models.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
        output_model_type=DataModelType.PydanticV2BaseModel,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    assert content.count("class Model") >= 500
@pytest.mark.perf
def test_perf_large_models_dataclass(tmp_path: Path) -> None:
    """Performance test: Generate 500 dataclass models.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "large_models.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
        output_model_type=DataModelType.DataclassesDataclass,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    assert content.count("class Model") >= 500
@pytest.mark.perf
def test_perf_large_models_typed_dict(tmp_path: Path) -> None:
    """Performance test: Generate 500 TypedDict models.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "large_models.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
        output_model_type=DataModelType.TypingTypedDict,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    assert content.count("class Model") >= 500
@pytest.mark.perf
@pytest.mark.benchmark
def test_perf_duplicate_names(tmp_path: Path) -> None:
    """Performance test: Handle 300 models with duplicate/similar names.

    This tests the name disambiguation logic when many models have similar names
    requiring prefixes or suffixes to avoid collisions.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "duplicate_names.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    # Verify we generated Container models
    assert content.count("class Container") >= 100
@pytest.mark.perf
def test_perf_duplicate_names_multiple_files(tmp_path: Path) -> None:
    """Performance test: duplicate-name schema sent to a suffix-less output path.

    Runs ``generate`` on ``duplicate_names.json`` with ``tmp_path / "models"`` as
    the output target and asserts only that this path exists afterwards.

    Args:
        tmp_path: Per-test temporary directory under which the target is created.
    """
    target = tmp_path / "models"
    source = PERFORMANCE_DATA_PATH / "duplicate_names.json"
    generate(input_=source, input_file_type=InputFileType.JsonSchema, output=target)
    # NOTE(review): ``exists()`` is the whole check; it does not tell a
    # directory apart from a single file and the contents are not inspected.
    assert target.exists()
@pytest.mark.perf
@pytest.mark.benchmark
def test_perf_complex_refs(tmp_path: Path) -> None:
    """Performance test: Handle 200 models with complex reference chains.

    This tests the reference resolution logic with:
    - Circular references
    - Deep reference chains
    - Self-references
    - Cross-references between models

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "complex_refs.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    # Verify we generated Node models
    assert content.count("class Node") >= 200
@pytest.mark.perf
def test_perf_complex_refs_collapse_root(tmp_path: Path) -> None:
    """Performance test: Complex refs with collapse-root-models enabled.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "complex_refs.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
        collapse_root_models=True,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    # Only presence is checked here, not the number of Node classes.
    assert "class Node" in content
@pytest.mark.perf
@pytest.mark.benchmark
def test_perf_deep_nested(tmp_path: Path) -> None:
    """Performance test: Handle deeply nested structures (50 levels deep).

    This tests:
    - Deep nesting resolution
    - Wide models with many fields
    - Combined deep and wide structures

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "deep_nested.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    # Verify we generated Level and WideModel classes
    assert content.count("class Level") >= 50
    assert content.count("class WideModel") >= 100
@pytest.mark.perf
def test_perf_deep_nested_use_annotated(tmp_path: Path) -> None:
    """Performance test: Deep nested with use-annotated and field-constraints enabled.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "deep_nested.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
        use_annotated=True,
        field_constraints=True,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    assert "class Level" in content
@pytest.mark.perf
@pytest.mark.benchmark
def test_perf_multiple_files_input(tmp_path: Path) -> None:
    """Performance test: Process 50 schema files (500 models total).

    This tests:
    - Directory input handling
    - Multiple file parsing
    - Cross-file model coordination

    Args:
        tmp_path: Per-test temporary directory under which ``models`` is written.
    """
    output_dir = tmp_path / "models"
    generate(
        input_=PERFORMANCE_DATA_PATH / "multiple_files",
        input_file_type=InputFileType.JsonSchema,
        output=output_dir,
    )
    # Verify output directory was created with models
    assert output_dir.exists()
    py_files = list(output_dir.glob("**/*.py"))
    assert len(py_files) >= 1
    # Count total Module models across all files. Each file is decoded
    # explicitly as UTF-8 so the count does not depend on the platform's
    # locale encoding.
    total_models = sum(
        py_file.read_text(encoding="utf-8").count("class Module") for py_file in py_files
    )
    assert total_models >= 500
@pytest.mark.perf
def test_perf_multiple_files_to_multiple_outputs(tmp_path: Path) -> None:
    """Performance test: directory of input schemas written to an output directory.

    Runs ``generate`` on the ``multiple_files`` input directory and checks that
    the output path exists and holds at least one ``.py`` file at any depth.

    Args:
        tmp_path: Per-test temporary directory under which ``models`` is written.
    """
    target = tmp_path / "models"
    source = PERFORMANCE_DATA_PATH / "multiple_files"
    generate(input_=source, input_file_type=InputFileType.JsonSchema, output=target)
    assert target.exists()
    # One generated module is enough to pass; the file count is not compared
    # against the number of inputs.
    generated = list(target.glob("**/*.py"))
    assert len(generated) >= 1
@pytest.mark.perf
@pytest.mark.benchmark
def test_perf_openapi_large(tmp_path: Path) -> None:
    """Performance test: Large OpenAPI spec with 300 schemas and 900 endpoints.

    This tests:
    - OpenAPI parsing performance
    - Path/endpoint processing
    - Component schema handling

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "openapi_large.yaml",
        input_file_type=InputFileType.OpenAPI,
        output=output_file,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    # Verify we generated Entity models
    assert content.count("class Entity") >= 300
@pytest.mark.perf
def test_perf_openapi_large_strict_types(tmp_path: Path) -> None:
    """Performance test: Large OpenAPI with strict types enabled.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "openapi_large.yaml",
        input_file_type=InputFileType.OpenAPI,
        output=output_file,
        # NOTE(review): these are plain strings; confirm that ``generate``
        # treats them the same as its strict-type enum members, otherwise the
        # option is silently ineffective.
        strict_types=[
            "str",
            "int",
            "float",
            "bool",
        ],
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    assert "class Entity" in content
@pytest.mark.perf
def test_perf_openapi_large_field_constraints(tmp_path: Path) -> None:
    """Performance test: Large OpenAPI with field constraints enabled.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "openapi_large.yaml",
        input_file_type=InputFileType.OpenAPI,
        output=output_file,
        field_constraints=True,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    assert "class Entity" in content
@pytest.mark.perf
def test_perf_combined_large_models_with_formatting(tmp_path: Path) -> None:
    """Performance test: Large models with all formatting options.

    This tests the full pipeline including:
    - Parsing
    - Code generation
    - Import sorting
    - Code formatting

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "large_models.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
        use_standard_collections=True,
        use_union_operator=True,
        use_annotated=True,
        field_constraints=True,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    assert content.count("class Model") >= 500
@pytest.mark.perf
@pytest.mark.benchmark
def test_perf_all_options_enabled(tmp_path: Path) -> None:
    """Performance test: Large schema with many options enabled.

    This stress tests the code generator with multiple features enabled simultaneously.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "deep_nested.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
        output_model_type=DataModelType.PydanticV2BaseModel,
        use_standard_collections=True,
        use_union_operator=True,
        use_annotated=True,
        field_constraints=True,
        collapse_root_models=True,
        use_field_description=True,
        use_default_kwarg=True,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    # Smoke check only: any occurrence of "class" in the output passes.
    assert "class" in content
# Real-world style schemas based on popular APIs
@pytest.mark.perf
def test_perf_kubernetes_style(tmp_path: Path) -> None:
    """Performance test: Kubernetes-style schema with 300+ definitions.

    Tests patterns common in Kubernetes APIs:
    - Deep metadata/spec/status patterns
    - Many resource types with similar structures
    - CRD-like custom resources
    - Complex allOf compositions

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "kubernetes_style.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    # Verify we generated core K8s types
    assert "ObjectMeta" in content
    assert "class" in content
@pytest.mark.perf
@pytest.mark.benchmark
def test_perf_kubernetes_style_pydantic_v2(tmp_path: Path) -> None:
    """Performance test: Kubernetes-style schema with Pydantic v2.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "kubernetes_style.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
        output_model_type=DataModelType.PydanticV2BaseModel,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    assert "ObjectMeta" in content
@pytest.mark.perf
def test_perf_stripe_style(tmp_path: Path) -> None:
    """Performance test: Stripe-style schema with 100+ definitions.

    Tests patterns common in Stripe API:
    - Event/webhook patterns
    - Expandable references (oneOf string or object)
    - Many payment-related types
    - Nested billing details

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "stripe_style.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    # Smoke check only: no specific Stripe type name is asserted.
    assert "class" in content
@pytest.mark.perf
@pytest.mark.benchmark
def test_perf_stripe_style_pydantic_v2(tmp_path: Path) -> None:
    """Performance test: Stripe-style schema with Pydantic v2.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "stripe_style.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
        output_model_type=DataModelType.PydanticV2BaseModel,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    assert "class" in content
@pytest.mark.perf
def test_perf_aws_style_openapi(tmp_path: Path) -> None:
    """Performance test: AWS-style OpenAPI with 350+ schemas.

    Tests patterns common in AWS APIs:
    - Many resource types across services
    - Request/Response patterns
    - ARN references
    - Paginated list operations

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "aws_style.yaml",
        input_file_type=InputFileType.OpenAPI,
        output=output_file,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    # Smoke check only: no specific AWS resource type name is asserted.
    assert "class" in content
@pytest.mark.perf
@pytest.mark.benchmark
def test_perf_aws_style_openapi_pydantic_v2(tmp_path: Path) -> None:
    """Performance test: AWS-style OpenAPI with Pydantic v2.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "aws_style.yaml",
        input_file_type=InputFileType.OpenAPI,
        output=output_file,
        output_model_type=DataModelType.PydanticV2BaseModel,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    assert "class" in content
@pytest.mark.perf
def test_perf_graphql_style(tmp_path: Path) -> None:
    """Performance test: GraphQL-style schema with 150+ definitions.

    Tests patterns common in GraphQL APIs converted to JSON Schema:
    - Connection/Edge patterns for pagination
    - Node interface pattern
    - Input/Payload types for mutations
    - Union types (oneOf)

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "graphql_style.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    # Verify we generated GraphQL patterns
    assert "PageInfo" in content
    assert "class" in content
@pytest.mark.perf
@pytest.mark.benchmark
def test_perf_graphql_style_pydantic_v2(tmp_path: Path) -> None:
    """Performance test: GraphQL-style schema with Pydantic v2.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "graphql_style.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
        output_model_type=DataModelType.PydanticV2BaseModel,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    assert "PageInfo" in content
@pytest.mark.perf
def test_perf_graphql_style_typed_dict(tmp_path: Path) -> None:
    """Performance test: GraphQL-style schema with TypedDict.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=PERFORMANCE_DATA_PATH / "graphql_style.json",
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
        output_model_type=DataModelType.TypingTypedDict,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    assert "PageInfo" in content
# =============================================================================
# Dynamically generated extreme-scale tests
# These tests generate schemas at runtime to avoid bloating the repository
# =============================================================================
@pytest.fixture
def extreme_large_schema(tmp_path: Path) -> Path:
    """Write a JSON Schema with 2000 chained model definitions and return its path.

    Definitions are named ``Model0000`` .. ``Model1999`` and share the same seven
    properties. Each ``ref_prev`` property points at the preceding definition
    (``Model0000`` points at itself), and the document root references
    ``Model1999``.

    Args:
        tmp_path: Per-test temporary directory that receives ``extreme_large.json``.

    Returns:
        Path of the written schema file.
    """
    import json

    def build_definition(index: int) -> dict:
        # Clamp at zero so the first model refers to itself instead of Model-001.
        previous = max(0, index - 1)
        return {
            "type": "object",
            "properties": {
                "id": {"type": "integer"},
                "name": {"type": "string"},
                "value": {"type": "number"},
                "active": {"type": "boolean"},
                "tags": {"type": "array", "items": {"type": "string"}},
                "metadata": {"type": "object", "additionalProperties": {"type": "string"}},
                "ref_prev": {"$ref": f"#/definitions/Model{previous:04d}"},
            },
            "required": ["id", "name"],
        }

    # Key order matches the order in which the document is serialized.
    document: dict = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "title": "ExtremeLargeSchema",
        "definitions": {f"Model{i:04d}": build_definition(i) for i in range(2000)},
        "$ref": "#/definitions/Model1999",
    }
    destination = tmp_path / "extreme_large.json"
    destination.write_text(json.dumps(document))
    return destination
@pytest.fixture
def massive_files_input(tmp_path: Path) -> Path:
    """Create a directory of 200 schema files, each defining 20 models.

    File ``schema_NNN.json`` holds definitions ``ModuleNNNModel00`` ..
    ``ModuleNNNModel19`` and its root ``$ref`` points at its own ``Model00``
    definition. No file refers to a definition in another file.

    Args:
        tmp_path: Per-test temporary directory under which ``massive_input`` is
            created.

    Returns:
        Path of the directory containing the schema files.
    """
    import json

    input_dir = tmp_path / "massive_input"
    input_dir.mkdir()
    for file_no in range(200):
        definitions: dict = {
            f"Module{file_no:03d}Model{model_no:02d}": {
                "type": "object",
                "properties": {
                    "id": {"type": "integer"},
                    "name": {"type": "string"},
                    "data": {"type": "object", "additionalProperties": True},
                },
                "required": ["id"],
            }
            for model_no in range(20)
        }
        # Key order matches the order in which the document is serialized.
        document: dict = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "title": f"Schema{file_no:03d}",
            "definitions": definitions,
            "$ref": f"#/definitions/Module{file_no:03d}Model00",
        }
        destination = input_dir / f"schema_{file_no:03d}.json"
        destination.write_text(json.dumps(document))
    return input_dir
@pytest.fixture
def extreme_duplicate_names_schema(tmp_path: Path) -> Path:
    """Write a schema with 1000 similarly named definitions and return its path.

    Titles are built from 10 prefixes x 10 suffixes x 10 variants; variant 0 uses
    the bare ``<prefix><suffix>`` and variants 1-9 append the variant number.
    Definition keys have the form ``def_<running index>_<title>``, every
    definition carries an inline ``nested`` object, and the document root
    references ``def_0_UserRequest``.

    Args:
        tmp_path: Per-test temporary directory that receives
            ``extreme_duplicates.json``.

    Returns:
        Path of the written schema file.
    """
    import json

    prefixes = ["User", "Account", "Order", "Product", "Item", "Entity", "Record", "Data", "Info", "Detail"]
    suffixes = ["Request", "Response", "Input", "Output", "Model", "Schema", "Type", "DTO", "Payload", "Result"]
    # Prefix varies slowest and variant fastest, which fixes each running index.
    titles = [
        f"{prefix}{suffix}{variant}" if variant > 0 else f"{prefix}{suffix}"
        for prefix in prefixes
        for suffix in suffixes
        for variant in range(10)
    ]
    definitions: dict = {
        f"def_{position}_{title}": {
            "title": title,
            "type": "object",
            "properties": {
                "id": {"type": "integer"},
                "value": {"type": "string"},
                "nested": {
                    "type": "object",
                    "properties": {
                        "inner_id": {"type": "integer"},
                    },
                },
            },
        }
        for position, title in enumerate(titles)
    }
    # Key order matches the order in which the document is serialized.
    document: dict = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "title": "ExtremeDuplicateNames",
        "definitions": definitions,
        "$ref": "#/definitions/def_0_UserRequest",
    }
    destination = tmp_path / "extreme_duplicates.json"
    destination.write_text(json.dumps(document))
    return destination
@pytest.mark.perf
def test_perf_extreme_large_schema(tmp_path: Path, extreme_large_schema: Path) -> None:
    """Performance test: Extremely large schema with 2000 models.

    Tests the generator's ability to handle very large schemas that would be
    impractical to store in the repository.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
        extreme_large_schema: Path of the schema file produced by the fixture
            of the same name.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=extreme_large_schema,
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    assert content.count("class Model") >= 2000
@pytest.mark.perf
def test_perf_extreme_large_schema_pydantic_v2(tmp_path: Path, extreme_large_schema: Path) -> None:
    """Performance test: Extremely large schema with Pydantic v2.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
        extreme_large_schema: Path of the schema file produced by the fixture
            of the same name.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=extreme_large_schema,
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
        output_model_type=DataModelType.PydanticV2BaseModel,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    assert content.count("class Model") >= 2000
@pytest.mark.perf
def test_perf_massive_files_input(tmp_path: Path, massive_files_input: Path) -> None:
    """Performance test: directory input with 200 schema files (4000 models total).

    Runs ``generate`` on the fixture directory and checks that the output path
    exists and holds at least one ``.py`` file at any depth.

    Args:
        tmp_path: Per-test temporary directory under which ``output`` is written.
        massive_files_input: Directory of schema files produced by the fixture
            of the same name.
    """
    target = tmp_path / "output"
    generate(input_=massive_files_input, input_file_type=InputFileType.JsonSchema, output=target)
    assert target.exists()
    # One generated module is enough to pass; model counts are not checked.
    generated = list(target.glob("**/*.py"))
    assert len(generated) >= 1
@pytest.mark.perf
def test_perf_massive_files_single_output(tmp_path: Path, massive_files_input: Path) -> None:
    """Performance test: 200 schema files written to the ``merged`` output directory.

    Runs ``generate`` on the fixture directory and checks that the output path
    exists and holds at least one ``.py`` file at any depth.

    Args:
        tmp_path: Per-test temporary directory under which ``merged`` is written.
        massive_files_input: Directory of schema files produced by the fixture
            of the same name.
    """
    # NOTE(review): apart from the directory name, this passes the same
    # arguments as test_perf_massive_files_input; confirm whether a genuinely
    # single-file output was intended.
    target = tmp_path / "merged"
    generate(input_=massive_files_input, input_file_type=InputFileType.JsonSchema, output=target)
    assert target.exists()
    generated = list(target.glob("**/*.py"))
    assert len(generated) >= 1
@pytest.mark.perf
def test_perf_extreme_duplicate_names(tmp_path: Path, extreme_duplicate_names_schema: Path) -> None:
    """Performance test: Handle 1000 models with highly similar names.

    Tests the name disambiguation logic under extreme conditions.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
        extreme_duplicate_names_schema: Path of the schema file produced by the
            fixture of the same name.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=extreme_duplicate_names_schema,
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    # Counts every class definition, whatever its name.
    assert content.count("class ") >= 1000
@pytest.mark.perf
def test_perf_extreme_duplicate_names_pydantic_v2(tmp_path: Path, extreme_duplicate_names_schema: Path) -> None:
    """Performance test: Extreme duplicate names with Pydantic v2.

    Args:
        tmp_path: Per-test temporary directory that receives ``output.py``.
        extreme_duplicate_names_schema: Path of the schema file produced by the
            fixture of the same name.
    """
    output_file = tmp_path / "output.py"
    generate(
        input_=extreme_duplicate_names_schema,
        input_file_type=InputFileType.JsonSchema,
        output=output_file,
        output_model_type=DataModelType.PydanticV2BaseModel,
    )
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    content = output_file.read_text(encoding="utf-8")
    # Counts every class definition, whatever its name.
    assert content.count("class ") >= 1000
|