# Pull MIP table files; reformulate for CMOR 3.10
<div style="text-align: right">
<p>
    <img src="https://pcmdi.github.io/assets/PCMDI/199x65px-PCMDI-Logo-Text-rectangle.png"
         width="91"
         height="30"
         class="fixed-height-image"
         style="margin-right: 20px"
         title="Program for Climate Model Diagnosis and Intercomparison"
         alt="Program for Climate Model Diagnosis and Intercomparison"
    >&nbsp;
    <img src="https://pcmdi.github.io/assets/LLNL/212px-LLNLiconPMS286-WHITEBACKGROUND.png"
         width="30"
         height="30"
         class="fixed-height-image"
         style="margin-right: 20px"
         title="Lawrence Livermore National Laboratory"
         alt="Lawrence Livermore National Laboratory"
    >&nbsp;
    <img src="https://pcmdi.github.io/assets/DOE/459x127px-DOE-Logo_Color_TextOnly.png"
         width="108"
         height="30"
         class="fixed-height-image"
         style="margin-right: 20px"
         title="United States Department of Energy"
         alt="United States Department of Energy"
    >
</p>
</div>

**Summary**

This file pulls a CMIP6Plus/CMOR3.9.0-era MIP table files, strips out extraneous variables and saves the files for local use

**Authors**

Paul J. Durack ([durack1](https://github.com/durack1); [PCMDI](https://pcmdi.llnl.gov/), [Lawrence Livermore National Laboratory](https://www.llnl.gov/))

**Notes**

PJD 25 Feb 2025 - initiated<br>
PJD 25 Feb 2025 - first pass at initial CMOR 3.10 test tables and CMIP7_CV.json<br>
KET 25 Feb 2025 - update to run local in TestTables<br>
PJD  3 Mar 2025 - updated frequency with approx_interval<br>
PJD  3 Mar 2025 - augmented with CMOR-required entries<BR>
PJD  5 Mar 2025 - updates following comments in https://github.com/PCMDI/cmor/pull/778/files/b9f28097dcf1afc99c7823dbdd0991e646de600f<br>
PJD  9 Mar 2025 - adding nominal_resolution, data_archive_id, and regions<br>
PJD  9 Mar 2025 - frequency tweaks (correct for pt entries)<br>
PJD  9 Mar 2025 - add nominal_resolution = 0.25 km; update region identifiers; remove monC frequency<br>
PJD 12 Mar 2025 - further updates to 1) add DRS; 2) remove nested branding_label dict; 3) remove nom*_res 1x1 degree<br>

TODO:

**Links**

### imports

In [1]:
%%time
import datetime
import hashlib
import json
import os
import requests

CPU times: user 40.2 ms, sys: 17.4 ms, total: 57.6 ms
Wall time: 65.8 ms


### define functions

In [2]:
def returnJsonDict(key, url):
    """
    take dictionary and return Json from url to key
    variable
    """
    # process url
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise HTTPError for bad responses (4/5xx)
        tmp = json.loads(response.text)
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
    except json.JSONDecodeError as e:
        print(f"JSON decode failed: {e}")
    except Exception as e:
        print(f"Unexpected error occurred: {e}")

    return tmp

### set upstream table URLs and pull files

In [3]:
%%time
urls = {
    "APday": "https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/Tables/MIP_APday.json",
    "OPmon": "https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/Tables/MIP_OPmon.json",
    "OPmonLev": "https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/Tables/MIP_OPmonLev.json",
}
for key in urls.keys():
    print(key)
    vars()[key] = returnJsonDict(key, urls[key])

APday
OPmon
OPmonLev
CPU times: user 36.7 ms, sys: 16.8 ms, total: 53.5 ms
Wall time: 15.8 s


### APday - trim out redundant variables

In [4]:
keyList = list(APday["variable_entry"].keys())
keepKeys = ["pr", "tas"]
for count, key in enumerate(keyList):
    if key not in keepKeys:
        APday["variable_entry"].pop(key)

### OPmon - trim out redundant variables

In [5]:
keyList = list(OPmon["variable_entry"].keys())
keepKeys = ["tos"]
for count, key in enumerate(keyList):
    if key not in keepKeys:
        OPmon["variable_entry"].pop(key)
# cleanup comments
for key in OPmon["variable_entry"].keys():
    OPmon["variable_entry"][key]["comment"] = OPmon["variable_entry"][key][
        "comment"
    ].replace(".", "")

### OPmonLev - trim out redundant variables

In [6]:
keyList = list(OPmonLev["variable_entry"].keys())
keepKeys = ["thetao"]
for count, key in enumerate(keyList):
    if key not in keepKeys:
        OPmonLev["variable_entry"].pop(key)
# cleanup comments
for key in OPmonLev["variable_entry"].keys():
    OPmonLev["variable_entry"][key]["comment"] = OPmonLev["variable_entry"][key][
        "comment"
    ].replace(".", "")

### Header update notes

In [7]:
# Karl notes https://github.com/PCMDI/cmor/issues/762#issuecomment-2673038397
"""
"Header": {
    **** MOVE TO CV.json FILE: "data_specs_version": "CMIP_specs7.0.0.0-alpha",
    **** MOVE TO CV.json FILE: "mip_era": "CMIP6",
    **** MOVE TO CV.json FILE: "approx_interval": "30.00000",
    "checksum":"", **** New Header entry will contain md5 checksum value
    "cmor_version": "3.10",
    "table_id": "atmos",
    "realm": "atmos", **** Sets realm default value, overridden by modeling_realm attribute for individual variables
    "table_date":"2025-02-14",
    "missing_value": "1e20",
    "int_missing_value": "-999",
    "product": "model-output",
    "generic_levels": "alevel alevhalf",
    "Conventions": "CF-1.11 CMIP-7alpha???"
    -"type":"real", **** This and following attributes are default values, overridden for individual variables
    -"positive":"",
    -"valid_min":"",
    -"valid_max":"",
    -"ok_min_mean_abs":"",
    -"ok_max_mean_abs":"",
},
"""

'\n"Header": {\n    **** MOVE TO CV.json FILE: "data_specs_version": "CMIP_specs7.0.0.0-alpha",\n    **** MOVE TO CV.json FILE: "mip_era": "CMIP6",\n    **** MOVE TO CV.json FILE: "approx_interval": "30.00000",\n    "checksum":"", **** New Header entry will contain md5 checksum value\n    "cmor_version": "3.10",\n    "table_id": "atmos",\n    "realm": "atmos", **** Sets realm default value, overridden by modeling_realm attribute for individual variables\n    "table_date":"2025-02-14",\n    "missing_value": "1e20",\n    "int_missing_value": "-999",\n    "product": "model-output",\n    "generic_levels": "alevel alevhalf",\n    "Conventions": "CF-1.11 CMIP-7alpha???"\n    -"type":"real", **** This and following attributes are default values, overridden for individual variables\n    -"positive":"",\n    -"valid_min":"",\n    -"valid_max":"",\n    -"ok_min_mean_abs":"",\n    -"ok_max_mean_abs":"",\n},\n'

### update Headers

In [8]:
for table in ["APday", "OPmon", "OPmonLev"]:
    tmp = eval(table)
    # drop keys - move to Project_CV.json
    tmp["Header"].pop("approx_interval")
    tmp["Header"].pop("data_specs_version")
    # sort by keys
    tmp["Header"]["checksum"] = ""
    sorted_dict = dict(sorted(tmp.items()))
    # update entries
    tmp["Header"]["Conventions"] = "CF-1.11 CMIP-7.0.0.0"
    tmp["Header"]["cmor_version"] = "3.10"
    tmp["Header"]["table_id"] = "atmos2d"
    # realm specific
    if table == "APday":
        tmp["Header"][
            "generic_levels"
        ] = "alevel alevhalf"  # relevant for atmos variables
        tmp["Header"][
            "realm"
        ] = "atmos"  # Sets realm default value, overridden by modeling_realm attribute for individual variables
    elif table == "OPmon":
        tmp["Header"][
            "realm"
        ] = "ocean"  # Sets realm default value, overridden by modeling_realm attribute for individual variables
    elif table == "OPmonLev":
        tmp["Header"][
            "generic_levels"
        ] = "olevel olevhalf"  # relevant for ocean variables
        tmp["Header"][
            "realm"
        ] = "ocean"  # Sets realm default value, overridden by modeling_realm attribute for individual variables
    tmp["Header"]["table_date"] = datetime.datetime.now().strftime("%Y-%m-%d")
    tmp["Header"]["missing_value"] = "1e20"
    tmp["Header"]["int_missing_value"] = "-999"
    tmp["Header"]["product"] = "model-output"
    tmp["Header"][
        "type"
    ] = "real"  # This and following attributes are default values, overridden for individual variables
    tmp["Header"]["positive"] = ""
    tmp["Header"]["valid_min"] = ""
    tmp["Header"]["valid_max"] = ""
    tmp["Header"]["ok_min_mean_abs"] = ""
    tmp["Header"]["ok_max_mean_abs"] = ""
    # and once entries updated, generate checksum, add back in and write
    dictStr = json.dumps(tmp, sort_keys=True)
    tmp["Header"]["checksum"] = hashlib.md5(dictStr.encode("utf8")).hexdigest()

In [9]:
# remap to branded_variable
# <root name>_<temporalLabelDD>-<verticalLabelDD>-<horizontalLabelDD>-<areaLabelDD>
# pr
brandedVariable = "pr_tavg-u-hxy-u"
APday["variable_entry"][brandedVariable] = APday["variable_entry"][
    "pr"
]  # rename pr -> pr_tavg-u-hxy-u
APday["variable_entry"].pop("pr")  # remove pr from dictionary
APday["variable_entry"][brandedVariable]["brand_description"] = " ".join(
    ["precipitation flux: time means", "reported on a 2-d horizontal grid"]
)
APday["variable_entry"][brandedVariable]["variable_title"] = APday["variable_entry"][
    brandedVariable
]["long_name"]
APday["variable_entry"][brandedVariable]["realm"] = ["atmos", "ocean"]
# remove user provided
APday["variable_entry"][brandedVariable].pop("frequency")
# remove redundant
APday["variable_entry"][brandedVariable].pop("long_name")
APday["variable_entry"][brandedVariable].pop("modeling_realm")
APday["variable_entry"][brandedVariable].pop("ok_max_mean_abs")
APday["variable_entry"][brandedVariable].pop("ok_min_mean_abs")
APday["variable_entry"][brandedVariable].pop("positive")  # only relevant for fluxes
APday["variable_entry"][brandedVariable].pop("type")
APday["variable_entry"][brandedVariable].pop("valid_max")
APday["variable_entry"][brandedVariable].pop("valid_min")
# tas
brandedVariable = "tas_tavg-h2m-hxy-u"
APday["variable_entry"][brandedVariable] = APday["variable_entry"][
    "tas"
]  # rename tas -> tas_tavg-h2m-hxy-u
APday["variable_entry"].pop("tas")  # remove tas from dictionary
APday["variable_entry"][brandedVariable]["valid_max"] = 350.0
APday["variable_entry"][brandedVariable]["valid_min"] = 240.0
APday["variable_entry"][brandedVariable]["brand_description"] = " ".join(
    [
        "air temperature: time means at",
        "a nominal height of 2 m reported",
        "on a 2-d horizontal grid",
    ]
)
APday["variable_entry"][brandedVariable]["variable_title"] = APday["variable_entry"][
    brandedVariable
]["long_name"]
# define missing entries
APday["variable_entry"][brandedVariable]["ok_max_mean_abs"] = 330.0
APday["variable_entry"][brandedVariable]["ok_min_mean_abs"] = -30.0
# remove user provided
APday["variable_entry"][brandedVariable].pop("frequency")
# remove redundant
APday["variable_entry"][brandedVariable].pop("long_name")
APday["variable_entry"][brandedVariable].pop("modeling_realm")
APday["variable_entry"][brandedVariable].pop("positive")  # only relevant for fluxes
APday["variable_entry"][brandedVariable].pop("type")
APday

{'Header': {'Conventions': 'CF-1.11 CMIP-7.0.0.0',
  'checksum': '16d968eb7e89b5b95d1e119e32aaa0db',
  'cmor_version': '3.10',
  'generic_levels': 'alevel alevhalf',
  'int_missing_value': '-999',
  'missing_value': '1e20',
  'product': 'model-output',
  'table_date': '2025-03-12',
  'table_id': 'atmos2d',
  'realm': 'atmos',
  'type': 'real',
  'positive': '',
  'valid_min': '',
  'valid_max': '',
  'ok_min_mean_abs': '',
  'ok_max_mean_abs': ''},
 'variable_entry': {'pr_tavg-u-hxy-u': {'cell_measures': 'area: areacella',
   'cell_methods': 'area: time: mean',
   'comment': 'includes both liquid and solid phases',
   'dimensions': ['longitude', 'latitude', 'time'],
   'out_name': 'pr',
   'standard_name': 'precipitation_flux',
   'units': 'kg m-2 s-1',
   'brand_description': 'precipitation flux: time means reported on a 2-d horizontal grid',
   'variable_title': 'Precipitation',
   'realm': ['atmos', 'ocean']},
  'tas_tavg-h2m-hxy-u': {'cell_measures': 'area: areacella',
   'cell_met

In [10]:
# remap to branded_variable
# <root name>_<temporalLabelDD>-<verticalLabelDD>-<horizontalLabelDD>-<areaLabelDD>
brandedVariable = "tos_tavg-u-hxy-sea"
OPmon["variable_entry"][brandedVariable] = OPmon["variable_entry"][
    "tos"
]  # rename tos -> tos_tavg-u-hxy-sea
OPmon["variable_entry"].pop("tos")  # remove tos from dictionary
OPmon["variable_entry"][brandedVariable]["valid_max"] = 100.0
OPmon["variable_entry"][brandedVariable]["valid_min"] = -50.0
OPmon["variable_entry"][brandedVariable]["brand_description"] = " ".join(
    [
        "sea surface temperature: time",
        "means reported on a 2-d",
        "horizontal grid where sea",
    ]
)
OPmon["variable_entry"][brandedVariable]["variable_title"] = OPmon["variable_entry"][
    brandedVariable
]["long_name"]
# define missing entries
OPmon["variable_entry"][brandedVariable]["ok_max_mean_abs"] = 50.0
OPmon["variable_entry"][brandedVariable]["ok_min_mean_abs"] = -3.0
# remove user provided
OPmon["variable_entry"][brandedVariable].pop("frequency")
# remove redundant
OPmon["variable_entry"][brandedVariable].pop("long_name")
OPmon["variable_entry"][brandedVariable].pop("type")
OPmon["variable_entry"][brandedVariable].pop("modeling_realm")
OPmon["variable_entry"][brandedVariable].pop("positive")  # only relevant for fluxes
# update table_id
OPmon["Header"]["table_id"] = "ocean2d"
OPmon

{'Header': {'Conventions': 'CF-1.11 CMIP-7.0.0.0',
  'checksum': '0d05fb60cb5645eb4b581f5c77508f7c',
  'cmor_version': '3.10',
  'generic_levels': '',
  'int_missing_value': '-999',
  'missing_value': '1e20',
  'product': 'model-output',
  'table_date': '2025-03-12',
  'table_id': 'ocean2d',
  'realm': 'ocean',
  'type': 'real',
  'positive': '',
  'valid_min': '',
  'valid_max': '',
  'ok_min_mean_abs': '',
  'ok_max_mean_abs': ''},
 'variable_entry': {'tos_tavg-u-hxy-sea': {'cell_measures': 'area: areacello',
   'cell_methods': 'area: mean where sea time: mean',
   'comment': 'Temperature of upper boundary of the liquid ocean, including temperatures below sea-ice and floating ice shelves',
   'dimensions': ['longitude', 'latitude', 'time'],
   'ok_max_mean_abs': 50.0,
   'ok_min_mean_abs': -3.0,
   'out_name': 'tos',
   'standard_name': 'sea_surface_temperature',
   'units': 'degC',
   'valid_max': 100.0,
   'valid_min': -50.0,
   'brand_description': 'sea surface temperature: time m

In [11]:
# remap to branded_variable
# <root name>_<temporalLabelDD>-<verticalLabelDD>-<horizontalLabelDD>-<areaLabelDD>
brandedVariable = "thetao_tavg-l-hxy-sea"
OPmonLev["variable_entry"][brandedVariable] = OPmonLev["variable_entry"][
    "thetao"
]  # rename thetao -> thetao-tavg-l-hxy-u
OPmonLev["variable_entry"].pop("thetao")  # remove pr from dictionary
OPmonLev["variable_entry"][brandedVariable]["valid_max"] = 100.0
OPmonLev["variable_entry"][brandedVariable]["valid_min"] = -50.0
OPmonLev["variable_entry"][brandedVariable]["brand_description"] = " ".join(
    [
        "sea water potential temperature:",
        "time means provided on multiple",
        "model levels and reported on a",
        "2-d horizontal grid where sea",
    ]
)
OPmonLev["variable_entry"][brandedVariable]["variable_title"] = OPmonLev[
    "variable_entry"
][brandedVariable]["long_name"]
# define missing entries
OPmonLev["variable_entry"][brandedVariable]["ok_max_mean_abs"] = 50.0
OPmonLev["variable_entry"][brandedVariable]["ok_min_mean_abs"] = -3.0
# remove user provided
OPmonLev["variable_entry"][brandedVariable].pop("frequency")
# remove redundant
OPmonLev["variable_entry"][brandedVariable].pop("long_name")
OPmonLev["variable_entry"][brandedVariable].pop("type")
OPmonLev["variable_entry"][brandedVariable].pop("positive")  # only relevant for fluxes
# update table_id
OPmonLev["Header"]["table_id"] = "oceanLev"
OPmonLev

{'Header': {'Conventions': 'CF-1.11 CMIP-7.0.0.0',
  'checksum': '651bbe7b494203f2383f6feee44ad299',
  'cmor_version': '3.10',
  'generic_levels': 'olevel olevhalf',
  'int_missing_value': '-999',
  'missing_value': '1e20',
  'product': 'model-output',
  'table_date': '2025-03-12',
  'table_id': 'oceanLev',
  'realm': 'ocean',
  'type': 'real',
  'positive': '',
  'valid_min': '',
  'valid_max': '',
  'ok_min_mean_abs': '',
  'ok_max_mean_abs': ''},
 'variable_entry': {'thetao_tavg-l-hxy-sea': {'cell_measures': 'area: areacello volume: volcello',
   'cell_methods': 'area: mean where sea time: mean',
   'comment': 'Diagnostic should be contributed even for models using conservative temperature as prognostic field',
   'dimensions': ['longitude', 'latitude', 'olevel', 'time'],
   'modeling_realm': ['ocean'],
   'ok_max_mean_abs': 50.0,
   'ok_min_mean_abs': -3.0,
   'out_name': 'thetao',
   'standard_name': 'sea_water_potential_temperature',
   'units': 'degC',
   'valid_max': 100.0,
   

### create CMIP7_CVs.json

In [12]:
%%time
# get CMIP6_CVs
urls = {
    "CV": "https://raw.githubusercontent.com/PCMDI/cmip6-cmor-tables/refs/heads/main/Tables/CMIP6_CV.json",
}
for key in urls.keys():
    print(key)
    vars()[key] = returnJsonDict(key, urls[key])

CV
CPU times: user 16.5 ms, sys: 6.39 ms, total: 22.9 ms
Wall time: 5.26 s


### CVs - trim out redundant entries

In [13]:
keyList = list(CV["CV"].keys())
keepKeys = [
    "DRS",
    "frequency",
    "grid_label",
    "license",
    "nominal_resolution",
    "product",
    "realm",
    "source_type",
    "tracking_id",
]
for count, key in enumerate(keyList):
    if key not in keepKeys:
        CV["CV"].pop(key)

### CVs - cleanup

In [14]:
# DRS
CV["CV"]["DRS"].pop("directory_path_sub_experiment_example")
CV["CV"]["DRS"].pop("filename_sub_experiment_example")
CV["CV"]["DRS"][
    "directory_path_example"
] = "CMIP7/CMIP/PCMDI-test-1-0/glb/mon/historical/r1i1p1f3/tas/tavg-h2m-hxy-u/gn/v20191207/"
CV["CV"]["DRS"][
    "directory_path_template"
] = "<mip_era>/<activity_id>/<source_id>/<region>/<frequency>/<experiment_id>/<variant_id>/<variable_id>/<branding_suffix>/<grid_label>/<version>"
CV["CV"]["DRS"][
    "filename_example"
] = "tas_tavg-h2m-hxy-u_mon_glb_gn_PCMDI-test-1-0 _historical_r1i1p1f3_185001-186912.nc"
CV["CV"]["DRS"][
    "filename_template"
] = "<variable_id>_<branding_suffix>_<frequency>_<region>_<grid_label>_<source_id>_<experiment_id>_<variant_id>[_<time_range>].nc"
# frequency
keepKeys = ["1hr", "1hrCM", "3hr", "6hr", "day", "dec", "fx", "mon", "monC", "yr"]
keyList = list(CV["CV"]["frequency"].keys())
for count, key in enumerate(keyList):
    if key not in keepKeys:
        CV["CV"]["frequency"].pop(key)
# grid_label
keepKeys = ["gm", "gn", "gna", "gng", "gnz", "gr", "gra", "grg", "grz"]
keyList = list(CV["CV"]["grid_label"].keys())
for count, key in enumerate(keyList):
    if key not in keepKeys:
        CV["CV"]["grid_label"].pop(key)
# license
CV["CV"]["license"][0] = CV["CV"]["license"][0].replace("CMIP6", "CMIP7")

### frequency

In [15]:
# remap approx_interval into frequency
desc = CV["CV"]["frequency"]["1hr"]
CV["CV"]["frequency"]["1hr"] = {}
CV["CV"]["frequency"]["1hr"]["description"] = desc
CV["CV"]["frequency"]["1hr"]["approx_interval"] = 1.0 / 24
desc = CV["CV"]["frequency"]["1hrCM"]
CV["CV"]["frequency"]["1hrCM"] = {}
CV["CV"]["frequency"]["1hrCM"]["description"] = desc
CV["CV"]["frequency"]["1hrCM"]["approx_interval"] = 1.0 / 24
desc = CV["CV"]["frequency"]["3hr"]
CV["CV"]["frequency"]["3hr"] = {}
CV["CV"]["frequency"]["3hr"]["description"] = "3 hourly samples"
CV["CV"]["frequency"]["3hr"]["approx_interval"] = 1.0 / 8
desc = CV["CV"]["frequency"]["6hr"]
CV["CV"]["frequency"]["6hr"] = {}
CV["CV"]["frequency"]["6hr"]["description"] = "6 hourly samples"
CV["CV"]["frequency"]["6hr"]["approx_interval"] = 1.0 / 4
desc = CV["CV"]["frequency"]["day"]
CV["CV"]["frequency"]["day"] = {}
CV["CV"]["frequency"]["day"]["description"] = desc
CV["CV"]["frequency"]["day"]["approx_interval"] = 1.0
desc = CV["CV"]["frequency"]["mon"]
CV["CV"]["frequency"]["mon"] = {}
CV["CV"]["frequency"]["mon"]["description"] = desc
CV["CV"]["frequency"]["mon"]["approx_interval"] = 30.0
CV["CV"]["frequency"].pop("monC")
desc = CV["CV"]["frequency"]["yr"]
CV["CV"]["frequency"]["yr"] = {}
CV["CV"]["frequency"]["yr"]["description"] = desc
CV["CV"]["frequency"]["yr"]["approx_interval"] = 365.0
desc = CV["CV"]["frequency"]["dec"]
CV["CV"]["frequency"]["dec"] = {}
CV["CV"]["frequency"]["dec"]["description"] = desc
CV["CV"]["frequency"]["dec"]["approx_interval"] = 3650.0
# not included
# CV["CV"]["approx_interval"]["subhr"] = (
#    1 / 24 / 4
# )  # 15 mins (60/25) = 25 mins CMIP6, see also "approx_interval_error", "approx_interval_warning"
# https://github.com/PCMDI/cmip6-cmor-tables/blob/e3644d3b814d632c4343b9ae5a59faf1ed20191c/Tables/CMIP6_CFsubhr.json#L11-L13

### nominal_resolution

In [16]:
# add 0.25 km
CV["CV"]["nominal_resolution"].insert(0, "0.25 km")
CV["CV"]["nominal_resolution"].remove("1x1 degree")
CV["CV"]["nominal_resolution"]

['0.25 km',
 '0.5 km',
 '1 km',
 '10 km',
 '100 km',
 '1000 km',
 '10000 km',
 '2.5 km',
 '25 km',
 '250 km',
 '2500 km',
 '5 km',
 '50 km',
 '500 km',
 '5000 km']

### other CVs

In [17]:
# other CVs
# new data_archive_id to capture WCRP-affiliated entries
CV["CV"][
    "branding_suffix"
] = "<temporal_label>-<vertical_label>-<horizontal_label>-<area_label>"
CV["CV"]["data_archive_id"] = {}
CV["CV"]["data_archive_id"]["WCRP"] = " ".join(
    [
        "a collection of datasets from the AMIP and CMIP project phases,",
        "along with project supporting datasets from the input4MIPs",
        "(forcing datasets used to drive CMIP simulations) and obs4MIPs",
        "(observational datasets used to evaluate CMIP simulations, and",
        "numerous other supporting activities",
    ]
)
CV["CV"]["data_specs_version"] = "CMIP-7.0.0.0"
CV["CV"]["mip_era"] = "CMIP7"
# adding additional https://github.com/PCMDI/cmor/pull/778#issuecomment-2695458244

### region

In [18]:
# adding regions required to define CMIP7 variables
CV["CV"]["region"] = {}
CV["CV"]["region"][
    "glb"
] = "the complete Earth surface, 90 degrees North to 90 degrees South latitude, and all longitudes"
CV["CV"]["region"][
    "gre"
] = "located in the Northern Atlantic Ocean, separated from other land masses by the Labrador Sea and Straits, and almost entirely north of 60 degrees North latitude"
CV["CV"]["region"][
    "ant"
] = "located around the South Pole, separated from other land masses by the Southern Ocean, and almost entirely south of 60 degrees South latitude"
CV["CV"]["region"][
    "nhem"
] = "the complete Earth surface from the equator to the North Pole, 0 to 90 degrees North latitude"
CV["CV"]["region"][
    "shem"
] = "the complete Earth surface from the equator to the South Pole, 0 to 90 degrees South latitude"

### required_global_attributes

In [19]:
# required_global_attributes
urls = {
    "required_global_attributes": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6Plus_CVs/refs/heads/main/CMIP6Plus_required_global_attributes.json",
}
for key in urls.keys():
    print(key)
    vars()[key] = returnJsonDict(key, urls[key])
# add missing entries
required_global_attributes["required_global_attributes"].extend(
    [
        "region",
        "branding_suffix",
        "temporal_label",
        "vertical_label",
        "horizontal_label",
        "area_label",
        "host_collection",
        "archive_id",
        "branch_method",
        "branch_time_in_child",
        "branch_time_in_parent",
        "parent_activity_id",
        "parent_experiment_id",
        "parent_mip_era",
        "parent_source_id",
        "parent_time_units",
        "parent_variant_label",
    ]
)
# remove deprecated
required_global_attributes["required_global_attributes"].remove("sub_experiment")
required_global_attributes["required_global_attributes"].remove("sub_experiment_id")
# sort
required_global_attributes["required_global_attributes"].sort()
# redirect to CVs master dict
CV["CV"]["required_global_attributes"] = required_global_attributes[
    "required_global_attributes"
]

required_global_attributes


### adding the *_label element lists

In [20]:
# pulled from Appendix F in https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit
area_label = {
    "air": "air",
    "cl": "cloud",
    "ccl": "convective cloud",
    "crp": "crops",
    "fis": "floating ice shelf",
    "gis": "grounded ice sheet",
    "ifs": "ice free sea",
    "is": "ice sheet",
    "lnd": "land",
    "li": "land ice",
    "ng": "natural grasses",
    "pst": "pasture",
    "sea": "sea",
    "si": "sea ice",
    "simp": "sea ice melt pond",
    "sir": "sea ice ridge",
    "lus": "sector (auxiliary coordinate for land-use area types",
    "shb": "shrubs",
    "sn": "snow",
    "scl": "stratiform cloud",
    "tree": "trees",
    "ufs": "unfrozen soil",
    "veg": "vegetation",  # we have crops, natural grasses, pasture, shrubs - vague should be removed
    "wl": "wetland",
    "u": 'unmasked (no "where" directive included in cell_methods)',
}
horizontal_label = {
    "hxy": "gridded",
    "hy": "zonal mean",
    "hxys": "site values",
    "hys": "basin mean",
    "ht": "labeled areas",
    "hm": "horizontal mean",
}
temporal_label = {
    "tstat": "statistic",
    "tsum": "sum",
    "tavg": "mean",
    "tpt": "point",
    "tclm": "climatology",
    "tclmdc": "diurnal cycle climatology",
    "ti": "time independent",
}
vertical_label = {
    "l": "model level",
    "rho": "density surface",
    "h2m": "2m height",
    "h10m": "10m height",
    "h100m": "100m height",
    "d10cm": "1cm depth",
    "d1m": "1m depth",
    "d0m": "surface",
    "d100m": "100m depth",
    "d300m": "300m depth",
    "d700m": "700m depth",
    "d2000m": "2000m depth",
    "10hPa": "10 hPa",
    "100hPa": "100 hPa",
    "220hPa": "220 hPa",
    "500hPa": "500 hPa",
    "560hPa": "560 hPa",
    "700hPa": "700 hPa",
    "840hPa": "840 hPa",
    "850hPa": "850 hPa",
    "1000hPa": "1000 hPa",
    "h16": "16 height levels",
    "h40": "40 height levels",
    "p3": "3 pressure levels",
    "p4": "4 pressure levels",
    "p8": "8 pressure levels",
    "p7c": "7 pressure levels",  # likely dupe
    "p7h": "7 pressure levels",  # likely dupe
    "p19": "19 pressure levels",
    "p27": "27 pressure levels",
    "p39": "39 pressure levels",
    "u": "unspecified (no vertical dimension)",
}
# sort all
area_label = dict(sorted(area_label.items()))
horizontal_label = dict(sorted(horizontal_label.items()))
temporal_label = dict(sorted(temporal_label.items()))
vertical_label = dict(sorted(vertical_label.items()))
# add to CV master dict
CV["CV"]["area_label"] = area_label
CV["CV"]["horizontal_label"] = horizontal_label
CV["CV"]["temporal_label"] = temporal_label
CV["CV"]["vertical_label"] = vertical_label

### institution_id

In [21]:
# institution_id
urls = {
    "institution_id": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/refs/heads/main/CMIP6_institution_id.json",
}
for key in urls.keys():
    print(key)
    vars()[key] = returnJsonDict(key, urls[key])
keepKeys = ["PCMDI"]
keyList = list(institution_id["institution_id"].keys())
for count, key in enumerate(keyList):
    if key not in keepKeys:
        institution_id["institution_id"].pop(key)
CV["CV"]["institution_id"] = institution_id["institution_id"]

institution_id


### experiment_id

In [22]:
# experiment_id
urls = {
    "experiment_id": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/refs/heads/main/CMIP6_experiment_id.json",
}
for key in urls.keys():
    print(key)
    vars()[key] = returnJsonDict(key, urls[key])
keepKeys = ["1pctCO2"]
keyList = list(experiment_id["experiment_id"].keys())
for count, key in enumerate(keyList):
    if key not in keepKeys:
        experiment_id["experiment_id"].pop(key)
CV["CV"]["experiment_id"] = experiment_id["experiment_id"]

experiment_id


### source_id

In [23]:
# source_id
urls = {
    "source_id": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/refs/heads/main/CMIP6_source_id.json",
}
for key in urls.keys():
    print(key)
    vars()[key] = returnJsonDict(key, urls[key])
keepKeys = ["PCMDI-test-1-0"]
keyList = list(source_id["source_id"].keys())
for count, key in enumerate(keyList):
    if key not in keepKeys:
        source_id["source_id"].pop(key)
CV["CV"]["source_id"] = source_id["source_id"]

source_id


In [24]:
# sort CV contents
CV["CV"] = dict(sorted(CV["CV"].items()))

### write all files out

In [25]:
files = {
    "APday": "atmos2d",
    "OPmon": "ocean2d",
    "OPmonLev": "oceanLev",
    "CV": "CV",
}
for count, name in enumerate(files.keys()):
    print(count, name)
    dic = eval(name)
    tableName = files[name]
    # set outpath
    outPath = "."
    # write file
    outFile = "".join(["CMIP7_", tableName, ".json"])
    outPathAndFileName = os.path.join(outPath, outFile)
    print("outPathAndFileName:", outPathAndFileName)
    with open(outPathAndFileName, "w") as f:
        json.dump(
            dic, f, ensure_ascii=True, sort_keys=True, indent=4, separators=(",", ":")
        )

0 APday
outPathAndFileName: ./CMIP7_atmos2d.json
1 OPmon
outPathAndFileName: ./CMIP7_ocean2d.json
2 OPmonLev
outPathAndFileName: ./CMIP7_oceanLev.json
3 CV
outPathAndFileName: ./CMIP7_CV.json
