Module cvtool.ESGF.cv2ini
Expand source code
import json
import re,os,sys
import glob
from collections import Counter
from typing import Any, Dict
# Resolve the project imports two ways: package-relative when imported as
# part of cvtool, path-based when this file is executed directly.
try:  # as a module within the cvtool package
    from ..core.stdout import log
    from ..core.io import exists
except ImportError:
    # direct execution: make the parent package importable
    # Get the current file's directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    parent_dir = os.path.dirname(current_dir)
    sys.path.append(parent_dir)
    from core.stdout import log
    from core.io import exists
from pprint import pprint
# Deliberately shadows the builtin print with the project logger, keyed on
# this file's path inside the cvtool tree.
print = log(__file__.split('cvtool')[1])
def loopformat(dictionary: Dict[str, Any], key: str, prefix: str) -> str:
    """
    Append one ``name | value`` row per dictionary entry to *prefix*.

    For mapping values the row shows ``value.get(key)``; non-mapping values
    are rendered as-is. The lookup is done per item (rather than wrapping
    the whole loop in one try/except) so a dictionary mixing mapping and
    non-mapping values cannot emit duplicated rows for the entries that
    were already appended before the first AttributeError.

    :param dictionary: Mapping of names to sub-mappings or plain values.
    :param key: Key looked up inside each sub-mapping value.
    :param prefix: Header string that the formatted rows are appended to.
    :return: *prefix* followed by one formatted row per dictionary entry.
    """
    for dkey, value in dictionary.items():
        try:
            rendered = value.get(key)
        except AttributeError:  # value is not a mapping: show it directly
            rendered = value
        prefix += f"{dkey.ljust(30)} | {rendered}\n"
    return prefix
"""
Given a string, replace certain substrings within the string with the corresponding format string.
:param s: A string.
:return: A new string with the substrings formatted accordingly.
"""
path2ini = lambda s: s.replace('<', '%(').replace('>', ')s').replace('activity_id','activity_drs')
def make(output_loc, CVDIR=None, CVFILE=None, INI_CATEGORIES=''):
    """
    Build an ESGF publisher ``<PROJECT>.ini`` file from CV metadata.

    Exactly one CV source is required: either a directory of per-section
    ``<PROJECT>_<section>.json`` files (*CVDIR*) or a single combined
    ``<PROJECT>_CV.json`` file (*CVFILE*). When both are supplied, the
    directory takes precedence.

    :param output_loc: Directory the ini file is written into (must exist).
    :param CVDIR: Directory containing per-section CV json files, or None.
    :param CVFILE: Path to a combined ``<PROJECT>_CV.json`` file, or None.
    :param INI_CATEGORIES: Path to a categories json file; when empty, the
        bundled ``sampleconf/ini_categories.json`` preset is used.
    """
    exists(output_loc)
    if CVDIR and CVFILE:
        print.warning('Both a CV file and a CV Directory defined. Using CVDIR only!')
    if CVDIR:
        # Normalise to a trailing slash so plain concatenation forms paths.
        # (Guarded inside this branch: the original indexed CVDIR[-1]
        # unconditionally and crashed when CVDIR was None.)
        if CVDIR[-1] != '/':
            CVDIR += '/'
        exists(CVDIR)
        # The project name is the most common filename prefix in the CV dir.
        PROJECT = Counter(
            [i.split('/')[-1].split('_')[0] for i in glob.glob(f'{CVDIR}*.json')]).most_common()[0][0]

        def read_section(section: str) -> Dict[str, Any]:
            """
            Given a section, load its corresponding file from the CV directory, parse and return it as a dictionary.
            :param section: A string.
            :return: A dictionary parsed from the corresponding file in the CV directory.
            """
            return json.load(open(exists(f'{CVDIR}{PROJECT}_{section}.json'), 'r'))
    else:
        exists(CVFILE)
        # CVFILE is the full path to <PROJECT>_CV.json (see the regex below
        # and the __main__ example), so open it as given — the original
        # appended a second '.json' suffix here.
        CVOBJ = json.load(open(CVFILE, 'r'))
        # Was re.search(..., CVLOC): CVLOC is undefined — NameError fixed.
        PROJECT = re.search(r'/(\w+)_CV\.json', CVFILE)[1]

        def read_section(section: str) -> Dict[str, Any]:
            """
            Given a section, parse and return it as a dictionary.
            :param section: A string.
            :return: A dictionary parsed from the given object.
            """
            return CVOBJ[section]
    if not INI_CATEGORIES:
        INI_CATEGORIES = __file__.replace(
            'cv2ini.py', '')+'../sampleconf/ini_categories.json'
        # was print.warn — every other call in this module uses .warning
        print.warning('Reading INI categories from preset file. Please ensure these are correct. ')
    DRS = read_section('DRS').get('DRS')
    # Categories: 'description' is pinned to index 99; everything else is
    # numbered in order of appearance across required then optional.
    cat = json.load(open(INI_CATEGORIES, 'r'))
    counter = 0
    categories = ''
    for requirement in ('required', 'optional'):
        req = requirement == 'required'
        for row in cat[requirement]:
            if row.get('name') != 'description':
                index = counter
                counter += 1
            else:
                index = 99
            categories += f"{row.get('name'):20} | {row.get('data_type'):6} | {bool(req):5} | {bool(row.get('supported')):5} | {index}\n"
    category_defaults = ' project | ' + PROJECT
    # Templates translated from DRS <token> form to ini %(token)s form.
    filename_format = path2ini(DRS.get("filename_template"))
    dataset_id = path2ini(DRS.get("directory_path_template"))
    directory_format = f'%(root)s/{dataset_id}/%(version)s'
    dataset_name_format = f"mip_era=%(mip_era)s, source_id=%(source_id)s, experiment=%(experiment_title)s, member_id=%(member_id)s, variable=%(variable_id)s, version=%(version)s"
    # ids
    print.warning('Manually added CMIP6 Plus mipera file to CMIP6 dir')
    mip_era_options = read_section('mip_era')
    activity_drs_options = ', '.join(list(read_section('activity_id').keys()))
    institution_id_options = ', '.join(list(read_section('institution_id').keys()))
    source_id_options = ', '.join(list(read_section('source_id').keys()))
    experiment_id_options = ', '.join(list(read_section('experiment_id').keys()))
    # maps
    maps = 'experiment_title_map, model_cohort_map, las_time_delta_map'
    experiment_title_map = loopformat(read_section('experiment_id'),'experiment','map(experiment_id : experiment_title)\n')
    member_id_pattern = f'[%(sub_experiment_id)s-]%(variant_label)s'
    table_id_options = ', '.join(list(read_section('table_id')))
    variable_id_pattern = f'%(string)s'
    grid_label_options = ', '.join(list(read_section('grid_label').keys()))
    model_cohort_map = loopformat(read_section('source_id'),'cohort','map(source_id : model_cohort)\n')
    project_options = PROJECT
    sub_experiment_id_options = ', '.join(list(read_section('sub_experiment_id').keys()))
    variant_label_pattern = f'r%(digit)si%(digit)sp%(digit)sf%(digit)s'
    frequency_options = ', '.join(list(read_section('frequency').keys()))
    version_pattern = f'v%(digit)s'
    las_time_delta_map = loopformat(read_section('frequency'),False,'map(frequency : las_time_delta)\n')
    ########
    print.warning('Handler CMOR CF Version definitions required. Do we want a ESGFparse file?')
    ########
    # Fixed handler/version settings — TODO(review): confirm these for
    # non-CMIP6 projects; they are currently hard-coded.
    handler = 'esgcet.config.cmip6_handler:CMIP6Handler'
    min_cmor_version = '3.2.4'
    min_cf_version = '1.6'
    min_data_specs_version = '01.00.13'
    create_cim = 'true'
    source_type_delimiter = 'space'
    activity_id_delimiter = 'space'
    realm_delimiter = 'space'
    model_cohort_delimiter = 'space'
    las_configure = 'false'
    extract_global_attrs = 'WE HAVE NOT YET EXTRACTED THESE'
    # extract_global_attrs = frequency, realm, product, nominal_resolution, source_type, grid, creation_date, variant_label, sub_experiment_id, further_info_url, activity_id, data_specs_version
    print.warning('thredds_exclude_variables,variable_locate,variable_per_file,version_by_date' )
    thredds_exclude_variables='CHECK WHERE TO FIND THESE'
    # thredds_exclude_variables = a, a_bnds, alev1, alevel, alevhalf, alt40, b, b_bnds, bnds, bounds_lat, bounds_lon, dbze, depth, depth0m, depth100m, depth_bnds, geo_region, height, height10m, height2m, lat, lat_bnds, latitude, latitude_bnds, layer, lev, lev_bnds, location, lon, lon_bnds, longitude, longitude_bnds, olayer100m, olevel, oline, p0, p220, p500, p560, p700, p840, plev, plev3, plev7, plev8, plev_bnds, plevs, pressure1, region, rho, scatratio, sdepth, sdepth1, sza5, time, time1, time2, time_bnds, vegtype, i, j, rlat, rlat_bnds, sector, type, vertices_latitude, vertices_longitude
    variable_locate = 'ASK ABOUT THIS'
    # variable_locate = ps, ps_ | tau, tau_
    variable_per_file = 'true'
    version_by_date = 'true'
    # Emit every named setting above as a "name = value" ini entry; the
    # values are pulled out of this function's local namespace by name.
    write = ('categories', 'category_defaults', 'filename_format', 'dataset_id', 'directory_format', 'dataset_name_format', 'mip_era_options', 'activity_drs_options', 'institution_id_options', 'source_id_options', 'experiment_id_options', 'maps', 'experiment_title_map', 'member_id_pattern', 'table_id_options', 'variable_id_pattern', 'grid_label_options', 'model_cohort_map', 'project_options', 'sub_experiment_id_options', 'variant_label_pattern', 'frequency_options', 'version_pattern', 'las_time_delta_map', 'handler', 'min_cmor_version', 'min_cf_version', 'min_data_specs_version', 'create_cim', 'source_type_delimiter', 'activity_id_delimiter', 'realm_delimiter', 'model_cohort_delimiter', 'las_configure', 'extract_global_attrs', 'thredds_exclude_variables', 'variable_locate', 'variable_per_file', 'version_by_date')
    with open(f"{output_loc}{PROJECT}.ini",'w') as f:
        for var in write:
            f.write(f"{var} = {locals().get(var)}\n\n")
if __name__ == '__main__':
    # Supply either CVDIR or CVFILE — only one is needed; providing both
    # lets the two input routes be compared for equivalency testing.
    CVDIR = "/Users/daniel.ellis/WIPwork/CMIP6_CVs"
    # alternatively:
    CVFILE = "/Users/daniel.ellis/WIPwork/mip-cmor-tables/mip_cmor_tables/out/CMIP6Plus_CV.json"
    INI_CATEGORIES = ''
    make(output_loc='./', CVDIR=CVDIR, INI_CATEGORIES=INI_CATEGORIES)
Functions
def loopformat(dictionary: Dict[str, Any], key: str, prefix: str) -> str
Given a dictionary, a key string and a prefix string, loop through the dictionary and format a new string with the key-value pairs.
:param dictionary: A dictionary. :param key: A key string. :param prefix: A prefix string. :return: A formatted string with the key-value pairs from the dictionary.
Expand source code
def loopformat(dictionary: Dict[str, Any], key: str, prefix: str) -> str: """ Given a dictionary, a key string and a prefix string, loop through the dictionary and format a new string with the key-value pairs. :param dictionary: A dictionary. :param key: A key string. :param prefix: A prefix string. :return: A formatted string with the key-value pairs from the dictionary. """ try: for dkey, value in dictionary.items(): prefix += f"{dkey.ljust(30)} | {value.get(key)}\n" except AttributeError: for dkey, value in dictionary.items(): prefix += f"{dkey.ljust(30)} | {value}\n" return prefix def make(output_loc, CVDIR=None, CVFILE=None, INI_CATEGORIES='')-
Expand source code
def make( output_loc , CVDIR=None, CVFILE=None, INI_CATEGORIES='' ): exists(output_loc) if CVDIR[-1] != '/': CVDIR += '/' if bool(CVDIR) & bool(CVFILE): print.warning('Both a CV file and a CV Directory defined. Using CVDIR only!') if exists(CVDIR): PROJECT = Counter( [i.split('/')[-1].split('_')[0] for i in glob.glob(f'{CVDIR}*.json')]).most_common()[0][0] def read_section(section: str) -> Dict[str, Any]: """ Given a section, load its corresponding file from the CV directory, parse and return it as a dictionary. :param section: A string. :return: A dictionary parsed from the corresponding file in the CV directory. """ return json.load(open(exists(f'{CVDIR}{PROJECT}_{section}.json'), 'r')) else: exists(CVFILE) CVOBJ = json.load(open(f'{CVFILE}.json', 'r')) PROJECT = re.search('/(\w+)_CV.json', CVLOC)[1] def read_section(section: str) -> Dict[str, Any]: """ Given a section, parse and return it as a dictionary. :param section: A string. :return: A dictionary parsed from the given object. """ return CVOBJ[section] if not INI_CATEGORIES: INI_CATEGORIES = __file__.replace( 'cv2ini.py', '')+'../sampleconf/ini_categories.json' print.warn('Reading INI categories from preset file. Please ensure these are correct. 
') DRS = read_section('DRS').get('DRS') # Categories cat = json.load(open(INI_CATEGORIES, 'r')) counter = 0 categories = '' for requirement in ('required', 'optional'): req = requirement == 'required' for row in cat[requirement]: if row.get('name') != 'description': index = counter counter += 1 else: index = 99 categories += f"{row.get('name'):20} | {row.get('data_type'):6} | {bool(req):5} | {bool(row.get('supported')):5} | {index}\n" category_defaults = ' project | ' + PROJECT filename_format = path2ini(DRS.get("filename_template")) dataset_id = path2ini(DRS.get("directory_path_template")) directory_format = f'%(root)s/{dataset_id}/%(version)s' dataset_name_format = f"mip_era=%(mip_era)s, source_id=%(source_id)s, experiment=%(experiment_title)s, member_id=%(member_id)s, variable=%(variable_id)s, version=%(version)s" # ids print.warning('Manually added CMIP6 Plus mipera file to CMIP6 dir') mip_era_options = read_section('mip_era') activity_drs_options = ', '.join(list(read_section('activity_id').keys())) institution_id_options = ', '.join(list(read_section('institution_id').keys())) source_id_options = ', '.join(list(read_section('source_id').keys())) experiment_id_options = ', '.join(list(read_section('experiment_id').keys())) # maps maps = 'experiment_title_map, model_cohort_map, las_time_delta_map' experiment_title_map = loopformat(read_section('experiment_id'),'experiment','map(experiment_id : experiment_title)\n') member_id_pattern = f'[%(sub_experiment_id)s-]%(variant_label)s' table_id_options = ', '.join(list(read_section('table_id'))) variable_id_pattern = f'%(string)s' grid_label_options = ', '.join(list(read_section('grid_label').keys())) model_cohort_map = loopformat(read_section('source_id'),'cohort','map(source_id : model_cohort)\n') project_options = PROJECT sub_experiment_id_options = ', '.join(list(read_section('sub_experiment_id').keys())) variant_label_pattern = f'r%(digit)si%(digit)sp%(digit)sf%(digit)s' frequency_options = ', 
'.join(list(read_section('frequency').keys())) version_pattern = f'v%(digit)s' las_time_delta_map = loopformat(read_section('frequency'),False,'map(frequency : las_time_delta)\n') ######## print.warning('Handler CMOR CF Version definitions required. Do we want a ESGFparse file?') ######## handler = 'esgcet.config.cmip6_handler:CMIP6Handler' min_cmor_version = '3.2.4' min_cf_version = '1.6' min_data_specs_version = '01.00.13' create_cim = 'true' source_type_delimiter = 'space' activity_id_delimiter = 'space' realm_delimiter = 'space' model_cohort_delimiter = 'space' las_configure = 'false' extract_global_attrs = 'WE HAVE NOT YET EXTRACTED THESE' # extract_global_attrs = frequency, realm, product, nominal_resolution, source_type, grid, creation_date, variant_label, sub_experiment_id, further_info_url, activity_id, data_specs_version print.warning('thredds_exclude_variables,variable_locate,variable_per_file,version_by_date' ) thredds_exclude_variables='CHECK WHERE TO FIND THESE' # thredds_exclude_variables = a, a_bnds, alev1, alevel, alevhalf, alt40, b, b_bnds, bnds, bounds_lat, bounds_lon, dbze, depth, depth0m, depth100m, depth_bnds, geo_region, height, height10m, height2m, lat, lat_bnds, latitude, latitude_bnds, layer, lev, lev_bnds, location, lon, lon_bnds, longitude, longitude_bnds, olayer100m, olevel, oline, p0, p220, p500, p560, p700, p840, plev, plev3, plev7, plev8, plev_bnds, plevs, pressure1, region, rho, scatratio, sdepth, sdepth1, sza5, time, time1, time2, time_bnds, vegtype, i, j, rlat, rlat_bnds, sector, type, vertices_latitude, vertices_longitude variable_locate = 'ASK ABOUT THIS' # variable_locate = ps, ps_ | tau, tau_ variable_per_file = 'true' version_by_date = 'true' # for value in locals(): # if isinstance(value, str): # pprint(value) write = ('categories', 'category_defaults', 'filename_format', 'dataset_id', 'directory_format', 'dataset_name_format', 'mip_era_options', 'activity_drs_options', 'institution_id_options', 'source_id_options', 
'experiment_id_options', 'maps', 'experiment_title_map', 'member_id_pattern', 'table_id_options', 'variable_id_pattern', 'grid_label_options', 'model_cohort_map', 'project_options', 'sub_experiment_id_options', 'variant_label_pattern', 'frequency_options', 'version_pattern', 'las_time_delta_map', 'handler', 'min_cmor_version', 'min_cf_version', 'min_data_specs_version', 'create_cim', 'source_type_delimiter', 'activity_id_delimiter', 'realm_delimiter', 'model_cohort_delimiter', 'las_configure', 'extract_global_attrs', 'thredds_exclude_variables', 'variable_locate', 'variable_per_file', 'version_by_date') with open(f"{output_loc}{PROJECT}.ini",'w') as f: for var in write: f.write(f"{var} = {locals().get(var)}\n\n") def path2ini(s)-
Expand source code
path2ini = lambda s: s.replace('<', '%(').replace('>', ')s').replace('activity_id','activity_drs')