Module cvtool.ESGF.read

Expand source code
import re, os
import shutil
import pprint
import configparser
import pandas as pd

from ..core.stdout import log
# Rebind the module-level name ``print`` (shadowing the built-in inside this
# module) to whatever ``log`` returns for the piece of this file's path that
# follows the first 'cvtool'.
# NOTE(review): presumably ``log`` returns a print-like callable labelled with
# that path fragment -- confirm against core.stdout.
print = log(__file__.split('cvtool')[1])


class ESGFIni:
    """
    A class to parse and access data from an ESGF INI configuration file

    Only the first section of the file is exposed. Each option of that section
    is stored in a flat lookup table and can be read as an attribute of the
    instance (use ``getattr(obj, name)`` for names that are not valid Python
    identifiers).

    Attributes:
        project (str): The project name of the INI file (the name of its first section)
        categories (pandas.DataFrame): A DataFrame containing information about the categories of each key
        headers (List[str]): A list of the headers (option names) for the INI file
    """

    def __init__(self, ini_file: str) -> None:
        """
        Initializes the class by reading the INI file and parsing the data

        Args:
            ini_file (str): The file path to the INI file to be parsed

        Raises:
            FileNotFoundError: if ``ini_file`` does not exist.
        """
        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with -O, which would let a missing file slip through.
        if not os.path.exists(ini_file):
            raise FileNotFoundError(f"ESGF INI file not found: {ini_file}")
        self._complete = read_esgf_ini(ini_file)
        self.project = list(self._complete)[0]
        self._esgf_data = self._complete.get(self.project)
        self.headers = list(self._esgf_data)
        self.categories = categories(self._esgf_data.get('categories'))
        self._flatten_data = {}
        self._flatten_keys(self._esgf_data, '')

    def __getattr__(self, key: str):
        """
        Overrides the default behavior of getting an attribute from the class to get data from the INI file.

        Python only calls this hook after normal attribute lookup has failed.

        Args:
            key (str): The key to get from the INI file.

        Returns:
            The value of the key in the INI file.

        Raises:
            AttributeError: if the key is not found in the INI file.
        """
        # Read the table through ``__dict__``: going through ``self._flatten_data``
        # would re-enter this hook (and recurse without bound) whenever the
        # table has not been created yet, e.g. on a bare or half-built instance.
        flat = self.__dict__.get('_flatten_data')
        if flat is not None and key in flat:
            return flat[key]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{key}'")

    def _flatten_keys(self, data: dict, prefix: str) -> None:
        """
        Recursively flattens the keys of the INI file to make them accessible as attributes of the class.

        String values containing ``|`` are converted with ``cfg2obj``; string
        values containing ``', '`` (and no ``%``) are split into a list; all
        other values are stored unchanged. A nested dict is flattened under
        ``"<prefix><key>."`` and is also stored as-is under its own key.

        Args:
            data (dict): The data of the INI file.
            prefix (str): The prefix to add to each key.
        """
        for key, value in data.items():
            if isinstance(value, dict):
                new_prefix = f"{prefix}{key}."
                self._flatten_keys(value, new_prefix)
            elif '|' in value:
                value = cfg2obj(value, False)
            elif ', ' in value and '%' not in value:
                # Values with '%' are left whole (they look like format templates).
                value = value.split(', ')
            self._flatten_data[f"{prefix}{key}"] = value

    def print(self, key):
        '''
        Pretty print the flattened INI value stored under ``key``, scaling the
        output width to the width of the terminal.

        Example usage:
            ```class.print('experiment_title_map')```

        Args:
            key (str): The flattened key to display.

        Returns:
            None (the value is written to stdout by ``pprint.pprint``).

        Raises:
            AttributeError: if the key is not found in the INI file.
        '''
        terminal_width = shutil.get_terminal_size().columns
        # Look the key up on this instance's flattened table rather than on a
        # module-level variable that only exists in an interactive session.
        return pprint.pprint(self.__getattr__(key), width=terminal_width)


'''
Single use functions: 
    We do not want to package these up with the class and propagate them forwards. 
'''

def read_esgf_ini(file_path: str) -> dict:
    """
    Load an INI file into a plain nested dictionary.

    Interpolation is switched off, so ``%`` characters in values are returned
    verbatim.

    Args:
        file_path (str): The file path to the INI file to be parsed.

    Returns:
        A dictionary of the form ``{section_name: {option_name: value}}`` with
        one entry per section reported by the parser.
    """
    parser = configparser.ConfigParser(interpolation=None)
    parser.read(file_path)
    # One inner dict per section, built straight from the parser's (key, value) pairs.
    return {name: dict(parser.items(name)) for name in parser.sections()}


def categories(categories_str: str) -> pd.DataFrame:
    """
    Parses the categories string from the INI file and turns it into a DataFrame.

    Each row is expected to look like
    ``<name> | <type> | <required> | <used> | <priority>`` and to be preceded
    by whitespace (a newline or indentation). Rows that do not fit this shape
    are ignored.

    Args:
        categories_str (str): The categories string from the INI file. ``None``
            (for example when the option is absent) is treated as an empty string.

    Returns:
        A DataFrame indexed by ``Column`` with the columns ``Type`` (str),
        ``Required`` (bool), ``Used`` (bool) and ``Priority`` (int). It has no
        rows when nothing matches.
    """
    pattern = r"\s+(\w+)\s+\|\s+(\w+)\s+\|\s+(\w+)\s+\|\s+(\w+)\s+\|\s+(\d+)"
    matches = re.findall(pattern, categories_str or "")
    df = pd.DataFrame(
        matches,
        columns=["Column", "Type", "Required", "Used", "Priority"]
    )
    # Casting the raw text with ``astype(bool)`` marks every non-empty string
    # as True -- including "false". Decode the flag tokens explicitly instead;
    # anything that is not a recognised "false" spelling stays True.
    false_tokens = {"false", "no", "off", "0"}
    for flag in ("Required", "Used"):
        df[flag] = [token.lower() not in false_tokens for token in df[flag]]
    df = df.astype({
        "Column": str,
        "Type": str,
        "Required": bool,
        "Used": bool,
        "Priority": int
    })
    return df.set_index('Column', inplace=False)


def cfg2obj(config_str: str, dataseries: bool = False):
    """
    Parses a configuration string and turns it into a dictionary or a DataFrame.

    Every line of the form ``<key> | <value>`` contributes one entry. The key
    is the whitespace-free token directly before the pipe and the value is the
    rest of that line up to the next pipe, stripped of surrounding whitespace.
    Text without a pipe (such as a leading ``map(...)`` header line) is ignored.

    Args:
        config_str (str): The configuration string to be parsed.
        dataseries (bool, optional): Whether or not to return a DataFrame. Defaults to False.

    Returns:
        The parsed configuration string as a dictionary, or as a single-column
        (``value``) DataFrame indexed by ``key`` when ``dataseries`` is true.
    """
    # ``[^\S\n]`` is "whitespace other than newline" and the value group stops
    # at a newline: a match can therefore never run past the end of its own
    # row and swallow the key of the following row.
    pattern = r"([^\s|]+)[^\S\n]*\|[^\S\n]*([^|\n]+)"
    matches = re.findall(pattern, config_str)
    data = {key.strip(): value.strip() for key, value in matches}
    if dataseries:
        df = pd.DataFrame.from_dict(data, orient="index", columns=["value"])
        df.index.name = "key"
        return df
    return data




# if __name__ == '__main__':
#     base = '/Users/daniel.ellis/WIPwork/esgf-config/publisher-configs/ini/'
#     path = f'{base}esg.cmip6.ini'
#     cmip6 = ESGFIni(path)

Functions

def categories(categories_str: str) ‑> pandas.core.frame.DataFrame

Parses the categories string from the INI file and turns it into a DataFrame.

Args

categories_str : str
The categories string from the INI file.

Returns

A DataFrame containing information about the categories of each key.

Expand source code
def categories(categories_str: str) -> pd.DataFrame:
    """
    Parses the categories string from the INI file and turns it into a DataFrame.

    Each row is expected to look like
    ``<name> | <type> | <required> | <used> | <priority>`` and to be preceded
    by whitespace (a newline or indentation). Rows that do not fit this shape
    are ignored.

    Args:
        categories_str (str): The categories string from the INI file. ``None``
            (for example when the option is absent) is treated as an empty string.

    Returns:
        A DataFrame indexed by ``Column`` with the columns ``Type`` (str),
        ``Required`` (bool), ``Used`` (bool) and ``Priority`` (int). It has no
        rows when nothing matches.
    """
    pattern = r"\s+(\w+)\s+\|\s+(\w+)\s+\|\s+(\w+)\s+\|\s+(\w+)\s+\|\s+(\d+)"
    matches = re.findall(pattern, categories_str or "")
    df = pd.DataFrame(
        matches,
        columns=["Column", "Type", "Required", "Used", "Priority"]
    )
    # Casting the raw text with ``astype(bool)`` marks every non-empty string
    # as True -- including "false". Decode the flag tokens explicitly instead;
    # anything that is not a recognised "false" spelling stays True.
    false_tokens = {"false", "no", "off", "0"}
    for flag in ("Required", "Used"):
        df[flag] = [token.lower() not in false_tokens for token in df[flag]]
    df = df.astype({
        "Column": str,
        "Type": str,
        "Required": bool,
        "Used": bool,
        "Priority": int
    })
    return df.set_index('Column', inplace=False)
def cfg2obj(config_str: str, dataseries: bool = False)

Parses a configuration string and turns it into a dictionary or a DataFrame.

Args

config_str : str
The configuration string to be parsed.
dataseries : bool, optional
Whether or not to return a DataFrame. Defaults to False.

Returns

The parsed configuration string as a dictionary or a DataFrame.

Expand source code
def cfg2obj(config_str: str, dataseries: bool = False):
    """
    Parses a configuration string and turns it into a dictionary or a DataFrame.

    Every line of the form ``<key> | <value>`` contributes one entry. The key
    is the whitespace-free token directly before the pipe and the value is the
    rest of that line up to the next pipe, stripped of surrounding whitespace.
    Text without a pipe (such as a leading ``map(...)`` header line) is ignored.

    Args:
        config_str (str): The configuration string to be parsed.
        dataseries (bool, optional): Whether or not to return a DataFrame. Defaults to False.

    Returns:
        The parsed configuration string as a dictionary, or as a single-column
        (``value``) DataFrame indexed by ``key`` when ``dataseries`` is true.
    """
    # ``[^\S\n]`` is "whitespace other than newline" and the value group stops
    # at a newline: a match can therefore never run past the end of its own
    # row and swallow the key of the following row.
    pattern = r"([^\s|]+)[^\S\n]*\|[^\S\n]*([^|\n]+)"
    matches = re.findall(pattern, config_str)
    data = {key.strip(): value.strip() for key, value in matches}
    if dataseries:
        df = pd.DataFrame.from_dict(data, orient="index", columns=["value"])
        df.index.name = "key"
        return df
    return data
def read_esgf_ini(file_path: str) ‑> dict

Reads the INI file and turns it into a dictionary.

Args

file_path : str
The file path to the INI file to be parsed.

Returns

A dictionary containing the data from the INI file.

Expand source code
def read_esgf_ini(file_path: str) -> dict:
    """
    Load an INI file into a plain nested dictionary.

    Interpolation is switched off, so ``%`` characters in values are returned
    verbatim.

    Args:
        file_path (str): The file path to the INI file to be parsed.

    Returns:
        A dictionary of the form ``{section_name: {option_name: value}}`` with
        one entry per section reported by the parser.
    """
    parser = configparser.ConfigParser(interpolation=None)
    parser.read(file_path)
    # One inner dict per section, built straight from the parser's (key, value) pairs.
    return {name: dict(parser.items(name)) for name in parser.sections()}

Classes

class ESGFIni (ini_file: str)

A class to parse and access data from an ESGF INI configuration file

Attributes

project : str
The project name of the INI file
categories : pandas.DataFrame
A DataFrame containing information about the categories of each key
headers : List[str]
A list of the headers for the INI file

Initializes the class by reading the INI file and parsing the data

Args

ini_file : str
The file path to the INI file to be parsed
Expand source code
class ESGFIni:
    """
    A class to parse and access data from an ESGF INI configuration file

    Only the first section of the file is exposed. Each option of that section
    is stored in a flat lookup table and can be read as an attribute of the
    instance (use ``getattr(obj, name)`` for names that are not valid Python
    identifiers).

    Attributes:
        project (str): The project name of the INI file (the name of its first section)
        categories (pandas.DataFrame): A DataFrame containing information about the categories of each key
        headers (List[str]): A list of the headers (option names) for the INI file
    """

    def __init__(self, ini_file: str) -> None:
        """
        Initializes the class by reading the INI file and parsing the data

        Args:
            ini_file (str): The file path to the INI file to be parsed

        Raises:
            FileNotFoundError: if ``ini_file`` does not exist.
        """
        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with -O, which would let a missing file slip through.
        if not os.path.exists(ini_file):
            raise FileNotFoundError(f"ESGF INI file not found: {ini_file}")
        self._complete = read_esgf_ini(ini_file)
        self.project = list(self._complete)[0]
        self._esgf_data = self._complete.get(self.project)
        self.headers = list(self._esgf_data)
        self.categories = categories(self._esgf_data.get('categories'))
        self._flatten_data = {}
        self._flatten_keys(self._esgf_data, '')

    def __getattr__(self, key: str):
        """
        Overrides the default behavior of getting an attribute from the class to get data from the INI file.

        Python only calls this hook after normal attribute lookup has failed.

        Args:
            key (str): The key to get from the INI file.

        Returns:
            The value of the key in the INI file.

        Raises:
            AttributeError: if the key is not found in the INI file.
        """
        # Read the table through ``__dict__``: going through ``self._flatten_data``
        # would re-enter this hook (and recurse without bound) whenever the
        # table has not been created yet, e.g. on a bare or half-built instance.
        flat = self.__dict__.get('_flatten_data')
        if flat is not None and key in flat:
            return flat[key]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{key}'")

    def _flatten_keys(self, data: dict, prefix: str) -> None:
        """
        Recursively flattens the keys of the INI file to make them accessible as attributes of the class.

        String values containing ``|`` are converted with ``cfg2obj``; string
        values containing ``', '`` (and no ``%``) are split into a list; all
        other values are stored unchanged. A nested dict is flattened under
        ``"<prefix><key>."`` and is also stored as-is under its own key.

        Args:
            data (dict): The data of the INI file.
            prefix (str): The prefix to add to each key.
        """
        for key, value in data.items():
            if isinstance(value, dict):
                new_prefix = f"{prefix}{key}."
                self._flatten_keys(value, new_prefix)
            elif '|' in value:
                value = cfg2obj(value, False)
            elif ', ' in value and '%' not in value:
                # Values with '%' are left whole (they look like format templates).
                value = value.split(', ')
            self._flatten_data[f"{prefix}{key}"] = value

    def print(self, key):
        '''
        Pretty print the flattened INI value stored under ``key``, scaling the
        output width to the width of the terminal.

        Example usage:
            ```class.print('experiment_title_map')```

        Args:
            key (str): The flattened key to display.

        Returns:
            None (the value is written to stdout by ``pprint.pprint``).

        Raises:
            AttributeError: if the key is not found in the INI file.
        '''
        terminal_width = shutil.get_terminal_size().columns
        # Look the key up on this instance's flattened table rather than on a
        # module-level variable that only exists in an interactive session.
        return pprint.pprint(self.__getattr__(key), width=terminal_width)

Methods

def print(self, key)

Set up pretty print formatting scaling to the width of the terminal.

Example usage: class.print('experiment_title_map')

Expand source code
def print(self, key):
    '''
    Pretty print the value that ``self.__getattr__(key)`` returns, scaling the
    output width to the width of the terminal.

    Example usage:
        ```class.print('experiment_title_map')```

    Args:
        key (str): The key to look up and display.

    Returns:
        None (the value is written to stdout by ``pprint.pprint``).
    '''
    terminal_width = shutil.get_terminal_size().columns
    # Look the key up on ``self`` rather than on a module-level variable that
    # only exists in an interactive session.
    return pprint.pprint(self.__getattr__(key), width=terminal_width)