Module cvtool.CVII.components.clean

Expand source code
from copy import deepcopy

from collections import OrderedDict
from jsonschema import validate
import cvtool.core as core
from p_tqdm import p_map
from tqdm import tqdm
from pprint import pprint
from functools import partial



# Module-level logger for this component, created by calling core.stdout.log
# with the name 'cleaner' and level 'info'.
# NOTE(review): presumably this returns a logger object; it is not referenced
# anywhere else in the visible code -- confirm against cvtool.core.
logger = core.stdout.log('cleaner', level='info')



def run(cvloc, prefix, metadata):
    """Drop activity and sub-experiment entries that no experiment references.

    Loads ``{cvloc}{prefix}<name>.json`` for ``activity_id``,
    ``experiment_id`` and ``sub_experiment_id``; each file is read with
    ``core.io.json_read`` and its content is taken from the top-level key
    equal to ``<name>``. The ``activity_id``, ``parent_activity_id`` and
    ``sub_experiment_id`` lists of every experiment are gathered, and when the
    activity or sub-experiment file holds keys missing from the gathered
    lists, that file is rewritten via ``core.io.filter_dict`` /
    ``core.io.write`` and the removed keys are printed.

    Args:
        cvloc: Leading part of every file location. It is joined to
            ``prefix`` by plain string concatenation; no path separator is
            inserted.
        prefix: Text placed directly before ``<name>.json``.
        metadata: Mapping whose items are spread into the top level of each
            rewritten file, ahead of the filtered section.

    Returns:
        None. Results are the rewritten files and the printed messages.
    """
    names = ('activity_id', 'experiment_id', 'sub_experiment_id')

    # Read all three files up front so a missing/invalid file fails before
    # anything is rewritten.
    collection = {}
    for name in names:
        collection[name] = core.io.json_read(f"{cvloc}{prefix}{name}.json")[name]

    # Identifiers actually referenced by at least one experiment.
    referenced = {'activity_id': [], 'sub_experiment_id': []}
    for experiment in collection['experiment_id'].values():
        for field in ('activity_id', 'parent_activity_id'):
            referenced['activity_id'].extend(experiment.get(field, []))
        referenced['sub_experiment_id'].extend(
            experiment.get('sub_experiment_id', [])
        )

    # (section name, label used in the printed message, keys never reported).
    # 'no_parent' is excluded from the activity difference only -- presumably a
    # placeholder for experiments without a parent; confirm against the CV.
    checks = (
        ('activity_id', 'activites', {'no_parent'}),
        ('sub_experiment_id', 'sub_experiment', set()),
    )

    for name, label, ignored in checks:
        unused = set(collection[name]) - set(referenced[name]) - ignored
        if not unused:
            continue

        corrected = {
            **metadata,
            name: core.io.filter_dict(collection[name], list(referenced[name])),
        }
        core.io.write(corrected, f"{cvloc}{prefix}{name}.json")

        # Report the keys removed from *this* section.
        print(f"corrected {label} file: <removed> {unused}")

    # NOTE(review): presumably removes files in cvloc older than two minutes --
    # confirm against core.io.rm_older.
    core.io.rm_older(cvloc, minutes=2)
    

Functions

def run(cvloc, prefix, metadata)
Expand source code
def run(cvloc, prefix, metadata):
    """Drop activity and sub-experiment entries that no experiment references.

    Loads ``{cvloc}{prefix}<name>.json`` for ``activity_id``,
    ``experiment_id`` and ``sub_experiment_id``; each file is read with
    ``core.io.json_read`` and its content is taken from the top-level key
    equal to ``<name>``. The ``activity_id``, ``parent_activity_id`` and
    ``sub_experiment_id`` lists of every experiment are gathered, and when the
    activity or sub-experiment file holds keys missing from the gathered
    lists, that file is rewritten via ``core.io.filter_dict`` /
    ``core.io.write`` and the removed keys are printed.

    Args:
        cvloc: Leading part of every file location. It is joined to
            ``prefix`` by plain string concatenation; no path separator is
            inserted.
        prefix: Text placed directly before ``<name>.json``.
        metadata: Mapping whose items are spread into the top level of each
            rewritten file, ahead of the filtered section.

    Returns:
        None. Results are the rewritten files and the printed messages.
    """
    names = ('activity_id', 'experiment_id', 'sub_experiment_id')

    # Read all three files up front so a missing/invalid file fails before
    # anything is rewritten.
    collection = {}
    for name in names:
        collection[name] = core.io.json_read(f"{cvloc}{prefix}{name}.json")[name]

    # Identifiers actually referenced by at least one experiment.
    referenced = {'activity_id': [], 'sub_experiment_id': []}
    for experiment in collection['experiment_id'].values():
        for field in ('activity_id', 'parent_activity_id'):
            referenced['activity_id'].extend(experiment.get(field, []))
        referenced['sub_experiment_id'].extend(
            experiment.get('sub_experiment_id', [])
        )

    # (section name, label used in the printed message, keys never reported).
    # 'no_parent' is excluded from the activity difference only -- presumably a
    # placeholder for experiments without a parent; confirm against the CV.
    checks = (
        ('activity_id', 'activites', {'no_parent'}),
        ('sub_experiment_id', 'sub_experiment', set()),
    )

    for name, label, ignored in checks:
        unused = set(collection[name]) - set(referenced[name]) - ignored
        if not unused:
            continue

        corrected = {
            **metadata,
            name: core.io.filter_dict(collection[name], list(referenced[name])),
        }
        core.io.write(corrected, f"{cvloc}{prefix}{name}.json")

        # Report the keys removed from *this* section.
        print(f"corrected {label} file: <removed> {unused}")

    # NOTE(review): presumably removes files in cvloc older than two minutes --
    # confirm against core.io.rm_older.
    core.io.rm_older(cvloc, minutes=2)