# Source code for ETIA.CRV.adjustment.adjset_R

import pandas as pd
import numpy as np
import subprocess
import os


# [docs]
def read_adjset(csv_name, path_):

    '''
    Read an adjustment-set csv file produced by the R scripts
    Author: kbiza@csd.uoc.gr
    Parameters
    ----------
    csv_name (str): the name of the file
    path_(str): the directory that contains the file

    Returns
    -------
    adj_set(list or None): a list with one inner list per consecutive column
        'X1', 'X2', ... of the csv file (the values of that column), or None
        if the file has no 'X1' column (no adjustment set exists)

    '''

    adjset_pd = pd.read_csv(os.path.join(path_, csv_name))

    # Guard clause: without an 'X1' column there is nothing to collect.
    if 'X1' not in adjset_pd:
        print('no adjustment set exists')
        return None

    print('adjustment set exists')

    # Walk X1, X2, ... by name instead of by the total number of columns:
    # an extra column in the file (e.g. an unnamed row-name/index column)
    # would otherwise make the loop ask for a non-existent 'X<k>' column
    # and fail with a KeyError.
    adj_set = []
    col_number = 1
    while 'X' + str(col_number) in adjset_pd.columns:
        adj_set.append(adjset_pd['X' + str(col_number)].tolist())
        col_number += 1

    return adj_set




# [docs]
def adjset_pcalg(graph_pd, graph_type, x, y, r_path='R'):

    '''
    Run the pcalg R package to identify the adjustment set of X and Y
    Author: kbiza@csd.uoc.gr
    Parameters
    ----------
    graph_pd(pandas Dataframe): the graph; it is written to 'graph_r.csv'
        and the file name is passed to the R script
    graph_type(str): {'dag', 'cpdag', 'mag', 'pag'}
    x(list): the exposure variables, passed to the R script as str(x)
    y(list): the outcome variables, passed to the R script as str(y)
    r_path(str): the R executable used to run the script (default 'R');
        pass a full path here to select a specific R installation

    Returns
    -------
        canonical_set(list or None): the canonical adj. set read from
            'canonical_pcalg.csv' with every value decreased by one
            (R is 1-based), or None if no adjustment set exists
        minimal_set(list or None): the same for 'minimal_pcalg.csv'
    '''

    path_ = os.path.dirname(__file__)
    graph_name = 'graph_r.csv'
    # NOTE(review): the graph is written relative to the current working
    # directory, while the result files are read from this module's
    # directory -- confirm this is what run_adjset_pcalg_r.R expects.
    graph_pd.to_csv(graph_name)

    # The argument list is executed directly (no shell). With shell=True and
    # a list, POSIX runs only the first item as the command and hands the
    # rest to the shell itself, so the script and its arguments never reach
    # R; skipping the shell also keeps str(x)/str(y) away from shell
    # metacharacter interpretation.
    subprocess.call([r_path, '--vanilla', os.path.join(path_, 'run_adjset_pcalg_r.R'),
                     graph_name, graph_type, str(x), str(y)])

    canonical_set = read_adjset('canonical_pcalg.csv', path_)
    minimal_set = read_adjset('minimal_pcalg.csv', path_)

    def _to_zero_based(adj_set):
        # read_adjset returns None when no adjustment set exists; keep it.
        if adj_set is None:
            return None
        # r indexing --> we need to subtract 1
        return [[value - 1 for value in sublist] for sublist in adj_set]

    return _to_zero_based(canonical_set), _to_zero_based(minimal_set)




# [docs]
def adjset_dagitty(graph_pd, graph_type, x_name, y_name, r_path='R'):

    '''
    Run the dagitty R package to identify the adjustment set of X and Y
    Author: kbiza@csd.uoc.gr
    Args:
        graph_pd(pandas Dataframe): the graph as adjacency matrix
        graph_type(str): the type of the graph : {'dag', 'cpdag', 'mag', 'pag'}
        x_name(list): list of variable names
        y_name(list): list of variable names
        r_path(str): the R executable used to run the script (default 'R');
            pass a full path here to select a specific R installation

    Returns:
        canonical_set(list or None): the canonical adj. set read from
            'canonical_dagitty.csv', or None if no adjustment set exists
        minimal_set(list or None): the minimal adj. set read from
            'minimal_dagitty.csv', or None if no adjustment set exists
    '''

    path_ = os.path.dirname(__file__)

    graph_name = 'graph_r.csv'
    exp_name = 'exposures.csv'
    out_name = 'outcomes.csv'

    # The graph and the exposure/outcome names are exchanged with the R
    # script through csv files placed next to this module.
    graph_pd.to_csv(os.path.join(path_, graph_name))

    x_names_pd = pd.DataFrame(np.array(x_name), columns=['x_names_dagitty'])
    y_names_pd = pd.DataFrame(np.array(y_name), columns=['y_names_dagitty'])
    x_names_pd.to_csv(os.path.join(path_, exp_name))
    y_names_pd.to_csv(os.path.join(path_, out_name))

    # The argument list is executed directly (no shell). With shell=True and
    # a list, POSIX runs only the first item as the command and hands the
    # rest to the shell itself, so the script and its arguments never reach R.
    # NOTE(review): only the bare file names are passed, although the files
    # were written into this module's directory -- confirm that
    # run_adjset_dagitty_r.R resolves them relative to that directory.
    subprocess.call([r_path, '--vanilla', os.path.join(path_, 'run_adjset_dagitty_r.R'),
                     graph_name, graph_type, exp_name, out_name])

    canonical_set = read_adjset('canonical_dagitty.csv', path_)
    minimal_set = read_adjset('minimal_dagitty.csv', path_)

    # it returns variable names, not indexes
    return canonical_set, minimal_set