Module zedstat.ci

Expand source code
import pandas as pd
import numpy as np

def ci(df,xvar,yvar,alpha=.05):
    '''Fit a simple OLS line of yvar on xvar and return its confidence band.

    The rows of df are sorted by xvar, an intercept column is added, and
    the model ``yvar ~ const + xvar`` is fitted with statsmodels OLS.

    Args:
        df (pandas.DataFrame): input dataframe
        xvar (str): x variable name
        yvar (str): y variable name
        alpha (float): significance level of the confidence band; the
            returned lb/ub are the (1 - alpha) confidence limits of the
            fitted mean

    Returns:
        pandas.DataFrame: indexed by the sorted x values, with columns
            pred (fitted value), lb and ub (lower/upper confidence
            limits of the fitted mean)
        float: pvalue (f_pvalue of OLS estimator)
        float: aic (aic of OLS estimator)
        float: correlation (numpy corrcoef between predicted and observed
            y values)
        statsmodels.regression.linear_model.OLSResults
    '''
    import pandas as pd
    import statsmodels.api as sm

    df = df.sort_values(xvar)
    x_pred = df[xvar].values
    X = sm.add_constant(x_pred)
    est = sm.OLS(df[yvar].values, X).fit()

    # alpha must be forwarded here: summary_frame() defaults to 0.05, so
    # without it the caller's alpha never reaches the returned bounds.
    pred = est.get_prediction(X).summary_frame(alpha=alpha)

    predf = pd.DataFrame({'pred': est.params[0]+x_pred*est.params[1],
                          'lb': pred['mean_ci_lower'].values,
                          'ub': pred['mean_ci_upper'].values},
                         index=x_pred)
    corr = np.corrcoef(predf.pred,df[yvar])[0,1]
    return predf,est.f_pvalue,est.aic,corr,est

Functions

def ci(df, xvar, yvar, alpha=0.05)

Get CI bounds for OLS regression

Args

df : pandas.DataFrame
input dataframe
xvar : str
x variable name
yvar : str
y variable name
alpha : float
significance level for the confidence bounds

Returns

pandas.DataFrame
indexed by x, with columns pred, lb, ub
float
pvalue (f_pvalue of OLS estimator)
float
aic (aic of OLS estimator)
float
correlation (numpy corrcoef between predicted and observed y values)

statsmodels.regression.linear_model.OLSResults

Expand source code
def ci(df,xvar,yvar,alpha=.05):
    '''Fit a simple OLS line of yvar on xvar and return its confidence band.

    The rows of df are sorted by xvar, an intercept column is added, and
    the model ``yvar ~ const + xvar`` is fitted with statsmodels OLS.

    Args:
        df (pandas.DataFrame): input dataframe
        xvar (str): x variable name
        yvar (str): y variable name
        alpha (float): significance level of the confidence band; the
            returned lb/ub are the (1 - alpha) confidence limits of the
            fitted mean

    Returns:
        pandas.DataFrame: indexed by the sorted x values, with columns
            pred (fitted value), lb and ub (lower/upper confidence
            limits of the fitted mean)
        float: pvalue (f_pvalue of OLS estimator)
        float: aic (aic of OLS estimator)
        float: correlation (numpy corrcoef between predicted and observed
            y values)
        statsmodels.regression.linear_model.OLSResults
    '''
    import pandas as pd
    import statsmodels.api as sm

    df = df.sort_values(xvar)
    x_pred = df[xvar].values
    X = sm.add_constant(x_pred)
    est = sm.OLS(df[yvar].values, X).fit()

    # alpha must be forwarded here: summary_frame() defaults to 0.05, so
    # without it the caller's alpha never reaches the returned bounds.
    pred = est.get_prediction(X).summary_frame(alpha=alpha)

    predf = pd.DataFrame({'pred': est.params[0]+x_pred*est.params[1],
                          'lb': pred['mean_ci_lower'].values,
                          'ub': pred['mean_ci_upper'].values},
                         index=x_pred)
    corr = np.corrcoef(predf.pred,df[yvar])[0,1]
    return predf,est.f_pvalue,est.aic,corr,est