This page allows you to analyse data correlations. Select your data set, and then the axes.
# Data analysis
# Coding
# The coding is:
"""Explore the relationship between columns of a CSV data set.

Usage:
    python <script> [file] [xval] [yval] [fdata] [type]

Positional arguments (all optional, defaults in ``DEFAULTS``):
    file   identifier accepted for backward compatibility; not used.
    xval   name of the column plotted on the x axis.
    yval   name of the column plotted on the y axis.
    fdata  path of the CSV file to load.
    type   0 = scatter plot with best-fit line and OLS summary,
           1 = histograms of every numeric column plus correlation matrix,
           2 = box plots plus descriptive statistics.

When the x column is not numeric a bar chart of y against the x labels is
drawn instead, whatever ``type`` is.

The code runs unchanged on Python 2.7 and Python 3.
"""

import sys

import numpy as np
import pandas as pd

# matplotlib and statsmodels are imported inside the functions that need
# them: they are slow to load and are only required when something is
# actually plotted or fitted.

# Defaults for (file, xval, yval, fdata, type), in command-line order.
DEFAULTS = ('1111', 'preg', 'pres', 'di.csv', 0)

# Value shown in the bar chart for missing cells so that every row still
# gets a bar and a tick label.
MISSING_PLACEHOLDER = -1


def parse_args(argv):
    """Return (file, xval, yval, fdata, plot_type) from an argv-style list.

    ``argv[0]`` is the program name; up to five following values override
    the corresponding entry of ``DEFAULTS``.  Extra arguments are ignored.

    Raises:
        ValueError: if the fifth argument is not an integer.
    """
    supplied = list(argv[1:len(DEFAULTS) + 1])
    values = list(DEFAULTS)
    values[:len(supplied)] = supplied
    file_id, xval, yval, fdata, plot_type = values
    return str(file_id), str(xval), str(yval), str(fdata), int(plot_type)


def format_equation(yval, xval, m, b):
    """Return the fitted line as text, e.g. ``'pres = 0.5 x preg + 1.25'``.

    Slope and intercept are both rounded to four decimal places; the sign of
    the intercept is written as a separate ``+`` or ``-``.
    """
    slope = round(float(m), 4)
    intercept = round(float(b), 4)
    sign = '+' if intercept >= 0 else '-'
    return '{} = {} x {} {} {}'.format(yval, slope, xval, sign, abs(intercept))


def drop_missing_pairs(x, y):
    """Return x and y restricted to the rows where both have a value."""
    present = x.notnull() & y.notnull()
    return x[present], y[present]


def _fit_ols(x, y):
    """Fit ``y = intercept + slope * x`` by ordinary least squares.

    Rows where x or y is missing are ignored.  Returns the statsmodels
    results object.
    """
    import statsmodels.api as sm

    # has_constant='add' guarantees an intercept column even when x itself
    # happens to be constant, so the result always has two parameters.
    design = sm.add_constant(x, has_constant='add')
    return sm.OLS(y, design, missing='drop').fit()


def fit_line2(x, y):
    """Return slope, intercept of best fit line."""
    # np.asarray gives positional access whether params is an ndarray or a
    # label-indexed pandas Series.
    intercept, slope = np.asarray(_fit_ols(x, y).params)[:2]
    return slope, intercept


def _bar_chart(plt, labels, heights):
    """Draw one bar per row, using the x column's values as tick labels."""
    positions = np.arange(len(labels))
    tick_text = [MISSING_PLACEHOLDER if pd.isnull(v) else v for v in labels]
    plt.bar(positions, heights.fillna(MISSING_PLACEHOLDER),
            width=0.8, color="blue")
    plt.xticks(positions, tick_text, rotation=90)

    axes = plt.gca()
    for label in axes.get_xticklabels() + axes.get_yticklabels():
        label.set_fontsize(10)
    for item in (axes.title, axes.xaxis.label, axes.yaxis.label):
        item.set_fontsize(15)


def _scatter_with_fit(plt, x, y, xval, yval):
    """Scatter y against x, overlay the best-fit line and print the fit."""
    # Rows with a missing value carry no information about the relationship;
    # substituting a sentinel for them would bias the fitted line.
    x, y = drop_missing_pairs(x, y)
    plt.scatter(x, y)
    if len(x) < 2:
        print('Not enough data points to fit a line.')
        return

    fit = _fit_ols(x, y)
    intercept, slope = np.asarray(fit.params)[:2]

    low, high = plt.gca().get_xlim()
    line_x = np.linspace(low, high, 100)
    plt.plot(line_x, slope * line_x + intercept, '-')

    print(format_equation(yval, xval, slope, intercept))
    # The summary comes from the same fit as the drawn line, so the two
    # always describe the same model.
    print(fit.summary())


def main(argv=None):
    """Load the CSV named on the command line and show the requested plot."""
    import matplotlib.pyplot as plt

    if argv is None:
        argv = sys.argv
    _file_id, xval, yval, fdata, plot_type = parse_args(argv)

    data = pd.read_csv(fdata)
    is_numeric = pd.api.types.is_numeric_dtype

    plt.title(yval + ' v ' + xval)
    plt.xlabel(xval)
    plt.ylabel(yval)

    if not is_numeric(data[xval]):
        # Categorical x axis: only meaningful when y is numeric.
        if is_numeric(data[yval]):
            _bar_chart(plt, data[xval], data[yval])
    elif plot_type == 0:
        _scatter_with_fit(plt, data[xval], data[yval], xval, yval)
    elif plot_type == 1:
        data.hist()
        numeric_columns = [c for c in data.columns if is_numeric(data[c])]
        print(data[numeric_columns].corr())
    elif plot_type == 2:
        data.boxplot()
        print(data.describe())

    plt.show()


if __name__ == '__main__':
    main()