[ Log On ]
  • Home
  • Tst
  • Cha
  • Enc
  • Code
  • IP
  • Fun
  • Sub
  • DigF
  • Cis
  • Com
  • Db
  • About
  • Netsim
  • Big Data

Data analysis

 

[Back] This page allows you to analyse data correlations. Select your data set, and then the X and Y axes.

Graph axis

X-axis:

Y-axis:

Data set

Data set:

Type:

View image [SVG] View image [PNG]

Coding

The code is:

import numpy as np
import pandas as pd
import sys
import matplotlib.pyplot as plt
import statsmodels.api as sm

def fit_line2(x, y):
    """Return (slope, intercept) of the least-squares line y = slope * x + intercept.

    A constant column is added to ``x`` with ``sm.add_constant`` and the
    model is fitted with ``sm.OLS``; rows where ``x`` or ``y`` is NaN are
    dropped (``missing='drop'``) before fitting.
    """
    X = sm.add_constant(x)
    model = sm.OLS(y, X, missing='drop')  # ignores entries where x or y is NaN
    fit = model.fit()
    # The fitted parameters can come back as a label-indexed pandas Series
    # (when x/y are pandas objects) or as a plain ndarray. Integer keys on a
    # label-indexed Series are deprecated, so index a plain array instead.
    params = np.asarray(fit.params)
    # add_constant puts the constant first: params[0] is the intercept,
    # params[1] the slope.
    return params[1], params[0]

def parse_args(argv):
    """Return (xval, yval, fdata, plot_type) from a ``sys.argv``-style list.

    Positional layout (unchanged from the original script):
      argv[1]  accepted but unused (the original stored it in ``file`` and
               never read it); kept so existing invocations still line up
      argv[2]  column name for the x axis      (default 'preg')
      argv[3]  column name for the y axis      (default 'pres')
      argv[4]  path of the CSV file to load    (default 'di.csv')
      argv[5]  integer analysis type           (default 0)

    Raises ValueError if argv[5] is present but is not an integer.
    """
    xval = 'preg'
    yval = 'pres'
    fdata = 'di.csv'
    plot_type = 0

    if len(argv) > 2:
        xval = str(argv[2])
    if len(argv) > 3:
        yval = str(argv[3])
    if len(argv) > 4:
        fdata = str(argv[4])
    if len(argv) > 5:
        plot_type = int(argv[5])

    return xval, yval, fdata, plot_type


def format_equation(yval, xval, m, b):
    """Return the fitted line as text, e.g. ``'pres = 0.1235  x  preg + 1.5'``.

    Slope and intercept are both rounded to 4 decimal places. A positive
    intercept is written with an explicit '+'; otherwise the intercept is
    written as-is (a negative value carries its own sign).
    """
    slope = round(m, 4)
    intercept = round(b, 4)
    if b > 0:
        return '%s = %s  x  %s + %s' % (yval, slope, xval, intercept)
    return '%s = %s  x  %s %s' % (yval, slope, xval, intercept)


def main(argv=None):
    """Load the CSV, draw the requested plot and print the matching statistics.

    Behaviour by case:
      * x column has object dtype: bar chart of y against the x labels
        (only when the y column is not object dtype); the analysis type is
        ignored in this case.
      * type 0: scatter plot with a degree-1 least-squares line, the line's
        equation, and a statsmodels OLS summary of y on x.
      * type 1: histograms of the frame plus its correlation matrix.
      * type 2: box plot of the frame plus ``describe()`` output.
    """
    if argv is None:
        argv = sys.argv
    xval, yval, fdata, plot_type = parse_args(argv)

    raw = pd.read_csv(fdata)
    # Copy of the data with missing values replaced by -1 (as the original
    # script did); `raw` keeps the NaNs.
    filled = raw.replace(np.nan, -1, regex=True)

    plt.title(yval + ' v ' + xval)
    plt.xlabel(xval)
    plt.ylabel(yval)

    x_is_text = filled[xval].dtype == object
    y_is_text = filled[yval].dtype == object

    if x_is_text:
        if not y_is_text:
            positions = np.arange(len(filled[xval]))

            plt.bar(positions, filled[yval], width=0.8, color="blue")
            plt.xticks(positions, filled[xval], rotation=90)

            axes = plt.gca()
            for label in axes.get_xticklabels() + axes.get_yticklabels():
                label.set_fontsize(10)
            for item in (axes.title, axes.xaxis.label, axes.yaxis.label):
                item.set_fontsize(15)

    elif plot_type == 0:
        # Fit and plot only rows where both values are present. Fitting the
        # -1-filled data would pull the line towards fake (-1) points and
        # disagree with the OLS summary below, which drops missing rows.
        complete = raw[xval].notnull() & raw[yval].notnull()
        xs = raw.loc[complete, xval]
        ys = raw.loc[complete, yval]

        plt.scatter(xs, ys)
        axes = plt.gca()
        m, b = np.polyfit(xs, ys, 1)
        x_line = np.linspace(axes.get_xlim()[0], axes.get_xlim()[1], 100)
        plt.plot(x_line, m * x_line + b, '-')

        print(format_equation(yval, xval, m, b))

        # OLS takes (endog, exog): y is the response, x plus a constant
        # column is the design matrix, matching the y = m*x + b line above.
        print(sm.OLS(ys, sm.add_constant(xs), missing='drop').fit().summary())

    elif plot_type == 1:
        # NOTE(review): the type 1 and type 2 summaries still run on the
        # -1-filled frame, as before, so missing values count as -1 here.
        # Confirm whether that is intended.
        filled.hist()
        # Restrict to numeric/bool columns so text columns do not make
        # corr() fail.
        print(filled.select_dtypes(include=[np.number, 'bool']).corr())

    elif plot_type == 2:
        filled.boxplot()
        print(filled.describe())

    plt.show()


if __name__ == '__main__':
    main()