This page outlines Tutorial 4 [link]
The following are the commands used:
"""Tutorial 4: predict one mortality rate from three others with a random forest.

The script loads ``fdata``, trains a ``RandomForestRegressor`` on half of the
rows (columns ``x1``..``x3`` as features, ``x4`` as the target), predicts the
target for every row of the data set, and prints a per-row report.  A
prediction counts as a success when it is within one fifth of the target
column's value range of the actual value.

The code is written to run unchanged on both Python 2 and Python 3: every
``print`` call takes a single pre-formatted string.
"""
import numpy as np
import pandas as pd
import sys

# Feature columns (x1..x3) and the target column (x4) read from the CSV file.
x1 = "Infant MR"
x2 = "Heart Disease DR"
x3 = "Suicide DR"
x4 = "Cancer DR"
fdata = "datasets/df.csv"


def non_numeric_columns(frame, columns):
    """Return the names in *columns* whose dtype in *frame* is not numeric.

    Checking for "not numeric" rather than ``dtype == object`` also rejects
    pandas' dedicated string dtype, which the regressor cannot train on either.
    """
    return [name for name in columns
            if not pd.api.types.is_numeric_dtype(frame[name])]


def score_predictions(predictions, actual, limit):
    """Compare predictions with actual values, pairing them by position.

    Returns a list of ``(predicted, observed, error, verdict)`` tuples, where
    ``error`` is the absolute difference and ``verdict`` is ``"Success"`` when
    ``error <= limit`` and ``"Failed!"`` otherwise.
    """
    rows = []
    for predicted, observed in zip(predictions, actual):
        error = abs(predicted - observed)
        verdict = "Success" if error <= limit else "Failed!"
        rows.append((predicted, observed, error, verdict))
    return rows


def main():
    """Train the model, score every row of the data set and print the report.

    Exits with status 1 if any of the four configured columns is non-numeric.
    """
    features = [x1, x2, x3]

    print("Training data:\t\t%s , %s , %s" % (x1, x2, x3))
    print("Training against:\t%s" % x4)
    print("Data set:\t\t%s" % fdata)
    print("======================")

    ver = pd.read_csv(fdata)

    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20; prefer its
    # replacement and fall back only on installations that predate it.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    from sklearn.ensemble import RandomForestRegressor

    if non_numeric_columns(ver, features + [x4]):
        print("One of the data values is an object")
        sys.exit(1)

    # Only the training half is needed: the report below scores every row of
    # the data set, not just the held-out half.
    train, _, y_train, _ = train_test_split(
        ver[features], ver[x4], test_size=0.5, random_state=1)

    # NOTE: the forest itself is not seeded, so predictions can differ between
    # runs even though the train/test split is fixed.
    model = RandomForestRegressor()
    model.fit(train, y_train)
    predictions = model.predict(ver[features])

    # Plain arrays give positional access, so the report does not depend on
    # the frame's index labels being 0..n-1.
    actual = ver[x4].values
    labels = ver[ver.columns[0]].values  # first CSV column names each row

    value_range = float(ver[x4].max()) - float(ver[x4].min())
    print("Range of values:\t%s" % value_range)
    limit = value_range / 5
    print("Success limit:\t\t%s" % limit)
    print("==============================================\n")

    print('%22s %8s %8s %8s %8s' % ("Index", "Pred", "Actual", "Diff", "Success"))
    print("=====================================================")

    success = 0
    failure = 0
    scored = score_predictions(predictions, actual, limit)
    for label, (predicted, observed, error, verdict) in zip(labels, scored):
        if verdict == "Success":
            success += 1
        else:
            failure += 1
        # str() keeps the 22-character truncation working for non-string labels.
        print('%22s %8.2f %8.2f %8.2f %8s'
              % (str(label)[:22], predicted, observed, error, verdict))
    print('Success: %3d Fail: %3d' % (success, failure))

    print("\n\n\nTraining data:")
    print(train)
    print("Training data (y):")
    print(y_train)


if __name__ == "__main__":
    main()