Pandas is a Python data analysis package. In this example we use data on educational attainment. The data is [here][Pandas plotting].
Big Data Analysis with Pandas - Education
Data
The data used is [here]; it is also reproduced below in CSV format, one record per state:
State,High school graduate,Bachelor's degree,Advanced degree,,,,Household income,Cost of living
Alabama,82.1,22,7.7,0.82,0.22,0.08,40489.00,92.74
Alaska,91.4,26.6,9,0.91,0.27,0.09,66953.00,132.64
Arizona,84.2,25.6,9.3,0.84,0.26,0.09,48745.00,103.73
Arkansas,82.4,18.9,6.1,0.82,0.19,0.06,40489.00,90.61
California,80.6,29.9,10.7,0.81,0.30,0.11,58931.00,132.56
Colorado,89.3,35.9,12.7,0.89,0.36,0.13,55430.00,102.23
Connecticut,88.6,35.6,15.5,0.89,0.36,0.16,67034.00,130.22
Delaware,87.4,28.7,11.4,0.87,0.29,0.11,56860.00,102.40
District of Columbia,87.1,48.5,28,0.87,0.49,0.28,57936.00,100.00
Florida,85.3,25.3,9,0.85,0.25,0.09,44736.00,98.39
Georgia,83.9,27.5,9.9,0.84,0.28,0.10,47590.00,92.21
Hawaii,90.4,29.6,9.9,0.90,0.30,0.10,64098.00,165.56
Idaho,88.4,23.9,7.5,0.88,0.24,0.08,44926.00,93.04
Illinois,86.4,30.6,11.7,0.86,0.31,0.12,53966.00,96.08
Indiana,86.6,22.5,8.1,0.87,0.23,0.08,45424.00,94.19
Iowa,91.4,25.1,7.4,0.91,0.25,0.07,48044.00,93.98
Kansas,89.7,29.5,10.2,0.90,0.30,0.10,47817.00,91.31
Kentucky,81.7,21,8.5,0.82,0.21,0.09,40072.00,89.21
Louisiana,82.2,21.4,6.9,0.82,0.21,0.07,42492.00,96.15
Maine,90.2,26.9,9.6,0.90,0.27,0.10,45734.00,116.42
Maryland,89,37.3,16,0.89,0.37,0.16,69272.00,124.81
Massachusetts,89,38.2,16.4,0.89,0.38,0.16,64081.00,117.80
Michigan,87.9,24.6,9.4,0.88,0.25,0.09,45255.00,95.25
Minnesota,91.5,31.5,10.3,0.92,0.32,0.10,55616.00,102.23
Mississippi,80.4,19.6,7.1,0.80,0.20,0.07,36646.00,92.26
Missouri,86.8,25.2,9.5,0.87,0.25,0.10,45229.00,91.66
Montana,90.8,27.4,8.3,0.91,0.27,0.08,42322.00,100.00
Nebraska,89.8,27.4,8.8,0.90,0.27,0.09,47357.00,91.09
Nevada,83.9,21.8,7.6,0.84,0.22,0.08,53341.00,101.39
New Hampshire,91.3,32,11.2,0.91,0.32,0.11,60567.00,116.68
New Jersey,87.4,34.5,12.9,0.87,0.35,0.13,68342.00,128.47
New Mexico,82.8,25.3,10.4,0.83,0.25,0.10,43028.00,98.88
New York,84.7,32.4,14,0.85,0.32,0.14,54659.00,128.29
North Carolina,84.3,26.5,8.8,0.84,0.27,0.09,43674.00,96.21
North Dakota,90.1,25.8,6.7,0.90,0.26,0.07,47827.00,95.91
Ohio,87.6,24.1,8.8,0.88,0.24,0.09,45395.00,93.85
Oklahoma,85.6,22.7,7.4,0.86,0.23,0.07,41664.00,90.09
Oregon,89.1,29.2,10.4,0.89,0.29,0.10,48457.00,110.47
Pennsylvania,87.9,26.4,10.2,0.88,0.26,0.10,49520.00,100.67
Rhode Island,84.7,30.5,11.7,0.85,0.31,0.12,54119.00,123.25
South Carolina,83.6,24.3,8.4,0.84,0.24,0.08,44625.00,98.71
South Dakota,89.9,25.1,7.3,0.90,0.25,0.07,45043.00,98.53
Tennessee,83.1,23,7.9,0.83,0.23,0.08,41725.00,89.49
Texas,79.9,25.5,8.5,0.80,0.26,0.09,48259.00,91.04
United States,85.3,27.9,10.3,0.85,0.28,0.10,52029.00,100.00
Utah,90.4,28.5,9.1,0.90,0.29,0.09,55117.00,95.15
Vermont,91,33.1,13.3,0.91,0.33,0.13,51618.00,120.38
Virginia,86.6,34,14.1,0.87,0.34,0.14,59330.00,97.66
Washington,89.7,31,11.1,0.90,0.31,0.11,56548.00,103.98
West Virginia,82.8,17.3,6.7,0.83,0.17,0.07,37435.00,94.40
Wisconsin,89.8,25.7,8.4,0.90,0.26,0.08,49993.00,96.45
Wyoming,91.8,23.8,7.9,0.92,0.24,0.08,52664.00,98.66
Code
An outline of the code is:
import numpy as np
import pandas as pd
import sys
import statsmodels.api as sm