import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
# Load the two pickled 2D arrays of job counts.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load these files if they come from a trusted source.
# The `with` blocks close each file handle as soon as it has been read,
# instead of leaving it open until garbage collection.
with open('JavaSafe.p', 'rb') as java_file:
    java = pickle.load(java_file)
with open('PythonSafe.p', 'rb') as python_file:
    python = pickle.load(python_file)

# Wrap each array in a two-column frame: the town label and its job count.
javaFrame = pd.DataFrame(java, columns=['Town', 'Java Jobs'])
pythonFrame = pd.DataFrame(python, columns=['Town', 'Python Jobs'])

# Order each frame from the most jobs to the fewest.
javaFrame = javaFrame.sort_values(by='Java Jobs', ascending=False)
pythonFrame = pythonFrame.sort_values(by='Python Jobs', ascending=False)

print(javaFrame,"\n",pythonFrame)
This code produces the following output:
                Town  Java Jobs
435          York,NY       3593
212       NewYork,NY       3585
584       Seattle,WA       2080
624       Chicago,IL       1920
301        Boston,MA       1571
...
79        Holland,MI          5
38      Manhattan,KS          5
497        Vernon,IL          5
30        Clayton,MO          5
90       Waukegan,IL          5
[653 rows x 2 columns] 
                 Town  Python Jobs
160       NewYork,NY         2949
11           York,NY         2938
349       Seattle,WA         1321
91        Chicago,IL         1312
167        Boston,MA         1117
383       Hanover,NH            5
209      Bulverde,TX            5
203     Salisbury,NC            5
67       Rockford,IL            5
256       Ventura,CA            5
[416 rows x 2 columns]
I want to make a new dataframe that uses the town names as its index and has one column each for the Java and Python job counts. However, some of the towns will only have results for one of the languages.
# Outer merge on 'Town' keeps towns that appear in only one of the frames;
# the job-count column for the missing language is NaN for those rows.
# 'Town' is then promoted to the index of the combined frame.
result = pd.merge(pythonFrame, javaFrame, on='Town', how='outer').set_index('Town')