










































Study with the several resources on Docsity
Earn points by helping other students or get them with a premium plan
Prepare for your exams
Study with the several resources on Docsity
Earn points to download
Earn points by helping other students or get them with a premium plan
1 / 50
This page cannot be seen from the preview
Don't miss anything!











































from future import print_function import os import pandas as ps # using panda to convert jsonstat dataset to pandas dataframe import jsonstat # import jsonstat.py package
import matplotlib as plt # for plotting
%matplotlib inline
url = 'http://json-stat.org/samples/oecd-canada.json' file_name = "oecd-canada.json"
file_path = os.path.abspath(os.path.join("..", "tests", "fixtures", "www.json-stat.org ˓→", file_name)) if os.path.exists(file_path): print("using already downloaded file {}".format(file_path)) else : print("download file and storing on disk") jsonstat.download(url, file_name) file_path = file_name
using already downloaded file /Users/26fe_nas/gioprj.on_mac/prj.python/jsonstat.py/ ˓→tests/fixtures/www.json-stat.org/oecd-canada.json
collection = jsonstat.from_file(file_path) collection
oecd = collection.dataset('oecd') oecd
oecd.dimension('concept')
oecd.dimension('area')
oecd.dimension('year')
oecd.data(area='IT', year='2012')
JsonStatValue(idx=201, value=10.55546863, status= None )
oecd.value(area='IT', year='2012')
oecd.value(concept='unemployment rate',area='Australia',year='2004') # 5.
oecd.value(concept='UNR',area='AU',year='2004')
df_oecd = oecd.to_data_frame('year', content='id') df_oecd.head()
df_oecd['area'].describe() # area contains 36 values
[['indicator', 'OECD countries, EU15 and total', '2003-2014', 'Value'], ['unemployment rate', 'Australia', '2003', 5.943826289], ['unemployment rate', 'Australia', '2004', 5.39663128], ['unemployment rate', 'Australia', '2005', 5.044790587], ['unemployment rate', 'Australia', '2006', 4.789362794]]
order = [i.did() for i in oecd.dimensions()] order = order[::-1] # reverse list table = oecd.to_table(order=order) table[:5]
[['indicator', 'OECD countries, EU15 and total', '2003-2014', 'Value'], ['unemployment rate', 'Australia', '2003', 5.943826289], ['unemployment rate', 'Austria', '2003', 4.278559338], ['unemployment rate', 'Belgium', '2003', 8.158333333], ['unemployment rate', 'Canada', '2003', 7.594616751]]
Notebook: using jsonstat.py python library with jsonstat format ver-
sion 2.
from future import print_function import os import pandas as ps # using panda to convert jsonstat dataset to pandas dataframe import jsonstat # import jsonstat.py package
import matplotlib as plt # for plotting %matplotlib inline
url = 'http://json-stat.org/samples/oecd-canada-col.json' file_name = "oecd-canada-col.json"
file_path = os.path.abspath(os.path.join("..", "tests", "fixtures", "www.json-stat.org ˓→", file_name)) if os.path.exists(file_path): print("using already downloaded file {}".format(file_path)) else : print("download file and storing on disk") jsonstat.download(url, file_name) file_path = file_name
using already downloaded file /Users/26fe_nas/gioprj.on_mac/prj.python/jsonstat.py/ ˓→tests/fixtures/www.json-stat.org/oecd-canada-col.json
collection = jsonstat.from_file(file_path) collection
oecd = collection.dataset(0) oecd
oecd.dimension('concept')
oecd.dimension('area')
oecd.dimension('year')
oecd.data(area='IT', year='2012')
JsonStatValue(idx=201, value=10.55546863, status= None )
oecd.value(area='IT', year='2012')
oecd.value(concept='unemployment rate',area='Australia',year='2004') # 5.
oecd.value(concept='UNR',area='AU',year='2004')
df_oecd = oecd.to_data_frame('year', content='id') df_oecd.head()
df_oecd['area'].describe() # area contains 36 values
count 432 unique 36 top ES freq 12 Name: area, dtype: object
order = [i.did() for i in oecd.dimensions()] order = order[::-1] # reverse list table = oecd.to_table(order=order) table[:5]
[['indicator', 'OECD countries, EU15 and total', '2003-2014', 'Value'], ['unemployment rate', 'Australia', '2003', 5.943826289], ['unemployment rate', 'Austria', '2003', 4.278559338], ['unemployment rate', 'Belgium', '2003', 8.158333333], ['unemployment rate', 'Canada', '2003', 7.594616751]]
Notebook: using jsonstat.py with eurostat api
from future import print_function import os import pandas as pd import jsonstat
import matplotlib as plt %matplotlib inline
url_1 = 'http://ec.europa.eu/eurostat/wdds/rest/data/v1.1/json/en/nama_gdp_c? ˓→precision=1&geo=IT&unit=EUR_HAB&indic_na=B1GM' file_name_1 = "eurostat-name_gpd_c-geo_IT.json"
file_path_1 = os.path.abspath(os.path.join("..", "tests", "fixtures", "www.ec.europa. ˓→eu_eurostat", file_name_1)) if os.path.exists(file_path_1): print("using already donwloaded file {}".format(file_path_1)) else : print("download file") jsonstat.download(url_1, file_name_1) file_path_1 = file_name_
using already donwloaded file /Users/26fe_nas/gioprj.on_mac/prj.python/jsonstat.py/ ˓→tests/fixtures/www.ec.europa.eu_eurostat/eurostat-name_gpd_c-geo_IT.json
collection_1 = jsonstat.from_file(file_path_1) collection_
nama_gdp_c_1 = collection_1.dataset('nama_gdp_c') nama_gdp_c_
nama_gdp_c_1.dimension('time')
nama_gdp_c_1.value(time='2012')
df_1 = nama_gdp_c_1.to_data_frame('time', content='id') df_1.tail()
df_1 = df_1.dropna() # remove rows with NaN values df_1.plot(grid= True , figsize=(20,5))
url_2 = 'http://ec.europa.eu/eurostat/wdds/rest/data/v1.1/json/en/nama_gdp_c? ˓→precision=1&geo=IT&geo=FR&unit=EUR_HAB&indic_na=B1GM' file_name_2 = "eurostat-name_gpd_c-geo_IT_FR.json"
file_path_2 = os.path.abspath(os.path.join("..", "tests", "fixtures", "www.ec.europa. ˓→eu_eurostat", file_name_2)) if os.path.exists(file_path_2):
df_4 = nama_gdp_c_2.to_data_frame('time', content='id', blocked_dims={'geo':'IT'}) df_4 = df_4.dropna() df_4.plot(grid= True ,figsize=(20,5))
Notebook: using jsonstat.py to explore ISTAT data (house price in-
dex)
from future import print_function import os import istat from IPython.core.display import HTML
cache_dir = os.path.abspath(os.path.join("..", "tmp", "istat_cached")) istat.cache_dir(cache_dir) print("cache_dir is '{}'".format(istat.cache_dir()))
cache_dir is '/Users/26fe_nas/gioprj.on_mac/prj.python/jsonstat.py/tmp/istat_cached'
istat.areas()
istat_area_prices = istat.area('Prices') istat_area_prices.datasets()
istat_dataset_dcsp_ipab = istat_area_prices.dataset('DCSP_IPAB') istat_dataset_dcsp_ipab
spec = { "Territory": 1, "Index type": 18,
}
collection = istat_dataset_dcsp_ipab.getvalues(spec) collection
json_stat_data = istat_dataset_dcsp_ipab.getvalues("1,18,0,0,0") json_stat_data
jsonstat_dataset = collection.dataset('IDMISURA1IDTYPPURCHIDTIME') jsonstat_dataset
jsonstat_dataset.dimension('IDMISURA1')
jsonstat_dataset.dimension('IDTYPPURCH')
from future import print_function import os import pandas as pd from IPython.core.display import HTML import matplotlib.pyplot as plt %matplotlib inline
import istat
cache_dir = os.path.abspath(os.path.join("..", "tmp", "istat_cached")) # you could ˓→choice /tmp istat.cache_dir(cache_dir) print("cache_dir is '{}'".format(istat.cache_dir()))
cache_dir is '/Users/26fe_nas/gioprj.on_mac/prj.python/jsonstat.py/tmp/istat_cached'
istat.areas()
istat_area_lab = istat.area('LAB') istat_area_lab
istat_dataset_taxdisoccu = istat_area_lab.dataset('DCCV_TAXDISOCCU') istat_dataset_taxdisoccu
spec = { "Territory": 0, # 1 Italy "Data type": 6, # (6:'unemployment rate') 'Measure': 1, # 1 : 'percentage values' 'Gender': 3, # 3 total 'Age class':31, # 31:'15-74 years' 'Highest level of education attained': 12, # 12:'total', 'Citizenship': 3, # 3:'total') 'Duration of unemployment': 3, # 3:'total' 'Time and frequency': 0 # All }
collection = istat_dataset_taxdisoccu.getvalues(spec) collection
jsonstat_dataset = collection.dataset(0) jsonstat_dataset
df_all = jsonstat_dataset.to_table(rtype=pd.DataFrame) df_all.head()
df_all.pivot('Territory', 'Time and frequency', 'Value').head()
spec = { "Territory": 1, # 1 Italy "Data type": 6, # (6:'unemployment rate') 'Measure': 1, 'Gender': 3, 'Age class':0, # all classes 'Highest level of education attained': 12, # 12:'total', 'Citizenship': 3, # 3:'total') 'Duration of unemployment': 3, # 3:'total') 'Time and frequency': 0 # All }
collection_2 = istat_dataset_taxdisoccu.getvalues(spec) collection_
df = collection_2.dataset(0).to_table(rtype=pd.DataFrame, blocked_dims={'IDCLASETA28': ˓→'31'}) df.head(6)
df = df.dropna() df = df[df['Time and frequency'].str.contains(r'^Q.*')]
df.head(6)
df.plot(x='Time and frequency',y='Value', figsize=(18,4))
fig = plt.figure(figsize=(18,6)) ax = fig.add_subplot(111) plt.grid( True ) df.plot(x='Time and frequency',y='Value', ax=ax, grid= True )