International-Debt-Statistics

GitHub Repository: https://github.com/skhiearth/International-Debt-Statistics

Analysing international debt of various countries using data from International Monetary Fund using Visualisations.

The data used is provided by International Monetary Fund. It contains debt statistics for several countries across the globe as recorded from 1950 to 2018.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="ticks", color_codes=True)

import warnings
warnings.filterwarnings('ignore')

# Read the XLS
df = pd.read_csv('Dataset/imf-debt.csv', na_values = 'no data')
df.head(3)
Out[1]:
Central Government Debt (Percent of GDP) 1950 1951 1952 1953 1954 1955 1956 1957 1958 ... 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 Afghanistan NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 16.247261 7.709434 7.512624 6.769370 6.914434 8.700291 9.144177 7.823194 7.452149 6.894410
2 Albania NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 59.670180 57.710251 59.399812 62.119953 65.529876 70.025578 72.670025 72.375941 70.091619 68.044939

3 rows × 70 columns

In [2]:
df.drop(df.index[0], inplace = True)
df.head(2)
Out[2]:
Central Government Debt (Percent of GDP) 1950 1951 1952 1953 1954 1955 1956 1957 1958 ... 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
1 Afghanistan NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 16.247261 7.709434 7.512624 6.769370 6.914434 8.700291 9.144177 7.823194 7.452149 6.894410
2 Albania NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 59.670180 57.710251 59.399812 62.119953 65.529876 70.025578 72.670025 72.375941 70.091619 68.044939

2 rows × 70 columns

In [3]:
# Converting dataframe from wide to tall
df2 = df.reset_index()
cols = list(df2.columns[2:])
time_series = pd.melt(df2, id_vars='Central Government Debt (Percent of GDP)', value_vars=cols)
time_series.tail()
Out[3]:
Central Government Debt (Percent of GDP) variable value
12139 Yemen 2018 64.752349
12140 Zambia 2018 78.109917
12141 Zimbabwe 2018 37.057447
12142 NaN 2018 NaN
12143 ©IMF, 2019 2018 NaN
In [4]:
# Rename columns
time_series.rename(columns = {'Central Government Debt (Percent of GDP)':'Debt'}, inplace = True) 

# Dropping redunant rows
filtered_df = time_series[time_series['Debt'].notnull()]
filtered_df = filtered_df[:-1]

filter = ['1950', '1955', '1960', '1965', '1970', '1975', '1980', '1985', '1990',
         '1995', '2000', '2005', '2010', '2015', '2018'] 

filtered_df = filtered_df[filtered_df['variable'].isin(filter)] 

print(filtered_df.shape)
filtered_df.tail()
(2624, 3)
Out[4]:
Debt variable value
12137 Vietnam 2018 NaN
12138 West Bank and Gaza 2018 36.621482
12139 Yemen 2018 64.752349
12140 Zambia 2018 78.109917
12141 Zimbabwe 2018 37.057447
In [5]:
# Selected Countries
countries = ["India", "United States", "Switzerland", "Germany", "United Kingdom", "France"]
select_countries = filtered_df[filtered_df['Debt'].isin(countries)]

# Rename columns
select_countries.rename(columns = {'Debt': 'Country'}, inplace = True) 

# Convert to numeric
select_countries['variable'] = select_countries['variable'].astype(int)
select_countries['value'] = select_countries['value'].astype(float)
In [6]:
rel = sns.relplot(x = "variable", y = "value", data = select_countries, 
            hue = "Country", kind = "line", height = 6, aspect = 2.0)
rel.set(xlabel='Year', ylabel='Central Government Debt (Percent of GDP)')
rel.set(title = "Change in Central Government Debt of Selected Countries")
Out[6]:
<seaborn.axisgrid.FacetGrid at 0x1a185796d0>
In [7]:
sns.set_style("white")
facet = sns.FacetGrid(select_countries, col = "Country", col_wrap=3, height = 4)
facet.map(sns.regplot, "variable", "value").set(xlim=(1950,2018), ylim=(0,100))

# Adding supertitle
plt.subplots_adjust(top=0.87)
supertitle = "Regression models fitted to the change in Central Government Debt of Selected Countries"
facet.fig.suptitle(supertitle, size=16)
Out[7]:
Text(0.5, 0.98, 'Regression models fitted to the change in Central Government Debt of Selected Countries')
In [8]:
# Choropleth Map
import geopandas as gpd
shapefile = 'dataset/countries_110m/ne_110m_admin_0_countries.shp'

# Read shapefile using Geopandas
gdf = gpd.read_file(shapefile)[['ADMIN', 'ADM0_A3', 'geometry']]

# Rename columns
gdf.columns = ['Country', 'country_code', 'geometry']
gdf = gdf.drop(gdf.index[159]) # Row corresponding to Antarctica
gdf.at[4,'Country']='United States'
gdf.at[18,'Country']='Russian Federation'
gdf.head(5)
Out[8]:
Country country_code geometry
0 Fiji FJI MULTIPOLYGON (((180.00000 -16.06713, 180.00000...
1 United Republic of Tanzania TZA POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...
2 Western Sahara SAH POLYGON ((-8.66559 27.65643, -8.66512 27.58948...
3 Canada CAN MULTIPOLYGON (((-122.84000 49.00000, -122.9742...
4 United States USA MULTIPOLYGON (((-122.84000 49.00000, -120.0000...
In [9]:
import ipywidgets as widgets
from IPython.display import display
import json

years = ['1950', '1955', '1960', '1965', '1970', '1975', '1980', '1985', '1990',
         '1995', '2000', '2005', '2010', '2015', '2018'] 
year_to_show = ''

def custom_df_define(year):
    year_to_show = year
    custom_df = filtered_df[filtered_df['variable'] == year]
    merged = gdf.merge(custom_df, left_on = 'Country', right_on = 'Debt', how = 'left')
    merged.value.fillna('No data', inplace = True)
    merged_json = json.loads(merged.to_json())
    json_data = json.dumps(merged_json)
    plot_geo()
    
dropdown_year = widgets.Dropdown(options = years)

def dropdown_year_eventhandler(change):
    year_to_show = change.new
    custom_df_define(change.new)
    
dropdown_year.observe(dropdown_year_eventhandler, names='value')
display(dropdown_year)
In [10]:
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer

def plot_geo():
    # Input GeoJSON source that contains features for plotting.
    geosource = GeoJSONDataSource(geojson = json_data)

    # Dark blue is highest debt percentage.
    palette = brewer['YlGnBu'][8]
    palette = palette[::-1]

    # Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors
    color_mapper = LinearColorMapper(palette = palette, low = 0, high = 85, nan_color = '#d9d9d9')

    #Define custom tick labels for color bar.
    tick_labels = {'0': '0%', '5': '5%', '10':'10%', '15':'15%', '20':'20%', 
                   '25':'25%', '30':'30%','35':'35%', '40': '40%',
                   '50':'50%', '55':'55%','60':'60%', '65': '65%',
                   '70':'70%', '75':'75%','80':'80%', '85': '>85%',}

    #Create color bar. 
    color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8, width = 500, 
                         height = 20, border_line_color=None,location = (0,0), 
                         orientation = 'horizontal', major_label_overrides = tick_labels)

    title_ = 'Central Government Debt (Percent of GDP), ' + year
    p = figure(title = title_, plot_height = 530 , 
               plot_width = 950, toolbar_location = None)
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None
    p.axis.visible = False

    # Add patch renderer to figure. 
    p.patches('xs','ys', source = geosource,fill_color = {'field' :'value', 'transform' : color_mapper},
              line_color = 'black', line_width = 0.25, fill_alpha = 1)

    #Specify figure layout.
    p.add_layout(color_bar, 'below')

    output_notebook()
    show(p)

    output_file("map.html", title="Choropleth GDP", mode='inline')