Missing Migrants Crisis

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from bokeh.plotting import output_notebook, show , ColumnDataSource,figure,gridplot,gmap,GMap,output_file
output_notebook()
Loading BokehJS ...
In [2]:
df = pd.read_csv("MissingMigrantsProject.csv",encoding = "ISO-8859-1")
In [3]:
df.isnull().sum()
Out[3]:
id                         0
cause_of_death           203
region_origin            443
affected_nationality    1575
missing                 2149
dead                     102
incident_region           10
date                       9
source                     7
reliability              324
lat                        4
lon                        4
dtype: int64
In [4]:
df.dtypes
Out[4]:
id                        int64
cause_of_death           object
region_origin            object
affected_nationality     object
missing                 float64
dead                    float64
incident_region          object
date                     object
source                   object
reliability              object
lat                     float64
lon                     float64
dtype: object
In [5]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2420 entries, 0 to 2419
Data columns (total 12 columns):
id                      2420 non-null int64
cause_of_death          2217 non-null object
region_origin           1977 non-null object
affected_nationality    845 non-null object
missing                 271 non-null float64
dead                    2318 non-null float64
incident_region         2410 non-null object
date                    2411 non-null object
source                  2413 non-null object
reliability             2096 non-null object
lat                     2416 non-null float64
lon                     2416 non-null float64
dtypes: float64(4), int64(1), object(7)
memory usage: 227.0+ KB
In [6]:
df.describe()
Out[6]:
id missing dead lat lon
count 2420.00000 271.000000 2318.000000 2416.000000 2416.000000
mean 95926.26405 39.656827 4.729077 26.901380 -13.995935
std 62191.92252 84.130918 20.539717 11.518420 61.694081
min 1.00000 0.000000 0.000000 -26.224531 -117.070560
25% 28571.25000 3.000000 1.000000 19.535706 -97.035666
50% 121178.00000 10.000000 1.000000 29.348889 14.471100
75% 144678.25000 33.000000 3.000000 34.041291 32.010437
max 184750.00000 750.000000 750.000000 66.967199 116.224928
In [7]:
df.head(1)
Out[7]:
id cause_of_death region_origin affected_nationality missing dead incident_region date source reliability lat lon
0 1 Presumed drowning Middle East Iraq 1.0 1.0 Mediterranean 05/11/2015 IOM Greece Verified 36.8915 27.2877
In [8]:
df.date = pd.to_datetime(df.date)
In [9]:
from bokeh.models import GMapOptions,GMapPlot,Circle,DataRange1d,PanTool,WheelZoomTool,BoxZoomTool,BoxSelectTool,ZoomOutTool
map_options = GMapOptions(lat =df.lat.mean(), lng = df.lon.mean(),map_type = "roadmap", zoom = 11)

plot = GMapPlot(x_range = DataRange1d() , y_range = DataRange1d() , map_options = map_options)

plot.title.text = "Incident Location"
plot.api_key = "AIzaSyBs1aqt6rB_wQKtRClHuQj54pME9eKL4pM"
source = ColumnDataSource(df)

circle =Circle(x ="lon", y = "lat" , size = 15, fill_color = "blue")
plot.add_glyph(source , circle)
plot.add_tools(PanTool(),WheelZoomTool(),ZoomOutTool())
output_file("Missing Migrant.html")
show(plot)
In [10]:
df.missing.fillna(0, inplace = True)
df.dead.fillna(0, inplace = True)
In [11]:
df["Year"] = df.date.dt.year
df["Month"] = df.date.dt.month
df["missing_dead"] = df.missing + df.dead

df.cause_of_death.fillna("unknown",inplace =True)
In [12]:
df.cause_of_death.loc[df.cause_of_death.str.contains("drown",case= False)] = "Drowned"

df.cause_of_death.loc[df.cause_of_death.str.contains("shot",case= False)] = "Shot"

df.cause_of_death.loc[df.cause_of_death.str.contains("train",case= False)] = "Train related"

df.cause_of_death.loc[df.cause_of_death.str.contains("truck",case= False)] = "Truck related"

df.cause_of_death.loc[df.cause_of_death.str.contains("suffocat",case= False)] = "suffocation"

df.cause_of_death.loc[df.cause_of_death.str.contains("unknow",case= False)] = "Unknown"

df.cause_of_death.loc[df.cause_of_death.str.contains("accident|vehicle",case= False)] = "Road accident"

df.cause_of_death.loc[df.cause_of_death.str.contains("rape",case= False)] = "Raped"

df.cause_of_death.loc[df.cause_of_death.str.contains("murder|killed",case= False)] = "Murdered"

df.cause_of_death.loc[df.cause_of_death.str.contains("dehydrat",case= False)] = "Dehydration"

df.cause_of_death.loc[df.cause_of_death.str.contains("crush",case= False)] = "Crushed to death"

df.cause_of_death.loc[df.cause_of_death.str.contains("violence|violent",case= False)] = "Violence"

df.cause_of_death.loc[df.cause_of_death.str.contains("sex",case= False)]="Raped"

df.cause_of_death.loc[df.cause_of_death.str.contains("sick|medicine",case= False)]="Sicknes lack of medicine"

df.cause_of_death.loc[df.cause_of_death.str.contains("heart",case= False)] = "Heart Attack"

df.cause_of_death.loc[df.cause_of_death.str.contains("vehicle",case= False)] = "Road accident"

df.cause_of_death.loc[df.cause_of_death.str.contains("respirat",case= False)] = "Respiration problem"

df.cause_of_death.loc[df.cause_of_death.str.contains("starv",case= False)] = "Starvation"

df.cause_of_death.loc[df.cause_of_death.str.contains("fall|fell",case= False)] = "Fell down"

df.cause_of_death.loc[df.cause_of_death.str.contains("toxic",case= False)] = "inhaltion of toxic fumes"

df.cause_of_death.loc[df.cause_of_death.str.contains("abuse",case= False)] = "Excessive Physical Abuse"

df.cause_of_death.loc[df.cause_of_death.str.contains("exposure|Hypothermia",case= False)] = "Exposure/Hypthermia"

df.cause_of_death.loc[df.cause_of_death.str.contains("harsh",case= False)] = "Harsh conditions"

df.cause_of_death.loc[df.cause_of_death.str.contains("injur",case= False)] = "Injury"

df.cause_of_death.loc[df.cause_of_death.str.contains("elec",case= False)] = "Electrocution"

df.cause_of_death.loc[df.cause_of_death.str.contains("mix|other",case= False)] = "Mixed/Other"

df.cause_of_death.loc[df.cause_of_death.str.contains("unspecified",case= False)] = "Unknown"

df.cause_of_death.loc[df.cause_of_death.str.contains("hyperthermia",case= False)] ="Hyperthermia"

df.cause_of_death.loc[df.cause_of_death.str.contains("burn",case= False)] ="Fuel Burn/Gas Explosion"
C:\Users\Mostafa\Anaconda3\lib\site-packages\pandas\core\indexing.py:179: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
In [13]:
df.cause_of_death.value_counts()
Out[13]:
Drowned                                             692
Unknown                                             446
Road accident                                       216
Sicknes lack of medicine                            209
Shot                                                161
Dehydration                                         100
suffocation                                          85
Train related                                        82
Exposure/Hypthermia                                  51
Hyperthermia                                         49
Starvation                                           40
Murdered                                             38
Raped                                                36
Mixed/Other                                          33
Violence                                             33
Excessive Physical Abuse                             25
Harsh conditions                                     21
Truck related                                        13
Heart Attack                                          8
Electrocution                                         8
Injury                                                7
Crushed to death                                      6
Fell down                                             5
Asphyxiation                                          5
Stabbed                                               3
Plane stowaway                                        3
Fuel Burn/Gas Explosion                               2
Presumed asphyxiation                                 2
inhaltion of toxic fumes                              2
Respiration problem                                   2
                                                   ... 
Asphyxiation (Silica sand inhalation)                 1
Poison                                                1
Blunt force head trauma                               1
Homicide, likely by asphyxiation                      1
Tekeze River, near Himora, Ethiopia                   1
Head trauma (hit by boat propeller)                   1
AH1N1 influenza virus, while stuck at border          1
Pima County (see spreadsheet for exact location)      1
Pulmonary edema                                       1
Clubbed/beaten to death                               1
Boat fire                                             1
Suicide                                               1
Assaulted by smugglers                                1
Renal insufficiency and pulmonary edema               1
Pulmonary edema, Kidney failure                       1
Pulmonary complications                               1
Bronchial aspiration                                  1
Various                                               1
Found hanged                                          1
Landmine                                              1
Bronchopneumonia                                      1
Meningitis                                            1
Digestive bleeding                                    1
Pending                                               1
Tortured to death                                     1
Hunger, fatigue                                       1
Fuel Inhalation                                       1
Renal insufficiency                                   1
Cervix cancer                                         1
Lung infection                                        1
Name: cause_of_death, Length: 65, dtype: int64
In [14]:
%matplotlib inline
df.cause_of_death.value_counts().sort_values(ascending = True).plot(kind = "barh",figsize = (8,10))
Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x1b4851bf748>
In [15]:
df.head()
Out[15]:
id cause_of_death region_origin affected_nationality missing dead incident_region date source reliability lat lon Year Month missing_dead
0 1 Drowned Middle East Iraq 1.0 1.0 Mediterranean 2015-05-11 IOM Greece Verified 36.89150 27.287700 2015.0 5.0 2.0
1 3 Train related Central America & Mexico Honduras 0.0 1.0 Central America incl. Mexico 2015-03-11 La Jornada Partially Verified 15.95640 -93.663100 2015.0 3.0 1.0
2 4 Drowned Middle East NaN 0.0 1.0 Mediterranean 2015-03-11 Hellenic Coast Guard Verified 36.50439 27.363253 2015.0 3.0 1.0
3 6 Drowned MENA NaN 6.0 4.0 Mediterranean 2015-01-11 Reuters Partially Verified 37.28560 27.086600 2015.0 1.0 10.0
4 7 Road accident South East Asia Cambodia 0.0 4.0 Southeast Asia 2015-01-11 Phnom Penh Post Partially Verified 13.36110 100.985000 2015.0 1.0 4.0
In [16]:
df.groupby("cause_of_death").missing_dead.sum().sort_values().dropna().plot(kind = "barh", figsize = (10,20))
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x1b486768da0>
In [17]:
death_cause = df.groupby("cause_of_death").missing_dead.sum().sort_values().dropna()
In [18]:
dict_death = dict(cause = death_cause.index, sum_death = death_cause)
In [19]:
death_cause = pd.DataFrame(death_cause)
death_cause.columns = ["Sum_of_Death"]
death_cause.head(1)
Out[19]:
Sum_of_Death
cause_of_death
AH1N1 influenza virus, while stuck at border 1.0
In [20]:
from bokeh.models import map_plots,HoverTool
import math
death_cause = df.groupby("cause_of_death").missing_dead.sum().sort_values(ascending = False)


source = ColumnDataSource(dict_death)
hover = HoverTool(tooltips = [("Cause of Death","@cause"),("Number Of Death","@sum_death")])
# source.data["sum_death"][50]
list(source.data["cause"])
f = figure(x_range = list(source.data["cause"]), width = 800 , height = 400)
f.vbar(x = "cause",width = 0.9, top = "sum_death", source =source,color = "red")
# f.hbar(y = "cause", height = 0.6 , right = "sum_death", source = source)
f.xaxis.minor_tick_line_alpha =0
# f.xaxis.major_label_orientation =math.pi / 4
f.xaxis.major_label_text_color = "red"
f.xaxis.visible = False
f.add_tools(hover)
f.yaxis.minor_tick_in = 0
f.yaxis.minor_tick_out = 0
f.yaxis.major_label_orientation = "vertical"
f.title.text = "Number Of Missing and Dead and Cause of death"
f.title_location = "above"
f.title.align ="center"
f.yaxis.axis_label = "Number Of Death and Missing"
f.yaxis.axis_label_text_alpha =1
f.yaxis.axis_label_text_font = "Aries"

show(f)
In [21]:
test = []
for i in range(1,100,20):
    r = death_cause.where(death_cause > i).count()
    test.append(r)
plt.figure(figsize = (12,5))
plt.bar(left = range(1,6),height = test)
# plt.xticks(range(1,100,20));
Out[21]:
<Container object of 5 artists>
In [22]:
death_cause.where(death_cause < 200).sort_values(ascending = False)
Out[22]:
cause_of_death
Starvation                                          139.0
Exposure/Hypthermia                                 126.0
Train related                                       102.0
Violence                                             90.0
Murdered                                             80.0
Raped                                                64.0
Harsh conditions                                     64.0
Hyperthermia                                         58.0
Crushed to death                                     53.0
Presumed asphyxiation                                51.0
Excessive Physical Abuse                             47.0
Asphyxiation                                         45.0
Fuel Inhalation                                      29.0
Truck related                                        15.0
Various                                              12.0
Fuel Burn/Gas Explosion                              11.0
Heart Attack                                          8.0
Electrocution                                         8.0
Injury                                                7.0
Fell down                                             5.0
Attacked by hippopotamus                              4.0
Boat fire                                             4.0
Clubbed/beaten to death                               4.0
Plane stowaway                                        3.0
Stabbed                                               3.0
inhaltion of toxic fumes                              2.0
Plane Stowaway                                        2.0
Respiration problem                                   2.0
AH1N1 influenza virus, while stuck at border          1.0
Head trauma (hit by boat propeller)                   1.0
                                                    ...  
Meningitis                                            1.0
Pending                                               1.0
Pima County (see spreadsheet for exact location)      1.0
Poison                                                1.0
Pulmonary complications                               1.0
Pulmonary edema                                       1.0
Pulmonary edema, Kidney failure                       1.0
Renal insufficiency                                   1.0
Renal insufficiency and pulmonary edema               1.0
Suicide                                               1.0
Tekeze River, near Himora, Ethiopia                   1.0
Tortured to death                                     1.0
Glycemic crisis (Lack of Insuline Treatment)          1.0
Found hanged                                          1.0
Landmine                                              1.0
Assaulted by smugglers                                1.0
Bronchial aspiration                                  1.0
Bronchopneumonia                                      1.0
Cervix cancer                                         1.0
Blunt force head trauma                               1.0
Asphyxiation (Silica sand inhalation)                 1.0
Beat-up and thrown into river                         1.0
Drowned                                               NaN
Unknown                                               NaN
Dehydration                                           NaN
Mixed/Other                                           NaN
Road accident                                         NaN
Sicknes lack of medicine                              NaN
Shot                                                  NaN
suffocation                                           NaN
Name: missing_dead, Length: 65, dtype: float64
In [23]:
d_c = df.groupby("cause_of_death").missing_dead.agg(["sum","count","mean","max"]).sort_values("max",ascending = False)
In [ ]:

In [27]:
df.affected_nationality.fillna("unknown", inplace = True)
In [37]:
df.groupby("region_origin").missing_dead.sum().sort_values(ascending = False).plot(kind = "bar")
plt.xlabel("Region Of Origin")
plt.ylabel("Total Missing and dead")
plt.title("Migrants death by region of Origin")
Out[37]:
<matplotlib.text.Text at 0x1b489557710>
In [48]:
df.groupby(["Year","Month"]).missing_dead.sum().unstack().T.plot(kind = "bar")
plt.plot(df.groupby("Month").missing_dead.sum(), color = "cyan", label = "Over all Month deceased")
plt.legend()
Out[48]:
<matplotlib.legend.Legend at 0x1b4899e1c88>
In [54]:
df.head()
Out[54]:
id cause_of_death region_origin affected_nationality missing dead incident_region date source reliability lat lon Year Month missing_dead
0 1 Drowned Middle East Iraq 1.0 1.0 Mediterranean 2015-05-11 IOM Greece Verified 36.89150 27.287700 2015.0 5.0 2.0
1 3 Train related Central America & Mexico Honduras 0.0 1.0 Central America incl. Mexico 2015-03-11 La Jornada Partially Verified 15.95640 -93.663100 2015.0 3.0 1.0
2 4 Drowned Middle East unknown 0.0 1.0 Mediterranean 2015-03-11 Hellenic Coast Guard Verified 36.50439 27.363253 2015.0 3.0 1.0
3 6 Drowned MENA unknown 6.0 4.0 Mediterranean 2015-01-11 Reuters Partially Verified 37.28560 27.086600 2015.0 1.0 10.0
4 7 Road accident South East Asia Cambodia 0.0 4.0 Southeast Asia 2015-01-11 Phnom Penh Post Partially Verified 13.36110 100.985000 2015.0 1.0 4.0
In [66]:
df.groupby("incident_region").missing_dead.sum().sort_values(ascending = False).plot(kind ="line", rot = 90)
plt.xlabel("Region of Incident")
plt.ylabel("Total Number of Incidents")
plt.title("Number of Death vs region of incident")
Out[66]:
<matplotlib.text.Text at 0x1b48a78c7f0>
In [67]:
df.head()
Out[67]:
id cause_of_death region_origin affected_nationality missing dead incident_region date source reliability lat lon Year Month missing_dead
0 1 Drowned Middle East Iraq 1.0 1.0 Mediterranean 2015-05-11 IOM Greece Verified 36.89150 27.287700 2015.0 5.0 2.0
1 3 Train related Central America & Mexico Honduras 0.0 1.0 Central America incl. Mexico 2015-03-11 La Jornada Partially Verified 15.95640 -93.663100 2015.0 3.0 1.0
2 4 Drowned Middle East unknown 0.0 1.0 Mediterranean 2015-03-11 Hellenic Coast Guard Verified 36.50439 27.363253 2015.0 3.0 1.0
3 6 Drowned MENA unknown 6.0 4.0 Mediterranean 2015-01-11 Reuters Partially Verified 37.28560 27.086600 2015.0 1.0 10.0
4 7 Road accident South East Asia Cambodia 0.0 4.0 Southeast Asia 2015-01-11 Phnom Penh Post Partially Verified 13.36110 100.985000 2015.0 1.0 4.0