import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from bokeh.plotting import output_notebook, show , ColumnDataSource,figure,gridplot,gmap,GMap,output_file
# Send Bokeh output to the notebook.
output_notebook()

# Load the incident records; the CSV is decoded as ISO-8859-1.
df = pd.read_csv("MissingMigrantsProject.csv", encoding="ISO-8859-1")

# Quick exploratory look at the raw frame: null counts, dtypes, summary
# statistics and the first row.
df.isna().sum()
df.dtypes
df.info()
df.describe()
df.head(1)

# Parse the date column so the .dt accessor can be used on it.
df["date"] = pd.to_datetime(df["date"])
from bokeh.models import GMapOptions,GMapPlot,Circle,DataRange1d,PanTool,WheelZoomTool,BoxZoomTool,BoxSelectTool,ZoomOutTool
# Google-map view centred on the mean latitude/longitude of all incidents.
map_options = GMapOptions(
    lat=df["lat"].mean(),
    lng=df["lon"].mean(),
    map_type="roadmap",
    zoom=11,
)
plot = GMapPlot(
    x_range=DataRange1d(),
    y_range=DataRange1d(),
    map_options=map_options,
)
plot.title.text = "Incident Location"
# NOTE(review): this API key is committed in source; consider rotating it and
# loading it from configuration instead.
plot.api_key = "AIzaSyBs1aqt6rB_wQKtRClHuQj54pME9eKL4pM"

# One blue circle per row, positioned by the "lon"/"lat" columns.
source = ColumnDataSource(df)
circle = Circle(x="lon", y="lat", size=15, fill_color="blue")
plot.add_glyph(source, circle)
plot.add_tools(PanTool(), WheelZoomTool(), ZoomOutTool())

# Write the map to a standalone HTML file and display it.
output_file("Missing Migrant.html")
show(plot)
# Fill gaps and derive helper columns.
#
# Results are assigned back to the frame explicitly. Calling
# `df.col.fillna(..., inplace=True)` mutates an intermediate Series, which
# does not update `df` when pandas Copy-on-Write is active.
df["missing"] = df["missing"].fillna(0)
df["dead"] = df["dead"].fillna(0)

# Calendar parts of the incident date, used for grouping.
df["Year"] = df["date"].dt.year
df["Month"] = df["date"].dt.month

# Combined count of missing and dead per incident.
df["missing_dead"] = df["missing"] + df["dead"]

# Rows without a recorded cause get the placeholder "unknown".
df["cause_of_death"] = df["cause_of_death"].fillna("unknown")
# Collapse free-text causes of death into a fixed set of labels.
#
# Each rule is (case-insensitive regex, replacement label). Rules are applied
# in order against the *current* column values, so a row relabelled by an
# earlier rule can be matched again by a later one; keep the order stable.
_CAUSE_RULES = [
    ("drown", "Drowned"),
    ("shot", "Shot"),
    ("train", "Train related"),
    ("truck", "Truck related"),
    ("suffocat", "suffocation"),
    ("unknow", "Unknown"),
    ("accident|vehicle", "Road accident"),
    ("rape", "Raped"),
    ("murder|killed", "Murdered"),
    ("dehydrat", "Dehydration"),
    ("crush", "Crushed to death"),
    ("violence|violent", "Violence"),
    ("sex", "Raped"),
    ("sick|medicine", "Sicknes lack of medicine"),
    ("heart", "Heart Attack"),
    ("vehicle", "Road accident"),
    ("respirat", "Respiration problem"),
    ("starv", "Starvation"),
    ("fall|fell", "Fell down"),
    ("toxic", "inhaltion of toxic fumes"),
    ("abuse", "Excessive Physical Abuse"),
    ("exposure|Hypothermia", "Exposure/Hypthermia"),
    ("harsh", "Harsh conditions"),
    ("injur", "Injury"),
    ("elec", "Electrocution"),
    ("mix|other", "Mixed/Other"),
    ("unspecified", "Unknown"),
    ("hyperthermia", "Hyperthermia"),
    ("burn", "Fuel Burn/Gas Explosion"),
]

for _pattern, _label in _CAUSE_RULES:
    # na=False keeps the mask strictly boolean even if NaN values remain.
    _matches = df["cause_of_death"].str.contains(_pattern, case=False, na=False)
    # Write through df.loc rather than `df.cause_of_death.loc[...] = ...`:
    # the chained form assigns into an intermediate Series and does not
    # update `df` when pandas Copy-on-Write is active.
    df.loc[_matches, "cause_of_death"] = _label
# Number of incidents per normalised cause of death.
df["cause_of_death"].value_counts()

# Enable inline matplotlib output when running under IPython/Jupyter.
# Written as a plain call because the `%matplotlib inline` magic is not valid
# Python syntax outside an IPython cell.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    # Not running under IPython: there is no magic to enable.
    pass

# Horizontal bars: incident count per cause, smallest first.
df["cause_of_death"].value_counts().sort_values(ascending=True).plot(
    kind="barh", figsize=(8, 10)
)
df.head()

# Horizontal bars: total missing + dead per cause, smallest first.
df.groupby("cause_of_death")["missing_dead"].sum().sort_values().dropna().plot(
    kind="barh", figsize=(10, 20)
)

# Same totals kept as a Series; `dict_death` holds the cause labels and the
# totals side by side for later use as a column data source.
death_cause = df.groupby("cause_of_death")["missing_dead"].sum().sort_values().dropna()
dict_death = dict(cause=death_cause.index, sum_death=death_cause)

# Single-column DataFrame view of the totals.
death_cause = pd.DataFrame(death_cause)
death_cause.columns = ["Sum_of_Death"]
death_cause.head(1)
from bokeh.models import map_plots,HoverTool
import math
# Totals of missing + dead per cause, largest first.
death_cause = (
    df.groupby("cause_of_death")["missing_dead"].sum().sort_values(ascending=False)
)

# The chart itself is driven by `dict_death` (cause labels + totals).
source = ColumnDataSource(dict_death)
hover = HoverTool(
    tooltips=[
        ("Cause of Death", "@cause"),
        ("Number Of Death", "@sum_death"),
    ]
)

list(source.data["cause"])

# Vertical bars, one per cause, on a categorical x range.
# (A horizontal variant would use f.hbar with y="cause", right="sum_death".)
f = figure(x_range=list(source.data["cause"]), width=800, height=400)
f.vbar(x="cause", top="sum_death", width=0.9, source=source, color="red")
f.add_tools(hover)

# Title.
f.title.text = "Number Of Missing and Dead and Cause of death"
f.title_location = "above"
f.title.align = "center"

# X axis: styled, then hidden; the hover tool carries the cause labels.
f.xaxis.minor_tick_line_alpha = 0
f.xaxis.major_label_text_color = "red"
f.xaxis.visible = False

# Y axis: no minor ticks, vertical tick labels, labelled.
f.yaxis.minor_tick_in = 0
f.yaxis.minor_tick_out = 0
f.yaxis.major_label_orientation = "vertical"
f.yaxis.axis_label = "Number Of Death and Missing"
f.yaxis.axis_label_text_alpha = 1
# NOTE(review): "Aries" may be a typo for "Arial" — confirm the intended font.
f.yaxis.axis_label_text_font = "Aries"

show(f)
# For each threshold 1, 21, 41, 61, 81: how many causes have a total of
# missing + dead strictly above that threshold.
test = [
    death_cause.where(death_cause > threshold).count()
    for threshold in range(1, 100, 20)
]

plt.figure(figsize=(12, 5))
# Bar positions are passed positionally: the `left=` keyword was renamed to
# `x` in matplotlib 2.0 and is rejected by matplotlib 3.x.
plt.bar(range(1, len(test) + 1), test)

# Causes whose total is below 200, largest first (others become NaN).
death_cause.where(death_cause < 200).sort_values(ascending=False)
# Per-cause sum, count, mean and max of missing + dead, ordered by the
# largest single-incident value.
d_c = (
    df.groupby("cause_of_death")["missing_dead"]
    .agg(["sum", "count", "mean", "max"])
    .sort_values("max", ascending=False)
)

# Assign the filled column back explicitly; `df.col.fillna(..., inplace=True)`
# mutates an intermediate Series and does not update `df` when pandas
# Copy-on-Write is active.
df["affected_nationality"] = df["affected_nationality"].fillna("unknown")
# Bar chart of total missing + dead per region of origin, largest first.
df.groupby("region_origin")["missing_dead"].sum().sort_values(
    ascending=False
).plot(kind="bar")
plt.title("Migrants death by region of Origin")
plt.xlabel("Region Of Origin")
plt.ylabel("Total Missing and dead")
# Grouped bars: one group per month, one bar per year.
ax = (
    df.groupby(["Year", "Month"])["missing_dead"].sum().unstack().T.plot(kind="bar")
)

# Overlay the all-years total per month. The bar chart places its categories
# at positions 0..n-1, so the line is drawn at those positions rather than at
# the Month index values (1..12), which would shift it one category right.
monthly_total = df.groupby("Month")["missing_dead"].sum()
ax.plot(
    range(len(monthly_total)),
    monthly_total.values,
    color="cyan",
    label="Over all Month deceased",
)
ax.legend()
df.head()

# Line chart of total missing + dead per incident region, largest first,
# with the region tick labels rotated 90 degrees.
df.groupby("incident_region")["missing_dead"].sum().sort_values(
    ascending=False
).plot(kind="line", rot=90)
plt.title("Number of Death vs region of incident")
plt.xlabel("Region of Incident")
# NOTE(review): the plotted value is the missing_dead sum, not an incident
# count — confirm whether this label is intended.
plt.ylabel("Total Number of Incidents")

df.head()