By David Taylor, www.prooffreader.com (blog), www.dtdata.io (hire me!)
There is a blog post about this notebook at (html).
The CDC's Compressed Mortality Files were downloaded from http://wonder.cdc.gov/mortsql.html. They can't be linked to directly, so I'll describe how to download them here.
On the mortsql.html page, repeat the same steps for each of the three links:
I browsed the ICD-8 through ICD-10 code lists at http://www.wolfbane.com/icd/index.html to find unusual causes of death.
import sys
print(sys.version) #I'm using Python 2.7 because I can't get plotly to work in 3.4
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.plotly as py
import plotly.tools as tls
from plotly.graph_objs import *
def _read_credential(path):
    """Return the contents of a one-line credential file, without padding.

    The file is opened in a context manager so the handle is closed as soon
    as it has been read, and surrounding whitespace (typically a trailing
    newline added by an editor) is stripped so it is not sent as part of the
    credential.
    """
    with open(path, 'r') as handle:
        return handle.read().strip()


# The credentials live in files outside the notebook so they are never shown
# in a cell; they are passed straight to sign_in rather than being kept in
# module-level variables.
py.sign_in(_read_credential('Z:/DT/plotly_emanresu.txt'),
           _read_credential('Z:/DT/plotly_yekipa.txt'))
# The CDC export is split into three tab-separated files, one per span of
# years. Read them in chronological order and outer-merge each one into the
# running table so that every row from every file is kept.
era_files = (
    'Compressed Mortality, 1968-1978 per year.txt',
    'Compressed Mortality, 1979-1998 per year.txt',
    'Compressed Mortality, 1999-2012 per year.txt',
)
df = None
for era_file in era_files:
    era = pd.read_csv(era_file, sep='\t', encoding="latin-1")
    df = era if df is None else pd.merge(df, era, 'outer')

# Keep only rows that carry a year, then discard the two columns the analysis
# never uses.
df = df.dropna(subset=['Year']).drop(['Notes', 'Year Code'], axis=1)
df.head()
# Causes of death such as "Classical, 000.0" are members of larger groups of
# causes; the names of those groups are not part of this table.
Note: Plotly is good for making quick stacked bar graphs, but its legends are not very customizable (as far as I could determine). I have therefore made two versions of each chart, one with and one without a legend, to be assembled later in Photoshop.
def plot_graphs(df, search_term, title, first_year=1968, last_year=2012):
    """Render stacked bar charts of yearly deaths for the matching causes.

    Every row of ``df`` whose 'Cause of death' matches ``search_term`` is
    selected, and one bar series is built per distinct cause: one bar per
    year, with 0 where the cause has no row for that year. The stacked chart
    is then rendered twice through ``py.image.ishow``: first a fixed-size,
    titled version without a legend, then an auto-sized version with the
    legend placed at x=0, y=0.

    Args:
        df: DataFrame with 'Cause of death', 'Year' and 'Deaths' columns.
        search_term: Pattern passed to ``Series.str.contains`` (interpreted
            as a regular expression).
        title: Title shown on the legend-free chart.
        first_year: First year on the x axis, inclusive.
        last_year: Last year on the x axis, inclusive.
    """
    # na=False treats rows with a missing cause as non-matches; without it a
    # missing value ends up in the mask and the mask cannot be used to index.
    matches = df['Cause of death'].str.contains(search_term, na=False)
    dfchart = df[matches]

    # A concrete list rather than a lazy range object, so the x values are
    # handled the same way under Python 2 and Python 3.
    years = list(range(first_year, last_year + 1))

    bars = []
    for cause in dfchart['Cause of death'].unique():
        # Select this cause's rows once, instead of re-filtering the whole
        # frame twice for every (cause, year) pair.
        cause_rows = dfchart[dfchart['Cause of death'] == cause]
        deaths = []
        for year in years:
            year_deaths = cause_rows.loc[cause_rows['Year'] == year, 'Deaths']
            # Only the first matching row is charted; a year without a row
            # gets an explicit zero so every series has the same length.
            deaths.append(year_deaths.iloc[0] if len(year_deaths) > 0 else 0)
        bars.append(Bar(x=years, y=deaths, name=cause))
    data = Data(bars)

    # Compact, titled chart with the legend suppressed.
    layout_nolegend = Layout(
        autosize=False,
        width=650,
        height=250,
        margin=Margin(
            l=50,
            r=10,
            b=20,
            t=40,
            pad=0
        ),
        barmode='stack',
        title=title,
        yaxis=YAxis(
            title='Deaths per year',
            titlefont=Font(
                size=12
            ),
            tickfont=Font(
                size=12,
            )),
        showlegend=False
    )

    # Same data, auto-sized and untitled, with the legend shown.
    layout_legend = Layout(
        autosize=True,
        legend=Legend(
            x=0,
            y=0,
        ),
        barmode='stack',
    )

    fig1 = Figure(data=data, layout=layout_nolegend)
    py.image.ishow(fig1)
    fig2 = Figure(data=data, layout=layout_legend)
    py.image.ishow(fig2)
# Each plot_graphs call below charts every cause of death whose name matches
# the given regular expression; a leading character class such as '[Pp]'
# accepts either capitalisation of the first letter.
plot_graphs(df, 'caries', 'Deaths due to Dental Caries in CDC database, 1968-2012')
plot_graphs(df, '[Pp]enis', "Causes of death containing the word 'penis' in CDC database, 1968-2012")
# Show the matching rows themselves (no chart) for causes containing 'transvest'.
df[(df['Cause of death'].str.contains('[Tt]ransvest'))]
# Matches 'weather'/'Weather', or ' storm' preceded by any character other
# than 'r' or 'o' -- presumably to skip unrelated phrases; verify against the data.
# NOTE(review): the capturing group may make pandas warn about match groups.
plot_graphs(df, '([Ww]eather|[^ro] storm)', "Causes of death contain 'weather' or 'storm' in CDC database, 1968-2012")
plot_graphs(df, '[Mm]igraine', "Causes of death containing word 'migraine', CDC database 1968-2012")
# 'cleft ' followed by 'p' or 'l', i.e. the start of 'palate' or 'lip'.
plot_graphs(df, '[Cc]left [pl]', "Causes of death containing words 'cleft palate'/'cleft lip', CDC database 1968-2012")
plot_graphs(df, '[Ff]oreign body', "Causes of death containing the words 'foreign body' in CDC database, 1968-2012")
plot_graphs(df, '[Ee]lbow', "Causes of death containing the word 'elbow' in CDC database, 1968-2012")
plot_graphs(df, '[Ee]nthesopath', "Causes of death containing the word 'enthesopathy' in CDC database, 1968-2012")
# df2 is df without the rows whose cause mentions 'pedestrian'.
df2 = df[~df['Cause of death'].str.contains('pedestrian')]
plot_graphs(df2, '[Cc]onjunctivitis', "Causes of death containing the word 'conjunctivitis' in CDC database, 1968-2012")
plot_graphs(df, '[Dd]og', "Causes of death containing the word 'dog' in CDC database, 1968-2012")
# ' war' must be preceded by a space and followed by a character that is
# neither 't' nor 'f'; a cause that ends in ' war' therefore does not match.
plot_graphs(df, ' war[^tf]', "Causes of death containing the word 'war' in CDC database, 1968-2012")
plot_graphs(df, '[Pp]soriasis', "Causes of death containing the word 'psoriasis' in CDC database, 1968-2012")
plot_graphs(df, '[Ii]ngrow', "Cause of death by ingrowing nail in CDC database, 1968-2012")
# Show only the 'ngrow' rows that actually recorded at least one death.
df[(df['Cause of death'].str.contains('ngrow'))&(df.Deaths>0)]
plot_graphs(df, '[Ss]pacecraft', "Causes of death containing the word 'spacecraft' in CDC database, 1968-2012")
plot_graphs(df, '[Aa]nimal', "Causes of death containing the word 'animal' in CDC database, 1968-2012")
# Rebuild df2 (again excluding 'pedestrian' causes) before the combined
# animal/bee/dog chart.
df2 = df[~df['Cause of death'].str.contains('pedestrian')]
plot_graphs(df2, '([Aa]nimal|[Bb]ee|[Dd]og)', "Causes of death by 'animal'/'dog'/'bee' in CDC database, 1968-2012")
# No cats: drop causes where 'cat' directly follows a non-space character
# (i.e. sits inside a longer word), then causes containing 'cath', and list
# the distinct causes that still contain 'cat'.
df2 = df[~df['Cause of death'].str.contains('[^ ]cat')]
df2 = df2[~df2['Cause of death'].str.contains('cath')]
df2[df2['Cause of death'].str.contains('cat')]['Cause of death'].unique()
plot_graphs(df, '[Ss]yndactyl', "Causes of death containing the word 'syndactyly' in CDC database, 1968-2012")