# Applicant and admission statistics by gender and department, computed from
# a comma-separated text file whose first row names the columns. The code
# below relies on the columns 'Gender', 'Dept', 'Admit' and 'Freq'.
import pprint

import pandas as pd


def _freq_total(frame):
    """Return the sum of the 'Freq' column of *frame*, read as integers."""
    # Every field is loaded as text, so convert before summing.
    return frame['Freq'].astype(int).sum()


def _acceptance_pct(applicants):
    """Return the share (in %, 2 decimals) of *applicants* in the first 'Admit' group.

    groupby() sorts its keys by default, so "first" means the smallest-sorting
    'Admit' label.
    NOTE(review): presumably that label is the "admitted" one (e.g.
    'Admitted' < 'Rejected') -- verify against the data file.
    """
    first_admit_group = list(applicants.groupby('Admit'))[0][1]
    return round((_freq_total(first_admit_group) / _freq_total(applicants)) * 100, 2)


# Load the raw rows as lists of string fields.
dataset = []
with open('MLTutorial/Udacity/simpsons.txt') as file:
    for line in file:
        fields = line.strip()
        if not fields:
            # Skip blank lines; they would otherwise become short rows that
            # break the integer conversion of 'Freq' later on.
            continue
        dataset.append(fields.split(','))

category = dataset[0]  # header row -> column names
data = dataset[1:]     # remaining rows -> records
#print(category)
#pprint.pprint(data)
df = pd.DataFrame(data=data, columns=category)
df  # bare expression: no effect in a script, shown only in an interactive cell

# Overall applicant counts per gender.
# groupby() sorts its keys, so the positions below depend on label order.
# NOTE(review): index 1 is treated as male and index 0 as female (presumably
# 'Female' < 'Male') -- verify against the data file.
maleFemale = list(df.groupby('Gender'))
males = _freq_total(maleFemale[1][1])
males
females = _freq_total(maleFemale[0][1])
females
(males / (males + females), females / (males + females))  # male female applicant ratio

# department wise statistic
deptStat = list(df.groupby('Dept'))
stat = []
# Iterate over every department found in the data rather than a fixed count.
for dept, dept_rows in deptStat:
    by_gender = list(dept_rows.groupby('Gender'))
    maleRatio = _acceptance_pct(by_gender[1][1])
    femRatio = _acceptance_pct(by_gender[0][1])
    stat.append((dept, maleRatio, femRatio))

categ = ['Department', 'Male Acceptance (%)', 'Female Acceptance (%)']
df2 = pd.DataFrame(data=stat, columns=categ)
df2
# The research paper by Bickel et al. concluded that women tended to apply to
# competitive departments with low rates of admission even among qualified
# applicants (such as in the English department), whereas men tended to apply
# to less-competitive departments with high rates of admission among the
# qualified applicants (such as in engineering and chemistry).