-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbivariate.py
More file actions
125 lines (92 loc) · 3.52 KB
/
bivariate.py
File metadata and controls
125 lines (92 loc) · 3.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
'''Bivariate analysis.
Interactive helpers for analysing pairs of DataFrame columns:
prints summary statistics and plots the data.'''
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency
import seaborn as sns
class Bivariate:
    '''Interactive bivariate analysis of a pandas DataFrame.

    The public methods print results to stdout, draw matplotlib/seaborn
    figures with ``plt.show()`` and prompt the user through ``input()``.
    '''

    def __init__(self, df):
        '''Store the DataFrame to analyse.

        Args:
            df: DataFrame whose columns are compared pairwise. The methods
                of this class only read from it.
        '''
        self.df = df

    def _numeric_corr(self):
        '''Return the pairwise correlation matrix of the numeric columns.

        Non-numeric columns are dropped explicitly: recent pandas releases
        no longer skip them silently in ``DataFrame.corr`` and raise instead.
        '''
        numeric = self.df.select_dtypes(include=["number", "bool"])
        return numeric.corr()

    def _resolve_column(self, name):
        '''Map a user-typed column name onto an actual column of ``self.df``.

        The lower-cased spelling is tried first (the historical behaviour of
        ``bivariate_plot``), then the name as typed, then the same two with
        surrounding whitespace removed, and finally a case-insensitive match
        against the string column labels.

        Returns:
            The matching column label, or ``None`` when nothing matches.
        '''
        columns = self.df.columns
        trimmed = name.strip()
        for candidate in (name.lower(), name, trimmed.lower(), trimmed):
            if candidate in columns:
                return candidate
        wanted = trimmed.lower()
        for column in columns:
            if isinstance(column, str) and column.strip().lower() == wanted:
                return column
        return None

    @staticmethod
    def _parse_categorical_flag(raw):
        '''Convert the answer to "Is it categorical[0/1]" into 0 or 1.

        Returns:
            0 or 1 for a valid answer, ``None`` for anything else
            (non-numeric text or a number other than 0/1).
        '''
        try:
            flag = int(raw)
        except ValueError:
            return None
        return flag if flag in (0, 1) else None

    def _ask_is_categorical(self):
        '''Prompt until the user answers 0 (continuous) or 1 (categorical).'''
        while True:
            flag = self._parse_categorical_flag(input("Is it categorical[0/1] "))
            if flag is not None:
                return flag
            print("Please enter 0 or 1")

    def _run_analysis(self, colm1, type_colm1, colm2, type_colm2):
        '''Call the analysis method matching the two column types.

        For the mixed and the categorical/categorical cases a
        ``RuntimeError`` from the first attempt triggers one retry with the
        two columns swapped, exactly as the original implementation did.
        NOTE(review): the reason for that retry is not visible in this file;
        it is kept for backward compatibility.
        '''
        if type_colm1 == 0 and type_colm2 == 0:
            self.continuous_continuous(colm1, colm2)
            return
        if type_colm1 == 1 and type_colm2 == 1:
            analyse = self.categorical_categorical
            first, second = colm1, colm2
        else:
            analyse = self.continuous_categorical
            # continuous_categorical expects (continuous, categorical).
            if type_colm1 == 1:
                first, second = colm2, colm1
            else:
                first, second = colm1, colm2
        try:
            analyse(first, second)
        except RuntimeError:
            analyse(second, first)

    def correlation_matrix(self):
        '''Ask whether to show a correlation heatmap and, if so, draw it.

        Answering "0" skips the plot. Only numeric columns take part in the
        correlation; if there are none a message is printed instead of
        plotting.
        '''
        print("Do you want correlation matrix[0/1]")
        if input().strip() == "0":
            return
        corr = self._numeric_corr()
        if corr.empty:
            # An empty matrix cannot be drawn as a heatmap.
            print("No numeric columns available for a correlation matrix")
            return
        labels = corr.columns.values
        sns.heatmap(corr, xticklabels=labels, yticklabels=labels)
        plt.show()

    def continuous_continuous(self, colm1, colm2):
        '''Scatter-plot two continuous columns and print their correlation.

        Args:
            colm1: Column plotted on the x axis.
            colm2: Column plotted on the y axis.
        '''
        self.df.plot.scatter(colm1, colm2)
        plt.xlabel(colm1)
        plt.ylabel(colm2)
        plt.show()
        print("Correlation between {} and {} is ".format(colm1, colm2))
        print(self.df[colm1].corr(self.df[colm2]))
        print("if between -0.1 to 0.1 - WEAK RELATIONSHIP\n"
              "\nif beyond -0.5 to 0.5 - STRONG LINEAR RELATIONSHIP\n")
        input("Press ENTER to continue")

    def continuous_categorical(self, colm1, colm2):
        '''Compare a continuous column across the groups of a categorical one.

        Prints the per-group mean, shows it as a bar chart and then shows a
        box plot of the continuous column per group.

        Args:
            colm1: Continuous column.
            colm2: Categorical column used for grouping.
        '''
        print("Mean when grouping according to {}".format(colm2))
        group_means = self.df.groupby(colm2)[colm1].mean()
        print(group_means)
        group_means.plot.bar()
        plt.ylabel(colm1)
        plt.xlabel(colm2)
        plt.show()

        # New 8x6 figure; seaborn draws the box plot on its current axes.
        plt.subplots(figsize=(8, 6))
        sns.boxplot(x=colm2, y=colm1, data=self.df[[colm1, colm2]])
        plt.show()
        input("Press ENTER to continue")

    def categorical_categorical(self, colm1, colm2):
        '''Cross-tabulate two categorical columns and test for independence.

        Prints the contingency table and the result of
        ``scipy.stats.chi2_contingency`` on it, then shows the table as a
        bar chart with one group of bars per ``colm1`` category.

        Args:
            colm1: Column forming the rows of the table (x axis of the plot).
            colm2: Column forming the columns of the table (bar series).
        '''
        table = pd.crosstab(self.df[colm1], self.df[colm2])
        print("Cross table of {} and {}".format(colm1, colm2))
        print(table)
        print("Chi 2 test :")
        print(chi2_contingency(table))
        table.plot.bar()
        # Bar heights are frequencies; colm2 already appears in the legend.
        plt.ylabel("Count")
        plt.xlabel(colm1)
        plt.show()
        input("Press ENTER to continue")

    def bivariate_plot(self):
        '''Landing function: run the interactive bivariate analysis loop.

        Offers the correlation matrix once, then repeatedly asks for two
        column names and whether each is categorical, and runs the matching
        analysis. Unknown column names are reported instead of raising.
        The loop ends when the user enters "-1".

        Returns:
            The DataFrame passed to the constructor.
        '''
        self.correlation_matrix()
        next_iter = "0"
        while next_iter != "-1":
            print("Enter colm 1 :")
            typed1 = input()
            type_colm1 = self._ask_is_categorical()
            print("Enter colm 2 :")
            typed2 = input()
            type_colm2 = self._ask_is_categorical()

            colm1 = self._resolve_column(typed1)
            colm2 = self._resolve_column(typed2)
            unknown = [typed for typed, found in ((typed1, colm1), (typed2, colm2))
                       if found is None]
            if unknown:
                print("Unknown column(s): {}".format(", ".join(unknown)))
            else:
                self._run_analysis(colm1, type_colm1, colm2, type_colm2)
            next_iter = input("Press -1 to exit/ Press any other character to continue")
        return self.df