-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathunivariate.py
More file actions
96 lines (71 loc) · 2.44 KB
/
univariate.py
File metadata and controls
96 lines (71 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
'''Univariate Analysis:
Helps in the analysis of continuous/categorical data
1. Data description
2. Plotting of data
'''
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')
import numpy as np
import pandas as pd
class Univariate:
    """Interactive, console-driven univariate analysis of DataFrame columns.

    The class wraps a pandas DataFrame and, for a column chosen by the user,
    prints summary statistics and optionally shows plots. All interaction
    happens through ``print``/``input``; plots are shown with matplotlib.
    """

    def __init__(self, df):
        """Store the DataFrame whose columns will be analysed.

        Args:
            df: the pandas DataFrame to analyse. It is kept by reference and
                returned unchanged by ``univariate_plot``.
        """
        self.df = df

    @staticmethod
    def _read_int():
        """Read an integer from stdin, re-asking until the answer parses.

        A non-numeric answer used to raise ValueError and abort the whole
        interactive session; here the user is simply asked again.
        """
        while True:
            try:
                return int(input())
            except ValueError:
                print("Please enter a whole number")

    def colm_details(self, colm):
        """Print ``describe()`` output and the null count for one column.

        Args:
            colm: label of the column in ``self.df``.

        Raises:
            KeyError: if ``colm`` is not a column of ``self.df``.
        """
        series = self.df[colm]
        print(series.describe())
        print("\n")
        print("Null count : " + str(series.isnull().sum()))

    def _plot_categorical(self, colm):
        """Print per-category counts and percentages, then show a bar chart."""
        print("Column Details:")
        self.colm_details(colm)
        series = self.df[colm]
        # Computed once and reused for the counts, the percentages and the plot.
        counts = series.value_counts()
        print("Individual category total count:")
        print(counts)
        print("\n")
        print("Individual category percentage count:")
        # The denominator is the full column length (nulls included), as
        # before; the factor 100 makes the values match the "percentage" label.
        print(counts / len(series) * 100)
        print("\n")
        counts.plot.bar()
        plt.show()

    def _plot_continuous(self, colm):
        """Print column details, then offer histogram, line plot and box plot."""
        print("Column Details:")
        self.colm_details(colm)
        series = self.df[colm]

        print("Do you want histogram[0/1]")
        if self._read_int() == 1:
            series.plot.hist()
            # The column values lie on the x-axis; y is the frequency.
            plt.xlabel(colm)
            plt.show()

        print("Do you want lineplot[0/1]")
        if self._read_int() == 1:
            series.value_counts().sort_index().plot.line()
            # x is the distinct value, y is how often it occurs.
            plt.xlabel(colm)
            plt.ylabel("Count")
            plt.show()

        print("Do you want boxplot[0/1]")
        if self._read_int() == 1:
            series.plot.box()
            plt.ylabel(colm)
            plt.show()

    def create_plot(self, colm):
        """Ask whether ``colm`` is categorical or continuous and analyse it.

        Answer 1 runs the categorical analysis, 2 the continuous one; any
        other integer performs no analysis. If ``colm`` is not a column of
        ``self.df`` a message is printed and the method returns immediately
        without prompting.

        Args:
            colm: label of the column in ``self.df``.
        """
        # Validate before prompting so a mistyped name cannot raise KeyError
        # halfway through the dialogue.
        if colm not in self.df.columns:
            print("Column '" + str(colm) + "' not found in the data")
            return

        print("Is the variable :\n\n1.Categorical\n\n2.Continuos")
        colm_type = self._read_int()
        if colm_type == 1:
            self._plot_categorical(colm)
        elif colm_type == 2:
            self._plot_continuous(colm)
        input("Press ENTER to continue")

    def univariate_plot(self):
        """Repeatedly ask for a column name and analyse it until "-1" is entered.

        Per the original note this is the landing function called from
        data_intake.

        Returns:
            The DataFrame passed to the constructor.
        """
        while True:
            print("Enter the column , which you want to analyse")
            typed = input("Enter -1 to exit ").strip()
            if typed == "-1":
                break
            # Names are lower-cased as before (presumably the caller
            # normalises column labels - verify against data_intake); fall
            # back to the exact text typed if only that matches a column.
            colm = typed.lower()
            if colm not in self.df.columns and typed in self.df.columns:
                colm = typed
            # Describe the column and create the requested plots.
            self.create_plot(colm)
        return self.df