-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGaussian.py
108 lines (81 loc) · 2.94 KB
/
Gaussian.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import math
import matplotlib.pyplot as plt
class Gaussian:
    """Gaussian distribution class for calculating and visualizing a
    Gaussian distribution.

    Attributes:
        mean (float): the mean value of the distribution.
        stddev (float): the standard deviation of the distribution.
        data (list of numbers): the values loaded by ``read_data_file``
            (or assigned directly by the caller).
    """

    def __init__(self, mu=0, sigma=1):
        self.mean = mu
        self.stddev = sigma
        self.data = []

    def calculate_mean(self):
        """Compute the mean of ``self.data`` and store it in ``self.mean``.

        Returns:
            float: the mean of the data set.

        Raises:
            ZeroDivisionError: if ``self.data`` is empty.
        """
        self.mean = 1.0 * sum(self.data) / len(self.data)
        return self.mean

    def calculate_stddev(self, sample=True):
        """Compute the standard deviation of ``self.data`` and store it in
        ``self.stddev``.

        The deviation is measured about the current value of ``self.mean``;
        this method does not recompute the mean, so call ``calculate_mean``
        first if ``self.data`` has changed.

        Args:
            sample (bool): when True divide by ``len(data) - 1`` (sample
                standard deviation); when False divide by ``len(data)``
                (population standard deviation).

        Returns:
            float: the standard deviation of the data set.

        Raises:
            ZeroDivisionError: if the divisor is zero (empty data, or a
                single value with ``sample=True``).
        """
        n = len(self.data) - 1 if sample else len(self.data)
        mean = self.mean
        squared_error = sum((d - mean) ** 2 for d in self.data)
        self.stddev = math.sqrt(squared_error / n)
        return self.stddev

    def read_data_file(self, file_name, sample=True):
        """Read numbers from a text file (one per line) into ``self.data``,
        then update ``self.mean`` and ``self.stddev`` from them.

        Blank lines are skipped. Each value is parsed as an ``int`` when
        possible and as a ``float`` otherwise.

        Args:
            file_name (str): path of the file to read.
            sample (bool): passed on to ``calculate_stddev``.

        Returns:
            None

        Raises:
            ValueError: if a non-blank line is not a valid number.
            ZeroDivisionError: if the file yields too few values for the
                mean / standard deviation to be computed.
        """
        data_list = []
        # The ``with`` block closes the file; no explicit close() is needed.
        with open(file_name) as handle:
            for line in handle:
                text = line.strip()
                if not text:
                    # A blank (e.g. trailing) line carries no value.
                    continue
                try:
                    value = int(text)
                except ValueError:
                    # Not an integer literal: accept a float, and let
                    # float() raise ValueError for genuinely bad input.
                    value = float(text)
                data_list.append(value)

        self.data = data_list
        # The mean must be refreshed first: calculate_stddev reads self.mean.
        self.mean = self.calculate_mean()
        self.stddev = self.calculate_stddev(sample)

    def plot_histogram(self):
        """Draw a histogram of ``self.data`` on the current matplotlib
        figure, with a title and axis labels.

        Returns:
            None
        """
        plt.hist(self.data)
        plt.title('Histogram of Data')
        plt.xlabel('data')
        plt.ylabel('count')

    def pop_density(self, x):
        """Evaluate the Gaussian probability density function at ``x`` using
        the current ``self.mean`` and ``self.stddev``.

        Args:
            x (float): point at which to evaluate the density.

        Returns:
            float: the probability density at ``x``.

        Raises:
            ZeroDivisionError: if ``self.stddev`` is zero.
        """
        return (1.0 / (self.stddev * math.sqrt(2 * math.pi))) * math.exp(-0.5 * ((x - self.mean) / self.stddev) ** 2)

    def plot_histogram_pdf(self, n_spaces=50):
        """Plot a normalized histogram of ``self.data`` above a plot of the
        probability density function over the same range.

        Args:
            n_spaces (int): number of evenly spaced x values at which the
                density is evaluated, starting at ``min(self.data)``.

        Returns:
            tuple: ``(x, y)`` where ``x`` is the list of x values and ``y``
            the list of densities computed by ``pop_density``.

        Raises:
            ValueError: if ``self.data`` is empty (from ``min``/``max``).
        """
        min_range = min(self.data)
        max_range = max(self.data)

        # calculates the interval between x values
        interval = 1.0 * (max_range - min_range) / n_spaces

        # calculate the x values to visualize and the density at each one
        x = [min_range + interval * i for i in range(n_spaces)]
        y = [self.pop_density(value) for value in x]

        # make the plots
        fig, axes = plt.subplots(2, sharex=True)
        fig.subplots_adjust(hspace=.5)
        axes[0].hist(self.data, density=True)
        axes[0].set_title('Normed Histogram of Data')
        axes[0].set_ylabel('Density')

        axes[1].plot(x, y)
        axes[1].set_title('Normal Distribution for \n Sample Mean and Sample Standard Deviation')
        # Label the lower plot (previously the upper one was labelled twice).
        axes[1].set_ylabel('Density')
        plt.show()

        return x, y

    def __add__(self, other):
        """Add two Gaussian distributions.

        The means are summed and the standard deviations are combined as
        ``sqrt(a.stddev ** 2 + b.stddev ** 2)``. The ``data`` lists are not
        combined; the result starts with empty data.

        Args:
            other (Gaussian): the distribution to add to this one.

        Returns:
            Gaussian: a new instance holding the combined mean and stddev.
        """
        result = Gaussian()
        result.mean = self.mean + other.mean
        result.stddev = math.sqrt(self.stddev ** 2 + other.stddev ** 2)
        return result

    def __repr__(self):
        """Return a string showing the mean and standard deviation."""
        return "mean {}, standard deviation {}".format(self.mean, self.stddev)