# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
numsteps = 0

# Counting Steps function.
def steps(inp):
    global numsteps
    numsteps += inp
# The parsing of the data happens here.
def Parsing(data):
    x, y = [], []
    try:
        for i in range(len(data)):
            data[i] = data[i].replace(" ", "")
            data[i] = data[i].replace("(", "")
            data[i] = data[i].replace("\n", "")
            data[i] = data[i].replace(")", "")
            data[i] = data[i].split(",")
            x.append(float(data[i][0]))
            y.append(float(data[i][1]))
        return x, y
    except (ValueError, IndexError):
        print("Make sure the text file has one ordered pair per line, with the coordinates separated by a comma.\n")
        raise SystemExit
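
# For reference, Parsing expects data.txt to contain one "(x, y)" pair per
# line; a hypothetical sample file would look like:
#
#   (1, 2.2)
#   (2, 3.9)
#   (3, 6.1)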
'''
The error function: returns the sum, over every data point, of the squared
vertical distance between the point and the line y = m*x + b.
'''
def error(x, y, m, b):
    error = 0
    for i in range(len(x)):
        error += (y[i] - m * x[i] - b) ** 2
    steps(8)
    return error
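
# In symbols (a restatement of the loop above, not extra behavior):
#   E(m, b) = sum_i (y_i - (m * x_i + b))**2
# is the least-squares objective that the rest of the program minimizes.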
'''
Dynamic Step Size.
Gradient descent can be slow if the step size is small and can overshoot if the
step size is big. Dynamic step size tries to make the step size as big as
possible. Since the error function is convex, the gradient constantly decreases
as we approach the minimum.
'''
def DynamicDelta(x0, y0, m, b, delta):
    fm, fb = gradient(x0, y0, m, b, delta)
    fm2, fb2 = gradient(x0, y0, m - fm, b - fb, delta)
    steps(16)
    # Follows the gradient until the next gradient is smaller than the previous one.
    while np.sqrt(fm2 ** 2 + fb2 ** 2) > np.sqrt(fm ** 2 + fb ** 2):
        fm, fb = gradient(x0, y0, m, b, delta)
        fm2, fb2 = gradient(x0, y0, m - fm, b - fb, delta)
        delta /= 2  # Starts with a big step size, narrows it down by a factor of 0.5 every iteration.
        steps(27)
    return delta
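
# Sketch of the effect: starting from a deliberately large delta, the loop
# halves it (1 -> 0.5 -> 0.25 -> ...) until one descent step no longer
# increases the gradient norm, so the step accepted is roughly the largest
# one that still makes progress.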
'''
Approximates the gradient with forward differences.
Doesn't assume a closed-form derivative of the error function, so it scales to
other error functions.
'''
def gradient(x0, y0, m, b, delta):
    fm = error(x0, y0, m + delta, b) - error(x0, y0, m, b)
    fb = error(x0, y0, m, b + delta) - error(x0, y0, m, b)
    steps(9)
    return fm, fb
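
# Note on the finite differences above: error(m + delta) - error(m) is
# approximately delta * dE/dm, so fm and fb are the partial derivatives scaled
# by delta rather than the raw gradient; that delta factor is what plays the
# role of the learning rate in the updates in ErrorReduce.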
'''
Error Reduction.
Runs gradient descent and stops where it thinks we have done enough iterations.
'''
def ErrorReduce(x, y, m, b, n, delta, flag):
    iterations = 0
    M = []
    B = []
    Error = []
    while iterations < n:
        Error.append(error(x, y, m, b))
        fm, fb = gradient(x, y, m, b, delta)
        m -= fm
        b -= fb
        M.append(m)
        B.append(b)
        if not flag:
            delta = DynamicDelta(x, y, m, b, 1)
        if len(Error) > 3:
            if np.abs(Error[iterations] - Error[iterations - 1]) < 0.1:  # If the error stops improving.
                break
        iterations += 1
    steps(18 * iterations)
    return m, b, M, B, Error, iterations
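
# Each pass of the loop above is one gradient-descent update,
#   m <- m - (E(m + delta, b) - E(m, b))
#   b <- b - (E(m, b + delta) - E(m, b))
# and the run halts early once successive errors differ by less than 0.1.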
# All the graphing is done here. ax2 and ax3 are passed in explicitly rather
# than read from globals.
def Graph(x, y, m, b, M, B, t1, ax1, ax2, ax3, stop, Error):
    ax1.set(ylabel="Error", xlabel="Iterations")
    ax1.set_title("Iterations")
    ax2.set(ylabel="Evolution of Algorithm")
    if not stop:
        ax3.plot(x, y, 'go', markersize=2)
    steps(11)
    ax3.plot(t1, m * t1 + b, 'b.-', markersize=0.5)
    ax3.set(xlabel="Final Fit y=" + str(m) + "x + " + str(b))
    ax1.set_ylim(Error[-1], Error[0])  # Scales the plot accordingly.
    ax1.plot(Error, 'b.-')
    M = np.asarray(M)
    B = np.asarray(B)
    ax2.set_ylim(np.min(M) - 0.01, np.max(M) + 0.01)  # Scales the plot accordingly.
    ax2.set_xlim(np.min(B) - 0.01, np.max(B) + np.max(B) / 10 + 0.01)
    ax1.set_yscale('linear')
    ax2.scatter(B, M, c=B * 2, cmap='winter_r', s=10)
    ax2.plot(B[-1], M[-1], 'ro')
# Parsing
try:
    text_file = open('data.txt', 'r')
    file_data = text_file.readlines()
    text_file.close()  # Close the file.
    numbers = file_data[:]  # Work on a copy of the file data.
except IOError:
    print("Please have a file called data.txt in the directory with ordered pairs.")
    raise SystemExit

x, y = Parsing(numbers)
# Initializations
iterations = 0
m = 0  # Start from m = 0, b = 0 as the initial guess.
b = 0
M = [m]
B = [b]
Error = []
# Graphing initialization
fig, (ax1, ax2, ax3) = plt.subplots(3, sharey=False, sharex=False, figsize=(10, 7))
t1 = np.arange(min(x), max(x), 0.1)
input("This program fits a line of best fit to the ordered pairs in the data.txt file.\nPlease keep a data.txt file in the program directory with ordered co-ordinates.\nPress Enter to continue.\n")
# Computes the initial step size; if the user chooses a fixed size, this stays put for the rest of the program.
delta = DynamicDelta(x, y, m, b, 0.1)
flag = None
while flag is None:
    inp = input("Enter a for fixed step size to reduce error, enter q if you prefer dynamic step size.\n")
    if inp == "a" or inp == "A":
        try:
            n = int(input("Enter the number of times you want the error-reducing algorithm to run: "))
            flag = True
        except ValueError:
            print("Please enter a whole number.")  # Invalid count: ask again.
    elif inp == "q" or inp == "Q":
        n = 1000
        flag = False
print("Please wait, as this process takes a bit of time.\n")
# The main error-reducing function.
m, b, M, B, Error, iterations = ErrorReduce(x, y, m, b, n, delta, flag)
# Cumulative steps that are not in the loop, added at the end.
steps(86)
print("It took", numsteps, "operational steps\n")
print("Final Fit y=" + str(m) + "x + " + str(b))
print("Initial Error =", Error[0], ". Final Error =", Error[-1])
if iterations == n:
    print("Number of iterations of gradient descent to reduce the error with a step size of", delta, "is", iterations)
    print("Please wait for graphing to finish, since that takes quite a lot of time too.")
else:
    print("Optimal iterations of gradient descent to reduce the error with a step size of", delta, "is", iterations)
    print("Please wait for graphing to finish.")
# Graphs after printing is done to make the process faster.
Graph(x, y, m, b, M, B, t1, ax1, ax2, ax3, False, Error)
# After making all the plots above, finally show them.
plt.show()
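
# Hypothetical usage (assuming Python 3 with numpy and matplotlib installed):
#   python regression.py
# with a data.txt of ordered pairs in the same directory.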