import numpy as np
# Load the iris table. The context manager closes the file even when
# np.loadtxt raises, which the manual open()/close() pair did not guarantee.
with open("iris.csv", "r") as fFloat:
    dataset = np.loadtxt(fFloat, delimiter=",")

# Columns 0..3 are used as features, column 4 as the class label.
x = dataset[:, 0:4]
y = dataset[:, 4]

# Reproducible random split: 80 % of the rows are drawn (without replacement)
# for training, all remaining rows form the test set.
percentTrainingset = 0.8
np.random.seed(42)
TrainSet = np.random.choice(x.shape[0], int(x.shape[0]*percentTrainingset), replace=False)
XTrain = x[TrainSet, :]
YTrain = y[TrainSet]
# Deleting the training indices from 0..len(y)-1 leaves the test indices.
TestSet = np.delete(np.arange(0, len(y)), TrainSet)
XTest = x[TestSet, :]
YTest = y[TestSet]
def plainKNNclassification(xTrain, yTrain, xQuery, k, normOrd=None):
    """Classify one query point by majority vote among its k nearest neighbours.

    Training data and query are min-max scaled with the per-feature minimum
    and maximum of ``xTrain`` before distances are computed.

    Parameters:
        xTrain:  2-D array, one training sample per row.
        yTrain:  1-D array with the class label of every row of ``xTrain``.
        xQuery:  1-D array holding the features of the point to classify.
        k:       number of nearest neighbours taking part in the vote.
        normOrd: ``ord`` argument handed to ``np.linalg.norm``
                 (``None`` selects the Euclidean norm).

    Returns:
        The label occurring most often among the k nearest neighbours.
        ``np.unique`` returns labels sorted and ``np.argmax`` picks the first
        maximum, so a tie is resolved in favour of the smallest label.
    """
    xMin = xTrain.min(axis=0); xMax = xTrain.max(axis=0)  #*\label{code:knnscale1}
    # A constant feature has zero range; dividing by it would yield NaN
    # distances. Such a feature carries no information, so scale it by 1.
    span = xMax - xMin
    span[span == 0] = 1
    xTrain = (xTrain - xMin) / span
    xQuery = (xQuery - xMin) / span  #*\label{code:knnscale2}
    diff = xTrain - xQuery
    dist = np.linalg.norm(diff, axis=1, ord=normOrd)
    knearest = np.argsort(dist)[:k]
    # Vote with the labels passed in as ``yTrain``; the previous code read the
    # module-level ``YTrain`` and silently ignored this parameter.
    (classification, counts) = np.unique(yTrain[knearest], return_counts=True)
    theChoosenClass = np.argmax(counts)  #*\label{code:knnmehrheit}
    return(classification[theChoosenClass])
# Classify every held-out sample with k = 3 and report each misclassification.
errors = 0
for i, expected in enumerate(YTest):
    myClass = plainKNNclassification(XTrain, YTrain, XTest[i, :], 3)
    if myClass == expected:
        continue
    # Only misclassified samples reach this point.
    errors += 1
    print('%s wurde als %d statt %d klassifiziert' % (str(XTest[i,:]),myClass,YTest[i]))
from twoMoonsProblem import twoMoonsProblem
import matplotlib.pyplot as plt

# Replace the training data by the two-moons sample.
(XTrain, YTrain) = twoMoonsProblem()

# Regular grid over [-1, 2) x [-1, 2) with step 0.01; every grid node is
# classified so that the decision regions can be drawn as a colour mesh.
XX, YY = np.mgrid[-1:2:0.01, -1:2:0.01]
X = np.array([XX.ravel(), YY.ravel()]).T
yP = np.zeros(X.shape[0])
for i in range(X.shape[0]):
    yP[i] = plainKNNclassification(XTrain, YTrain, X[i, :], 3, normOrd=np.inf)

# Split the training points by label (labels above / below 0.5).
indexA = np.flatnonzero(YTrain > 0.5)
# The scraped source had this line fused with the matplotlib import and
# HTML residue; restored to the comparison mirroring indexA.
indexB = np.flatnonzero(YTrain < 0.5)

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# Bring the flat predictions back into the grid layout expected by pcolormesh.
yP = yP.reshape(XX.shape)
ax.pcolormesh(XX, YY, yP, cmap=plt.cm.Set1)
ax.scatter(XTrain[indexA, 0], XTrain[indexA, 1], color='white', marker='o')
ax.scatter(XTrain[indexB, 0], XTrain[indexB, 1], color='black', marker='+')
ax.set_xlabel('$x_0$')
ax.set_ylabel('$x_1$')
ax.set_xlim([-1, 2])
ax.set_ylim([-1, 2])
ax.set_title("Klassifikation mit inf-Norm")
::::::::::::::::::::::::::::::::::::::::::
import numpy as np
from scipy.spatial import KDTree
class knnRegression:
    """k-nearest-neighbour regression with inverse-distance weighting.

    Features are min-max scaled with the training minimum/maximum, and the
    neighbour search runs on a ``scipy.spatial.KDTree`` built in ``fit``.
    """

    def fit(self, X, Y):
        """Store the scaled training data and build the search tree.

        Parameters:
            X: 2-D array, one training sample per row.
            Y: 1-D array with the target value of every row of ``X``.
        """
        self.xMin = X.min(axis=0)
        self.xMax = X.max(axis=0)
        self.XTrain = (X - self.xMin) / (self.xMax - self.xMin)
        self.kdTree = KDTree(self.XTrain)
        self.YTrain = Y

    def predict(self, X, k=3, smear=1):
        """Predict targets as a weighted mean of the k nearest training targets.

        Each neighbour gets the weight ``1/(dist + smear/k)``; the weights of
        one query are normalised to sum to 1.

        Parameters:
            X:     2-D array, one query point per row.
            k:     number of neighbours to average over.
            smear: offset added (as ``smear/k``) to every distance; with
                   ``smear=0`` a query coinciding with a training point
                   divides by zero.

        Returns:
            1-D array with one prediction per row of ``X``.
        """
        X = (X - self.xMin) / (self.xMax - self.xMin)
        (dist, neighbours) = self.kdTree.query(X, k)
        # KDTree.query drops the neighbour axis when k == 1; restore it so the
        # row-wise reductions below work for every k.
        dist = dist.reshape(X.shape[0], k)
        neighbours = neighbours.reshape(X.shape[0], k)
        weights = 1/(dist + smear/k)  #*\label{code:knnReg1}
        weights = weights / np.sum(weights, axis=1, keepdims=True)  #*\label{code:knnReg2}
        # Use the targets stored by fit(); the previous code read a
        # module-level ``YTrain`` and ignored the fitted data.
        y = np.sum(weights*self.YTrain[neighbours], axis=1)
        return(y)
if __name__ == '__main__':
    # Experiment settings: sample count, relative noise in percent,
    # neighbour count, smear value and training fraction.
    samples, pNoise = 5000, 1
    myK, mysmear = 3, 0.5
    percentTrainingset = 0.8

    # Synthetic data: a steep tanh step around the circle of radius 1/4
    # centred at (0.5, 0.5), perturbed by multiplicative Gaussian noise.
    np.random.seed(42)
    x = np.random.rand(samples, 2)
    dx = x[:, 0] - 0.5
    dy = x[:, 1] - 0.5
    y = np.tanh(500*((1/16) - dx**2 - dy**2))
    Noise = np.random.normal(size=len(y))
    y = (1 + Noise*pNoise/100)*y

    # Random split; rows not drawn for training become the test set.
    nTrain = int(x.shape[0]*percentTrainingset)
    TrainSet = np.random.choice(x.shape[0], nTrain, replace=False)
    allRows = np.arange(0, len(y))
    TestSet = np.delete(allRows, TrainSet)
    XTrain, YTrain = x[TrainSet, :], y[TrainSet]
    XTest, YTest = x[TestSet, :], y[TestSet]

    # Fit, predict on the held-out rows and print the mean absolute error.
    myRegression = knnRegression()
    myRegression.fit(XTrain, YTrain)
    yP = myRegression.predict(XTest, k=myK, smear=mysmear)
    diff = yP - YTest
    MAE = np.mean(np.abs(diff))
    print(MAE)

    # NOTE(review): Axes3D is never referenced; presumably imported so that
    # older Matplotlib versions register the '3d' projection - verify.
    from mpl_toolkits.mplot3d import Axes3D
    import matplotlib.pyplot as plt
    from matplotlib import cm

    # 3-D scatter of the predictions over the test inputs.
    fig = plt.figure(1)
    ax = fig.add_subplot(1, 1, 1, projection='3d')
    ax.scatter(XTest[:, 0], XTest[:, 1], yP, alpha=0.6, c=yP, cmap=cm.copper)
    ax.set_xlabel('$x_0$')
    ax.set_ylabel('$x_1$')
    ax.set_zlabel('$y_P$')
#
#
#lookUpTree = KDTree(x)
#closeToCenter = lookUpTree.query_ball_point([0, 0], 0.2)
#print(closeToCenter)
#print(x[closeToCenter,:])
:::::::::::::::::::::::::::::::::::::::::::::
import numpy as np
import matplotlib.pyplot as plt
# Fixed seed so the figure is reproducible.
np.random.seed(42)

# 50 random abscissae, each used twice, on the line y = 2x - 0.5.
x = np.random.rand(50)
x = np.hstack((x, x))
y = 2*x - 0.5

# Measured values: exact line plus Gaussian noise scaled by 0.2.
noise = 0.2*np.random.normal(size=x.shape[0])
ym = y + noise

# Exact line in black.
plt.plot(x, y, color='k')

# Dotted vertical segment from the measured value of sample 95 to the line.
r = np.array([
    [x[95], x[95]],
    [ym[95], y[95]],
])
plt.plot(r[0, :], r[1, :], 'k:')

# Noisy samples in red, then axis labels and display.
plt.scatter(x, ym, color='r')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
:::::::::::::::::::::::::::::::::::::::::::::::::::::
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(42)

# Read features and targets. The context managers close the files even when
# np.loadtxt raises.
with open("BostonFeature.csv", "r") as fFloat:  #*\label{code:lstqBoston:1}
    X = np.loadtxt(fFloat, delimiter=",")
with open("BostonTarget.csv", "r") as fFloat:
    y = np.loadtxt(fFloat, delimiter=",")

# Random 80/20 split; rows not drawn for training form the test set.
TrainSet = np.random.choice(X.shape[0], int(X.shape[0]*0.80), replace=False)
XTrain = X[TrainSet, :]
YTrain = y[TrainSet]
TestSet = np.delete(np.arange(0, len(y)), TrainSet)
XTest = X[TestSet, :]
YTest = y[TestSet]  #*\label{code:lstqBoston:2}

# Design matrix: a leading column of ones (intercept) followed by the
# features. The width is derived from the data instead of hard-coding 14.
nColumns = XTrain.shape[1] + 1
A = np.ones((XTrain.shape[0], nColumns))  #*\label{code:lstqBoston:3}
A[:, 1:nColumns] = XTrain  #*\label{code:lstqBoston:9}
# Scale every column by its maximum over the training rows.
maxValue = np.max(A, axis=0)  #*\label{code:lstqBoston:10}
A = A/maxValue  #*\label{code:lstqBoston:11}
# rcond=None selects NumPy's documented default cutoff explicitly and avoids
# the FutureWarning older releases emit when rcond is omitted.
(u, _, Arank, _) = np.linalg.lstsq(A, YTrain, rcond=None)  #*\label{code:lstqBoston:4}

# Training residuals: 2-norm divided by sample count, mean and max absolute error.
r = A@u - YTrain  #*\label{code:lstqBoston:12}
print(np.linalg.norm(r)/r.shape[0], np.mean(np.abs(r)), np.max(np.abs(r)))  #*\label{code:lstqBoston:5}
print(u)  #*\label{code:lstqBoston:6}

# Apply the same design-matrix layout and training scaling to the test rows.
B = np.ones((XTest.shape[0], nColumns))  #*\label{code:lstqBoston:7}
B[:, 1:nColumns] = XTest
B = B/maxValue
yPredit = B@u  #*\label{code:lstqBoston:8}
rT = yPredit - YTest
# Report the maximum of the *test* residuals; the previous code printed the
# training maximum (r) here.
print(np.linalg.norm(rT)/rT.shape[0], np.mean(np.abs(rT)), np.max(np.abs(rT)))

# Histograms of the residuals on the training (left) and test (right) set.
fig = plt.figure(1)
ax = fig.add_subplot(1, 2, 1)
ax.set_title('Verteilung der Abweichungen auf der Trainingsmenge')
ax.hist(r, color='gray')
ax.set_xlabel('Abweichung in Tausenden')
ax.set_ylabel('Anzahl')
ax = fig.add_subplot(1, 2, 2)
ax.set_title('Verteilung der Abweichungen auf der Testmenge')
ax.hist(rT, color='gray')
ax.set_xlabel('Abweichung in Tausenden')
ax.set_ylabel('Anzahl')
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
import numpy as np
import matplotlib.pyplot as plt
def plotUnitCircle(p, sampels):
    """Scatter random points and highlight those inside the unit ball of a norm.

    Parameters:
        p:       order of the norm, passed as ``ord`` to ``np.linalg.norm``.
        sampels: number of random points drawn uniformly from [-1.5, 1.5)^2.

    Points with norm <= 1 are drawn as black '+', all others as faint red '*'.
    """
    x = 3*np.random.rand(sampels, 2) - 1.5
    n = np.linalg.norm(x, p, axis=1)
    inside = n <= 1
    outside = n > 1

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    # Outside points first, so the inside points are drawn on top of them.
    ax.scatter(x[outside, 0], x[outside, 1], c='red', s=60, alpha=0.1, marker='*')
    ax.scatter(x[inside, 0], x[inside, 1], c='black', s=60, marker='+')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.grid(True, linestyle='-', color='0.75')


# Draw the unit ball of the maximum norm from 5000 samples.
plotUnitCircle(np.inf, 5000)
:::::::::::::::::::::::::::::::::::::::::::
import numpy as np
def twoMoonsProblem( SamplesPerMoon=240, pNoise=2):
    """Generate the two-moons toy data set.

    Parameters:
        SamplesPerMoon: number of points placed on each half circle.
        pNoise:         Gaussian noise level in percent (noise is scaled by
                        ``pNoise/100``) added to every coordinate.

    Returns:
        (X, Y): X has one 2-D point per row, first all points of moon 0 and
        then all points of moon 1; Y holds the labels 0.0 and 1.0 accordingly.
    """
    # Both moons are sampled at the same angles in [0, pi].
    t = np.linspace(0, np.pi, SamplesPerMoon)
    # Moon 0: upper half of the unit circle.
    upper = np.vstack((np.cos(t), np.sin(t))).T
    # Moon 1: lower half circle, shifted by (1, 0.5).
    lower = np.vstack((1 - np.cos(t), 0.5 - np.sin(t))).T
    X = np.vstack((upper, lower))
    X = X + pNoise/100*np.random.normal(size=X.shape)
    Y = np.append(np.zeros(SamplesPerMoon), np.ones(SamplesPerMoon))
    return X, Y
if __name__ == '__main__':
    # Demo: generate the data set with default settings and plot both classes.
    (X, Y) = twoMoonsProblem()
    import matplotlib.pyplot as plt
    fig = plt.figure(1)
    ax = fig.add_subplot(1, 1, 1)
    # Split the points by label (labels above / below 0.5).
    indexA = np.flatnonzero(Y > 0.5)
    # The scraped source had this line fused with the following scatter call
    # and HTML residue; restored to the comparison mirroring indexA.
    indexB = np.flatnonzero(Y < 0.5)
    ax.scatter(X[indexA, 0], X[indexA, 1], color='red', marker='o')
    ax.scatter(X[indexB, 0], X[indexB, 1], color='black', marker='+')
    ax.set_xlabel('$x_0$')
    ax.set_ylabel('$x_1$')
    # The original called set_ylim twice, leaving the x range unset; the
    # first call is taken to be meant for the x axis.
    ax.set_xlim([-1, 2])
    ax.set_ylim([-1, 2])
    ax.set_title("Two Moons Set")
::::::::::::::::::::::::::::::::::::::::::::::::::::::
Mittwoch, 29. Mai 2019
Abonnieren
Kommentare zum Post (Atom)
Keine Kommentare:
Kommentar veröffentlichen
Hinweis: Nur ein Mitglied dieses Blogs kann Kommentare posten.