import matplotlib.pyplot as plt
import numpy as np
from keras.layers import Dense
from keras.models import Sequential
from keras.datasets import mnist
(XTrain, YTrain), (XTest, YTest) = mnist.load_data()
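# Flatten the 28x28 images to 784-dimensional vectors and scale the pixel values to [0,1].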
XTrain = XTrain.reshape(60000, 784)
XTest = XTest.reshape(10000, 784)
XTrain = XTrain/255
XTest = XTest/255
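# Encoder layer sizes: 784 -> 196 -> 98 -> 24; zielZahlMerkmale is the target number of features.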
Layer1 = 196
Layer2 = 98
zielZahlMerkmale = 24
autoencoder = Sequential()
autoencoder.add(Dense(Layer1,input_dim=784,activation='sigmoid'))
autoencoder.add(Dense(Layer2,activation='relu'))
autoencoder.add(Dense(zielZahlMerkmale,activation='relu'))
autoencoder.add(Dense(Layer2,activation='relu'))
autoencoder.add(Dense(Layer1,activation='relu'))
autoencoder.add(Dense(784,activation='sigmoid'))
autoencoder.compile(loss='mean_squared_error', optimizer='adam')
autoencoder.fit(XTrain, XTrain, epochs=25, verbose=True,validation_data=(XTest, XTest))
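# Build a stand-alone encoder with the same architecture as the first half of the
# autoencoder and copy the trained weights layer by layer.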
encoder = Sequential()
encoder.add(Dense(Layer1,input_dim=784,activation='sigmoid'))
encoder.add(Dense(Layer2,activation='relu'))
encoder.add(Dense(zielZahlMerkmale,activation='relu'))
for i in range(len(encoder.layers)):
W = autoencoder.layers[i].get_weights()
encoder.layers[i].set_weights(W)
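# The decoder reuses the weights of the second half of the autoencoder.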
decoder = Sequential()
decoder.add(Dense(Layer2,input_dim=zielZahlMerkmale, activation='relu'))
decoder.add(Dense(Layer1,activation='relu'))
decoder.add(Dense(784,activation='sigmoid'))
for i in range(len(encoder.layers),len(autoencoder.layers)):
W = autoencoder.layers[i].get_weights()
decoder.layers[i-len(encoder.layers)].set_weights(W)
encodedData = encoder.predict(XTest)
decodedData = decoder.predict(encodedData)
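# Top row: the first ten test digits; bottom row: their reconstructions from the 24 encoded features.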
plt.figure(figsize=(20, 4))
for i in range(10):
ax = plt.subplot(2, 10, i + 1)
plt.imshow(XTest[i].reshape(28, 28))
plt.gray()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
ax = plt.subplot(2, 10, i + 1 + 10)
plt.imshow(decodedData[i].reshape(28, 28))
plt.gray()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
plt.show()
XTrainRed = encoder.predict(XTrain)
XTestRed = encoder.predict(XTest)
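# Plain k-nearest-neighbour classification: majority vote among the k nearest training
# samples. (XTrainRed/XTestRed allow the same experiment on the 24 encoded features.)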
def unscaledKNNclassification(xTrain, yTrain, xQuery, k, normOrd=None):
    diff = xTrain - xQuery
    dist = np.linalg.norm(diff,axis=1, ord=normOrd)
    knearest = np.argsort(dist)[:k]
    (classification, counts) = np.unique(yTrain[knearest], return_counts=True)
    theChosenClass = np.argmax(counts) #*\label{code:knnmehrheit}
    return(classification[theChosenClass])
errors = 0
for i in range(len(YTest)):
myClass = unscaledKNNclassification(XTrain, YTrain, XTest[i,:], 3)
if myClass != YTest[i]:
errors = errors +1
        print('%d was classified as %d instead of %d' % (i,myClass,YTest[i]))
::::::::::::::::::::::::::::::::::::::::
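# binaryTree.py: binary decision tree whose nodes route samples by comparing one feature
# with a value; print() produces a TikZ string for typesetting the tree.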
import numpy as np
class tree:
def __init__(self, varNo, value, operator):
self.rootNode = treeNode(0, value, varNo=varNo, operator=operator)
self.nodes = []
self.nodes.append(self.rootNode)
self.leafNodes = []
self.leafNodes.append(0)
def addNode(self, ChildOf, branch, value, operator='<', varNo=0):
node = treeNode(len(self.nodes),value,ChildOf=ChildOf,operator=operator,varNo=varNo)
self.leafNodes.append(node.number)
self.nodes.append(node)
parent = self.nodes[ChildOf]
if branch is True:
parent.leftTrue = node
else:
parent.rightFalse = node
if parent.leftTrue is not None and parent.rightFalse is not None:
toDelete = self.leafNodes.index(parent.number)
del self.leafNodes[toDelete]
return(node.number)
def trace(self, x):
traceRoute = self.rootNode.trace(x)[0]
return traceRoute
def eval(self, x):
traceRoute = self.trace(x)
y = np.zeros(len(traceRoute))
for i in range(len(y)):
y[i] = self.nodes[traceRoute[i][-1]]()
return(y)
def weightedPathLength(self, X):
traceroute = self.trace(X)
        total = 0
        for i in range(len(traceroute)):
            total = total + len(traceroute[i]) - 1
        return(total)
def numberOfLeafs(self):
return(len(self.leafNodes))
def print(self):
ongoingstring = "\node {"+self.rootNode.conditionString()+" } "
if self.rootNode.leftTrue is not None:
ongoingstring = self.rootNode.leftTrue.addMyString(ongoingstring)
if self.rootNode.rightFalse is not None:
ongoingstring = self.rootNode.rightFalse.addMyString(ongoingstring)
ongoingstring = ongoingstring + " ;"
return(ongoingstring)
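# A single tree node: leftTrue/rightFalse hold the children for a true/false comparison;
# a node without both children counts as a leaf and stores the predicted value.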
class treeNode:
def __init__(self, number, value, ChildOf=None, operator='<', varNo=0):
self.number = number
self.childOf = ChildOf
self.leftTrue = None
self.rightFalse = None
self.value = value
self.varNo = varNo
self.operator = operator
def __call__(self):
return(self.value)
def leafNode(self):
if self.leftTrue is not None and self.rightFalse is not None:
return(False)
else:
return(True)
def evalCondition(self, x):
if self.operator == '=':
cond = x[:, self.varNo] == self.value
elif self.operator == '<':
cond = x[:, self.varNo] < self.value
else: # case >
cond = x[:, self.varNo] > self.value
return cond
def trace(self, x, index=None, traceRoute=None):
if index is None:
index = np.arange(len(x))
if traceRoute is None:
            traceRoute = [[] for _ in range(len(x))]
for k in index:
traceRoute[k].append(self.number)
if self.leafNode():
return (traceRoute, index)
cond = self.evalCondition(x[index])
trueIndex = index[cond]
falseIndex = index[~cond]
if self.leftTrue is not None and trueIndex.size != 0:
traceRoute = self.leftTrue.trace(x, trueIndex, traceRoute)[0]
if self.rightFalse is not None and falseIndex.size != 0:
traceRoute = self.rightFalse.trace(x, falseIndex, traceRoute)[0]
return (traceRoute, index)
def conditionString(self):
if not self.leafNode():
mystring = "\tiny %d || x[%d] %s %.2f" % (self.number, self.varNo, self.operator, self.value)
else:
mystring = "\tiny %d || %.2f" % (self.number, self.value)
return(mystring)
def addMyString(self, ongoingstring):
if not self.leafNode():
ongoingstring = ongoingstring + "child { node {"+self.conditionString()+" } "
else:
ongoingstring = ongoingstring + "child { node[fill=gray!30] {"+self.conditionString()+" } "
if self.leftTrue is not None:
ongoingstring = self.leftTrue.addMyString(ongoingstring)
if self.rightFalse is not None:
ongoingstring = self.rightFalse.addMyString(ongoingstring)
ongoingstring = ongoingstring + " } "
return(ongoingstring)
if __name__ == '__main__':
np.random.seed(3)
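    # Build a small example tree over four binary features, evaluate it for a single
    # sample and then time the evaluation of one million random samples.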
bicycleTree = tree(0,1,'=')
No = bicycleTree.addNode(0,False,1,varNo=1,operator='=')
bicycleTree.addNode(No,False,0)
bicycleTree.addNode(No,True,1)
No = bicycleTree.addNode(0,True,1,varNo=2,operator='=')
bicycleTree.addNode(No,True,0)
No = bicycleTree.addNode(No,False,1,varNo=3,operator='=')
bicycleTree.addNode(No,True,0)
bicycleTree.addNode(No,False,1)
import time
x = np.array([True,False,False,False]).reshape(1,4)
y = bicycleTree.eval(x)
traceRoute = bicycleTree.trace(x)
print(traceRoute)
print(y)
x = np.random.randint(2, size=(1000000,4))
    t1 = time.perf_counter()
    y = bicycleTree.eval(x)
    t2 = time.perf_counter()
print(t2-t1)
traceRoute = bicycleTree.trace(x)
:::::::::::::::::::::::::::::::::::::::::::::::::::
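# CARTRegressionTree.py: CART regression tree built on binaryTree; every split minimizes
# the summed residual sum of squares (RSS) of the two resulting subsets.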
import numpy as np
from binaryTree import tree
class bRegressionTree:
def _calLRSS(self,y):
yMean = np.sum(y)/len(y)
L2 = np.sum( (y-yMean)**2)
return(L2)
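    # For one feature, test the midpoints between consecutive distinct values and
    # return the split with the smallest total RSS.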
def _bestSplit(self,X,y,feature):
RSS = np.inf #*\label{code:RSSinit}
bestSplit = np.inf
XSort = np.unique(X[:,feature].round(self.xDecimals))
XDiff = (XSort[1:len(XSort)] + XSort[0:len(XSort)-1])/2
for i in range(XDiff.shape[0]):
index = np.less(X[:,feature], XDiff[i])
if not (np.all(index) or np.all(~index)):
RSS_1 = self._calLRSS(y[index])
RSS_2 = self._calLRSS(y[~index])
RSSSplit = RSS_1 + RSS_2 #*\label{code:CARTJustAdd}
if RSS > RSSSplit:
RSS = RSSSplit
bestSplit = XDiff[i]
return (bestSplit, RSS)
def _ComputeValue(self,y):
return(np.sum(y)/len(y))
def _chooseFeature(self,X,y):
G = np.zeros(X.shape[1])
bestSplit = np.zeros(X.shape[1])
for i in range(X.shape[1]):
( bestSplit[i] , G[i] ) = self._bestSplit(X,y,i)
smallest = np.argmin(G)
return (G[smallest], bestSplit[smallest],smallest)
def __init__(self,threshold = 10**-8, xDecimals = 8, minLeafNodeSize=3):
self.bTree = None
self.threshold = threshold
self.xDecimals = xDecimals
self.minLeafNodeSize = minLeafNodeSize
def _GenTree(self,X,y,parentNode,branch):
commonValue = self._ComputeValue(y)
initG = self._calLRSS(y)
if initG < self.threshold or X.shape[0] <= self.minLeafNodeSize:
self.bTree.addNode(parentNode,branch,commonValue)
return()
(G, bestSplit ,chooseA) = self._chooseFeature(X,y)
if G > initG :
self.bTree.addNode(parentNode,branch,commonValue)
return()
        if parentNode is None:
self.bTree = tree(chooseA, bestSplit, '<')
myNo = 0
else:
myNo = self.bTree.addNode(parentNode,branch,bestSplit,operator='<',varNo=chooseA)
index = np.less(X[:,chooseA],bestSplit)
XTrue = X[index,:]
yTrue = y[index]
XFalse = X[~index,:]
yFalse = y[~index]
if XTrue.shape[0] > self.minLeafNodeSize:
self._GenTree(XTrue,yTrue,myNo,True)
else:
commonValue = self._ComputeValue(yTrue)
self.bTree.addNode(myNo,True,commonValue)
if XFalse.shape[0] > self.minLeafNodeSize:
self._GenTree(XFalse,yFalse,myNo,False)
else:
commonValue = self._ComputeValue(yFalse)
self.bTree.addNode(myNo,False,commonValue)
return()
def fit(self, X,y):
self._GenTree(X,y,None,None)
def predict(self, X):
return(self.bTree.eval(X))
def decision_path(self, X):
return(self.bTree.trace(X))
def weightedPathLength(self,X):
return(self.bTree.weightedPathLength(X))
def numberOfLeafs(self):
return(self.bTree.numberOfLeafs())
if __name__ == '__main__':
np.random.seed(42)
numberOfSamples = 10000
X = np.random.rand(numberOfSamples,2)
Y = ( np.sin(2*np.pi*X[:,0]) + np.cos(np.pi*X[:,1])) * np.exp(1 -X[:,0]**2 -X[:,1]**2 )
MainSet = np.arange(0,X.shape[0])
Trainingsset = np.random.choice(X.shape[0], int(0.8*X.shape[0]), replace=False)
Testset = np.delete(MainSet,Trainingsset)
regressionError = np.zeros(5)
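    # Fit the tree on training targets with 0% to 20% multiplicative noise and record
    # the mean absolute error on the noise-free test set.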
for i in range(5):
errorRate = 0.05*i #*\label{code:CARTBeispiel3}
errorFactor = 1 + 2*(np.random.rand(Trainingsset.shape[0]) - 0.5)*errorRate #*\label{code:CARTBeispiel1}
XTrain = X[Trainingsset,:]
yTrain = Y[Trainingsset] * errorFactor #*\label{code:CARTBeispiel2}
XTest = X[Testset,:]
yTest = Y[Testset]
myTree = bRegressionTree(xDecimals=3)
myTree.fit(XTrain,yTrain)
yPredict = myTree.predict(XTest)
yDiff = np.abs(yPredict - yTest)
regressionError[i] = np.mean(yDiff)
import matplotlib.pyplot as plt
fig1 = plt.figure(1)
ax = fig1.add_subplot(1,1,1)
x = np.arange(0,0.25,0.05)
ax.plot(x,regressionError,'o-',c='k')
    ax.set_xlabel('Noise fraction')
ax.set_ylabel('Mean Absolute Error')
from mpl_toolkits.mplot3d import Axes3D
fig2 = plt.figure(2)
ax = fig2.add_subplot(1,1,1, projection='3d')
ax.scatter(XTest[:,0],XTest[:,1],yPredict,alpha=0.6,c =yPredict, cmap='gray')
ax.set_xlabel('x[0]')
ax.set_ylabel('x[1]')
ax.set_zlabel('yPredict')
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
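# CARTRegressionTreeRF.py: variant of the CART tree for random forests; _chooseFeature
# evaluates all features (n=0), sqrt(#features) random ones (n=-1) or n random ones per split.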
import numpy as np
from binaryTree import tree
class bRegressionTree:
def _calLRSS(self,y):
yMean = np.sum(y)/len(y)
L2 = np.sum( (y-yMean)**2)
return(L2)
def _bestSplit(self,X,y,feature):
RSS = np.inf
bestSplit = np.inf
XSort = np.unique(X[:,feature].round(self.xDecimals))
XDiff = (XSort[1:len(XSort)] + XSort[0:len(XSort)-1])/2
for i in range(XDiff.shape[0]):
index = np.less(X[:,feature], XDiff[i])
if not (np.all(index) or np.all(~index)):
RSS_1 = self._calLRSS(y[index])
RSS_2 = self._calLRSS(y[~index])
RSSSplit = RSS_1 + RSS_2
if RSS > RSSSplit:
RSS = RSSSplit
bestSplit = XDiff[i]
return (bestSplit, RSS)
def _ComputeValue(self,y):
return(np.sum(y)/len(y))
def _chooseFeature(self,X,y):
G = np.inf*np.ones(X.shape[1]) #*\label{code:RF:0}
bestSplit = np.zeros(X.shape[1])
if self.n == 0: #*\label{code:RF:2}
feature = np.arange(X.shape[1])
elif self.n == -1:
feature = np.random.choice(X.shape[1],int(np.sqrt(X.shape[1])),replace=False)
else:
feature = np.random.choice(X.shape[1],self.n,replace=False)
for i in feature: #*\label{code:RF:3}
( bestSplit[i] , G[i] ) = self._bestSplit(X,y,i)
smallest = np.argmin(G) #*\label{code:RF:4}
return (G[smallest], bestSplit[smallest],smallest)
def __init__(self,n = 0, threshold = 10**-8, xDecimals = 8, minLeafNodeSize=3):
        self.n = n
self.bTree = None
self.threshold = threshold
self.xDecimals = xDecimals
self.minLeafNodeSize = minLeafNodeSize
def _GenTree(self,X,y,parentNode,branch):
commonValue = self._ComputeValue(y)
initG = self._calLRSS(y)
if initG < self.threshold or X.shape[0] <= self.minLeafNodeSize:
self.bTree.addNode(parentNode,branch,commonValue)
return()
(G, bestSplit ,chooseA) = self._chooseFeature(X,y)
if G > initG :
self.bTree.addNode(parentNode,branch,commonValue)
return()
        if parentNode is None:
self.bTree = tree(chooseA, bestSplit, '<')
myNo = 0
else:
myNo = self.bTree.addNode(parentNode,branch,bestSplit,operator='<',varNo=chooseA)
index = np.less(X[:,chooseA],bestSplit)
XTrue = X[index,:]
yTrue = y[index]
XFalse = X[~index,:]
yFalse = y[~index]
if XTrue.shape[0] > self.minLeafNodeSize:
self._GenTree(XTrue,yTrue,myNo,True)
else:
commonValue = self._ComputeValue(yTrue)
self.bTree.addNode(myNo,True,commonValue)
if XFalse.shape[0] > self.minLeafNodeSize:
self._GenTree(XFalse,yFalse,myNo,False)
else:
commonValue = self._ComputeValue(yFalse)
self.bTree.addNode(myNo,False,commonValue)
return()
def fit(self, X,y):
self._GenTree(X,y,None,None)
def predict(self, X):
return(self.bTree.eval(X))
def decision_path(self, X):
return(self.bTree.trace(X))
def weightedPathLength(self,X):
return(self.bTree.weightedPathLength(X))
def numberOfLeafs(self):
return(self.bTree.numberOfLeafs())
if __name__ == '__main__':
np.random.seed(42)
numberOfSamples = 10000
X = np.random.rand(numberOfSamples,2)
Y = ( np.sin(2*np.pi*X[:,0]) + np.cos(np.pi*X[:,1])) * np.exp(1 -X[:,0]**2 -X[:,1]**2 )
MainSet = np.arange(0,X.shape[0])
Trainingsset = np.random.choice(X.shape[0], int(0.8*X.shape[0]), replace=False)
Testset = np.delete(MainSet,Trainingsset)
regressionError = np.zeros(5)
for i in range(5):
errorRate = 0.05*i
errorFactor = 1 + 2*(np.random.rand(Trainingsset.shape[0]) - 0.5)*errorRate
XTrain = X[Trainingsset,:]
yTrain = Y[Trainingsset] * errorFactor
XTest = X[Testset,:]
yTest = Y[Testset]
myTree = bRegressionTree(xDecimals=3)
myTree.fit(XTrain,yTrain)
yPredict = myTree.predict(XTest)
yDiff = np.abs(yPredict - yTest)
regressionError[i] = np.mean(yDiff)
import matplotlib.pyplot as plt
fig1 = plt.figure(1)
ax = fig1.add_subplot(1,1,1)
x = np.arange(0,0.25,0.05)
ax.plot(x,regressionError,'o-')
    ax.set_xlabel('Noise fraction')
ax.set_ylabel('Mean Absolute Error')
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
fig2 = plt.figure(2)
ax = fig2.add_subplot(1,1,1, projection='3d')
ax.scatter(XTest[:,0],XTest[:,1],yPredict,alpha=0.6,c =yPredict, cmap=cm.jet)
ax.set_xlabel('x[0]')
ax.set_ylabel('x[1]')
ax.set_zlabel('yPredict')
:::::::::::::::::::::::::::::::::::::::::::
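# fullMLP.py: multilayer perceptron with two sigmoid hidden layers and a linear output,
# trained by stochastic gradient descent; a 25% validation split keeps the best weights
# seen so far (early stopping).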
import numpy as np
import scipy.special
import copy
class MLPNet:
def __init__(self, hiddenlayer=(10,10),classification=False):
self.hl = hiddenlayer; self.classification = classification
self.xMin = 0.0; self.xMax = 1.0
self.W = []
self._sigmoid = lambda x: scipy.special.expit(x)
def _initWeights(self):
self.W.append((np.random.rand(self.hl[0],self.il) - 0.5 ))
self.W.append((np.random.rand(self.hl[1],self.hl[0]) - 0.5))
self.W.append((np.random.rand(self.ol,self.hl[1]) - 0.5))
def _calOut(self,X):
O1 = self._sigmoid(self.W[0]@X.T)
O2 = self._sigmoid(self.W[1]@O1)
y = (self.W[len(self.W)-1]@O2).T
return(y)
def predict(self,X):
X = (X - self.xMin) / (self.xMax - self.xMin)
X = np.hstack( (X,np.ones(X.shape[0])[:,None]) )
y = self._calOut(X)
if self.classification: y = np.round(y) #*\label{code:fullmlpbatch:1}
return(y)
def fit(self,X,Y,eta=0.75,maxIter=200,vareps=10**-3,scale=True,XT=None,YT=None): #*\label{code:fullmlpbatch:4}
self.xMin = X.min(axis=0) if scale else 0
self.xMax = X.max(axis=0) if scale else 1
X = (X - self.xMin) / (self.xMax - self.xMin)
X = np.hstack( (X,np.ones(X.shape[0])[:,None]) )
if len(Y.shape) == 1:
Y = Y[:,None]
self.il = X.shape[1]
self.ol = Y.shape[1] #*\label{code:fullmlpbatch:morethanone}
self._initWeights()
(XVal, YVal, X, Y) = self._divValTrainSet(X,Y) #*\label{code:fullmlpbatch:3}
self.train(X,Y,XVal,YVal,eta,maxIter,vareps,XT,YT)
def train(self,X,Y,XVal=None,YVal=None,eta=0.75,maxIter=200,vareps=10**-3,XT=None,YT=None):
if XVal is None: (XVal, YVal, X, Y) = self._divValTrainSet(X,Y)
if len(Y.shape) == 1: Y = Y[:,None]
if len(YVal.shape) == 1: YVal = YVal[:,None]
if self.il != X.shape[1]: X = np.hstack( (X,np.ones(X.shape[0])[:,None]) )
if self.il != XVal.shape[1]: XVal = np.hstack( (XVal,np.ones(XVal.shape[0])[:,None]) )
dW = []
for i in range(len(self.W)):
dW.append(np.zeros_like(self.W[i]))
yp = self._calOut(XVal)
if self.classification: yp = np.round(yp)
meanE = (np.sum((YVal-yp)**2)/XVal.shape[0])/YVal.shape[1]
minError = meanE
minW = copy.deepcopy(self.W)
self.errorVal=[]; self.errorTrain=[]; self.errorTest=[] #*\label{code:fullmlpbatch:2}
mixSet = np.random.choice(X.shape[0],X.shape[0],replace=False)
counter = 0
while meanE > vareps and counter < maxIter:
counter += 1
for m in range(self.ol): #*\label{code:fullmlpbatch:5}
for i in mixSet:
x = X[i,:]
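                    # Forward pass for one sample; dW collects the gradients for output m,
                    # backpropagated through both sigmoid layers.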
O1 = self._sigmoid(self.W[0]@x.T)
O2 = self._sigmoid(self.W[1]@O1)
temp = self.W[2][m,:]*O2*(1-O2)[None,:]
                    dW[2] = np.zeros_like(self.W[2])
                    dW[2][m,:] = O2 # only the weights of output m receive this gradient
dW[1] = temp.T@O1[:,None].T
dW[0] = (O1*(1-O1)*(temp@self.W[1])).T@x[:,None].T
yp = self._calOut(x)[m]
yfactor = np.sum(Y[i,m]-yp)
for j in range(len(self.W)):
self.W[j] += eta * yfactor* dW[j]
yp = self._calOut(XVal)
if self.classification: yp = np.round(yp)
meanE = (np.sum((YVal-yp)**2)/XVal.shape[0])/YVal.shape[1] #*\label{code:fullmlpbatch:6}
self.errorVal.append(meanE)
if meanE < minError:
minError = meanE
minW = copy.deepcopy(self.W)
                self.valChoice = counter
if XT is not None:
yp = self.predict(XT)
if len(YT.shape) == 1: YT = YT[:,None];
meanETest = (np.sum((YT-yp)**2)/XT.shape[0])/YT.shape[1]
self.errorTest.append(meanETest)
yp = self._calOut(X)
if self.classification:
yp = np.round(yp)
meanETrain = (np.sum((Y-yp)**2)/X.shape[0])/Y.shape[1]
self.errorTrain.append(meanETrain)
self.W = copy.deepcopy(minW)
def _divValTrainSet(self, X,Y):
self.ValSet = np.random.choice(X.shape[0],int(X.shape[0]*0.25),replace=False)
self.TrainSet = np.delete(np.arange(0, Y.shape[0] ), self.ValSet)
XVal = X[self.ValSet,:]
YVal = Y[self.ValSet]
X = X[self.TrainSet,:]
Y = Y[self.TrainSet]
return (XVal, YVal, X, Y)
def exportNet(self, filePrefix):
np.savetxt(filePrefix+"MinMax.csv", np.array([self.xMin, self.xMax]), delimiter=",")
np.savetxt(filePrefix+"W0.csv", self.W[0], delimiter=",")
np.savetxt(filePrefix+"W1.csv", self.W[1], delimiter=",")
np.savetxt(filePrefix+"W2.csv", self.W[2], delimiter=",")
def importNet(self,filePrefix, classification=False):
MinMax = np.loadtxt(filePrefix+'MinMax.csv',delimiter=",")
W2 = np.loadtxt(filePrefix+'W2.csv',delimiter=",")
W1 = np.loadtxt(filePrefix+'W1.csv',delimiter=",")
W0 = np.loadtxt(filePrefix+'W0.csv',delimiter=",")
self.W = [W0,W1,W2]
self.hl = (W0.shape[0], W2.shape[1])
self.il = W0.shape[1]
self.ol = W2.shape[0]
self.xMin = MinMax[0]
self.xMax = MinMax[1]
self.classification = classification
if __name__ == '__main__':
np.random.seed(42)
X = np.random.rand(1250,2)
Y = np.zeros( (1250,2) )
index1 = (X[:,0] - 0.25)**2 + (X[:,1] - 0.25)**2 < 0.2**2
Y[index1,0] = 1
index2 = (X[:,0] - 0.75)**2 + (X[:,1] - 0.75)**2 < 0.2**2
Y[index2,1] = 1
TrainSet = np.random.choice(X.shape[0],int(X.shape[0]*0.70), replace=False)
XTrain = X[TrainSet,:]
YTrain = Y[TrainSet]
TestSet = np.delete(np.arange(0, len(Y) ), TrainSet)
XTest = X[TestSet,:]
YTest = Y[TestSet]
myPredict = MLPNet(hiddenlayer=(24,24),classification=True)
myPredict.fit(XTrain,YTrain,maxIter=1200, XT=XTest , YT=YTest)
yp = myPredict.predict(XTest)
fp = np.sum(np.abs(yp - YTest))/len(TestSet)*100
    print('correctly classified %0.1f%%' % (100-fp))
    print('misclassified %0.1f%%' % (fp))
myPredict.exportNet('foobar')
justTest = MLPNet()
justTest.importNet('foobar',classification=True)
yp = justTest.predict(XTest)
fp = np.sum(np.abs(yp - YTest))/len(TestSet)*100
    print('correctly classified %0.1f%%' % (100-fp))
    print('misclassified %0.1f%%' % (fp))
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
plt.close('all')
fig1 = plt.figure(1)
ax = fig1.add_subplot(1,1,1)
circle1 = plt.Circle((0.25, 0.25), 0.2, color='k', alpha=0.3)
circle2 = plt.Circle((0.75, 0.75), 0.2, color='k', alpha=0.3)
ax.add_artist(circle1)
ax.add_artist(circle2)
index1 = np.logical_and( (XTest[:,0] - 0.25)**2 + (XTest[:,1] - 0.25)**2 < 0.2**2 , yp[: ,0]==0 )
ax.scatter(XTest[index1,0],XTest[index1,1], marker='v',c='r')
index2 = np.logical_and( (XTest[:,0] - 0.75)**2 + (XTest[:,1] - 0.75)**2 < 0.2**2, yp[: ,1]==0 )
ax.scatter(XTest[index2,0],XTest[index2,1], marker='^',c='r')
ax.scatter(XTest[yp[:,0]==1,0],XTest[yp[:,0]==1,1], marker='+',c='k')
ax.scatter(XTest[yp[:,1]==1,0],XTest[yp[:,1]==1,1], marker='o',c='k')
ax.set_xlabel('$x_0$')
ax.set_ylabel('$x_1$')
ax.set_xlim(0,1)
ax.set_ylim(0,1)
ax.axis('equal')
fig3 = plt.figure(3)
ax = fig3.add_subplot(1,1,1)
epochen = np.arange(len(myPredict.errorVal))
    ax.plot(epochen, np.array(myPredict.errorVal), 'r-.', label='Validation')
ax.plot(epochen, np.array(myPredict.errorTest), 'k--', label='Test')
ax.plot(epochen, np.array(myPredict.errorTrain), 'k:', label='Training' )
ax.legend()
    ax.set_xlabel('Training epoch')
    ax.set_ylabel('Average error')
:::::::::::::::::::::::::::::::::::::::::::::::::::::::
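# Pearson correlation captures only linear dependence: the parabola (x-0.5)**2, symmetric
# about the mean of x, is uncorrelated with x, while the monotone x**2 still correlates strongly.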
import numpy as np
x = np.arange(0,1.1,0.1)
y1 = 0.5*x
y2 = (x-0.5)**2
y3 = np.log(x+1)
data = np.vstack( (x,y1,y2,y3) )
kor = np.corrcoef(data)
print(kor[0,:])
y4 = x**2
data = np.vstack( (x,y4) )
kor = np.corrcoef(data)
print(kor[0,1])
:::::::::::::::::::::::::::::::::::::::::::
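# Feature scaling on the car database: min-max scaling to [0,1] (XHat) versus
# standardization (XTilde), followed by a regression tree on three selected features.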
import numpy as np
f = open("AutodatenbankReduziert.csv", encoding="ISO-8859-1")
header = f.readline().rstrip('\n') # skip the header
featureNames = header.split(',')
dataset = np.loadtxt(f, delimiter=",", skiprows=1, usecols=range(2,9))
f.close()
xmax = np.max(dataset,axis=0)
xmin = np.min(dataset,axis=0)
XHat = (dataset - xmin) / (xmax-xmin)
print(np.std(XHat,axis=0))
print(np.mean(XHat,axis=0))
print(np.median(XHat,axis=0))
xbar = np.mean(dataset,axis=0)
sigma = np.std(dataset,axis=0)
XTilde = (dataset - xbar) / sigma
from CARTRegressionTree import bRegressionTree
MainSet = np.arange(0,dataset.shape[0])
TestSet = np.arange(0,35,4)
Trainingsset= np.delete(MainSet,TestSet)
reduTree = bRegressionTree()
reduData = dataset[:,np.array([1,2,4])] #*\label{merkmaleAuswaehlen:22}
reduTree.fit(reduData[Trainingsset,:],dataset[Trainingsset,0])
PreisApprox = reduTree.predict(reduData[TestSet,:])
errorAbs = np.abs(PreisApprox - dataset[TestSet,0])
print('Average error on the test set:',np.mean(errorAbs))
PreisApprox = reduTree.predict(reduData[:,:])
errorAbs = np.abs(PreisApprox - dataset[:,0])
print('Average error on all data:',np.mean(errorAbs))
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(2,1,1)
ax.plot(dataset[:,0]-xbar[0], 'k' ,marker='o')
ax.set_ylabel('Price')
ax = fig.add_subplot(2,1,2)
ax.plot(dataset[:,1]-xbar[1], 'k' ,marker='x')
ax.set_ylabel('kW')
fig = plt.figure()
ax = fig.add_subplot(2,1,1)
ax.plot(XTilde[:,0], 'k' ,marker='o')
ax.set_ylabel('Price (standardized)')
ax = fig.add_subplot(2,1,2)
ax.plot(XTilde[:,1], 'k' ,marker='x')
ax.set_ylabel('kW (standardized)')
::::::::::::::::::::::::::::::::::::::::::::::
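# PCA on the car database: standardize the features, diagonalize the covariance matrix,
# plot the explained variance per principal component and fit a regression tree on the
# first two components.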
import numpy as np
from CARTRegressionTree import bRegressionTree
f = open("AutodatenbankAllEntries.csv")
header = f.readline().rstrip('\n')
featureNames = header.split(',')
dataset = np.loadtxt(f, delimiter=",", skiprows=4,usecols=range(2,9))
f.close()
Features = dataset[:,1:6]
y = dataset[:,0]
xbar = np.mean(Features,axis=0)
sigma = np.std(Features,axis=0)
X = (Features - xbar) / sigma; print(X[450,:])
Sigma = np.cov(X.T)
(lamb, W) = np.linalg.eig(Sigma)
eigenVar = np.sort(lamb)[::-1]
sumEig = np.sum(lamb)
eigenVar = eigenVar/sumEig
cumVar= np.cumsum(eigenVar)
import matplotlib.pyplot as plt
plt.bar(range(1,len(eigenVar)+1),eigenVar, alpha=0.25, align='center', label='Explained variance ratio', color='gray')
plt.step(range(1,len(eigenVar)+1),cumVar, where='mid', label='Cumulative explained variance', c='k')
plt.xlabel('Principal component'); plt.ylabel('Proportion of variance')
plt.legend()
eigenVarIndex = np.argsort(lamb)[::-1]
WP = W[:,eigenVarIndex[0:2]]
XProj = ( WP.T@X.T ).T
np.random.seed(42)
MainSet = np.arange(0,XProj.shape[0])
TestSet = np.random.choice(MainSet.shape[0],int(MainSet.shape[0]*0.25), replace=False)
TrainSet = np.delete(MainSet, TestSet)
pcaTree = bRegressionTree(minLeafNodeSize=40)
pcaTree.fit(XProj[TrainSet,:],y[TrainSet])
PreisApprox = pcaTree.predict(XProj[TestSet,:])
errorMean = np.mean(np.abs(PreisApprox - y[TestSet]))
print(errorMean)
:::::::::::::::::::::::::::::::::::::::::::::
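# PCA on synthetic 3D data: three noisy linear mixtures of common factors; the eigenvectors
# of the covariance matrix, scaled proportionally to their eigenvalues, are drawn into the point cloud.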
import numpy as np
np.random.seed(42)
xs = np.arange(0.2,0.8,0.0025)
ys = np.arange(0.2,0.8,0.0025)
zs = np.arange(0.2,0.8,0.0025)
dx = 0.15*(np.random.rand(xs.shape[0])-0.5)
dy = 0.30*(np.random.rand(ys.shape[0])-0.5)
dz = 0.20*(np.random.rand(zs.shape[0])-0.5)
x = 0.5*xs + 0.25*ys + 0.3*zs + dx
y = 0.3*xs + 0.45*ys + 0.3*zs + dy
z = 0.1*xs + 0.30*ys + 0.6*zs + dz
dataset = np.vstack( (x,y,z) ).T
xbar = np.mean(dataset,axis=0)
sigma = np.std(dataset,axis=0)
X = (dataset - xbar) / sigma
Sigma = np.cov(X.T)
(lamb, w) = np.linalg.eig(Sigma)
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(2)
ax = fig.add_subplot(1,1,1, projection='3d')
ax.scatter(x,y,z,c='red',s=60,alpha=0.3)
ax.set_xlim([0,1]); ax.set_ylim([0,1]); ax.set_zlim([0,1])
ax.set_xlabel('x'); ax.set_ylabel('y'); ax.set_zlabel('z')
xM = np.array([xbar[0],xbar[0],xbar[0]])
yM = np.array([xbar[1],xbar[1],xbar[1]])
zM = np.array([xbar[2],xbar[2],xbar[2]])
D = np.zeros_like(w)
D[:,0] = lamb[0]/4*w[:,0]
D[:,1] = lamb[1]/4*w[:,1]
D[:,2] = lamb[2]/4*w[:,2]
ax.quiver(xM,yM,zM, D[0,:], D[1,:], D[2,:])
::::::::::::::::::::::::::::::::::::::::::::::::::
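# randomForest.py: random forest regression via bagging; each tree is fitted on a
# bootstrap sample and the forest prediction is the average over all trees.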
import numpy as np
from CARTRegressionTreeRF import bRegressionTree
class randomForestRegression:
def __init__(self,noOfTrees=10,threshold = 10**-8, xDecimals = 8, minLeafNodeSize=3, perc=1):
self.perc = perc
self.threshold = threshold
self.xDecimals = xDecimals
self.minLeafNodeSize = minLeafNodeSize
self.bTree = []
self.noOfTrees = noOfTrees
for i in range(noOfTrees):
tempTree = bRegressionTree(threshold = self.threshold, xDecimals = self.xDecimals , minLeafNodeSize=self.minLeafNodeSize)
self.bTree.append(tempTree)
def fit(self,X,y):
self.samples = []
for i in range(self.noOfTrees):
bootstrapSample = np.random.randint(X.shape[0],size=int(self.perc*X.shape[0]))
self.samples.append(bootstrapSample) #*\label{code:realRF:0}
bootstrapX = X[bootstrapSample,:]
bootstrapY = y[bootstrapSample]
self.bTree[i].fit(bootstrapX,bootstrapY)
def predict(self,X):
ypredict = np.zeros(X.shape[0])
for i in range(self.noOfTrees):
ypredict += self.bTree[i].predict(X)
ypredict = ypredict/self.noOfTrees
return(ypredict)
if __name__ == '__main__':
f = open("hourCleanUp.csv") #*\label{code:realRF:1}
header = f.readline().rstrip('\n')
featureNames = header.split(',')
dataset = np.loadtxt(f, delimiter=",")
f.close()
X = dataset[:,0:13]
Y = dataset[:,15]
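    # Drop the rare samples with value 4 in column 8 (presumably the extreme-weather
    # category of the bike-sharing data).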
index = np.flatnonzero(X[:,8]==4)
X = np.delete(X,index, axis=0)
Y = np.delete(Y,index, axis=0)
np.random.seed(42)
MainSet = np.arange(0,X.shape[0])
Trainingsset = np.random.choice(X.shape[0], int(0.8*X.shape[0]), replace=False)
Testset = np.delete(MainSet,Trainingsset)
XTrain = X[Trainingsset,:]
yTrain = Y[Trainingsset]
XTest = X[Testset,:]
yTest = Y[Testset] #*\label{code:realRF:2}
myForest = randomForestRegression(noOfTrees=24,minLeafNodeSize=5,threshold=2)
myForest.fit(XTrain,yTrain)
yPredict = np.round(myForest.predict(XTest))
yDiff = yPredict - yTest
    print('Mean deviation: %e ' % (np.mean(np.abs(yDiff))))
::::::::::::::::::::::::::::::::::::::::::::::
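# Sequential backward selection (SBS): starting with all features, repeatedly drop the
# feature whose removal yields the smallest validation error until only k features remain.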
import numpy as np
from CARTRegressionTree import bRegressionTree
def SBS(X,y,k, verbose=False):
l=X.shape[1]
MainSet = np.arange(0,X.shape[0])
ValSet = np.random.choice(X.shape[0],int(X.shape[0]*0.25), replace=False)
TrainSet = np.delete(MainSet,ValSet)
suggestedFeatures = np.arange(0,l) #*\label{seqFeature:9}
    while k < l:
        Q = np.zeros(l) # validation error when feature i is removed
for i in range(l):
Xred = np.delete(X, i, axis=1)
reduTree = bRegressionTree(minLeafNodeSize=40)
reduTree.fit(Xred[TrainSet,:],y[TrainSet])
error = y[ValSet] - reduTree.predict(Xred[ValSet,:])
Q[i] = np.mean(np.abs(error)) #*\label{seqFeature:17}
i = np.argmin(Q)
if verbose: print(Q);print(suggestedFeatures[i])
suggestedFeatures = np.delete(suggestedFeatures,i) #*\label{seqFeature:19}
X = np.delete(X, i, axis=1)
l = l -1
return(suggestedFeatures)
np.random.seed(42)
X = np.random.rand(1000,5) #*\label{seqFeature:26}
y = 2*X[:,1] - X[:,3]**2 - 0.01*X[:,0]**3 + 0.1*(X[:,2] - X[:,4]**2) #*\label{seqFeature:27}
suggestedFeatures = SBS(X,y,2, verbose=True)
print(suggestedFeatures)
f = open("AutodatenbankAllEntries.csv")
header = f.readline().rstrip('\n')
featureNames = header.split(',')
fullDataset = np.loadtxt(f, delimiter=",", skiprows=4,usecols=range(2,9)) #*\label{merkmaleAuswaehlen:36}
f.close()
MainSet = np.arange(0,fullDataset.shape[0])
TestSet = np.random.choice(MainSet.shape[0],int(MainSet.shape[0]*0.25), replace=False)
TrainSet = np.delete(MainSet, TestSet)
suggestedFeatures = SBS(fullDataset[TrainSet,1:6],fullDataset[TrainSet,0],2, verbose=True)
print(suggestedFeatures)
fullTree = bRegressionTree(minLeafNodeSize=40)
fullTree.fit(fullDataset[TrainSet,1:6],fullDataset[TrainSet,0])
PreisApprox = fullTree.predict(fullDataset[TestSet,1:6])
errorMean = np.mean(np.abs(PreisApprox - fullDataset[TestSet,0]))
print(errorMean)
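# For comparison: a tree on just two hand-picked features (indices shifted by one,
# since column 0 holds the price target).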
X = fullDataset[:,np.array([1+1,2+1])]
reduTree = bRegressionTree(minLeafNodeSize=40)
reduTree.fit(X[TrainSet,:],fullDataset[TrainSet,0])
PreisApprox = reduTree.predict(X[TestSet,:])
errorMean = np.mean(np.abs(PreisApprox - fullDataset[TestSet,0]))
print(errorMean)
::::::::::::::::::::::::::::::::::::::::::::::
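# Sequential forward selection (SFS): starting with the empty set, repeatedly add the
# feature that yields the smallest validation error until k features are selected.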
import numpy as np
from CARTRegressionTree import bRegressionTree
def SFS(X,y,k, verbose=False):
MainSet = np.arange(0,X.shape[0])
ValSet = np.random.choice(X.shape[0],int(X.shape[0]*0.25), replace=False)
TrainSet = np.delete(MainSet,ValSet)
featuresLeft = np.arange(0,X.shape[1])
suggestedFeatures = np.zeros(1,dtype=int)
l=0
while (k>l):
Q = np.inf*np.ones(X.shape[1])
for i in featuresLeft:
suggestedFeatures[l] = i
reduTree = bRegressionTree(minLeafNodeSize=40)
reduTree.fit(X[np.ix_(TrainSet,suggestedFeatures)],y[TrainSet])
error = y[ValSet] - reduTree.predict(X[np.ix_(ValSet,suggestedFeatures)])
Q[i] = np.mean(np.abs(error))
i = np.argmin(Q)
if verbose: print(Q);print(i)
suggestedFeatures[l] = i
featuresLeft = np.delete(featuresLeft,np.argwhere(featuresLeft == i) )
suggestedFeatures = np.hstack( (suggestedFeatures,np.array([0]) ) )
l = l +1
suggestedFeatures = suggestedFeatures[0:l]
return(suggestedFeatures)
np.random.seed(999)
X = np.random.rand(1000,5) #*\label{seqFeature:26}
y = 2*X[:,1] - X[:,3]**2 - 0.01*X[:,0]**3 + 0.1*(X[:,2] - X[:,4]**2) #*\label{seqFeature:27}
suggestedFeatures = SFS(X,y,2, verbose=True)
print(suggestedFeatures)
f = open("AutodatenbankAllEntries.csv")
header = f.readline().rstrip('\n')
featureNames = header.split(',')
fullDataset = np.loadtxt(f, delimiter=",", skiprows=4,usecols=range(2,9)) #*\label{merkmaleAuswaehlen:36}
f.close()
MainSet = np.arange(0,fullDataset.shape[0])
TestSet = np.random.choice(MainSet.shape[0],int(MainSet.shape[0]*0.25), replace=False)
TrainSet = np.delete(MainSet, TestSet)
suggestedFeatures = SFS(fullDataset[TrainSet,1:6],fullDataset[TrainSet,0],2, verbose=True)
print(suggestedFeatures)
fullTree = bRegressionTree(minLeafNodeSize=40)
fullTree.fit(fullDataset[TrainSet,1:6],fullDataset[TrainSet,0])
PreisApprox = fullTree.predict(fullDataset[TestSet,1:6])
errorMean = np.mean(np.abs(PreisApprox - fullDataset[TestSet,0]))
print(errorMean)
X = fullDataset[:,np.array([1+1,2+1])]
reduTree = bRegressionTree(minLeafNodeSize=40)
reduTree.fit(X[TrainSet,:],fullDataset[TrainSet,0])
PreisApprox = reduTree.predict(X[TestSet,:])
errorMean = np.mean(np.abs(PreisApprox - fullDataset[TestSet,0]))
print(errorMean)
#errorMean = np.zeros( (5,5))
#
#for i in range(0,5):
# for j in range(i+1,5):
# X = fullDataset[:,np.array([i+1,j+1])] #*\label{merkmaleAuswaehlen:40}
# reduTree = bRegressionTree(minLeafNodeSize=40)
# reduTree.fit(X[TrainSet,:],fullDataset[TrainSet,0])
# PreisApprox = reduTree.predict(X[TestSet,:])
# errorMean[i,j] = np.mean(np.abs(PreisApprox - fullDataset[TestSet,0]))
#
#print(errorMean)
::::::::::::::::::::::::::::::::::::::
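# staticImputer replaces NaN entries per feature with the mean, median or most frequent
# value; the main block compares this with model-based imputation via random forest and MLP.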
import numpy as np
def staticImputer(X, mode='mean', feature='all'):
if feature=='all':
feature = np.arange(X.shape[1])
for i in feature:
allData = np.arange(X.shape[0])
index = np.flatnonzero(np.isnan(X[:,i]))
correctData = np.delete(allData,index)
if mode == 'most':
unique, counts = np.unique(X[correctData,i], return_counts=True)
whichOne = np.argmax(counts)
newValue = unique[whichOne]
elif mode == 'median':
newValue = np.median(X[correctData,i])
else:
newValue = np.mean(X[correctData,i])
X[index,i] = newValue
return(X)
if __name__ == '__main__':
np.random.seed(42)
f = open("AutodatenbankAllEntries.csv")
header = f.readline().rstrip('\n')
featureNames = header.split(',')
fullDataset = np.loadtxt(f, delimiter=",", skiprows=4,usecols=range(2,9))
f.close()
index = np.array([338, 361, 389, 126, 145, 308, 325, 463,465,471,472,207,213,217,218,219,483, 36, 485, 489,498,40,251,512,41,9,591,67,75,597,93,606,17,97,19])
index = index - 6 #*\label{code:imputer:0}
X = np.copy(fullDataset)
    X[index,4] = np.nan #*\label{code:imputer:1}
XFilled = staticImputer(X, mode='median')
allIndex = np.arange(X.shape[0])
    trainingSet = np.delete(allIndex,index)
    yTrain = X[trainingSet,4]
    xTrain = X[np.ix_(trainingSet,np.array([0,1,2,3,5,6]))]
xTest = X[np.ix_(index,np.array([0,1,2,3,5,6]))]
from randomForest import randomForestRegression
myForest = randomForestRegression(noOfTrees=50,minLeafNodeSize=7)
myForest.fit(xTrain,yTrain)
XFilled[index,4] = myForest.predict(xTest)
    error = np.mean(np.abs(XFilled[index,4] - fullDataset[index,4]))
    print('Random forest imputation error:', error)
from fullMLP import MLPNet
myPredict = MLPNet(hiddenlayer=(36,12))
myPredict.fit(xTrain,yTrain,maxIter=1200, eta=0.1)
xMLP = myPredict.predict(xTest)
    error = np.mean(np.abs(xMLP.reshape(-1) - fullDataset[index,4]))
    print('MLP imputation error:', error)