Wednesday, May 29, 2019

Künstliche Intelligenz source code, Python, part 9

import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Dense
from keras.models import Sequential
from keras.datasets import mnist
(XTrain, YTrain), (XTest, YTest) = mnist.load_data()
XTrain = XTrain.reshape(60000, 784)
XTest = XTest.reshape(10000, 784)
XTrain = XTrain/255
XTest  = XTest/255
Layer1 = 196
Layer2 = 98
zielZahlMerkmale = 24
autoencoder = Sequential()
autoencoder.add(Dense(Layer1,input_dim=784,activation='sigmoid'))
autoencoder.add(Dense(Layer2,activation='relu'))
autoencoder.add(Dense(zielZahlMerkmale,activation='relu'))
autoencoder.add(Dense(Layer2,activation='relu'))
autoencoder.add(Dense(Layer1,activation='relu'))
autoencoder.add(Dense(784,activation='sigmoid'))
autoencoder.compile(loss='mean_squared_error', optimizer='adam')
autoencoder.fit(XTrain, XTrain, epochs=25, verbose=True,validation_data=(XTest, XTest))
encoder = Sequential()
encoder.add(Dense(Layer1,input_dim=784,activation='sigmoid'))
encoder.add(Dense(Layer2,activation='relu'))
encoder.add(Dense(zielZahlMerkmale,activation='relu'))
for i in range(len(encoder.layers)):
    W = autoencoder.layers[i].get_weights()
    encoder.layers[i].set_weights(W)
   
decoder = Sequential()
decoder.add(Dense(Layer2,input_dim=zielZahlMerkmale, activation='relu'))
decoder.add(Dense(Layer1,activation='relu'))
decoder.add(Dense(784,activation='sigmoid'))
for i in range(len(encoder.layers),len(autoencoder.layers)):
    W = autoencoder.layers[i].get_weights()
    decoder.layers[i-len(encoder.layers)].set_weights(W)
   
encodedData = encoder.predict(XTest)
decodedData = decoder.predict(encodedData)
plt.figure(figsize=(20, 4))
for i in range(10):
    ax = plt.subplot(2, 10, i + 1)
    plt.imshow(XTest[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    ax = plt.subplot(2, 10, i + 1 + 10)
    plt.imshow(decodedData[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()
XTrainRed = encoder.predict(XTrain)
XTestRed = encoder.predict(XTest)
def unscaledKNNclassification(xTrain, yTrain, xQuery, k, normOrd=None):
    diff = xTrain - xQuery
    dist = np.linalg.norm(diff, axis=1, ord=normOrd)
    knearest = np.argsort(dist)[:k]
    # majority vote among the k nearest neighbours; use the yTrain argument,
    # not the global YTrain
    (classification, counts) = np.unique(yTrain[knearest], return_counts=True)
    theChosenClass = np.argmax(counts) #*\label{code:knnmehrheit}
    return(classification[theChosenClass])

errors = 0
for i in range(len(YTest)):
    # classify in the 24-dimensional encoded space (XTrainRed/XTestRed from above)
    myClass = unscaledKNNclassification(XTrainRed, YTrain, XTestRed[i,:], 3)
    if myClass != YTest[i]:
        errors = errors + 1
        print('%d was classified as %d instead of %d' % (i, myClass, YTest[i]))
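
# For comparison, a minimal sketch of the same 3-NN classification with
# scikit-learn's KNeighborsClassifier (assuming scikit-learn is installed),
# again on the 24-dimensional encoded features:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(XTrainRed, YTrain)
print('Accuracy: %.4f' % knn.score(XTestRed, YTest))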

::::::::::::::::::::::::::::::::::::::::
import numpy as np
class tree:
    def __init__(self, varNo, value, operator):
        self.rootNode = treeNode(0, value, varNo=varNo, operator=operator)
        self.nodes = []
        self.nodes.append(self.rootNode)
        self.leafNodes = []
        self.leafNodes.append(0)
    def addNode(self, ChildOf, branch, value, operator='<', varNo=0):
        node = treeNode(len(self.nodes),value,ChildOf=ChildOf,operator=operator,varNo=varNo)
        self.leafNodes.append(node.number)
        self.nodes.append(node)
        parent = self.nodes[ChildOf]
        if branch is True:
           parent.leftTrue = node
        else:
           parent.rightFalse = node
        if parent.leftTrue is not None and parent.rightFalse is not None:
            toDelete = self.leafNodes.index(parent.number)
            del self.leafNodes[toDelete]
        return(node.number)
    def trace(self, x):
        traceRoute = self.rootNode.trace(x)[0]
        return traceRoute
    def eval(self, x):
        traceRoute = self.trace(x)
        y = np.zeros(len(traceRoute))
        for i in range(len(y)):
            y[i] = self.nodes[traceRoute[i][-1]]()           
        return(y)
       
    def weightedPathLength(self, X):
        # sum of the path lengths of all samples in X
        traceroute = self.trace(X)
        pathSum = 0
        for i in range(len(traceroute)):
            pathSum = pathSum + len(traceroute[i]) - 1
        return(pathSum)
       
    def numberOfLeafs(self):
        return(len(self.leafNodes))
    def print(self):
        ongoingstring = "\node {"+self.rootNode.conditionString()+" } "
        if self.rootNode.leftTrue is not None:
            ongoingstring = self.rootNode.leftTrue.addMyString(ongoingstring)
        if self.rootNode.rightFalse is not None:
            ongoingstring = self.rootNode.rightFalse.addMyString(ongoingstring)
        ongoingstring = ongoingstring + " ;"
        return(ongoingstring)
class treeNode:
    def __init__(self, number, value, ChildOf=None, operator='<', varNo=0):
        self.number     = number
        self.childOf    = ChildOf
        self.leftTrue   = None
        self.rightFalse = None
        self.value      = value
        self.varNo      = varNo
        self.operator   = operator
    def __call__(self):
        return(self.value)
    def leafNode(self):
        if self.leftTrue is not None and self.rightFalse is not None:
            return(False)
        else:
            return(True)
    def evalCondition(self, x):
        if self.operator == '=':
            cond = x[:, self.varNo] == self.value
        elif self.operator == '<':
            cond = x[:, self.varNo] < self.value
        else: # case >
            cond = x[:, self.varNo] > self.value
        return cond
    def trace(self, x, index=None, traceRoute=None):
        if index is None:
            index = np.arange(len(x))
        if traceRoute is None:
            traceRoute = [[] for _ in range(len(x))]
        for k in index:
            traceRoute[k].append(self.number)
        if self.leafNode():
            return (traceRoute, index)
        cond = self.evalCondition(x[index])
        trueIndex  = index[cond]
        falseIndex = index[~cond]
        if self.leftTrue is not None and trueIndex.size != 0:
            traceRoute = self.leftTrue.trace(x, trueIndex, traceRoute)[0]
        if self.rightFalse is not None and falseIndex.size != 0:
            traceRoute =  self.rightFalse.trace(x, falseIndex, traceRoute)[0]
        return (traceRoute, index)
    def conditionString(self):
        # TikZ node label; the backslash must be escaped ("\node", "\tiny"
        # would otherwise be read as newline/tab escapes)
        if not self.leafNode():
            mystring = "\\tiny %d || x[%d] %s %.2f" % (self.number, self.varNo, self.operator, self.value)
        else:
            mystring = "\\tiny %d || %.2f" % (self.number, self.value)
        return(mystring)
    def addMyString(self, ongoingstring):
        if not self.leafNode():
            ongoingstring = ongoingstring + "child { node {"+self.conditionString()+" } "
        else:
            ongoingstring = ongoingstring + "child { node[fill=gray!30] {"+self.conditionString()+" } "
        if self.leftTrue is not None:
            ongoingstring = self.leftTrue.addMyString(ongoingstring)
        if self.rightFalse is not None:
            ongoingstring = self.rightFalse.addMyString(ongoingstring)
        ongoingstring = ongoingstring + " } "
           
        return(ongoingstring)
       

if __name__ == '__main__':
    np.random.seed(3)
    bicycleTree = tree(0,1,'=')
    No = bicycleTree.addNode(0,False,1,varNo=1,operator='=')
    bicycleTree.addNode(No,False,0)
    bicycleTree.addNode(No,True,1)
    No = bicycleTree.addNode(0,True,1,varNo=2,operator='=')
    bicycleTree.addNode(No,True,0)
    No = bicycleTree.addNode(No,False,1,varNo=3,operator='=')
    bicycleTree.addNode(No,True,0)
    bicycleTree.addNode(No,False,1)
    import time
    x = np.array([True,False,False,False]).reshape(1,4)
    y = bicycleTree.eval(x)
    traceRoute = bicycleTree.trace(x)
    print(traceRoute)
    print(y)
    x = np.random.randint(2, size=(1000000,4))
    t1 = time.perf_counter()  # time.clock() was removed in Python 3.8
    y = bicycleTree.eval(x)
    t2 = time.perf_counter()
    print(t2-t1)
    traceRoute = bicycleTree.trace(x)

:::::::::::::::::::::::::::::::::::::::::::::::::::
import numpy as np
from binaryTree import tree
class bRegressionTree:
    def _calLRSS(self,y):
        yMean = np.sum(y)/len(y)
        L2 = np.sum( (y-yMean)**2)
        return(L2)
    def _bestSplit(self,X,y,feature):
        RSS = np.inf #*\label{code:RSSinit}
        bestSplit = np.inf
        XSort = np.unique(X[:,feature].round(self.xDecimals))
        XDiff = (XSort[1:len(XSort)] + XSort[0:len(XSort)-1])/2
        for i in range(XDiff.shape[0]):
            index = np.less(X[:,feature], XDiff[i])
            if not (np.all(index) or np.all(~index)):
                RSS_1 = self._calLRSS(y[index])
                RSS_2 = self._calLRSS(y[~index])
                RSSSplit = RSS_1 + RSS_2 #*\label{code:CARTJustAdd}
                if RSS > RSSSplit:
                    RSS = RSSSplit
                    bestSplit = XDiff[i]
        return (bestSplit, RSS)
    def _ComputeValue(self,y):
        return(np.sum(y)/len(y))
    def _chooseFeature(self,X,y):
        G         = np.zeros(X.shape[1])
        bestSplit = np.zeros(X.shape[1])
        for i in range(X.shape[1]):
            ( bestSplit[i] , G[i] ) = self._bestSplit(X,y,i)
        smallest = np.argmin(G)
        return (G[smallest], bestSplit[smallest],smallest)
    def __init__(self,threshold = 10**-8, xDecimals = 8, minLeafNodeSize=3):
        self.bTree = None
        self.threshold = threshold
        self.xDecimals = xDecimals
        self.minLeafNodeSize = minLeafNodeSize
    def _GenTree(self,X,y,parentNode,branch):
        commonValue = self._ComputeValue(y)
        initG = self._calLRSS(y)
        if  initG < self.threshold or X.shape[0] <= self.minLeafNodeSize:
            self.bTree.addNode(parentNode,branch,commonValue)
            return()   
           
        (G, bestSplit ,chooseA) = self._chooseFeature(X,y)
        if  G  > initG :
            self.bTree.addNode(parentNode,branch,commonValue)
            return()   
       
        if parentNode is None:
            self.bTree = tree(chooseA, bestSplit, '<')
            myNo = 0
        else:
            myNo = self.bTree.addNode(parentNode,branch,bestSplit,operator='<',varNo=chooseA)
        index = np.less(X[:,chooseA],bestSplit)
        XTrue  = X[index,:]
        yTrue  = y[index]
        XFalse = X[~index,:]
        yFalse = y[~index]
               
        if XTrue.shape[0] > self.minLeafNodeSize:
            self._GenTree(XTrue,yTrue,myNo,True)
        else:
            commonValue = self._ComputeValue(yTrue)
            self.bTree.addNode(myNo,True,commonValue)
        if XFalse.shape[0] > self.minLeafNodeSize:
            self._GenTree(XFalse,yFalse,myNo,False)
        else:
            commonValue = self._ComputeValue(yFalse)
            self.bTree.addNode(myNo,False,commonValue)
        return()
    def fit(self, X,y):
        self._GenTree(X,y,None,None)
   
    def predict(self, X):
        return(self.bTree.eval(X))
   
    def decision_path(self, X):
        return(self.bTree.trace(X))
       
    def weightedPathLength(self,X):
        return(self.bTree.weightedPathLength(X))
       
    def numberOfLeafs(self):
        return(self.bTree.numberOfLeafs())
       
if __name__ == '__main__':       
    np.random.seed(42)
    numberOfSamples = 10000
    X = np.random.rand(numberOfSamples,2)
    Y = ( np.sin(2*np.pi*X[:,0]) + np.cos(np.pi*X[:,1])) * np.exp(1 -X[:,0]**2 -X[:,1]**2 )
   
    MainSet = np.arange(0,X.shape[0])
    Trainingsset = np.random.choice(X.shape[0], int(0.8*X.shape[0]), replace=False)
    Testset = np.delete(MainSet,Trainingsset)
   
    regressionError = np.zeros(5)
    for i in range(5):
        errorRate = 0.05*i #*\label{code:CARTBeispiel3}
        errorFactor = 1 + 2*(np.random.rand(Trainingsset.shape[0]) - 0.5)*errorRate #*\label{code:CARTBeispiel1}
        XTrain = X[Trainingsset,:]
        yTrain = Y[Trainingsset] * errorFactor #*\label{code:CARTBeispiel2}
        XTest = X[Testset,:]
        yTest = Y[Testset]
       
        myTree = bRegressionTree(xDecimals=3)
        myTree.fit(XTrain,yTrain)
        yPredict = myTree.predict(XTest)
        yDiff = np.abs(yPredict - yTest)
        regressionError[i] = np.mean(yDiff)
   
    import matplotlib.pyplot as plt
    fig1 = plt.figure(1)
    ax = fig1.add_subplot(1,1,1)
    x = np.arange(0,0.25,0.05)
    ax.plot(x,regressionError,'o-',c='k')
    ax.set_xlabel('% Noise')
    ax.set_ylabel('Mean Absolute Error')
   
    from mpl_toolkits.mplot3d import Axes3D
   
    fig2 = plt.figure(2)
    ax = fig2.add_subplot(1,1,1, projection='3d')
    ax.scatter(XTest[:,0],XTest[:,1],yPredict,alpha=0.6,c =yPredict, cmap='gray')
    ax.set_xlabel('x[0]')
    ax.set_ylabel('x[1]')
    ax.set_zlabel('yPredict')
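
    # As a cross-check, a minimal sketch of the same regression with
    # scikit-learn's DecisionTreeRegressor (assuming scikit-learn is
    # installed); XTrain, yTrain, XTest, yTest hold the data of the last
    # loop iteration (20% noise):
    from sklearn.tree import DecisionTreeRegressor
    skTree = DecisionTreeRegressor(min_samples_leaf=3)
    skTree.fit(XTrain, yTrain)
    print('sklearn MAE:', np.mean(np.abs(skTree.predict(XTest) - yTest)))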


:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
import numpy as np
from binaryTree import tree
class bRegressionTree:
    def _calLRSS(self,y):
        yMean = np.sum(y)/len(y)
        L2 = np.sum( (y-yMean)**2)
        return(L2)
    def _bestSplit(self,X,y,feature):
        RSS = np.inf
        bestSplit = np.inf
        XSort = np.unique(X[:,feature].round(self.xDecimals))
        XDiff = (XSort[1:len(XSort)] + XSort[0:len(XSort)-1])/2
        for i in range(XDiff.shape[0]):
            index = np.less(X[:,feature], XDiff[i])
            if not (np.all(index) or np.all(~index)):
                RSS_1 = self._calLRSS(y[index])
                RSS_2 = self._calLRSS(y[~index])
                RSSSplit = RSS_1 + RSS_2
                if RSS > RSSSplit:
                    RSS = RSSSplit
                    bestSplit = XDiff[i]
        return (bestSplit, RSS)
    def _ComputeValue(self,y):
        return(np.sum(y)/len(y))
    def _chooseFeature(self,X,y):
        G         = np.inf*np.ones(X.shape[1]) #*\label{code:RF:0}
        bestSplit = np.zeros(X.shape[1])
        if self.n == 0: #*\label{code:RF:2}
            feature = np.arange(X.shape[1])
        elif self.n == -1:
            feature = np.random.choice(X.shape[1],int(np.sqrt(X.shape[1])),replace=False)
        else:
            feature = np.random.choice(X.shape[1],self.n,replace=False)
        for i in feature: #*\label{code:RF:3}
            ( bestSplit[i] , G[i] ) = self._bestSplit(X,y,i)
        smallest = np.argmin(G) #*\label{code:RF:4}
        return (G[smallest], bestSplit[smallest],smallest)
    def __init__(self,n = 0, threshold = 10**-8, xDecimals = 8, minLeafNodeSize=3):
        self.n = n  # number of split candidates per node: 0 = all, -1 = sqrt(#features)
        self.bTree = None
        self.threshold = threshold
        self.xDecimals = xDecimals
        self.minLeafNodeSize = minLeafNodeSize
    def _GenTree(self,X,y,parentNode,branch):
        commonValue = self._ComputeValue(y)
        initG = self._calLRSS(y)
        if  initG < self.threshold or X.shape[0] <= self.minLeafNodeSize:
            self.bTree.addNode(parentNode,branch,commonValue)
            return()   
           
        (G, bestSplit ,chooseA) = self._chooseFeature(X,y)
        if  G  > initG :
            self.bTree.addNode(parentNode,branch,commonValue)
            return()   
       
        if parentNode is None:
            self.bTree = tree(chooseA, bestSplit, '<')
            myNo = 0
        else:
            myNo = self.bTree.addNode(parentNode,branch,bestSplit,operator='<',varNo=chooseA)
        index = np.less(X[:,chooseA],bestSplit)
        XTrue  = X[index,:]
        yTrue  = y[index]
        XFalse = X[~index,:]
        yFalse = y[~index]
               
        if XTrue.shape[0] > self.minLeafNodeSize:
            self._GenTree(XTrue,yTrue,myNo,True)
        else:
            commonValue = self._ComputeValue(yTrue) 
            self.bTree.addNode(myNo,True,commonValue)
        if XFalse.shape[0] > self.minLeafNodeSize:
            self._GenTree(XFalse,yFalse,myNo,False)
        else:
            commonValue = self._ComputeValue(yFalse)
            self.bTree.addNode(myNo,False,commonValue)
        return()
    def fit(self, X,y):
        self._GenTree(X,y,None,None)
   
    def predict(self, X):
        return(self.bTree.eval(X))
   
    def decision_path(self, X):
        return(self.bTree.trace(X))
       
    def weightedPathLength(self,X):
        return(self.bTree.weightedPathLength(X))
       
    def numberOfLeafs(self):
        return(self.bTree.numberOfLeafs())
       
if __name__ == '__main__':       
    np.random.seed(42)
    numberOfSamples = 10000
    X = np.random.rand(numberOfSamples,2)
    Y = ( np.sin(2*np.pi*X[:,0]) + np.cos(np.pi*X[:,1])) * np.exp(1 -X[:,0]**2 -X[:,1]**2 )
   
    MainSet = np.arange(0,X.shape[0])
    Trainingsset = np.random.choice(X.shape[0], int(0.8*X.shape[0]), replace=False)
    Testset = np.delete(MainSet,Trainingsset)
   
    regressionError = np.zeros(5)
    for i in range(5):
        errorRate = 0.05*i
        errorFactor = 1 + 2*(np.random.rand(Trainingsset.shape[0]) - 0.5)*errorRate
        XTrain = X[Trainingsset,:]
        yTrain = Y[Trainingsset] * errorFactor
        XTest = X[Testset,:]
        yTest = Y[Testset]
       
        myTree = bRegressionTree(xDecimals=3)
        myTree.fit(XTrain,yTrain)
        yPredict = myTree.predict(XTest)
        yDiff = np.abs(yPredict - yTest)
        regressionError[i] = np.mean(yDiff)
   
    import matplotlib.pyplot as plt
    fig1 = plt.figure(1)
    ax = fig1.add_subplot(1,1,1)
    x = np.arange(0,0.25,0.05)
    ax.plot(x,regressionError,'o-')
    ax.set_xlabel('% Noise')
    ax.set_ylabel('Mean Absolute Error')
   
    from mpl_toolkits.mplot3d import Axes3D
    from matplotlib import cm
    fig2 = plt.figure(2)
    ax = fig2.add_subplot(1,1,1, projection='3d')
    ax.scatter(XTest[:,0],XTest[:,1],yPredict,alpha=0.6,c =yPredict, cmap=cm.jet)
    ax.set_xlabel('x[0]')
    ax.set_ylabel('x[1]')
    ax.set_zlabel('yPredict')
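
    # A small sketch exercising the random feature selection: with n=-1 the
    # tree draws sqrt(#features) split candidates per node (see _chooseFeature).
    rfTree = bRegressionTree(n=-1, xDecimals=3)
    rfTree.fit(XTrain, yTrain)
    print('MAE with random feature subsets:',
          np.mean(np.abs(rfTree.predict(XTest) - yTest)))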

:::::::::::::::::::::::::::::::::::::::::::
import numpy as np
import scipy.special
import copy
class MLPNet:           
    def __init__(self, hiddenlayer=(10,10),classification=False):
        self.hl = hiddenlayer; self.classification = classification
        self.xMin = 0.0; self.xMax = 1.0
        self.W = []
        self._sigmoid = lambda x: scipy.special.expit(x)
    def _initWeights(self):
        self.W.append((np.random.rand(self.hl[0],self.il) - 0.5 ))
        self.W.append((np.random.rand(self.hl[1],self.hl[0]) - 0.5))
        self.W.append((np.random.rand(self.ol,self.hl[1]) - 0.5))
    def _calOut(self,X):
        O1 = self._sigmoid(self.W[0]@X.T)
        O2 = self._sigmoid(self.W[1]@O1)
        y = (self.W[len(self.W)-1]@O2).T
        return(y)
    def predict(self,X):
        X = (X - self.xMin) / (self.xMax - self.xMin)
        X = np.hstack( (X,np.ones(X.shape[0])[:,None]) )
        y = self._calOut(X)
        if self.classification: y = np.round(y) #*\label{code:fullmlpbatch:1}
        return(y)
   
    def fit(self,X,Y,eta=0.75,maxIter=200,vareps=10**-3,scale=True,XT=None,YT=None): #*\label{code:fullmlpbatch:4}
        self.xMin = X.min(axis=0) if scale else 0
        self.xMax = X.max(axis=0) if scale else 1
        X = (X - self.xMin) / (self.xMax - self.xMin)
        X = np.hstack( (X,np.ones(X.shape[0])[:,None]) )
        if len(Y.shape) == 1:
            Y = Y[:,None]
        self.il = X.shape[1]
        self.ol = Y.shape[1] #*\label{code:fullmlpbatch:morethanone}
        self._initWeights()
        (XVal, YVal, X, Y) = self._divValTrainSet(X,Y)         #*\label{code:fullmlpbatch:3}
        self.train(X,Y,XVal,YVal,eta,maxIter,vareps,XT,YT)
    
    def train(self,X,Y,XVal=None,YVal=None,eta=0.75,maxIter=200,vareps=10**-3,XT=None,YT=None):   
        if XVal is None: (XVal, YVal, X, Y) = self._divValTrainSet(X,Y)
        if len(Y.shape) == 1: Y = Y[:,None]
        if len(YVal.shape) == 1: YVal = YVal[:,None]
        if self.il != X.shape[1]: X = np.hstack( (X,np.ones(X.shape[0])[:,None]) )
        if self.il != XVal.shape[1]: XVal = np.hstack( (XVal,np.ones(XVal.shape[0])[:,None]) )
        dW = []
        for i in range(len(self.W)):
            dW.append(np.zeros_like(self.W[i]))
        yp = self._calOut(XVal)
        if self.classification: yp = np.round(yp)
        meanE = (np.sum((YVal-yp)**2)/XVal.shape[0])/YVal.shape[1]
        minError = meanE
        minW = copy.deepcopy(self.W)
        self.errorVal=[]; self.errorTrain=[]; self.errorTest=[] #*\label{code:fullmlpbatch:2}
        mixSet = np.random.choice(X.shape[0],X.shape[0],replace=False)
        counter = 0           
        while meanE > vareps and counter < maxIter:
            counter += 1
            for m in range(self.ol): #*\label{code:fullmlpbatch:5}
                for i in mixSet:
                    x = X[i,:]
                    O1 = self._sigmoid(self.W[0]@x.T)
                    O2 = self._sigmoid(self.W[1]@O1)
                    temp = self.W[2][m,:]*O2*(1-O2)[None,:]
                    dW[2] = O2
                    dW[1] = temp.T@O1[:,None].T  
                    dW[0] = (O1*(1-O1)*(temp@self.W[1])).T@x[:,None].T
                    yp = self._calOut(x)[m]
                    yfactor = np.sum(Y[i,m]-yp)
                    for j in range(len(self.W)):    
                        self.W[j] += eta * yfactor* dW[j]
            yp = self._calOut(XVal)
            if self.classification: yp = np.round(yp)
            meanE = (np.sum((YVal-yp)**2)/XVal.shape[0])/YVal.shape[1] #*\label{code:fullmlpbatch:6}
            self.errorVal.append(meanE)
            if meanE < minError:
                minError = meanE
                minW = copy.deepcopy(self.W)     
                self.valChoise = counter
               
            if XT is not None:
                yp = self.predict(XT)
                if len(YT.shape) == 1: YT = YT[:,None];
                meanETest = (np.sum((YT-yp)**2)/XT.shape[0])/YT.shape[1]
                self.errorTest.append(meanETest)
               
                yp = self._calOut(X)
                if self.classification:
                    yp = np.round(yp)
                meanETrain = (np.sum((Y-yp)**2)/X.shape[0])/Y.shape[1]
                self.errorTrain.append(meanETrain)
        self.W = copy.deepcopy(minW)
   
    def _divValTrainSet(self, X,Y):
        self.ValSet    = np.random.choice(X.shape[0],int(X.shape[0]*0.25),replace=False)
        self.TrainSet  = np.delete(np.arange(0, Y.shape[0] ), self.ValSet)
        XVal     = X[self.ValSet,:]
        YVal     = Y[self.ValSet]
        X        = X[self.TrainSet,:]
        Y        = Y[self.TrainSet]
        return (XVal, YVal, X, Y)
   
    def exportNet(self, filePrefix):
        np.savetxt(filePrefix+"MinMax.csv", np.array([self.xMin, self.xMax]), delimiter=",")
        np.savetxt(filePrefix+"W0.csv", self.W[0], delimiter=",")
        np.savetxt(filePrefix+"W1.csv", self.W[1], delimiter=",")
        np.savetxt(filePrefix+"W2.csv", self.W[2], delimiter=",")
   
    def importNet(self,filePrefix, classification=False):
        MinMax = np.loadtxt(filePrefix+'MinMax.csv',delimiter=",")
        W2 = np.loadtxt(filePrefix+'W2.csv',delimiter=",")
        W1 = np.loadtxt(filePrefix+'W1.csv',delimiter=",")   
        W0 = np.loadtxt(filePrefix+'W0.csv',delimiter=",")
        self.W = [W0,W1,W2]
        self.hl = (W0.shape[0], W2.shape[1])
        self.il = W0.shape[1]
        self.ol = W2.shape[0]
        self.xMin = MinMax[0]
        self.xMax = MinMax[1]
        self.classification = classification
if __name__ == '__main__':
    np.random.seed(42)
    X = np.random.rand(1250,2)
    Y = np.zeros( (1250,2) )
    index1 = (X[:,0] - 0.25)**2 + (X[:,1] - 0.25)**2 < 0.2**2
    Y[index1,0] = 1
    index2 = (X[:,0] - 0.75)**2 + (X[:,1] - 0.75)**2 < 0.2**2
    Y[index2,1] = 1
   
    TrainSet     = np.random.choice(X.shape[0],int(X.shape[0]*0.70), replace=False)
    XTrain       = X[TrainSet,:]
    YTrain       = Y[TrainSet]
    TestSet      = np.delete(np.arange(0, len(Y) ), TrainSet)
    XTest        = X[TestSet,:]
    YTest        = Y[TestSet]
   
    myPredict = MLPNet(hiddenlayer=(24,24),classification=True)
    myPredict.fit(XTrain,YTrain,maxIter=1200, XT=XTest , YT=YTest)
    yp = myPredict.predict(XTest)
   
    fp = np.sum(np.abs(yp - YTest))/len(TestSet)*100
    print('correctly classified %0.1f%%' % (100-fp))
    print('misclassified %0.1f%%' % (fp))
    myPredict.exportNet('foobar')
    justTest = MLPNet()
    justTest.importNet('foobar',classification=True)
    yp = justTest.predict(XTest)
    fp = np.sum(np.abs(yp - YTest))/len(TestSet)*100
    print('correctly classified %0.1f%%' % (100-fp))
    print('misclassified %0.1f%%' % (fp))
   
   
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    from matplotlib import cm
    plt.close('all')
    fig1 = plt.figure(1)
    ax = fig1.add_subplot(1,1,1)
    circle1 = plt.Circle((0.25, 0.25), 0.2, color='k', alpha=0.3)
    circle2 = plt.Circle((0.75, 0.75), 0.2, color='k', alpha=0.3)
    ax.add_artist(circle1)
    ax.add_artist(circle2)
   
    index1 = np.logical_and( (XTest[:,0] - 0.25)**2 + (XTest[:,1] - 0.25)**2 < 0.2**2 , yp[: ,0]==0 )
    ax.scatter(XTest[index1,0],XTest[index1,1], marker='v',c='r')
    index2 = np.logical_and(  (XTest[:,0] - 0.75)**2 + (XTest[:,1] - 0.75)**2 < 0.2**2, yp[: ,1]==0 )
    ax.scatter(XTest[index2,0],XTest[index2,1], marker='^',c='r')
   
    ax.scatter(XTest[yp[:,0]==1,0],XTest[yp[:,0]==1,1], marker='+',c='k')
    ax.scatter(XTest[yp[:,1]==1,0],XTest[yp[:,1]==1,1], marker='o',c='k')
    ax.set_xlabel('$x_0$')
    ax.set_ylabel('$x_1$')
    ax.set_xlim(0,1)
    ax.set_ylim(0,1)
    ax.axis('equal')
   
    fig3 = plt.figure(3)
    ax = fig3.add_subplot(1,1,1)
    epochen = np.arange(len(myPredict.errorVal))
    ax.plot(epochen, np.array(myPredict.errorVal), 'r-.' , label='Validation')
    ax.plot(epochen, np.array(myPredict.errorTest), 'k--', label='Test')
    ax.plot(epochen, np.array(myPredict.errorTrain), 'k:', label='Training' )
    ax.legend()
    ax.set_xlabel('Training epoch')
    ax.set_ylabel('Average error')
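
    # For orientation, a minimal sketch of the same two-circle task with
    # scikit-learn's MLPClassifier (assuming scikit-learn is installed);
    # the (n,2) indicator matrix YTrain is handled as a multilabel problem.
    from sklearn.neural_network import MLPClassifier
    clf = MLPClassifier(hidden_layer_sizes=(24,24), max_iter=1200)
    clf.fit(XTrain, YTrain)
    print('sklearn subset accuracy: %.3f' % clf.score(XTest, YTest))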

:::::::::::::::::::::::::::::::::::::::::::::::::::::::
import numpy as np
x = np.arange(0,1.1,0.1)
y1 = 0.5*x
y2 = (x-0.5)**2
y3 = np.log(x+1)
data = np.vstack( (x,y1,y2,y3) )
kor = np.corrcoef(data)
print(kor[0,:])
y4 = x**2
data = np.vstack( (x,y4) )
kor = np.corrcoef(data)
print(kor[0,1])
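
# Pearson correlation only captures linear dependence; y4 = x**2 is perfectly
# monotone, so a rank correlation rates it as 1. A small sketch with
# scipy.stats (assuming scipy is installed):
from scipy.stats import spearmanr
rho, p = spearmanr(x, y4)
print(rho)  # 1.0, although the Pearson coefficient above is < 1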

:::::::::::::::::::::::::::::::::::::::::::
import numpy as np
f = open("AutodatenbankReduziert.csv", encoding="ISO-8859-1")
header = f.readline().rstrip('\n')  # read the header line with the feature names
featureNames = header.split(',')
dataset = np.loadtxt(f, delimiter=",", skiprows=1, usecols=range(2,9))
f.close()
xmax = np.max(dataset,axis=0)
xmin = np.min(dataset,axis=0)
XHat = (dataset - xmin) / (xmax-xmin)
print(np.std(XHat,axis=0))
print(np.mean(XHat,axis=0))
print(np.median(XHat,axis=0))
xbar = np.mean(dataset,axis=0)
sigma = np.std(dataset,axis=0)
XTilde = (dataset - xbar) / sigma
from CARTRegressionTree import bRegressionTree
MainSet = np.arange(0,dataset.shape[0])
TestSet = np.arange(0,35,4)
Trainingsset= np.delete(MainSet,TestSet)
reduTree = bRegressionTree()
reduData = dataset[:,np.array([1,2,4])] #*\label{merkmaleAuswaehlen:22}
reduTree.fit(reduData[Trainingsset,:],dataset[Trainingsset,0])
PreisApprox = reduTree.predict(reduData[TestSet,:])
errorAbs = np.abs(PreisApprox - dataset[TestSet,0])
print('Average error on the test set:',np.mean(errorAbs))
PreisApprox = reduTree.predict(reduData[:,:])
errorAbs = np.abs(PreisApprox - dataset[:,0])
print('Average error on all data:',np.mean(errorAbs))
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(2,1,1)
ax.plot(dataset[:,0]-xbar[0], 'k' ,marker='o') 
ax.set_ylabel('Price')
ax = fig.add_subplot(2,1,2)
ax.plot(dataset[:,1]-xbar[1], 'k' ,marker='x') 
ax.set_ylabel('kW')
fig = plt.figure()
ax = fig.add_subplot(2,1,1)
ax.plot(XTilde[:,0], 'k' ,marker='o') 
ax.set_ylabel('Price (standardized)')
ax = fig.add_subplot(2,1,2)
ax.plot(XTilde[:,1], 'k' ,marker='x') 
ax.set_ylabel('kW (standardized)')
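
# The same min-max and z-score scaling via scikit-learn, as a cross-check
# against the manual computation above (assuming scikit-learn is installed):
from sklearn.preprocessing import MinMaxScaler, StandardScaler
print(np.allclose(XHat, MinMaxScaler().fit_transform(dataset)))
print(np.allclose(XTilde, StandardScaler().fit_transform(dataset)))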

::::::::::::::::::::::::::::::::::::::::::::::
import numpy as np
from CARTRegressionTree import bRegressionTree
f = open("AutodatenbankAllEntries.csv")
header = f.readline().rstrip('\n') 
featureNames = header.split(',')
dataset = np.loadtxt(f, delimiter=",", skiprows=4,usecols=range(2,9))
f.close()
Features = dataset[:,1:6]
y = dataset[:,0]
xbar = np.mean(Features,axis=0)
sigma = np.std(Features,axis=0)
X = (Features - xbar) / sigma; print(X[450,:])
Sigma = np.cov(X.T)
(lamb, W) = np.linalg.eig(Sigma)
eigenVar = np.sort(lamb)[::-1]
sumEig = np.sum(lamb)
eigenVar = eigenVar/sumEig
cumVar= np.cumsum(eigenVar)
import matplotlib.pyplot as plt
plt.bar(range(1,len(eigenVar)+1),eigenVar, alpha=0.25, align='center', label='Variance share', color='gray')
plt.step(range(1,len(eigenVar)+1),cumVar, where='mid', label='Cumulative variance share', c='k')
plt.xlabel('Principal components'); plt.ylabel('Percentage share')
plt.legend()
eigenVarIndex = np.argsort(lamb)[::-1]
WP = W[:,eigenVarIndex[0:2]]
XProj = ( WP.T@X.T ).T
np.random.seed(42)
MainSet    = np.arange(0,XProj.shape[0])
TestSet    = np.random.choice(MainSet.shape[0],int(MainSet.shape[0]*0.25), replace=False)
TrainSet   = np.delete(MainSet, TestSet)
pcaTree = bRegressionTree(minLeafNodeSize=40)
pcaTree.fit(XProj[TrainSet,:],y[TrainSet])
PreisApprox = pcaTree.predict(XProj[TestSet,:])
errorMean = np.mean(np.abs(PreisApprox - y[TestSet]))
print(errorMean)
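
# The projection can also be computed with scikit-learn's PCA (assuming
# scikit-learn is installed); component signs may differ, which does not
# affect the regression tree:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
XProj2 = pca.fit_transform(X)
print(pca.explained_variance_ratio_)  # compare with eigenVar[0:2]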

:::::::::::::::::::::::::::::::::::::::::::::
import numpy as np
np.random.seed(42)
xs = np.arange(0.2,0.8,0.0025)
ys = np.arange(0.2,0.8,0.0025)
zs = np.arange(0.2,0.8,0.0025)
dx = 0.15*(np.random.rand(xs.shape[0])-0.5)
dy = 0.30*(np.random.rand(ys.shape[0])-0.5)
dz = 0.20*(np.random.rand(zs.shape[0])-0.5)
x = 0.5*xs + 0.25*ys + 0.3*zs + dx
y = 0.3*xs + 0.45*ys + 0.3*zs + dy
z = 0.1*xs + 0.30*ys + 0.6*zs + dz
dataset = np.vstack( (x,y,z) ).T
xbar = np.mean(dataset,axis=0)
sigma = np.std(dataset,axis=0)
X = (dataset - xbar) / sigma
Sigma = np.cov(X.T)
(lamb, w) = np.linalg.eig(Sigma)
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(2)
ax = fig.add_subplot(1,1,1, projection='3d')
ax.scatter(x,y,z,c='red',s=60,alpha=0.3)
ax.set_xlim([0,1]); ax.set_ylim([0,1]); ax.set_zlim([0,1])
ax.set_xlabel('x'); ax.set_ylabel('y'); ax.set_zlabel('z')
xM = np.array([xbar[0],xbar[0],xbar[0]])
yM = np.array([xbar[1],xbar[1],xbar[1]])
zM = np.array([xbar[2],xbar[2],xbar[2]])
D = np.zeros_like(w)
D[:,0] = lamb[0]/4*w[:,0]
D[:,1] = lamb[1]/4*w[:,1]
D[:,2] = lamb[2]/4*w[:,2]
ax.quiver(xM,yM,zM, D[0,:], D[1,:], D[2,:])

::::::::::::::::::::::::::::::::::::::::::::::::::
import numpy as np
from CARTRegressionTreeRF import bRegressionTree
class randomForestRegression:
    def __init__(self,noOfTrees=10,threshold = 10**-8, xDecimals = 8, minLeafNodeSize=3, perc=1):
        self.perc = perc
        self.threshold = threshold
        self.xDecimals = xDecimals
        self.minLeafNodeSize = minLeafNodeSize
        self.bTree = []
        self.noOfTrees = noOfTrees
        for i in range(noOfTrees):
            tempTree = bRegressionTree(threshold = self.threshold, xDecimals = self.xDecimals , minLeafNodeSize=self.minLeafNodeSize)
            self.bTree.append(tempTree)
           
    def fit(self,X,y):
        self.samples = []
        for i in range(self.noOfTrees):
            bootstrapSample = np.random.randint(X.shape[0],size=int(self.perc*X.shape[0]))
            self.samples.append(bootstrapSample)     #*\label{code:realRF:0}
            bootstrapX = X[bootstrapSample,:]
            bootstrapY = y[bootstrapSample]
            self.bTree[i].fit(bootstrapX,bootstrapY)
   
    def predict(self,X):
        ypredict = np.zeros(X.shape[0])
        for i in range(self.noOfTrees):
            ypredict += self.bTree[i].predict(X)
        ypredict = ypredict/self.noOfTrees
        return(ypredict)
       
if __name__ == '__main__':  
    f = open("hourCleanUp.csv") #*\label{code:realRF:1}
    header = f.readline().rstrip('\n') 
    featureNames = header.split(',')
    dataset = np.loadtxt(f, delimiter=",")
    f.close()
   
    X = dataset[:,0:13]
    Y = dataset[:,15]
   
    index = np.flatnonzero(X[:,8]==4)
    X = np.delete(X,index, axis=0)
    Y = np.delete(Y,index, axis=0)
   
    np.random.seed(42)
    MainSet = np.arange(0,X.shape[0])
    Trainingsset = np.random.choice(X.shape[0], int(0.8*X.shape[0]), replace=False)
    Testset = np.delete(MainSet,Trainingsset)
    XTrain = X[Trainingsset,:]
    yTrain = Y[Trainingsset]
    XTest = X[Testset,:]
    yTest = Y[Testset] #*\label{code:realRF:2}
   
    myForest = randomForestRegression(noOfTrees=24,minLeafNodeSize=5,threshold=2)
    myForest.fit(XTrain,yTrain)
    yPredict = np.round(myForest.predict(XTest))
    yDiff = yPredict - yTest
    print('Mean absolute deviation: %e ' % (np.mean(np.abs(yDiff))))
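
    # For reference, a minimal sketch of the same task with scikit-learn's
    # RandomForestRegressor (assuming scikit-learn is installed):
    from sklearn.ensemble import RandomForestRegressor
    skForest = RandomForestRegressor(n_estimators=24, min_samples_leaf=5)
    skForest.fit(XTrain, yTrain)
    skDiff = np.round(skForest.predict(XTest)) - yTest
    print('sklearn mean absolute deviation: %e' % np.mean(np.abs(skDiff)))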
               

::::::::::::::::::::::::::::::::::::::::::::::
import numpy as np
from CARTRegressionTree import bRegressionTree
def SBS(X,y,k, verbose=False):
    l=X.shape[1]
    MainSet = np.arange(0,X.shape[0])
    ValSet  = np.random.choice(X.shape[0],int(X.shape[0]*0.25), replace=False)
    TrainSet  = np.delete(MainSet,ValSet)   
    suggestedFeatures = np.arange(0,l) #*\label{seqFeature:9}
    while (k < l):
        Q = np.zeros(l)
        for i in range(l):
            Xred = np.delete(X, i, axis=1)
            reduTree = bRegressionTree(minLeafNodeSize=40)
            reduTree.fit(Xred[TrainSet,:],y[TrainSet])
            error = y[ValSet] - reduTree.predict(Xred[ValSet,:])
            Q[i] = np.mean(np.abs(error)) #*\label{seqFeature:17}
        i = np.argmin(Q)
        if verbose: print(Q);print(suggestedFeatures[i])
        suggestedFeatures = np.delete(suggestedFeatures,i) #*\label{seqFeature:19}
        X = np.delete(X, i, axis=1)
        l = l -1
    return(suggestedFeatures)

np.random.seed(42)
X = np.random.rand(1000,5) #*\label{seqFeature:26}
y = 2*X[:,1] - X[:,3]**2 - 0.01*X[:,0]**3 + 0.1*(X[:,2] - X[:,4]**2) #*\label{seqFeature:27}
suggestedFeatures = SBS(X,y,2, verbose=True)
print(suggestedFeatures)
f = open("AutodatenbankAllEntries.csv")
header = f.readline().rstrip('\n') 
featureNames = header.split(',')
fullDataset = np.loadtxt(f, delimiter=",", skiprows=4,usecols=range(2,9)) #*\label{merkmaleAuswaehlen:36}
f.close()
MainSet    = np.arange(0,fullDataset.shape[0])
TestSet    = np.random.choice(MainSet.shape[0],int(MainSet.shape[0]*0.25), replace=False)
TrainSet   = np.delete(MainSet, TestSet)
suggestedFeatures = SBS(fullDataset[TrainSet,1:6],fullDataset[TrainSet,0],2, verbose=True)
print(suggestedFeatures)
fullTree = bRegressionTree(minLeafNodeSize=40)
fullTree.fit(fullDataset[TrainSet,1:6],fullDataset[TrainSet,0])
PreisApprox = fullTree.predict(fullDataset[TestSet,1:6])
errorMean = np.mean(np.abs(PreisApprox - fullDataset[TestSet,0]))
print(errorMean)
X = fullDataset[:,np.array([1+1,2+1])]
reduTree = bRegressionTree(minLeafNodeSize=40)
reduTree.fit(X[TrainSet,:],fullDataset[TrainSet,0])
PreisApprox = reduTree.predict(X[TestSet,:])
errorMean = np.mean(np.abs(PreisApprox - fullDataset[TestSet,0]))
print(errorMean)
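
# scikit-learn offers the same backward strategy as SBS via
# SequentialFeatureSelector (assuming scikit-learn is installed); a sketch
# on fresh synthetic data as at the top of this listing:
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.tree import DecisionTreeRegressor
Xs = np.random.rand(1000,5)
ys = 2*Xs[:,1] - Xs[:,3]**2 - 0.01*Xs[:,0]**3 + 0.1*(Xs[:,2] - Xs[:,4]**2)
sfs = SequentialFeatureSelector(DecisionTreeRegressor(min_samples_leaf=40),
                                n_features_to_select=2, direction='backward')
sfs.fit(Xs, ys)
print(sfs.get_support(indices=True))  # the two features judged most useful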
::::::::::::::::::::::::::::::::::::::::::::::
import numpy as np
from CARTRegressionTree import bRegressionTree
def SFS(X,y,k, verbose=False):
    MainSet = np.arange(0,X.shape[0])
    ValSet  = np.random.choice(X.shape[0],int(X.shape[0]*0.25), replace=False)
    TrainSet  = np.delete(MainSet,ValSet)   
    featuresLeft = np.arange(0,X.shape[1])
    suggestedFeatures = np.zeros(1,dtype=int)
    l=0
    while (k>l):
        Q = np.inf*np.ones(X.shape[1])
        for i in featuresLeft:
            suggestedFeatures[l] = i
            reduTree = bRegressionTree(minLeafNodeSize=40)
            reduTree.fit(X[np.ix_(TrainSet,suggestedFeatures)],y[TrainSet])
            error = y[ValSet] - reduTree.predict(X[np.ix_(ValSet,suggestedFeatures)])
            Q[i] = np.mean(np.abs(error))
        i = np.argmin(Q)
        if verbose: print(Q);print(i)
        suggestedFeatures[l] = i
        featuresLeft = np.delete(featuresLeft,np.argwhere(featuresLeft == i) )
        suggestedFeatures = np.hstack( (suggestedFeatures,np.array([0]) ) )
        l = l +1
    suggestedFeatures = suggestedFeatures[0:l]
    return(suggestedFeatures)
np.random.seed(999)
X = np.random.rand(1000,5) #*\label{seqFeature:26}
y = 2*X[:,1] - X[:,3]**2 - 0.01*X[:,0]**3 + 0.1*(X[:,2] - X[:,4]**2) #*\label{seqFeature:27}
suggestedFeatures = SFS(X,y,2, verbose=True)
print(suggestedFeatures)
f = open("AutodatenbankAllEntries.csv")
header = f.readline().rstrip('\n') 
featureNames = header.split(',')
fullDataset = np.loadtxt(f, delimiter=",", skiprows=4,usecols=range(2,9)) #*\label{merkmaleAuswaehlen:36}
f.close()
MainSet    = np.arange(0,fullDataset.shape[0])
TestSet    = np.random.choice(MainSet.shape[0],int(MainSet.shape[0]*0.25), replace=False)
TrainSet   = np.delete(MainSet, TestSet)
suggestedFeatures = SFS(fullDataset[TrainSet,1:6],fullDataset[TrainSet,0],2, verbose=True)
print(suggestedFeatures)
fullTree = bRegressionTree(minLeafNodeSize=40)
fullTree.fit(fullDataset[TrainSet,1:6],fullDataset[TrainSet,0])
PreisApprox = fullTree.predict(fullDataset[TestSet,1:6])
errorMean = np.mean(np.abs(PreisApprox - fullDataset[TestSet,0]))
print(errorMean)
X = fullDataset[:,np.array([1+1,2+1])]
reduTree = bRegressionTree(minLeafNodeSize=40)
reduTree.fit(X[TrainSet,:],fullDataset[TrainSet,0])
PreisApprox = reduTree.predict(X[TestSet,:])
errorMean = np.mean(np.abs(PreisApprox - fullDataset[TestSet,0]))
print(errorMean)

#errorMean = np.zeros( (5,5))
#
#for i in range(0,5):
#    for j in range(i+1,5):
#        X = fullDataset[:,np.array([i+1,j+1])] #*\label{merkmaleAuswaehlen:40}
#        reduTree = bRegressionTree(minLeafNodeSize=40)
#        reduTree.fit(X[TrainSet,:],fullDataset[TrainSet,0])
#        PreisApprox = reduTree.predict(X[TestSet,:])
#        errorMean[i,j] = np.mean(np.abs(PreisApprox - fullDataset[TestSet,0]))
#         
#print(errorMean)

::::::::::::::::::::::::::::::::::::::
import numpy as np
def staticImputer(X, mode='mean', feature='all'):
    if feature=='all':
        feature = np.arange(X.shape[1])
    for i in feature:
        allData = np.arange(X.shape[0])
        index = np.flatnonzero(np.isnan(X[:,i]))
        correctData = np.delete(allData,index)
        if mode == 'most':
            unique, counts = np.unique(X[correctData,i], return_counts=True)
            whichOne = np.argmax(counts)
            newValue = unique[whichOne]
        elif mode == 'median':
            newValue = np.median(X[correctData,i])
        else:
            newValue = np.mean(X[correctData,i])
        X[index,i] = newValue
       
    return(X)
       
if __name__ == '__main__':  
    np.random.seed(42)
    f = open("AutodatenbankAllEntries.csv")
    header = f.readline().rstrip('\n') 
    featureNames = header.split(',')
    fullDataset = np.loadtxt(f, delimiter=",", skiprows=4,usecols=range(2,9))
    f.close()
    index = np.array([338, 361, 389, 126, 145, 308, 325, 463,465,471,472,207,213,217,218,219,483, 36, 485, 489,498,40,251,512,41,9,591,67,75,597,93,606,17,97,19])
    index = index - 6 #*\label{code:imputer:0}
    X = np.copy(fullDataset)
    X[index,4] = np.NaN #*\label{code:imputer:1}
   
    XFilled = staticImputer(X, mode='median')  # fills X in place and returns the same array
   
    allIndex = np.arange(X.shape[0])
    traingsSet = np.delete(allIndex,index)
    yTrain = X[traingsSet,4]
    xTrain = X[np.ix_(traingsSet,np.array([0,1,2,3,5,6]))]
    xTest  = X[np.ix_(index,np.array([0,1,2,3,5,6]))]
   
    from randomForest import randomForestRegression
    myForest = randomForestRegression(noOfTrees=50,minLeafNodeSize=7)
    myForest.fit(xTrain,yTrain)
    XFilled[index,4] = myForest.predict(xTest)
    error = np.mean(np.abs(XFilled[index,4] - fullDataset[index,4]))
    print('Random forest imputation error:', error)
   
    from fullMLP import MLPNet
    myPredict = MLPNet(hiddenlayer=(36,12))
    myPredict.fit(xTrain,yTrain,maxIter=1200, eta=0.1)
    xMLP = myPredict.predict(xTest)
    error = np.mean(np.abs(xMLP.reshape(35,) - fullDataset[index,4]))
    print('MLP imputation error:', error)
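
    # The mean/median/most-frequent strategies of staticImputer correspond to
    # scikit-learn's SimpleImputer (assuming scikit-learn is installed):
    from sklearn.impute import SimpleImputer
    XMissing = np.copy(fullDataset)
    XMissing[index,4] = np.nan
    XFilledSk = SimpleImputer(strategy='median').fit_transform(XMissing)
    print(np.mean(np.abs(XFilledSk[index,4] - fullDataset[index,4])))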
   
   
:::::::::::::::::::::::::::::::::::::::::::::::

