Wednesday, May 29, 2019

Künstliche Intelligenz Source Code Python, Part 12

import numpy as np
from QLearningEnv import boxEnvironment, robotAgent
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.regularizers import l2
class learningAgent():
    def __init__(self,tau=1, memoryMax= 1000000, alpha=0.5, gamma=0.7, updateFeq = 500, badMemory= 3000):
        self.alpha = alpha
        self.gamma = gamma
        self.tau = tau
        self.a = np.array([-2,0,2])
        self.updateFeq = updateFeq
        self.memoryMax = memoryMax
        self.memoryCounter = -1
        self.badMemory = badMemory
        self.initPhase = True
    def setSensors(self, number=5):
        self.distanceSensorValue= np.zeros(max(number,5))
        self.dirtSensor = np.array([0])
        if self.memoryCounter < 0:
            self.MemoryStates = np.inf*np.ones( (self.memoryMax,number+2) )
            self.MemoryActions = np.inf*np.ones(self.memoryMax)
            self.FeedbackDirt = np.inf*np.ones(self.memoryMax)
            self.FeedbackCol = np.zeros(self.memoryMax)
            self.Rewards = np.zeros(self.memoryMax)
    def _dumpSensorsData(self):
        if self.memoryCounter < 1:
            dDiff = 0
        else:
            dDiff = self.distanceSensorValue[2]/10 - self.MemoryStates[self.memoryCounter-1,2]
            dDiff = dDiff*10
        temp = np.hstack( (self.distanceSensorValue/10,self.dirtSensor*10**1,dDiff) )
        self.MemoryStates[self.memoryCounter,:] = temp
    def _dumpActionData(self,w):
        self.MemoryActions[self.memoryCounter] = w
    def collisionFeedback(self, yesOrNo):
        self.FeedbackCol[self.memoryCounter] = int(yesOrNo)
        if np.min(self.MemoryStates[self.memoryCounter,:]) > 0.005:
            self.FeedbackCol[self.memoryCounter] = False
    def dirtCleaned(self,dirt):
        if self.memoryCounter < 0: return
        self.FeedbackDirt[self.memoryCounter] = dirt
       
    def calcReward(self):
        self.Rewards[self.memoryCounter] = 0.05
        self.Rewards[self.memoryCounter] += 10**4*self.FeedbackDirt[self.memoryCounter]
        self.Rewards[self.memoryCounter] -= self.FeedbackCol[self.memoryCounter]*10   
        self.Rewards[self.memoryCounter] -= 10*np.exp(- 5*self.distanceSensorValue[2])
        temp1 = 10*np.exp(- 5*self.distanceSensorValue[4])
        temp2 = 10*np.exp(- 5*self.distanceSensorValue[0])
        if self.MemoryActions[self.memoryCounter] > 0.001 :
            self.Rewards[self.memoryCounter] -= temp1
        elif self.MemoryActions[self.memoryCounter] < -0.001:
            self.Rewards[self.memoryCounter] -= temp2
        else:
            self.Rewards[self.memoryCounter] -= (temp1 + temp2)/2
           
    def _initQ(self):
        XTrain = np.hstack( (self.MemoryStates[0:self.memoryCounter,:],self.MemoryActions[0:self.memoryCounter,None]))
        self.Q = Sequential()
        self.Q.add(Dense(256, input_dim=XTrain.shape[1], activation='relu',kernel_regularizer=l2(0.0001)))
        self.Q.add(Dense(128, activation='relu',kernel_regularizer=l2(0.0001)))
        self.Q.add(Dense(64, activation='relu',kernel_regularizer=l2(0.0001)))
        self.Q.add(Dense(1, activation='linear'))
        clist = [keras.callbacks.EarlyStopping(monitor='loss',patience=5,verbose=0,mode='auto')]
        self.Q.compile(loss='mean_squared_error', optimizer='adam')
        self.Q.fit(XTrain,self.alpha*self.Rewards[0:XTrain.shape[0]], epochs=50, callbacks=clist, batch_size=5, verbose=False)
           
    def _updateQ(self):
        learnSet  = np.arange(0,self.memoryCounter-1)
        index1 = np.flatnonzero(self.Rewards[learnSet] < -0.9)
        if index1.shape[0] > self.badMemory:
            index = np.random.choice(index1.shape[0], self.badMemory, replace=False)
            index1 = index1[index]
        samplesleft = min(index1.shape[0]*5,self.memoryCounter-index1.shape[0]-1)
        index2 = np.random.choice(self.memoryCounter-1, samplesleft, replace=False)
        learnSet = np.hstack( (index1[None,:],index2[None,:]) )
        learnSet = np.reshape(learnSet, learnSet.shape[1])
        X = np.hstack( (self.MemoryStates[learnSet,:],self.MemoryActions[learnSet,None]))
        Qalt = np.squeeze(self.Q.predict(X))
        QChooose = np.zeros( (Qalt.shape[0],len(self.a)) )
        for i in range(len(self.a)): #*\label{code:qlearning:agent:0}
            a = self.a[i]*np.ones_like(self.MemoryActions[learnSet,None])
            Xtemp = np.hstack( (self.MemoryStates[learnSet+1,:],a))
            QChooose[:,i] = np.squeeze(self.Q.predict(Xtemp)) #*\label{code:qlearning:agent:1}
        Qmax = np.max(QChooose,axis=1) #*\label{code:qlearning:agent:2}
        r = self.Rewards[learnSet]
        QNeu = (1-self.alpha)*Qalt+self.alpha*(r + self.gamma*Qmax) #*\label{code:qlearning:agent:3}
        self.Q.fit(X,QNeu, epochs=3, batch_size=5, verbose=False) #*\label{code:qlearning:agent:4}
       
    def _softmax(self,Qa):
        return np.exp(Qa/self.tau) / np.sum(np.exp(Qa/self.tau))
    def chooseAction(self,Qa):
        toChoose = np.arange(0,len(Qa))
        pW = self._softmax(Qa)
        if np.any(np.isnan(pW)) or np.any(np.isinf(pW)):
            choosenA = np.random.randint(0,len(Qa))
        else:
            choosenA = np.random.choice(toChoose,replace=False, p=pW)
        return(choosenA)
   
    def changeAngularVelocity(self):
        self.calcReward() #*\label{code:qlearning:agent:112}
        self.memoryCounter += 1
        if self.memoryCounter%self.updateFeq == 0 and not self.initPhase:
            self._updateQ()       
        if self.memoryCounter > min(500,self.memoryMax-1) and self.initPhase:
            self.initPhase = False
            self._initQ()
            self._updateQ()
        self._dumpSensorsData() #*\label{code:qlearning:agent:13}       
        if self.initPhase: #*\label{code:qlearning:agent:5}
            w = 4*(np.random.rand(1) - 0.5)
            if self.distanceSensorValue[1] < 0.75 or self.distanceSensorValue[3] < 0.75:
                w = 1.0 if self.distanceSensorValue[1] < self.distanceSensorValue[3] else -1.0
            if self.distanceSensorValue[0] < 0.5 or self.distanceSensorValue[4] < 0.5:
                w = 1.5 if self.distanceSensorValue[0] < self.distanceSensorValue[4] else -1.5
            if self.distanceSensorValue[2] < 0.5 : w = -2 #*\label{code:qlearning:agent:6}
        else:
            Qa = np.zeros(len(self.a))  # float array, so the predicted Q values are not truncated to int
            j = self.MemoryStates[self.memoryCounter,:].shape[0]
            x = np.zeros( (1, j+1) )
            x[0,0:j] = self.MemoryStates[self.memoryCounter,:] #*\label{code:qlearning:agent:7}
            for i in range(len(self.a)):
                x[0,self.MemoryStates[self.memoryCounter,:].shape[0]] = self.a[i] #*\label{code:qlearning:agent:8}
                Qa[i] = self.Q.predict(x)[0] #*\label{code:qlearning:agent:9}
            ca = self.chooseAction(Qa) #*\label{code:qlearning:agent:10}
            w  = self.a[ca] #*\label{code:qlearning:agent:11}       
        self._dumpActionData(w) #*\label{code:qlearning:agent:12}
        return(w)
    def CoopData(self,d,theta1,theta2):
        pass
#    def addOldMemory(self,States,Actions,FeedbackDirt,FeedbackCol,counter):
#        self.setSensors(number=States.shape[1]-1)
#        if self.memoryCounter < 0 : self.memoryCounter = 0
#        self.MemoryStates[self.memoryCounter:self.memoryCounter+counter,:] = np.copy(States[0:counter,:])
#        self.MemoryActions[self.memoryCounter:self.memoryCounter+counter] = np.copy(Actions[0:counter])
#        self.FeedbackDirt[self.memoryCounter:self.memoryCounter+counter] = np.copy(FeedbackDirt[0:counter])
#        self.FeedbackCol[self.memoryCounter:self.memoryCounter+counter] = np.copy(FeedbackCol[0:counter])
#        self.memoryCounter += counter-1
#        self.calcReward(recomputeAll=True)
       
if __name__ == '__main__':
    np.random.seed(42)
    jack = learningAgent(tau=0.3, gamma=0.8, alpha=0.4,badMemory= 10000)
    jack.updateFeq = 500
    for i in range(7):
        print('new world:',i)
        myNewBox = boxEnvironment()
        myNewBox.addRobot(jack)
        myNewBox.startSim(300)
        myNewBox.evaluation()
    myNewBox.plotScene()
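
To see the core of the update in _updateQ in isolation, here is a minimal standalone sketch of the blended target QNeu = (1-alpha)*Qalt + alpha*(r + gamma*Qmax); all numbers are invented for illustration and do not come from a simulation run.

import numpy as np
# Hypothetical values, only to illustrate the target computed in _updateQ.
alpha, gamma = 0.4, 0.8                  # same hyperparameters as the agent jack above
Qalt = np.array([0.2, -0.1])             # current estimates Q(s,a) for two stored transitions
r    = np.array([1.0, -10.0])            # rewards remembered for these transitions
Qmax = np.array([0.5,  0.3])             # best predicted value over the three angular velocities
QNeu = (1 - alpha)*Qalt + alpha*(r + gamma*Qmax)
print(QNeu)                              # the targets the network is then refitted on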

:::::::::::::::::::::::::::::::::::::::::::
import numpy as np
delta = np.zeros( (9,9) )
delta[3,6] = delta[3,0] = delta[2,1] = delta[1,0] = delta[0,1] = delta[0,3] = 0.5
delta[6,3] = delta[5,8] = delta[7,6] = delta[7,8] = 0.5
delta[4,4] = delta[2,2] = 1
delta[8,7] = delta[8,5] = delta[6,7] = delta[4,5] = delta[4,7] = delta[2,5] = 1/3
s = np.zeros( (9,1) )
s[0] = 1
sNew = np.linalg.matrix_power(delta,5)@s
sNew = np.linalg.matrix_power(delta,100)@s
print(sNew)
s = np.zeros( (9,1) )
s[6] = 1
sNew = np.linalg.matrix_power(delta,100)@s
print(sNew)
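
A quick sanity check for the transition matrix above, assuming delta is meant to be column-stochastic so that delta @ s remains a probability distribution: every column should sum to 1, and the propagated state vector should stay normalized.

print(np.allclose(delta.sum(axis=0), 1.0))  # column sums of the transition matrix
print(np.isclose(sNew.sum(), 1.0))          # the state distribution stays normalized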

:::::::::::::::::::::::::::::::::::::::::::::::
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib.patches import Rectangle, Arrow
class robotAgent():
    def __init__(self):
        pass
    def CoopData(self,X,Y,nX,nY):
        pass
    def collisionFeedback(self, yesOrNo):
        pass
    def dirtCleaned(self,dirt):
        pass
   
    def setSensors(self, number=5):
        self.distanceSensorValue= np.zeros(max(number,5))
        self.dirtSensor = np.array([0])
        
    def changeAngularVelocity(self):   
        w = 0 if self.dirtSensor > 0.001 else 2*np.sign(np.random.rand(1)-0.5)
        if self.distanceSensorValue[1] < 3.0 or self.distanceSensorValue[3] < 3.0:
            w = 1.0 if self.distanceSensorValue[1] < self.distanceSensorValue[3] else -1.0
        if self.distanceSensorValue[0] < 1.0 or self.distanceSensorValue[4] < 1.0:
            w = 1.5 if self.distanceSensorValue[0] < self.distanceSensorValue[4] else -1.5
        if self.distanceSensorValue[2] < 1.25 : w = -2
        return(w)
class boxEnvironment():
    def __init__(self):
        self.rAgents=[] #*\label{code:qlearning:ann:1}
        self.rAgentsX=[]; self.rAgentsY=[]
        self.rAgentsTheta=[]; self.rAgentsW=[]
        self.rAgentsR=[]
        self.rAgentsCol=[]
        self.rAgentsDirt=[]
        self.v = [] #*\label{code:qlearning:ann:2}
        self.dt = 0.01 #*\label{code:qlearning:ann:3}
        self.xBox = [-4, 4] #*\label{code:qlearning:ann:4}
        self.yBox = [-3, 3]  #*\label{code:qlearning:ann:5}
        self.sensors = np.array([-np.pi/6,-np.pi/3,0,np.pi/3,np.pi/6 ]) #*\label{code:qlearning:ann:sensors}
        x = np.arange(self.xBox[0]+0.05,self.xBox[1],0.05) #*\label{code:qlearning:dirt:1}
        y = np.arange(self.yBox[0]+0.05,self.yBox[1],0.05)
        xv, yv = np.meshgrid(x, y)
        self.xv = np.resize(xv,(xv.size,)); self.yv = np.resize(yv,(yv.size,))
        self.dirt = np.ones_like(self.xv) #*\label{code:qlearning:dirt:2}
        tisch = robotAgent() #*\label{code:qlearning:ann:6}
        self.addRobot(tisch, r=1.75)
        self.rAgentsX[0] = 0
        self.rAgentsY[0] = 0
        self.v[0] = 0 #*\label{code:qlearning:ann:7}
    def addRobot(self,robot, v=0.5, r=0.25):
        robot.setSensors(len(self.sensors))
        self.rAgents.append(robot) #*\label{code:qlearning:ann:8}
        self.rAgentsR.append(r)
        self.v.append(v) #*\label{code:qlearning:ann:11}
        self.rAgentsCol.append(0) #*\label{code:qlearning:ann:9}
        self.rAgentsY.append( (len(self.rAgents)-1)*0.5 - 3.0 ) #*\label{code:qlearning:ann:10}
        lr = 1 if len(self.rAgents)%2 else -1
        self.rAgentsX.append(lr*3) #*\label{code:qlearning:ann:12}
        self.rAgentsTheta.append(2*np.pi*np.random.rand(1)) #*\label{code:qlearning:ann:62}
        self.rAgentsDirt.append(0)
    def _moveRobots(self):
        for i in range(len(self.rAgents)):
            w = np.clip(self.rAgents[i].changeAngularVelocity(),-2,2) #*\label{code:qlearning:ann:13}
            self.rAgentsX[i] += self.dt*self.v[i] * np.cos(self.rAgentsTheta[i]) #*\label{code:qlearning:ann:14}
            self.rAgentsY[i] += self.dt*self.v[i] * np.sin(self.rAgentsTheta[i])
            self.rAgentsTheta[i] += self.dt*w #*\label{code:qlearning:ann:15}
            if  self.rAgentsTheta[i] > 2*np.pi : self.rAgentsTheta[i] -= 2*np.pi  #*\label{code:qlearning:ann:16}
            if  self.rAgentsTheta[i] < 0 : self.rAgentsTheta[i] += 2*np.pi #*\label{code:qlearning:ann:17}
 
    def _collisionCheck(self):
        self._checkWalls()
        self._checkRobots()
     
    def _checkWalls(self):
        for i in range(len(self.rAgents)):
            checkCol = False
            if   self.rAgentsX[i] - self.rAgentsR[i] < self.xBox[0]:
                checkCol = True
                self.rAgentsX[i] = self.xBox[0] + self.rAgentsR[i] + 0.01
            elif self.rAgentsX[i] + self.rAgentsR[i] > self.xBox[1]:
                checkCol = True
                self.rAgentsX[i] = self.xBox[1] - self.rAgentsR[i] - 0.01
            elif self.rAgentsY[i] - self.rAgentsR[i] < self.yBox[0]:
                checkCol = True
                self.rAgentsY[i] = self.yBox[0] + self.rAgentsR[i] + 0.01       
            elif self.rAgentsY[i] + self.rAgentsR[i] > self.yBox[1]:
                checkCol = True
                self.rAgentsY[i] = self.yBox[1] - self.rAgentsR[i] - 0.01
            if checkCol:
                self.rAgents[i].collisionFeedback(True) #*\label{code:qlearning:ann:18}
                self.rAgentsCol[i] += 1 #*\label{code:qlearning:ann:19}
                self.rAgentsTheta[i] += np.pi #*\label{code:qlearning:ann:20}
                if  self.rAgentsTheta[i] > 2*np.pi : self.rAgentsTheta[i] -= 2*np.pi
       
    def _checkRobots(self):
        self.minDist    = -1*np.ones(len(self.rAgents), dtype=int) #*\label{code:qlearning:ann:addon:0}
        self.minDistVal = np.inf*np.ones(len(self.rAgents)) #*\label{code:qlearning:ann:addon:1}
        for i in range(len(self.rAgents)):
            for j in range(i+1,len(self.rAgents)): #*\label{code:qlearning:ann:23}
                dist  = (self.rAgentsX[i] - self.rAgentsX[j])**2  #*\label{code:qlearning:ann:21}
                dist += (self.rAgentsY[i] - self.rAgentsY[j])**2  #*\label{code:qlearning:ann:22}
                if self.minDistVal[j] > dist and i>0: #*\label{code:qlearning:ann:addon:2}
                    self.minDistVal[i] = dist
                    self.minDistVal[j] = dist
                    self.minDist[i] = j
                    self.minDist[j] = i
                if np.sqrt(dist) < self.rAgentsR[i] + self.rAgentsR[j]:
                    mylist = [i,j]
                    for k in mylist:
                        self.rAgents[k].collisionFeedback(True) #*\label{code:qlearning:ann:24}
                        self.rAgentsCol[k] += 1
                        self.rAgentsTheta[k] += 0.75*np.pi
                        if  self.rAgentsTheta[k] > 2*np.pi : self.rAgentsTheta[k] -= 2*np.pi
                        self.rAgentsX[k] += 2*self.dt*self.v[k] * np.cos(self.rAgentsTheta[k])
                        self.rAgentsY[k] += 2*self.dt*self.v[k] * np.sin(self.rAgentsTheta[k])
    def _setDistanceSensors(self):
        for i in range(len(self.rAgents)): #*\label{code:qlearning:ann:27}
            self.rAgents[i].distanceSensorValue[:] = np.Inf #*\label{code:qlearning:ann:25}
            s = np.array([self.rAgentsX[i],self.rAgentsY[i]]) #*\label{code:qlearning:ann:26}
            for j in range(len(self.rAgents)): #*\label{code:qlearning:ann:28}
                if i == j : continue #*\label{code:qlearning:ann:29}
                m = np.array([self.rAgentsX[j],self.rAgentsY[j]])  #*\label{code:qlearning:ann:30}
                dist = self._intersectionR2R(s,self.rAgentsTheta[i],m,self.rAgentsR[j])  #*\label{code:qlearning:ann:31}
                for k in range(len(dist)): #*\label{code:qlearning:ann:32}
                    if dist[k] - self.rAgentsR[i] < self.rAgents[i].distanceSensorValue[k]:
                        self.rAgents[i].distanceSensorValue[k] = dist[k] - self.rAgentsR[i]
            dist = self._sensorR2Wall(s,self.rAgentsTheta[i])
            for k in range(len(dist)):
                if dist[k] - self.rAgentsR[i] < self.rAgents[i].distanceSensorValue[k]:
                    self.rAgents[i].distanceSensorValue[k] = dist[k] - self.rAgentsR[i]

    def _rotateVector(self,phi):
        return [np.cos(phi), np.sin(phi)]
   
    def _sensorR2Wall(self,s,theta):
        dist = []
        for k in self.sensors:
            d = self._rotateVector(theta+k)
            if   d[0] < 0: tempX = 1/d[0]*(self.xBox[0] - s[0])
            elif d[0] > 0: tempX = 1/d[0]*(self.xBox[1] - s[0])
            else: tempX = np.inf
            if   d[1] < 0: tempY = 1/d[1]*(self.yBox[0] - s[1])
            elif d[1] > 0: tempY = 1/d[1]*(self.yBox[1] - s[1])
            else: tempY = np.inf            
            dist.append(min(tempX,tempY))
        return(dist)
    def _intersectionR2R(self,s,theta,m,r):
        dist = []
        for k in self.sensors:
            d = self._rotateVector(theta+k)
            a = s[0] - m[0]
            b = s[1] - m[1]
            term1 = a*d[0]+b*d[1]
            Discriminant = term1**2 - (a**2+b**2-r**2)
            if Discriminant >= 0:
                lambda2 = -term1-np.sqrt(Discriminant)
                if lambda2 > 0: dist.append(lambda2)
                else: dist.append(np.Inf)
            else: dist.append(np.Inf)
        return(dist)           
       
    def _setDirtSensor(self):
        for i in range(len(self.rAgents)):
            d = self._rotateVector(self.rAgentsTheta[i])
            x = self.rAgentsX[i] + 0.25*d[0] #*\label{code:qlearning:ann:33}
            y = self.rAgentsY[i] + 0.25*d[1] #*\label{code:qlearning:ann:34}
            t = (self.xv-x)**2 + (self.yv-y)**2 < self.rAgentsR[i]**2 #*\label{code:qlearning:ann:35}
            dirtSum = np.sum(self.dirt[t]) #*\label{code:qlearning:ann:36}
            self.rAgents[i].dirtSensor = 10*dirtSum/self.dirt.size #*\label{code:qlearning:ann:37}
       
    def cleanDirt(self):
        for i in range(len(self.rAgents)):
            t = (self.xv-self.rAgentsX[i])**2 + (self.yv-self.rAgentsY[i])**2 < self.rAgentsR[i]**2
            dirtSum = np.sum(self.dirt[t])*0.75 #*\label{code:qlearning:ann:38}
            self.dirt[t] -= 0.9*self.dirt[t]
            self.rAgents[i].dirtCleaned(dirtSum/self.dirt.size)
            self.rAgentsDirt[i] += dirtSum/self.dirt.size
    
    def _setCoopData(self):
        for i in range(len(self.rAgents)):
            j = self.minDist[i]
            self.rAgents[i].CoopData(self.rAgentsX[i], self.rAgentsY[i], self.rAgentsX[j], self.rAgentsY[j])   
 
       
    def nextTimeStep(self):
        self._setDistanceSensors()
        self._setDirtSensor()
        self._moveRobots()
        self._collisionCheck()
        self._setCoopData()
        self.cleanDirt()
        self.dirt += self.dt/300; self.dirt = np.minimum(self.dirt,1)
               
    def startSim(self,EndTime):
        for i in range(len(self.rAgents)):
            t = (self.xv-self.rAgentsX[i])**2 + (self.yv-self.rAgentsY[i])**2 < self.rAgentsR[i]**2
            self.dirt[t] -= 0.9*self.dirt[t]
        self.EndTime  = EndTime
        tau = np.arange(0,EndTime,self.dt)
        self.protocol = []
        for i in range(len(self.rAgents)):
            self.protocol.append( np.zeros( (tau.shape[0],5) ) )
            self.rAgents[i].dirtSensor=0
        self.protocol.append(np.zeros(tau.shape[0]))   
        for j in range(len(tau)):
            self.nextTimeStep()
            for i in range(len(self.rAgents)):
                self.protocol[i][j,0] = self.rAgentsX[i]
                self.protocol[i][j,1] = self.rAgentsY[i]
                self.protocol[i][j,2] = self.rAgentsCol[i]
                self.protocol[i][j,3] = self.rAgentsTheta[i]
                self.protocol[i][j,4] = self.rAgentsDirt[i]
            self.protocol[len(self.rAgents)][j] = np.sum(self.dirt)/self.dirt.size
   
    def evaluation(self):
        lines = ["-","--","-.",":"]
        tau = np.arange(0,self.EndTime,self.dt)
        fig = plt.figure()
        ax = fig.add_subplot(1,1,1)
        for i in range(1,len(self.rAgents)):
            mystr = 'Agent No.'+str(i)
            ax.plot(tau,self.protocol[i][:,4],label=mystr,linestyle=lines[i%4],color='k')
        ax.plot(tau,self.protocol[len(self.rAgents)],label='Dirt (total)', color='r', lw=2.0)     
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles, labels)
        ax.set_title('Dirtying')
        fig = plt.figure()
        ax = fig.add_subplot(1,1,1)
        for i in range(1,len(self.rAgents)):
            mystr = 'Agent No.'+str(i)
            ax.plot(tau,self.protocol[i][:,2],label=mystr,linestyle=lines[i%4],color='k')
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles, labels)
        ax.set_title('Collisions')
        from matplotlib import cm
        fig = plt.figure()
        ax = fig.add_subplot(111)
        x = np.arange(self.xBox[0]+0.05,self.xBox[1],0.05)
        y = np.arange(self.yBox[0]+0.05,self.yBox[1],0.05)
        xv, yv = np.meshgrid(x, y)
        z = np.resize(self.dirt, xv.shape)
        ax.pcolormesh(xv, yv, z, cmap=cm.Greys)
        plt.show()
   
    def _initScene(self):
        self.toDraw =[]
        borders = Rectangle((self.xBox[0], self.yBox[0]), self.xBox[1] - self.xBox[0],
                             self.yBox[1] - self.yBox[0], fill=True, facecolor='white', edgecolor='k' )
        self.toDraw.append(borders)
        table = plt.Circle((self.rAgentsX[0],self.rAgentsY[0]), self.rAgentsR[0], fc='k')
        self.toDraw.append(table)
        for i in range(1,len(self.rAgents)):
            agents = plt.Circle((self.rAgentsX[i],self.rAgentsY[i]), self.rAgentsR[i], fc=np.random.rand(3))
            self.toDraw.append(agents)
        return []
   
    def _updateScene(self, i):
        i = min(i*20 + self.startFrame, len(self.protocol[0][:,0]) - 1)
        dirtyTrick = Rectangle((-4.5,-3.5), 9, 7, fill=True, facecolor='white', edgecolor=None )
        self.ax.add_patch(dirtyTrick)
        self.ax.add_patch(self.toDraw[0])
        for j in range(1,len(self.rAgents)+1):
            x = self.protocol[j-1][i,0]; y = self.protocol[j-1][i,1]
            self.toDraw[j].center = (x,y)
            self.ax.add_patch(self.toDraw[j])
        for j in range(2,len(self.rAgents)+1):
            x = self.protocol[j-1][i,0]; y = self.protocol[j-1][i,1]
            sensors = np.array([-np.pi/6,-np.pi/3,0,np.pi/3,np.pi/6 ])
            for k in sensors:
                d = self._rotateVector(self.protocol[j-1][i,3]+k)
                lookDir = Arrow(x,y,0.5*d[0],0.5*d[1], width=0.2)
                self.ax.add_patch(lookDir)
        return []
   
    def plotScene(self, startFrame=0):
        self.startFrame = startFrame
        self.fig = plt.figure()
        self.ax = plt.axes(xlim=(-4.5, 4.5), ylim=(-3.5, 3.5))
        self.fig.show()
        self.anim = animation.FuncAnimation(self.fig, self._updateScene,
                               init_func=self._initScene, frames=len(self.protocol[0][:,0]),
                               interval=20, repeat=False)
        self.fig.show()
       
if __name__ == '__main__':
    np.random.seed(42)
    myNewBox = boxEnvironment()
    for i in range(1):
        reaktiverAgent =  robotAgent()
        myNewBox.addRobot(reaktiverAgent)
    from time import perf_counter  # time.clock was removed in Python 3.8
    t1 = perf_counter()
    myNewBox.startSim(300)
    t2 = perf_counter()
    print(t2-t1)
    myNewBox.evaluation()
    #myNewBox.plotScene()
   


::::::::::::::::::::::::::::::::::::::::::::::
import numpy as np
class environment:
    def __init__(self):
        self.envData = -1*np.ones( (5,4) ) #*\label{code:robocliff:1}
        self.envData[1,3] = self.envData[2,3] = self.envData[3,3] = -1000 #*\label{code:robocliff:2}
        self.envData[4,3] = 1000 #*\label{code:robocliff:3}
        self.dogX = 0; self.dogY = 3 #*\label{code:robocliff:4}
       
    def setDog(self, x=0, y=3):
        self.dogX = x; self.dogY = y #*\label{code:robocliff:8}
       
    def move(self,a): 
        if a == 0 : self.dogY -= 1 # a = 0 east
        if a == 1 : self.dogX += 1 # a = 1 south
        if a == 2 : self.dogY += 1 # a = 2 west
        if a == 3 : self.dogX -= 1 # a = 3 north
        if self.dogY > 3 or self.dogY < 0 or self.dogX > 4 or self.dogX < 0: #*\label{code:robocliff:5}
            r = -10
        else:
            r = self.envData[self.dogX,self.dogY]
        if self.dogY < 0: self.dogY = 0 #*\label{code:robocliff:6}
        if self.dogX < 0: self.dogX = 0
        if self.dogX > 4: self.dogX = 4
        if self.dogY >3: self.dogY = 3 #*\label{code:robocliff:7}
        return(r)
       
    def sensors(self):
        return(self.dogX,self.dogY) #*\label{code:robocliff:8}
       
class learningDog:
    def __init__(self, env,vareps):
        self.hatQ = np.zeros( (5,4,4) ) #*\label{code:robocliff:9}
        self.env = env
        self.vareps = vareps
       
    def _chooseAction(self,x,y):
        RonnieSoak = np.random.rand(1)
        if RonnieSoak < self.vareps:
            a = np.random.randint(4)
        else:
            localQ = self.hatQ[x,y,:]
            a = np.argmax(localQ)
        return(a)
       
    def learn(self, gamma, trainCycles):
        for i in range(trainCycles):
            (x,y) = self.env.sensors()
            a = self._chooseAction(x,y)
            r = self.env.move(a)
            (xN,yN) = self.env.sensors()
            localQ = self.hatQ[xN,yN,:]
            self.hatQ[x,y,a] = r + gamma * np.max(localQ) #*\label{code:robocliff:53}
            if r == 1000: self.env.setDog()  #*\label{code:robocliff:10}
           
    def show(self, x=0, y=3):
        self.env.setDog(x,y)
        r = -1
        counter = 0
        while r != 1000 and counter < 100:
            counter += 1
            (x,y) = self.env.sensors()
            a = self._chooseAction(x,y)
            r = self.env.move(a)
            print(a,r)
if __name__ == '__main__':
    np.random.seed(42)
    myenv = environment()
    robohund = learningDog(myenv,0.1)
    robohund.learn(0.4,2000)
    print('Start at 0,3')
    robohund.show()
    print('Start at 3,0')
    robohund.show(x=3,y=0)
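
As an optional inspection of the result, the learned table hatQ can be turned into a greedy policy by taking the argmax over the four actions in every grid cell; the letters reuse the action coding from environment.move (0 = east, 1 = south, 2 = west, 3 = north). This is only a small sketch and assumes robohund from the run above.

# Sketch: derive the greedy policy from the trained robohund.hatQ (run above).
arrows = {0: 'E', 1: 'S', 2: 'W', 3: 'N'}   # action coding as in environment.move
for y in range(4):
    print(' '.join(arrows[int(np.argmax(robohund.hatQ[x, y, :]))] for x in range(5)))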
               
               
   
:::::::::::::::::::::::::::::::::::::::::::::::::
import numpy as np
class environment:
    def __init__(self):
        self.envData = -1*np.ones( (5,4) )
        self.envData[1,3] = self.envData[2,3] = self.envData[3,3] = -1000
        self.envData[4,3] = 1000
        self.dogX = 0; self.dogY = 3
       
    def setDog(self, x=0, y=3):
        self.dogX = x; self.dogY = y
       
    def move(self,a): 
        if a == 0 : self.dogY -= 1 # a = 0 east
        if a == 1 : self.dogX += 1 # a = 1 south,
        if a == 2 : self.dogY += 1 # a = 2 west
        if a == 3 : self.dogX -= 1 # a = 3 north
        if self.dogY > 3 or self.dogY < 0 or self.dogX > 4 or self.dogX < 0:
            r = -10
        else:
            r = self.envData[self.dogX,self.dogY]
        if self.dogY < 0: self.dogY = 0
        if self.dogX < 0: self.dogX = 0
        if self.dogX > 4: self.dogX = 4
        if self.dogY >3: self.dogY = 3
        return(r)
       
    def sensors(self):
        return(self.dogX,self.dogY)
   
class learningDogSARSA:
    def __init__(self, env,vareps):
        self.hatQ = np.zeros( (5,4,4) ) # initialize hatQ(s,a)
        self.env = env
        self.vareps = vareps
       
    def _chooseAction(self,x,y):
        RonnieSoak = np.random.rand(1)
        if RonnieSoak < self.vareps:
            a = np.random.randint(4)
        else:
            localQ = self.hatQ[x,y,:]
            a = np.argmax(localQ)
        return(a)
       
    def learn(self, gamma, trainCycles):
        for i in range(trainCycles):
            (x,y) = self.env.sensors()
            a = self._chooseAction(x,y)
            r = self.env.move(a)
            (xN,yN) = self.env.sensors()
            rNext = self.hatQ[xN,yN,self._chooseAction(xN,yN)]
            self.hatQ[x,y,a] = r + gamma * rNext
            if r == 1000: self.env.setDog()
           
    def show(self, x=0, y=3):
        self.env.setDog(x,y)
        r = -1
        counter = 0
        while r != 1000 and counter < 100:
            counter += 1
            (x,y) = self.env.sensors()
            a = self._chooseAction(x,y)
            r = self.env.move(a)
            print(a,r)
if __name__ == '__main__':
    np.random.seed(42)
    myenv = environment()
    robohund = learningDogSARSA(myenv,0.1)
    robohund.learn(0.4,2000)
    print('Start at 0,3')
    robohund.show()
    print('Start at 3,0')
    robohund.show(x=3,y=0)
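
The only difference to the Q-learning dog above lies in the target used in learn(): Q-learning bootstraps with the best next action via np.max(localQ), while SARSA uses the value of the action the epsilon-greedy policy actually picks next. A tiny numeric contrast with invented values:

# Hypothetical numbers, purely to contrast the two update targets for one transition.
gamma = 0.4
r = -1.0
QNext = np.array([0.0, -1.4, 2.0, -10.0])   # stands in for hatQ[xN,yN,:]
aNext = 3                                   # action the epsilon-greedy policy happens to choose
print(r + gamma*np.max(QNext))              # Q-learning target: r + gamma*max_a Q(s',a)
print(r + gamma*QNext[aNext])               # SARSA target:      r + gamma*Q(s',a')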
               
               
   

:::::::::::::::::::::::::::::::::::::::::
import numpy as np
import random
 
def softmax(Q, tau):
    return np.exp(Q/tau) / sum(np.exp(Q/tau))
Q=np.array([-10, 2.1, 1.9])
print( softmax(Q,0.1) )
print( softmax(Q,1.0) )
print( softmax(Q,100) )
def chooseAction(Q, tau=1):
    p = softmax(Q,tau)
    toChoose = np.arange(0,len(Q))
    a = int(random.choices(toChoose,weights=p,k=1)[0])
    return(a)
   
random.seed(42)
counter = np.zeros(len(Q))
for i in range(10000):
    counter[chooseAction(Q)] += 1
print(counter)
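
Since chooseAction takes the temperature tau as a parameter, its effect can be shown directly: a small tau concentrates nearly all choices on the best action, while a large tau approaches a uniform distribution. A short illustrative extension of the counting experiment above:

# Repeat the counting experiment for several temperatures.
for tau in (0.1, 1.0, 100.0):
    counter = np.zeros(len(Q))
    for i in range(10000):
        counter[chooseAction(Q, tau)] += 1
    print('tau =', tau, counter)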

:::::::::::::::::::::::::::::::::::::
