Mittwoch, 29. Mai 2019

Künstliche Intelligenz – Source-Code Python, Teil 4


import numpy as np



# Fix the RNG seed so the random train/test split below is reproducible.
np.random.seed(42)

# Convert the raw UCI "diagnosis" data file into a plain CSV that
# np.loadtxt can parse: decimal commas -> decimal points, tab separators
# -> commas, nominal yes/no -> binary 1/0.
# Context managers guarantee both files are closed even if an error occurs.
with open("diagnosis.data", "r") as f_raw, open("diagnosis.csv", "w") as f_csv:
    for line in f_raw:
        line = line.replace(",", ".")    # decimal comma -> decimal point
        line = line.replace("\t", ",")   # tab-separated -> comma-separated
        line = line.replace("yes", "1")  # nominal yes/no -> binary 1/0
        line = line.replace("no", "0")
        # NOTE(review): in text mode "\r\n" is already translated to "\n"
        # while reading, so this replace is a no-op kept for compatibility.
        line = line.replace("\r\n", "\n")
        f_csv.write(line)



# Load the converted CSV. np.loadtxt accepts a filename directly, so the
# manual open/close of the file handle is unnecessary.
dataset = np.loadtxt("diagnosis.csv", delimiter=",")

# Columns 1..5 are the binary symptom features; column 6 is the class
# label (presumably the bladder-inflammation diagnosis — confirm against
# diagnosis.names). Column 0 (temperature) is used later for the mixed model.
X = dataset[:, 1:6]
Y = dataset[:, 6]

# Hold out 20% of the records (sampled without replacement) as a test set;
# the remaining indices form the training set.
allData     = np.arange(0, X.shape[0])
iTesting    = np.random.choice(X.shape[0], int(X.shape[0]*0.2), replace=False)
iTraining   = np.delete(allData, iTesting)
dataRecords = len(iTraining)
XTrain = X[iTraining, :]
YTrain = Y[iTraining]



# Count tables for the nominal naive Bayes model.
# PIX[c, k, v] counts training records with class index c and value v for
# symptom k, where (by the construction below):
#   c = 0 <-> label true,   c = 1 <-> label false
#   v = 0 <-> symptom true, v = 1 <-> symptom false
# PI[c] counts the training records per class index.
PIX = np.zeros( (2,XTrain.shape[1],2) )
PI  = np.zeros(2)
for k in range(X.shape[1]):
    PIX[0,k,0] = np.sum(np.logical_and(XTrain[:,k],YTrain))                  # symptom true,  label true
    PIX[0,k,1] = np.sum(np.logical_and(np.logical_not(XTrain[:,k]),YTrain))  # symptom false, label true
    PIX[1,k,0] = np.sum(np.logical_and(XTrain[:,k],np.logical_not(YTrain)))  # symptom true,  label false
    # not(symptom or label) == (symptom false) and (label false), De Morgan
    PIX[1,k,1] = np.sum(np.logical_not(np.logical_or(XTrain[:,k],YTrain)))
PI[0] = np.sum(YTrain)
PI[1] = dataRecords - PI[0]

# Smoothing: add 1/2 to every count so that no table entry is exactly zero.
# NOTE(review): the divisor is the TOTAL training count, not the per-class
# count, so these are smoothed joint frequencies rather than class-conditional
# probabilities; the predictors below multiply them by PI again — confirm
# this matches the intended model.
PIX = (PIX + 1/2) / (dataRecords+1)
PI  = PI  / dataRecords

 

def predictNaiveBayesNominal(x):
    """Classify a binary symptom vector with the nominal naive Bayes tables.

    x: integer vector of 0/1 symptom values (one per feature column).
    Returns the index (0 or 1) of the winning class under the smoothed
    tables PIX and the prior PI (module-level globals).
    """
    featureIdx = np.arange(XTrain.shape[1])
    # Unnormalised score per class: prior times the product of the table
    # entries selected by the observed symptom values.
    P = np.array([np.prod(PIX[c, featureIdx, x]) * PI[c]
                  for c in range(len(PI))])
    # Normalise to probabilities; this does not change the argmax.
    P = P / np.sum(P)
    return np.argmax(P)



# Evaluate the nominal classifier on the held-out records.
XTest = X[iTesting,:]
YTest = Y[iTesting]

# Per predicted class: number of correctly / incorrectly classified records.
correct   = np.zeros(2)
incorrect = np.zeros(2)

for i in range(XTest.shape[0]):
    predicted = predictNaiveBayesNominal(XTest[i,:].astype(int))
    # NOTE(review): the class INDEX is compared directly with the 0/1
    # label value; this relies on the index<->label convention above.
    if predicted == YTest[i]:
        correct[predicted] += 1
    else:
        incorrect[predicted] += 1

print("Von %d Testfaellen wurden %d richtig und %d falsch klassifiziert" % (XTest.shape[0],np.sum(correct),np.sum(incorrect) ))



# Temperature (column 0) is the single continuous feature for the mixed model.
T = dataset[:,0]

# Positions of positive / negative labels WITHIN the training subset.
trueIndex  = np.flatnonzero(YTrain==1)
falseIndex = np.flatnonzero(YTrain==0)

# BUG FIX: trueIndex/falseIndex index YTrain, i.e. the training subset, so
# the temperatures must be taken from T[iTraining]; indexing the full column
# T with them selected shifted (partly test-set) rows.
TTrain = T[iTraining]

# Per-class sample mean and unbiased (ddof=1) sample variance, matching the
# original sum((x-mu)**2)/(n-1) formulas.
# NOTE(review): sgApprox* are VARIANCES, yet later they are passed as the
# sigma (std-dev) argument of Gausverteilung — confirm whether np.sqrt is
# missing at the call sites.
muApproxTrue  = np.mean(TTrain[trueIndex])
sgApproxTrue  = np.var(TTrain[trueIndex], ddof=1)
muApproxFalse = np.mean(TTrain[falseIndex])
sgApproxFalse = np.var(TTrain[falseIndex], ddof=1)



def Gausverteilung(x,mu,sigma):
    """Gaussian (normal) probability density at x.

    mu is the mean, sigma the standard deviation; x may be a scalar or a
    numpy array (evaluated element-wise).
    """
    z = (x - mu) / sigma
    norm = sigma * np.sqrt(2.0 * np.pi)
    return np.exp(-0.5 * z * z) / norm



# Plotting is only needed from here on, hence the late import.
import matplotlib.pyplot as plt

fig = plt.figure()

# Histogram of the training temperatures with both fitted class densities.
ax = fig.add_subplot(131)
# BUG FIX: 'normed' was removed in matplotlib >= 3.1; density=True is the
# equivalent normalisation. The panel is titled "Alle Trainingsdaten", so
# restrict the histogram to the training rows instead of the whole column.
ax.hist(T[iTraining], 15, density=True, facecolor='k', alpha=0.5)
ax.set_xlabel('Temperatur')
ax.set_ylabel('Wahrscheinlichkeit')
Tplot = np.arange(33, 44, 0.05)
# NOTE(review): sgApprox* hold sample variances but are passed as the
# sigma (std-dev) argument of Gausverteilung — confirm a np.sqrt is not
# missing here.
ax.plot(Tplot, Gausverteilung(Tplot, muApproxTrue, sgApproxTrue), 'k:')
ax.plot(Tplot, Gausverteilung(Tplot, muApproxFalse, sgApproxFalse), 'k-.')
ax.set_ylim([0, 0.8])
ax.set_title('Alle Trainingsdaten')

# Negative-diagnosis temperatures with the fitted density.
# BUG FIX: falseIndex/trueIndex are positions within the TRAINING subset,
# so they must index T[iTraining], not the full column T.
ax = fig.add_subplot(132)
ax.hist(T[iTraining][falseIndex], 15, density=True, facecolor='k', alpha=0.5)
ax.set_xlabel('Temperatur')
ax.plot(Tplot, Gausverteilung(Tplot, muApproxFalse, sgApproxFalse), 'k-.')
ax.set_ylim([0, 0.8])
ax.set_title('Negative Diagnose')

# Positive-diagnosis temperatures with the fitted density.
ax = fig.add_subplot(133)
ax.hist(T[iTraining][trueIndex], 15, density=True, facecolor='k', alpha=0.5)
ax.set_xlabel('Temperatur')
ax.plot(Tplot, Gausverteilung(Tplot, muApproxTrue, sgApproxTrue), 'k:')
ax.set_ylim([0, 0.8])
ax.set_title('Positive Diagnose')

plt.tight_layout()
plt.show(block=False)



def predictNaiveBayesMixed(x,T,muTrue,sigmaTrue,muFalse,sigmaFalse):
    """Naive Bayes prediction combining the nominal symptom tables with a
    Gaussian model for the temperature T.

    x: integer 0/1 symptom vector; T: temperature of the same record.
    Returns the index (0 or 1) of the class with the larger unnormalised
    score (argmax; no normalisation is needed for the comparison).
    """
    featureIdx = np.arange(XTrain.shape[1])
    # Nominal part: prior times per-feature table entries, exactly as in
    # predictNaiveBayesNominal.
    score = np.array([np.prod(PIX[c, featureIdx, x]) * PI[c]
                      for c in range(len(PI))])
    # Continuous part: weight each class by the Gaussian likelihood of T.
    score[0] *= Gausverteilung(T, muTrue, sigmaTrue)
    score[1] *= Gausverteilung(T, muFalse, sigmaFalse)
    return np.argmax(score)



# Temperatures of the held-out test records (iTesting indexes the full column,
# so this lookup is correct).
TTest = T[iTesting]

def TestNaiveBayesMixed(muTrue,sigmaTrue,muFalse,sigmaFalse):
    """Evaluate the mixed naive Bayes classifier on the held-out test set.

    Returns (correct, incorrect): per-predicted-class counts of hits and
    misses over XTest/YTest/TTest (module-level globals).
    """
    hits   = np.zeros(2)
    misses = np.zeros(2)
    for idx in range(XTest.shape[0]):
        features  = XTest[idx, :].astype(int)
        predicted = predictNaiveBayesMixed(features, TTest[idx],
                                           muTrue, sigmaTrue,
                                           muFalse, sigmaFalse)
        if predicted == YTest[idx]:
            hits[predicted] += 1
        else:
            misses[predicted] += 1
    return (hits, misses)

(correct, incorrect) = TestNaiveBayesMixed(muApproxTrue, sgApproxTrue, muApproxFalse, sgApproxFalse)
print("Von %d Testfaellen wurden %d richtig und %d falsch klassifiziert" % (XTest.shape[0],np.sum(correct),np.sum(incorrect) ))



# Training records with NEITHER diagnosis set (column 7 is the second
# diagnosis label; YTrain is the first) — presumably the "healthy" group;
# confirm the column semantics against diagnosis.names.
keineDiagnose = np.logical_not(np.logical_or(dataset[iTraining,7],YTrain))
index = np.flatnonzero(keineDiagnose)

# BUG FIX: index holds positions WITHIN the training subset, so the
# temperatures must come from T[iTraining], not from the full column T.
TNoDiag  = T[iTraining][index]
muApprox = np.mean(TNoDiag)
sgApprox = np.var(TNoDiag, ddof=1)  # unbiased sample variance (ddof=1)

fig = plt.figure()
ax = fig.add_subplot(111)
# BUG FIX: 'normed' was removed in matplotlib >= 3.1; density=True is the
# equivalent replacement for normed=1.
ax.hist(TNoDiag, 15, density=True, facecolor='k', alpha=0.5)
ax.set_xlabel('Temperatur')
ax.set_ylabel('Wahrscheinlichkeit')
# NOTE(review): the sg* values are variances but are passed as the sigma
# (std-dev) parameter of Gausverteilung — confirm a np.sqrt is not missing.
ax.plot(Tplot, Gausverteilung(Tplot, muApprox, sgApprox), 'k')
ax.plot(Tplot, Gausverteilung(Tplot, muApproxTrue, sgApproxTrue), 'k:')
ax.plot(Tplot, Gausverteilung(Tplot, muApproxFalse, sgApproxFalse), 'k-.')

Keine Kommentare:

Kommentar veröffentlichen

Hinweis: Nur ein Mitglied dieses Blogs kann Kommentare posten.