机器学习实战 AdaBoost预测患有疝气病的马的存活问题
摘要:《机器学习实战》——使用 AdaBoost 预测患有疝气病的马的存活问题。本文包含结果示例与完整代码(Horse.py,依赖 numpy 与 math.log),完整代码见下文。
·
机器学习实战
使用AdaBoost来预测患有疝气病的马的存活问题



结果示例
完整代码
# -*- coding: utf-8 -*-
# @Time : 2021/6/21 15:33
# @Author : weiwei
# @File : Horse.py
import numpy as np
from math import log
def loadSimpData():
    """Return a tiny hand-made, two-feature data set for quick experiments.

    Returns:
        tuple: ``(datMat, classLabels)`` where ``datMat`` is a 5x2
        ``np.matrix`` of samples and ``classLabels`` is a list of five
        labels, each either ``1.0`` or ``-1.0``.
    """
    labels = [1.0, 1.0, -1.0, -1.0, 1.0]
    samples = [
        [1.0, 2.1],
        [2.0, 1.1],
        [1.3, 1.0],
        [1.0, 1.0],
        [2.0, 1.0],
    ]
    return np.matrix(samples), labels
def loadDataSet(fileName):
    """Load a tab-separated numeric data file into feature rows and labels.

    The number of columns is taken from the first non-blank line. For every
    non-blank line, the first ``numFeat - 1`` fields are parsed as float
    features and the last field is parsed as the float label.

    Args:
        fileName: Path of the text file to read.

    Returns:
        tuple: ``(dataMat, labelMat)`` -- a list of feature rows (each a list
        of floats) and a parallel list of float labels.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a row has fewer fields than the first row.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # One pass inside `with`: the file is read once and the handle is always
    # closed, even if parsing raises.
    with open(fileName) as fr:
        for line in fr:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # sample; skip them instead of failing on float('').
            if not line.strip():
                continue
            if numFeat is None:
                numFeat = len(line.split('\t'))
            curLine = line.strip().split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat - 1)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def stumpClassify(dataMatrix, dimen, threshVal, threshIneq):
    """Classify samples with a single-feature threshold test (decision stump).

    Args:
        dataMatrix: 2-D sample matrix; rows are samples, columns are features.
        dimen: Column index of the feature to compare against the threshold.
        threshVal: Threshold value.
        threshIneq: ``'lt'`` labels samples whose feature is ``<= threshVal``
            as -1.0; any other value (callers pass ``'gt'``) labels samples
            whose feature is ``> threshVal`` as -1.0.

    Returns:
        np.ndarray: ``(m, 1)`` float array holding +1.0 or -1.0 per sample.
    """
    feature = np.asarray(dataMatrix)[:, dimen]
    if threshIneq == 'lt':
        negative = feature <= threshVal
    else:
        # The 'gt' stump must be the mirror image of 'lt'. Previously this
        # branch wrote 1.0 into an all-ones array, so it never produced -1.
        negative = feature > threshVal
    retArray = np.ones((feature.shape[0], 1))
    retArray[negative] = -1.0
    return retArray
def buildStump(dataArr, classLabels, D):
    """Search for the decision stump with the lowest weighted error.

    For every feature, thresholds are swept from one step below the feature's
    minimum up to its maximum in ``numSteps`` equal steps, and both
    inequality directions (``'lt'`` and ``'gt'``) are tried via
    ``stumpClassify``. The weighted error of each candidate is printed.

    Args:
        dataArr: 2-D samples (anything ``np.mat`` accepts).
        classLabels: Sequence of per-sample labels.
        D: ``(m, 1)`` matrix of sample weights.

    Returns:
        tuple: ``(bestStump, minError, bestClassEst)`` where ``bestStump`` is
        a dict with keys ``'dim'``, ``'thresh'`` and ``'ineq'``; ``minError``
        is the 1x1 matrix ``D.T * errArr`` of the best stump (``np.inf`` if no
        candidate was evaluated); and ``bestClassEst`` holds the best stump's
        ``(m, 1)`` predictions.
    """
    dataMatrix = np.mat(dataArr)
    labelMat = np.mat(classLabels).T
    m, n = np.shape(dataMatrix)
    numSteps = 10.0
    bestStump = {}
    bestClassEst = np.mat(np.zeros((m, 1)))
    minError = np.inf
    lowestError = np.inf  # plain-float twin of minError used for comparisons
    for i in range(n):
        rangeMin = dataMatrix[:, i].min()
        rangeMax = dataMatrix[:, i].max()
        stepSize = (rangeMax - rangeMin) / numSteps
        for j in range(-1, int(numSteps) + 1):
            # The threshold does not depend on the inequality direction.
            threshVal = rangeMin + float(j) * stepSize
            for inequal in ('lt', 'gt'):
                predictedVals = stumpClassify(dataMatrix, i, threshVal, inequal)
                # 1 where the prediction is wrong, 0 where it is right.
                errArr = np.mat(np.ones((m, 1)))
                errArr[predictedVals == labelMat] = 0
                weightError = D.T * errArr
                # Extract the scalar explicitly: implicit conversion of a
                # 1x1 matrix to float is deprecated in recent NumPy.
                errValue = weightError.item()
                print("the error rate of this test is %.3f" % errValue)
                if errValue < lowestError:
                    lowestError = errValue
                    minError = weightError
                    bestClassEst = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['thresh'] = threshVal
                    bestStump['ineq'] = inequal
    return bestStump, minError, bestClassEst
def adaBoostTrainDS(dataArr, classLabels, numIt=40):
    """Train an AdaBoost ensemble of decision stumps.

    Each round fits the best stump for the current sample weights with
    ``buildStump``, computes its vote ``alpha = 0.5 * ln((1 - e) / e)``,
    re-weights the samples, and stops early once the aggregated training
    error reaches zero. Progress is printed every round.

    Args:
        dataArr: 2-D training samples.
        classLabels: Sequence of per-sample labels.
        numIt: Maximum number of boosting rounds.

    Returns:
        tuple: ``(weakClassArr, aggClassEst)`` -- the list of stump dicts
        (each extended with an ``'alpha'`` key) and the ``(m, 1)`` matrix of
        alpha-weighted summed predictions on the training set.
    """
    weakClassArr = []
    m = np.shape(dataArr)[0]
    # The label column vector is loop-invariant; build it once.
    labelMat = np.mat(classLabels).T
    D = np.mat(np.ones((m, 1)) / m)
    aggClassEst = np.mat(np.zeros((m, 1)))
    for _ in range(numIt):
        bestStump, error, classEst = buildStump(dataArr, classLabels, D)
        print("D:", D.T)
        # buildStump may hand back a 1x1 matrix; take the scalar explicitly
        # rather than relying on deprecated implicit array->float conversion.
        errValue = np.asarray(error).item()
        # The max() guard avoids division by zero for a perfect stump.
        alpha = float(0.5 * log((1.0 - errValue) / max(errValue, 1e-16)))
        bestStump['alpha'] = alpha
        weakClassArr.append(bestStump)
        print("classEst: ", classEst.T)
        # Raise the weight of misclassified samples, lower the others,
        # then renormalise so the weights sum to 1.
        expon = np.multiply(-1 * alpha * labelMat, classEst)
        D = np.multiply(D, np.exp(expon))
        D = D / D.sum()
        aggClassEst += alpha * classEst
        print("aggClassEst: ", aggClassEst.T)
        aggErrors = np.multiply(np.sign(aggClassEst) != labelMat, np.ones((m, 1)))
        errorRate = aggErrors.sum() / m
        print("total error: ", errorRate)
        if errorRate == 0.0:
            break
    return weakClassArr, aggClassEst
def adaClassify(datToClass, classifierArr):
    """Classify samples with a trained ensemble of decision stumps.

    Args:
        datToClass: 2-D samples to classify (anything ``np.mat`` accepts).
        classifierArr: Sequence of stump dicts with keys ``'dim'``,
            ``'thresh'``, ``'ineq'`` and ``'alpha'``.

    Returns:
        np.matrix: ``(m, 1)`` matrix with the sign of the alpha-weighted sum
        of the stump predictions for each sample.
    """
    dataMatrix = np.mat(datToClass)
    m = np.shape(dataMatrix)[0]
    aggClassEst = np.mat(np.zeros((m, 1)))
    for stump in classifierArr:
        # The printed value is the stump's vote weight (alpha), so the label
        # says so; it used to be announced as an "error rate".
        print("the alpha of this weak classifier is %.6f" % stump['alpha'])
        classEst = stumpClassify(dataMatrix, stump['dim'],
                                 stump['thresh'], stump['ineq'])
        aggClassEst += stump['alpha'] * classEst
    return np.sign(aggClassEst)
if __name__ == '__main__':
    # --- Smoke test on the toy data set ---------------------------------
    datMat, classLabels = loadSimpData()
    numToy = len(classLabels)
    # Uniform initial weights, sized from the data rather than hard-coded.
    D = np.mat(np.ones((numToy, 1)) / numToy)
    buildStump(datMat, classLabels, D)
    classifierArr, aggClassEst = adaBoostTrainDS(datMat, classLabels)
    print(adaClassify([[0, 0], [1, 0], [2, 2]], classifierArr))

    # --- Horse colic data: train on one file, evaluate on the other -----
    # NOTE(review): predictions are +1/-1 (np.sign of stump votes), so the
    # label column of both files must use that encoding -- confirm.
    datArr, labelArr = loadDataSet('horseColicTraining.txt')
    classifierArr, aggClassEst = adaBoostTrainDS(datArr, labelArr, 10)
    testArr, testLabelArr = loadDataSet('horseColicTest.txt')
    prediction = adaClassify(testArr, classifierArr)
    # Derive the test-set size from the loaded data so the error rate stays
    # correct if the file changes.
    numTest = len(testLabelArr)
    errArr = np.mat(np.ones((numTest, 1)))
    errRate = errArr[prediction != np.mat(testLabelArr).T].sum() / numTest
    print("after 10 iterations the average error rate is: %f" % (errRate))
数据集(horseColicTraining.txt、horseColicTest.txt)及相关资料可以从 Machine-learning 中找到。
更多推荐




所有评论(0)