Open Edward-hb opened 1 month ago
这里的 `i` 在计算方差的时候是没有变化的:前面的 `for i in range(4)` 循环结束后,`i` 一直等于 3,而后面的方差循环遍历的是 `p`,并没有更新 `i`。因此计算出的方差没有遍历所有样本,只是重复累加了下标为 3 的那一项。

```python
i = 0
for p in self.labeled_hudongList:  # 预先计算存储各分量相似度
    if p.title == item.title:  # 如果训练集已经有,直接返回label
        return p.label
    title_simi.append(self.get_title_simi(p, item))
    openTypeList_simi.append(self.get_openTypeList_simi(p, item))
    baseInfoKeyList_simi.append(self.get_baseInfoKeyList_simi(p, item))
    baseInfoValueList_simi.append(self.get_baseInfoValueList_simi(p, item))
    mean[0] += title_simi[i]
    mean[1] += openTypeList_simi[i]
    mean[2] += baseInfoKeyList_simi[i]
    maxx[2] = max(maxx[2],baseInfoKeyList_simi[i])
    minn[2] = min(minn[2],baseInfoKeyList_simi[i])
    mean[3] += baseInfoValueList_simi[i]
    maxx[3] = max(maxx[3],baseInfoValueList_simi[i])
    minn[3] = min(minn[3],baseInfoValueList_simi[i])
    i += 1
for i in range(4):
    mean[i] /= len(self.labeled_hudongList)
for p in self.labeled_hudongList:  # 计算方差
    var[0] += (title_simi[i]-mean[0])*(title_simi[i]-mean[0])
    var[1] += (openTypeList_simi[i]-mean[1])*(openTypeList_simi[i]-mean[1])
    var[2] += (baseInfoKeyList_simi[i]-mean[2])*(baseInfoKeyList_simi[i]-mean[2])
    var[3] += (baseInfoValueList_simi[i]-mean[3])*(baseInfoValueList_simi[i]-mean[3])
```
我这里修改了变量名,所以和源码略有不同:

```python
for i in range(len(self.labeled_hudongList)):
    title_simi[i] = (title_simi[i] - mean[0]) / stand[0]
    openTypeList_simi[i] = (openTypeList_simi[i] - mean[1]) / stand[1]
    baseInfoKeyList_simi[i] = (baseInfoKeyList_simi[i] - mean[2]) / stand[2]
    baseInfoValueList_simi[i] = (baseInfoValueList_simi[i] - mean[3]) / stand[3]
```
修改为:
我这里修改了变量名,所以和源码略有不同:

```python
for i in range(len(self.labeled_hudongList)):
    title_simi[i] = (title_simi[i] - mean[0]) / stand[0]
    openTypeList_simi[i] = (openTypeList_simi[i] - mean[1]) / stand[1]
    baseInfoKeyList_simi[i] = (baseInfoKeyList_simi[i] - mean[2]) / stand[2]
    baseInfoValueList_simi[i] = (baseInfoValueList_simi[i] - mean[3]) / stand[3]
```