Python识别男女声音
特征提取
核心思想:最大方差理论。在信号处理中,一般认为信号具有较大的方差,噪声具有较小的方差;信噪比就是信号与噪声的方差之比,越大越好。因此我们认为,最好的k维特征是将n维样本点变换为k维后,每一维上的样本方差都尽可能大。从具体思想到推理,该理论给出了一个很好的过程。特征提取利用R语言实现:可以使用R语言的warbleR包,借助开源的R语言函数包能提取出20个特征;通过脚本处理好的3000多条特征数据,可以直接加载用于训练模型。
机器学习
利用xgboost算法。xgboost是GB(Gradient Boosting)算法的高效实现,其步骤是:首先将模型初始化为一个常数,然后根据一阶导数g和二阶导数h迭代生成基学习器,并相加更新学习器。xgboost考虑了训练数据为稀疏值的情况,可以为缺失值或者指定的值指定分支的默认方向,这能大大提升算法的效率。xgboost还考虑了当数据量比较大、内存不够时怎么有效地使用磁盘,主要是结合多线程、数据压缩、分片的方法,尽可能地提高算法的效率。
测试过程
正如我们所知,将一段需要测试的男生或女生的声音文件(.wav)输入,进行特征提取,再与之前训练好的模型进行比较,根据男女生声音特性的不同(例如基音频率)输出结果。
总结
在完成设计实验的过程中,我们深刻意识到自己能力的不足和知识的短浅,有许多代码的问题都需要查找资料才能有效地解决;不过这个过程让我们学到了很多东西,这让我们非常开心和满足。
提取特征
import os
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri, r
import pandas as pd

# Every relative path used below ('feature_extract.R', 'data', 'male.csv')
# is resolved against this working directory.
os.chdir('C:/Users/TStra/Desktop/signal')

# Module-level accumulator: one converted feature table per entry of 'data'.
data_list = []


def get_feature(fname):
    """Run the R feature extractor on *fname* and return the converted result.

    The function enables the rpy2 <-> pandas conversion layer, sources
    ``feature_extract.R`` from the current working directory, calls the R
    function ``processFolder`` with *fname*, and converts the R return value
    with ``pandas2ri.ri2py``.

    NOTE(review): ``processFolder`` is presumably defined by
    ``feature_extract.R``; the script is not visible here, so what it expects
    *fname* to be (a file or a folder) should be confirmed against it.

    Args:
        fname: Name handed unchanged to the R function ``processFolder``.

    Returns:
        Whatever ``pandas2ri.ri2py`` yields for the R result (the caller
        concatenates these with ``pd.concat``).
    """
    pandas2ri.activate()
    r_session = robjects.r
    # The R script is (re-)sourced on every call, exactly as before.
    r_session.source('feature_extract.R')
    return pandas2ri.ri2py(r_session.processFolder(fname))


if __name__ == '__main__':
    # 'data' is the folder holding the .wav recordings.
    data_list.extend(get_feature(wav_name) for wav_name in os.listdir('data'))

    labelled = pd.concat(data_list)
    labelled['label'] = 'male'
    labelled.to_csv("male.csv", index=False)
    # For the female set the original author swapped the two lines above for:
    #   label value 'female', output file "female.csv".

# Section heading that followed this script in the original write-up:
# "Machine learning"
import xgboost as xgb
import pandas as pd
import numpy as np
import sklearn
# `import sklearn` alone does not guarantee the `metrics` submodule is loaded;
# import it explicitly so `sklearn.metrics.log_loss` always resolves.
import sklearn.metrics
import pickle
import pprint


def xgb_score(preds, dtrain):
    """Custom evaluation callback for ``xgb.train``: log-loss of *preds*.

    Args:
        preds: Predictions produced by the booster for ``dtrain``.
        dtrain: The ``xgb.DMatrix`` being evaluated; its labels are read
            with ``get_label()``.

    Returns:
        A ``(name, value)`` tuple: the metric name ``'log_loss'`` and the
        value computed by ``sklearn.metrics.log_loss(labels, preds)``.
    """
    labels = dtrain.get_label()
    return 'log_loss', sklearn.metrics.log_loss(labels, preds)


# Load the feature table with pandas.
input_data = pd.read_csv('C:/Users/TStra/Desktop/signal/voice.csv')
# frac=1 samples every row, i.e. this shuffles the whole table.
input_data = input_data.sample(frac=1)
# Encode the gender label numerically: male -> 0, female -> 1.
gender = {'male': 0, 'female': 1}
input_data['label'] = input_data['label'].map(gender)
# Every column except the label is used as a feature.
cols = [c for c in input_data.columns if c not in ['label']]
print(cols)

# First 3300 shuffled rows are used for training/validation, the rest for test.
train = input_data.iloc[0:3300]
test = input_data.iloc[3300:]
test_label = np.array(test['label']).reshape([-1, 1])
# Build a new frame without the label rather than deleting a column from a
# slice of `input_data` in place.
test = test.drop('label', axis=1)

fold = 1
for i in range(fold):
    params = {
        'eta': 0.01,  # original author's note: "use 0.002"
        'max_depth': 5,
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'lambda': 0.1,
        'gamma': 0.1,
        'seed': i,
        'silent': True,
    }
    # Within the 3300 training rows: first 3000 to fit, last 300 to validate.
    x1 = train[cols][0:3000]
    x2 = train[cols][3000:]
    y1 = train['label'][0:3000]
    y2 = train['label'][3000:]

    # Build the training matrix once and share it between the watchlist and
    # the `xgb.train` call.
    dtrain = xgb.DMatrix(x1, y1)
    watchlist = [(dtrain, 'train'), (xgb.DMatrix(x2, y2), 'valid')]

    # NOTE(review): `silent`, `feval`, `ntree_limit` and `best_ntree_limit`
    # belong to the older XGBoost API; confirm they exist in the installed
    # version before upgrading the library.
    model = xgb.train(
        params,
        dtrain,
        1500,  # original author's note: "use 1500"
        watchlist,
        feval=xgb_score,
        maximize=False,
        verbose_eval=50,
        early_stopping_rounds=50,
    )

    fold_pred = model.predict(
        xgb.DMatrix(test[cols]),
        ntree_limit=model.best_ntree_limit,
    )
    # Accumulate predictions across folds; they are averaged below.
    pred = fold_pred if i == 0 else pred + fold_pred

pred /= fold

# Threshold the probabilities at 0.5 (1 = female, 0 = male) as a column
# vector so it lines up with `test_label`.
pre_label = (pred >= 0.5).astype(float).reshape([-1, 1])

# `np.float` no longer exists in current NumPy; the mean of the boolean
# comparison already is the accuracy.
acc = np.mean(np.equal(pre_label, test_label))
print("the test acc is:", acc)

# Save the trained model; `with` closes the file even if pickling fails.
with open('model.pkl', 'wb') as model_save:
    pickle.dump(model, model_save)

# Section heading that followed this script in the original write-up:
# "Testing procedure"
import xgboost as xgb
import pandas as pd
import numpy as np
import sklearn
import pickle
import pprint
import rpy2.robjects as robjects
from rpy2.robjects import r, pandas2ri
import os

# Every relative path used below ('feature_extract.R', 'model.pkl', 'data')
# is resolved against this working directory.
os.chdir('C:/Users/TStra/Desktop/signal')


def get_feature(fname):
    """Run the R feature extractor on *fname* and return the converted result.

    Enables the rpy2 <-> pandas conversion layer, sources
    ``feature_extract.R`` from the current working directory, calls the R
    function ``processFolder`` with *fname*, and converts the R return value
    with ``pandas2ri.ri2py``.

    NOTE(review): ``processFolder`` is presumably defined by
    ``feature_extract.R``; confirm against that script what *fname* must be.
    NOTE(review): ``ri2py`` is the older rpy2 spelling of this conversion;
    confirm it exists in the installed rpy2 version.

    Args:
        fname: Name handed unchanged to the R function ``processFolder``.

    Returns:
        Whatever ``pandas2ri.ri2py`` yields for the R result (the caller
        concatenates these with ``pd.concat``).
    """
    pandas2ri.activate()
    robjects.r.source('feature_extract.R')
    data_read = robjects.r.processFolder(fname)
    data_read = pandas2ri.ri2py(data_read)
    return data_read


if __name__ == '__main__':
    # SECURITY: unpickling executes code embedded in the file. Only load a
    # 'model.pkl' that you produced yourself; never one from an untrusted
    # source.
    with open('model.pkl', 'rb') as model_save:
        model = pickle.load(model_save)

    # Extract features for every entry of the 'data' folder (the recordings).
    data_list = [get_feature(file_name) for file_name in os.listdir('data')]
    test = pd.concat(data_list)

    # NOTE(review): `ntree_limit` / `best_ntree_limit` belong to the older
    # XGBoost API; confirm they exist in the installed version.
    pred = model.predict(xgb.DMatrix(test), ntree_limit=model.best_ntree_limit)
    print(pred)

    # Binarize the probabilities at 0.5. Previously both branches of the
    # threshold test stored the raw probability, so the "counts" printed
    # below were sums of probabilities instead of numbers of recordings.
    # A value of 1 is counted as female, 0 as male (see the prints below).
    pre_label = (pred >= 0.5).astype(float).reshape([-1, 1])

    tlen = len(pre_label)
    num = int(pre_label.sum())  # number of recordings classified as female

    print('female is;' + str(num))
    print('male is:' + str(tlen - num))
    # Fractions of male and female predictions; float() keeps this a true
    # division regardless of interpreter version.
    print((tlen - num) / float(tlen))
    print(num / float(tlen))