南强小屋 Design By 杰米
我们给大家带来了关于学习 Python 中 scikit-learn 机器学习代码的相关具体实例，以下就是全部代码内容：
# -*- coding: utf-8 -*-
import numpy
from sklearn import metrics
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB
from sklearn import linear_model
from sklearn.datasets import load_iris
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn import cross_validation
from sklearn import preprocessing
#import iris_data
def load_data():
    """Load the iris dataset and split it into training and test parts.

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test)``. 20% of the samples
        are held out for testing and the split uses ``random_state=42``.
    """
    dataset = load_iris()
    features = dataset.data
    labels = dataset.target
    split = train_test_split(features, labels, test_size=0.20, random_state=42)
    train_x, test_x, train_y, test_y = split
    # Callers unpack both training arrays first, then both test arrays.
    return train_x, train_y, test_x, test_y
def train_clf3(train_data, train_tags):
    """Fit a linear support vector classifier (``LinearSVC`` with ``C=1100.0``).

    Args:
        train_data: Training feature matrix.
        train_tags: Target label for each training sample.

    Returns:
        The fitted ``LinearSVC`` instance.
    """
    # LinearSVC is a linear model; it has no 'rbf' kernel option.
    model = LinearSVC(C=1100.0)
    model.fit(train_data, train_tags)
    return model
def train_clf(train_data, train_tags):
    """Fit a multinomial naive Bayes classifier (``alpha=0.01``).

    The label array is printed before fitting as a debugging aid.

    Args:
        train_data: Training feature matrix.
        train_tags: Target label for each training sample.

    Returns:
        The fitted ``MultinomialNB`` instance.
    """
    labels = numpy.asarray(train_tags)
    print(labels)
    model = MultinomialNB(alpha=0.01)
    model.fit(train_data, labels)
    return model
def evaluate(actual, pred, average='weighted'):
    """Print precision, recall and F1 score for a set of predictions.

    Args:
        actual: Ground-truth labels.
        pred: Predicted labels, aligned with ``actual``.
        average: Averaging strategy forwarded to the scikit-learn metric
            functions. The metric default (``'binary'``) is rejected for
            targets with more than two classes, such as the iris labels
            scored by this script, so a multiclass-capable strategy is
            passed explicitly.
    """
    m_precision = metrics.precision_score(actual, pred, average=average)
    m_recall = metrics.recall_score(actual, pred, average=average)
    m_f1 = metrics.f1_score(actual, pred, average=average)
    print('precision:{0:.3f}'.format(m_precision))
    print('recall:{0:0.3f}'.format(m_recall))
    print('f1-score:{0:.8f}'.format(m_f1))
# Demo run: train on the iris training split, predict the held-out split,
# then print the summary metrics followed by the per-class report.
x_train, y_train, x_test, y_test = load_data()
clf = train_clf(x_train, y_train)
pred = clf.predict(x_test)
evaluate(numpy.asarray(y_test), pred)
print(metrics.classification_report(y_test, pred))
使用自定义数据
# coding: utf-8
import numpy
from sklearn import metrics
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer,TfidfTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
import codecs
from sklearn.ensemble import RandomForestClassifier
from sklearn import cross_validation
from sklearn import linear_model
# Sample training documents: each string is one document whose tokens are
# already separated by single spaces.
train_corpus = [
    '我们 我们 好孩子 认证 。 就是',
    '我们 好孩子 认证 。 中国',
    '我们 好孩子 认证 。 孤独',
    '我们 好孩子 认证 。',
]

# Sample test documents in the same space-separated format.
test_corpus = [
    '我 菲律宾 韩国',
    '我们 好孩子 认证 。 中国',
]
def _read_tagged_file(path):
    """Parse one ``tag:payload`` text file into (documents, tags) lists."""
    documents = []
    tags = []
    # The context manager closes the file even when a line fails to parse.
    with codecs.open(path, 'r', 'utf-8', 'ignore') as handle:
        for line in handle:
            tks = line.split(':', 1)
            word_list = tks[1]
            # Drop the first character and the last three characters of the
            # payload, then split the remainder on ", ".
            # NOTE(review): presumably these are "[" and "]\r\n" of a
            # CRLF-terminated "[w1, w2, ...]" list -- confirm against the
            # data files.
            word_array = word_list[1:(len(word_list) - 3)].split(", ")
            documents.append(" ".join(word_array))
            tags.append(tks[0])
    return documents, tags


def input_data(train_file, test_file):
    """Read the tagged training and test files.

    Each line is split at its first ``:``; the text before it is the tag and
    the text after it is the word-list payload, which is re-joined with
    single spaces.

    Args:
        train_file: Path of the UTF-8 training file.
        test_file: Path of the UTF-8 test file.

    Returns:
        A tuple ``(train_words, train_tags, test_words, test_tags)`` of lists.

    Raises:
        IndexError: If a line contains no ``:`` separator.
    """
    train_words, train_tags = _read_tagged_file(train_file)
    test_words, test_tags = _read_tagged_file(test_file)
    return train_words, train_tags, test_words, test_tags
def vectorize(train_words, test_words):
    """Convert both document lists into hashed term-count feature matrices.

    Args:
        train_words: Training documents, one string per document.
        test_words: Test documents, one string per document.

    Returns:
        A tuple ``(train_data, test_data)`` of feature matrices.
    """
    # Alternatives that were tried here: a HashingVectorizer limited to
    # n_features=25000, and CountVectorizer(min_df=1).
    hasher = HashingVectorizer(non_negative=True)
    train_matrix = hasher.fit_transform(train_words)
    test_matrix = hasher.fit_transform(test_words)
    return train_matrix, test_matrix
def vectorize1(train_words, test_words):
    """Convert both document lists into TF-IDF feature matrices.

    The vocabulary and the IDF weights are learned from the training
    documents only and then applied unchanged to the test documents.

    Args:
        train_words: Training documents, one string per document.
        test_words: Test documents, one string per document.

    Returns:
        A tuple ``(train_data, test_data)`` of TF-IDF matrices that share the
        same columns.
    """
    tv = TfidfVectorizer(sublinear_tf=False, use_idf=True)
    train_data = tv.fit_transform(train_words)
    # Transform with the already fitted vectorizer. Fitting a second
    # vectorizer on the test documents would weight the test features with
    # IDF values computed from the test set instead of the training set.
    test_data = tv.transform(test_words)
    return train_data, test_data
def vectorize2(train_words, test_words):
    """Convert both document lists into TF-IDF matrices via term counts.

    Counts come from a ``CountVectorizer`` (English stop words removed, terms
    appearing in more than half of the training documents dropped) and are
    then re-weighted by a ``TfidfTransformer``. Both objects are fitted on
    the training documents only.

    Args:
        train_words: Training documents, one string per document.
        test_words: Test documents, one string per document.

    Returns:
        A tuple ``(train_data, test_data)`` of TF-IDF matrices that share the
        same columns.
    """
    count_v1 = CountVectorizer(stop_words='english', max_df=0.5)
    counts_train = count_v1.fit_transform(train_words)
    # The fitted vectorizer already holds the training vocabulary, so the
    # test documents are counted against exactly the same columns.
    counts_test = count_v1.transform(test_words)

    tfidftransformer = TfidfTransformer()
    train_data = tfidftransformer.fit_transform(counts_train)
    # Do not refit on the test counts: that would replace the IDF weights
    # learned from the training set with ones computed from the test set.
    test_data = tfidftransformer.transform(counts_test)
    return train_data, test_data
def evaluate(actual, pred, average='weighted'):
    """Print precision, recall and F1 score for a set of predictions.

    Args:
        actual: Ground-truth labels.
        pred: Predicted labels, aligned with ``actual``.
        average: Averaging strategy forwarded to the scikit-learn metric
            functions. The metric default (``'binary'``) only works for
            two-class targets that contain the positive label ``1``; the
            tags scored by this script are strings read from a file, so a
            multiclass-capable strategy is passed explicitly.
    """
    m_precision = metrics.precision_score(actual, pred, average=average)
    m_recall = metrics.recall_score(actual, pred, average=average)
    m_f1 = metrics.f1_score(actual, pred, average=average)
    print('precision:{0:.3f}'.format(m_precision))
    print('recall:{0:0.3f}'.format(m_recall))
    print('f1-score:{0:.8f}'.format(m_f1))
def train_clf(train_data, train_tags):
    """Fit a multinomial naive Bayes classifier (``alpha=0.01``).

    Args:
        train_data: Training feature matrix.
        train_tags: Target label for each training sample.

    Returns:
        The fitted ``MultinomialNB`` instance.
    """
    labels = numpy.asarray(train_tags)
    model = MultinomialNB(alpha=0.01)
    model.fit(train_data, labels)
    return model
def train_clf1(train_data, train_tags):
    """Fit a k-nearest-neighbours classifier with its default settings.

    Args:
        train_data: Training feature matrix.
        train_tags: Target label for each training sample.

    Returns:
        The fitted ``KNeighborsClassifier`` instance.
    """
    labels = numpy.asarray(train_tags)
    # No arguments are passed, so the library default neighbour count applies.
    knn = KNeighborsClassifier()
    knn.fit(train_data, labels)
    return knn
def train_clf2(train_data, train_tags):
    """Fit a logistic regression classifier with ``C=1e5``.

    Args:
        train_data: Training feature matrix.
        train_tags: Target label for each training sample.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    logreg = linear_model.LogisticRegression(C=1e5)
    logreg.fit(train_data, train_tags)
    return logreg
def train_clf3(train_data, train_tags):
    """Fit a linear support vector classifier (``LinearSVC`` with ``C=1100.0``).

    Args:
        train_data: Training feature matrix.
        train_tags: Target label for each training sample.

    Returns:
        The fitted ``LinearSVC`` instance.
    """
    # LinearSVC is a linear model; it has no 'rbf' kernel option.
    svm = LinearSVC(C=1100.0)
    svm.fit(train_data, train_tags)
    return svm
def train_clf4(train_data, train_tags):
    """Fit a random forest (10 trees) on a dense copy of the features.

    The original note on this function says the random forest cannot be
    given a sparse matrix, so the features are densified before fitting.

    Args:
        train_data: Training feature matrix. A scipy sparse matrix is
            converted with ``toarray()``; anything else goes through
            ``numpy.asarray``.
        train_tags: Target label for each training sample.

    Returns:
        The fitted ``RandomForestClassifier`` instance.
    """
    # toarray() yields a plain ndarray. todense() yields numpy.matrix, which
    # recent scikit-learn releases refuse as estimator input.
    if hasattr(train_data, 'toarray'):
        dense_data = train_data.toarray()
    else:
        dense_data = numpy.asarray(train_data)
    clf = RandomForestClassifier(n_estimators=10)
    clf.fit(dense_data, train_tags)
    return clf
# Read a text file line by line with codecs.
def codecs_read_label_line(filename):
    """Return the lines of a UTF-8 file without their line terminators.

    Undecodable bytes are ignored (``errors='ignore'``).

    Args:
        filename: Path of the file to read.

    Returns:
        A list with one string per line, in file order.
    """
    label_list = []
    # The context manager closes the file even if reading fails part-way.
    with codecs.open(filename, 'r', 'utf-8', 'ignore') as f:
        for line in f:
            # Strip whichever terminator actually ended the line. Blindly
            # dropping the last character truncated a final line that has no
            # newline and left a stray "\r" on CRLF-terminated lines.
            label_list.append(line.splitlines()[0])
    return label_list
def save_test_features(test_url, test_label):
    """Write one ``feature<TAB>label`` line per sample to ``test_labeded.dat``.

    Args:
        test_url: Sequence of feature strings (text, not bytes), one per
            sample. Previously this argument was ignored and ``test.dat``
            was always re-read; passing ``None`` keeps that behaviour.
        test_label: Sequence of label strings aligned with ``test_url``.
            Pairs are formed with ``zip``, so extra items in the longer
            sequence are dropped.
    """
    if test_url is None:
        test_url = codecs_read_label_line('test.dat')
    # NOTE(review): the output name "test_labeded.dat" looks like a typo for
    # "labeled"; it is kept because other tooling may read this exact path.
    # Writing through codecs keeps non-ASCII text intact and the context
    # manager closes the file even if a write fails. Lines end with "\n".
    with codecs.open('test_labeded.dat', 'w', 'utf-8') as fw:
        for url, label in zip(test_url, test_label):
            fw.write(url + '\t' + label + '\n')
def main():
    """Train a LinearSVC on the tagged text files and report its quality.

    Steps: read the training and test files, build TF-IDF features with
    ``vectorize1``, fit ``train_clf3``, print 5-fold weighted-F1
    cross-validation scores on the training data, then print every
    misclassified test sample, the number of misclassifications and the
    summary metrics from ``evaluate``.
    """
    train_file = u'..\\file\\py_train.txt'
    test_file = u'..\\file\\py_test.txt'
    corpora = input_data(train_file, test_file)
    train_words, train_tags, test_words, test_tags = corpora
    train_data, test_data = vectorize1(train_words, test_words)

    # Debug output describing the feature matrices.
    print(type(train_data))
    print(train_data.shape)
    print(test_data.shape)
    first_test_row = test_data[0]
    print(first_test_row.shape)
    print(numpy.asarray(first_test_row))

    clf = train_clf3(train_data, train_tags)
    scores = cross_validation.cross_val_score(
        clf, train_data, train_tags, cv=5, scoring="f1_weighted")
    print(scores)
    # cross_validation.cross_val_predict(clf, train_data, train_tags, cv=5)
    # was tried here as an alternative and is currently disabled.

    pred = clf.predict(test_data)
    error_list = []
    for true_tag, predict_tag in zip(test_tags, pred):
        if true_tag == predict_tag:
            continue
        # One "expected predicted" line per misclassified sample.
        mismatch = true_tag + ' ' + predict_tag
        print(mismatch)
        error_list.append(mismatch)
    print(len(error_list))
    evaluate(numpy.asarray(test_tags), pred)
    # Disabled optional step (writes the predicted labels to disk):
    #     test_feature_list = codecs_read_label_line('test.dat')
    #     save_test_features(test_feature_list, pred)
# Run the pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()
南强小屋 Design By 杰米
广告合作:本站广告合作请联系QQ:858582 申请时备注:广告合作(否则不回)
免责声明:本站文章均来自网站采集或用户投稿,网站不提供任何软件下载或自行开发的软件! 如有用户或公司发现本站内容信息存在侵权行为,请邮件告知! 858582#qq.com
免责声明:本站文章均来自网站采集或用户投稿,网站不提供任何软件下载或自行开发的软件! 如有用户或公司发现本站内容信息存在侵权行为,请邮件告知! 858582#qq.com
南强小屋 Design By 杰米
暂无python中scikit-learn机器代码实例的评论...
《魔兽世界》大逃杀!60人新游玩模式《强袭风暴》3月21日上线
暴雪近日发布了《魔兽世界》10.2.6 更新内容,新游玩模式《强袭风暴》即将于3月21 日在亚服上线,届时玩家将前往阿拉希高地展开一场 60 人大逃杀对战。
艾泽拉斯的冒险者已经征服了艾泽拉斯的大地及遥远的彼岸。他们在对抗世界上最致命的敌人时展现出过人的手腕,并且成功阻止终结宇宙等级的威胁。当他们在为即将于《魔兽世界》资料片《地心之战》中来袭的萨拉塔斯势力做战斗准备时,他们还需要在熟悉的阿拉希高地面对一个全新的敌人──那就是彼此。在《巨龙崛起》10.2.6 更新的《强袭风暴》中,玩家将会进入一个全新的海盗主题大逃杀式限时活动,其中包含极高的风险和史诗级的奖励。
《强袭风暴》不是普通的战场,作为一个独立于主游戏之外的活动,玩家可以用大逃杀的风格来体验《魔兽世界》,不分职业、不分装备(除了你在赛局中捡到的),光是技巧和战略的强弱之分就能决定出谁才是能坚持到最后的赢家。本次活动将会开放单人和双人模式,玩家在加入海盗主题的预赛大厅区域前,可以从强袭风暴角色画面新增好友。游玩游戏将可以累计名望轨迹,《巨龙崛起》和《魔兽世界:巫妖王之怒 经典版》的玩家都可以获得奖励。