百度智能云全功能AI开发平台BML自定义作业建模 - 训练作业代码示例(Sklearn 0.23.2)
文档简介:
sklearn框架下,自定义作业支持发布保存模型为pickle和joblib格式,并且在发布至模型仓库时需要选择相应的模型文件。使用下面代码进行模型训练时,训练程序可以自行加载数据,训练数据选择空文件夹即可。
Sklearn
在 sklearn 框架下,自定义作业支持将模型保存为 pickle 或 joblib 格式后发布;发布至模型仓库时,需要选择相应的模型文件。下面的示例代码会自行加载数据(sklearn 内置的 iris 数据集),因此训练数据选择空文件夹即可。
pickle格式模型示例代码:
# -*- coding:utf-8 -*-
"""
sklearn train demo

Trains a logistic-regression classifier on the iris dataset and saves the
fitted model in pickle format under ``output/``.
"""
import os
import pickle

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression


def _ensure_parent_dir(path):
    """Create the directory that will contain *path* if it does not exist."""
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)


def load_data():
    """
    Load the iris dataset and split it into training and test sets.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test)``.
    """
    # 150 samples in total: 120 for training and 30 for testing (80/20 split).
    # Each sample is an ndarray of four features, e.g. [6.4, 3.1, 5.5, 1.8].
    inputdata = datasets.load_iris()
    # test_size=0.2 gives the 80/20 split; random_state=0 makes it repeatable.
    x_train, x_test, y_train, y_test = train_test_split(
        inputdata.data, inputdata.target, test_size=0.2, random_state=0)
    return x_train, x_test, y_train, y_test


def save_model(model):
    """
    Save the model in pickle format to ``output/clf.pickle``.

    Args:
        model: the fitted estimator to serialize.
    """
    path = 'output/clf.pickle'
    # open() does not create missing directories, so make sure it exists.
    _ensure_parent_dir(path)
    with open(path, 'wb') as f:
        pickle.dump(model, f)


def save_model_joblib(model):
    """
    Save the model in joblib format to ``output/clf.pkl``.

    Args:
        model: the fitted estimator to serialize.
    """
    # Prefer the standalone joblib package; fall back to the copy bundled
    # with old scikit-learn releases. Only ImportError is caught so that
    # unrelated failures (or Ctrl-C) are not silently swallowed.
    try:
        import joblib
    except ImportError:
        from sklearn.externals import joblib
    path = 'output/clf.pkl'
    _ensure_parent_dir(path)
    joblib.dump(model, path)


def main():
    """Train the classifier, save it with pickle and print its metrics."""
    # Training features, test features, training labels, test labels.
    x_train, x_test, y_train, y_test = load_data()
    # Use L2 regularization.
    model = LogisticRegression(penalty='l2')
    model.fit(x_train, y_train)
    save_model(model)
    print("w: %s" % model.coef_)
    print("b: %s" % model.intercept_)
    # Printed under the label "precision"; the value is model.score() on the
    # test set.
    print("precision: %s" % model.score(x_test, y_test))
    # Mean squared difference between predicted and true class labels.
    print("MSE: %s" % np.mean((model.predict(x_test) - y_test) ** 2))


if __name__ == '__main__':
    main()
joblib格式模型示例代码:
# -*- coding:utf-8 -*-
"""
sklearn train demo

Trains a logistic-regression classifier on the iris dataset and saves the
fitted model in joblib format under ``output/``.
"""
import os
import pickle

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression


def _ensure_parent_dir(path):
    """Create the directory that will contain *path* if it does not exist."""
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)


def load_data():
    """
    Load the iris dataset and split it into training and test sets.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test)``.
    """
    # 150 samples in total: 120 for training and 30 for testing (80/20 split).
    # Each sample is an ndarray of four features, e.g. [6.4, 3.1, 5.5, 1.8].
    inputdata = datasets.load_iris()
    # test_size=0.2 gives the 80/20 split; random_state=0 makes it repeatable.
    x_train, x_test, y_train, y_test = train_test_split(
        inputdata.data, inputdata.target, test_size=0.2, random_state=0)
    return x_train, x_test, y_train, y_test


def save_model(model):
    """
    Save the model in pickle format to ``output/clf.pickle``.

    Args:
        model: the fitted estimator to serialize.
    """
    path = 'output/clf.pickle'
    # open() does not create missing directories, so make sure it exists.
    _ensure_parent_dir(path)
    with open(path, 'wb') as f:
        pickle.dump(model, f)


def save_model_joblib(model):
    """
    Save the model in joblib format to ``output/clf.pkl``.

    Args:
        model: the fitted estimator to serialize.
    """
    # sklearn.externals.joblib no longer exists in scikit-learn 0.23+, so
    # import the standalone joblib package first and only fall back to the
    # bundled copy on old scikit-learn installations.
    try:
        import joblib
    except ImportError:
        from sklearn.externals import joblib
    path = 'output/clf.pkl'
    _ensure_parent_dir(path)
    joblib.dump(model, path)


def main():
    """Train the classifier, save it with joblib and print its metrics."""
    # Training features, test features, training labels, test labels.
    x_train, x_test, y_train, y_test = load_data()
    # Use L2 regularization.
    model = LogisticRegression(penalty='l2')
    model.fit(x_train, y_train)
    save_model_joblib(model)
    print("w: %s" % model.coef_)
    print("b: %s" % model.intercept_)
    # Printed under the label "precision"; the value is model.score() on the
    # test set.
    print("precision: %s" % model.score(x_test, y_test))
    # Mean squared difference between predicted and true class labels.
    print("MSE: %s" % np.mean((model.predict(x_test) - y_test) ** 2))


if __name__ == '__main__':
    main()