Baidu Intelligent Cloud Full-Featured AI Development Platform BML, Custom Job Modeling - Auto Search Job Code Example (PaddlePaddle 2.1.1)
Overview:
MNIST image classification based on the PaddlePaddle 2.1.1 framework; the training dataset paddle_train_data.zip can be downloaded here.
Below is the training code for a single hyperparameter combination in a hyperparameter search task. The code receives the values configured on the platform through the argparse module; please keep the flag names consistent with the platform configuration.
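For illustration, here is a minimal sketch (the flag values below are made up, not real platform output) of how one sampled hyperparameter combination arrives through argparse as typed attributes:

    # Minimal sketch: the platform passes one sampled hyperparameter combination
    # as command-line flags; argparse converts them to typed attributes.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--epoch', type=int, default=5)

    # equivalent to: python script.py --lr 0.01 --batch_size 128 --epoch 5
    args = parser.parse_args(['--lr', '0.01', '--batch_size', '128', '--epoch', '5'])
    print(args.lr, args.batch_size, args.epoch)  # 0.01 128 5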
PaddlePaddle 2.1.1 Code Conventions
PaddlePaddle2.1.1_autosearch.py sample code
# -*- coding:utf-8 -*-
"""paddle train demo"""
import os
import numpy as np
import paddle
# import the paddle.fluid module
import paddle.fluid as fluid
import gzip
import struct
import argparse
import time
from rudder_autosearch.sdk.amaas_tools import AMaasTools


def parse_arg():
    """parse arguments"""
    parser = argparse.ArgumentParser(description='paddle2.1.1 mnist Example')
    parser.add_argument('--train_dir', type=str, default='./train_data',
                        help='input data dir for training (default: ./train_data)')
    parser.add_argument('--test_dir', type=str, default='./test_data',
                        help='input data dir for test (default: ./test_data)')
    parser.add_argument('--output_dir', type=str, default='./output',
                        help='output dir for auto_search job (default: ./output)')
    parser.add_argument('--job_id', type=str, default="job-1234",
                        help='auto_search job id (default: "job-1234")')
    parser.add_argument('--trial_id', type=str, default="0-0",
                        help='auto_search id of a single trial (default: "0-0")')
    parser.add_argument('--metric', type=str, default="acc",
                        help='evaluation metric of the model')
    parser.add_argument('--data_sampling_scale', type=float, default=1.0,
                        help='sampling ratio of the data (default: 1.0)')
    parser.add_argument('--batch_size', type=int, default=64,
                        help='number of images input in an iteration (default: 64)')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate of the training (default: 0.001)')
    parser.add_argument('--epoch', type=int, default=5,
                        help='number of epochs to train (default: 5)')
    args = parser.parse_args()
    args.output_dir = os.path.join(args.output_dir, args.job_id, args.trial_id)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    print("job_id: {}, trial_id: {}".format(args.job_id, args.trial_id))
    return args


def load_data(file_dir, is_train=True):
    """
    :param file_dir: directory containing the MNIST gzip files
    :param is_train: load the training split if True, the test split otherwise
    :return: (images, labels) numpy arrays
    """
    if is_train:
        image_path = file_dir + '/train-images-idx3-ubyte.gz'
        label_path = file_dir + '/train-labels-idx1-ubyte.gz'
    else:
        image_path = file_dir + '/t10k-images-idx3-ubyte.gz'
        label_path = file_dir + '/t10k-labels-idx1-ubyte.gz'
    # decompress the gzip files next to the originals
    with open(image_path.replace('.gz', ''), 'wb') as out_f, gzip.GzipFile(image_path) as zip_f:
        out_f.write(zip_f.read())
    # os.unlink(image_path)
    with open(label_path.replace('.gz', ''), 'wb') as out_f, gzip.GzipFile(label_path) as zip_f:
        out_f.write(zip_f.read())
    # os.unlink(label_path)
    # parse the IDX headers, then read the raw bytes
    with open(label_path[:-3], 'rb') as lbpath:
        magic, n = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)
    with open(image_path[:-3], 'rb') as imgpath:
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 784)
    return images, labels


def reader_creator(file_dir, is_train=True, buffer_size=100, data_sampling_scale=1):
    """
    :param file_dir: data directory
    :param is_train: whether to build the training reader
    :param buffer_size: number of samples read per chunk
    :param data_sampling_scale: fraction of the training set to keep
    :return: a reader function
    """
    images, labels = load_data(file_dir, is_train)
    if is_train:
        # randomly sample data_sampling_scale of the training set
        np.random.seed(0)
        sample_data_num = int(data_sampling_scale * len(images))
        idx = np.arange(len(images))
        np.random.shuffle(idx)
        images, labels = images[idx[:sample_data_num]], labels[idx[:sample_data_num]]

    def reader():
        """yield one (image, label) pair at a time"""
        for num in range(int(len(labels) / buffer_size)):
            for i in range(buffer_size):
                yield images[num * buffer_size + i, :], int(labels[num * buffer_size + i])

    return reader


def reader_load(args):
    """reader_load"""
    # Read 500 training samples at a time, shuffle them, and feed them to the
    # batched reader, which yields args.batch_size samples per batch
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            reader_creator(args.train_dir, is_train=True, buffer_size=100,
                           data_sampling_scale=args.data_sampling_scale),
            buf_size=500),
        batch_size=args.batch_size)
    # Read the test set, yielding args.batch_size samples per batch
    test_reader = paddle.batch(
        reader_creator(args.test_dir, is_train=False, buffer_size=100),
        batch_size=args.batch_size)
    return train_reader, test_reader


def softmax_regression():
    """
    Define the softmax classifier:
        a fully connected layer with softmax as the activation function
    Return:
        predict -- the classification result
    """
    # raw input image data, size 28*28*1
    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    # fully connected layer with softmax activation; the output size must be 10,
    # the number of digit classes
    predict = fluid.layers.fc(input=img, size=10, act='softmax')
    return predict


def multilayer_perceptron():
    """
    Define the multilayer perceptron classifier:
        a perceptron with two hidden (fully connected) layers; the hidden layers
        use ReLU activation and the output layer uses softmax
    Return:
        predict -- the classification result
    """
    # raw input image data, size 28*28*1
    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    # first fully connected layer, ReLU activation
    hidden = fluid.layers.fc(input=img, size=200, act='relu')
    # second fully connected layer, ReLU activation
    hidden = fluid.layers.fc(input=hidden, size=200, act='relu')
    # fully connected output layer with softmax activation, output size 10
    prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
    return prediction


def convolutional_neural_network():
    """
    Define the convolutional neural network classifier:
        the 2-D input image passes through two conv-pool layers, then a fully
        connected output layer with softmax activation
    Return:
        predict -- the classification result
    """
    # raw input image data, size 28*28*1
    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    # first conv-pool layer: 20 5*5 filters, pool size 2, pool stride 2, ReLU activation
    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
        pool_size=2,
        pool_stride=2,
        act="relu")
    conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
    # second conv-pool layer: 50 5*5 filters, pool size 2, pool stride 2, ReLU activation
    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
        pool_size=2,
        pool_stride=2,
        act="relu")
    # fully connected output layer with softmax activation, output size 10
    prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
    return prediction


def train_program():
    """
    Configure the training program
    Return:
        predict -- the classification result
        avg_cost -- the average loss
        acc -- the classification accuracy
    """
    paddle.enable_static()
    # label layer, named label, the class label of the input image
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # predict = softmax_regression()        # uncomment to use softmax regression
    # predict = multilayer_perceptron()     # uncomment to use the multilayer perceptron
    predict = convolutional_neural_network()  # use the LeNet-5 convolutional neural network
    # cross-entropy loss between predict and label
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    # average loss
    avg_cost = fluid.layers.mean(cost)
    # classification accuracy
    acc = fluid.layers.accuracy(input=predict, label=label)
    return predict, [avg_cost, acc]


def optimizer_program():
    """build the default optimizer"""
    return fluid.optimizer.Adam(learning_rate=0.001)


def event_handler(pass_id, batch_id, cost):
    """event_handler"""
    # print intermediate training results: epoch, batch, loss
    print("Pass %d, Batch %d, Cost %f" % (pass_id, batch_id, cost))


def train_test(train_test_program, train_test_feed, train_test_reader, executor, fetch_list):
    """train_test"""
    # classification accuracies are collected in acc_set
    acc_set = []
    # average losses are collected in avg_loss_set
    avg_loss_set = []
    # feed every batch yielded by the test reader into the network
    for test_data in train_test_reader():
        avg_loss_np, acc_np = executor.run(
            program=train_test_program,
            feed=train_test_feed.feed(test_data),
            fetch_list=fetch_list)
        acc_set.append(float(acc_np))
        avg_loss_set.append(float(avg_loss_np))
    # accuracy and loss over the test data
    acc_val_mean = np.array(acc_set).mean()
    avg_loss_val_mean = np.array(avg_loss_set).mean()
    # return the average loss and the average accuracy
    return avg_loss_val_mean, acc_val_mean


class Model():
    def __init__(self, args, train_reader, test_reader):
        self.args = args
        self.create_model()
        self.train_reader = train_reader
        self.test_reader = test_reader

    def create_model(self):
        """create_model"""
        # the model runs on a single CPU
        self.place = fluid.CPUPlace()
        # call train_program to get the prediction, loss, and accuracy
        self.prediction, [self.avg_loss, self.acc] = train_program()
        # raw input image data, size 28*28*1
        img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
        # label layer, named label, the class label of the input image
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        # tell the network that each input sample has two parts: img and label
        self.feeder = fluid.DataFeeder(feed_list=[img, label], place=self.place)
        # use the Adam optimizer
        optimizer = fluid.optimizer.Adam(learning_rate=self.args.lr)
        optimizer.minimize(self.avg_loss)

    def run_train(self):
        """run_train"""
        PASS_NUM = self.args.epoch
        epochs = [epoch_id for epoch_id in range(PASS_NUM)]
        self.exe = fluid.Executor(self.place)
        self.exe.run(fluid.default_startup_program())
        main_program = fluid.default_main_program()
        step = 0
        for epoch_id in epochs:
            print("Epoch %d:" % (epoch_id))
            for step_id, data in enumerate(self.train_reader()):
                metrics = self.exe.run(
                    main_program,
                    feed=self.feeder.feed(data),
                    fetch_list=[self.avg_loss, self.acc])
                if step % 100 == 0:
                    # log once every 100 training steps
                    event_handler(epoch_id, step_id, metrics[0])
                step += 1

    def save_model(self):
        """save_model"""
        # save the inference model into the directory named save_dirname
        save_dirname = self.args.output_dir
        fluid.io.save_inference_model(save_dirname, ["img"], [self.prediction],
                                      self.exe, model_filename='model',
                                      params_filename='params')

    def evaluate(self):
        """evaluate"""
        test_program = fluid.default_main_program().clone(for_test=True)
        avg_loss_val, acc_val = train_test(
            train_test_program=test_program,
            train_test_reader=self.test_reader,
            train_test_feed=self.feeder,
            executor=self.exe,
            fetch_list=[self.avg_loss, self.acc])
        print("accuracy: %f" % acc_val)
        return acc_val


def report_final(args, metric):
    """report_final_result"""
    # report the result through the SDK
    amaas_tools = AMaasTools(args.job_id, args.trial_id)
    metric_dict = {args.metric: metric}
    for i in range(3):
        flag, ret_msg = amaas_tools.report_final_result(
            metric=metric_dict,
            export_model_path=args.output_dir,
            checkpoint_path="")
        print("End Report, metric:{}, ret_msg:{}".format(metric, ret_msg))
        if flag:
            break
        time.sleep(1)
    assert flag, "Report final result to manager failed! Please check whether " \
                 "the manager's address and status are ok!"


def main():
    """main"""
    # parse arguments
    args = parse_arg()
    # load the datasets
    train_reader, test_reader = reader_load(args)
    # define the model
    model = Model(args, train_reader, test_reader)
    # train the model
    model.run_train()
    # save the model
    model.save_model()
    # evaluate the model
    acc = model.evaluate()
    # report the result
    report_final(args, metric=acc)


if __name__ == '__main__':
    main()
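As a local sanity check (this step is an assumption; on the platform, the manager consumes export_model_path itself), the inference model exported by save_model() could be reloaded and queried like this:

    # Hypothetical local check: reload the exported inference model and run one
    # all-zeros batch through it (the path mirrors args.output_dir for the
    # default job and trial IDs).
    import numpy as np
    import paddle
    import paddle.fluid as fluid

    paddle.enable_static()
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    save_dirname = './output/job-1234/0-0'
    program, feed_names, fetch_targets = fluid.io.load_inference_model(
        save_dirname, exe, model_filename='model', params_filename='params')
    fake_batch = np.zeros((1, 1, 28, 28), dtype='float32')
    probs = exe.run(program, feed={feed_names[0]: fake_batch}, fetch_list=fetch_targets)
    print(probs[0].shape)  # (1, 10): softmax over the 10 digit classes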
The YAML configuration corresponding to the sample code is shown below; please keep the format consistent. Note that metrics.name must match the --metric value the training code reports through AMaasTools, and the search_space keys (batch_size, lr, epoch) match the argparse flags of the same names.
pwo_search_demo_for_paddle.yml sample content
# Search algorithm parameters
search_strategy:
  algo: PARTICLE_SEARCH      # search strategy: particle swarm optimization
  params:
    population_num: 8        # population size | [1,10], int
    round: 10                # number of iteration rounds | [5,50], int
    inertia_weight: 0.5      # inertia weight | (0,1], float
    global_acceleration: 1.5 # global acceleration | (0,4], float
    local_acceleration: 1.5  # individual acceleration | (0,4], float
# Data sampling ratio for a single training run, in %
data_sampling_scale: 100     # | (0,100], int
# Evaluation metric parameters
metrics:
  name: acc                  # evaluation metric | any string, str
  goal: MAXIMIZE             # maximize/minimize | str, must be MAXIMIZE or MINIMIZE (uppercase)
  expected_value: 100        # early-stop threshold: the whole search ends once the metric exceeds this value, in % | unrestricted, int
# Hyperparameter search space
search_space:
  batch_size:
    htype: choice
    value: [64, 128, 256, 512]
  lr:
    htype: loguniform
    value: [0.0001, 0.1]
  epoch:
    htype: choice
    value: [1, 5, 10]
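For intuition about the two htype values used above, here is a hypothetical sketch (not the platform's actual sampler, which is driven by the particle swarm search): choice picks one of the enumerated values, while loguniform draws uniformly in log space between the two bounds.

    # Hypothetical sampler sketch; the real sampling is done by the platform.
    import math
    import random

    search_space = {
        'batch_size': {'htype': 'choice', 'value': [64, 128, 256, 512]},
        'lr': {'htype': 'loguniform', 'value': [0.0001, 0.1]},
        'epoch': {'htype': 'choice', 'value': [1, 5, 10]},
    }

    def sample(spec):
        if spec['htype'] == 'choice':
            # pick one of the enumerated values
            return random.choice(spec['value'])
        if spec['htype'] == 'loguniform':
            # uniform in log space between the lower and upper bound
            low, high = spec['value']
            return math.exp(random.uniform(math.log(low), math.log(high)))
        raise ValueError('unknown htype: %s' % spec['htype'])

    trial = {name: sample(spec) for name, spec in search_space.items()}
    print(trial)  # e.g. {'batch_size': 128, 'lr': 0.0042, 'epoch': 5}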