1、摘要
本文主要讲解:PSO粒子群优化-LSTM-pyswarms框架-实现期货价格预测
主要思路:
- 从网上找到影响期货价格预测的相关数据,爬取下来并处理好,形成时间序列训练数据
- 使用train_test_split划分训练集和测试集
- 训练数据和测试数据进行标准化处理:StandardScaler.fit_transform
- 建立LSTM模型,激活函数用relu,使用SGD去优化
- 使用pyswarms更新模型的权重
- 画PSO-LSTM实际值与预测值结果图
2、数据介绍
上海期货交易所每月行情,请看文末链接
3、相关技术
PySwarms 是一个可扩展的 Python 粒子群优化 (PSO) 研究工具包。
PySwarms 实现了网格搜索和随机搜索技术来为优化器找到最佳参数。
PySwarms 也可以绘制优化器的性能曲线。其绘图模块建立在 matplotlib 之上,因此高度可定制。
不过说实话,算法优化我并不推荐用PSO,虽然说PSO的论文多,但是都被用烂了,AutoML-NNI,hyperopt,optuna,ray都是很好很先进的优化框架,里面集成了很多效果非常好的优化算法,推荐大家学习。
4、完整代码和步骤
效果如下:
主运行程序入口
import os
import time
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.style as style
import numpy as np
import pandas as pd
import pyswarms as ps
import tensorflow as tf
from keras import optimizers
from keras.layers import Dense
from keras.layers import LSTM
from keras.models import Sequential
from pyswarms.utils.plotters import plot_cost_history
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Data loading and preprocessing: read the training and prediction CSV files,
# split features/target, standardise them and reshape the features into the
# (samples, timesteps, 1) layout fed to the LSTM.

# Error: 0.1827714  (presumably the loss printed by an earlier run - TODO confirm)
matplotlib.style.use('ggplot')
plt.rcParams['font.sans-serif'] = ['SimHei']  # font used so Chinese labels render
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['xtick.direction'] = 'in'  # x-axis ticks point inwards
plt.rcParams['ytick.direction'] = 'in'  # y-axis ticks point inwards

# Ask TensorFlow to grow GPU memory on demand instead of reserving it up front.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)

# Prefix prepended to the output files written later in the script.  It was
# referenced but never defined; an empty prefix writes into the working
# directory.
src = ''

rb_data = pd.read_csv(r'new_data.csv')
pre_data1 = pd.read_csv(r'pre_data1.csv')
pre_data1['日期'] = pd.to_datetime(pre_data1['日期'])
pre_data1.set_index('日期', inplace=True)

# The last column is the target, every other column is a feature.
# X was commented out and Xpre was missing although both are used below.
# NOTE(review): assumes both CSV files share the same feature columns once the
# date column has become the index of pre_data1 - verify against the data.
X = rb_data.iloc[:, :-1]
col = rb_data.columns.tolist()
Y = rb_data.iloc[:, -1]
Xpre = pre_data1.iloc[:, :-1]
Ypre = pre_data1.iloc[:, -1]

# Date labels (as strings) used for the x axis of the final plot.
_x = Ypre.index
_x = [i.strftime("%Y-%m-%d") for i in _x]
print(_x)
print(Ypre)

Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.25, random_state=420)

# Standardise the features: fit on the training split only, then apply the
# same scaling to every other split.
ss_x = StandardScaler()
Xtrain = ss_x.fit_transform(Xtrain)
Xtest = ss_x.transform(Xtest)
Xpre = ss_x.transform(Xpre)
X = ss_x.transform(X)

# Standardise the target the same way.  The scaler must only be fitted on the
# training target; re-fitting it on the full target (as the script used to do)
# leaks test data into the scaling applied to Ytest and Ypre.
ss_y = StandardScaler()
Ytrain = ss_y.fit_transform(Ytrain.values.reshape(-1, 1))
Y = ss_y.transform(Y.values.reshape(-1, 1))
Ytest = ss_y.transform(Ytest.values.reshape(-1, 1))
Ypre = ss_y.transform(Ypre.values.reshape(-1, 1))

# Each feature column becomes one timestep carrying a single value.
Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], Xtrain.shape[1], 1))
Xtest = np.reshape(Xtest, (Xtest.shape[0], Xtrain.shape[1], 1))
Xpre = np.reshape(Xpre, (Xpre.shape[0], Xtrain.shape[1], 1))

# Flatten the targets to 1-D vectors.
Ytrain = np.reshape(Ytrain, (Ytrain.shape[0],))
Ytest = np.reshape(Ytest, (Ytest.shape[0],))
Ypre = np.reshape(Ypre, (Ypre.shape[0],))
# Part 1: build the LSTM regressor and train it with SGD as a baseline.
# NOTE(review): timesteps must equal the number of feature columns, because
# the features are reshaped to (samples, n_columns, 1) - confirm for the data.
timesteps = 16
features = 1

# Write Keras model LSTM layers and compile the model using SGD
model = Sequential()
model.add(LSTM(128, activation='relu', input_shape=(timesteps, features)))
model.add(Dense(1))
# NOTE(review): `lr` and `decay` are legacy argument names that recent Keras
# releases may reject - confirm against the installed version.
sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_squared_error', optimizer=sgd)
model.summary()

# Dimensionality of the PSO search space.  Taken from the model itself: the
# previously hard-coded 68737 does not match this architecture (LSTM(128) on
# one input feature plus Dense(1) has 66689 parameters).
total_param = model.count_params()

# Use model fit command to train the model
t0 = time.time()
result_sgd = model.fit(Xtrain, Ytrain, epochs=100, batch_size=128, verbose=0)
t1 = time.time()
print("***************************")
print()
print("LSTM Model")
print("Time taken to train the model: ", t1 - t0, "secs.")
print("Error:", result_sgd.history['loss'][-1])

# Save or plot error with epochs
plt.plot(result_sgd.history['loss'])
plt.xlabel('Number of iteration', size=15)
plt.ylabel('Value of MSE', size=15)
plt.title('PSO_adam_LSTM parameter optimization')
# plt.gca() returns the axes that holds the curve; plt.axes() would create a
# new, empty axes on top of it.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.savefig('4_PSO-LSTM适应函数曲线图.svg', dpi=600, format='svg', transparent=True, bbox_inches='tight')
plt.show()
# Part 2:
# I use another PSO package called pyswarms here
# Current weights of the LSTM layer and of the Dense layer, one list per layer.
# Plain lists are used because the per-layer arrays have different shapes;
# wrapping such ragged data in np.array raises ValueError on recent NumPy.
model_weights = [model.layers[0].get_weights(), model.layers[1].get_weights()]
# Shapes of the five weight arrays (three from layer 0, two from layer 1), in
# the order in which a flat particle vector is cut into them.
shape = [
    model_weights[0][0].shape,
    model_weights[0][1].shape,
    model_weights[0][2].shape,
    model_weights[1][0].shape,
    model_weights[1][1].shape,
]
def func(vector_x):
    """Evaluate one PSO particle as a full set of network weights.

    The flat vector is cut into consecutive chunks, one per entry of the
    module-level ``shape`` sequence, and each chunk is reshaped to that entry.
    The first three arrays are loaded into ``model.layers[0]`` and the last two
    into ``model.layers[1]``; the model then predicts ``Xpre`` and the mean
    squared error against ``Ypre`` is returned.

    The model is written to ``src + 'PSO_LSTM.h5'`` only when the particle
    beats the best error seen so far, so the file always holds the best
    weights found instead of whichever particle happened to be evaluated last
    (and the disk is not hit on every single evaluation).

    Args:
        vector_x: 1-D array holding all weights of the model, flattened.

    Returns:
        Mean squared error of the prediction on ``Xpre``.
    """
    # NOTE(review): the fitness is computed on the same Xpre/Ypre data that is
    # plotted and scored at the end of the script, so those final metrics are
    # optimistic - consider optimising on the training split instead.
    offset = 0
    unpacked = []
    for dims in shape:
        size = int(np.prod(dims))
        unpacked.append(vector_x[offset:offset + size].reshape(dims))
        offset += size

    # Keep the shared per-layer weight lists in sync with the model.
    model_weights[0][:] = unpacked[:3]
    model_weights[1][:] = unpacked[3:]
    model.layers[0].set_weights(model_weights[0])
    model.layers[1].set_weights(model_weights[1])

    pso_predict = model.predict(Xpre)
    error = mean_squared_error(Ypre, pso_predict)
    if error < func.best_error:
        func.best_error = error
        model.save(src + 'PSO_LSTM.h5')
    return error


# Lowest error returned by func() so far; decides when the model is re-saved.
func.best_error = float('inf')
def swarm_func(x):
    """Return the cost of every particle in the swarm.

    Args:
        x: 2-D array with one particle (flat weight vector) per row.

    Returns:
        1-D array holding ``func(row)`` for each row of ``x``, in row order.
    """
    costs = []
    for row in range(x.shape[0]):
        costs.append(func(x[row]))
    return np.array(costs)
# initialization
# c1 / c2 / w are the coefficients handed to the pyswarms optimizer.
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
optimizer = ps.single.GlobalBestPSO(n_particles=80, dimensions=total_param,
                                    options=options)

t2 = time.time()
result_pso = optimizer.optimize(swarm_func, iters=120)
t3 = time.time()

# optimize() returns (best_cost, best_position).  After the run `model` still
# carries the weights of the last particle that was evaluated, so the best
# position is evaluated once more: this loads the best weights into `model`,
# and any checkpoint func() writes then reflects the best particle rather
# than the last one.
func(result_pso[1])

print("Partical Swarm Optimization")
print("Time taken to train the model: ", t3 - t2, "secs.")
print("Error:", result_pso[0])
print()
print("***************************")
print()

# Plot the cost recorded by the optimizer at each iteration.
plot_cost_history(optimizer.cost_history)
plt.show()
# Save or plot error with generations/iterations
# Compare performance matrices with Part 1
##################################################################
# Rebuild the same architecture and load the weights saved during the PSO run.
model = Sequential()
model.add(LSTM(128, activation='relu', input_shape=(timesteps, features)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.load_weights(src + 'PSO_LSTM.h5')

# Only the prediction set is evaluated here.
Y_3 = model.predict(Xpre)

###########################
# Back to the original price scale.  Ypre was flattened to 1-D earlier, but
# the scaler only accepts 2-D input, so it is turned into a column first.
Ypre = ss_y.inverse_transform(np.reshape(Ypre, (-1, 1)))
Y3 = ss_y.inverse_transform(Y_3)

##########################
positions = range(len(_x))
plt.plot(positions, Ypre, label='real_price', color='red')
plt.plot(positions, Y3, label='predict_value', color='blue')
# The x axis is numeric (sample positions); the dates are only tick labels,
# so the limits must be positions too, not date strings.
plt.xlim(0, len(_x) - 1)
plt.xticks(range(0, len(_x), 2), list(_x)[::2], rotation=45)
plt.ylim(3500, 6000)
plt.yticks(range(3500, 6500, 500))
plt.title('PSO-LSTM_futures_price(real and predict)')
plt.xlabel('date')
plt.ylabel('futures_price')
plt.legend(frameon=False, loc='best')
# No plt.axes() call here: it would add a new, empty axes over the finished
# plot right before it is saved.
plt.savefig(src + '3_PSO-LSTM预测集.svg', dpi=600, format='svg', transparent=True,
            bbox_inches='tight')
plt.show()

# The error metrics are reported on the standardised scale.
print(mean_absolute_error(ss_y.transform(Ypre), ss_y.transform(Y3)))
print(mean_squared_error(ss_y.transform(Ypre), ss_y.transform(Y3)))
print(r2_score(ss_y.transform(Ypre), ss_y.transform(Y3)))

#######################
# Export real and predicted prices side by side.
a1 = pd.Series(np.array(Ypre).ravel().tolist(), name='Ypre')
b1 = pd.Series(np.array(Y3).ravel().tolist(), name='Y3')
HB = pd.concat([a1, b1], axis=1)
HB.to_csv(src + '5_PSO-LSTM实际值与预测值结果.csv', index=False)
代码有所删减,需要完整代码请到其他文章找到我的联系方式可以吗?
5、学习链接
上海期货交易所每月行情
https://pypi.org/project/pyswarms/
ljvmiranda921/pyswarms
PySwarms(Python粒子群优化工具包)的使用:GlobalBestPSO例子解析