# Background: building a model with PyTorch's LSTM module to fit a sequence [58, 367, 324, ...].
# Problem: after building the model (LSTM layer + fully connected layer), training shows the
# loss does not converge and the sequence cannot be fitted.
# Symptom: after tuning lr and the number of iterations many times, the output still tends to a
# constant value (the fluctuations of the target sequence are not reproduced); the loss stays
# very large with no sign of convergence.
# Update 2023/3/27 10:00 — extra information in response to the answers below:
# 1) "learning rate set badly": I tried values from 0.00001 to 10 with no improvement.
# 2) "epoch count set badly": I tried 100 to 500 epochs with no improvement.
# 3) The code below only fits the first 3 data points, because the full 26-point sequence
#    would not converge (loss diverges); with fewer points it does converge.
# Thanks for the answers below — please include the output of your runs if possible.
import torch
# Target sequence to fit, stored as a (26, 1, 1) float32 tensor:
# 26 time steps, batch size 1, one feature per step.
_SEQUENCE = [
    58., 367., 324., 620., 146., 681., 582., 432., 87.,
    415., 443., 680., 0., 230., 484., 497., 324., 620.,
    681., 84., 484., 448., 144., 536., 680., 0.,
]
train_data = torch.tensor(_SEQUENCE, dtype=torch.float32).reshape(-1, 1, 1)
# A single-step (1, 1, 1) sample kept around for quick experiments.
train_data_short = torch.tensor([[[100.]]], dtype=torch.float32)
class POEM_LSTM(torch.nn.Module):
    """LSTM followed by a linear projection back to the input width.

    Maps an input tensor whose last dimension is ``input_size`` to a tensor of
    the same shape: the LSTM produces hidden states of width ``hidden_size``
    and the linear layer projects each hidden state back to ``input_size``
    features.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # NOTE(review): bias=False disables the LSTM's bias terms, reducing
        # its capacity; kept as the original author wrote it.
        self.LstmLayer = torch.nn.LSTM(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            bias=False,
        )
        self.LinearLayer = torch.nn.Linear(
            in_features=self.hidden_size,
            out_features=self.input_size,
        )

    def forward(self, x):
        # The LSTM's second return value, the (h_t, c_t) state pair, is unused.
        hidden_seq, _ = self.LstmLayer(x)
        # Original note: the output would still need rounding, because the
        # one-hot/target values are integers in the range 0..681.
        return self.LinearLayer(hidden_seq)
# --- training script -----------------------------------------------------
# Fit the first three sequence values: the model is fed the time index j
# (0, 1, 2) and trained to reproduce train_data[j].
#
# Fix: the original used Adam with lr=10, which is far above any workable
# range for Adam (typical values are 1e-3 .. 1e-1); every update overshot
# and the loss could not converge — exactly the reported symptom. A fixed
# seed is also set so debugging runs are reproducible.
torch.manual_seed(0)

poem_lstm = POEM_LSTM(1, 3, 2)  # 1 input feature, hidden size 3, 2 layers
loss = torch.nn.MSELoss()
opt = torch.optim.Adam(poem_lstm.parameters(), lr=0.1)

for i in range(500):
    for j in range(3):
        opt.zero_grad()  # clear accumulated gradients every iteration
        output = poem_lstm(torch.tensor([[j]], dtype=torch.float32))
        l_loss = loss(output, train_data[j])
        l_loss.backward()
        opt.step()
        if i == 499:  # report predictions/targets/loss on the final epoch only
            # print(poem_lstm.state_dict())
            print(output)
            print(train_data[j])
            print(l_loss)
            print('\n')