TensorFlow: Generating Acrostic Poems with an LSTM
I have recently been learning TensorFlow and just got to the RNN part. There is not a lot of material on it, but once I learned that an RNN can be used to generate acrostic poems (藏头诗), I decided to start there!
This post does not cover the basics of RNNs and LSTMs; if you need that background, please look it up on your own.
The code here builds on the blog post "TensorFlow7: 基于RNN生成古诗词" (generating classical poetry with an RNN);
if you want more context, it is worth reading that post first.
[Note: the data, code, and model files used in this post are all available on Baidu Cloud:
link: https://pan.baidu.com/s/1qY4mt1y  password: 47y2]
Training the Model
First we need to train the model. The network is a 2-layer LSTM with 128 hidden units per layer, and batch_size is set to 64. The training data comes from the Complete Tang Poems corpus (included in the Baidu Cloud share above). One point worth highlighting is that the training data is reshuffled after every full pass over it.
The source code is as follows:
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import collections
import numpy as np
import tensorflow as tf

#------------------------------- Data preprocessing ---------------------------#

poetry_file = 'poetry.txt'

# Poem corpus
poetrys = []
with open(poetry_file, "r", encoding='utf-8') as f:
    for line in f:
        try:
            line = line.strip(u'\n')
            title, content = line.strip(u' ').split(u':')
            content = content.replace(u' ', u'')
            # Skip poems containing markup or brackets
            if u'_' in content or u'(' in content or u'(' in content or u'《' in content or u'[' in content:
                continue
            # Skip poems that are too short or too long
            if len(content) < 5 or len(content) > 79:
                continue
            # '[' and ']' mark the beginning and end of a poem
            content = u'[' + content + u']'
            poetrys.append(content)
        except Exception as e:
            pass

# Sort poems by length
poetrys = sorted(poetrys, key=lambda line: len(line))
print('Total number of Tang poems: ', len(poetrys))

# Count how often each character occurs
all_words = []
for poetry in poetrys:
    all_words += [word for word in poetry]
counter = collections.Counter(all_words)
count_pairs = sorted(counter.items(), key=lambda x: -x[1])
words, _ = zip(*count_pairs)

# Keep the most common characters (here: all of them) plus a blank used for padding
words = words[:len(words)] + (' ',)
# Map each character to an integer ID
word_num_map = dict(zip(words, range(len(words))))
# Convert each poem into a vector of IDs (see TensorFlow练习1)
to_num = lambda word: word_num_map.get(word, len(words))
poetrys_vector = [list(map(to_num, poetry)) for poetry in poetrys]
# [[314, 3199, 367, 1556, 26, 179, 680, 0, 3199, 41, 506, 40, 151, 4, 98, 1],
#  [339, 3, 133, 31, 302, 653, 512, 0, 37, 148, 294, 25, 54, 833, 3, 1, 965, 1315, 377, 1700, 562, 21, 37, 0, 2, 1253, 21, 36, 264, 877, 809, 1]
#  ....]

# Train on 64 poems at a time
batch_size = 64
n_chunk = len(poetrys_vector) // batch_size

class DataSet(object):
    def __init__(self, data_size):
        self._data_size = data_size
        self._epochs_completed = 0
        self._index_in_epoch = 0
        self._data_index = np.arange(data_size)

    def next_batch(self, batch_size):
        start = self._index_in_epoch
        if start + batch_size > self._data_size:
            # Epoch finished: reshuffle the data and start over
            np.random.shuffle(self._data_index)
            self._epochs_completed = self._epochs_completed + 1
            self._index_in_epoch = batch_size
            full_batch_features, full_batch_labels = self.data_batch(0, batch_size)
            return full_batch_features, full_batch_labels
        else:
            self._index_in_epoch += batch_size
            end = self._index_in_epoch
            full_batch_features, full_batch_labels = self.data_batch(start, end)
            if self._index_in_epoch == self._data_size:
                self._index_in_epoch = 0
                self._epochs_completed = self._epochs_completed + 1
                np.random.shuffle(self._data_index)
            return full_batch_features, full_batch_labels

    def data_batch(self, start, end):
        batches = []
        for i in range(start, end):
            batches.append(poetrys_vector[self._data_index[i]])

        # Pad every poem in the batch to the length of the longest one
        length = max(map(len, batches))
        xdata = np.full((end - start, length), word_num_map[' '], np.int32)
        for row in range(end - start):
            xdata[row, :len(batches[row])] = batches[row]
        # Targets are the inputs shifted left by one character
        ydata = np.copy(xdata)
        ydata[:, :-1] = xdata[:, 1:]
        return xdata, ydata

#--------------------------------------- RNN --------------------------------------#

input_data = tf.placeholder(tf.int32, [batch_size, None])
output_targets = tf.placeholder(tf.int32, [batch_size, None])

# Define the RNN
def neural_network(model='lstm', rnn_size=128, num_layers=2):
    if model == 'rnn':
        cell_fun = tf.nn.rnn_cell.BasicRNNCell
    elif model == 'gru':
        cell_fun = tf.nn.rnn_cell.GRUCell
    elif model == 'lstm':
        cell_fun = tf.nn.rnn_cell.BasicLSTMCell

    cell = cell_fun(rnn_size, state_is_tuple=True)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)

    initial_state = cell.zero_state(batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [rnn_size, len(words)])
        softmax_b = tf.get_variable("softmax_b", [len(words)])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [len(words), rnn_size])
            inputs = tf.nn.embedding_lookup(embedding, input_data)

    outputs, last_state = tf.nn.dynamic_rnn(cell, inputs, initial_state=initial_state, scope='rnnlm')
    output = tf.reshape(outputs, [-1, rnn_size])
    logits = tf.matmul(output, softmax_w) + softmax_b
    probs = tf.nn.softmax(logits)
    return logits, last_state, probs, cell, initial_state

def load_model(sess, saver, ckpt_path):
    latest_ckpt = tf.train.latest_checkpoint(ckpt_path)
    if latest_ckpt:
        print('resume from', latest_ckpt)
        saver.restore(sess, latest_ckpt)
        return int(latest_ckpt[latest_ckpt.rindex('-') + 1:])
    else:
        print('building model from scratch')
        sess.run(tf.global_variables_initializer())
        return -1

# Training
def train_neural_network():
    logits, last_state, _, _, _ = neural_network()
    targets = tf.reshape(output_targets, [-1])
    loss = tf.nn.seq2seq.sequence_loss_by_example([logits], [targets],
                                                  [tf.ones_like(targets, dtype=tf.float32)], len(words))
    cost = tf.reduce_mean(loss)
    learning_rate = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), 5)
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.apply_gradients(zip(grads, tvars))

    Session_config = tf.ConfigProto(allow_soft_placement=True)
    Session_config.gpu_options.allow_growth = True

    trainds = DataSet(len(poetrys_vector))

    with tf.Session(config=Session_config) as sess:
        with tf.device('/gpu:2'):
            sess.run(tf.initialize_all_variables())
            saver = tf.train.Saver(tf.all_variables())

            # Resume from the latest checkpoint if one exists
            last_epoch = load_model(sess, saver, 'model/')

            for epoch in range(last_epoch + 1, 100):
                # Exponentially decaying learning rate
                sess.run(tf.assign(learning_rate, 0.002 * (0.97 ** epoch)))
                #sess.run(tf.assign(learning_rate, 0.01))
                all_loss = 0.0
                for batche in range(n_chunk):
                    x, y = trainds.next_batch(batch_size)
                    train_loss, _, _ = sess.run([cost, last_state, train_op],
                                                feed_dict={input_data: x, output_targets: y})
                    all_loss = all_loss + train_loss
                    if batche % 50 == 1:
                        #print(epoch, batche, 0.01, train_loss)
                        print(epoch, batche, 0.002 * (0.97 ** epoch), train_loss)

                saver.save(sess, 'model/poetry.module', global_step=epoch)
                print(epoch, ' Loss: ', all_loss * 1.0 / n_chunk)

train_neural_network()
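Before moving on, it may help to see what data_batch actually feeds the network: every poem in a batch is padded with the blank character to a common length, and the target sequence is the input shifted left by one position, so at each time step the model is trained to predict the following character. A minimal standalone numpy sketch with made-up ID values:

import numpy as np

# Two toy "poems" encoded as character IDs (hypothetical values, much shorter than real poems)
batch = [[314, 3199, 367, 1556, 1],
         [339, 3, 133, 1]]
pad_id = 0                      # stands in for word_num_map[' '] in the real script

# Pad every poem in the batch to the length of the longest one
length = max(map(len, batch))
xdata = np.full((len(batch), length), pad_id, np.int32)
for row, poem in enumerate(batch):
    xdata[row, :len(poem)] = poem

# The target at each time step is simply the next character of the input
ydata = np.copy(xdata)
ydata[:, :-1] = xdata[:, 1:]

print(xdata)
# [[ 314 3199  367 1556    1]
#  [ 339    3  133    1    0]]
print(ydata)
# [[3199  367 1556    1    1]
#  [   3  133    1    0    0]]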
Running this code saves the trained model parameters under the "model" folder. After 100 epochs, the average loss drops to about 2.6. The trained model is also included in the Baidu Cloud share above.
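The training script resumes from the newest file in model/ automatically (via load_model). If you want to confirm which checkpoint that is before training resumes or before generation, a two-line check using the same API:

import tensorflow as tf

# Prints something like 'model/poetry.module-99', or None if nothing has been saved yet
print(tf.train.latest_checkpoint('model/'))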
Generating Poems
With the trained model, it is easy to generate all kinds of classical poems.
Here are a few examples:
Generating Acrostic Poems
Here is the code:
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import collections
import numpy as np
import tensorflow as tf

'''
This script produces an acrostic poem from the given head characters.
'''

#------------------------------- Data preprocessing ---------------------------#

poetry_file = 'poetry.txt'

# Poem corpus
poetrys = []
with open(poetry_file, "r", encoding='utf-8') as f:
    for line in f:
        try:
            line = line.strip(u'\n')
            title, content = line.strip(u' ').split(u':')
            content = content.replace(u' ', u'')
            # Skip poems containing markup or brackets
            if u'_' in content or u'(' in content or u'(' in content or u'《' in content or u'[' in content:
                continue
            # Skip poems that are too short or too long
            if len(content) < 5 or len(content) > 79:
                continue
            # '[' and ']' mark the beginning and end of a poem
            content = u'[' + content + u']'
            poetrys.append(content)
        except Exception as e:
            pass

# Sort poems by length
poetrys = sorted(poetrys, key=lambda line: len(line))
print('Total number of Tang poems: ', len(poetrys))

# Count how often each character occurs
all_words = []
for poetry in poetrys:
    all_words += [word for word in poetry]
counter = collections.Counter(all_words)
count_pairs = sorted(counter.items(), key=lambda x: -x[1])
words, _ = zip(*count_pairs)

# Keep the most common characters (here: all of them) plus a blank used for padding
words = words[:len(words)] + (' ',)
# Map each character to an integer ID
word_num_map = dict(zip(words, range(len(words))))
# Convert each poem into a vector of IDs (see TensorFlow练习1)
to_num = lambda word: word_num_map.get(word, len(words))
poetrys_vector = [list(map(to_num, poetry)) for poetry in poetrys]
# [[314, 3199, 367, 1556, 26, 179, 680, 0, 3199, 41, 506, 40, 151, 4, 98, 1],
#  [339, 3, 133, 31, 302, 653, 512, 0, 37, 148, 294, 25, 54, 833, 3, 1, 965, 1315, 377, 1700, 562, 21, 37, 0, 2, 1253, 21, 36, 264, 877, 809, 1]
#  ....]

# Generate one poem at a time
batch_size = 1
n_chunk = len(poetrys_vector) // batch_size

class DataSet(object):
    def __init__(self, data_size):
        self._data_size = data_size
        self._epochs_completed = 0
        self._index_in_epoch = 0
        self._data_index = np.arange(data_size)

    def next_batch(self, batch_size):
        start = self._index_in_epoch
        if start + batch_size > self._data_size:
            # Epoch finished: reshuffle the data and start over
            np.random.shuffle(self._data_index)
            self._epochs_completed = self._epochs_completed + 1
            self._index_in_epoch = batch_size
            full_batch_features, full_batch_labels = self.data_batch(0, batch_size)
            return full_batch_features, full_batch_labels
        else:
            self._index_in_epoch += batch_size
            end = self._index_in_epoch
            full_batch_features, full_batch_labels = self.data_batch(start, end)
            if self._index_in_epoch == self._data_size:
                self._index_in_epoch = 0
                self._epochs_completed = self._epochs_completed + 1
                np.random.shuffle(self._data_index)
            return full_batch_features, full_batch_labels

    def data_batch(self, start, end):
        batches = []
        for i in range(start, end):
            batches.append(poetrys_vector[self._data_index[i]])

        # Pad every poem in the batch to the length of the longest one
        length = max(map(len, batches))
        xdata = np.full((end - start, length), word_num_map[' '], np.int32)
        for row in range(end - start):
            xdata[row, :len(batches[row])] = batches[row]
        # Targets are the inputs shifted left by one character
        ydata = np.copy(xdata)
        ydata[:, :-1] = xdata[:, 1:]
        return xdata, ydata

#--------------------------------------- RNN --------------------------------------#

input_data = tf.placeholder(tf.int32, [batch_size, None])
output_targets = tf.placeholder(tf.int32, [batch_size, None])

# Define the RNN (must match the architecture used for training)
def neural_network(model='lstm', rnn_size=128, num_layers=2):
    if model == 'rnn':
        cell_fun = tf.nn.rnn_cell.BasicRNNCell
    elif model == 'gru':
        cell_fun = tf.nn.rnn_cell.GRUCell
    elif model == 'lstm':
        cell_fun = tf.nn.rnn_cell.BasicLSTMCell

    cell = cell_fun(rnn_size, state_is_tuple=True)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)

    initial_state = cell.zero_state(batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [rnn_size, len(words)])
        softmax_b = tf.get_variable("softmax_b", [len(words)])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [len(words), rnn_size])
            inputs = tf.nn.embedding_lookup(embedding, input_data)

    outputs, last_state = tf.nn.dynamic_rnn(cell, inputs, initial_state=initial_state, scope='rnnlm')
    output = tf.reshape(outputs, [-1, rnn_size])
    logits = tf.matmul(output, softmax_w) + softmax_b
    probs = tf.nn.softmax(logits)
    return logits, last_state, probs, cell, initial_state

#------------------------------- Poem generation ---------------------------------#
# Use the trained model
def gen_head_poetry(heads, type):
    if type != 5 and type != 7:
        print('The second para has to be 5 or 7!')
        return

    # Sample a character in proportion to the predicted probabilities
    def to_word(weights):
        t = np.cumsum(weights)
        s = np.sum(weights)
        sample = int(np.searchsorted(t, np.random.rand(1) * s))
        return words[sample]

    _, last_state, probs, cell, initial_state = neural_network()

    Session_config = tf.ConfigProto(allow_soft_placement=True)
    Session_config.gpu_options.allow_growth = True

    with tf.Session(config=Session_config) as sess:
        with tf.device('/gpu:1'):
            sess.run(tf.initialize_all_variables())
            saver = tf.train.Saver(tf.all_variables())
            saver.restore(sess, 'model/poetry.module-99')

            poem = ''
            for head in heads:
                flag = True
                while flag:
                    # Reset the state and feed the start-of-poem marker '['
                    state_ = sess.run(cell.zero_state(1, tf.float32))
                    x = np.array([list(map(word_num_map.get, u'['))])
                    [probs_, state_] = sess.run([probs, last_state],
                                                feed_dict={input_data: x, initial_state: state_})

                    # Force the head character as the first character of the line
                    sentence = head
                    x = np.zeros((1, 1))
                    x[0, 0] = word_num_map[sentence]
                    [probs_, state_] = sess.run([probs, last_state],
                                                feed_dict={input_data: x, initial_state: state_})
                    word = to_word(probs_)
                    sentence += word

                    # Keep sampling until the end-of-sentence mark '。'
                    while word != u'。':
                        x = np.zeros((1, 1))
                        x[0, 0] = word_num_map[word]
                        [probs_, state_] = sess.run([probs, last_state],
                                                    feed_dict={input_data: x, initial_state: state_})
                        word = to_word(probs_)
                        sentence += word

                    # Accept the line only if it has the requested length; otherwise retry
                    if len(sentence) == 2 + 2 * type:
                        sentence += u'\n'
                        poem += sentence
                        flag = False

            return poem

print(gen_head_poetry(u'天下之大', 5))
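A note on sampling: to_word above does not simply take the most likely character; it draws the next character in proportion to the predicted probabilities, which is what keeps the generated lines varied (and is also why the retry loop is needed when a line comes out the wrong length). A small standalone numpy check of the cumulative-sum trick, using a hypothetical five-character vocabulary:

import numpy as np
from collections import Counter

vocab = ['春', '花', '秋', '月', '。']           # toy vocabulary (hypothetical)
weights = np.array([0.1, 0.4, 0.2, 0.2, 0.1])   # toy next-character probabilities

def to_word(weights):
    t = np.cumsum(weights)                       # cumulative distribution
    s = np.sum(weights)                          # total probability mass
    # Draw a uniform number in [0, s) and find which bucket it falls into
    sample = int(np.searchsorted(t, np.random.rand(1) * s))
    return vocab[sample]

# Sampling many times recovers roughly the original distribution,
# e.g. '花' should come up about 40% of the time.
print(Counter(to_word(weights) for _ in range(10000)))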
Finally, as you can see from the function interface, besides choosing the head characters yourself, you can also choose whether the lines are five or seven characters long.
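For example, a seven-character acrostic could be requested like this (the head phrase here is just an illustration; the output depends on the restored checkpoint and the random sampling):

print(gen_head_poetry(u'春夏秋冬', 7))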
Here are a few five-character examples:
And a few seven-character examples:
So, can it be used to write love poems?
Of course it can!

