ELMo for Keras
from bilm import TokenBatcher, BidirectionalLanguageModel, weight_layers, dump_token_embeddings
import h5py
我们需要四个文件:vocab_file、options_file、weight_file,以及用于保存模型 token embedding 的 token_embedding_file。像这样:
# Write the vocabulary one token per line, in id order, so the bilm tools
# (TokenBatcher / dump_token_embeddings) can consume it.
vocab_file = 'vocab_small.txt'
# Explicit encoding: vocabulary tokens may be non-ASCII.
with open(vocab_file, 'w', encoding='utf-8') as fout:
    # config.i2w maps integer id -> token (presumably; verify against caller).
    fout.writelines(config.i2w[i] + '\n' for i in range(len(config.i2w)))
# Pre-trained ELMo model definition (architecture hyperparameters).
options_file = './ELMo/elmo_2x4096_512_2048cnn_2xhighway_options.json'
# Pre-trained ELMo weights (HDF5).
weight_file = './ELMo/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5'
# Vocabulary file written above, one token per line.
vocab_file = 'vocab_small.txt'
# Output path: per-token embeddings dumped for this vocabulary.
token_embedding_file = 'elmo_token_embeddings.hdf5'
模型搭建好后,将读出的 wt 作为词向量层(word_emb)的权值:
# Dump per-token ELMo embeddings for the fixed vocabulary, then load them and
# install them as frozen weights of the Keras model's word-embedding layer.
dump_token_embeddings(
    config.vocab_file, config.options_file, config.weight_file, config.token_embedding_file
)
# dump_token_embeddings builds a TF graph; reset it before further model work.
tf.reset_default_graph()

# Use a context manager so the HDF5 handle is closed (original leaked it).
with h5py.File(config.token_embedding_file, 'r') as f:
    wt = f['embedding'][:]

# BUG FIX: the original called set_weights([word_vector]) with an undefined
# `word_vector` (the loaded matrix is `wt`) and set `trainable` on an
# undefined bare name `word_emb` — both must go through the layer object.
emb_layer = model.get_layer('word_emb')
emb_layer.set_weights([wt])
emb_layer.trainable = False  # freeze: embeddings are pre-trained, not tuned