1 Star 0 Fork 0

yasiping / emnlp2017-bilstm-cnn-crf

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
Train_MultiTask.py 2.27 KB
一键复制 编辑 原始数据 按行查看 历史
nreimers 提交于 2018-04-20 12:20 . Update code to Keras 2.1.5
# This file contains an example of how to perform multi-task learning using the
# BiLSTM-CNN-CRF implementation.
# In the datasets variable, we specify two datasets: POS-tagging (unidep_pos) and conll2000_chunking.
# The network will then train jointly on both datasets.
# The network can train on more datasets if more entries are added to the datasets dictionary.
from __future__ import print_function
import os
import logging
import sys
from neuralnets.BiLSTM import BiLSTM
from util.preprocessing import perpareDataset, loadDatasetPickle
from keras import backend as K
# :: Make the directory containing this script the current working directory ::
abspath = os.path.abspath(__file__)
dname = os.path.split(abspath)[0]
os.chdir(dname)

# :: Logging: the root logger writes bare messages (no prefix) to stdout ::
loggingLevel = logging.INFO
formatter = logging.Formatter('%(message)s')

# Handler that forwards records at loggingLevel and above to stdout
ch = logging.StreamHandler(sys.stdout)
ch.setFormatter(formatter)
ch.setLevel(loggingLevel)

# getLogger() without a name returns the root logger
logger = logging.getLogger()
logger.addHandler(ch)
logger.setLevel(loggingLevel)
######################################################
#
# Data preprocessing
#
######################################################
# One entry per task that is trained jointly; the key is the dataset name.
# NOTE(review): the per-dataset fields are consumed by util.preprocessing / BiLSTM,
# which are not visible here. Presumably:
#   'columns'       maps a column index in the data files to a column name,
#   'label'         names the column the network should predict,
#   'evaluate'      toggles evaluation on this dataset,
#   'commentSymbol' marks comment lines to skip (None = no comment lines)
# -- confirm against the preprocessing code.
datasets = {
    # POS-tagging
    'unidep_pos': {
        'columns': {1: 'tokens', 3: 'POS'},
        'label': 'POS',
        'evaluate': True,
        'commentSymbol': None,
    },
    # Chunking
    'conll2000_chunking': {
        'columns': {0: 'tokens', 2: 'chunk_BIO'},
        'label': 'chunk_BIO',
        'evaluate': True,
        'commentSymbol': None,
    },
}
# Word embeddings file handed to the preprocessing step.
# NOTE(review): the original comment credits "Word embeddings by Levy et al"
# (https://levyomer.wordpress.com/2014/04/25/dependency-based-word-embeddings/),
# but the file name says "komninos" -- confirm which embeddings are intended.
embeddingsPath = 'komninos_english_embeddings.gz'

# :: Prepares the dataset to be used with the LSTM-network.
# :: Creates and stores cPickle files in the pkl/ folder ::
pickleFile = perpareDataset(embeddingsPath, datasets)

######################################################
#
# The training of the network starts here
#
######################################################

# Load the embeddings, the mappings and the dataset from the pickle file
embeddings, mappings, data = loadDatasetPickle(pickleFile)

# Some network hyperparameters
params = {
    'classifier': ['CRF'],
    'LSTM-Size': [100],
    'dropout': (0.25, 0.25),
}

model = BiLSTM(params)
model.setMappings(mappings, embeddings)
model.setDataset(datasets, data)

# The bracketed placeholders are passed through verbatim here; presumably
# BiLSTM substitutes them when it stores a model -- verify in neuralnets.BiLSTM.
model.modelSavePath = "models/[ModelName]_[DevScore]_[TestScore]_[Epoch].h5"

model.fit(epochs=25)
1
https://gitee.com/yasiping/emnlp2017-bilstm-cnn-crf.git
git@gitee.com:yasiping/emnlp2017-bilstm-cnn-crf.git
yasiping
emnlp2017-bilstm-cnn-crf
emnlp2017-bilstm-cnn-crf
master

搜索帮助