1 Star 0 Fork 0

7分钟/aspect-extraction

Create your Gitee Account
Explore and code with more than 12 million developers. Free private repositories! :)
Sign up
Clone or Download
build_data.py 1.88 KB
Copy Edit Raw Blame History
Soujanya Poria authored 2018-07-28 15:45 . Add files via upload
from model.config import Config
from model.data_utils import CoNLLDataset, get_vocabs, UNK, NUM, \
get_glove_vocab, write_vocab, load_vocab, get_char_vocab, \
export_trimmed_glove_vectors, get_processing_word
def main():
    """Build and save the vocabularies and the trimmed GloVe vectors.

    You MUST run this procedure. It iterates over the whole dataset (train,
    dev and test) and extracts the word and tag vocabularies. Only words
    that also appear in the GloVe vocabulary are kept, plus the special
    UNK and NUM tokens. The vocabularies are then written to files; writing
    a vocabulary to a file assigns an id (the line number) to each entry.

    The relevant GloVe vectors are then extracted and stored in a np array
    such that the i-th entry corresponds to the i-th word in the vocabulary.

    Finally, the character vocabulary is built from the training set only,
    read without any word processing, and written to its own file.

    This function takes no arguments: the configuration is created here with
    ``Config(load=False)`` and all file names are read from it.
    """
    # Get config and the word-processing function.
    config = Config(load=False)
    processing_word = get_processing_word(lowercase=True)

    # Dataset readers; every word goes through `processing_word`.
    dev = CoNLLDataset(config.filename_dev, processing_word)
    test = CoNLLDataset(config.filename_test, processing_word)
    train = CoNLLDataset(config.filename_train, processing_word)

    # Build word and tag vocabularies; keep only words GloVe also knows.
    vocab_words, vocab_tags = get_vocabs([train, dev, test])
    vocab_glove = get_glove_vocab(config.filename_glove)
    vocab = vocab_words & vocab_glove
    vocab.add(UNK)
    vocab.add(NUM)

    # Save vocabularies.
    write_vocab(vocab, config.filename_words)
    write_vocab(vocab_tags, config.filename_tags)

    # Trim GloVe vectors. The vocabulary is reloaded from the file that was
    # just written rather than reusing the in-memory set -- presumably so
    # that ids match the line numbers in that file (confirm in load_vocab).
    vocab = load_vocab(config.filename_words)
    export_trimmed_glove_vectors(vocab, config.filename_glove,
                                 config.filename_trimmed, config.dim_word)

    # Build and save the char vocabulary from the unprocessed training set
    # (no `processing_word` is passed to this reader).
    train = CoNLLDataset(config.filename_train)
    vocab_chars = get_char_vocab(train)
    write_vocab(vocab_chars, config.filename_chars)
# Run the data-building procedure only when executed as a script.
if __name__ == "__main__":
    main()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/plusyou13/aspect-extraction.git
git@gitee.com:plusyou13/aspect-extraction.git
plusyou13
aspect-extraction
aspect-extraction
master

Search