dict has no attribute word_vec #17

Open

senjutisen7 opened this issue Apr 14, 2020 · 0 comments

senjutisen7 commented Apr 14, 2020

Hi

When I try to load the GloVe vectors while training the LSTM, I get the following error:

AttributeError: 'dict' object has no attribute 'word_vec'
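The same message can be reproduced on any plain dict, which makes me think the loaded object is not what `word_vec` expects:

# minimal reproduction of the message above: calling .word_vec() on a plain dict
d = {"the": [0.1, 0.2, 0.3]}
d.word_vec("the")   # AttributeError: 'dict' object has no attribute 'word_vec'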

This happens when I try to train the LSTM with the following code:

import pickle
import numpy as np

def get_init_embedding(reverse_dict, embedding_size):
    print("Loading GloVe vectors..")
    with open("C:/Users/sensen/OneDrive - HERE Global B.V-/Desktop/NLP/glove.6B.300d_pickle", 'rb') as handle:
        word_vectors = pickle.load(handle)

    word_vec_list = list()

    # loop through all the words in reverse_dict
    used_words = 0
    for _, word in sorted(reverse_dict.items()):
        try:
            word_vec = word_vectors.word_vec(word)   # <-- the AttributeError is raised here
            used_words += 1
        except KeyError:
            word_vec = np.zeros([embedding_size], dtype=np.float32)

        word_vec_list.append(word_vec)

    # overwrite rows 2 and 3 with random normal vectors
    word_vec_list[2] = np.random.normal(0, 1, embedding_size)
    word_vec_list[3] = np.random.normal(0, 1, embedding_size)

    return np.array(word_vec_list)
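As far as I can tell, `word_vec()` is a method of gensim's `KeyedVectors`, not of a plain dict, so the call above would only work if the pickle actually held a `KeyedVectors` object. A minimal sketch of that kind of object, assuming gensim 3.x is available; the file paths are placeholders, not my actual files:

# Sketch only: build a gensim KeyedVectors from the GloVe text file,
# which is the kind of object that exposes .word_vec(word).
# Assumes gensim 3.x; paths are illustrative placeholders.
from gensim.scripts.glove2word2vec import glove2word2vec
from gensim.models import KeyedVectors

glove2word2vec("glove.6B.300d.txt", "glove.6B.300d.w2v.txt")  # prepend the word2vec header line
kv = KeyedVectors.load_word2vec_format("glove.6B.300d.w2v.txt")

vec = kv.word_vec("summary")   # 300-dim numpy array; raises KeyError for out-of-vocabulary words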

Building the model architecture:
import tensorflow as tf
from tensorflow.contrib import rnn

class Model(object):
    def __init__(self, reversed_dict, article_max_len, summary_max_len, args, forward_only=False):
        self.vocabulary_size = len(reversed_dict)
        self.embedding_size = args.embedding_size
        self.num_hidden = args.num_hidden
        self.num_layers = args.num_layers
        self.learning_rate = args.learning_rate
        self.beam_width = args.beam_width
        if not forward_only:  # training phase: keep_prob defines the dropout rate
            self.keep_prob = args.keep_prob
        else:
            self.keep_prob = 1.0
        self.cell = tf.nn.rnn_cell.BasicLSTMCell  # LSTM cell constructor
        with tf.variable_scope("decoder/projection"):
            # projection layer used by the decoder in both training and testing;
            # it maps decoder outputs to vocabulary-sized logits
            self.projection_layer = tf.layers.Dense(self.vocabulary_size, use_bias=False)
        # defining the batch size
        self.batch_size = tf.placeholder(tf.int32, (), name="batch_size")
        self.X = tf.placeholder(tf.int32, [None, article_max_len])  # encoder input (article word ids)
        self.X_len = tf.placeholder(tf.int32, [None])  # article lengths; a placeholder because they are fed at run time
        self.decoder_input = tf.placeholder(tf.int32, [None, summary_max_len])  # decoder input
        self.decoder_len = tf.placeholder(tf.int32, [None])  # decoder lengths
        self.decoder_target = tf.placeholder(tf.int32, [None, summary_max_len])  # decoder targets
        self.global_step = tf.Variable(0, trainable=False)

        # EMBEDDING LAYER

        with tf.name_scope("embedding"):
            if not forward_only and args.glove:  # training phase and GloVe is used
                init_embeddings = tf.constant(get_init_embedding(reversed_dict, self.embedding_size), dtype=tf.float32)  # constant because the pretrained embeddings won't change; get_init_embedding returns the vector for each word in our dict
            else:
                init_embeddings = tf.random_uniform([self.vocabulary_size, self.embedding_size], -1.0, 1.0)  # testing phase: no pretrained matrix available, so initialize randomly
            self.embeddings = tf.get_variable("embeddings", initializer=init_embeddings)
            self.encoder_emb_inp = tf.transpose(tf.nn.embedding_lookup(self.embeddings, self.X), perm=[1, 0, 2])  # encoder input
            self.decoder_emb_inp = tf.transpose(tf.nn.embedding_lookup(self.embeddings, self.decoder_input), perm=[1, 0, 2])  # decoder input

        with tf.name_scope("encoder"):
            fw_cells = [self.cell(self.num_hidden) for _ in range(self.num_layers)]
            bw_cells = [self.cell(self.num_hidden) for _ in range(self.num_layers)]
            fw_cells = [rnn.DropoutWrapper(cell) for cell in fw_cells]
            bw_cells = [rnn.DropoutWrapper(cell) for cell in bw_cells]

            encoder_outputs, encoder_state_fw, encoder_state_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
                fw_cells, bw_cells, self.encoder_emb_inp,
                sequence_length=self.X_len, time_major=True, dtype=tf.float32)
            self.encoder_output = tf.concat(encoder_outputs, 2)
            encoder_state_c = tf.concat((encoder_state_fw[0].c, encoder_state_bw[0].c), 1)
            encoder_state_h = tf.concat((encoder_state_fw[0].h, encoder_state_bw[0].h), 1)
            self.encoder_state = rnn.LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)

        with tf.name_scope("decoder"), tf.variable_scope("decoder") as decoder_scope:
            decoder_cell = self.cell(self.num_hidden * 2)

            if not forward_only:
                attention_states = tf.transpose(self.encoder_output, [1, 0, 2])
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    self.num_hidden * 2, attention_states, memory_sequence_length=self.X_len, normalize=True)
                decoder_cell = tf.contrib.seq2seq.AttentionWrapper(decoder_cell, attention_mechanism,
                                                                   attention_layer_size=self.num_hidden * 2)
                initial_state = decoder_cell.zero_state(dtype=tf.float32, batch_size=self.batch_size)
                initial_state = initial_state.clone(cell_state=self.encoder_state)
                helper = tf.contrib.seq2seq.TrainingHelper(self.decoder_emb_inp, self.decoder_len, time_major=True)
                decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, initial_state)
                outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, output_time_major=True, scope=decoder_scope)
                self.decoder_output = outputs.rnn_output
                self.logits = tf.transpose(
                    self.projection_layer(self.decoder_output), perm=[1, 0, 2])
                self.logits_reshape = tf.concat(
                    [self.logits, tf.zeros([self.batch_size, summary_max_len - tf.shape(self.logits)[1], self.vocabulary_size])], axis=1)
            else:
                tiled_encoder_output = tf.contrib.seq2seq.tile_batch(
                    tf.transpose(self.encoder_output, perm=[1, 0, 2]), multiplier=self.beam_width)
                tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch(self.encoder_state, multiplier=self.beam_width)
                tiled_seq_len = tf.contrib.seq2seq.tile_batch(self.X_len, multiplier=self.beam_width)
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    self.num_hidden * 2, tiled_encoder_output, memory_sequence_length=tiled_seq_len, normalize=True)
                decoder_cell = tf.contrib.seq2seq.AttentionWrapper(decoder_cell, attention_mechanism,
                                                                   attention_layer_size=self.num_hidden * 2)
                initial_state = decoder_cell.zero_state(dtype=tf.float32, batch_size=self.batch_size * self.beam_width)
                initial_state = initial_state.clone(cell_state=tiled_encoder_final_state)
                decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell=decoder_cell,
                    embedding=self.embeddings,
                    start_tokens=tf.fill([self.batch_size], tf.constant(2)),
                    end_token=tf.constant(3),
                    initial_state=initial_state,
                    beam_width=self.beam_width,
                    output_layer=self.projection_layer
                )
                outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder, output_time_major=True, maximum_iterations=summary_max_len, scope=decoder_scope)
                self.prediction = tf.transpose(outputs.predicted_ids, perm=[1, 2, 0])

        with tf.name_scope("loss"):
            if not forward_only:
                crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.logits_reshape, labels=self.decoder_target)
                weights = tf.sequence_mask(self.decoder_len, summary_max_len, dtype=tf.float32)
                self.loss = tf.reduce_sum(crossent * weights / tf.to_float(self.batch_size))

                params = tf.trainable_variables()
                gradients = tf.gradients(self.loss, params)
                clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
                optimizer = tf.train.AdamOptimizer(self.learning_rate)
                self.update = optimizer.apply_gradients(zip(clipped_gradients, params), global_step=self.global_step)

Training
import time
start = time.perf_counter()
import tensorflow as tf
import argparse
import pickle
import os

class args:
    pass

args.num_hidden=150
args.num_layers=2
args.beam_width=10
args.glove="store_true"
args.embedding_size=300

args.learning_rate=1e-3
args.batch_size=64
args.num_epochs=10
args.keep_prob = 0.8

args.toy=False #"store_true"

args.with_model="store_true"

if not os.path.exists("saved_model"):
    os.mkdir("saved_model")
else:
    if args.with_model:
        old_model_checkpoint_path = open('saved_model/checkpoint', 'r')
        old_model_checkpoint_path = "".join(["saved_model/", old_model_checkpoint_path.read().splitlines()[0].split('"')[1]])

print("Building dictionary...")
word_dict, reverse_dict, article_max_len, summary_max_len = build_dict("train", args.toy)
print("Loading training dataset...")
train_x, train_y = build_dataset("train", word_dict, article_max_len, summary_max_len, args.toy)

tf.reset_default_graph()

with tf.Session() as sess:
    model = Model(reverse_dict, article_max_len, summary_max_len, args)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())
    if 'old_model_checkpoint_path' in globals():
        print("Continuing from previous trained model:", old_model_checkpoint_path, "...")
        saver.restore(sess, old_model_checkpoint_path)

    batches = batch_iter(train_x, train_y, args.batch_size, args.num_epochs)
    num_batches_per_epoch = (len(train_x) - 1) // args.batch_size + 1

print("\nIteration starts.")
print("Number of batches per epoch :", num_batches_per_epoch)
for batch_x, batch_y in batches:
    batch_x_len = list(map(lambda x: len([y for y in x if y != 0]), batch_x))
    batch_decoder_input = list(map(lambda x: [word_dict["<s>"]] + list(x), batch_y))
    batch_decoder_len = list(map(lambda x: len([y for y in x if y != 0]), batch_decoder_input))
    batch_decoder_output = list(map(lambda x: list(x) + [word_dict["</s>"]], batch_y))

    batch_decoder_input = list(
        map(lambda d: d + (summary_max_len - len(d)) * [word_dict["<padding>"]], batch_decoder_input))
    batch_decoder_output = list(
        map(lambda d: d + (summary_max_len - len(d)) * [word_dict["<padding>"]], batch_decoder_output))
    
    train_feed_dict = {
        model.batch_size: len(batch_x),
        model.X: batch_x,
        model.X_len: batch_x_len,
        model.decoder_input: batch_decoder_input,
        model.decoder_len: batch_decoder_len,
        model.decoder_target: batch_decoder_output
    }

    _, step, loss = sess.run([model.update, model.global_step, model.loss], feed_dict=train_feed_dict)

    if step % 1000 == 0:
        print("step {0}: loss = {1}".format(step, loss))

    if step % num_batches_per_epoch == 0:
        hours, rem = divmod(time.perf_counter() - start, 3600)
        minutes, seconds = divmod(rem, 60)
        saver.save(sess, "C:/Users/sensen/OneDrive - HERE Global B.V-/Desktop/NLP/Open source libraries/Text summarization", global_step=step)
        print(" Epoch {0}: Model is saved.".format(step // num_batches_per_epoch),
        "Elapsed: {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds) , "\n")

I converted the downloaded GloVe txt file into a pickle using the following code:

import pickle
import numpy as np

f = open('C:/Users/sensen/OneDrive - HERE Global B.V-/Desktop/NLP/Open source libraries/Text summarization/glove.6B/glove.6B.300d.txt', 'r', encoding='UTF-8')
g = open('glove.6B.300d_pickle', 'wb')
word_dict = {}
wordvec = []
for idx, line in enumerate(f.readlines()):
    word_split = line.split(' ')
    word = word_split[0]
    word_dict[word] = idx
    d = word_split[1:]
    d[-1] = d[-1][:-1]  # strip the trailing newline from the last value
    d = [float(e) for e in d]
    wordvec.append(d)

embedding = np.array(wordvec)
pickling = {'embedding': embedding, 'word_dict': word_dict}
pickle.dump(pickling, g)
f.close()
g.close()
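For reference, the pickle written above is a plain dict holding an 'embedding' matrix and a 'word_dict' index, so a lookup against it has to go through the index rather than a `.word_vec()` call. A small sketch using only the structure defined by the conversion script; the `lookup` helper is just for illustration:

import pickle
import numpy as np

# Sketch only: read back the dict written by the conversion script above
# ({'embedding': ndarray, 'word_dict': {word: row index}}) and look up rows directly.
with open('glove.6B.300d_pickle', 'rb') as handle:
    glove = pickle.load(handle)

embedding = glove['embedding']
word_dict = glove['word_dict']

def lookup(word, embedding_size=300):
    # illustrative helper: row lookup by index, zeros for unknown words
    idx = word_dict.get(word)
    if idx is None:
        return np.zeros([embedding_size], dtype=np.float32)
    return embedding[idx]

vec = lookup("summary")   # 300-dimensional vector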

Can you help me solve the error?
