
I am trying to train an actor-critic model, but when I reach the backprop for the critic I get this error:

RuntimeError: invalid gradient at index 0 - expected type torch.cuda.FloatTensor but got torch.FloatTensor

I am failing to identify which gradient the error refers to. Can anyone help?

Here is the stack trace:

Traceback (most recent call last):
  File "train.py", line 338, in <module>
    main()
  File "train.py", line 327, in main
    reinforce_trainer.train(opt.start_reinforce, opt.start_reinforce + opt.critic_pretrain_epochs - 1, True, start_time)
  File "/home/fbommfim/init-tests/treeLSTM/lib/train/reinforce_trainer.py", line 56, in train
    train_reward, critic_loss = self.train_epoch(epoch, pretrain_critic, no_update)
  File "/home/fbommfim/init-tests/treeLSTM/lib/train/reinforce_trainer.py", line 153, in train_epoch
    critic_loss = self.critic.backward(baselines.cuda(), rewards, critic_weights.cuda(), num_words, self.critic_loss_func, regression=True)
  File "/home/fbommfim/init-tests/treeLSTM/lib/model/encoder_decoder/hybrid2seq_model.py", line 67, in backward
    outputs.backward(grad_output)
  File "/home/linuxbrew/.linuxbrew/Cellar/python/3.7.6_1/lib/python3.7/site-packages/torch/tensor.py", line 195, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/home/linuxbrew/.linuxbrew/Cellar/python/3.7.6_1/lib/python3.7/site-packages/torch/autograd/__init__.py", line 99, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: invalid gradient at index 0 - expected type torch.cuda.FloatTensor but got torch.FloatTensor

And the relevant code, train_epoch from reinforce_trainer:

def train_epoch(self, epoch, pretrain_critic, no_update):
        self.actor.train() # may also have self.critic.train() ?
        total_reward, report_reward = 0, 0
        total_critic_loss, report_critic_loss = 0, 0
        total_sents, report_sents = 0, 0
        total_words, report_words = 0, 0
        last_time = time.time()
        batch_count = len(self.train_data)
        batch_order = torch.randperm(batch_count)
        with tqdm(total = (batch_count)) as prog:
            for i in range(batch_count):
                batch = self.train_data[i] # batch_order[i]
                if self.opt.data_type == 'code':
                    targets = batch[2]
                    attention_mask = batch[1][2][0].data.eq(lib.Constants.PAD).t()
                elif self.opt.data_type == 'text':
                    targets = batch[2]
                    attention_mask = batch[0][0].data.eq(lib.Constants.PAD).t()
                elif self.opt.data_type == 'hybrid':
                    targets = batch[2]
                    attention_mask_code = batch[1][2][0].data.eq(lib.Constants.PAD).t()
                    attention_mask_txt = batch[0][0].data.eq(lib.Constants.PAD).t()

                batch_size = targets.size(1)

                self.actor.zero_grad()
                self.critic.zero_grad()

                # Sample translations
                if self.opt.has_attn:
                    if self.opt.data_type == 'code' or self.opt.data_type == 'text':
                        self.actor.decoder.attn.applyMask(attention_mask)
                    elif self.opt.data_type == 'hybrid':
                        self.actor.decoder.attn.applyMask(attention_mask_code, attention_mask_txt)
                samples, outputs = self.actor.sample(batch, self.max_length)

                # Calculate rewards
                rewards, samples = self.sent_reward_func(samples.t().tolist(), targets.data.t().tolist())
                reward = sum(rewards)

                # Perturb rewards (if specified).
                if self.pert_func is not None:
                    rewards = self.pert_func(rewards)

                samples = torch.LongTensor(samples).t().contiguous()
                rewards = torch.FloatTensor([rewards] * samples.size(0)).contiguous()
                if self.opt.cuda:
                    samples = samples.cuda()
                    rewards = rewards.cuda()

                # Update critic.
                critic_weights = samples.ne(lib.Constants.PAD).float()
                num_words = critic_weights.data.sum()
                if not no_update:
                    if self.opt.data_type == 'code':
                        baselines = self.critic((batch[0], batch[1], samples, batch[3]), eval=False, regression=True)
                    elif self.opt.data_type == 'text':
                        baselines = self.critic((batch[0], batch[1], samples, batch[3]), eval=False, regression=True)
                    elif self.opt.data_type == 'hybrid':
                        baselines = self.critic((batch[0], batch[1], samples, batch[3]), eval=False, regression=True)

                    critic_loss = self.critic.backward(baselines, rewards, critic_weights, num_words, self.critic_loss_func, regression=True)
                    self.critic_optim.step()
                else:
                    critic_loss = 0

                # Update actor
                if not pretrain_critic and not no_update:
                    # Subtract baseline from reward
                    norm_rewards = (rewards - baselines).data
                    actor_weights = norm_rewards * critic_weights
                    # TODO: can use PyTorch reinforce() here but that function is a black box.
                    # This is an alternative way where you specify an objective that gives the same gradient
                    # as the policy gradient's objective, which looks much like weighted log-likelihood.
                    actor_loss = self.actor.backward(outputs, samples, actor_weights, 1, self.actor_loss_func)
                    self.optim.step()
                else:
                    actor_loss = 0

                # Gather stats
                total_reward += reward
                report_reward += reward
                total_sents += batch_size
                report_sents += batch_size
                total_critic_loss += critic_loss
                report_critic_loss += critic_loss
                total_words += num_words
                report_words += num_words
                self.opt.iteration += 1
                print ("iteration: %s, loss: %s " % (self.opt.iteration, actor_loss))
                print ("iteration: %s, reward: %s " % (self.opt.iteration, (report_reward / report_sents) * 100))

                if i % self.opt.log_interval == 0 and i > 0:
                    print("""Epoch %3d, %6d/%d batches; actor reward: %.4f; critic loss: %f; %5.0f tokens/s; %s elapsed""" %
                          (epoch, i, batch_count, (report_reward / report_sents) * 100,
                          report_critic_loss / report_words,
                          report_words / (time.time() - last_time),
                          str(datetime.timedelta(seconds=int(time.time() - self.start_time)))))

                    report_reward = report_sents = report_critic_loss = report_words = 0
                    last_time = time.time()
                prog.update(1)

        return total_reward / total_sents, total_critic_loss / total_words

and the backward method from hybrid2seq_model.py:

def backward(self, outputs, targets, weights, normalizer, criterion, regression=False):
        grad_output, loss = self.generator.backward(outputs, targets, weights, normalizer, criterion, regression)
        outputs.cuda()
        grad_output.cuda()
        outputs.backward(grad_output)
        return loss

1 Answer

The error says it's expecting a cuda tensor and got a non-cuda tensor, so that's what I'd look for.

Calls like grad_output.cuda() return a new CUDA tensor; they are not in-place operations. You probably wanted grad_output = grad_output.cuda(), so I'd start by fixing calls like that.
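
For reference, here is a minimal sketch of the non-in-place behaviour (hypothetical throwaway tensor t; assumes a CUDA-enabled build of PyTorch):

    import torch

    t = torch.zeros(3)   # CPU tensor
    t.cuda()             # returns a new CUDA copy; t itself is unchanged
    print(t.is_cuda)     # False
    t = t.cuda()         # rebind the name to the CUDA copy
    print(t.is_cuda)     # True

Applied to the backward method from hybrid2seq_model.py, a sketch of the fix could look like this (names taken from the question; I've dropped outputs.cuda(), since outputs must already be on the GPU for the error to expect torch.cuda.FloatTensor, and its return value was being discarded anyway):

    def backward(self, outputs, targets, weights, normalizer, criterion, regression=False):
        grad_output, loss = self.generator.backward(outputs, targets, weights, normalizer, criterion, regression)
        # .cuda() is not in-place: rebind the name so the CUDA copy is the one
        # actually passed to backward(). This is a no-op if grad_output is already on the GPU.
        grad_output = grad_output.cuda()
        outputs.backward(grad_output)
        return loss

If the rest of the model already runs on the GPU, a cleaner long-term fix would be to make self.generator.backward return a CUDA tensor in the first place, so no explicit .cuda() call is needed here.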
