I would like to know why this function:
@tf.function
def train(self, TargetNet, epsilon):
    """Run one gradient step on a batch sampled from ``self.experience``.

    Args:
        TargetNet: Object whose ``predict(next_states)`` output is indexed
            per sample by the actions returned from ``self.get_action``.
        epsilon: Forwarded unchanged to ``self.get_action``.

    Returns:
        ``0`` when fewer than ``self.min_experiences`` transitions are
        stored; otherwise nothing is returned explicitly.
    """
    # NOTE(review): this method is wrapped in tf.function and receives
    # `epsilon` as an argument. If callers pass a Python number, tf.function
    # presumably retraces for every new value, and any variable created during
    # a retrace would raise "Creating variables on a non-first call"
    # -- TODO confirm against the caller (passing a tf.Tensor avoids retracing).
    if len(self.experience['s']) < self.min_experiences:
        return 0

    # Sample indices from the same buffer that is indexed below, so every id
    # is guaranteed to be a valid position in self.experience.
    ids = np.random.randint(
        low=0, high=len(self.experience['s']), size=self.batch_size)
    states = np.asarray([self.experience['s'][i] for i in ids])
    actions = np.asarray([self.experience['a'][i] for i in ids])
    rewards = np.asarray([self.experience['r'][i] for i in ids])
    next_states = np.asarray([self.experience['s1'][i] for i in ids])
    dones = np.asarray([self.experience['done'][i] for i in ids])

    # Actions come from this network, values from TargetNet.
    # Presumably get_action returns one action index per sample -- verify.
    q_next_actions = self.get_action(next_states, epsilon)
    q_value_next = TargetNet.predict(next_states)
    # Build (row, action) index pairs to pick one value per batch row.
    q_value_next = tf.gather_nd(
        q_value_next,
        tf.stack((tf.range(self.batch_size), q_next_actions), axis=1))
    # Where `dones` is true the target is the reward alone (no bootstrap term).
    targets = tf.where(dones, rewards, rewards + self.gamma * q_value_next)

    with tf.GradientTape() as tape:
        # The one-hot mask keeps only the prediction for the action taken.
        estimates = tf.math.reduce_sum(
            self.predict(states) * tf.one_hot(actions, self.num_actions),
            axis=1)
        loss = tf.math.reduce_sum(tf.square(estimates - targets))
    variables = self.model.trainable_variables
    gradients = tape.gradient(loss, variables)
    self.optimizer.apply_gradients(zip(gradients, variables))
gives "ValueError: Creating variables on a non-first call to a function decorated with tf.function.", whereas this code, which is very similar:
@tf.function
def train(self, TargetNet):
    """Run one gradient step on a batch sampled from ``self.experience``.

    Args:
        TargetNet: Object whose ``predict(states_next)`` output is reduced
            with a maximum over axis 1 to form the bootstrap value.

    Returns:
        ``0`` when fewer than ``self.min_experiences`` transitions are
        stored; otherwise nothing is returned explicitly.
    """
    buffer = self.experience
    if len(buffer['s']) < self.min_experiences:
        return 0

    ids = np.random.randint(low=0, high=len(buffer['s']), size=self.batch_size)
    # Gather each field of the sampled transitions into its own array.
    batch = {
        key: np.asarray([buffer[key][i] for i in ids])
        for key in ('s', 'a', 'r', 's2', 'done')
    }

    # Largest target-network value per sampled next state.
    best_next = np.max(TargetNet.predict(batch['s2']), axis=1)
    bootstrapped = batch['r'] + self.gamma * best_next
    # Where 'done' is true the target is the reward alone.
    td_targets = np.where(batch['done'], batch['r'], bootstrapped)

    with tf.GradientTape() as tape:
        q_all = self.predict(batch['s'])
        # The one-hot mask keeps only the prediction for the action taken.
        action_mask = tf.one_hot(batch['a'], self.num_actions)
        q_taken = tf.math.reduce_sum(q_all * action_mask, axis=1)
        loss = tf.math.reduce_sum(tf.square(td_targets - q_taken))
    weights = self.model.trainable_variables
    grads = tape.gradient(loss, weights)
    self.optimizer.apply_gradients(zip(grads, weights))
does not throw an error. Please help me understand why.
EDIT: I removed the parameter epsilon from the function and it works. Is it because the @tf.function decorator is valid only for single-argument functions?