Hi there, thank you for your excellent work. In the output log I found three types of …

Hello, thanks for your interest in our work!

what's the meaning of AverageTrainReturn_all_train_tasks, AverageReturn_all_train_tasks, AverageReturn_all_test_tasks? about oyster HOT 1 CLOSED

tianyma commented on July 26, 2024

what's the meaning of AverageTrainReturn_all_train_tasks, AverageReturn_all_train_tasks, AverageReturn_all_test_tasks?

from oyster.

Comments (1)

katerakelly commented on July 26, 2024 1

Hello, thanks for your interest in our work!

AverageTrainReturn_all_train_tasks - average return achieved by an agent in a sampling of training tasks using context sampled from the replay buffer (implemented here:

oyster/rlkit/core/rl_algorithm.py

Lines 415 to 436 in 44e20fd

    
           ### eval train tasks with posterior sampled from the training replay buffer
           # For every task index: reset the environment to that task, then gather
           # `num_steps_per_eval // max_path_length` rollouts. Before each rollout a
           # fresh context is drawn with `sample_context(idx)` and handed to
           # `agent.infer_posterior`, so the rollout is conditioned on that context.
           # NOTE(review): `sample_context` presumably reads the training replay
           # buffer (per the header comment) -- confirm against its definition.
           train_returns = []
           for idx in indices:
               self.task_idx = idx
               self.env.reset_task(idx)
               paths = []
               for _ in range(self.num_steps_per_eval // self.max_path_length):
                   context = self.sample_context(idx)
                   self.agent.infer_posterior(context)
                   # One trajectory per call; the sampler is told not to accumulate
                   # context (accum_context=False).
                   p, _ = self.sampler.obtain_samples(
                       deterministic=self.eval_deterministic,
                       max_samples=self.max_path_length,
                       accum_context=False,
                       max_trajs=1,
                       resample=np.inf,
                   )
                   paths += p
               if self.sparse_rewards:
                   # Overwrite each path's rewards with the per-step 'sparse_reward'
                   # values from env_infos, shaped as a column vector.
                   for p in paths:
                       # np.stack needs a real sequence: a bare generator is rejected
                       # with TypeError by current NumPy (older releases only warned),
                       # so build a list first.
                       sparse_rewards = np.stack([e['sparse_reward'] for e in p['env_infos']]).reshape(-1, 1)
                       p['rewards'] = sparse_rewards
               train_returns.append(eval_util.get_average_returns(paths))
           # Collapse the per-task averages into one scalar.
           train_returns = np.mean(train_returns)

)
AverageReturn_all_train_tasks - average return achieved by an agent in a sampling of training tasks using context sampled by the current policy
AverageReturn_all_test_tasks - average return achieved by an agent in a sampling of testing tasks using context sampled by the current policy
(these last two implemented via this function:

oyster/rlkit/core/rl_algorithm.py

Lines 379 to 395 in 44e20fd

    
           def _do_eval(self, indices, epoch):
               """Evaluate the given task indices and summarise the returns.

               For each index, `self.collect_paths(idx, epoch, run)` is called once
               per run in `range(self.num_evals)`, and every collected path is
               scored on its own with `eval_util.get_average_returns([path])`.

               Returns a pair `(final_returns, online_returns)`:
                 * final_returns  -- one value per index: the mean, over runs, of
                   the score of the last path collected in each run.
                 * online_returns -- one array per index: the mean over runs of the
                   score at each rollout position, with all arrays cut to the
                   shortest length seen across indices.
               """
               last_path_means = []
               rollout_curves = []
               for task in indices:
                   # per_run[k] lists the score of every path from evaluation run k
                   per_run = [
                       [eval_util.get_average_returns([path])
                        for path in self.collect_paths(task, epoch, run)]
                       for run in range(self.num_evals)
                   ]
                   last_path_means.append(np.mean([scores[-1] for scores in per_run]))
                   # runs may yield different numbers of paths; keep the shared prefix
                   shared = min([len(scores) for scores in per_run])
                   stacked = np.stack([scores[:shared] for scores in per_run])
                   rollout_curves.append(np.mean(stacked, axis=0))  # avg return per nth rollout
               # align the curves of all tasks to the shortest one
               shared = min([len(curve) for curve in rollout_curves])
               return last_path_means, [curve[:shared] for curve in rollout_curves]

)

The final metric is the one reported in our paper.

from oyster.

what's the meaning of AverageTrainReturn_all_train_tasks, AverageReturn_all_train_tasks, AverageReturn_all_test_tasks? about oyster HOT 1 CLOSED

Comments (1)

Related Issues (20)

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent

	### eval train tasks with posterior sampled from the training replay buffer
	# For every task index: reset the environment to that task, then gather
	# `num_steps_per_eval // max_path_length` rollouts, each preceded by drawing
	# a context with `sample_context(idx)` and passing it to
	# `agent.infer_posterior`.
	# NOTE(review): `sample_context` presumably reads the training replay
	# buffer (per the header comment) -- confirm against its definition.
	train_returns = []
	for idx in indices:
	    self.task_idx = idx
	    self.env.reset_task(idx)
	    paths = []
	    for _ in range(self.num_steps_per_eval // self.max_path_length):
	        context = self.sample_context(idx)
	        self.agent.infer_posterior(context)
	        # One trajectory per call; the sampler is told not to accumulate
	        # context (accum_context=False).
	        p, _ = self.sampler.obtain_samples(
	            deterministic=self.eval_deterministic,
	            max_samples=self.max_path_length,
	            accum_context=False,
	            max_trajs=1,
	            resample=np.inf,
	        )
	        paths += p

	    if self.sparse_rewards:
	        # Overwrite each path's rewards with the per-step 'sparse_reward'
	        # values from env_infos, shaped as a column vector.
	        for p in paths:
	            # np.stack needs a real sequence: a bare generator is rejected
	            # with TypeError by current NumPy (older releases only warned),
	            # so build a list first.
	            sparse_rewards = np.stack([e['sparse_reward'] for e in p['env_infos']]).reshape(-1, 1)
	            p['rewards'] = sparse_rewards

	    train_returns.append(eval_util.get_average_returns(paths))
	# Collapse the per-task averages into one scalar.
	train_returns = np.mean(train_returns)

	def _do_eval(self, indices, epoch):
	    """Run evaluation on the given task indices and aggregate the returns.

	    For each index, `self.collect_paths(idx, epoch, r)` is called for every
	    `r` in `range(self.num_evals)`; each resulting path is scored
	    individually via `eval_util.get_average_returns([p])`.

	    Returns:
	        (final_returns, online_returns) where
	        final_returns is a list with one entry per index: the mean, over
	        evaluation runs, of the score of the last path in each run;
	        online_returns is a list with one array per index: the mean over
	        runs of the score at each rollout position, truncated so every
	        array has the same (shortest) length.

	    Raises:
	        IndexError: if any evaluation run yields no paths (`a[-1]`).
	        ValueError: if `self.num_evals` is 0 or `indices` is empty
	            (`min` of an empty list).
	    """
	    final_returns = []
	    online_returns = []
	    for idx in indices:
	        all_rets = []
	        for r in range(self.num_evals):
	            paths = self.collect_paths(idx, epoch, r)
	            all_rets.append([eval_util.get_average_returns([p]) for p in paths])
	        final_returns.append(np.mean([a[-1] for a in all_rets]))
	        # record online returns for the first n trajectories
	        # (runs can produce different numbers of paths, so keep the common prefix)
	        n = min([len(a) for a in all_rets])
	        all_rets = [a[:n] for a in all_rets]
	        all_rets = np.mean(np.stack(all_rets), axis=0)  # avg return per nth rollout
	        online_returns.append(all_rets)
	    # align every task's curve to the shortest one
	    n = min([len(t) for t in online_returns])
	    online_returns = [t[:n] for t in online_returns]
	    return final_returns, online_returns