The following code examples, extracted from open-source Python projects, illustrate how to use gym.benchmark_spec().
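As a quick orientation before the extracted examples, here is a minimal sketch of the call itself, assuming an older gym release that still ships the benchmark registry (later versions removed it):

import gym

# Look up a registered benchmark by id; gym.error.UnregisteredBenchmark is
# raised if the id is not registered.
spec = gym.benchmark_spec('Atari40M')

# The returned spec bundles a list of tasks, each tied to one environment.
for task in spec.tasks:
    print(task.env_id, task.max_timesteps, task.trials)

The examples below use the spec in three ways: selecting a task to train on, scoring local monitor output, and validating an upload against the registered task list.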
def benchmark_score_from_local(benchmark_id, training_dir):
    """Score the monitor output under training_dir against a registered benchmark."""
    spec = gym.benchmark_spec(benchmark_id)

    # Find every directory under training_dir that contains monitor manifests.
    directories = []
    for name, _, files in os.walk(training_dir):
        manifests = gym.monitoring.detect_training_manifests(name, files=files)
        if manifests:
            directories.append(name)

    # Score each evaluation individually, grouped by environment id.
    benchmark_results = defaultdict(list)
    for training_dir in directories:
        results = gym.monitoring.load_results(training_dir)

        env_id = results['env_info']['env_id']
        benchmark_result = spec.score_evaluation(
            env_id,
            results['data_sources'],
            results['initial_reset_timestamps'],
            results['episode_lengths'],
            results['episode_rewards'],
            results['episode_types'],
            results['timestamps'])
        # from pprint import pprint
        # pprint(benchmark_result)
        benchmark_results[env_id].append(benchmark_result)

    # Aggregate the per-evaluation scores into a single benchmark score.
    return gym.benchmarks.scoring.benchmark_aggregate_score(spec, benchmark_results)
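This first example omits its imports. A hedged usage sketch, with the imports the function relies on and a placeholder results directory (the path is an assumption for illustration):

import os
from collections import defaultdict

import gym
import gym.monitoring
import gym.benchmarks.scoring

# '/tmp/atari-benchmark-runs' is a placeholder; point it at a directory of
# gym monitor output produced while running the benchmark's environments.
score = benchmark_score_from_local('Atari40M', '/tmp/atari-benchmark-runs')
print(score)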
def main():
    # Games that we'll be testing.
    game_to_ID = {'BeamRider': 0, 'Breakout': 1, 'Enduro': 2, 'Pong': 3, 'Qbert': 4}

    # Get some arguments here. Note: num_timesteps default uses tasks default.
    parser = argparse.ArgumentParser()
    parser.add_argument('--game', type=str, default='Pong')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--num_timesteps', type=int, default=40000000)
    args = parser.parse_args()

    # Choose the game to play and set log file.
    benchmark = gym.benchmark_spec('Atari40M')
    task = benchmark.tasks[game_to_ID[args.game]]
    log_name = args.game + "_s" + str(args.seed).zfill(3) + ".pkl"

    # Run training. Should change the seed if possible!
    # Also, the actual # of iterations run is _roughly_ num_timesteps/4.
    seed = args.seed
    env = get_env(task, seed)
    session = get_session()
    print("task = {}".format(task))
    atari_learn(env, session, num_timesteps=args.num_timesteps, log_file=log_name)
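get_env, get_session, and atari_learn are helpers from the surrounding DQN training code, not part of gym, and are not shown in this snippet. As a rough sketch of what get_env typically does in such setups, offered as an assumption rather than the original helper (the monitor path and the DeepMind-style preprocessing step are placeholders):

import os.path as osp

import gym
from gym import wrappers

def get_env(task, seed):
    # Build the environment named by the benchmark task and seed it.
    env = gym.make(task.env_id)
    env.seed(seed)

    # Record episode statistics (and optionally video) under a scratch
    # directory; the path is a placeholder.
    expt_dir = '/tmp/gym-results'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)

    # A DeepMind-style preprocessing wrapper (frame skip, grayscale,
    # 84x84 resize, reward clipping) would usually be applied here; that
    # helper lives in the project's own code and is assumed, not shown.
    return env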
def atari_main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    # ['BeamRiderNoFrameskip-v4', 'BreakoutNoFrameskip-v4', 'EnduroNoFrameskip-v4',
    #  'PongNoFrameskip-v4', 'QbertNoFrameskip-v4', 'SeaquestNoFrameskip-v4',
    #  'SpaceInvadersNoFrameskip-v4']
    task = benchmark.tasks[1]
    print('available tasks: ', [t.env_id for t in benchmark.tasks])
    print('task: ', task.env_id, 'max steps: ', task.max_timesteps)

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    last_obs = env.reset()

    exploration_schedule = PiecewiseSchedule(
        [
            (0, 1.0),
            (1e6, 0.1),
            (task.max_timesteps / 2, 0.01),
        ],
        outside_value=0.01
    )

    dqn = DoubleDQN(image_shape=(84, 84, 1),
                    num_actions=env.action_space.n,
                    training_starts=50000,
                    target_update_freq=10000,
                    training_batch_size=32,
                    # training_starts=2000,
                    # target_update_freq=500,
                    # training_batch_size=3,
                    exploration=exploration_schedule)

    reward_sum_episode = 0
    num_episodes = 0
    episode_rewards = deque(maxlen=100)

    for step in range(task.max_timesteps):
        if step > 0 and step % 1000 == 0:
            print('step: ', step,
                  'episodes:', num_episodes,
                  'epsilon:', exploration_schedule.value(step),
                  'learning rate:', dqn.get_learning_rate(),
                  'last 100 training loss mean', dqn.get_avg_loss(),
                  'last 100 episode mean rewards: ',
                  np.mean(np.array(episode_rewards, dtype=np.float32)))

        env.render()

        action = dqn.choose_action(step, last_obs)
        obs, reward, done, info = env.step(action)
        reward_sum_episode += reward
        dqn.learn(step, action, reward, done, info)

        if done:
            last_obs = env.reset()
            episode_rewards.append(reward_sum_episode)
            reward_sum_episode = 0
            num_episodes += 1
        else:
            last_obs = obs
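PiecewiseSchedule, DoubleDQN, and get_env again come from the project's own modules. The schedule is the only piece whose behavior the snippet depends on directly (epsilon decays from 1.0 to 0.1 over the first 1e6 steps, then to 0.01 by the midpoint of training); a minimal re-implementation under that assumption, not the project's original class:

class PiecewiseSchedule(object):
    """Linear interpolation between (timestep, value) endpoints; a minimal
    sketch of the schedule object the snippet above assumes."""

    def __init__(self, endpoints, outside_value=None):
        # endpoints must be sorted by timestep, e.g. [(0, 1.0), (1e6, 0.1), ...]
        self._endpoints = endpoints
        self._outside_value = outside_value

    def value(self, t):
        # Interpolate linearly inside the segment that contains t.
        for (l_t, l_val), (r_t, r_val) in zip(self._endpoints[:-1], self._endpoints[1:]):
            if l_t <= t < r_t:
                alpha = float(t - l_t) / (r_t - l_t)
                return l_val + alpha * (r_val - l_val)
        # t lies outside every segment: fall back to the configured constant.
        return self._outside_value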
def _upload_benchmark(training_dir, algorithm_id, benchmark_id, benchmark_run_tags, api_key,
                      ignore_open_monitors, skip_videos):
    # We're uploading a benchmark run.
    directories = []
    env_ids = []
    for name, _, files in os.walk(training_dir):
        manifests = monitoring.detect_training_manifests(name, files=files)
        if manifests:
            env_info = monitoring.load_env_info_from_manifests(manifests, training_dir)
            env_ids.append(env_info['env_id'])
            directories.append(name)

    # Validate against benchmark spec
    try:
        spec = benchmark_spec(benchmark_id)
    except error.UnregisteredBenchmark:
        raise error.Error("Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?".format(benchmark_id))

    spec_env_ids = [task.env_id for task in spec.tasks for _ in range(task.trials)]

    if not env_ids:
        raise error.Error("Could not find any evaluations in {}".format(training_dir))

    # This could be more stringent about mixing evaluations
    if sorted(env_ids) != sorted(spec_env_ids):
        logger.info("WARNING: Evaluations do not match spec for benchmark %s. In %s, we found evaluations for %s, expected %s",
                    benchmark_id, training_dir, sorted(env_ids), sorted(spec_env_ids))

    tags = json.dumps(benchmark_run_tags)
    _create_with_retries = util.retry_exponential_backoff(
        resource.BenchmarkRun.create,
        (error.APIConnectionError,),
        max_retries=5,
        interval=3,
    )
    benchmark_run = _create_with_retries(benchmark_id=benchmark_id, algorithm_id=algorithm_id, tags=tags)
    benchmark_run_id = benchmark_run.id

    # Actually do the uploads.
    for training_dir in directories:
        # N.B. we don't propagate algorithm_id to Evaluation if we're running as part of a benchmark
        _upload_with_retries = util.retry_exponential_backoff(
            _upload,
            (error.APIConnectionError,),
            max_retries=5,
            interval=3,
        )
        _upload_with_retries(training_dir, None, None, benchmark_run_id, api_key, ignore_open_monitors, skip_videos)

    logger.info("""
****************************************************
You successfully uploaded your benchmark on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
    """.rstrip(), benchmark_id, benchmark_run.web_url())

    return benchmark_run_id
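util.retry_exponential_backoff comes from the surrounding scoreboard client code; the snippet only relies on it wrapping an API call so that transient APIConnectionErrors are retried with growing delays. A generic sketch of that pattern, written as an assumption rather than gym's actual implementation:

import functools
import time

def retry_exponential_backoff(func, exceptions, max_retries=5, interval=3):
    """Return a wrapper that retries func on the given exception types,
    sleeping interval * 2**attempt seconds between attempts."""
    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        for attempt in range(max_retries):
            try:
                return func(*args, **kwargs)
            except exceptions:
                if attempt == max_retries - 1:
                    raise  # out of retries: surface the last error
                time.sleep(interval * (2 ** attempt))
    return wrapped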