Python gym module: make() example source code

We have extracted the following 50 code examples from open-source Python projects to illustrate how to use gym.make().
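All of the snippets below use the classic gym API, in which reset() returns an observation and step() returns a four-tuple. As a quick orientation, here is a minimal sketch of that pattern (the environment id is chosen arbitrarily):

import gym

env = gym.make("CartPole-v0")           # look up the registered spec and build the env
observation = env.reset()               # classic API: reset() returns only the observation
done = False
while not done:
    action = env.action_space.sample()  # random policy, just to drive the loop
    observation, reward, done, info = env.step(action)
env.close()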

Project: rl-attack-detection | Author: yenchenlin
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True
    )
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=2000000,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=True
    )
    act.save("pong_model.pkl")
    env.close()
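
For completeness: the policy saved by act.save() above can be reloaded and run; a sketch in the style of the baselines enjoy scripts (deepq.load() and the callable act object are the baselines API, the rest is assumed):

def enjoy():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    act = deepq.load("pong_model.pkl")   # restore the policy saved by act.save()
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])  # act() expects a batch dimension
            episode_rew += rew
        print("Episode reward", episode_rew)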
Project: chainer_pong | Author: icoxfog417
def main(game_count=1):
    record = os.path.join(os.path.dirname(__file__), "funfun")
    env = gym.make("Pong-v0")
    hanamichi = Hanamichi()

    env.monitor.start(record)
    for i in range(game_count):
        playing = True
        observation = env.reset()
        reward = -1
        action = -1

        while playing:
            env.render()
            if action < 0:
                action = hanamichi.start(observation)
            else:
                action = hanamichi.act(observation, reward)
            observation, reward, done, info = env.step(action)
            playing = not done
            if done:
                hanamichi.end(reward)

    env.monitor.close()
Project: pytorch.rl.learning | Author: moskomule
def __init__(self, env_name, num_episodes, alpha, gamma, policy, report_freq=100, **kwargs):
        """
        base class for RL using lookup table
        :param env_name: see https://github.com/openai/gym/wiki/Table-of-environments
        :param num_episodes: int, number of episodes for training
        :param alpha: float, learning rate
        :param gamma: float, discount rate
        :param policy: str
        :param report_freq: int, by default 100
        :param kwargs: other arguments
        """
        self.env = gym.make(env_name)
        self.num_episodes = num_episodes
        self.alpha = alpha
        self.gamma = gamma
        self.state = None
        self._rewards = None
        self._policy = policy
        self.report_freq = report_freq
        for k, v in kwargs.items():
            setattr(self, str(k), v)
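
A subclass would typically be driven like this (SarsaAgent and its train() method are hypothetical; only the constructor arguments come from the docstring above):

agent = SarsaAgent(env_name="FrozenLake-v0", num_episodes=10000,
                   alpha=0.1, gamma=0.99, policy="epsilon_greedy",
                   epsilon=0.1)  # extra keyword arguments become attributes via setattr()
agent.train()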
Project: pytorch.rl.learning | Author: moskomule
def make_atari(env_id, noop=True, max_and_skip=True, episode_life=True, clip_rewards=True, frame_stack=True,
               scale=True):
    """Configure environment for DeepMind-style Atari.
    """
    env = gym.make(env_id)
    assert 'NoFrameskip' in env.spec.id
    if noop:
        env = NoopResetEnv(env, noop_max=30)
    if max_and_skip:
        env = MaxAndSkipEnv(env, skip=4)
    if episode_life:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, 4)
    return env
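
After all wrappers are applied, the returned env yields 84x84 grayscale frames, stacked four deep and scaled to [0, 1]. A hypothetical call (the env id must contain 'NoFrameskip' to pass the assert):

env = make_atari("BreakoutNoFrameskip-v4")
obs = env.reset()                                      # frame stack of shape (84, 84, 4)
obs, reward, done, info = env.step(env.action_space.sample())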
Project: combine-DT-with-NN-in-RL | Author: Burning-Bear
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True
    )
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=2000000,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=True
    )
    act.save("pong_model.pkl")
    env.close()
Project: combine-DT-with-NN-in-RL | Author: Burning-Bear
def main():
    env = gym.make("CartPole-v0")
    model = deepq.models.mlp([64])
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        print_freq=10,
        callback=callback
    )
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")
Project: combine-DT-with-NN-in-RL | Author: Burning-Bear
def main():
    env = gym.make("CartPole-v0")
    model = deepq.models.mlp([64])
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        print_freq=10,
        callback=callback
    )
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")
Project: gym-extensions | Author: Breakend
def test_cartpole_contextual():
    env_id = 'CartPoleContextual-v0'
    env = gym.make(env_id)
    if isinstance(env.unwrapped, CartPoleEnv):
        env.reset()
    else:
        raise NotImplementedError

    nr_of_items_context_space_info = 10
    nr_unwrapped = len(list(env.unwrapped.context_space_info().keys()))
    if nr_of_items_context_space_info != nr_unwrapped:
        print('context_space_info() function needs to be implemented!')
        raise NotImplementedError

    context_vect = [0.01, 0.01, 0.01, 0.01]
    # context_vect should differ from the current context until change_context() is called below
    if context_vect == env.unwrapped.context:
        raise AttributeError

    env.unwrapped.change_context(context_vect)
    if context_vect != env.unwrapped.context:
        raise AttributeError
Project: gym-extensions | Author: Breakend
def test_pendulum_contextual():
    env_id = 'PendulumContextual-v0'
    env = gym.make(env_id)
    if isinstance(env.unwrapped, PendulumEnv):
        env.reset()
    else:
        raise NotImplementedError

    nr_of_items_context_space_info = 10
    nr_unwrapped = len(list(env.unwrapped.context_space_info().keys()))
    if nr_of_items_context_space_info != nr_unwrapped:
        print('context_space_info() function needs to be implemented!')
        raise NotImplementedError

    context_vect = [0.01, 0.01]
    if context_vect == env.unwrapped.context:
        raise AttributeError

    env.unwrapped.change_context(context_vect)
    if context_vect != env.unwrapped.context:
        raise AttributeError
Project: rl-attack-detection | Author: yenchenlin
def main():
    env = gym.make("CartPole-v0")
    model = deepq.models.mlp([64])
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        print_freq=10,
        callback=callback
    )
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")
Project: baselines | Author: openai
def main():
    env = gym.make("MountainCar-v0")
    # Enabling layer_norm here is important for parameter space noise!
    model = deepq.models.mlp([64], layer_norm=True)
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.1,
        print_freq=10,
        param_noise=True
    )
    print("Saving model to mountaincar_model.pkl")
    act.save("mountaincar_model.pkl")
Project: baselines | Author: openai
def main():
    env = gym.make("CartPole-v0")
    model = deepq.models.mlp([64])
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        print_freq=10,
        callback=callback
    )
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")
Project: baselines | Author: openai
def train(env_id, num_timesteps, seed):
    import baselines.common.tf_util as U
    sess = U.single_threaded_session()
    sess.__enter__()

    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    env = gym.make(env_id)
    def policy_fn(name, ob_space, ac_space):
        return MlpPolicy(name=name, ob_space=env.observation_space, ac_space=env.action_space,
            hid_size=32, num_hid_layers=2)
    env = bench.Monitor(env, logger.get_dir() and
        osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)
    gym.logger.setLevel(logging.WARN)

    trpo_mpi.learn(env, policy_fn, timesteps_per_batch=1024, max_kl=0.01, cg_iters=10, cg_damping=0.1,
        max_timesteps=num_timesteps, gamma=0.99, lam=0.98, vf_iters=5, vf_stepsize=1e-3)
    env.close()
Project: baselines | Author: openai
def train(env_id, num_timesteps, seed):
    env = gym.make(env_id)
    env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))  # NOTE: rank presumably comes from an MPI worker id defined elsewhere in the project
    set_global_seeds(seed)
    env.seed(seed)
    gym.logger.setLevel(logging.WARN)

    with tf.Session(config=tf.ConfigProto()):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)

        learn(env, policy=policy, vf=vf,
            gamma=0.99, lam=0.97, timesteps_per_batch=2500,
            desired_kl=0.002,
            num_timesteps=num_timesteps, animate=False)

        env.close()
Project: ai-bs-summer17 | Author: uchibe
def main():
    env = gym.make("SpaceInvadersNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True
    )
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=2000000,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=True
    )
    act.save("space_invaders_model.pkl")
    env.close()
Project: ai-bs-summer17 | Author: uchibe
def train(env_id, num_timesteps, seed):
    from baselines.pposgd import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()
    logger.session().__enter__()
    set_global_seeds(seed)
    env = gym.make(env_id)
    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
            hid_size=64, num_hid_layers=2)
    env = bench.Monitor(env, osp.join(logger.get_dir(), "monitor.json"))
    env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    pposgd_simple.learn(env, policy_fn, 
            max_timesteps=num_timesteps,
            timesteps_per_batch=2048,
            clip_param=0.2, entcoeff=0.0,
            optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
            gamma=0.99, lam=0.95,
        )
    env.close()
Project: pytorch-a2c-ppo-acktr | Author: ikostrikov
def make_env(env_id, seed, rank, log_dir):
    def _thunk():
        env = gym.make(env_id)
        is_atari = hasattr(gym.envs, 'atari') and isinstance(env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)
        env.seed(seed + rank)
        if log_dir is not None:
            env = bench.Monitor(env, os.path.join(log_dir, str(rank)))
        if is_atari:
            env = wrap_deepmind(env)
        # If observations are images of shape (H, W, C) with 1 or 3 channels, transpose them for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = WrapPyTorch(env)
        return env

    return _thunk
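
The returned thunks are meant for a vectorized-env wrapper that runs one environment per worker process; for example, with the SubprocVecEnv from baselines (usage assumed, not shown in this snippet):

from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

envs = [make_env("PongNoFrameskip-v4", seed=1, rank=i, log_dir="/tmp/logs")
        for i in range(8)]
envs = SubprocVecEnv(envs)   # each thunk is called inside its own worker process
obs = envs.reset()           # batched observations, one row per worker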
Project: ngraph | Author: NervanaSystems
def main():
    # initialize gym environment
    environment = gym.make('CartPole-v0')

    state_axes = ng.make_axes([
        ng.make_axis(environment.observation_space.shape[0], name='width')
    ])

    agent = dqn.Agent(
        state_axes,
        environment.action_space,
        model=baselines_model,
        epsilon=dqn.linear_generator(start=1.0, end=0.02, steps=10000),
        learning_rate=1e-3,
        gamma=1.0,
        memory=dqn.Memory(maxlen=50000),
        learning_starts=1000,
    )

    rl_loop.rl_loop_train(environment, agent, episodes=1000)

    total_reward = rl_loop.evaluate_single_episode(environment, agent)
    print(total_reward)
Project: ngraph | Author: NervanaSystems
def test_dependent_environment():
    environment = gym.make('DependentEnv-v0')

    total_rewards = []
    for i in range(10):
        agent = dqn.Agent(
            dqn.space_shape(environment.observation_space),
            environment.action_space,
            model=model,
            epsilon=dqn.decay_generator(start=1.0, decay=0.995, minimum=0.1),
            gamma=0.99,
            learning_rate=0.1,
        )

        rl_loop.rl_loop_train(environment, agent, episodes=10)

        total_rewards.append(
            rl_loop.evaluate_single_episode(environment, agent)
        )

    # most of these 10 agents will be able to converge to the perfect policy
    assert np.mean(np.array(total_rewards) == 100) >= 0.5
Project: DHP | Author: YuhangSong
def create_flash_env(env_id, client_id, remotes, **_):
    env = gym.make(env_id)
    env = Vision(env)
    env = Logger(env)
    env = BlockingReset(env)

    reg = universe.runtime_spec('flashgames').server_registry
    height = reg[env_id]["height"]
    width = reg[env_id]["width"]
    env = CropScreen(env, height, width, 84, 18)
    env = FlashRescale(env)

    keys = ['left', 'right', 'up', 'down', 'x']
    env = DiscreteToFixedKeysVNCActions(env, keys)
    env = EpisodeID(env)
    env = DiagnosticsInfo(env)
    env = Unvectorize(env)
    env.configure(fps=5.0, remotes=remotes, start_timeout=15 * 60, client_id=client_id,
                  vnc_driver='go', vnc_kwargs={
                    'encoding': 'tight', 'compress_level': 0,
                    'fine_quality_level': 50, 'subsample_level': 3})
    return env
Project: PAAC.pytorch | Author: qbx2
def make(env_id, hack=None):
    if 'Deterministic-v4' not in env_id:
        print('[Warning] Use the Deterministic-v4 version '
              'to reproduce the results of the paper.')

    _env = env = gym.make(env_id)

    if hack:
        # Hack gym env to output grayscale image
        if env.spec.timestep_limit is not None:
            from gym.wrappers.time_limit import TimeLimit

            if isinstance(env, TimeLimit):
                _env = env.env

        if hack == 'train':
            _env._get_image = _env.ale.getScreenGrayscale
            _env._get_obs = _env.ale.getScreenGrayscale
        elif hack == 'eval':
            _env._get_obs = _env.ale.getScreenGrayscale

    return env
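
A hypothetical call (the env id satisfies the Deterministic-v4 check above; hack='train' routes both the rendered image and the observation through ALE's grayscale buffer):

env = make("BreakoutDeterministic-v4", hack="train")
obs = env.reset()   # grayscale screen straight from ale.getScreenGrayscale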
Project: chi | Author: rmst
def dqn_test(env='OneRoundDeterministicReward-v0'):
    env = gym.make(env)
    env = ObservationShapeWrapper(env)

    @tt.model(tracker=tf.train.ExponentialMovingAverage(1 - .01),
              optimizer=tf.train.AdamOptimizer(.01))
    def q_network(x):
        x = layers.fully_connected(x, 32)
        x = layers.fully_connected(x, env.action_space.n, activation_fn=None,
                                   weights_initializer=tf.random_normal_initializer(0, 1e-4))
        return x

    agent = DqnAgent(env, q_network, double_dqn=False, replay_start=100, annealing_time=100)

    rs = []
    for ep in range(10000):
        r, _ = agent.play_episode()

        rs.append(r)

        if ep % 100 == 0:
            print(f'Return after episode {ep} is {sum(rs)/len(rs)}')
            rs = []
Project: universe | Author: openai
def test_steps_limit_restart():
    env = gym.make('test.StepsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()

    assert env._max_episode_seconds == None
    assert env._max_episode_steps == 2

    # Episode has started
    _, _, done, info = env.step([[]])
    assert done == [False]

    # Limit reached, now we get a done signal and the env resets itself
    _, _, done, info = env.step([[]])
    assert done == [True]
    assert env._elapsed_steps == 0
Project: universe | Author: openai
def test_seconds_limit_restart():
    env = gym.make('test.SecondsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()

    assert env._max_episode_seconds == 0.1
    assert env._max_episode_steps == None

    # Episode has started
    _, _, done, info = env.step([[]])
    assert done == [False]

    # Not enough time has passed
    _, _, done, info = env.step([[]])
    assert done == [False]

    time.sleep(0.2)

    # Limit reached, now we get a done signal and the env resets itself
    _, _, done, info = env.step([[]])
    assert done == [True]
Project: universe | Author: openai
def test_default_time_limit():
    # We need an env without a default limit
    register(
        id='test.NoLimitDummyVNCEnv-v0',
        entry_point='universe.envs:DummyVNCEnv',
        tags={
            'vnc': True,
            },
    )

    env = gym.make('test.NoLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()

    assert env._max_episode_seconds == wrappers.time_limit.DEFAULT_MAX_EPISODE_SECONDS
    assert env._max_episode_steps == None
Project: universe | Author: openai
def test_joint():
    env1 = gym.make('test.DummyVNCEnv-v0')
    env2 = gym.make('test.DummyVNCEnv-v0')
    env1.configure(_n=3)
    env2.configure(_n=3)
    for reward_buffer in [env1._reward_buffers[0], env2._reward_buffers[0]]:
        reward_buffer.set_env_info('running', 'test.DummyVNCEnv-v0', '1', 60)
        reward_buffer.reset('1')
        reward_buffer.push('1', 10, False, {})

    env = wrappers.Joint([env1, env2])
    assert env.n == 6
    observation_n = env.reset()
    assert observation_n == [None] * 6

    observation_n, reward_n, done_n, info = env.step([[] for _ in range(env.n)])
    assert reward_n == [10.0, 0.0, 0.0, 10.0, 0.0, 0.0]
    assert done_n == [False] * 6
Project: universe | Author: openai
def __init__(self, env, gym_core_id=None):
        super(GymCoreAction, self).__init__(env)

        if gym_core_id is None:
            # self.spec is None while inside of the make, so we need
            # to pass gym_core_id in explicitly there. This case will
            # be hit when instantiating by hand.
            gym_core_id = self.spec._kwargs['gym_core_id']

        spec = gym.spec(gym_core_id)
        raw_action_space = gym_core_action_space(gym_core_id)

        self._actions = raw_action_space.actions
        self.action_space = gym_spaces.Discrete(len(self._actions))

        if spec._entry_point.startswith('gym.envs.atari:'):
            self.key_state = translator.AtariKeyState(gym.make(gym_core_id))
        else:
            self.key_state = None
Project: universe | Author: openai
def test_describe_handling():
    env = gym.make('flashgames.DuskDrive-v0')
    env.configure(vnc_driver=FakeVNCSession, rewarder_driver=FakeRewarder, remotes='vnc://example.com:5900+15900')
    env.reset()

    reward_buffer = get_reward_buffer(env)
    rewarder_client = get_rewarder_client(env)

    rewarder_client._manual_recv('v0.env.describe', {'env_id': 'flashgames.DuskDrive-v0', 'env_state': 'resetting', 'fps': 60}, {'episode_id': '1'})

    assert reward_buffer._remote_episode_id == '1'
    assert reward_buffer._remote_env_state == 'resetting'
    assert reward_buffer._current_episode_id == None
    assert reward_buffer.reward_state(reward_buffer._current_episode_id)._env_state == None

    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})

    assert reward_buffer._remote_episode_id == '1'
    assert reward_buffer._remote_env_state == 'resetting'
    assert reward_buffer._current_episode_id == '1'
    assert reward_buffer.reward_state(reward_buffer._current_episode_id)._env_state == 'resetting'
Project: universe | Author: openai
def test_smoke(env_id):
    """Check that environments start up without errors and that we can extract rewards and observations"""
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    env = gym.make(env_id)
    if env.metadata.get('configure.required', False):
        if os.environ.get('FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES'):  # Used to test universe-envs in CI
            configure_with_latest_docker_runtime_tag(env)
        else:
            env.configure(remotes=1)

    env = wrappers.Unvectorize(env)

    env.reset()
    _rollout(env, timestep_limit=60*30) # Check a rollout
Project: gym-malware | Author: endgameinc
def train_agent(rounds=10000, use_score=False, name='result_dir', create_agent=create_ddqn_agent):
    ENV_NAME = 'malware-score-v0' if use_score else 'malware-v0'
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    agent = create_agent(env)

    chainerrl.experiments.train_agent_with_evaluation(
        agent, env,
        steps=rounds,                   # Train the agent for this many steps
        max_episode_len=env.maxturns,   # Maximum length of each episode
        eval_interval=1000,             # Evaluate the agent after every 1000 steps
        eval_n_runs=100,                # 100 episodes are sampled for each evaluation        
        outdir=name)                    # Save everything to 'result' directory

    return agent
Project: cs234 | Author: CalciferZh
def main():
  env = gym.make('Stochastic-4x4-FrozenLake-v0')
  policy = learn_with_mdp_model(env)
  render_single(env, policy)

  # for i in range(10):
  #   print('\n%d' % i)
  #   env.render()
  #   print(env.step(env.action_space.sample()))
  # env.render()
  # for init_state in env.P.keys():
  #   for action in env.P[init_state]:
  #     print("\nState: %d, action: %d" % (init_state, action))
  #     for next_state in env.P[init_state][action]:
  #       print(next_state)
  # for _ in range(10):
  #   env.render()
  #   env.step(env.action_space.sample())
Project: bolero | Author: rock-learning
def init(self):
        gym.configuration.undo_logger_setup()

        self.env = gym.make(self.env_name)
        self.n_inputs, self.input_handler = self._init_space(
            self.env.action_space)
        self.inputs = np.empty(self.n_inputs)
        self.n_outputs, _ = self._init_space(self.env.observation_space)
        self.outputs = np.empty(self.n_outputs)

        if self.seed is not None:
            self.env.seed(self.seed)

        self.logger = get_logger(self, self.log_to_file, self.log_to_stdout)

        if self.log_to_stdout or self.log_to_file:
            self.logger.info("Number of inputs: %d" % self.n_inputs)
            self.logger.info("Number of outputs: %d" % self.n_outputs)
Project: malmo-challenge | Author: Kaixhin
def __init__(self, env_name, state_builder=ALEStateBuilder(), repeat_action=4, no_op=30, monitoring_path=None):
        assert isinstance(state_builder, StateBuilder), 'state_builder should inherit from StateBuilder'
        assert isinstance(repeat_action, (int, tuple)), 'repeat_action should be int or tuple'
        if isinstance(repeat_action, int):
            assert repeat_action >= 1, "repeat_action should be >= 1"
        elif isinstance(repeat_action, tuple):
            assert len(repeat_action) == 2, 'repeat_action should be a length-2 tuple: (min frameskip, max frameskip)'
            assert repeat_action[0] < repeat_action[1], 'repeat_action[0] should be < repeat_action[1]'

        super(GymEnvironment, self).__init__()

        self._state_builder = state_builder
        self._env = gym.make(env_name)
        self._env.env.frameskip = repeat_action
        self._no_op = max(0, no_op)
        self._done = True

        if monitoring_path is not None:
            self._env = Monitor(self._env, monitoring_path, video_callable=need_record)
Project: malmo-challenge | Author: Microsoft
def __init__(self, env_name, state_builder=ALEStateBuilder(), repeat_action=4, no_op=30, monitoring_path=None):
        assert isinstance(state_builder, StateBuilder), 'state_builder should inherit from StateBuilder'
        assert isinstance(repeat_action, (int, tuple)), 'repeat_action should be int or tuple'
        if isinstance(repeat_action, int):
            assert repeat_action >= 1, "repeat_action should be >= 1"
        elif isinstance(repeat_action, tuple):
            assert len(repeat_action) == 2, 'repeat_action should be a length-2 tuple: (min frameskip, max frameskip)'
            assert repeat_action[0] < repeat_action[1], 'repeat_action[0] should be < repeat_action[1]'

        super(GymEnvironment, self).__init__()

        self._state_builder = state_builder
        self._env = gym.make(env_name)
        self._env.env.frameskip = repeat_action
        self._no_op = max(0, no_op)
        self._done = True

        if monitoring_path is not None:
            self._env = Monitor(self._env, monitoring_path, video_callable=need_record)
Project: vic-tensorflow | Author: sygi
def deterministic_grid_test():
    env = gym.make("deterministic-grid-world-v0")
    prev_state = env.state
    for _ in xrange(100): env.step(0)  # noop
    assert env.state == prev_state

    while env.state[0] > 0:
        env.step(1)

    assert env.state[0] == 0
    env.step(1)
    assert env.state[0] == 0

    while env.state[1] < env.board_size[1] - 1:
        env.step(3)

    assert env.state[1] == env.board_size[1] - 1
    env.step(3)
    assert env.state[1] == env.board_size[1] - 1
Project: vic-tensorflow | Author: sygi
def __init__(self, n_options=10, logger=None, plotting=False,
                 log_tf_graph=False):
        if logger is None:
            logger = logging.getLogger("logger")
            logger.setLevel(logging.INFO)
        self.logger = logger

        self.n_options = n_options
        self.env = gym.make("deterministic-grid-world-v0")
        self.n_actions = self.env.action_space.n
        self.n_states = 1 + reduce(lambda x, y: x*y,
             map(lambda x: x.n, self.env.observation_space.spaces))

        if plotting:
            self.plot_robots = [PlotRobot('dqn loss', 0, log_scale=True),
                                PlotRobot('q loss', 1), PlotRobot('rewards', 2)]
        else:
            self.plot_robots = [None] * 3
        self.plotting = self.plot_robots[2]

        self.colors = list('bgrcmyk') + ['magenta', 'lime', 'gray']
        self.build_graph(log_tf_graph)
Project: strategy | Author: kanghua309
def execute(symbol, begin, end, days, plot, model_path,random):
    print model_path
    model = load_model(model_path)
    env = gym.make('trading-v0').env
    env.initialise(symbol=symbol, start=begin, end=end, days=days, random=random)
    state_size = env.observation_space.shape[0]
    state = env.reset()
    done = False
    while not done:
        state = state.reshape(1, state_size)
        # state = state.reshape(1, 1, state_size)
        qval = model.predict(state, batch_size=1)
        action = (np.argmax(qval))
        state, _, done, info = env.step(action)

        # log.info("%s,%s,%s,%s",state, _, done, info)
        # log.info("\n%s", env.sim.to_df())
        if plot:
           env.render()
Project: -CuriousActorCritic | Author: skelneko
def __init__(self, game="MsPacman-v0"):

        self.screen_h = Config.SCREEN_H
        self.screen_w = Config.SCREEN_W
        self.screen_shape = Config.SCREEN_SHAPE
        self.frame_per_row = Config.FRAME_PER_ROW
        self.frame_buffer = None

        self.action_space = 9

        # meta
        self.total_episode_run = 0
        self.steps_in_episode = 0
        self.max_steps_in_episode = 0

        self.env = gym.make(game)
        self.reset()
Project: roboschool | Author: openai
def demo_run():
    env = gym.make("RoboschoolInvertedPendulum-v1")
    pi = SmallReactivePolicy(env.observation_space, env.action_space)

    while 1:
        frame = 0
        score = 0
        restart_delay = 0
        obs = env.reset()

        while 1:
            a = pi.act(obs)
            obs, r, done, _ = env.step(a)
            score += r
            frame += 1
            still_open = env.render("human")
            if still_open==False:
                return
            if not done: continue
            if restart_delay==0:
                print("score=%0.2f in %i frames" % (score, frame))
                restart_delay = 60*2  # 2 sec at 60 fps
            else:
                restart_delay -= 1
                if restart_delay==0: break
Project: roboschool | Author: openai
def demo_run():
    env = gym.make("RoboschoolHumanoidFlagrun-v1")
    pi = SmallReactivePolicy(env.observation_space, env.action_space)

    while 1:
        frame = 0
        score = 0
        restart_delay = 0
        obs = env.reset()

        while 1:
            a = pi.act(obs)
            obs, r, done, _ = env.step(a)
            score += r
            frame += 1
            still_open = env.render("human")
            if still_open==False:
                return
            if not done: continue
            if restart_delay==0:
                print("score=%0.2f in %i frames" % (score, frame))
                restart_delay = 60*2  # 2 sec at 60 fps
            else:
                restart_delay -= 1
                if restart_delay==0: break
Project: roboschool | Author: openai
def demo_run():
    env = gym.make("RoboschoolAnt-v1")
    pi = SmallReactivePolicy(env.observation_space, env.action_space)

    while 1:
        frame = 0
        score = 0
        restart_delay = 0
        obs = env.reset()

        while 1:
            a = pi.act(obs)
            obs, r, done, _ = env.step(a)
            score += r
            frame += 1
            still_open = env.render("human")
            if still_open==False:
                return
            if not done: continue
            if restart_delay==0:
                print("score=%0.2f in %i frames" % (score, frame))
                restart_delay = 60*2  # 2 sec at 60 fps
            else:
                restart_delay -= 1
                if restart_delay==0: break
Project: roboschool | Author: openai
def demo_run():
    env = gym.make("RoboschoolReacher-v1")
    pi = SmallReactivePolicy(env.observation_space, env.action_space)

    while 1:
        frame = 0
        score = 0
        obs = env.reset()

        while 1:
            a = pi.act(obs)
            obs, r, done, _ = env.step(a)
            score += r
            frame += 1
            still_open = env.render("human")
            if still_open==False:
                return
            if not done: continue
            print("score=%0.2f in %i frames" % (score, frame))
            break
Project: roboschool | Author: openai
def demo_run():
    env = gym.make("RoboschoolHopper-v1")
    pi = SmallReactivePolicy(env.observation_space, env.action_space)

    while 1:
        frame = 0
        score = 0
        restart_delay = 0
        obs = env.reset()

        while 1:
            a = pi.act(obs)
            obs, r, done, _ = env.step(a)
            score += r
            frame += 1
            still_open = env.render("human")
            if still_open==False:
                return
            if not done: continue
            if restart_delay==0:
                print("score=%0.2f in %i frames" % (score, frame))
                restart_delay = 60*2  # 2 sec at 60 fps
            else:
                restart_delay -= 1
                if restart_delay==0: break
Project: roboschool | Author: openai
def demo_run():
    env = gym.make("RoboschoolWalker2d-v1")
    pi = SmallReactivePolicy(env.observation_space, env.action_space)

    while 1:
        frame = 0
        score = 0
        restart_delay = 0
        obs = env.reset()

        while 1:
            a = pi.act(obs)
            obs, r, done, _ = env.step(a)
            score += r
            frame += 1
            still_open = env.render("human")
            if still_open==False:
                return
            if not done: continue
            if restart_delay==0:
                print("score=%0.2f in %i frames" % (score, frame))
                restart_delay = 60*2  # 2 sec at 60 fps
            else:
                restart_delay -= 1
                if restart_delay==0: break
Project: roboschool | Author: openai
def multiplayer(self, env, game_server_guid, player_n):
        """
        That's the function you call between gym.make() and the first env.reset() to connect to a multiplayer server.

        game_server_guid -- is an id that server and client use to identify themselves to belong to the same session.
        player_n -- integer, up to scene.players_count.

        You can see here that env._reset() gets overwritten, which means that calling env.reset() will not create
        a single-player scene on your side (as it usually does); instead it will ask the server to reset the
        environment there. The same goes for step() and render().
        """
        self.shmem_client_init(game_server_guid, player_n)
        env._step   = self.shmem_client_step  # replace real function with fake, that communicates with environment on server
        env._reset  = self.shmem_client_reset
        env._render = self.shmem_client_rgb_array
        self.shmem_client_send_env_id()
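
From the client side the call sequence is roughly as follows (the env id and guid are placeholders; the pattern follows the roboschool multiplayer demos):

env = gym.make("RoboschoolPong-v1")
env.unwrapped.multiplayer(env, game_server_guid="pongdemo", player_n=0)
obs = env.reset()   # now resets the shared scene on the server, not a local one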
Project: roboschool | Author: openai
def read_env_id_and_create_env(self):
        self.sh_pipe_actready = open(self.sh_pipe_actready_filename, "rt")
        self.sh_pipe_obsready = os.open(self.sh_pipe_obsready_filename, os.O_WRONLY)
        env_id = self.sh_pipe_actready.readline()[:-1]
        if env_id.find("-v")==-1:
            raise ValueError("multiplayer client %s sent here invalid environment id '%s'" % (self.prefix, env_id))
        #
        # And at this point we know env_id.
        #
        print("Player %i connected, wants to operate %s in this scene" % (self.player_n, env_id))
        self.env = gym.make(env_id)  # gym.make() adds at least the TimeLimit wrapper, which we need here

        self.env.unwrapped.scene = self.scene
        self.env.unwrapped.player_n = self.player_n
        assert isinstance(self.env.observation_space, gym.spaces.Box)
        assert isinstance(self.env.action_space, gym.spaces.Box)
        self.sh_obs = np.memmap(self.prefix + "_obs",  mode="w+", shape=self.env.observation_space.shape, dtype=np.float32)
        self.sh_act = np.memmap(self.prefix + "_act",  mode="w+", shape=self.env.action_space.shape, dtype=np.float32)
        self.sh_rew = np.memmap(self.prefix + "_rew",  mode="w+", shape=(1,), dtype=np.float32)
        self.sh_rgb = np.memmap(self.prefix + "_rgb",  mode="w+", shape=(self.env.unwrapped.VIDEO_H,self.env.unwrapped.VIDEO_W,3), dtype=np.uint8)
        os.write(self.sh_pipe_obsready, b'accepted\n')
Project: gym | Author: openai
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("envid")
    parser.add_argument("outfile")
    parser.add_argument("--gymdir")

    args = parser.parse_args()
    if args.gymdir:
        sys.path.insert(0, args.gymdir)
    import gym
    from gym import utils
    print utils.colorize("gym directory: %s"%path.dirname(gym.__file__), "yellow")
    env = gym.make(args.envid)
    agent = RandomAgent(env.action_space)
    alldata = {}
    for i in xrange(2):
        np.random.seed(i)
        data = rollout(env, agent, env.spec.max_episode_steps)
        for (k, v) in data.items():
            alldata["%i-%s"%(i, k)] = v
    np.savez(args.outfile, **alldata)
Project: A3C | Author: go2sea
def __init__(self, name, globalAC, config, mutex):
        self.mutex = mutex
        self.config = config
        self.env = gym.make(self.config.GAME).unwrapped  # .unwrapped strips the TimeLimit wrapper that the -v0 registration adds
        self.name = name
        self.AC = ACNet(name, config, globalAC)
Project: human-rl | Author: gsastry
def __init__(self, death_penalty=True, deterministic=True, v=3, **kwargs):
        env_id = "MsPacman"
        if deterministic:
            env_id += "Deterministic"
        env_id += "-v%d" % v

        env = gym.make(env_id)
        super(Pacman, self).__init__(env)
        self.observation_space = gym.spaces.Box(0.0, 1.0, [42, 42, 1])
        self.death_penalty = death_penalty
Project: human-rl | Author: gsastry
def __init__(self, death_penalty=True, deterministic=True, v=3, **kwargs):
        env_id = "MsPacman"
        if deterministic:
            env_id += "Deterministic"
        env_id += "-v%d" % v

        env = gym.make(env_id)
        super(Pacman, self).__init__(env)
        self.observation_space = gym.spaces.Box(0.0, 1.0, [42, 42, 1])
        self.death_penalty = death_penalty