Python agent module: Agent() example source code

The following 42 code examples, extracted from open-source Python projects, illustrate how to use agent.Agent().

Project: snake_game    Author: wing3s    | Project source | File source
def main():
    game_width = 12
    game_height = 9
    nb_frames = 4
    actions = ((-1, 0), (1, 0), (0, -1), (0, 1), (0, 0))

    # Recipe for the deep reinforcement learning model
    model = Sequential()
    model.add(Convolution2D(
        16,
        nb_row=3,
        nb_col=3,
        activation='relu',
        input_shape=(nb_frames, game_height, game_width)))
    model.add(Convolution2D(32, nb_row=3, nb_col=3, activation='relu'))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dense(len(actions)))
    model.compile(RMSprop(), 'MSE')

    agent = Agent(
        model, nb_frames, snake_game, actions, size=(game_width, game_height))
    agent.train(nb_epochs=10000, batch_size=64, gamma=0.8, save_model=True)
    agent.play(nb_rounds=10)
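
The snippet above assumes that the Keras model classes, the Agent class, and the snake_game environment are imported at the top of the file. A minimal import header consistent with the calls used here might look like this (the exact module paths are assumptions, not taken from the project):

# Assumed imports for the example above (Keras 1.x-style layer arguments; paths are guesses)
from keras.models import Sequential
from keras.layers import Convolution2D, Flatten, Dense
from keras.optimizers import RMSprop

from agent import Agent            # the Agent class being demonstrated
from snake import snake_game       # hypothetical module providing the game object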
Project: cobalt    Author: PressLabs    | Project source | File source
def agent(machine_manager, volume_manager):
    return Agent(machine_manager, volume_manager, {
        'agent_ttl': 60,
        'max_error_count': 3,
        'max_error_timeout': 10,
        'node': {
            'volume_path': '/mnt',
            'conf_path': '/etc/cobalt.conf',
            'max_fill': 0.8,
            'conf': {
                'name': 'test-node',
                'labels': ['ssd']
            }
        },
        'watch_timeout': 10
    })
Project: cobalt    Author: PressLabs    | Project source | File source
def m_agent_service(volume_manager, machine_manager, m_driver, m_node):
    agent = Agent(volume_manager, machine_manager, {
        'agent_ttl': 60,
        'max_error_count': 3,
        'max_error_timeout': 10,
        'node': {
            'volume_path': '/mnt',
            'conf_path': '/etc/cobalt.conf',
            'max_fill': 0.8,
            'conf': {
                'name': 'test-node',
                'labels': ['ssd']
            }
        },
        'watch_timeout': 10
    })

    agent._driver = m_driver
    agent._node = m_node

    return agent
Project: OverviewOne    Author: SpaceVR-O1    | Project source | File source
def thread_agent(self):
        """
        This is the entry point for the thread that handles
        all communication with the Supernova bus.
        It communicates back to the main ConnOps loop via
        multithreaded Event objects.
        """
        while True:
            try:
                # Set up the command handlers
                self.agent = Agent()
                self.agent.service_handler["Telemetry Packet"] = self.on_telemetry

                # Run
                self.agent.bind_udp_sockets()
                self.agent.run() # should never exit

            except Exception as ex:
                # NOTE: It is an error to ever reach this line.
                # Catch and swallow all exceptions.
                pass

            # NOTE: It is an error to ever reach this line.
            self.agent_errors = self.agent_errors + 1
Project: voltha    Author: opencord    | Project source | File source
def parse_options():
    parser = ArgumentParser("pyofagent - Python-based Open Flow Agent")
    parser.add_argument("-c", "--controller", #dest="controller",
                        help="Controller host:port to connect to", metavar="HOST:PORT",
                        default="localhost:6633")
    parser.add_argument("-d", "--devid", dest="datapath_id",
                        help="Device identified", metavar="DEVID",
                        default=42)
    parser.add_argument("-v", "--verbose", action='store_true', #dest=verbose,
                        default="enable verbose logging (log-level is DEBUG)")
    parser.add_argument("-I", "--in-out-iface", metavar="IN-OUT-IFACE",
                        help="Local interface to receve/send in-out frames",)
    parser.add_argument("-S", "--in-out-stag", metavar="IN-OUT-STAG",
                        help="Expect/Apply given s-tag when receiving/sending frames"+
                             "at the in-out interface")
    return parser.parse_args()
Project: voltha    Author: opencord    | Project source | File source
def main():

    args = parse_options()

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    store = ObjectStore()
    backend = MockBackend(store, in_out_iface=args.in_out_iface,
                                 in_out_stag=None if args.in_out_stag is None else int(args.in_out_stag))
    agent = Agent(args.controller, int(args.datapath_id), store, backend)
    store.set_agent(agent)
    backend.set_agent(agent)

    try:
        agent.run()
    except KeyboardInterrupt:
        logging.info("Ctrl-c received! Shutting down connection and exiting...")
        agent.stop()
        backend.stop()
Project: meta-learning    Author: ioanachelu    | Project source | File source
def run(self):
        self.stats.start()

        for i in np.arange(FLAGS.nb_concurrent):
            self.agents.append(Agent(i, self.prediction_q, self.training_q, self.stats.episode_log_q))
            self.agents[-1].start()

        for i in np.arange(FLAGS.nb_trainers):
            self.trainers.append(Trainer(self, i))
            self.trainers[-1].start()
        for i in np.arange(FLAGS.nb_predictors):
            self.predictors.append(Predictor(self, i))
            self.predictors[-1].start()


        while True:
            if self.stats.episode_count.value % FLAGS.checkpoint_interval == 0:
                self.save_model()
            time.sleep(0.05)
Project: alba    Author: paulfchristiano    | Project source | File source
def __init__(self, expert, **kwargs):
        """
        expert: an Agent implementing the policy-to-be-imitated

        The Imitator will sometimes call expert.act() in order to get training data

        We promise that the number of calls to expert.act() will be sublinear
        in the number of calls to Imitator.act().

        Note that each Agent has immutable state,
        but calling methods on an Imitator may cause updates to external parameters,
        and these parameters may affect the behavior of existing Agent objects
        """
        super(Imitator, self).__init__(**kwargs)
        self.expert = expert

    #the act method is responsible for sometimes calling expert.act() to gather training data
    #it is also responsible for updating the agent's parameters
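
The snippet shows only the constructor; the act() behavior described in the docstring is left to the rest of the class. A hedged sketch of an act() method satisfying that contract is given below; the external_parameters(), update_parameters(), and policy() helpers, the sampling schedule, and the return shape are all illustrative assumptions, not the project's code:

    def act(self, obs):
        # Illustrative sketch only, not the project's implementation (assumes `import random`).
        # The call counter and the learned policy live in external parameters, so the
        # Imitator object itself keeps immutable state, as the docstring requires.
        params = self.external_parameters()              # hypothetical accessor
        params['steps'] += 1
        # Query the expert with decaying probability so that calls to expert.act()
        # stay sublinear in the number of calls to Imitator.act().
        if random.random() < 1.0 / params['steps']:
            expert_action = self.expert.act(obs)         # gather training data
            self.update_parameters(obs, expert_action)   # hypothetical: fit the imitation policy
        return self.policy(obs)                          # hypothetical: act with the current policy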
Project: alba    Author: paulfchristiano    | Project source | File source
def __init__(self, reward, **kwargs):
        """
        reward: a function that acts on a pair of observation and action sequences,
        and returns a reward in [0, 1]

        The agent will sometimes call reward() in order to evaluate a partial trajectory.
        Its goal is to maximize the total reward
        of all of the complete trajectories produced by calling act().
        We say that a trajectory is complete if act() was never called
        on the agent's state at the end of the trajectory.

        Note that each Agent has immutable state,
        but calling methods on an RL agent may cause updates to external parameters,
        and these parameters may affect the behavior of existing Agent objects
        """
        super(IntrinsicRL, self).__init__(**kwargs)
        self.reward = reward

    #the act method is responsible for sometimes calling reward() to gather training data,
    #and for updating the agent's parameters
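
As above, only the constructor is shown. A hedged sketch of how act() might occasionally query reward() on the partial trajectory follows; the helpers, the trajectory bookkeeping in external parameters, and the evaluation schedule are illustrative assumptions, not the project's code:

    def act(self, obs):
        # Illustrative sketch only, not the project's implementation (assumes `import random`).
        # The trajectory so far is tracked in external parameters so the agent's own
        # state stays immutable, as the docstring requires.
        params = self.external_parameters()                        # hypothetical accessor
        action = self.policy(obs)                                  # hypothetical policy
        params['observations'] = params['observations'] + (obs,)
        params['actions'] = params['actions'] + (action,)
        # Occasionally evaluate the partial trajectory and use the reward in [0, 1]
        # as a training signal for the policy parameters.
        if random.random() < 0.1:                                  # assumed evaluation rate
            r = self.reward(params['observations'], params['actions'])
            self.update_parameters(r)                              # hypothetical RL-style update
        return action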
Project: cobalt    Author: PressLabs    | Project source | File source
def setup_services(self):
        """A method to prepare the possible services that Cobalt can have."""
        self._service_endpoints = {
            'engine': Engine(self.etcd, self.volume_manager,
                             self.machine_manager, self.config['engine']),
            'api': Api(self.volume_manager, self.config['api']),
            'agent': Agent(self.machine_manager, self.volume_manager, self.config['agent'])
        }
Project: RL_NFSP    Author: Richard-An    | Project source | File source
def __init__(self, agent, player):
        self.Agent = agent
        self.player = player
        self.ACTION_NUM = agent.dim_actions
        self.STATE_NUM = agent.dim_states
        self.RLMemory_num = 20
        self.SLMemory_num = 20
        self.RLMemory = deque(maxlen=self.RLMemory_num)
        self.SLMemory = deque(maxlen=self.SLMemory_num)
        # self.Q = DQN.DQN_DouDiZhu(self.ACTION_NUM, self.STATE_NUM, self.RLMemory, self.RLMemory_num, self.player)
        # self.Pi = SLN.Pi(self.ACTION_NUM, self.STATE_NUM, self.SLMemory, self.SLMemory_num, self.player)
        self.EPSILON = 0.06
        self.ETA = 0.1
        self.EPISODE_NUM = 5000000
        self.Q_enable = False
Project: apiai_assistant    Author: toasterco    | Project source | File source
def process(self, request, headers=None):
        """ Processes the API.ai request and return an agent with the action
        performed in it

        Args:
            request (:obj:`dict`): request received from API.ai
            headers (:obj:`dict`, optional): headers of the HTTP request to verify it

        Returns:
            :obj:`apiai_assistant.agent.Agent`: Agent instance with the action performed in it
        """

        agent_instance = agent.Agent(
            corpus=self.corpus,
            request=request,
            ssml=self._ssml
        )

        if headers and type(headers) is dict:
            h_magic_key = headers.get('magic-key')
            if h_magic_key and self.magic_key and h_magic_key != self.magic_key:
                agent_instance.error('Could not verify request',
                                     code=agent.Status.AccessDenied)
                return agent_instance

        if self.validate(agent_instance):
            action = self.action_map[agent_instance.parser.action]
            action(agent_instance)

        logging.debug('- Response: {}'.format(
            agent_instance.response.to_dict()))

        return agent_instance
Project: ViZDoomAgents    Author: GoingMyWay    | Project source | File source
def main_train(tf_configs=None):
    s_t = time.time()

    tf.reset_default_graph()

    if not os.path.exists(model_path):
        os.makedirs(model_path)

    global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
    with tf.device('/gpu:2'):
        optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-6)
        master_network = network.ACNetwork('global', optimizer, shape=cfg.new_img_dim)  # Generate global network
        num_workers = 16
        agents = []
        # Create worker classes
        for i in range(num_workers):
            agents.append(agent.Agent(DoomGame(), i, s_size, a_size, optimizer, model_path, global_episodes))
    saver = tf.train.Saver(max_to_keep=100)

    with tf.Session(config=tf_configs) as sess:
        coord = tf.train.Coordinator()
        if load_model:
            print('Loading Model...')
            ckpt = tf.train.get_checkpoint_state(model_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        # This is where the asynchronous magic happens.
        # Start the "work" process for each worker in a separate threat.
        worker_threads = []
        for ag in agents:
            agent_train = lambda: ag.train_a3c(max_episode_length, gamma, sess, coord, saver)
            t = threading.Thread(target=(agent_train))
            t.start()
            time.sleep(0.5)
            worker_threads.append(t)
        coord.join(worker_threads)
    print("training ends, costs{}".format(time.time() - s_t))
Project: ViZDoomAgents    Author: GoingMyWay    | Project source | File source
def main_play(tf_configs=None):
    tf.reset_default_graph()

    with tf.Session(config=tf_configs) as sess:

        ag = agent.Agent(DoomGame(), 0, s_size, a_size, play=True)

        print('Loading Model...')
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(model_path)
        saver.restore(sess, os.path.join(model_path, 'model-200.ckpt'))
        print('Successfully loaded!')

        ag.play_game(sess, 10)
Project: ViZDoomAgents    Author: GoingMyWay    | Project source | File source
def main_train(tf_configs=None):
    s_t = time.time()

    tf.reset_default_graph()

    if not os.path.exists(cfg.model_path):
        os.makedirs(cfg.model_path)

    global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
    with tf.device("/gpu:0"):
        optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-5)
        global_network = network.ACNetwork('global', optimizer, img_shape=cfg.IMG_SHAPE)
        num_workers = cfg.AGENTS_NUM
        agents = []
        # Create worker classes
        for i in range(num_workers):
            agents.append(agent.Agent(DoomGame(), i, s_size, a_size, optimizer, cfg.model_path, global_episodes))
    saver = tf.train.Saver(max_to_keep=100)

    with tf.Session(config=tf_configs) as sess:
        coord = tf.train.Coordinator()
        if load_model:
            print('Loading Model...')
            ckpt = tf.train.get_checkpoint_state(cfg.model_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        # This is where the asynchronous magic happens.
        # Start the "work" process for each worker in a separate threat.
        worker_threads = []
        for ag in agents:
            agent_train = lambda: ag.train_a3c(max_episode_length, gamma, sess, coord, saver)
            t = threading.Thread(target=(agent_train))
            t.start()
            time.sleep(0.5)
            worker_threads.append(t)
        coord.join(worker_threads)
    print("training ends, costs{}".format(time.time() - s_t))
Project: ViZDoomAgents    Author: GoingMyWay    | Project source | File source
def main_play(tf_configs=None):
    tf.reset_default_graph()

    with tf.Session(config=tf_configs) as sess:

        ag = agent.Agent(DoomGame(), 0, play=True)

        print('Loading Model...')
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(cfg.model_path)
        saver.restore(sess, os.path.join(cfg.model_path, cfg.model_file))
        print('Successfully loaded!')

        ag.play_game(sess, 10)
Project: ViZDoomAgents    Author: GoingMyWay    | Project source | File source
def main_train(tf_configs=None):
    s_t = time.time()

    tf.reset_default_graph()

    if not os.path.exists(model_path):
        os.makedirs(model_path)

    with tf.device("/cpu:0"):
        global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-5)
        master_network = network.ACNetwork('global', optimizer)  # Generate global network
        num_workers = 16
        agents = []
        # Create worker classes
        for i in range(num_workers):
            agents.append(agent.Agent(DoomGame(), i, s_size, a_size, optimizer, model_path, global_episodes))
        saver = tf.train.Saver(max_to_keep=100)

    with tf.Session(config=tf_configs) as sess:
        coord = tf.train.Coordinator()
        if load_model:
            print('Loading Model...')
            ckpt = tf.train.get_checkpoint_state(model_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        # This is where the asynchronous magic happens.
        # Start the "work" process for each worker in a separate threat.
        worker_threads = []
        for ag in agents:
            agent_train = lambda: ag.train_a3c(max_episode_length, gamma, sess, coord, saver)
            t = threading.Thread(target=(agent_train))
            t.start()
            time.sleep(0.5)
            worker_threads.append(t)
        coord.join(worker_threads)
    print("training ends, costs{}".format(time.time() - s_t))
Project: ViZDoomAgents    Author: GoingMyWay    | Project source | File source
def main_train(tf_configs=None):
    s_t = time.time()

    tf.reset_default_graph()

    if not os.path.exists(cfg.model_path):
        os.makedirs(cfg.model_path)

    global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
    with tf.device("/gpu:0"):
        optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-5)
        global_network = network.ACNetwork('global', optimizer, img_shape=cfg.IMG_SHAPE)
        num_workers = cfg.AGENTS_NUM
        agents = []
        # Create worker classes
        for i in range(num_workers):
            agents.append(agent.Agent(DoomGame(), i, optimizer, cfg.model_path, global_episodes, task_name='D3_battle'))
    saver = tf.train.Saver(max_to_keep=100)

    with tf.Session(config=tf_configs) as sess:
        coord = tf.train.Coordinator()
        if load_model:
            print('Loading Model...')
            ckpt = tf.train.get_checkpoint_state(cfg.model_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        # This is where the asynchronous magic happens.
        # Start the "work" process for each worker in a separate threat.
        worker_threads = []
        for ag in agents:
            agent_train = lambda: ag.train_a3c(max_episode_length, gamma, sess, coord, saver)
            t = threading.Thread(target=(agent_train))
            t.start()
            time.sleep(0.5)
            worker_threads.append(t)
        coord.join(worker_threads)
    print("training ends, costs{}".format(time.time() - s_t))
Project: ViZDoomAgents    Author: GoingMyWay    | Project source | File source
def main_play(tf_configs=None):
    tf.reset_default_graph()

    with tf.Session(config=tf_configs) as sess:

        ag = agent.Agent(DoomGame(), 0, optimizer=None, play=True)

        print('Loading Model...')
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(cfg.model_path)
        saver.restore(sess, os.path.join(cfg.model_path, cfg.model_file))
        print('Successfully loaded the model, now time to play the game...')
        time.sleep(3)

        ag.play_game(sess, 10)
Project: OverviewOne    Author: SpaceVR-O1    | Project source | File source
def __init__(self):
        """ Constructor """

        self.cmds = self.create_payload_cmd_handler()
        self.agent = Agent()
        self.agent.service_handler["Payload Command"] = self.cmds.dispatch

        self.capture_proc = None
Project: voltha    Author: opencord    | Project source | File source
def __init__(self, consul_endpoint, vcore_endpoint, controller_endpoints,
                 enable_tls=False, key_file=None, cert_file=None,
                 vcore_retry_interval=0.5, devices_refresh_interval=5,
                 subscription_refresh_interval=5):

        log.info('init-connection-manager')
        log.info('list-of-controllers', controller_endpoints=controller_endpoints)
        self.controller_endpoints = controller_endpoints
        self.consul_endpoint = consul_endpoint
        self.vcore_endpoint = vcore_endpoint
        self.enable_tls = enable_tls
        self.key_file = key_file
        self.cert_file = cert_file

        self.channel = None
        self.grpc_client = None  # single, shared gRPC client to vcore

        self.agent_map = {}  # (datapath_id, controller_endpoint) -> Agent()
        self.device_id_to_datapath_id_map = {}

        self.vcore_retry_interval = vcore_retry_interval
        self.devices_refresh_interval = devices_refresh_interval
        self.subscription_refresh_interval = subscription_refresh_interval
        self.subscription = None

        self.running = False
Project: voltha    Author: opencord    | Project source | File source
def create_agent(self, device):
        datapath_id = device.datapath_id
        device_id = device.id
        for controller_endpoint in self.controller_endpoints:
            agent = Agent(controller_endpoint, datapath_id,
                          device_id, self.grpc_client, self.enable_tls,
                          self.key_file, self.cert_file)
            agent.start()
            self.agent_map[(datapath_id,controller_endpoint)] = agent
            self.device_id_to_datapath_id_map[device_id] = datapath_id
Project: meta-learning    Author: ioanachelu    | Project source | File source
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        ckpt = tf.train.get_checkpoint_state(settings["load_from"])
        print("Loading Model from {}".format(ckpt.model_checkpoint_path))
        saver.restore(sess, ckpt.model_checkpoint_path)

        agent_threads = []
        for agent in agents:
            agent_play = lambda: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
Project: meta-learning    Author: ioanachelu    | Project source | File source
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        ckpt = tf.train.get_checkpoint_state(settings["load_from"])
        print("Loading Model from {}".format(ckpt.model_checkpoint_path))
        saver.restore(sess, ckpt.model_checkpoint_path)

        agent_threads = []
        for agent in agents:
            agent_play = lambda: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
Project: meta-learning    Author: ioanachelu    | Project source | File source
def run(settings):
    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(settings["load_from"])
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            agent_play = lambda: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
Project: meta-learning    Author: ioanachelu    | Project source | File source
def run(settings):
    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(settings["load_from"])
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            agent_play = lambda: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
Project: meta-learning    Author: ioanachelu    | Project source | File source
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            agent_play = lambda: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
Project: meta-learning    Author: ioanachelu    | Project source | File source
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        ckpt = tf.train.get_checkpoint_state(settings["load_from"])
        print("Loading Model from {}".format(ckpt.model_checkpoint_path))
        saver.restore(sess, ckpt.model_checkpoint_path)

        agent_threads = []
        for agent in agents:
            agent_play = lambda: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
Project: meta-learning    Author: ioanachelu    | Project source | File source
def run(settings):
    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(settings["load_from"])
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            agent_play = lambda: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
Project: meta-learning    Author: ioanachelu    | Project source | File source
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            agent_play = lambda: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
Project: alba    Author: paulfchristiano    | Project source | File source
def well_formed(self):
        return (
            isinstance(self.H, Agent) and
            isinstance(self.child_base, BudgetedHCH) and
            areinstances(self.args, Referent)
        )
Project: alba    Author: paulfchristiano    | Project source | File source
def well_formed(self):
        return (
            isinstance(self.transcript, tuple) and
            all(hashable(x) for x in self.transcript) and
            isinstance(self.transcript_hash, six.string_types) and
            isinstance(self.agent, Agent) and
            self.agent.state_free
        )
Project: reinforcement-learning    Author: cgnicholls    | Project source | File source
def run_agent(save_path, T, game_name):
    with tf.Session() as sess:
        agent = Agent(session=sess, observation_shape=(210,160,3),
        action_size=3,
        optimizer=tf.train.AdamOptimizer(1e-4))

        # Create a saver, and only keep 2 checkpoints.
        saver = tf.train.Saver()

        saver.restore(sess, save_path + '-' + str(T))

        play(agent, game_name)

        return sess, agent
Project: IntelAct-Vizdoom    Author: chendagui16    | Project source | File source
def main():
    """Main function
    Test the checkpoint
    """
    simulator_config = 'config/simulator.json'
    print 'Starting simulator...'
    simulator = DoomSimulator(simulator_config)
    simulator.add_bots(10)
    print 'Simulator started!'

    agent_config = 'config/agent.json'
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))

    print 'Creating agent...'
    ag = Agent(sess, agent_config, simulator)
    print 'load model...'
    loadstatus = ag.load('./checkpoints')
    if not loadstatus:
        raise IOError

    img_buffer = np.zeros((ag.history_length, simulator.num_channels, simulator.resolution[1], simulator.resolution[0]))
    measure_buffer = np.zeros((ag.history_length, simulator.num_measure))
    curr_step = 0
    term = False
    acts_to_replace = [a + b + c + d for a in [[0, 0], [1, 1]]
                       for b in [[0, 0], [1, 1]] for c in [[0]] for d in [[0], [1]]]
    replacement_act = [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
    # Action0-5: MOVE_FORWARD MOVE_BACKWARD TURN_LEFT TURN_RIGHT ATTACK SPEED
    # Action6-11: SELECT_WEAPON2 ~ SELECT_WEAPON7

    while not term:
        if curr_step < ag.history_length:
            img, meas, reward, term = simulator.step(np.squeeze(ag.random_actions(1)).tolist())
        else:
            state_imgs = img_buffer[np.arange(curr_step - ag.history_length, curr_step) % ag.history_length]
            state_imgs = np.reshape(state_imgs, (1,) + ag.get_img_shape())
            state_imgs = np.transpose(state_imgs, [0, 2, 3, 1])
            state_meas = measure_buffer[np.arange(curr_step - ag.history_length, curr_step) % ag.history_length]
            state_meas = np.reshape(state_meas, (1, ag.history_length * simulator.num_measure))
            curr_act = np.squeeze(ag.act(state_imgs, state_meas, ag.test_objective_params)[0]).tolist()
            if curr_act[:6] in acts_to_replace:
                curr_act = replacement_act
            img, meas, reward, term = simulator.step(curr_act)
            if (not (meas is None)) and meas[0] > 30.:
                meas[0] = 30.
        simulator.show_info()
        sleep(0.02)
        if not term:
            img_buffer[curr_step % ag.history_length] = img
            measure_buffer[curr_step % ag.history_length] = meas
            curr_step += 1
    simulator.close_game()
Project: apiai_assistant    Author: toasterco    | Project source | File source
def validate(self, agent_instance):
        """ Validate an agent instance and update its code and error_message
        if the agent instance is not valid

        Args:
            agent_instance (:obj:`apiai_assistant.agent.Agent`): agent instance

        Returns:
            bool: True if valid, False otherwise.

        """

        if not agent_instance.parser:
            agent_instance.error('Could not instantiate parser',
                                 code=agent.Status.InvalidData)
            return False

        if not agent_instance.parser.is_valid:
            agent_instance.error('Could not validate data',
                                 code=agent.Status.InvalidData)
            return False

        logging.debug("""
        - Actions: {actions}
        - Action: {action}""".format(
            actions=self.action_map.keys(),
            action=agent_instance.parser.action))

        if (not agent_instance.parser.action
           or agent_instance.parser.action not in self.action_map):
            agent_instance.error('Could not understand action',
                                 code=agent.Status.InvalidData)
            return False

        logging.debug("""
        - HTTP Request: {data}
        - API.AI Request: {request}
        - Agent: {code} {message}
        - Valid: {valid}""".format(
            data=agent_instance.parser.data,
            request=agent_instance.parser.request,
            code=agent_instance.code,
            message=agent_instance.error_message,
            valid=agent_instance.code == agent.Status.OK))

        return True
Project: cohda    Author: ambimanus    | Project source | File source
def run(sc):
    ts = datetime.datetime.now().replace(microsecond=0).isoformat('_')
    sc.sim_time_start = ts
    logger.setup(sc)
    store_scenario(sc)
    INFO('Init (%s)' % ts)

    INFO('Fitness (minimal): %.6f' % sc.sol_fitness_min)
    INFO('Fitness (maximal): %.6f' % sc.sol_fitness_max)
    INFO('Fitness (average): %.6f' % sc.sol_fitness_avg)

    INFO('Creating %d agents' % sc.opt_m)
    agents = dict()
    for aid, search_space, initial_value in zip(
            sc.agent_ids, sc.agent_search_spaces, sc.agent_initial_values):
        agents[aid] = Agent(aid, search_space, initial_value)

    INFO('Connecting agents')
    for a, neighbors in sc.network.items():
        for n in neighbors:
            # Consistency check
            assert a != n, 'cannot add myself as neighbor!'
            # Add neighbor
            DEBUG('', 'Connecting', a, '->', n)
            if n not in agents[a].neighbors:
                agents[a].neighbors[n] = agents[n]
            else:
                WARNING(n, 'is already neighbor of', a)

    INFO('Starting simulation')
    mas = Mas(sc, agents)
    logger.set_mas(mas)
    stats = Stats(sc, agents)
    stats.eval(mas.current_time)
    AGENT(mas.aid, 'Notifying initial agent (%s)' % sc.sim_initial_agent)
    kappa = Working_Memory(sc.objective, dict(),
                           Solution_Candidate(None, dict(), float('-inf')))
    msg = Message(mas.aid, sc.sim_initial_agent, kappa)
    mas.msg(msg)
    while mas.is_active():
        mas.step()
        stats.eval(mas.current_time)
    if not stats.is_converged():
        ERROR('convergence not reached!')

    ts = datetime.datetime.now().replace(microsecond=0).isoformat('_')
    INFO('End (%s)' % ts)

    # Store scenario again, this time with simulation result
    store_scenario(sc, overwrite=True)

    return stats
Project: meta-learning    Author: ioanachelu    | Project source | File source
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            if FLAGS.hypertune:
                ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
            else:
                ckpt = tf.train.get_checkpoint_state(settings["load_from"])
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            agent_play = lambda: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
Project: meta-learning    Author: ioanachelu    | Project source | File source
def run():
    recreate_directory_structure()
    tf.reset_default_graph()

    sess = tf.Session()
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    with sess:
        with tf.device("/cpu:0"):
            global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)


            # num_agents = multiprocessing.cpu_count()
            num_agents = FLAGS.nb_concurrent
            agents = []
            envs = []

            for i in range(num_agents):
                gym_env = gym.make(FLAGS.game)
                # if FLAGS.monitor:
                #     gym_env = gym.wrappers.Monitor(gym_env, FLAGS.experiments_dir + '/worker_{}'.format(i), force=True)
                if FLAGS.game not in flags.SUPPORTED_ENVS:
                    gym_env = atari_environment.AtariEnvironment(gym_env=gym_env, resized_width=FLAGS.resized_width,
                                                                 resized_height=FLAGS.resized_height,
                                                                 agent_history_length=FLAGS.agent_history_length)
                    FLAGS.nb_actions = len(gym_env.gym_actions)

                envs.append(gym_env)

            global_network = FUNNetwork('global', None)

            for i in range(num_agents):
                agents.append(Agent(envs[i], i, optimizer, global_step))
            saver = tf.train.Saver(max_to_keep=5)

        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name))
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            thread = threading.Thread(target=(lambda: agent.play(sess, coord, saver)))
            thread.start()
            agent_threads.append(thread)

        while True:
            if FLAGS.show_training:
                for env in envs:
                    # time.sleep(1)
                    # with main_lock:
                    env.render()

        coord.join(agent_threads)
Project: meta-learning    Author: ioanachelu    | Project source | File source
def run():
    recreate_directory_structure()
    tf.reset_default_graph()

    sess = tf.Session()
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    with sess:
        with tf.device("/cpu:0"):
            global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
            if FLAGS.use_conv:
                global_network = ConvNetwork('global', None)
            else:
                global_network = ACNetwork('global', None)

            # num_agents = multiprocessing.cpu_count()
            num_agents = FLAGS.nb_concurrent
            agents = []
            envs = []

            for i in range(num_agents):
                gym_env = gym.make(FLAGS.game)
                # if FLAGS.monitor:
                #     gym_env = gym.wrappers.Monitor(gym_env, FLAGS.experiments_dir + '/worker_{}'.format(i), force=True)
                envs.append(gym_env)

            for i in range(num_agents):
                agents.append(Agent(envs[i], i, optimizer, global_step))
            saver = tf.train.Saver(max_to_keep=5)

        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name))
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            thread = threading.Thread(target=(lambda: agent.play(sess, coord, saver)))
            thread.start()
            agent_threads.append(thread)

        while True:
            if FLAGS.show_training:
                for env in envs:
                    # time.sleep(1)
                    # with main_lock:
                    env.render()

        coord.join(agent_threads)
Project: meta-learning    Author: ioanachelu    | Project source | File source
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            if FLAGS.hypertune:
                ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
            else:
                ckpt = tf.train.get_checkpoint_state(settings["load_from"])
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            agent_play = lambda: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
Project: meta-learning    Author: ioanachelu    | Project source | File source
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
            # print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            try:
                saver.restore(sess, ckpt.model_checkpoint_path)
            except Exception as e:
                print(sys.exc_info()[0])
                print(e)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            agent_play = lambda: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
Project: reinforcement-learning    Author: cgnicholls    | Project source | File source
def a3c(game_name, num_threads=8, restore=None, save_path='model'):
    processes = []
    envs = []
    for _ in range(num_threads+1):
        gym_env = gym.make(game_name)
        if game_name == 'CartPole-v0':
            env = CustomGymClassicControl(game_name)
        else:
            print "Assuming ATARI game and playing with pixels"
            env = CustomGym(game_name)
        envs.append(env)

    # Separate out the evaluation environment
    evaluation_env = envs[0]
    envs = envs[1:]

    with tf.Session() as sess:
        agent = Agent(session=sess,
        action_size=envs[0].action_size, model='mnih',
        optimizer=tf.train.AdamOptimizer(INITIAL_LEARNING_RATE))

        # Create a saver, and only keep 2 checkpoints.
        saver = tf.train.Saver(max_to_keep=2)

        T_queue = Queue.Queue()

        # Either restore the parameters or don't.
        if restore is not None:
            saver.restore(sess, save_path + '-' + str(restore))
            last_T = restore
            print "T was:", last_T
            T_queue.put(last_T)
        else:
            sess.run(tf.global_variables_initializer())
            T_queue.put(0)

        summary = Summary(save_path, agent)

        # Create a process for each worker
        for i in range(num_threads):
            processes.append(threading.Thread(target=async_trainer, args=(agent,
            envs[i], sess, i, T_queue, summary, saver, save_path,)))

        # Create a process to evaluate the agent
        processes.append(threading.Thread(target=evaluator, args=(agent,
        evaluation_env, sess, T_queue, summary, saver, save_path,)))

        # Start all the processes
        for p in processes:
            p.daemon = True
            p.start()

        # Until training is finished
        while not training_finished:
            sleep(0.01)

        # Join the processes, so we get this thread back.
        for p in processes:
            p.join()

# Returns sum(rewards[i] * gamma**i)