We have extracted the following 42 code examples from open-source Python projects to illustrate how to use agent.Agent().
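These examples come from unrelated projects, so each Agent class has its own constructor signature and methods. Before the individual examples, here is a purely illustrative sketch of the shape they share (construct the Agent with its dependencies or configuration, then drive it with a train/run/play style method); the names below are hypothetical stand-ins, not the API of any project listed here.

# Illustrative sketch only -- hypothetical Agent, not any specific project's API.
class Agent(object):
    def __init__(self, env, config=None):
        self.env = env
        self.config = config or {}

    def train(self, nb_epochs=1):
        for _ in range(nb_epochs):
            pass  # project-specific learning loop goes here

    def play(self, nb_rounds=1):
        for _ in range(nb_rounds):
            pass  # project-specific evaluation/rollout goes here

agent = Agent(env=None, config={'nb_epochs': 10})  # construct with dependencies/config
agent.train(nb_epochs=10)                          # then drive it
agent.play(nb_rounds=1)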
def main():
    game_width = 12
    game_height = 9
    nb_frames = 4
    actions = ((-1, 0), (1, 0), (0, -1), (0, 1), (0, 0))

    # Recipe of deep reinforcement learning model
    model = Sequential()
    model.add(Convolution2D(
        16,
        nb_row=3,
        nb_col=3,
        activation='relu',
        input_shape=(nb_frames, game_height, game_width)))
    model.add(Convolution2D(32, nb_row=3, nb_col=3, activation='relu'))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dense(len(actions)))
    model.compile(RMSprop(), 'MSE')

    agent = Agent(
        model, nb_frames, snake_game, actions, size=(game_width, game_height))
    agent.train(nb_epochs=10000, batch_size=64, gamma=0.8, save_model=True)
    agent.play(nb_rounds=10)
def agent(machine_manager, volume_manager):
    return Agent(machine_manager, volume_manager, {
        'agent_ttl': 60,
        'max_error_count': 3,
        'max_error_timeout': 10,
        'node': {
            'volume_path': '/mnt',
            'conf_path': '/etc/cobalt.conf',
            'max_fill': 0.8,
            'conf': {
                'name': 'test-node',
                'labels': ['ssd']
            }
        },
        'watch_timeout': 10
    })
def m_agent_service(volume_manager, machine_manager, m_driver, m_node):
    agent = Agent(volume_manager, machine_manager, {
        'agent_ttl': 60,
        'max_error_count': 3,
        'max_error_timeout': 10,
        'node': {
            'volume_path': '/mnt',
            'conf_path': '/etc/cobalt.conf',
            'max_fill': 0.8,
            'conf': {
                'name': 'test-node',
                'labels': ['ssd']
            }
        },
        'watch_timeout': 10
    })

    agent._driver = m_driver
    agent._node = m_node

    return agent
def thread_agent(self):
    """
    This is the entry point for the thread that handles all communication
    with the Supernova bus.  It communicates back to the main ConnOps loop
    via multithreaded Event objects.
    """
    while True:
        try:
            # Set up the command handlers
            self.agent = Agent()
            self.agent.service_handler["Telemetry Packet"] = self.on_telemetry

            # Run
            self.agent.bind_udp_sockets()
            self.agent.run()  # should never exit
        except Exception as ex:
            # NOTE: It is an error to ever reach this line.
            # Catch and swallow all exceptions.
            1 + 1  # NOTE: It is an error to ever reach this line.
            self.agent_errors = self.agent_errors + 1
def parse_options():
    parser = ArgumentParser("pyofagent - Python-based Open Flow Agent")
    parser.add_argument("-c", "--controller",  # dest="controller",
                        help="Controller host:port to connect to",
                        metavar="HOST:PORT",
                        default="localhost:6633")
    parser.add_argument("-d", "--devid", dest="datapath_id",
                        help="Device identifier", metavar="DEVID",
                        default=42)
    parser.add_argument("-v", "--verbose", action='store_true',  # dest=verbose,
                        help="enable verbose logging (log-level is DEBUG)")
    parser.add_argument("-I", "--in-out-iface", metavar="IN-OUT-IFACE",
                        help="Local interface to receive/send in-out frames")
    parser.add_argument("-S", "--in-out-stag", metavar="IN-OUT-STAG",
                        help="Expect/Apply given s-tag when receiving/sending frames "
                             "at the in-out interface")
    return parser.parse_args()
def main():
    args = parse_options()
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
    store = ObjectStore()
    backend = MockBackend(store, in_out_iface=args.in_out_iface,
                          in_out_stag=None if args.in_out_stag is None
                          else int(args.in_out_stag))
    agent = Agent(args.controller, int(args.datapath_id), store, backend)
    store.set_agent(agent)
    backend.set_agent(agent)

    try:
        agent.run()
    except KeyboardInterrupt:
        logging.info("Ctrl-c received! Shutting down connection and exiting...")
        agent.stop()
        backend.stop()
def run(self):
    self.stats.start()

    for i in np.arange(FLAGS.nb_concurrent):
        self.agents.append(
            Agent(i, self.prediction_q, self.training_q, self.stats.episode_log_q))
        self.agents[-1].start()

    for i in np.arange(FLAGS.nb_trainers):
        self.trainers.append(Trainer(self, i))
        self.trainers[-1].start()

    for i in np.arange(FLAGS.nb_predictors):
        self.predictors.append(Predictor(self, i))
        self.predictors[-1].start()

    while True:
        # Save a checkpoint every checkpoint_interval episodes
        if self.stats.episode_count.value % FLAGS.checkpoint_interval == 0:
            self.save_model()
        time.sleep(0.05)
def __init__(self, expert, **kwargs):
    """
    expert: an Agent implementing the policy-to-be-imitated

    The Imitator will sometimes call expert.act() in order to get training data.
    We promise that the number of calls to expert.act() will be sublinear
    in the number of calls to Imitator.act().

    Note that each Agent has immutable state, but calling methods on an Imitator
    may cause updates to external parameters, and these parameters may affect
    the behavior of existing Agent objects.
    """
    super(Imitator, self).__init__(**kwargs)
    self.expert = expert
    # the act method is responsible for sometimes calling expert.act()
    # to gather training data
    # it is also responsible for updating the agent's parameters
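The promise in this docstring (expert.act() calls stay sublinear in the number of Imitator.act() calls) can be made concrete with a small sketch. The sketch below is not from the project above; the class and attribute names are hypothetical, and it ignores the immutable-state convention for brevity.

# Hypothetical sketch: query the expert with probability ~ 1/sqrt(n), so after n
# calls to act() the expected number of expert.act() calls is O(sqrt(n)), i.e. sublinear.
import random

class SketchImitator(object):
    def __init__(self, expert):
        self.expert = expert
        self.n_calls = 0
        self.demonstrations = []  # "external parameters" updated as a side effect

    def act(self, observation):
        self.n_calls += 1
        if random.random() < self.n_calls ** -0.5:
            # Gather training data from the expert (increasingly rarely).
            action = self.expert.act(observation)
            self.demonstrations.append((observation, action))
            return action
        # Otherwise imitate; a real implementation would use a model trained on
        # self.demonstrations rather than replaying the last demonstration.
        return self.demonstrations[-1][1]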
def __init__(self, reward, **kwargs):
    """
    reward: a function that acts on a pair of observation and action sequences,
    and returns a reward in [0, 1]

    The agent will sometimes call reward() in order to evaluate a partial trajectory.
    Its goal is to maximize the total reward of all of the complete trajectories
    produced by calling act().  We say that a trajectory is complete if act() was
    never called on the agent's state at the end of the trajectory.

    Note that each Agent has immutable state, but calling methods on an RL agent
    may cause updates to external parameters, and these parameters may affect
    the behavior of existing Agent objects.
    """
    super(IntrinsicRL, self).__init__(**kwargs)
    self.reward = reward
    # the act method is responsible for sometimes calling reward() to gather
    # training data, and for updating the agent's parameters
def setup_services(self):
    """A method to prepare the possible services that Cobalt can have."""
    self._service_endpoints = {
        'engine': Engine(self.etcd, self.volume_manager, self.machine_manager,
                         self.config['engine']),
        'api': Api(self.volume_manager, self.config['api']),
        'agent': Agent(self.machine_manager, self.volume_manager,
                       self.config['agent'])
    }
def __init__(self, agent, player):
    self.Agent = agent
    self.player = player
    self.ACTION_NUM = agent.dim_actions
    self.STATE_NUM = agent.dim_states
    self.RLMemory_num = 20
    self.SLMemory_num = 20
    self.RLMemory = deque(maxlen=self.RLMemory_num)
    self.SLMemory = deque(maxlen=self.SLMemory_num)
    # self.Q = DQN.DQN_DouDiZhu(self.ACTION_NUM, self.STATE_NUM, self.RLMemory, self.RLMemory_num, self.player)
    # self.Pi = SLN.Pi(self.ACTION_NUM, self.STATE_NUM, self.SLMemory, self.SLMemory_num, self.player)
    self.EPSILON = 0.06
    self.ETA = 0.1
    self.EPISODE_NUM = 5000000
    self.Q_enable = False
def process(self, request, headers=None):
    """ Processes the API.ai request and returns an agent with the action
    performed on it

    Args:
        request (:obj:`dict`): request received from API.ai
        headers (:obj:`dict`, optional): headers of the HTTP request to verify it

    Returns:
        :obj:`apiai_assistant.agent.Agent`: Agent instance with the action performed on it
    """
    agent_instance = agent.Agent(
        corpus=self.corpus,
        request=request,
        ssml=self._ssml
    )

    if headers and type(headers) is dict:
        h_magic_key = headers.get('magic-key')
        if h_magic_key and self.magic_key and h_magic_key != self.magic_key:
            agent_instance.error('Could not verify request',
                                 code=agent.Status.AccessDenied)
            return agent_instance

    if self.validate(agent_instance):
        action = self.action_map[agent_instance.parser.action]
        action(agent_instance)

    logging.debug('- Response: {}'.format(
        agent_instance.response.to_dict()))

    return agent_instance
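Given the signature above, a webhook endpoint only needs to hand process() the parsed request body and headers and serialize the returned agent's response. The sketch below is hedged: "assistant" stands for whatever object owns process() in the real project, and the handler signature is an assumption for illustration, not the library's documented API.

# Hedged usage sketch of process(); names other than process()/response are assumptions.
def webhook(assistant, http_body, http_headers):
    agent_instance = assistant.process(request=http_body,           # API.ai request as a dict
                                       headers=dict(http_headers))  # headers used for verification
    # The returned agent carries the performed action's response,
    # ready to be serialized back to API.ai.
    return agent_instance.response.to_dict()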
def main_train(tf_configs=None):
    s_t = time.time()

    tf.reset_default_graph()

    if not os.path.exists(model_path):
        os.makedirs(model_path)

    global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
    with tf.device('/gpu:2'):
        optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-6)
        master_network = network.ACNetwork('global', optimizer, shape=cfg.new_img_dim)  # Generate global network
        num_workers = 16
        agents = []
        # Create worker classes
        for i in range(num_workers):
            agents.append(agent.Agent(DoomGame(), i, s_size, a_size,
                                      optimizer, model_path, global_episodes))
        saver = tf.train.Saver(max_to_keep=100)

    with tf.Session(config=tf_configs) as sess:
        coord = tf.train.Coordinator()
        if load_model:
            print('Loading Model...')
            ckpt = tf.train.get_checkpoint_state(model_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        # This is where the asynchronous magic happens.
        # Start the "work" process for each worker in a separate thread.
        worker_threads = []
        for ag in agents:
            agent_train = lambda: ag.train_a3c(max_episode_length, gamma, sess, coord, saver)
            t = threading.Thread(target=agent_train)
            t.start()
            time.sleep(0.5)
            worker_threads.append(t)
        coord.join(worker_threads)

    print("training ends, costs {}".format(time.time() - s_t))
def main_play(tf_configs=None):
    tf.reset_default_graph()

    with tf.Session(config=tf_configs) as sess:
        ag = agent.Agent(DoomGame(), 0, s_size, a_size, play=True)
        print('Loading Model...')
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(model_path)
        saver.restore(sess, os.path.join(model_path, 'model-200.ckpt'))
        print('Successfully loaded!')

        ag.play_game(sess, 10)
def main_train(tf_configs=None): s_t = time.time() tf.reset_default_graph() if not os.path.exists(cfg.model_path): os.makedirs(cfg.model_path) global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False) with tf.device("/gpu:0"): optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-5) global_network = network.ACNetwork('global', optimizer, img_shape=cfg.IMG_SHAPE) num_workers = cfg.AGENTS_NUM agents = [] # Create worker classes for i in range(num_workers): agents.append(agent.Agent(DoomGame(), i, s_size, a_size, optimizer, cfg.model_path, global_episodes)) saver = tf.train.Saver(max_to_keep=100) with tf.Session(config=tf_configs) as sess: coord = tf.train.Coordinator() if load_model: print('Loading Model...') ckpt = tf.train.get_checkpoint_state(cfg.model_path) saver.restore(sess, ckpt.model_checkpoint_path) else: sess.run(tf.global_variables_initializer()) # This is where the asynchronous magic happens. # Start the "work" process for each worker in a separate threat. worker_threads = [] for ag in agents: agent_train = lambda: ag.train_a3c(max_episode_length, gamma, sess, coord, saver) t = threading.Thread(target=(agent_train)) t.start() time.sleep(0.5) worker_threads.append(t) coord.join(worker_threads) print("training ends, costs{}".format(time.time() - s_t))
def main_play(tf_configs=None):
    tf.reset_default_graph()

    with tf.Session(config=tf_configs) as sess:
        ag = agent.Agent(DoomGame(), 0, play=True)
        print('Loading Model...')
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(cfg.model_path)
        saver.restore(sess, os.path.join(cfg.model_path, cfg.model_file))
        print('Successfully loaded!')

        ag.play_game(sess, 10)
def main_train(tf_configs=None): s_t = time.time() tf.reset_default_graph() if not os.path.exists(model_path): os.makedirs(model_path) with tf.device("/cpu:0"): global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False) optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-5) master_network = network.ACNetwork('global', optimizer) # Generate global network num_workers = 16 agents = [] # Create worker classes for i in range(num_workers): agents.append(agent.Agent(DoomGame(), i, s_size, a_size, optimizer, model_path, global_episodes)) saver = tf.train.Saver(max_to_keep=100) with tf.Session(config=tf_configs) as sess: coord = tf.train.Coordinator() if load_model: print('Loading Model...') ckpt = tf.train.get_checkpoint_state(model_path) saver.restore(sess, ckpt.model_checkpoint_path) else: sess.run(tf.global_variables_initializer()) # This is where the asynchronous magic happens. # Start the "work" process for each worker in a separate threat. worker_threads = [] for ag in agents: agent_train = lambda: ag.train_a3c(max_episode_length, gamma, sess, coord, saver) t = threading.Thread(target=(agent_train)) t.start() time.sleep(0.5) worker_threads.append(t) coord.join(worker_threads) print("training ends, costs{}".format(time.time() - s_t))
def main_train(tf_configs=None): s_t = time.time() tf.reset_default_graph() if not os.path.exists(cfg.model_path): os.makedirs(cfg.model_path) global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False) with tf.device("/gpu:0"): optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-5) global_network = network.ACNetwork('global', optimizer, img_shape=cfg.IMG_SHAPE) num_workers = cfg.AGENTS_NUM agents = [] # Create worker classes for i in range(num_workers): agents.append(agent.Agent(DoomGame(), i, optimizer, cfg.model_path, global_episodes, task_name='D3_battle')) saver = tf.train.Saver(max_to_keep=100) with tf.Session(config=tf_configs) as sess: coord = tf.train.Coordinator() if load_model: print('Loading Model...') ckpt = tf.train.get_checkpoint_state(cfg.model_path) saver.restore(sess, ckpt.model_checkpoint_path) else: sess.run(tf.global_variables_initializer()) # This is where the asynchronous magic happens. # Start the "work" process for each worker in a separate threat. worker_threads = [] for ag in agents: agent_train = lambda: ag.train_a3c(max_episode_length, gamma, sess, coord, saver) t = threading.Thread(target=(agent_train)) t.start() time.sleep(0.5) worker_threads.append(t) coord.join(worker_threads) print("training ends, costs{}".format(time.time() - s_t))
def main_play(tf_configs=None):
    tf.reset_default_graph()

    with tf.Session(config=tf_configs) as sess:
        ag = agent.Agent(DoomGame(), 0, optimizer=None, play=True)
        print('Loading Model...')
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(cfg.model_path)
        saver.restore(sess, os.path.join(cfg.model_path, cfg.model_file))
        print('Successfully loaded the model, now time to play the game...')
        time.sleep(3)

        ag.play_game(sess, 10)
def __init__(self): """ Constructor """ self.cmds = self.create_payload_cmd_handler() self.agent = Agent() self.agent.service_handler["Payload Command"] = self.cmds.dispatch self.capture_proc = None
def __init__(self, consul_endpoint, vcore_endpoint, controller_endpoints,
             enable_tls=False, key_file=None, cert_file=None,
             vcore_retry_interval=0.5, devices_refresh_interval=5,
             subscription_refresh_interval=5):

    log.info('init-connection-manager')
    log.info('list-of-controllers', controller_endpoints=controller_endpoints)

    self.controller_endpoints = controller_endpoints
    self.consul_endpoint = consul_endpoint
    self.vcore_endpoint = vcore_endpoint
    self.enable_tls = enable_tls
    self.key_file = key_file
    self.cert_file = cert_file

    self.channel = None
    self.grpc_client = None  # single, shared gRPC client to vcore

    self.agent_map = {}  # (datapath_id, controller_endpoint) -> Agent()
    self.device_id_to_datapath_id_map = {}

    self.vcore_retry_interval = vcore_retry_interval
    self.devices_refresh_interval = devices_refresh_interval
    self.subscription_refresh_interval = subscription_refresh_interval

    self.subscription = None

    self.running = False
def create_agent(self, device):
    datapath_id = device.datapath_id
    device_id = device.id
    for controller_endpoint in self.controller_endpoints:
        agent = Agent(controller_endpoint, datapath_id, device_id,
                      self.grpc_client, self.enable_tls,
                      self.key_file, self.cert_file)
        agent.start()
        self.agent_map[(datapath_id, controller_endpoint)] = agent
        self.device_id_to_datapath_id_map[device_id] = datapath_id
def run(settings): recreate_subdirectory_structure(settings) tf.reset_default_graph() with tf.device("/cpu:0"): global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False) optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"]) global_network = ACNetwork('global', None) num_agents = 1 agents = [] envs = [] for i in range(num_agents): if settings["game"] == '11arms': this_env = ElevenArms() else: this_env = TwoArms(settings["game"]) envs.append(this_env) for i in range(num_agents): agents.append(Agent(envs[i], i, optimizer, global_step, settings)) saver = tf.train.Saver(max_to_keep=5) with tf.Session() as sess: coord = tf.train.Coordinator() ckpt = tf.train.get_checkpoint_state(settings["load_from"]) print("Loading Model from {}".format(ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) agent_threads = [] for agent in agents: agent_play = lambda: agent.play(sess, coord, saver) thread = threading.Thread(target=agent_play) thread.start() agent_threads.append(thread) coord.join(agent_threads)
def run(settings):
    tf.reset_default_graph()
    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(settings["load_from"])
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            agent_play = lambda: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
def run(settings): recreate_subdirectory_structure(settings) tf.reset_default_graph() with tf.device("/cpu:0"): global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False) optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"]) global_network = ACNetwork('global', None) num_agents = 1 agents = [] envs = [] for i in range(num_agents): if settings["game"] == '11arms': this_env = ElevenArms() else: this_env = TwoArms(settings["game"]) envs.append(this_env) for i in range(num_agents): agents.append(Agent(envs[i], i, optimizer, global_step, settings)) saver = tf.train.Saver(max_to_keep=5) with tf.Session() as sess: coord = tf.train.Coordinator() if FLAGS.resume: ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"]) print("Loading Model from {}".format(ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) else: sess.run(tf.global_variables_initializer()) agent_threads = [] for agent in agents: agent_play = lambda: agent.play(sess, coord, saver) thread = threading.Thread(target=agent_play) thread.start() agent_threads.append(thread) coord.join(agent_threads)
def well_formed(self):
    return (
        isinstance(self.H, Agent) and
        isinstance(self.child_base, BudgetedHCH) and
        areinstances(self.args, Referent)
    )
def well_formed(self):
    return (
        isinstance(self.transcript, tuple) and
        all(hashable(x) for x in self.transcript) and
        isinstance(self.transcript_hash, six.string_types) and
        isinstance(self.agent, Agent) and
        self.agent.state_free
    )
def run_agent(save_path, T, game_name):
    with tf.Session() as sess:
        agent = Agent(session=sess, observation_shape=(210, 160, 3), action_size=3,
                      optimizer=tf.train.AdamOptimizer(1e-4))
        # Create a saver, and only keep 2 checkpoints.
        saver = tf.train.Saver()
        saver.restore(sess, save_path + '-' + str(T))
        play(agent, game_name)
        return sess, agent
def main(): """Main function Test the checkpoint """ simulator_config = 'config/simulator.json' print 'Starting simulator...' simulator = DoomSimulator(simulator_config) simulator.add_bots(10) print 'Simulator started!' agent_config = 'config/agent.json' gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) print 'Creating agent...' ag = Agent(sess, agent_config, simulator) print 'load model...' loadstatus = ag.load('./checkpoints') if not loadstatus: raise IOError img_buffer = np.zeros((ag.history_length, simulator.num_channels, simulator.resolution[1], simulator.resolution[0])) measure_buffer = np.zeros((ag.history_length, simulator.num_measure)) curr_step = 0 term = False acts_to_replace = [a + b + c + d for a in [[0, 0], [1, 1]] for b in [[0, 0], [1, 1]] for c in [[0]] for d in [[0], [1]]] replacement_act = [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0] # Action0-5: MOVE_FORWARD MOVE_BACKWARD TURN_LEFT TURN_RIGHT ATTACK SPPED # Action6-11: SELECT_WEAPON2 ~ SELECT_WEAPON7 while not term: if curr_step < ag.history_length: img, meas, reward, term = simulator.step(np.squeeze(ag.random_actions(1)).tolist()) else: state_imgs = img_buffer[np.arange(curr_step - ag.history_length, curr_step) % ag.history_length] state_imgs = np.reshape(state_imgs, (1,) + ag.get_img_shape()) state_imgs = np.transpose(state_imgs, [0, 2, 3, 1]) state_meas = measure_buffer[np.arange(curr_step - ag.history_length, curr_step) % ag.history_length] state_meas = np.reshape(state_meas, (1, ag.history_length * simulator.num_measure)) curr_act = np.squeeze(ag.act(state_imgs, state_meas, ag.test_objective_params)[0]).tolist() if curr_act[:6] in acts_to_replace: curr_act = replacement_act img, meas, reward, term = simulator.step(curr_act) if (not (meas is None)) and meas[0] > 30.: meas[0] = 30. simulator.show_info() sleep(0.02) if not term: img_buffer[curr_step % ag.history_length] = img measure_buffer[curr_step % ag.history_length] = meas curr_step += 1 simulator.close_game()
def validate(self, agent_instance):
    """ Validates an agent instance and updates its code and error_message
    if the agent instance is not valid

    Args:
        agent_instance (:obj:`apiai_assistant.agent.Agent`): agent instance

    Returns:
        bool: True if valid, False otherwise.
    """
    if not agent_instance.parser:
        agent_instance.error('Could not instantiate parser',
                             code=agent.Status.InvalidData)
        return False

    if not agent_instance.parser.is_valid:
        agent_instance.error('Could not validate data',
                             code=agent.Status.InvalidData)
        return False

    logging.debug("""
- Actions: {actions}
- Action: {action}""".format(
        actions=self.action_map.keys(),
        action=agent_instance.parser.action))

    if (not agent_instance.parser.action
            or agent_instance.parser.action not in self.action_map):
        agent_instance.error('Could not understand action',
                             code=agent.Status.InvalidData)
        return False

    logging.debug("""
- HTTP Request: {data}
- API.AI Request: {request}
- Agent: {code} {message}
- Valid: {valid}""".format(
        data=agent_instance.parser.data,
        request=agent_instance.parser.request,
        code=agent_instance.code,
        message=agent_instance.error_message,
        valid=agent_instance.code == agent.Status.OK))

    return True
def run(sc):
    ts = datetime.datetime.now().replace(microsecond=0).isoformat('_')
    sc.sim_time_start = ts
    logger.setup(sc)
    store_scenario(sc)

    INFO('Init (%s)' % ts)
    INFO('Fitness (minimal): %.6f' % sc.sol_fitness_min)
    INFO('Fitness (maximal): %.6f' % sc.sol_fitness_max)
    INFO('Fitness (average): %.6f' % sc.sol_fitness_avg)

    INFO('Creating %d agents' % sc.opt_m)
    agents = dict()
    for aid, search_space, initial_value in zip(
            sc.agent_ids, sc.agent_search_spaces, sc.agent_initial_values):
        agents[aid] = Agent(aid, search_space, initial_value)

    INFO('Connecting agents')
    for a, neighbors in sc.network.items():
        for n in neighbors:
            # Consistency check
            assert a != n, 'cannot add myself as neighbor!'
            # Add neighbor
            DEBUG('', 'Connecting', a, '->', n)
            if n not in agents[a].neighbors:
                agents[a].neighbors[n] = agents[n]
            else:
                WARNING(n, 'is already neighbor of', a)

    INFO('Starting simulation')
    mas = Mas(sc, agents)
    logger.set_mas(mas)
    stats = Stats(sc, agents)
    stats.eval(mas.current_time)

    AGENT(mas.aid, 'Notifying initial agent (%s)' % sc.sim_initial_agent)
    kappa = Working_Memory(sc.objective, dict(),
                           Solution_Candidate(None, dict(), float('-inf')))
    msg = Message(mas.aid, sc.sim_initial_agent, kappa)
    mas.msg(msg)
    while mas.is_active():
        mas.step()
        stats.eval(mas.current_time)

    if not stats.is_converged():
        ERROR('convergence not reached!')

    ts = datetime.datetime.now().replace(microsecond=0).isoformat('_')
    INFO('End (%s)' % ts)

    # Store scenario again, this time with simulation result
    store_scenario(sc, overwrite=True)

    return stats
def run(settings): recreate_subdirectory_structure(settings) tf.reset_default_graph() with tf.device("/cpu:0"): global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False) optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"]) global_network = ACNetwork('global', None) num_agents = 1 agents = [] envs = [] for i in range(num_agents): if settings["game"] == '11arms': this_env = ElevenArms() else: this_env = TwoArms(settings["game"]) envs.append(this_env) for i in range(num_agents): agents.append(Agent(envs[i], i, optimizer, global_step, settings)) saver = tf.train.Saver(max_to_keep=5) with tf.Session() as sess: coord = tf.train.Coordinator() if FLAGS.resume: if FLAGS.hypertune: ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"]) else: ckpt = tf.train.get_checkpoint_state(settings["load_from"]) print("Loading Model from {}".format(ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) else: sess.run(tf.global_variables_initializer()) agent_threads = [] for agent in agents: agent_play = lambda: agent.play(sess, coord, saver) thread = threading.Thread(target=agent_play) thread.start() agent_threads.append(thread) coord.join(agent_threads)
def run(): recreate_directory_structure() tf.reset_default_graph() sess = tf.Session() # sess = tf_debug.LocalCLIDebugWrapperSession(sess) # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan) with sess: with tf.device("/cpu:0"): global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False) optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr) # num_agents = multiprocessing.cpu_count() num_agents = FLAGS.nb_concurrent agents = [] envs = [] for i in range(num_agents): gym_env = gym.make(FLAGS.game) # if FLAGS.monitor: # gym_env = gym.wrappers.Monitor(gym_env, FLAGS.experiments_dir + '/worker_{}'.format(i), force=True) if FLAGS.game not in flags.SUPPORTED_ENVS: gym_env = atari_environment.AtariEnvironment(gym_env=gym_env, resized_width=FLAGS.resized_width, resized_height=FLAGS.resized_height, agent_history_length=FLAGS.agent_history_length) FLAGS.nb_actions = len(gym_env.gym_actions) envs.append(gym_env) global_network = FUNNetwork('global', None) for i in range(num_agents): agents.append(Agent(envs[i], i, optimizer, global_step)) saver = tf.train.Saver(max_to_keep=5) coord = tf.train.Coordinator() if FLAGS.resume: ckpt = tf.train.get_checkpoint_state(os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name)) print("Loading Model from {}".format(ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) else: sess.run(tf.global_variables_initializer()) agent_threads = [] for agent in agents: thread = threading.Thread(target=(lambda: agent.play(sess, coord, saver))) thread.start() agent_threads.append(thread) while True: if FLAGS.show_training: for env in envs: # time.sleep(1) # with main_lock: env.render() coord.join(agent_threads)
def run(): recreate_directory_structure() tf.reset_default_graph() sess = tf.Session() # sess = tf_debug.LocalCLIDebugWrapperSession(sess) # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan) with sess: with tf.device("/cpu:0"): global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False) optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr) if FLAGS.use_conv: global_network = ConvNetwork('global', None) else: global_network = ACNetwork('global', None) # num_agents = multiprocessing.cpu_count() num_agents = FLAGS.nb_concurrent agents = [] envs = [] for i in range(num_agents): gym_env = gym.make(FLAGS.game) # if FLAGS.monitor: # gym_env = gym.wrappers.Monitor(gym_env, FLAGS.experiments_dir + '/worker_{}'.format(i), force=True) envs.append(gym_env) for i in range(num_agents): agents.append(Agent(envs[i], i, optimizer, global_step)) saver = tf.train.Saver(max_to_keep=5) coord = tf.train.Coordinator() if FLAGS.resume: ckpt = tf.train.get_checkpoint_state(os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name)) print("Loading Model from {}".format(ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) else: sess.run(tf.global_variables_initializer()) agent_threads = [] for agent in agents: thread = threading.Thread(target=(lambda: agent.play(sess, coord, saver))) thread.start() agent_threads.append(thread) while True: if FLAGS.show_training: for env in envs: # time.sleep(1) # with main_lock: env.render() coord.join(agent_threads)
def run(settings): recreate_subdirectory_structure(settings) tf.reset_default_graph() with tf.device("/cpu:0"): global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False) optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"]) global_network = ACNetwork('global', None) num_agents = 1 agents = [] envs = [] for i in range(num_agents): if settings["game"] == '11arms': this_env = ElevenArms() else: this_env = TwoArms(settings["game"]) envs.append(this_env) for i in range(num_agents): agents.append(Agent(envs[i], i, optimizer, global_step, settings)) saver = tf.train.Saver(max_to_keep=5) with tf.Session() as sess: coord = tf.train.Coordinator() if FLAGS.resume: ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"]) # print("Loading Model from {}".format(ckpt.model_checkpoint_path)) try: saver.restore(sess, ckpt.model_checkpoint_path) except Exception as e: print(sys.exc_info()[0]) print(e) else: sess.run(tf.global_variables_initializer()) agent_threads = [] for agent in agents: agent_play = lambda: agent.play(sess, coord, saver) thread = threading.Thread(target=agent_play) thread.start() agent_threads.append(thread) coord.join(agent_threads)
def a3c(game_name, num_threads=8, restore=None, save_path='model'):
    processes = []
    envs = []
    for _ in range(num_threads + 1):
        gym_env = gym.make(game_name)
        if game_name == 'CartPole-v0':
            env = CustomGymClassicControl(game_name)
        else:
            print "Assuming ATARI game and playing with pixels"
            env = CustomGym(game_name)
        envs.append(env)

    # Separate out the evaluation environment
    evaluation_env = envs[0]
    envs = envs[1:]

    with tf.Session() as sess:
        agent = Agent(session=sess, action_size=envs[0].action_size, model='mnih',
                      optimizer=tf.train.AdamOptimizer(INITIAL_LEARNING_RATE))

        # Create a saver, and only keep 2 checkpoints.
        saver = tf.train.Saver(max_to_keep=2)

        T_queue = Queue.Queue()

        # Either restore the parameters or don't.
        if restore is not None:
            saver.restore(sess, save_path + '-' + str(restore))
            last_T = restore
            print "T was:", last_T
            T_queue.put(last_T)
        else:
            sess.run(tf.global_variables_initializer())
            T_queue.put(0)

        summary = Summary(save_path, agent)

        # Create a process for each worker
        for i in range(num_threads):
            processes.append(threading.Thread(target=async_trainer, args=(
                agent, envs[i], sess, i, T_queue, summary, saver, save_path,)))

        # Create a process to evaluate the agent
        processes.append(threading.Thread(target=evaluator, args=(
            agent, evaluation_env, sess, T_queue, summary, saver, save_path,)))

        # Start all the processes
        for p in processes:
            p.daemon = True
            p.start()

        # Until training is finished
        while not training_finished:
            sleep(0.01)

        # Join the processes, so we get this thread back.
        for p in processes:
            p.join()

# Returns sum(rewards[i] * gamma**i)