We extracted the following 49 code examples from open source Python projects to illustrate how to use multiprocessing.Queue().
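As context for the examples below, here is a minimal producer/consumer sketch of multiprocessing.Queue() itself; the function and variable names are illustrative only and do not come from any of the projects quoted below:

import multiprocessing

def producer(queue):
    # Put a few items on the queue, then a None sentinel to mark the end.
    for i in range(5):
        queue.put(i)
    queue.put(None)

if __name__ == '__main__':
    queue = multiprocessing.Queue()
    worker = multiprocessing.Process(target=producer, args=(queue,))
    worker.start()
    # Drain the queue in the parent until the sentinel arrives.
    for item in iter(queue.get, None):
        print(item)
    worker.join()

Most of the examples that follow are variations on this pattern: a child process (or thread) fills the queue while the parent consumes it, often with a sentinel such as None signaling completion.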
def __init__(self, pool_names, max_restarts=0, options=None):
    self.names = pool_names
    self.queue = multiprocessing.Queue()
    self.pool = dict()
    self.max_restarts = max_restarts
    self.options = options or dict()
    self.dog_path = os.curdir
    self.dog_handler = LiveReload(self)
    # self.dog_observer = Observer()
    # self.dog_observer.schedule(self.dog_handler, self.dog_path, recursive=True)

    if multiprocessing.get_start_method() != 'fork':  # pragma: no cover
        root_logger = logging.getLogger()
        self.log_listener = QueueListener(self.queue, *root_logger.handlers)

    # TODO: Find out how to get the watchdog + livereload working on a later moment.
    # self.dog_observer.start()

    self._restarts = dict()
def __init__(self, data_queue, data_paths, repeat=True):
    '''
    data_queue : Multiprocessing queue
    data_paths : list of data and label pair used to load data
    repeat : if set True, return data until exit is set
    '''
    super(DataProcess, self).__init__()
    # Queue to transfer the loaded mini batches
    self.data_queue = data_queue
    self.data_paths = data_paths
    self.num_data = len(data_paths)
    self.repeat = repeat

    # Tuple of data shape
    self.batch_size = cfg.CONST.BATCH_SIZE
    self.exit = Event()
    self.shuffle_db_inds()
def data_loading(minibatch_size, data_iterator, shapeInput, exit_size):
    queue_train = Queue(maxsize=exit_size * 10)
    queue_test = Queue(maxsize=exit_size * 10)

    def start_loading():
        for e in range(exit_size):
            iterator_train = data_iterator(shapeInput, minibatch_size, shuffle=True, train=True)
            iterator_test = data_iterator(shapeInput, minibatch_size, shuffle=True, train=False)
            for new_input in iterator_train:
                while queue_train.full():
                    print('Queue full')
                    time.sleep(30)
                queue_train.put(new_input)
                new_input_test = iterator_test.next()
                queue_test.put(new_input_test)
        print('Exiting queue')

    t = threading.Thread(target=start_loading)
    t.daemon = True
    t.start()

    return queue_train, queue_test
def run(self):
    self.total = self.get_total() or 5000000
    if not self.total:
        print('error loading document total; using estimate')

    index_queue = multiprocessing.Queue()

    bulk_index_process = multiprocessing.Process(
        target=self.bulk_index, args=(index_queue,),
    )
    bulk_index_process.start()

    for comment in self.iter_comments():
        self.stats['fetched'] += 1
        if not self.stats['fetched'] % 500:
            print('fetched %s/%s\t%s%%\t%s' % (self.stats['fetched'], self.total,
                                               int(self.stats['fetched'] / self.total * 100),
                                               comment['date_disseminated']))
        index_queue.put(comment)

    index_queue.put(None)
    bulk_index_process.join()

    return self.stats['fetched']
def __init__(self, configuration):
    self.client_queue = multiprocessing.Queue(0)
    self.apply_patch()
    self.logger = self.init_logger()
    if ["debug", "html", "content_type", "notify", "ports"] not in configuration:
        raise PJFMissingArgument()
    if configuration.debug:
        print("[\033[92mINFO\033[0m] Starting HTTP ({0}) and HTTPS ({1}) built-in server...".format(
            configuration.ports["servers"]["HTTP_PORT"],
            configuration.ports["servers"]["HTTPS_PORT"]
        ))
    if not configuration.content_type:
        configuration.content_type = False
    if not configuration.content_type:
        configuration.content_type = "application/json"
    self.config = configuration
    self.json = PJFFactory(configuration)
    self.https = SSLWSGIRefServer(host="0.0.0.0", port=self.config.ports["servers"]["HTTPS_PORT"])
    self.http = WSGIRefServer(host="0.0.0.0", port=self.config.ports["servers"]["HTTP_PORT"])
    self.httpsd = multiprocessing.Process(target=run, kwargs={"server": self.https, "quiet": True})
    self.httpd = multiprocessing.Process(target=run, kwargs={"server": self.http, "quiet": True})
    if self.config.fuzz_web:
        self.request_checker = Thread(target=self.request_pool, args=())
    self.logger.debug("[{0}] - PJFServer successfully initialized".format(time.strftime("%H:%M:%S")))
def _launch_pipeline(self):
    """This method creates two queues.
    filename_queue: stores the list of filenames in data_file and label_file
    data_queue: stores the mini-batch
    """
    self.data_processes = []  # Holds process handles

    queue_size = 2 * self.num_preprocess_threads + 2 * self.num_gpu_towers
    self.data_queue = Queue(queue_size)  # This queue stores the data
    image_files = open(self.data_file, 'r').readlines()
    labels = open(self.label_file, 'r').readlines()
    print 'Size of queue: ', queue_size

    self.filename_queue = Queue(len(image_files))  # This queue stores the filenames
    p = Process(target=self._create_filename_queue,
                args=(self.filename_queue, image_files, labels, self.num_epochs))
    p.start()
    self.data_processes.append(p)

    print 'Data feeder started'

    for each_worker in range(self.num_preprocess_threads):
        p = Process(target=self._each_worker_process, args=(self.data_queue,))
        p.start()
        self.data_processes.append(p)
def set_roidb(self, roidb):
    """Set the roidb to be used by this layer during training."""
    self._roidb = roidb
    self._shuffle_roidb_inds()
    if cfg.TRAIN.USE_PREFETCH:
        self._blob_queue = Queue(10)
        self._prefetch_process = BlobFetcher(self._blob_queue,
                                             self._roidb,
                                             self._num_classes)
        self._prefetch_process.start()

        # Terminate the child process when the parent exits
        def cleanup():
            print 'Terminating BlobFetcher'
            self._prefetch_process.terminate()
            self._prefetch_process.join()

        import atexit
        atexit.register(cleanup)
def buffered_gen_mp(source_gen, buffer_size=2):
    """
    Generator that runs a slow source generator in a separate process.
    buffer_size: the maximal number of items to pre-generate (length of the buffer)
    """
    if buffer_size < 2:
        raise RuntimeError("Minimal buffer size is 2!")

    buffer = mp.Queue(maxsize=buffer_size - 1)
    # the effective buffer size is one less, because the generation process
    # will generate one extra element and block until there is room in the buffer.

    def _buffered_generation_process(source_gen, buffer):
        for data in source_gen:
            buffer.put(data, block=True)
        buffer.put(None)  # sentinel: signal the end of the iterator
        buffer.close()  # unfortunately this does not suffice as a signal: if buffer.get()
        # was called and subsequently the buffer is closed, it will block forever.

    process = mp.Process(target=_buffered_generation_process, args=(source_gen, buffer))
    process.start()

    for data in iter(buffer.get, None):
        yield data
def buffered_gen_threaded(source_gen, buffer_size=5):
    """
    Generator that runs a slow source generator in a separate thread. Beware of the GIL!
    buffer_size: the maximal number of items to pre-generate (length of the buffer)
    """
    if buffer_size < 2:
        raise RuntimeError("Minimal buffer size is 2!")

    buffer = Queue.Queue(maxsize=buffer_size - 1)
    # the effective buffer size is one less, because the generation process
    # will generate one extra element and block until there is room in the buffer.

    def _buffered_generation_thread(source_gen, buffer):
        for data in source_gen:
            buffer.put(data, block=True)
        buffer.put(None)  # sentinel: signal the end of the iterator

    thread = threading.Thread(target=_buffered_generation_thread, args=(source_gen, buffer))
    thread.daemon = True
    thread.start()

    for data in iter(buffer.get, None):
        yield data
def start(self):
    self.setup_sockets()
    import StaticUPnP_Settings
    permissions = Namespace(**StaticUPnP_Settings.permissions)
    print(permissions)
    if permissions.drop_permissions:
        self.drop_privileges(permissions.user, permissions.group)

    self.running = Value(ctypes.c_int, 1)
    self.queue = Queue()

    self.reciever_thread = Process(target=self.socket_handler, args=(self.queue, self.running))
    self.reciever_thread.start()

    self.schedule_thread = Process(target=self.schedule_handler, args=(self.running,))
    self.schedule_thread.start()

    self.response_thread = Process(target=self.response_handler, args=(self.queue, self.running))
    self.response_thread.start()
def run_parallel(num_processes, out_dir, source):
    page = requests.get("http://storage.googleapis.com/books/ngrams/books/datasetsv2.html")
    pattern = re.compile('href=\'(.*%s-%s-%s-.*\.gz)' % (source, TYPE, VERSION))
    urls = pattern.findall(page.text)
    del page

    queue = Queue()
    for url in urls:
        queue.put(url)

    ioutils.mkdir(out_dir + '/' + source + '/raw')
    download_dir = out_dir + '/' + source + '/raw/'
    ioutils.mkdir(download_dir)

    procs = [Process(target=split_main, args=[i, queue, download_dir]) for i in range(num_processes)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
def main():
    ip_queue = multiprocessing.Queue()
    msg_queue = multiprocessing.Queue()
    p1 = multiprocessing.Process(target=get_proxy, args=(ip_queue, msg_queue))
    p2 = multiprocessing.Process(target=test_and_verify.verify_db_data, args=(ip_queue, msg_queue))
    p3 = [multiprocessing.Process(target=test_and_verify.gevent_queue, args=(ip_queue, msg_queue))
          for i in range(settings.TEST_PROCESS_NUM)]
    p4 = multiprocessing.Process(target=web_cache_run, args=(ip_queue,))
    p1.start()
    p2.start()
    for p in p3:
        p.start()
    pid_list = [os.getpid(), p1.pid, p2.pid]
    pid_list.extend(p.pid for p in p3)
    if WEB_USE_REDIS_CACHE:
        p4.start()
        pid_list.append(p4.pid)
    with open(PID, "w") as f:
        f.write(json.dumps(pid_list))
    p1.join()
    p2.join()
    for p in p3:
        p.join()
    if WEB_USE_REDIS_CACHE:
        p4.join()
def test_handle_receive_on_a_channel(self):
    """
    Given that I have a channel
    When I receive on that channel
    Then I should get a message via the consumer
    """
    body = BrightsideMessageBody("test message")
    header = BrightsideMessageHeader(uuid4(), "test topic", BrightsideMessageType.MT_COMMAND)
    message = BrightsideMessage(header, body)

    fake_queue = [message]
    consumer = FakeConsumer(fake_queue)

    channel = Channel("test", consumer, Pipeline())

    msg = channel.receive(1)

    self.assertEqual(message.body.value, msg.body.value)
    self.assertEqual(message.header.topic, msg.header.topic)
    self.assertEqual(message.header.message_type, msg.header.message_type)
    self.assertEqual(0, len(fake_queue))  # We have read the queue
    self.assertTrue(channel.state == ChannelState.started)  # We don't stop because we consume a message
def test_handle_acknowledge(self):
    """
    Given that I have a channel
    When I acknowledge a message on that channel
    Then I should acknowledge the message on the consumer
    """
    body = BrightsideMessageBody("test message")
    header = BrightsideMessageHeader(uuid4(), "test topic", BrightsideMessageType.MT_COMMAND)
    message = BrightsideMessage(header, body)

    fake_queue = [message]
    consumer = FakeConsumer(fake_queue)

    channel = Channel("test", consumer, Pipeline())

    channel.acknowledge(message)

    self.assertTrue(consumer.has_acknowledged(message))
def test_handle_requeue(self):
    """
    Given that I have a channel
    When I receive a requeue on that channel
    I should ask the consumer to requeue the message
    """
    body = BrightsideMessageBody("test message")
    header = BrightsideMessageHeader(uuid4(), "test topic", BrightsideMessageType.MT_COMMAND)
    message = BrightsideMessage(header, body)

    fake_queue = []
    consumer = FakeConsumer(fake_queue)

    channel = Channel("test", consumer, Pipeline())

    channel.requeue(message)

    self.assertEqual(len(consumer), 1)
def __init__(self, *args, **kwargs):
    test_notes = global_vars['test_notes']
    pause_reporting = global_vars['pause_reporting']

    def wrapper(func, test_notes, pause_reporting, **kwargs):
        """
        :param func: function to pass to multiprocessing.Process.
        :param test_notes: multiprocessing Queue() instance. Allows us to add notes to
        :param pause_reporting: multiprocessing Event() instance. Turns off reporting to terminal when input needed.
        :param kwargs: dictionary that contains all args and kwargs being sent to wrapped function.
        :return:
        """
        global_vars['test_notes'] = test_notes
        global_vars['pause_reporting'] = pause_reporting
        args_ = kwargs['args'] if 'args' in kwargs else ()
        kwargs_ = kwargs['kwargs'] if 'kwargs' in kwargs else {}
        return func(*args_, **kwargs_)

    wrapper_args = [kwargs['target'], test_notes, pause_reporting]
    wrapper_kwargs = kwargs

    multiprocessing.Process.__init__(self, target=wrapper, args=wrapper_args, kwargs=wrapper_kwargs)
def __init__(self, name, nsaddr=None, addr=None, serializer=None, transport=None,
             base=Agent, attributes=None):
    super().__init__()
    self.name = name
    self._daemon = None
    self.host, self.port = address_to_host_port(addr)
    if self.port is None:
        self.port = 0
    self.nsaddr = nsaddr
    self.serializer = serializer
    self.transport = transport
    self.base = base
    self.shutdown_event = multiprocessing.Event()
    self.queue = multiprocessing.Queue()
    self.sigint = False
    self.attributes = attributes
def _receive_message(c, block=False):
    """Receive a message."""
    if isinstance(c, multiprocessing.queues.Queue):
        try:
            message = c.get(block=block)
        except queue.Empty:
            return None
    else:
        if not block and not c.poll():
            return None
        try:
            message = c.recv()
        except EOFError:
            return None
    return message
def test(self):
    """Test IPTables firewall rules

    Returns:
        (bool, Optional[str]): A tuple with the first object being True if
        the test succeeded, else False. The second object is a string
        storing an optional error message.
    """
    rules = self.build(chains=self.chains, interfaces=self.interfaces,
                       addressbook=self.addressbook, rules=self.rules,
                       services=self.services)
    tmpfile = tempfile.NamedTemporaryFile(
        dir=self._sessions_dir, prefix='test_', delete=False)
    tmpfile.write("\n".join(rules))
    tmpfile.close()
    os.chmod(tmpfile.name, 0755)
    q = Queue()
    p = Process(target=self._test, args=(tmpfile.name, q))
    p.start()
    p.join()
    os.remove(tmpfile.name)
    return q.get()
def parmap(f, X, nprocs=multiprocessing.cpu_count()):
    """ parallel map for multiprocessing """
    q_in = multiprocessing.Queue(1)
    q_out = multiprocessing.Queue()

    proc = [multiprocessing.Process(target=fun, args=(f, q_in, q_out))
            for _ in range(nprocs)]
    for p in proc:
        p.daemon = True
        p.start()

    sent = [q_in.put((i, x)) for i, x in enumerate(X)]
    [q_in.put((None, None)) for _ in range(nprocs)]
    res = [q_out.get() for _ in range(len(sent))]

    [p.join() for p in proc]

    return [x for i, x in sorted(res)]
def __init__(self, operation, client, result_type, metadata_type, call_options=None):
    """
    Args:
        operation (google.longrunning.Operation): the initial long-running operation object.
        client (google.gapic.longrunning.operations_client.OperationsClient): a client for
            the long-running operation service.
        result_type (type): the class type of the result.
        metadata_type (Optional[type]): the class type of the metadata.
        call_options (Optional[google.gax.CallOptions]): the call options that are used
            when reloading the operation.
    """
    self._operation = operation
    self._client = client
    self._result_type = result_type
    self._metadata_type = metadata_type
    self._call_options = call_options
    self._queue = mp.Queue()
    self._process = None
def get_result(self):
    """
    Get result from result queue, do task index confirm meanwhile

    Return '' if all tasks have been confirmed

    Raises:
        Queue.Empty: can not get response within timeout
    """
    # check whether all tasks have been confirmed
    # if so, return ''
    if self._task_confirm_num == self._cur_task_num:
        return ''
    # may throw Queue.Empty here
    task_result = self._result_queue.get(block=True, timeout=self._timeout)
    resultl = task_result.split('|')
    index = int(resultl[1], 10)
    result = '|'.join(resultl[2:])
    # do confirm
    # if it is duplicate, try to get result again
    if self._task_confirm_list[index] != 0:
        return self.get_result()
    self._task_confirm_list[index] = 1
    self._task_confirm_num += 1
    LOG.debug('get result: %s' % task_result.replace('\n', ' '))
    return result
def __init__(self, parent=None, fps=30, inbox=None, outbox=None):
    super().__init__(parent)
    self._fps = fps
    self._interval = 1 / fps
    self.startTimer(1000 / fps)

    # Connect signals to slots
    self.clearScreenSignal.connect(self.clearScreen)
    self.restartScreenSignal.connect(self.restartScreen)

    # Creates mail boxes
    self._inbox = inbox = Queue() if inbox is None else inbox
    self._outbox = outbox = Queue() if outbox is None else outbox

    # Init
    self._turtles = QGraphicsSceneGroup(self, inbox=inbox, outbox=outbox)
    self._tasks = deque()
    assert self._turtles.inbox is self._inbox
    assert self._turtles.outbox is self._outbox
def start_qt_scene_app_subprocess():
    """
    Starts a remote sub-process that initializes a TurtleScene widget and
    Qt's mainloop.
    """
    inbox = MailboxState.inbox = Queue()
    outbox = MailboxState.outbox = Queue()
    process = Process(target=start_qt_scene_app,
                      kwargs=dict(outbox=outbox, inbox=inbox, ping=True),
                      name='turtle-server')
    process.daemon = True
    process.start()

    # Send a ping message to the out process
    outbox.put(['ping'])
    msg = inbox.get(timeout=2.0)
    if msg != ['ping']:
        raise RuntimeError('wrong response from server: %s' % (msg,))
    return process
def __call__(self, *args, **kwargs):
    """Execute the embedded function object asynchronously.

    The function given to the constructor is transparently called and
    requires that "ready" be intermittently polled. If and when it is
    True, the "value" property may then be checked for returned data.
    """
    self.__limit = kwargs.pop('timeout', self.__limit)
    self.__queue = multiprocessing.Queue(1)
    args = (self.__queue, self.__function) + args
    self.__process = multiprocessing.Process(target=_target, args=args, kwargs=kwargs)
    self.__process.daemon = True
    self.__process.start()
    self.__timeout = self.__limit + time.time()
    while not self.ready:
        time.sleep(0.01)
    return self.value
def _get_com(args):
    """Gets a list of configured COM ports for serial communication.
    """
    from liveserial.monitor import ComMonitorThread as CMT
    from multiprocessing import Queue
    dataq, errorq = Queue(), Queue()
    result = []
    msg.info("Starting setup of ports {}.".format(args["port"]), 2)
    if args["config"]:
        for port in args["port"]:
            if port.lower() != "aggregate":
                # The aggregate port name is just a shortcut so that we can plot
                # transforms between multiple sensor streams. It doesn't actually
                # represent a physical port that will be monitored.
                com = CMT.from_config(args["config"], port, dataq, errorq,
                                      args["listen"], args["sensors"])
                result.append(com)
    else:
        for port in args["port"]:
            com = CMT(dataq, errorq, port, args["baudrate"], args["stopbits"],
                      args["parity"], args["timeout"], args["listen"],
                      args["virtual"])
            result.append(com)
    return result
def test_dummy_observer():
    flowqueue = mp.Queue(QUEUE_SIZE)
    observer_shutdown_queue = mp.Queue(QUEUE_SIZE)

    observer = DummyObserver()
    observer_process = mp.Process(
        args=(flowqueue, observer_shutdown_queue),
        target=observer.run_flow_enqueuer,
        name='observer',
        daemon=True)
    observer_process.start()

    observer_shutdown_queue.put(True)

    assert flowqueue.get(True, timeout=3) == SHUTDOWN_SENTINEL

    observer_process.join(3)
    assert not observer_process.is_alive()
def start(self, no_runner=False):
    from multiprocessing import Process, Queue
    queue = Queue()

    logging.getLogger(self.logger).log(logging.DEBUG,
                                       "Starting {} MPQueue workers...".format(self.count))
    if not no_runner:
        for process_index in range(self.count):
            process_name = self.options.pop('process_name_template',
                                            "MPQueueProcess_{index}").format(index=process_index)
            worker_instance = MPQueueWorker(
                result_backend=self.result_backend,
                queue=queue,
                logger=self.logger,
                **self.options
            )
            p = Process(
                name=process_name,
                target=worker,
                args=(worker_instance,)
            )
            p.daemon = True
            p.start()

    return MPQueueRunner(queue=queue, logger=self.logger)
def __init__(self, args):
    self.args = args
    self.tasks = multiprocessing.JoinableQueue()
    self.results = multiprocessing.Queue()

    self.actors = []
    self.actors.append(Actor(self.args, self.tasks, self.results, 9999, args.monitor))
    for i in xrange(self.args.num_threads - 1):
        self.actors.append(Actor(self.args, self.tasks, self.results, 37 * (i + 3), False))

    for a in self.actors:
        a.start()

    # we will start by running 20,000 / 1000 = 20 episodes for the first iteration
    self.average_timesteps_in_episode = 1000
def work_every_day(self, queue, region):
    """
    Handles data for one day and for one region

    :param queue: the list of days to consider
    :type queue: `Queue`

    :param region: the region to consider
    :type region: `str`

    This is run as an independent process, so it works asynchronously
    from the rest.
    """
    try:
        for cursor in iter(queue.get, 'STOP'):
            self.pull(cursor, region)
            time.sleep(0.5)
    except KeyboardInterrupt:
        pass
    except:
        raise
def work_every_minute(self, queue, region):
    """
    Handles data for one minute and for one region

    :param queue: the minute ticks for a given day
    :type queue: `Queue`

    :param region: the region to consider
    :type region: `str`

    This is run as an independent process, so it works asynchronously
    from the rest.
    """
    try:
        for cursor in iter(queue.get, 'STOP'):
            self.tick(cursor, region)
            time.sleep(0.5)
    except KeyboardInterrupt:
        pass
    except:
        raise
def get_parallel_runner_1(path):
    param_dict = np.load(path, encoding='latin1').item()
    cfg = PredictConfig(
        model=Model(),
        session_init=ParamRestore(param_dict),
        session_config=get_default_sess_config(0.99),
        input_names=['input'],
        output_names=['resized_map']
    )
    inque = mp.Queue()
    outque = mp.Queue()
    with change_gpu(0):
        proc = MultiProcessQueuePredictWorker(1, inque, outque, cfg)
        proc.start()
    with change_gpu(1):
        pred1 = OfflinePredictor(cfg)

    def func1(img):
        inque.put((0, [[img]]))
    func1.outque = outque

    def func2(img):
        return pred1([[img]])[0][0]

    return func1, func2
def iter_split_evaluate_wrapper(self, fn, local_vars, in_size, q_in, q_out):
    l = Lock()
    idx_q = Queue()

    def split_iter():
        try:
            while True:
                l.acquire()
                i, data_in = q_in.get()
                idx_q.put(i)
                if data_in is EOFMessage:
                    return
                yield data_in
        except BaseException:
            traceback.print_exc(file=sys.stdout)

    gs = itertools.tee(split_iter(), in_size)
    for data_out in self.evaluate(
            (fn,) + tuple((lambda i: (x[i] for x in gs[i]))(i) for i in range(in_size)),
            local_vars=local_vars):
        q_out.put((idx_q.get(), data_out))
        l.release()
    q_out.put((0, EOFMessage))
def test_process():
    from multiprocessing import Queue
    from lib.config import cfg
    from lib.data_io import category_model_id_pair

    cfg.TRAIN.PAD_X = 10
    cfg.TRAIN.PAD_Y = 10

    data_queue = Queue(2)
    category_model_pair = category_model_id_pair(dataset_portion=[0, 0.1])

    data_process = ReconstructionDataProcess(data_queue, category_model_pair)
    data_process.start()
    batch_img, batch_voxel = data_queue.get()

    kill_processes(data_queue, [data_process])
def __init__(self, n_walkers, n_workers=None, gpu_indices=None):
    if gpu_indices is not None:
        self.gpu_indices = gpu_indices
        self.n_workers = len(gpu_indices)
    else:
        assert n_workers, "If gpu_indices are not given the n_workers must be given"
        self.n_workers = n_workers
        self.gpu_indices = range(n_workers)

    # make a Queue for free workers, when one is being used it is
    # popped off and locked
    self.free_workers = mulproc.Queue()

    # the semaphore provides the locks on the workers
    self.lock = mulproc.Semaphore(self.n_workers)

    # initialize a list to put results in
    self.results_list = mulproc.Manager().list()
    for i in range(n_walkers):
        self.results_list.append(None)

    # add the free worker indices (not device/gpu indices) to the
    # free workers queue
    for i in range(self.n_workers):
        self.free_workers.put(i)
def list_buckets_fast(self):
    buckets = []
    jobs = []
    for profile in self.clients.keys():
        queue = mp.Queue()
        kwargs = {'profile_names': profile, 'queue': queue}
        process = mp.Process(target=self.list_buckets, kwargs=kwargs)
        process.start()
        jobs.append((process, queue))
    count = 0
    for job in jobs:
        process = job[0]
        queue = job[1]
        process.join()
        profile_buckets = queue.get()
        buckets.extend(profile_buckets)
        count += 1
    for job in jobs:
        process = job[0]
        if process.is_alive():
            process.terminate()
    return buckets
def __init__(self, field, observed_component=None, steps_per_frame=10,
             scale=1, frame_delay=1e-2):
    """Class constructor.

    Args:
        field: Field to be observed.
        observed_component: Component to be observed (as string).
        steps_per_frame: Simulation steps between updates of the animation.
        scale: Scale of the animation.
        frame_delay: Delay between animation updates.
    """
    self.field = field
    self.field_components = {name: getattr(self.field, name) for name in dir(self.field)
                             if type(getattr(self.field, name)) == fld.FieldComponent}
    if observed_component:
        if observed_component in self.field_components.keys():
            self.observed_component = observed_component
        else:
            raise KeyError('Field component {} not found in given field.'
                           .format(observed_component))
    else:
        self.observed_component = list(self.field_components.keys())[0]

    self.steps_per_frame = int(steps_per_frame)
    self.scale = scale
    self.frame_delay = frame_delay

    self.show_boundaries = True
    self.show_materials = True
    self.show_output = True

    self._plot_queue = mp.Queue()
    self._x_axis_prefix, self._x_axis_factor = get_prefix(max(self.field.x.vector))
    self._t_prefix, self._t_factor = get_prefix(max(self.field.t.vector))

    self.axes = None
    self.plot_title = ''
    self.x_label = '$x$'
    self.time_precision = 2
def _sim_function(self, queue):
    """Simulation function to be started as a separate process.

    Args:
        queue: Instance of multiprocessing.Queue that is used to transfer data
            between simulation and visualization process.
    """
    for ii in range(int(self.field.t.samples / self.steps_per_frame)):
        self.field.simulate(self.steps_per_frame)
        queue.put((self.field.t.vector[self.field.step],
                   getattr(self.field, self.observed_component).values))

    # return field when simulation finishes to get output signals
    queue.put(self.field)
def __init__(self, queue, environment_name='default', pool=None, options=None):
    """
    Create an environment process of the controller itself.

    :param queue: Queue to hook on.
    :param environment_name: Name of environment.
    :param pool: Pool.
    :param options: Custom options.

    :type queue: multiprocessing.Queue
    :type environment_name: str
    :type pool: multiprocessing.Pool
    :type options: dict
    """
    self.queue = queue
    self.name = environment_name
    self.options = options or dict()
    self.max_restarts = 1
    self.restarts = 0
    self.process = multiprocessing.Process(target=_run, kwargs=dict(
        name=self.name,
        queue=self.queue,
        options=self.options,
    ))
    self.__last_state = True
def get_total_conf_mapped_reads_in_cells(filename, filtered_barcodes, mem_gb):
    """ Number of confidently mapped reads w/ valid, filtered barcodes.
    Because this is called from a 'split' function, we must stay within the given mem limit.
    NOTE: We re-open the file for each chunk IN ISOLATED PROCESSES
          due to a possible memory leak in h5py. Tests show the mem usage is nondeterministic, too.
          https://github.com/h5py/h5py/issues/763 (among many others)
    Args:
        filtered_barcodes (set) - set of barcode strings (e.g., ACGT-1)
        filename (str) - path to molecule info HDF5 file
        mem_gb (int) - limit memory usage to this value
    """
    filtered_bcs_set = set(MoleculeCounter.get_compressed_bc_iter(filtered_barcodes))

    entries_per_chunk = int(np.floor(float(mem_gb*1e9)) / MoleculeCounter.get_record_bytes())
    print 'Entries per chunk: %d' % entries_per_chunk

    with MoleculeCounter.open(filename, 'r') as mc:
        num_entries = mc.nrows()

    total_mapped_reads = 0
    for start in xrange(0, num_entries, entries_per_chunk):
        queue = multiprocessing.Queue()
        p = multiprocessing.Process(target=MoleculeCounter.get_total_conf_mapped_reads_in_cells_chunk,
                                    args=(filename, filtered_bcs_set, start, entries_per_chunk, queue))
        p.start()
        p.join()
        total_mapped_reads += queue.get()

    return total_mapped_reads
def run(self):
    '''
    get documents without a sentiment tag that match significant terms:
      - significant terms from positive regex tagged vs others
      - extra multi match clause for stronger terms (in multiple term sets:
        positive vs negative, untagged, and all)
      - phrase match net neutrality since both terms score high
    '''
    index_queue = multiprocessing.Queue()

    bulk_index_process = multiprocessing.Process(
        target=self.bulk_index, args=(index_queue,),
    )
    bulk_index_process.start()

    fetched = 0
    try:
        while fetched < self.limit:
            '''
            use search instead of scan because keeping an ordered scan cursor
            open negates the performance benefits
            '''
            resp = self.es.search(index='fcc-comments', body=self.query, size=self.limit)
            for doc in resp['hits']['hits']:
                index_queue.put(doc['_id'])
                fetched += 1
                if not fetched % 100:
                    print('%s\t%s\t%s' % (fetched, doc['_score'], doc['_source']['text_data']))
    except ConnectionTimeout:
        print('error fetching: connection timeout')

    index_queue.put(None)
    bulk_index_process.join()
def __init__(self, all_result_path, device):
    """
    Queue-based result handler for a device.

    :param all_result_path: path where all results are stored
    :param device: device id
    """
    self.all_result_path = all_result_path
    self.device = device
    self.adb = lib.adbUtils.ADB(self.device)
    self.queue = Queue(10)