public void testEnqueueAndDispatch_multipleListeners() {
  Object listener1 = new Object();
  ListenerCallQueue<Object> queue = new ListenerCallQueue<>();
  queue.addListener(listener1, directExecutor());

  Multiset<Object> counters = ConcurrentHashMultiset.create();
  queue.enqueue(incrementingEvent(counters, listener1, 1));
  queue.enqueue(incrementingEvent(counters, listener1, 2));

  Object listener2 = new Object();
  queue.addListener(listener2, directExecutor());
  queue.enqueue(incrementingEvent(counters, multiset(listener1, 3, listener2, 1)));
  queue.enqueue(incrementingEvent(counters, multiset(listener1, 4, listener2, 2)));
  assertEquals(0, counters.size());
  queue.dispatch();
  assertEquals(multiset(listener1, 4, listener2, 2), counters);
}
public void testEnqueueAndDispatch_withExceptions() {
  Object listener = new Object();
  ListenerCallQueue<Object> queue = new ListenerCallQueue<>();
  queue.addListener(listener, directExecutor());

  Multiset<Object> counters = ConcurrentHashMultiset.create();
  queue.enqueue(incrementingEvent(counters, listener, 1));
  queue.enqueue(THROWING_EVENT);
  queue.enqueue(incrementingEvent(counters, listener, 2));
  queue.enqueue(THROWING_EVENT);
  queue.enqueue(incrementingEvent(counters, listener, 3));
  queue.enqueue(THROWING_EVENT);
  queue.enqueue(incrementingEvent(counters, listener, 4));
  queue.enqueue(THROWING_EVENT);
  assertEquals(0, counters.size());
  queue.dispatch();
  assertEquals(multiset(listener, 4), counters);
}
public void testEnqueueAndDispatch_multithreaded() throws InterruptedException {
  Object listener = new Object();
  ExecutorService service = Executors.newFixedThreadPool(4);
  ListenerCallQueue<Object> queue = new ListenerCallQueue<>();
  try {
    queue.addListener(listener, service);

    final CountDownLatch latch = new CountDownLatch(1);
    Multiset<Object> counters = ConcurrentHashMultiset.create();
    queue.enqueue(incrementingEvent(counters, listener, 1));
    queue.enqueue(incrementingEvent(counters, listener, 2));
    queue.enqueue(incrementingEvent(counters, listener, 3));
    queue.enqueue(incrementingEvent(counters, listener, 4));
    queue.enqueue(countDownEvent(latch));
    assertEquals(0, counters.size());
    queue.dispatch();
    latch.await();
    assertEquals(multiset(listener, 4), counters);
  } finally {
    service.shutdown();
  }
}
public DependencyEvaluator(final File modelDir, final Stream<DependencyParse> trainSentences) {
  try {
    dependencyGenerator = new DependencyGenerator(modelDir);
  } catch (final IOException e) {
    throw new RuntimeException(e);
  }

  log.info("Counting valid dependencies...");
  final ConcurrentHashMultiset<String> dependencyCounts = ConcurrentHashMultiset.create();
  trainSentences
      .parallel()
      .map(DependencyParse::getDependencies)
      .map(CCGBankEvaluation::asResolvedDependencies)
      .flatMap(Set::stream)
      .forEach(dep -> dependencyCounts.add(
          dep.getCategory().toString() + dep.getArgNumber()));
  validDependencies = dependencyCounts.entrySet().stream()
      .filter(entry -> entry.getCount() >= 10)
      .map(Entry::getElement)
      .collect(Collectors.toSet());
  log.info("{} valid dependencies found", validDependencies.size());
}
/**
 * Increment count for a given alignment for a given phrase pair.
 */
public void incrementAlignmentCount(AlignmentTemplate alTemp) {
  if (storeAlignmentCounts) {
    int idx = alTemp.getKey();
    int alIdx = alTemp.getAKey();
    final ConcurrentHashMultiset<Integer> aCounts;
    if (idx >= 0) {
      assert (idx <= index.size());
      synchronized (aCounter) {
        // assert(idx <= aCounter.size());
        while (idx >= aCounter.size())
          aCounter.add(ConcurrentHashMultiset.create());
        aCounts = aCounter.get(idx);
      }
      aCounts.add(alIdx);
    }
  }
}
public static void main(String[] args) {
  // Split the text into separate words
  String INPUT_TEXT = "Hello World! Hello All! Hi World!";
  // Create a Multiset
  Multiset<String> multiset = ConcurrentHashMultiset.create(Arrays.asList(INPUT_TEXT.split(" ")));

  // Print the words with their counts
  System.out.println(multiset); // prints [Hi, Hello x 2, World! x 2, All!] - in arbitrary order

  // Print all unique words
  System.out.println(multiset.elementSet()); // prints [Hi, Hello, World!, All!] - in arbitrary order

  // Print the number of occurrences of each word
  System.out.println("Hello = " + multiset.count("Hello")); // prints 2
  System.out.println("World = " + multiset.count("World!")); // prints 2
  System.out.println("All = " + multiset.count("All!")); // prints 1
  System.out.println("Hi = " + multiset.count("Hi")); // prints 1
  System.out.println("Empty = " + multiset.count("Empty")); // prints 0

  // Print the total number of words in the text
  System.out.println(multiset.size()); // prints 6

  // Print the number of unique words
  System.out.println(multiset.elementSet().size()); // prints 4
}
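A ConcurrentHashMultiset iterates its elements in no particular order, which is why the comments above say "arbitrary order". Not part of the original example, but if a deterministic, frequency-ordered view is wanted, Guava's Multisets.copyHighestCountFirst can be applied; a minimal sketch under that assumption:

// Returns an immutable copy ordered by descending count (ties in unspecified order).
ImmutableMultiset<String> byFrequency = Multisets.copyHighestCountFirst(multiset);
System.out.println(byFrequency); // e.g. [Hello x 2, World! x 2, Hi, All!]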
@Override
public void addCFGRule(final int rootId, final NodeConsequent ruleConsequent) {
  Multiset<NodeConsequent> ruleProduction;
  final Multiset<NodeConsequent> tempMultiset = ConcurrentHashMultiset.create();
  if (grammar instanceof ConcurrentMap) {
    final ConcurrentMap<Integer, Multiset<NodeConsequent>> conGrammar =
        (ConcurrentMap<Integer, Multiset<NodeConsequent>>) grammar;
    ruleProduction = conGrammar.putIfAbsent(rootId, tempMultiset);
  } else {
    if (grammar.containsKey(rootId)) {
      ruleProduction = grammar.get(rootId);
    } else {
      ruleProduction = null;
    }
  }

  if (ruleProduction == null) {
    ruleProduction = tempMultiset;
  }
  ruleProduction.add(ruleConsequent);
}
/**
 * Add a tree the given number of times.
 *
 * @param subTree the tree to add
 * @param count how many times to add it
 */
public void addTree(final TreeNode<T> subTree, final int count) {
  checkArgument(count > 0);
  final T rootNodeData = subTree.getData();
  final ConcurrentHashMultiset<TreeNode<T>> tempNew = ConcurrentHashMultiset.create();
  final ConcurrentHashMultiset<TreeNode<T>> nSet = grammar.putIfAbsent(rootNodeData, tempNew);
  if (nSet != null) {
    nSet.add(subTree, count);
  } else {
    tempNew.add(subTree, count);
  }
}
/**
 * Build the set of words in the vocabulary from a collection of files.
 *
 * @param files the files to extract tokens from
 * @param tokenizer the tokenizer used to split each file into tokens
 * @param threshold the minimum number of occurrences a word needs in order to be kept
 * @return the set of vocabulary words
 */
public static Set<String> buildVocabulary(final Collection<File> files,
    final ITokenizer tokenizer, final int threshold) {
  final ConcurrentHashMultiset<String> vocabulary = ConcurrentHashMultiset.create();

  // Add everything
  final ParallelThreadPool threadPool = new ParallelThreadPool();
  for (final File fi : files) {
    threadPool.pushTask(new VocabularyExtractorRunnable(fi, vocabulary, tokenizer));
  }
  threadPool.waitForTermination();

  // Remove rare words
  pruneElementsFromMultiset(threshold, vocabulary);
  LOGGER.info("Vocabulary built, with " + vocabulary.elementSet().size() + " words");
  return vocabulary.elementSet();
}
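The pruneElementsFromMultiset helper is not shown in this collection. A minimal sketch of what it might look like, assuming it simply drops every element whose count is below the threshold (the name and parameter order come from the call above; the body is an assumption):

// Hypothetical implementation: removes all elements occurring fewer than `threshold` times.
private static <T> void pruneElementsFromMultiset(final int threshold, final Multiset<T> multiset) {
  // Snapshot the elements so the multiset can be mutated while iterating.
  for (final T element : ImmutableSet.copyOf(multiset.elementSet())) {
    if (multiset.count(element) < threshold) {
      multiset.setCount(element, 0);
    }
  }
}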
public static Multiset<Ngram> gatherNgram(ESIndex index, String[] ids, NgramTemplate template, int minDF) {
  Multiset<Ngram> multiset = ConcurrentHashMultiset.create();
  String field = template.getField();
  Arrays.stream(ids).parallel().forEach(id -> {
    Map<Integer, String> termVector = index.getTermVectorFromIndex(field, id);
    add(termVector, multiset, template);
  });
  Multiset<Ngram> filtered = ConcurrentHashMultiset.create();
  for (Multiset.Entry entry : multiset.entrySet()) {
    Ngram ngram = (Ngram) entry.getElement();
    int count = entry.getCount();
    if (count >= minDF) {
      filtered.add(ngram, count);
    }
  }
  return filtered;
}
private static void add(List<String> source, Multiset<Ngram> multiset, String field, int slop,
    List<Integer> template) {
  Multiset<Ngram> multiSetForDoc = ConcurrentHashMultiset.create();
  for (int i = 0; i < source.size(); i++) {
    if (i + template.get(template.size() - 1) < source.size()) {
      List<String> list = new ArrayList<>();
      for (int j : template) {
        list.add(source.get(i + j));
      }
      Ngram ngram = new Ngram();
      ngram.setNgram(Ngram.toNgramString(list));
      ngram.setSlop(slop);
      ngram.setField(field);
      ngram.setInOrder(true);
      multiSetForDoc.setCount(ngram, 1);
    }
  }
  multiset.addAll(multiSetForDoc);
}
/**
 * @param numClasses the number of classes
 * @param samples sampled multi-labels; can have duplicates; their empirical probabilities will be estimated
 * @return the predicted multi-label
 */
public MultiLabel predict(int numClasses, List<MultiLabel> samples) {
  Multiset<MultiLabel> multiset = ConcurrentHashMultiset.create();
  for (MultiLabel multiLabel : samples) {
    multiset.add(multiLabel);
  }
  int sampleSize = samples.size();
  List<MultiLabel> uniqueOnes = new ArrayList<>();
  List<Double> probs = new ArrayList<>();
  for (Multiset.Entry<MultiLabel> entry : multiset.entrySet()) {
    uniqueOnes.add(entry.getElement());
    probs.add((double) entry.getCount() / sampleSize);
  }
  return predict(numClasses, uniqueOnes, probs);
}
public void testEnqueueAndDispatch() {
  Object listener = new Object();
  ListenerCallQueue<Object> queue = new ListenerCallQueue<>();
  queue.addListener(listener, directExecutor());

  Multiset<Object> counters = ConcurrentHashMultiset.create();
  queue.enqueue(incrementingEvent(counters, listener, 1));
  queue.enqueue(incrementingEvent(counters, listener, 2));
  queue.enqueue(incrementingEvent(counters, listener, 3));
  queue.enqueue(incrementingEvent(counters, listener, 4));
  assertEquals(0, counters.size());
  queue.dispatch();
  assertEquals(multiset(listener, 4), counters);
}
public void testEnqueueAndDispatch_multithreaded_withThrowingRunnable() throws InterruptedException {
  Object listener = new Object();
  ExecutorService service = Executors.newFixedThreadPool(4);
  ListenerCallQueue<Object> queue = new ListenerCallQueue<>();
  try {
    queue.addListener(listener, service);

    final CountDownLatch latch = new CountDownLatch(1);
    Multiset<Object> counters = ConcurrentHashMultiset.create();
    queue.enqueue(incrementingEvent(counters, listener, 1));
    queue.enqueue(THROWING_EVENT);
    queue.enqueue(incrementingEvent(counters, listener, 2));
    queue.enqueue(THROWING_EVENT);
    queue.enqueue(incrementingEvent(counters, listener, 3));
    queue.enqueue(THROWING_EVENT);
    queue.enqueue(incrementingEvent(counters, listener, 4));
    queue.enqueue(THROWING_EVENT);
    queue.enqueue(countDownEvent(latch));
    assertEquals(0, counters.size());
    queue.dispatch();
    latch.await();
    assertEquals(multiset(listener, 4), counters);
  } finally {
    service.shutdown();
  }
}
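The ListenerCallQueue tests in this collection rely on helpers that are not shown: incrementingEvent, countDownEvent, multiset and THROWING_EVENT. A minimal sketch of what they might look like, assuming ListenerCallQueue.Event<L> is a functional interface whose single method is invoked with each registered listener (the helper names come from the tests above; the bodies are assumptions):

// Hypothetical event that always fails, used to check that exceptions do not stop dispatch.
private static final ListenerCallQueue.Event<Object> THROWING_EVENT =
    listener -> {
      throw new RuntimeException("simulated listener failure");
    };

// Hypothetical event that counts how often each listener was called and checks the expected count.
private static ListenerCallQueue.Event<Object> incrementingEvent(
    Multiset<Object> counters, Object expectedListener, int expectedCount) {
  return incrementingEvent(counters, multiset(expectedListener, expectedCount));
}

private static ListenerCallQueue.Event<Object> incrementingEvent(
    Multiset<Object> counters, Multiset<Object> expected) {
  return listener -> {
    counters.add(listener);
    assertEquals(expected.count(listener), counters.count(listener));
  };
}

// Hypothetical event that releases a latch so the multithreaded tests can wait for completion.
private static ListenerCallQueue.Event<Object> countDownEvent(CountDownLatch latch) {
  return listener -> latch.countDown();
}

// Hypothetical helpers building the small expected multisets used in the assertions.
private static <T> ImmutableMultiset<T> multiset(T element, int count) {
  return ImmutableMultiset.<T>builder().addCopies(element, count).build();
}

private static <T> ImmutableMultiset<T> multiset(T e1, int c1, T e2, int c2) {
  return ImmutableMultiset.<T>builder().addCopies(e1, c1).addCopies(e2, c2).build();
}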
public Tuple2<Double, Multiset<String>> transform(Row row) throws IOException {
  Double label = row.getDouble(1);
  StringReader document = new StringReader(row.getString(0).replaceAll("br2n", ""));
  List<String> wordsList = new ArrayList<>();
  try (BulgarianAnalyzer analyzer = new BulgarianAnalyzer(BULGARIAN_STOP_WORDS_SET)) {
    TokenStream stream = analyzer.tokenStream("words", document);

    TokenFilter lowerFilter = new LowerCaseFilter(stream);
    TokenFilter numbers = new NumberFilter(lowerFilter);
    TokenFilter length = new LengthFilter(numbers, 3, 1000);
    TokenFilter stemmer = new BulgarianStemFilter(length);
    TokenFilter ngrams = new ShingleFilter(stemmer, 2, 3);

    try (TokenFilter filter = ngrams) {
      Attribute termAtt = filter.addAttribute(CharTermAttribute.class);
      filter.reset();
      while (filter.incrementToken()) {
        String word = termAtt.toString().replace(",", "(comma)").replaceAll("\n|\r", "");
        if (word.contains("_")) {
          continue;
        }
        wordsList.add(word);
      }
    }
  }

  Multiset<String> words = ConcurrentHashMultiset.create(wordsList);
  return new Tuple2<>(label, words);
}
public void resetAfterLimit() {
  if (System.currentTimeMillis() - limit > lastCleared.getTime()) {
    frequency = ConcurrentHashMultiset.create();
    lastCleared = new Date(System.currentTimeMillis());
  }
}
@Test
public void testResetAfterLimit() throws InterruptedException {
  rateLimiter = new LoggingRateLimiter(0);
  ConcurrentHashMultiset freq = rateLimiter.getFrequency();
  rateLimiter.resetAfterLimit();
  assertTrue(freq != rateLimiter.getFrequency());

  freq = rateLimiter.getFrequency();
  Thread.sleep(1);
  rateLimiter.resetAfterLimit();
  assertThat(freq, is(rateLimiter.getFrequency()));
}
@Test
public void testOverflow() throws Throwable {
  when(pjp.getTarget()).thenReturn(this);
  Field field = findField(ConcurrentHashMultiset.class, "countMap");
  makeAccessible(field);
  ConcurrentMap map = (ConcurrentMap) getField(field, jobs.getStatCounter());
  map.put("java.lang.String", Integer.MAX_VALUE);
  testExecute();
}
private int getArgmaxAlignment(int idx) {
  if (idx >= aCounter.size())
    return -1;
  // Linear search:
  ConcurrentHashMultiset<Integer> aCounts = aCounter.get(idx);
  int maxK = -1;
  int maxV = Integer.MIN_VALUE;
  String maxKLex = null;
  for (int k : aCounts.elementSet()) {
    int v = aCounts.count(k);
    if (v == maxV) {
      // If there is a tie, take lexicographic order as defined in Moses:
      String kLex = AlignmentTemplate.alignmentToString(aIndex.get(k));
      if (maxKLex == null)
        maxKLex = AlignmentTemplate.alignmentToString(aIndex.get(maxK));
      if (kLex.compareTo(maxKLex) < 0) {
        maxK = k;
        maxV = v;
        maxKLex = kLex;
      }
    } else if (v > maxV) {
      maxK = k;
      maxV = v;
      maxKLex = null;
    }
  }
  assert (maxK >= 0);
  return maxK;
}
private static void addCountToArray(ConcurrentHashMultiset<Integer> counter, int idx) {
  if (idx < 0)
    return;
  counter.add(idx);
  if (DEBUG_LEVEL >= 3)
    System.err.println("Increasing count idx=" + idx + " in vector (" + counter + ").");
}
private static void addCountToArray(final ConcurrentHashMultiset<Integer> counter, int idx) {
  if (idx < 0)
    return;
  counter.add(idx);
  if (DEBUG_LEVEL >= 3)
    System.err.println("Increasing count idx=" + idx + " in vector (" + counter + ").");
}
/**
 * Adds all tree productions of the other grammar to this grammar.
 *
 * @param other the grammar whose productions will be added
 */
public void addAll(final TSGrammar<T> other) {
  for (final ConcurrentHashMultiset<TreeNode<T>> treeSet : other.grammar.values()) {
    for (final Multiset.Entry<TreeNode<T>> entry : treeSet.entrySet()) {
      addTree(entry.getElement(), entry.getCount());
    }
  }
}
@Override
public SortedMultiset<Integer> computeGrammarTreeSizeStats() {
  // Get the tree size distribution.
  final SortedMultiset<Integer> treeSizes = TreeMultiset.create();
  for (final Entry<T, ConcurrentHashMultiset<TreeNode<T>>> entry : grammar.entrySet()) {
    for (final Multiset.Entry<TreeNode<T>> rule : entry.getValue().entrySet()) {
      treeSizes.add(rule.getElement().getTreeSize(), rule.getCount());
    }
  }
  return treeSizes;
}
@Override
public int countTreeOccurences(final TreeNode<T> root) {
  final ConcurrentHashMultiset<TreeNode<T>> productions = grammar.get(root.getData());
  if (productions == null) {
    return 0;
  }
  return productions.count(root);
}
@Override
public int countTreesWithRoot(final T root) {
  final ConcurrentHashMultiset<TreeNode<T>> set = grammar.get(root);
  if (set == null) {
    return 0;
  }
  return set.size();
}
@Override
public boolean removeTree(final TreeNode<T> subTree) {
  final T rootNodeData = subTree.getData();
  final ConcurrentHashMultiset<TreeNode<T>> productions = grammar.get(rootNodeData);
  if (productions == null) {
    return false;
  } else {
    return productions.remove(subTree);
  }
}
@Override
public int removeTree(final TreeNode<T> subTree, final int occurences) {
  final T rootNodeData = subTree.getData();
  final ConcurrentHashMultiset<TreeNode<T>> productions = grammar.get(rootNodeData);
  if (productions == null) {
    return -occurences;
  } else {
    return productions.remove(subTree, occurences) - occurences;
  }
}
public VocabularyExtractorRunnable(final File file,
    final ConcurrentHashMultiset<String> vocabulary, final ITokenizer tokenizerModule) {
  codeFile = file;
  vocabularySet = vocabulary;
  tokenizer = tokenizerModule;
}
public static Multiset<Ngram> gatherNgram(ESIndex index, String[] ids, NgramTemplate template) {
  Multiset<Ngram> multiset = ConcurrentHashMultiset.create();
  String field = template.getField();
  Arrays.stream(ids).parallel().forEach(id -> {
    Map<Integer, String> termVector = index.getTermVectorFromIndex(field, id);
    add(termVector, multiset, template);
  });
  return multiset;
}