public WritableSortable(int j) throws IOException { seed = r.nextLong(); r.setSeed(seed); Text t = new Text(); StringBuilder sb = new StringBuilder(); indices = new int[j]; offsets = new int[j]; check = new String[j]; DataOutputBuffer dob = new DataOutputBuffer(); for (int i = 0; i < j; ++i) { indices[i] = i; offsets[i] = dob.getLength(); genRandom(t, r.nextInt(15) + 1, sb); t.write(dob); check[i] = t.toString(); } eob = dob.getLength(); bytes = dob.getData(); comparator = WritableComparator.get(Text.class); }
/**
 * Fills {@code o} with a key: the first {@code MIN_KEY_LEN} bytes are the
 * current {@code prefix}, the remainder is built from randomly chosen
 * dictionary words (the last word is truncated to fit).
 *
 * When {@code sorted} is set and the previous key's suffix compares greater
 * than the new suffix, the prefix is incremented before it is written.
 * The generated key is remembered in {@code lastKey}.
 *
 * @param o the key to populate; it is resized to the drawn key length
 */
private void fillKey(BytesWritable o) {
  // Never generate a key shorter than the prefix region.
  final int keyLen = Math.max(keyLenRNG.nextInt(), MIN_KEY_LEN);
  o.setSize(keyLen);

  // Pack dictionary words after the prefix region until the key is full.
  int filled = MIN_KEY_LEN;
  while (filled < keyLen) {
    final byte[] word = dict[random.nextInt(dict.length)];
    final int chunk = Math.min(word.length, keyLen - filled);
    System.arraycopy(word, 0, o.getBytes(), filled, chunk);
    filled += chunk;
  }

  if (sorted) {
    // Compare only the suffixes (everything after the prefix region).
    final int order = WritableComparator.compareBytes(
        lastKey.getBytes(), MIN_KEY_LEN, lastKey.getLength() - MIN_KEY_LEN,
        o.getBytes(), MIN_KEY_LEN, o.getLength() - MIN_KEY_LEN);
    if (order > 0) {
      incrementPrefix();
    }
  }

  System.arraycopy(prefix, 0, o.getBytes(), 0, MIN_KEY_LEN);
  lastKey.set(o);
}
/**
 * Checks the reverse-ordered long encoding for a single value.
 *
 * Verifies that {@code l} round-trips at offset 0 and at offset 5 of a
 * larger buffer, and that byte-wise comparison orders the encodings as
 * {@code enc(l-1) > enc(l) > enc(l+1)} wherever those neighbours exist.
 *
 * @param l the value to encode and check
 */
private static void testEncoding(long l) {
  final byte[] encoded = GenericObjectMapper.writeReverseOrderedLong(l);
  assertEquals("error decoding", l,
      GenericObjectMapper.readReverseOrderedLong(encoded, 0));

  // Decode again from the middle of a larger buffer.
  final byte[] padded = new byte[16];
  System.arraycopy(encoded, 0, padded, 5, 8);
  assertEquals("error decoding at offset", l,
      GenericObjectMapper.readReverseOrderedLong(padded, 5));

  if (l > Long.MIN_VALUE) {
    final byte[] predecessor = GenericObjectMapper.writeReverseOrderedLong(l - 1);
    final int order = WritableComparator.compareBytes(
        predecessor, 0, predecessor.length, encoded, 0, encoded.length);
    assertEquals("error preserving ordering", 1, order);
  }

  if (l < Long.MAX_VALUE) {
    final byte[] successor = GenericObjectMapper.writeReverseOrderedLong(l + 1);
    final int order = WritableComparator.compareBytes(
        encoded, 0, encoded.length, successor, 0, successor.length);
    assertEquals("error preserving ordering", 1, order);
  }
}
/**
 * Parses a join expression into a tree of nodes.
 *
 * Tokens are pushed on a stack; every closing parenthesis triggers a
 * reduction of the stack. If the job configures a key comparator class
 * under {@code mapred.join.keycomparator}, it is set on the resulting root.
 *
 * @param expr the expression to parse
 * @param job  the job configuration consulted for the comparator class
 * @return the root node of the parsed expression
 * @throws IOException if {@code expr} is null or the tokens do not reduce
 *         to exactly one composite node
 */
static Node parse(String expr, JobConf job) throws IOException {
  if (expr == null) {
    throw new IOException("Expression is null");
  }
  final Class<? extends WritableComparator> cmpcl =
      job.getClass("mapred.join.keycomparator", null, WritableComparator.class);

  final Lexer lex = new Lexer(expr);
  final Stack<Token> st = new Stack<Token>();
  for (Token tok = lex.next(); tok != null; tok = lex.next()) {
    final boolean closing = TType.RPAREN.equals(tok.getType());
    st.push(closing ? reduce(st, job) : tok);
  }

  // Anything other than a single composite node left on the stack means
  // the expression did not reduce completely.
  if (st.size() != 1 || !TType.CIF.equals(st.peek().getType())) {
    throw new IOException("Missing ')'");
  }
  final Node root = st.pop().getNode();
  if (cmpcl != null) {
    root.setKeyComparator(cmpcl);
  }
  return root;
}
/** * Create a RecordReader with <tt>capacity</tt> children to position * <tt>id</tt> in the parent reader. * The id of a root CompositeRecordReader is -1 by convention, but relying * on this is not recommended. */ @SuppressWarnings("unchecked") // Generic array assignment public CompositeRecordReader(int id, int capacity, Class<? extends WritableComparator> cmpcl) throws IOException { assert capacity > 0 : "Invalid capacity"; this.id = id; if (null != cmpcl) { cmp = ReflectionUtils.newInstance(cmpcl, null); q = new PriorityQueue<ComposableRecordReader<K,?>>(3, new Comparator<ComposableRecordReader<K,?>>() { public int compare(ComposableRecordReader<K,?> o1, ComposableRecordReader<K,?> o2) { return cmp.compare(o1.key(), o2.key()); } }); } jc = new JoinCollector(capacity); kids = new ComposableRecordReader[capacity]; }
/**
 * Add a RecordReader to this collection.
 * The id() of a RecordReader determines the slot it occupies and where in
 * the Tuple its entry will appear. Adding RecordReaders with the same id
 * has undefined behavior.
 *
 * If no queue exists yet, a comparator is obtained for the reader's key
 * class and a queue ordered by current key is created. The reader is
 * queued only if it has a next element.
 *
 * @param rr the reader to register
 * @throws IOException declared by the signature
 */
public void add(ComposableRecordReader<K,? extends V> rr) throws IOException {
  kids[rr.id()] = rr;
  if (q == null) {
    cmp = WritableComparator.get(rr.createKey().getClass(), conf);
    // Order child readers by their current key.
    final Comparator<ComposableRecordReader<K,?>> byKey =
        new Comparator<ComposableRecordReader<K,?>>() {
          public int compare(ComposableRecordReader<K,?> left,
                             ComposableRecordReader<K,?> right) {
            return cmp.compare(left.key(), right.key());
          }
        };
    q = new PriorityQueue<ComposableRecordReader<K,?>>(3, byKey);
  }
  if (rr.hasNext()) {
    q.add(rr);
  }
}
/**
 * Parses a join expression into a tree of nodes.
 *
 * Tokens are pushed on a stack; every closing parenthesis triggers a
 * reduction of the stack. If the configuration names a key comparator
 * class under {@link CompositeInputFormat#JOIN_COMPARATOR}, it is set on
 * the resulting root.
 *
 * @param expr the expression to parse
 * @param conf the configuration consulted for the comparator class
 * @return the root node of the parsed expression
 * @throws IOException if {@code expr} is null or the tokens do not reduce
 *         to exactly one composite node
 */
static Node parse(String expr, Configuration conf) throws IOException {
  if (expr == null) {
    throw new IOException("Expression is null");
  }
  final Class<? extends WritableComparator> cmpcl = conf.getClass(
      CompositeInputFormat.JOIN_COMPARATOR, null, WritableComparator.class);

  final Lexer lex = new Lexer(expr);
  final Stack<Token> st = new Stack<Token>();
  for (Token tok = lex.next(); tok != null; tok = lex.next()) {
    final boolean closing = TType.RPAREN.equals(tok.getType());
    st.push(closing ? reduce(st, conf) : tok);
  }

  // Anything other than a single composite node left on the stack means
  // the expression did not reduce completely.
  if (st.size() != 1 || !TType.CIF.equals(st.peek().getType())) {
    throw new IOException("Missing ')'");
  }
  final Node root = st.pop().getNode();
  if (cmpcl != null) {
    root.setKeyComparator(cmpcl);
  }
  return root;
}
/**
 * Fills {@code o} with a key: the first {@code MIN_KEY_LEN} bytes are the
 * current {@code prefix}, the remainder is built from randomly chosen
 * dictionary words (the last word is truncated to fit).
 *
 * When {@code sorted} is set and the previous key's suffix compares greater
 * than the new suffix, the prefix is incremented before it is written.
 * The generated key is remembered in {@code lastKey}.
 *
 * @param o the key to populate; it is resized to the drawn key length
 */
private void fillKey(BytesWritable o) {
  // Never generate a key shorter than the prefix region.
  final int keyLen = Math.max(keyLenRNG.nextInt(), MIN_KEY_LEN);
  o.setSize(keyLen);

  // Pack dictionary words after the prefix region until the key is full.
  int filled = MIN_KEY_LEN;
  while (filled < keyLen) {
    final byte[] word = dict[random.nextInt(dict.length)];
    final int chunk = Math.min(word.length, keyLen - filled);
    System.arraycopy(word, 0, o.get(), filled, chunk);
    filled += chunk;
  }

  if (sorted) {
    // Compare only the suffixes (everything after the prefix region).
    final int order = WritableComparator.compareBytes(
        lastKey.get(), MIN_KEY_LEN, lastKey.getSize() - MIN_KEY_LEN,
        o.get(), MIN_KEY_LEN, o.getSize() - MIN_KEY_LEN);
    if (order > 0) {
      incrementPrefix();
    }
  }

  System.arraycopy(prefix, 0, o.get(), 0, MIN_KEY_LEN);
  lastKey.set(o);
}
/**
 * Compares the bytes returned by {@link #getBytes()}, over the first
 * {@link #getLength()} bytes of each side.
 *
 * @param other the instance to compare against
 * @return 0 when {@code other} is this same instance, otherwise the result
 *         of the byte-wise comparison
 * @see org.apache.hadoop.io.WritableComparator#compareBytes(byte[],int,int,byte[],int,int)
 */
public int compareTo(BinaryComparable other) {
  // Identity short-circuit: no need to touch the bytes.
  return (this == other)
      ? 0
      : WritableComparator.compareBytes(
          getBytes(), 0, getLength(),
          other.getBytes(), 0, other.getLength());
}
/**
 * Tests whether {@code b} starts with the first {@code prefixlen} bytes of
 * {@code prefix}.
 *
 * @param prefix    array holding the prefix bytes
 * @param prefixlen number of leading bytes of {@code prefix} to match
 * @param b         array to test
 * @return false if {@code b} is shorter than {@code prefixlen}; otherwise
 *         true exactly when the leading bytes compare equal
 */
public static boolean prefixMatches(byte[] prefix, int prefixlen, byte[] b) {
  final boolean longEnough = b.length >= prefixlen;
  return longEnough
      && WritableComparator.compareBytes(prefix, 0, prefixlen, b, 0, prefixlen) == 0;
}
/**
 * Test a user comparator that relies on deserializing both arguments
 * for each compare.
 *
 * The two values are expected to order one way under their own
 * {@code compareTo} and the opposite way under the comparator obtained
 * from {@link WritableComparator#get}.
 */
@Test
public void testBakedUserComparator() throws Exception {
  final MyWritable a = new MyWritable(8, 8);
  final MyWritable b = new MyWritable(7, 9);

  final int natural = a.compareTo(b);
  assertTrue(natural > 0);

  final WritableComparator registered = WritableComparator.get(MyWritable.class);
  assertTrue(registered.compare(a, b) < 0);
}
/**
 * Get the {@link RawComparator} comparator used to compare keys.
 *
 * A comparator class configured under {@code JobContext.KEY_COMPARATOR}
 * is instantiated if present; otherwise the comparator for the map output
 * key class is looked up through {@link WritableComparator#get}.
 *
 * @return the {@link RawComparator} comparator used to compare keys.
 */
public RawComparator getOutputKeyComparator() {
  final Class<? extends RawComparator> configured =
      getClass(JobContext.KEY_COMPARATOR, null, RawComparator.class);
  if (configured == null) {
    // Nothing configured: fall back to the key class's own comparator.
    return WritableComparator.get(
        getMapOutputKeyClass().asSubclass(WritableComparable.class), this);
  }
  return ReflectionUtils.newInstance(configured, this);
}
/**
 * Use (the specified slice of the array returned by)
 * {@link BinaryComparable#getBytes()} to partition.
 *
 * {@code leftOffset} and {@code rightOffset} are folded into the key's
 * length with a modulo to select the slice {@code [leftIndex, rightIndex]}
 * that is hashed.
 *
 * A zero-length key has no slice to select; it is hashed over the empty
 * range instead of failing with a division by zero in the modulo.
 *
 * @param key           the key whose bytes are hashed
 * @param value         the value (not used for partitioning)
 * @param numPartitions the number of partitions
 * @return a partition number in {@code [0, numPartitions)}
 */
@Override
public int getPartition(BinaryComparable key, V value, int numPartitions) {
  final int length = key.getLength();
  final int hash;
  if (length == 0) {
    // "% length" below would throw ArithmeticException for an empty key.
    hash = WritableComparator.hashBytes(key.getBytes(), 0, 0);
  } else {
    int leftIndex = (leftOffset + length) % length;
    int rightIndex = (rightOffset + length) % length;
    hash = WritableComparator.hashBytes(key.getBytes(),
        leftIndex, rightIndex - leftIndex + 1);
  }
  // Mask the sign bit so the modulo result is never negative.
  return (hash & Integer.MAX_VALUE) % numPartitions;
}
/**
 * Initializes the wrapped reader and reads the first key/value pair.
 *
 * If the reader is not empty after that first read, the key and value
 * classes are recorded from the objects just read, and a comparator for
 * the key class is obtained unless one is already set.
 *
 * @param split   the split handed to the wrapped reader
 * @param context the task context; also the source of the configuration
 * @throws IOException          propagated from the wrapped reader
 * @throws InterruptedException propagated from the wrapped reader
 */
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  rr.initialize(split, context);
  conf = context.getConfiguration();
  nextKeyValue();
  if (empty) {
    // Nothing was read, so there are no key/value objects to inspect.
    return;
  }
  keyclass = key.getClass().asSubclass(WritableComparable.class);
  valueclass = value.getClass();
  if (null == cmp) {
    cmp = WritableComparator.get(keyclass, conf);
  }
}
/**
 * Sets the key comparator class on this node and on each of its children.
 *
 * @param cmpcl the comparator class to apply
 */
@Override
public void setKeyComparator(Class<? extends WritableComparator> cmpcl) {
  super.setKeyComparator(cmpcl);
  // Apply the same comparator to every child node.
  for (Node child : kids) {
    child.setKeyComparator(cmpcl);
  }
}
@SuppressWarnings("unchecked") public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { if (kids != null) { for (int i = 0; i < kids.length; ++i) { kids[i].initialize(((CompositeInputSplit)split).get(i), context); if (kids[i].key() == null) { continue; } // get keyclass if (keyclass == null) { keyclass = kids[i].createKey().getClass(). asSubclass(WritableComparable.class); } // create priority queue if (null == q) { cmp = WritableComparator.get(keyclass, conf); q = new PriorityQueue<ComposableRecordReader<K,?>>(3, new Comparator<ComposableRecordReader<K,?>>() { public int compare(ComposableRecordReader<K,?> o1, ComposableRecordReader<K,?> o2) { return cmp.compare(o1.key(), o2.key()); } }); } // Explicit check for key class agreement if (!keyclass.equals(kids[i].key().getClass())) { throw new ClassCastException("Child key classes fail to agree"); } // add the kid to priority queue if it has any elements if (kids[i].hasNext()) { q.add(kids[i]); } } } }
static void binSortTest(GridmixRecord x, GridmixRecord y, int min, int max, WritableComparator cmp) throws Exception { final Random r = new Random(); final long s = r.nextLong(); r.setSeed(s); LOG.info("sort: " + s); final DataOutputBuffer out1 = new DataOutputBuffer(); final DataOutputBuffer out2 = new DataOutputBuffer(); for (int i = min; i < max; ++i) { final long seed1 = r.nextLong(); setSerialize(x, seed1, i, out1); assertEquals(0, x.compareSeed(seed1, Math.max(0, i - x.fixedBytes()))); final long seed2 = r.nextLong(); setSerialize(y, seed2, i, out2); assertEquals(0, y.compareSeed(seed2, Math.max(0, i - x.fixedBytes()))); // for eq sized records, ensure byte cmp where req final int chk = WritableComparator.compareBytes( out1.getData(), 0, out1.getLength(), out2.getData(), 0, out2.getLength()); assertEquals(Integer.signum(chk), Integer.signum(x.compareTo(y))); assertEquals(Integer.signum(chk), Integer.signum(cmp.compare( out1.getData(), 0, out1.getLength(), out2.getData(), 0, out2.getLength()))); // write second copy, compare eq final int s1 = out1.getLength(); x.write(out1); assertEquals(0, cmp.compare(out1.getData(), 0, s1, out1.getData(), s1, out1.getLength() - s1)); final int s2 = out2.getLength(); y.write(out2); assertEquals(0, cmp.compare(out2.getData(), 0, s2, out2.getData(), s2, out2.getLength() - s2)); assertEquals(Integer.signum(chk), Integer.signum(cmp.compare(out1.getData(), 0, s1, out2.getData(), s2, out2.getLength() - s2))); } }
/**
 * Tests whether a region name fails to start with the given table's name,
 * logging an error when it does.
 *
 * A region name shorter than the table name cannot start with it, so it is
 * reported as not belonging without comparing bytes; this avoids asking
 * the comparison to read past the end of {@code rn}.
 *
 * @param tn the table the region is expected to belong to
 * @param rn the region name bytes
 * @return true if {@code rn} does not begin with the table name
 */
private boolean notInTable(final TableName tn, final byte [] rn) {
  final byte[] tableName = tn.getName();
  final boolean mismatch = rn.length < tableName.length
      || WritableComparator.compareBytes(tableName, 0, tableName.length,
          rn, 0, tableName.length) != 0;
  if (mismatch) {
    LOG.error("Region " + Bytes.toStringBinary(rn) +
        " does not belong to table " + tn);
    return true;
  }
  return false;
}