Java 类org.apache.hadoop.io.SequenceFile.Reader.Option 实例源码
项目:GeoCrawler
文件:TestInjector.java
/**
 * Reads all keys from the {@code part-00000/data} sequence file located under
 * the {@code CrawlDb.CURRENT_NAME} directory of {@code crawldbPath}.
 *
 * @return the string form of every key, in the order the reader returned them
 * @throws IOException
 *           if the sequence file cannot be opened or read
 */
private List<String> readCrawldb() throws IOException {
  Path dbfile = new Path(crawldbPath, CrawlDb.CURRENT_NAME
      + "/part-00000/data");
  System.out.println("reading:" + dbfile);
  Option rFile = SequenceFile.Reader.file(dbfile);
  List<String> read = new ArrayList<String>();
  // try-with-resources guarantees the reader is closed, including when
  // next() throws part-way through the file.
  try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, rFile)) {
    // Only key.toString() is retained, so the Writable instances can be
    // reused across records.
    Text key = new Text();
    CrawlDatum value = new CrawlDatum();
    while (reader.next(key, value)) {
      read.add(key.toString());
    }
  }
  return read;
}
项目:GeoCrawler
文件:TestInjector.java
/**
 * Reads all records from the {@code part-00000/data} sequence file located
 * under the {@code CrawlDb.CURRENT_NAME} directory of {@code crawldbPath}.
 *
 * @return a map from the string form of each key to its {@link CrawlDatum};
 *         if a key occurs more than once the last value read wins
 * @throws IOException
 *           if the sequence file cannot be opened or read
 */
private HashMap<String, CrawlDatum> readCrawldbRecords() throws IOException {
  Path dbfile = new Path(crawldbPath, CrawlDb.CURRENT_NAME
      + "/part-00000/data");
  System.out.println("reading:" + dbfile);
  Option rFile = SequenceFile.Reader.file(dbfile);
  HashMap<String, CrawlDatum> read = new HashMap<String, CrawlDatum>();
  // try-with-resources guarantees the reader is closed, including when
  // next() throws part-way through the file.
  try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, rFile)) {
    while (true) {
      // A fresh value per record is required: the instance is stored in the
      // result map and must not be overwritten by the next read.
      Text key = new Text();
      CrawlDatum value = new CrawlDatum();
      if (!reader.next(key, value)) {
        break;
      }
      read.put(key.toString(), value);
    }
  }
  return read;
}
项目:GeoCrawler
文件:TestCrawlDbFilter.java
/**
 * Read contents of fetchlist.
 *
 * @param fetchlist
 *          path to Generated fetchlist
 * @return Generated {@link URLCrawlDatum} objects, in the order they were
 *         read
 * @throws IOException
 *           if the fetchlist cannot be opened or read
 */
private ArrayList<URLCrawlDatum> readContents(Path fetchlist)
    throws IOException {
  // verify results
  Option fFile = SequenceFile.Reader.file(fetchlist);
  ArrayList<URLCrawlDatum> l = new ArrayList<URLCrawlDatum>();
  // try-with-resources closes the reader on every exit path; a trailing
  // close() call would be skipped if next() threw.
  try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, fFile)) {
    while (true) {
      // Fresh instances per record: both objects are retained inside the
      // URLCrawlDatum added to the result list.
      Text key = new Text();
      CrawlDatum value = new CrawlDatum();
      if (!reader.next(key, value)) {
        break;
      }
      l.add(new URLCrawlDatum(key, value));
    }
  }
  return l;
}
项目:GeoCrawler
文件:TestGenerator.java
/**
 * Read contents of fetchlist.
 *
 * @param fetchlist
 *          path to Generated fetchlist
 * @return Generated {@link URLCrawlDatum} objects, in the order they were
 *         read
 * @throws IOException
 *           if the fetchlist cannot be opened or read
 */
private ArrayList<URLCrawlDatum> readContents(Path fetchlist)
    throws IOException {
  // verify results
  Option rFile = SequenceFile.Reader.file(fetchlist);
  ArrayList<URLCrawlDatum> l = new ArrayList<URLCrawlDatum>();
  // try-with-resources closes the reader on every exit path; a trailing
  // close() call would be skipped if next() threw.
  try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, rFile)) {
    while (true) {
      // Fresh instances per record: both objects are retained inside the
      // URLCrawlDatum added to the result list.
      Text key = new Text();
      CrawlDatum value = new CrawlDatum();
      if (!reader.next(key, value)) {
        break;
      }
      l.add(new URLCrawlDatum(key, value));
    }
  }
  return l;
}
项目:kylin
文件:CubeStatsReader.java
/**
 * Populates this result from the sequence file at {@code path}.
 *
 * <p>Each record's key selects what its value is decoded into:
 * <ul>
 * <li>key {@code > 0}: an {@link HLLCounter} stored in {@code counterMap}
 * under that key</li>
 * <li>key {@code 0}: {@code percentage} (int)</li>
 * <li>key {@code -1}: {@code mapperOverlapRatio} (double)</li>
 * <li>key {@code -2}: {@code mapperNumber} (int)</li>
 * </ul>
 * Records with any other key are ignored.
 *
 * @param path      location of the statistics sequence file
 * @param precision precision passed to each {@link HLLCounter} that is created
 * @throws IOException if the file cannot be opened or read
 */
public CubeStatsResult(Path path, int precision) throws IOException {
    Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
    Option seqInput = SequenceFile.Reader.file(path);
    try (Reader reader = new SequenceFile.Reader(hadoopConf, seqInput)) {
        // Key/value holders are instantiated from the classes recorded in the file.
        LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
        BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
        while (reader.next(key, value)) {
            final long id = key.get();
            if (id > 0) {
                HLLCounter counter = new HLLCounter(precision);
                counter.readRegisters(new ByteArray(value.getBytes()).asBuffer());
                counterMap.put(id, counter);
            } else if (id == 0L) {
                percentage = Bytes.toInt(value.getBytes());
            } else if (id == -1) {
                mapperOverlapRatio = Bytes.toDouble(value.getBytes());
            } else if (id == -2) {
                mapperNumber = Bytes.toInt(value.getBytes());
            }
        }
    }
}
项目:warcutils
文件:WarcSequenceFileRecordReader.java
/**
 * Opens a {@link SequenceFile.Reader} on the file backing the given split and
 * positions it for reading.
 *
 * <p>If the split starts beyond the reader's initial position, the reader is
 * synced to the split start. The resulting reader position is recorded as
 * {@code start}, and {@code done} is set when that position is already at or
 * past the split end.
 *
 * @param inputSplit the split to read; cast to {@link FileSplit}
 * @param context    supplies the job {@link Configuration}
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    final FileSplit fileSplit = (FileSplit) inputSplit;
    final Configuration jobConf = context.getConfiguration();
    final Path splitPath = fileSplit.getPath();

    in = new SequenceFile.Reader(jobConf, SequenceFile.Reader.file(splitPath));

    final long splitStart = fileSplit.getStart();
    this.end = splitStart + inputSplit.getLength();
    if (splitStart > in.getPosition()) {
        in.sync(splitStart);
    }
    this.start = in.getPosition();
    this.done = (this.start >= this.end);
}
项目:cdk
文件:ReadSequenceFileBuilder.java
/**
 * Reads the given stream as a sequence file and emits one output record per
 * key/value pair to the child command.
 *
 * <p>Each output record is a copy of {@code inputRecord} with its attachments
 * removed, carrying the key under {@code keyField}, the value under
 * {@code valueField}, the output media type, and (when {@code includeMetaData}
 * is set and metadata is present) the sequence file metadata.
 *
 * @param inputRecord the record used as a template for every output record
 * @param in          the raw sequence file bytes
 * @return {@code false} as soon as the child command rejects a record,
 *         {@code true} once all pairs have been passed on
 * @throws IOException if reading the sequence file fails
 */
@Override
protected boolean doProcess(Record inputRecord, InputStream in) throws IOException {
  FSDataInputStream seekableIn = new FSDataInputStream(new ForwardOnlySeekable(in));
  Option streamOption = SequenceFile.Reader.stream(seekableIn);
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(conf, streamOption);
    SequenceFile.Metadata metaData = includeMetaData ? reader.getMetadata() : null;
    Class<?> keyClass = reader.getKeyClass();
    Class<?> valueClass = reader.getValueClass();

    Record template = inputRecord.copy();
    removeAttachments(template);

    for (;;) {
      Writable key = (Writable) ReflectionUtils.newInstance(keyClass, conf);
      Writable val = (Writable) ReflectionUtils.newInstance(valueClass, conf);

      boolean hasNext;
      try {
        hasNext = reader.next(key, val);
      } catch (EOFException ex) {
        // SequenceFile.Reader will throw an EOFException after reading
        // all the data, if it doesn't know the length. Since we are
        // passing in an InputStream, we hit this case;
        LOG.trace("Received expected EOFException", ex);
        hasNext = false;
      }
      if (!hasNext) {
        break;
      }

      incrementNumRecords();
      Record outputRecord = template.copy();
      outputRecord.put(keyField, key);
      outputRecord.put(valueField, val);
      outputRecord.put(Fields.ATTACHMENT_MIME_TYPE, OUTPUT_MEDIA_TYPE);
      if (includeMetaData && metaData != null) {
        outputRecord.put(SEQUENCE_FILE_META_DATA, metaData);
      }

      // pass record to next command in chain:
      if (!getChild().process(outputRecord)) {
        return false;
      }
    }
  } finally {
    // Close failures are deliberately suppressed here.
    Closeables.closeQuietly(reader);
  }
  return true;
}