Java 类org.apache.hadoop.util.LineReader 实例源码
项目:hadoop
文件:TestKeyValueTextInputFormat.java
public void testUTF8() throws Exception {
LineReader in = null;
try {
in = makeStream("abcd\u20acbdcd\u20ac");
Text line = new Text();
in.readLine(line);
assertEquals("readLine changed utf8 characters",
"abcd\u20acbdcd\u20ac", line.toString());
in = makeStream("abc\u200axyz");
in.readLine(line);
assertEquals("split on fake newline", "abc\u200axyz", line.toString());
} finally {
if (in != null) {
in.close();
}
}
}
项目:hadoop
文件:TestKeyValueTextInputFormat.java
public void testNewLines() throws Exception {
LineReader in = null;
try {
in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
Text out = new Text();
in.readLine(out);
assertEquals("line1 length", 1, out.getLength());
in.readLine(out);
assertEquals("line2 length", 2, out.getLength());
in.readLine(out);
assertEquals("line3 length", 0, out.getLength());
in.readLine(out);
assertEquals("line4 length", 3, out.getLength());
in.readLine(out);
assertEquals("line5 length", 4, out.getLength());
in.readLine(out);
assertEquals("line5 length", 5, out.getLength());
assertEquals("end of file", 0, in.readLine(out));
} finally {
if (in != null) {
in.close();
}
}
}
项目:hadoop
文件:TestTextInputFormat.java
/**
* Test readLine for correct interpretation of maxLineLength
* (returned string should be clipped at maxLineLength, and the
* remaining bytes on the same line should be thrown out).
* Also check that returned value matches the string length.
* Varies buffer size to stress test.
*
* @throws Exception
*/
@Test (timeout=5000)
public void testMaxLineLength() throws Exception {
final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
final int STRLENBYTES = STR.getBytes().length;
Text out = new Text();
for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
LineReader in = makeStream(STR, bufsz);
int c = 0;
c += in.readLine(out, 1);
assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
c += in.readLine(out, 1);
assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
c += in.readLine(out, 1);
assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
c += in.readLine(out, 3);
assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
c += in.readLine(out, 10);
assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
c += in.readLine(out, 8);
assertEquals("line5 length, bufsz: "+bufsz, 5, out.getLength());
assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
assertEquals("total bytes, bufsz: "+bufsz, c, STRLENBYTES);
}
}
项目:hadoop
文件:TestMRKeyValueTextInputFormat.java
@Test
public void testNewLines() throws Exception {
LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
Text out = new Text();
in.readLine(out);
assertEquals("line1 length", 1, out.getLength());
in.readLine(out);
assertEquals("line2 length", 2, out.getLength());
in.readLine(out);
assertEquals("line3 length", 0, out.getLength());
in.readLine(out);
assertEquals("line4 length", 3, out.getLength());
in.readLine(out);
assertEquals("line5 length", 4, out.getLength());
in.readLine(out);
assertEquals("line5 length", 5, out.getLength());
assertEquals("end of file", 0, in.readLine(out));
}
项目:hadoop
文件:MultiFileWordCount.java
public CombineFileLineRecordReader(CombineFileSplit split,
TaskAttemptContext context, Integer index) throws IOException {
this.path = split.getPath(index);
fs = this.path.getFileSystem(context.getConfiguration());
this.startOffset = split.getOffset(index);
this.end = startOffset + split.getLength(index);
boolean skipFirstLine = false;
//open the file
fileIn = fs.open(path);
if (startOffset != 0) {
skipFirstLine = true;
--startOffset;
fileIn.seek(startOffset);
}
reader = new LineReader(fileIn);
if (skipFirstLine) { // skip first line and re-establish "startOffset".
startOffset += reader.readLine(new Text(), 0,
(int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
}
this.pos = startOffset;
}
项目:SparkSeq
文件:SingleFastqInputFormat.java
public SingleFastqRecordReader(Configuration conf, FileSplit split) throws IOException {
file = split.getPath();
start = split.getStart();
end = start + split.getLength();
FileSystem fs = file.getFileSystem(conf);
FSDataInputStream fileIn = fs.open(file);
CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
CompressionCodec codec = codecFactory.getCodec(file);
if (codec == null) { // no codec. Uncompressed file.
positionAtFirstRecord(fileIn);
inputStream = fileIn;
} else {
// compressed file
if (start != 0) {
throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");
}
inputStream = codec.createInputStream(fileIn);
end = Long.MAX_VALUE; // read until the end of the file
}
lineReader = new LineReader(inputStream);
}
项目:ditb
文件:CompactionTool.java
/**
* Returns a split for each store files directory using the block location
* of each file as locality reference.
*/
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
List<InputSplit> splits = new ArrayList<InputSplit>();
List<FileStatus> files = listStatus(job);
Text key = new Text();
for (FileStatus file: files) {
Path path = file.getPath();
FileSystem fs = path.getFileSystem(job.getConfiguration());
LineReader reader = new LineReader(fs.open(path));
long pos = 0;
int n;
try {
while ((n = reader.readLine(key)) > 0) {
String[] hosts = getStoreDirHosts(fs, path);
splits.add(new FileSplit(path, pos, n, hosts));
pos += n;
}
} finally {
reader.close();
}
}
return splits;
}
项目:aliyun-oss-hadoop-fs
文件:TestKeyValueTextInputFormat.java
public void testUTF8() throws Exception {
LineReader in = null;
try {
in = makeStream("abcd\u20acbdcd\u20ac");
Text line = new Text();
in.readLine(line);
assertEquals("readLine changed utf8 characters",
"abcd\u20acbdcd\u20ac", line.toString());
in = makeStream("abc\u200axyz");
in.readLine(line);
assertEquals("split on fake newline", "abc\u200axyz", line.toString());
} finally {
if (in != null) {
in.close();
}
}
}
项目:aliyun-oss-hadoop-fs
文件:TestKeyValueTextInputFormat.java
public void testNewLines() throws Exception {
LineReader in = null;
try {
in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
Text out = new Text();
in.readLine(out);
assertEquals("line1 length", 1, out.getLength());
in.readLine(out);
assertEquals("line2 length", 2, out.getLength());
in.readLine(out);
assertEquals("line3 length", 0, out.getLength());
in.readLine(out);
assertEquals("line4 length", 3, out.getLength());
in.readLine(out);
assertEquals("line5 length", 4, out.getLength());
in.readLine(out);
assertEquals("line5 length", 5, out.getLength());
assertEquals("end of file", 0, in.readLine(out));
} finally {
if (in != null) {
in.close();
}
}
}
项目:aliyun-oss-hadoop-fs
文件:TestTextInputFormat.java
/**
* Test readLine for correct interpretation of maxLineLength
* (returned string should be clipped at maxLineLength, and the
* remaining bytes on the same line should be thrown out).
* Also check that returned value matches the string length.
* Varies buffer size to stress test.
*
* @throws Exception
*/
@Test (timeout=5000)
public void testMaxLineLength() throws Exception {
final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
final int STRLENBYTES = STR.getBytes().length;
Text out = new Text();
for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
LineReader in = makeStream(STR, bufsz);
int c = 0;
c += in.readLine(out, 1);
assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
c += in.readLine(out, 1);
assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
c += in.readLine(out, 1);
assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
c += in.readLine(out, 3);
assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
c += in.readLine(out, 10);
assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
c += in.readLine(out, 8);
assertEquals("line5 length, bufsz: "+bufsz, 5, out.getLength());
assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
assertEquals("total bytes, bufsz: "+bufsz, c, STRLENBYTES);
}
}
项目:aliyun-oss-hadoop-fs
文件:TestMRKeyValueTextInputFormat.java
@Test
public void testNewLines() throws Exception {
LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
Text out = new Text();
in.readLine(out);
assertEquals("line1 length", 1, out.getLength());
in.readLine(out);
assertEquals("line2 length", 2, out.getLength());
in.readLine(out);
assertEquals("line3 length", 0, out.getLength());
in.readLine(out);
assertEquals("line4 length", 3, out.getLength());
in.readLine(out);
assertEquals("line5 length", 4, out.getLength());
in.readLine(out);
assertEquals("line5 length", 5, out.getLength());
assertEquals("end of file", 0, in.readLine(out));
}
项目:aliyun-oss-hadoop-fs
文件:MultiFileWordCount.java
public CombineFileLineRecordReader(CombineFileSplit split,
TaskAttemptContext context, Integer index) throws IOException {
this.path = split.getPath(index);
fs = this.path.getFileSystem(context.getConfiguration());
this.startOffset = split.getOffset(index);
this.end = startOffset + split.getLength(index);
boolean skipFirstLine = false;
//open the file
fileIn = fs.open(path);
if (startOffset != 0) {
skipFirstLine = true;
--startOffset;
fileIn.seek(startOffset);
}
reader = new LineReader(fileIn);
if (skipFirstLine) { // skip first line and re-establish "startOffset".
startOffset += reader.readLine(new Text(), 0,
(int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
}
this.pos = startOffset;
}
项目:SOAPgaea
文件:WholeGenomeShare.java
public static boolean distributeCache(String chrList, Job job, String cacheName)
throws IOException, URISyntaxException {
job.addCacheFile(new URI(chrList + "#" + cacheName));
Configuration conf = job.getConfiguration();
Path refPath = new Path(chrList);
FileSystem fs = refPath.getFileSystem(conf);
FSDataInputStream refin = fs.open(refPath);
LineReader in = new LineReader(refin);
Text line = new Text();
String chrFile = "";
String[] chrs = new String[3];
while ((in.readLine(line)) != 0) {
chrFile = line.toString();
chrs = chrFile.split("\t");
File fileTest = new File(chrs[1]);
if (fileTest.isFile()) {
chrs[1] = "file://" + chrs[1];
}
job.addCacheFile(new URI(chrs[1] + "#" + chrs[0]));
}
in.close();
refin.close();
return true;
}
项目:SOAPgaea
文件:WholeGenomeShare.java
protected void loadChromosomeList(Path refPath) throws NumberFormatException, IOException{
Configuration conf = new Configuration();
FileSystem fs = refPath.getFileSystem(conf);
FSDataInputStream refin = fs.open(refPath);
LineReader in = new LineReader(refin);
Text line = new Text();
String chrFile = "";
String[] chrs = new String[3];
while((in.readLine(line)) != 0){
chrFile = line.toString();
chrs = chrFile.split("\t");
// insert chr
if(!addChromosome(chrs[0])) {
in.close();
throw new RuntimeException("map Chromosome "+chrs[1]+" Failed.");
}
setChromosome(chrs[1],chrs[0],Integer.parseInt(chrs[2]));
}
in.close();
}
项目:SOAPgaea
文件:FastqQualityControlReporterIO.java
public void readFromHdfs(Path path, Configuration conf,
FastqQualityControlReport report) throws IOException {
FileSystem fs = path.getFileSystem(conf);
FSDataInputStream FSinput = fs.open(path);
LineReader lineReader = new LineReader(FSinput, conf);
Text line = new Text();
int sampleID = 0, i,cnt;
while ((lineReader.readLine(line)) != 0) {
sampleID = report.addCount(line.toString());
if(report.isPartitionNull())
continue;
for (i = 0; i < FastqQualityControlReport.BASE_STATIC_COUNT; i++) {
cnt = lineReader.readLine(line);
if(cnt == 0)
continue;
report.addBaseByPosition(sampleID, i, line.toString());
}
}
lineReader.close();
}
项目:SOAPgaea
文件:WholeGenomeResultReport.java
@Override
public void parseReport(LineReader lineReader, Text line, ReferenceShare genome) throws IOException {
super.parseReport(lineReader, line, genome);
String lineString = line.toString();
if(lineString.contains("Cover Information")) {
if(lineReader.readLine(line) > 0 && line.getLength() != 0) {
String[] splitArray = line.toString().split("\t");
WholeGenomeCoverReport coverReport = null;
for(String keyValue : splitArray) {
if(keyValue.split(" ").length == 1) {
String chrName = keyValue;
if(!coverReports.containsKey(chrName)) {
ChromosomeInformationShare chrInfo = genome.getChromosomeInfo(chrName);
coverReport = new WholeGenomeCoverReport(chrInfo);
coverReports.put(chrName, coverReport);
} else {
coverReport = coverReports.get(chrName);
}
} else {
assert coverReport != null;
coverReport.parse(keyValue, genome);
}
}
}
}
}
项目:FEL
文件:WikipediaDocnoMapping.java
/**
* Creates a mappings file from the contents of a flat text file containing docid to docno
* mappings. This method is used by {@link WikipediaDocnoMappingBuilder} internally.
*
* @param inputFile flat text file containing docid to docno mappings
* @param outputFile output mappings file
* @throws IOException
*/
static public void writeDocnoMappingData(FileSystem fs, String inputFile, int n, String outputFile) throws IOException {
LOG.info("Writing " + n + " docids to " + outputFile);
LineReader reader = new LineReader(fs.open(new Path(inputFile)));
int cnt = 0;
Text line = new Text();
FSDataOutputStream out = fs.create(new Path(outputFile), true);
out.writeInt(n);
for (int i = 0; i < n; i++) {
reader.readLine(line);
String[] arr = line.toString().split("\\t");
out.writeInt(Integer.parseInt(arr[0]));
cnt++;
if (cnt % 100000 == 0) {
LOG.info(cnt + " articles");
}
}
out.close();
reader.close();
LOG.info("Done!");
}
项目:big-c
文件:TestKeyValueTextInputFormat.java
public void testUTF8() throws Exception {
LineReader in = null;
try {
in = makeStream("abcd\u20acbdcd\u20ac");
Text line = new Text();
in.readLine(line);
assertEquals("readLine changed utf8 characters",
"abcd\u20acbdcd\u20ac", line.toString());
in = makeStream("abc\u200axyz");
in.readLine(line);
assertEquals("split on fake newline", "abc\u200axyz", line.toString());
} finally {
if (in != null) {
in.close();
}
}
}
项目:big-c
文件:TestKeyValueTextInputFormat.java
public void testNewLines() throws Exception {
LineReader in = null;
try {
in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
Text out = new Text();
in.readLine(out);
assertEquals("line1 length", 1, out.getLength());
in.readLine(out);
assertEquals("line2 length", 2, out.getLength());
in.readLine(out);
assertEquals("line3 length", 0, out.getLength());
in.readLine(out);
assertEquals("line4 length", 3, out.getLength());
in.readLine(out);
assertEquals("line5 length", 4, out.getLength());
in.readLine(out);
assertEquals("line5 length", 5, out.getLength());
assertEquals("end of file", 0, in.readLine(out));
} finally {
if (in != null) {
in.close();
}
}
}
项目:big-c
文件:TestTextInputFormat.java
/**
* Test readLine for correct interpretation of maxLineLength
* (returned string should be clipped at maxLineLength, and the
* remaining bytes on the same line should be thrown out).
* Also check that returned value matches the string length.
* Varies buffer size to stress test.
*
* @throws Exception
*/
@Test (timeout=5000)
public void testMaxLineLength() throws Exception {
final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
final int STRLENBYTES = STR.getBytes().length;
Text out = new Text();
for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
LineReader in = makeStream(STR, bufsz);
int c = 0;
c += in.readLine(out, 1);
assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
c += in.readLine(out, 1);
assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
c += in.readLine(out, 1);
assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
c += in.readLine(out, 3);
assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
c += in.readLine(out, 10);
assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
c += in.readLine(out, 8);
assertEquals("line5 length, bufsz: "+bufsz, 5, out.getLength());
assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
assertEquals("total bytes, bufsz: "+bufsz, c, STRLENBYTES);
}
}
项目:big-c
文件:TestMRKeyValueTextInputFormat.java
@Test
public void testNewLines() throws Exception {
LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
Text out = new Text();
in.readLine(out);
assertEquals("line1 length", 1, out.getLength());
in.readLine(out);
assertEquals("line2 length", 2, out.getLength());
in.readLine(out);
assertEquals("line3 length", 0, out.getLength());
in.readLine(out);
assertEquals("line4 length", 3, out.getLength());
in.readLine(out);
assertEquals("line5 length", 4, out.getLength());
in.readLine(out);
assertEquals("line5 length", 5, out.getLength());
assertEquals("end of file", 0, in.readLine(out));
}
项目:big-c
文件:MultiFileWordCount.java
public CombineFileLineRecordReader(CombineFileSplit split,
TaskAttemptContext context, Integer index) throws IOException {
this.path = split.getPath(index);
fs = this.path.getFileSystem(context.getConfiguration());
this.startOffset = split.getOffset(index);
this.end = startOffset + split.getLength(index);
boolean skipFirstLine = false;
//open the file
fileIn = fs.open(path);
if (startOffset != 0) {
skipFirstLine = true;
--startOffset;
fileIn.seek(startOffset);
}
reader = new LineReader(fileIn);
if (skipFirstLine) { // skip first line and re-establish "startOffset".
startOffset += reader.readLine(new Text(), 0,
(int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
}
this.pos = startOffset;
}
项目:LCIndex-HBase-0.94.16
文件:CompactionTool.java
/**
* Returns a split for each store files directory using the block location
* of each file as locality reference.
*/
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
List<InputSplit> splits = new ArrayList<InputSplit>();
List<FileStatus> files = listStatus(job);
Text key = new Text();
for (FileStatus file: files) {
Path path = file.getPath();
FileSystem fs = path.getFileSystem(job.getConfiguration());
LineReader reader = new LineReader(fs.open(path));
long pos = 0;
int n;
try {
while ((n = reader.readLine(key)) > 0) {
String[] hosts = getStoreDirHosts(fs, path);
splits.add(new FileSplit(path, pos, n, hosts));
pos += n;
}
} finally {
reader.close();
}
}
return splits;
}
项目:hadoop-2.6.0-cdh5.4.3
文件:TestKeyValueTextInputFormat.java
public void testUTF8() throws Exception {
LineReader in = null;
try {
in = makeStream("abcd\u20acbdcd\u20ac");
Text line = new Text();
in.readLine(line);
assertEquals("readLine changed utf8 characters",
"abcd\u20acbdcd\u20ac", line.toString());
in = makeStream("abc\u200axyz");
in.readLine(line);
assertEquals("split on fake newline", "abc\u200axyz", line.toString());
} finally {
if (in != null) {
in.close();
}
}
}
项目:hadoop-2.6.0-cdh5.4.3
文件:TestKeyValueTextInputFormat.java
public void testNewLines() throws Exception {
LineReader in = null;
try {
in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
Text out = new Text();
in.readLine(out);
assertEquals("line1 length", 1, out.getLength());
in.readLine(out);
assertEquals("line2 length", 2, out.getLength());
in.readLine(out);
assertEquals("line3 length", 0, out.getLength());
in.readLine(out);
assertEquals("line4 length", 3, out.getLength());
in.readLine(out);
assertEquals("line5 length", 4, out.getLength());
in.readLine(out);
assertEquals("line5 length", 5, out.getLength());
assertEquals("end of file", 0, in.readLine(out));
} finally {
if (in != null) {
in.close();
}
}
}
项目:hadoop-2.6.0-cdh5.4.3
文件:TestTextInputFormat.java
/**
* Test readLine for correct interpretation of maxLineLength
* (returned string should be clipped at maxLineLength, and the
* remaining bytes on the same line should be thrown out).
* Also check that returned value matches the string length.
* Varies buffer size to stress test.
*
* @throws Exception
*/
@Test (timeout=5000)
public void testMaxLineLength() throws Exception {
final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
final int STRLENBYTES = STR.getBytes().length;
Text out = new Text();
for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
LineReader in = makeStream(STR, bufsz);
int c = 0;
c += in.readLine(out, 1);
assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
c += in.readLine(out, 1);
assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
c += in.readLine(out, 1);
assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
c += in.readLine(out, 3);
assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
c += in.readLine(out, 10);
assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
c += in.readLine(out, 8);
assertEquals("line5 length, bufsz: "+bufsz, 5, out.getLength());
assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
assertEquals("total bytes, bufsz: "+bufsz, c, STRLENBYTES);
}
}
项目:hadoop-2.6.0-cdh5.4.3
文件:TestMRKeyValueTextInputFormat.java
@Test
public void testNewLines() throws Exception {
LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
Text out = new Text();
in.readLine(out);
assertEquals("line1 length", 1, out.getLength());
in.readLine(out);
assertEquals("line2 length", 2, out.getLength());
in.readLine(out);
assertEquals("line3 length", 0, out.getLength());
in.readLine(out);
assertEquals("line4 length", 3, out.getLength());
in.readLine(out);
assertEquals("line5 length", 4, out.getLength());
in.readLine(out);
assertEquals("line5 length", 5, out.getLength());
assertEquals("end of file", 0, in.readLine(out));
}
项目:hadoop-2.6.0-cdh5.4.3
文件:MultiFileWordCount.java
public CombineFileLineRecordReader(CombineFileSplit split,
TaskAttemptContext context, Integer index) throws IOException {
this.path = split.getPath(index);
fs = this.path.getFileSystem(context.getConfiguration());
this.startOffset = split.getOffset(index);
this.end = startOffset + split.getLength(index);
boolean skipFirstLine = false;
//open the file
fileIn = fs.open(path);
if (startOffset != 0) {
skipFirstLine = true;
--startOffset;
fileIn.seek(startOffset);
}
reader = new LineReader(fileIn);
if (skipFirstLine) { // skip first line and re-establish "startOffset".
startOffset += reader.readLine(new Text(), 0,
(int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
}
this.pos = startOffset;
}
项目:hadoop-2.6.0-cdh5.4.3
文件:TestTextInputFormat.java
/**
* Test readLine for correct interpretation of maxLineLength
* (returned string should be clipped at maxLineLength, and the
* remaining bytes on the same line should be thrown out).
* Also check that returned value matches the string length.
* Varies buffer size to stress test.
*
* @throws Exception
*/
@Test
public void testMaxLineLength() throws Exception {
final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
final int STRLENBYTES = STR.getBytes().length;
Text out = new Text();
for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
LineReader in = makeStream(STR, bufsz);
int c = 0;
c += in.readLine(out, 1);
assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
c += in.readLine(out, 1);
assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
c += in.readLine(out, 1);
assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
c += in.readLine(out, 3);
assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
c += in.readLine(out, 10);
assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
c += in.readLine(out, 8);
assertEquals("line5 length, bufsz: "+bufsz, 5, out.getLength());
assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
assertEquals("total bytes, bufsz: "+bufsz, c, STRLENBYTES);
}
}
项目:hadoop-2.6.0-cdh5.4.3
文件:TestKeyValueTextInputFormat.java
public void testNewLines() throws Exception {
LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
Text out = new Text();
in.readLine(out);
assertEquals("line1 length", 1, out.getLength());
in.readLine(out);
assertEquals("line2 length", 2, out.getLength());
in.readLine(out);
assertEquals("line3 length", 0, out.getLength());
in.readLine(out);
assertEquals("line4 length", 3, out.getLength());
in.readLine(out);
assertEquals("line5 length", 4, out.getLength());
in.readLine(out);
assertEquals("line5 length", 5, out.getLength());
assertEquals("end of file", 0, in.readLine(out));
}
项目:hadoop-2.6.0-cdh5.4.3
文件:TestMRKeyValueTextInputFormat.java
public void testNewLines() throws Exception {
LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
Text out = new Text();
in.readLine(out);
assertEquals("line1 length", 1, out.getLength());
in.readLine(out);
assertEquals("line2 length", 2, out.getLength());
in.readLine(out);
assertEquals("line3 length", 0, out.getLength());
in.readLine(out);
assertEquals("line4 length", 3, out.getLength());
in.readLine(out);
assertEquals("line5 length", 4, out.getLength());
in.readLine(out);
assertEquals("line5 length", 5, out.getLength());
assertEquals("end of file", 0, in.readLine(out));
}
项目:HIndex
文件:CompactionTool.java
/**
* Returns a split for each store files directory using the block location
* of each file as locality reference.
*/
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
List<InputSplit> splits = new ArrayList<InputSplit>();
List<FileStatus> files = listStatus(job);
Text key = new Text();
for (FileStatus file: files) {
Path path = file.getPath();
FileSystem fs = path.getFileSystem(job.getConfiguration());
LineReader reader = new LineReader(fs.open(path));
long pos = 0;
int n;
try {
while ((n = reader.readLine(key)) > 0) {
String[] hosts = getStoreDirHosts(fs, path);
splits.add(new FileSplit(path, pos, n, hosts));
pos += n;
}
} finally {
reader.close();
}
}
return splits;
}
项目:hadoop-EAR
文件:HarIndex.java
/**
* Constructor that reads the contents of the index file.
* @param in An input stream to the index file.
* @param max The size of the index file.
* @throws IOException
*/
public HarIndex(InputStream in, long max) throws IOException {
LineReader lineReader = new LineReader(in);
Text text = new Text();
long nread = 0;
while (nread < max) {
int n = lineReader.readLine(text);
nread += n;
String line = text.toString();
try {
parseLine(line);
} catch (UnsupportedEncodingException e) {
throw new IOException("UnsupportedEncodingException after reading " +
nread + "bytes");
}
}
}
项目:hadoop-EAR
文件:HarFileSystem.java
public int getHarVersion() throws IOException {
FSDataInputStream masterIn = fs.open(masterIndex);
LineReader lmaster = new LineReader(masterIn, getConf());
Text line = new Text();
lmaster.readLine(line);
try {
masterIn.close();
} catch(IOException e){
//disregard it.
// its a read.
}
String versionLine = line.toString();
String[] arr = versionLine.split(" ");
int version = Integer.parseInt(arr[0]);
return version;
}
项目:hadoop-plus
文件:TestKeyValueTextInputFormat.java
public void testNewLines() throws Exception {
LineReader in = null;
try {
in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
Text out = new Text();
in.readLine(out);
assertEquals("line1 length", 1, out.getLength());
in.readLine(out);
assertEquals("line2 length", 2, out.getLength());
in.readLine(out);
assertEquals("line3 length", 0, out.getLength());
in.readLine(out);
assertEquals("line4 length", 3, out.getLength());
in.readLine(out);
assertEquals("line5 length", 4, out.getLength());
in.readLine(out);
assertEquals("line5 length", 5, out.getLength());
assertEquals("end of file", 0, in.readLine(out));
} finally {
if (in != null) {
in.close();
}
}
}
项目:hops
文件:TestKeyValueTextInputFormat.java
@Test
public void testUTF8() throws Exception {
LineReader in = null;
try {
in = makeStream("abcd\u20acbdcd\u20ac");
Text line = new Text();
in.readLine(line);
assertEquals("readLine changed utf8 characters",
"abcd\u20acbdcd\u20ac", line.toString());
in = makeStream("abc\u200axyz");
in.readLine(line);
assertEquals("split on fake newline", "abc\u200axyz", line.toString());
} finally {
if (in != null) {
in.close();
}
}
}
项目:hadoop-plus
文件:TestTextInputFormat.java
/**
* Test readLine for correct interpretation of maxLineLength
* (returned string should be clipped at maxLineLength, and the
* remaining bytes on the same line should be thrown out).
* Also check that returned value matches the string length.
* Varies buffer size to stress test.
*
* @throws Exception
*/
@Test (timeout=5000)
public void testMaxLineLength() throws Exception {
final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
final int STRLENBYTES = STR.getBytes().length;
Text out = new Text();
for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
LineReader in = makeStream(STR, bufsz);
int c = 0;
c += in.readLine(out, 1);
assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
c += in.readLine(out, 1);
assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
c += in.readLine(out, 1);
assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
c += in.readLine(out, 3);
assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
c += in.readLine(out, 10);
assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
c += in.readLine(out, 8);
assertEquals("line5 length, bufsz: "+bufsz, 5, out.getLength());
assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
assertEquals("total bytes, bufsz: "+bufsz, c, STRLENBYTES);
}
}
项目:hadoop-plus
文件:TestMRKeyValueTextInputFormat.java
@Test
public void testNewLines() throws Exception {
LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
Text out = new Text();
in.readLine(out);
assertEquals("line1 length", 1, out.getLength());
in.readLine(out);
assertEquals("line2 length", 2, out.getLength());
in.readLine(out);
assertEquals("line3 length", 0, out.getLength());
in.readLine(out);
assertEquals("line4 length", 3, out.getLength());
in.readLine(out);
assertEquals("line5 length", 4, out.getLength());
in.readLine(out);
assertEquals("line5 length", 5, out.getLength());
assertEquals("end of file", 0, in.readLine(out));
}
项目:hadoop-plus
文件:MultiFileWordCount.java
public CombineFileLineRecordReader(CombineFileSplit split,
TaskAttemptContext context, Integer index) throws IOException {
this.path = split.getPath(index);
fs = this.path.getFileSystem(context.getConfiguration());
this.startOffset = split.getOffset(index);
this.end = startOffset + split.getLength(index);
boolean skipFirstLine = false;
//open the file
fileIn = fs.open(path);
if (startOffset != 0) {
skipFirstLine = true;
--startOffset;
fileIn.seek(startOffset);
}
reader = new LineReader(fileIn);
if (skipFirstLine) { // skip first line and re-establish "startOffset".
startOffset += reader.readLine(new Text(), 0,
(int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
}
this.pos = startOffset;
}
项目:pbase
文件:CompactionTool.java
/**
* Returns a split for each store files directory using the block location
* of each file as locality reference.
*/
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
List<InputSplit> splits = new ArrayList<InputSplit>();
List<FileStatus> files = listStatus(job);
Text key = new Text();
for (FileStatus file: files) {
Path path = file.getPath();
FileSystem fs = path.getFileSystem(job.getConfiguration());
LineReader reader = new LineReader(fs.open(path));
long pos = 0;
int n;
try {
while ((n = reader.readLine(key)) > 0) {
String[] hosts = getStoreDirHosts(fs, path);
splits.add(new FileSplit(path, pos, n, hosts));
pos += n;
}
} finally {
reader.close();
}
}
return splits;
}