Java 类org.apache.hadoop.mapred.lib.InputSampler 实例源码
项目:melody-join
文件:MRSimJoinHD.java
/**
 * Samples record keys from {@code input} for use as pivots.
 *
 * <p>A fresh {@link JobConf} is set up to read {@code input} through
 * {@link KeyValueTextInputFormat} with {@link Text} map-output keys, and an
 * {@code InputSampler.RandomSampler} is then run over that input.
 *
 * @param input   path added to the job as its input
 * @param numPivs value passed to the sampler as its sample-count argument
 * @return the array produced by the sampler's {@code getSample} call
 * @throws IOException if configuring the input path or sampling fails
 */
Object[] getPivots(Path input, int numPivs) throws IOException
{
    // Build a standalone job configuration that only describes how to read the input.
    JobConf conf = new JobConf();
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    org.apache.hadoop.mapred.FileInputFormat.addInputPath(conf, input);

    // Ask the configuration for the input format instance it was just told to use.
    final KeyValueTextInputFormat inputFormat = (KeyValueTextInputFormat) conf.getInputFormat();

    // RandomSampler arguments: 1.0, the requested pivot count, and 100
    // (per the Hadoop API: selection frequency, number of samples, max splits sampled).
    InputSampler.Sampler<Text, Text> pivotSampler =
        new InputSampler.RandomSampler<Text, Text>(1.0, numPivs, 100);
    return pivotSampler.getSample(inputFormat, conf);
}