https://www.quora.com/Is-there-a-fast-algorithm-to-run-on-the-MapReduce-framework-to-find-the-median-from-a-huge-integer-set
You could use MapReduce to sort the data and then do some postprocessing outside of the MapReduce framework to find the median in the sorted data.
This isn't asymptotically optimal: sorting costs O(n log n), whereas a selection algorithm can find the median in O(n).
http://www.1point3acres.com/bbs/thread-14930-1-1.html
http://stackoverflow.com/questions/10109514/computing-median-in-map-reduce
Related:
https://github.com/rajdeepd/hadoop-samples
/**
 * Mapper that parses input lines of the form {@code year;temperature} and emits
 * one {@code (year, temperature)} pair per line.
 */
public class MedianTemperatureMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // Output holders are reused between map() calls to avoid allocating per record.
    // They are per-instance (not static) so that several mapper instances living in
    // the same JVM never write through the same mutable Writable.
    private final Text yearText = new Text();
    private final IntWritable tempWritable = new IntWritable(0);

    /**
     * Splits the line on {@code ';'}, takes the first field as the year and the
     * second field as an integer temperature, and writes the pair to the context.
     *
     * @param key     input key; not used
     * @param value   one input line, expected to look like {@code year;temperature}
     * @param context sink for the emitted (year, temperature) pair
     * @throws java.io.IOException  if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     * @throws ArrayIndexOutOfBoundsException if the line has no second field
     * @throws NumberFormatException if the second field is not a valid integer
     */
    protected void map(LongWritable key, Text value, Context context)
            throws java.io.IOException, InterruptedException {
        String[] fields = value.toString().split(";");
        yearText.set(fields[0]);
        tempWritable.set(Integer.parseInt(fields[1]));
        context.write(yearText, tempWritable);
    }
}
/**
 * Reducer that emits, for each key, the median of the integer values grouped
 * under that key.
 *
 * <p>For an even number of values the upper of the two middle elements is
 * emitted (no averaging), so the result is always one of the input values.
 */
public class MedianTemperatureReducer extends
        Reducer<Text, IntWritable, Text, IntWritable> {
    // Scratch buffer reused across reduce() calls. It MUST be cleared at the start
    // of every call; otherwise values from previously reduced keys would leak into
    // the median of the current key.
    ArrayList<Integer> temperatureList = new ArrayList<Integer>();

    /**
     * Collects all values for {@code key}, sorts them, and writes the median.
     *
     * @param key     the grouping key, written back unchanged
     * @param values  the values associated with {@code key}
     * @param context sink for the emitted (key, median) pair
     * @throws java.io.IOException  if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws java.io.IOException, InterruptedException {
        // Drop whatever the previous key left behind.
        temperatureList.clear();
        for (IntWritable value : values) {
            // Copy the primitive out of the Writable rather than keeping the object.
            temperatureList.add(value.get());
        }

        // Defensive: with no values there is no median to emit.
        if (temperatureList.isEmpty()) {
            return;
        }

        Collections.sort(temperatureList);

        // size/2 is the middle index for odd sizes and the upper-middle index for
        // even sizes; this is exactly what the former even/odd branches computed.
        int median = temperatureList.get(temperatureList.size() / 2);
        context.write(key, new IntWritable(median));
    }
}
You could use MapReduce to sort the data and then do some postprocessing outside of the MapReduce framework to find the median in the sorted data.
This isn't asymptotically optimal: sorting costs O(n log n), whereas a selection algorithm can find the median in O(n).
http://www.1point3acres.com/bbs/thread-14930-1-1.html
http://stackoverflow.com/questions/10109514/computing-median-in-map-reduce
Related:
https://github.com/rajdeepd/hadoop-samples
/**
 * Mapper that parses input lines of the form {@code year;temperature} and emits
 * one {@code (year, temperature)} pair per line.
 */
public class MedianTemperatureMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // Output holders are reused between map() calls to avoid allocating per record.
    // They are per-instance (not static) so that several mapper instances living in
    // the same JVM never write through the same mutable Writable.
    private final Text yearText = new Text();
    private final IntWritable tempWritable = new IntWritable(0);

    /**
     * Splits the line on {@code ';'}, takes the first field as the year and the
     * second field as an integer temperature, and writes the pair to the context.
     *
     * @param key     input key; not used
     * @param value   one input line, expected to look like {@code year;temperature}
     * @param context sink for the emitted (year, temperature) pair
     * @throws java.io.IOException  if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     * @throws ArrayIndexOutOfBoundsException if the line has no second field
     * @throws NumberFormatException if the second field is not a valid integer
     */
    protected void map(LongWritable key, Text value, Context context)
            throws java.io.IOException, InterruptedException {
        String[] fields = value.toString().split(";");
        yearText.set(fields[0]);
        tempWritable.set(Integer.parseInt(fields[1]));
        context.write(yearText, tempWritable);
    }
}
/**
 * Reducer that emits, for each key, the median of the integer values grouped
 * under that key.
 *
 * <p>For an even number of values the upper of the two middle elements is
 * emitted (no averaging), so the result is always one of the input values.
 */
public class MedianTemperatureReducer extends
        Reducer<Text, IntWritable, Text, IntWritable> {
    // Scratch buffer reused across reduce() calls. It MUST be cleared at the start
    // of every call; otherwise values from previously reduced keys would leak into
    // the median of the current key.
    ArrayList<Integer> temperatureList = new ArrayList<Integer>();

    /**
     * Collects all values for {@code key}, sorts them, and writes the median.
     *
     * @param key     the grouping key, written back unchanged
     * @param values  the values associated with {@code key}
     * @param context sink for the emitted (key, median) pair
     * @throws java.io.IOException  if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws java.io.IOException, InterruptedException {
        // Drop whatever the previous key left behind.
        temperatureList.clear();
        for (IntWritable value : values) {
            // Copy the primitive out of the Writable rather than keeping the object.
            temperatureList.add(value.get());
        }

        // Defensive: with no values there is no median to emit.
        if (temperatureList.isEmpty()) {
            return;
        }

        Collections.sort(temperatureList);

        // size/2 is the middle index for odd sizes and the upper-middle index for
        // even sizes; this is exactly what the former even/odd branches computed.
        int median = temperatureList.get(temperatureList.size() / 2);
        context.write(key, new IntWritable(median));
    }
}