- Driver Class
import java.io.IOException;
// file system
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
// box classes import
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
// mapreduce imports
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCountDriver {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Job j = new Job();
j.setJobName("My First Job");
FileInputFormat.addInputPath(j, new Path(args[0]));
FileOutputFormat.setOutputPath(j, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
- Mapper Class
// exception handling
import java.io.IOException;
// box classes import
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
// import mapper class
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String inputstring = value.toString();
for (String x : inputstring.split(" ")) {
context.write(new Text(x), new IntWritable(1));
- Reducer Class
// exceptions import
import java.io.IOException;
// import box classes
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
// import reducer class
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int y = 0;
for (IntWritable x : values) {
context.write(key, new IntWritable(y));
How to execute
H_CLASSPATH=$(hadoop classpath)
- creating a variable to store the classpath of the jar files needed for compiling
javac *.java -cp $H_CLASSPATH
- compiling the source files to create the .class files
jar -cvf wordcount.jar *.class
- creating jar file
hadoop fs -put poem.txt
- uploading the input file to HDFS
hadoop jar wordcount.jar WordCountDriver poem.txt wordcountout
- executing the map reduce program
hadoop fs -ls wordcountout
- listing the output files
hadoop fs -cat wordcountout/part-r-00000
- listing the contents of the output file (only if step 5 was successful)