WORD COUNT PROGRAM EXECUTION STEPS IN HADOOP
Step 1: Create a wordcount directory in /home/user/Documents/
cd /home/user/Documents/
sudo mkdir wordcount
cd wordcount
Step 2: Create a
WordCount.java file in the wordcount directory
vi
WordCount.java
Sample content of the WordCount.java file
//package org.myorg;
import
java.io.IOException;
import java.util.*;
import
org.apache.hadoop.fs.Path;
import
org.apache.hadoop.conf.*;
import
org.apache.hadoop.io.*;
import
org.apache.hadoop.mapred.*;
import
org.apache.hadoop.util.*;
/**
 * Classic Hadoop word-count job written against the old-style
 * {@code org.apache.hadoop.mapred} API (MapReduceBase / JobConf / JobClient).
 * Reads text input, tokenizes each line on whitespace, and emits one
 * (word, count) pair per distinct token.
 */
public class WordCount {

    /**
     * Mapper: for every whitespace-separated token in the input line,
     * emits the pair (token, 1).
     */
    public static class Map extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {

        // Reused across calls to avoid allocating a writable per token.
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value,
                        OutputCollector<Text, IntWritable> output,
                        Reporter reporter) throws IOException {
            // Default StringTokenizer delimiters: space, tab, newline, CR, FF.
            StringTokenizer tok = new StringTokenizer(value.toString());
            while (tok.hasMoreTokens()) {
                word.set(tok.nextToken());
                output.collect(word, one);
            }
        }
    }

    /**
     * Reducer: sums the counts emitted for each word and emits
     * (word, totalCount).
     */
    public static class Reduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {

        public void reduce(Text key, Iterator<IntWritable> values,
                           OutputCollector<Text, IntWritable> output,
                           Reporter reporter) throws IOException {
            int total = 0;
            for (; values.hasNext(); ) {
                total += values.next().get();
            }
            output.collect(key, new IntWritable(total));
        }
    }

    /**
     * Configures and launches the job.
     * args[0] = HDFS input path, args[1] = HDFS output path
     * (the output path must not already exist).
     */
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        // Combiner intentionally left disabled in this sample.
        //conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        // Submits the job and blocks until it completes.
        JobClient.runJob(conf);
    }
}
Step 3: Create a
directory called wordcountc in
/home/user/Documents/wordcount/
sudo mkdir wordcountc
Step 4: Create a
directory on the Hadoop file system
hdfs
dfs -mkdir /example1
Step 5: Copy the input
file from the local system to Hadoop file system
hdfs dfs -copyFromLocal /home/user/Documents/emp.txt /example1/
Step 6: Compile the program:
sudo javac -classpath /usr/local/hadoop/share/hadoop/common/hadoop-common-2.6.0.jar:/usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.6.0.jar:/usr/local/hadoop/share/hadoop/common/lib/hadoop-annotations-2.6.0.jar -d wordcountc/ WordCount.java
Step 7: After compilation, 3 class files (WordCount, Map, and Reduce) will be generated in the directory 'wordcountc'
Step 8: Create jar file
using the command
sudo
jar -cvf wordcountj.jar -C /home/user/Documents/wordcount/wordcountc .
Step 9: Change to
/usr/local/hadoop/ folder
cd /usr/local/hadoop/
Step 10: Execute using
the below command
bin/hadoop
jar /home/user/Documents/wordcount/wordcountj.jar WordCount /example1/emp.txt output
Step 11: The output can
be checked by typing http://localhost:50070 on the browser
window. It will display data node information. In the menu, under 'utilities',
you can see an option for 'browse the file system'. Click that and find out the
result of the execution under '/user/hdpuser/output' directory.
Execution of wordcount program without creating a jar file
Execute the following command
bin/hadoop
jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar wordcount /example1/emp.txt output1
No comments:
Post a Comment