
Unit Testing MapReduce Code

Hadoop ships with a WordCount example that counts word occurrences. I extracted it into a standalone project and verified that it runs. The source is as follows:
package org.apache.hadoop.examples;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                // For unit testing we emit a fresh Text per token. The stock example
                // reuses one object via word.set(itr.nextToken()); since Mockito records
                // argument references, a reused object would make every recorded write
                // appear to carry the last token, and verification would fail.
                word = new Text(itr.nextToken());
                context.write(word, new IntWritable(1));
            }
        }
    }

    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Now I want to unit test this code. One approach is to run the job to completion, read the files in the output directory, and check that the counts are correct (a sketch of this end-to-end check follows below). But when such a test fails, it gives no clue as to where the failure occurred. We need to test map and reduce individually.
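For reference, here is a minimal sketch of that end-to-end check, assuming the job has already written its result locally; the path output/part-r-00000 and the class name WordCountOutputCheck are hypothetical, not part of the original example:

package org.apache.hadoop.examples;

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WordCountOutputCheck {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical output location; adjust to the job's actual output directory.
        Path part = new Path("output/part-r-00000");
        BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(part)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                // Each output line is "<word>\t<count>"; compare against expected values here.
                String[] kv = line.split("\t");
                System.out.println(kv[0] + " = " + kv[1]);
            }
        } finally {
            reader.close();
        }
    }
}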
Tom White's book "Hadoop: The Definitive Guide" describes how to unit test with Mockito; following the book's temperature example, we apply the same approach to WordCount. (The example in the second edition is out of date; refer to the third English edition, or to my code below.)
package org.apache.hadoop.examples;

/* author: zhouhh
 * date: 2012.8.7
 */

import static org.mockito.Mockito.*;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.junit.Test;

public class WordCountTest {

    @Test
    public void testWordCountMap() throws IOException, InterruptedException {
        WordCount.TokenizerMapper mapper = new WordCount.TokenizerMapper();
        Text value = new Text("a b c b a a");
        @SuppressWarnings("unchecked")
        WordCount.TokenizerMapper.Context context =
                mock(WordCount.TokenizerMapper.Context.class);
        mapper.map(null, value, context);
        verify(context, times(3)).write(new Text("a"), new IntWritable(1));
        verify(context).write(new Text("c"), new IntWritable(1));
        // verify(context).write(new Text("cc"), new IntWritable(1)); // would fail: "cc" is never written
    }

    @Test
    public void testWordCountReduce() throws IOException, InterruptedException {
        WordCount.IntSumReducer reducer = new WordCount.IntSumReducer();
        @SuppressWarnings("unchecked")
        WordCount.IntSumReducer.Context context =
                mock(WordCount.IntSumReducer.Context.class);
        Text key = new Text("a");
        List<IntWritable> values = new ArrayList<IntWritable>();
        values.add(new IntWritable(1));
        values.add(new IntWritable(1));
        reducer.reduce(key, values, context);
        verify(context).write(new Text("a"), new IntWritable(2));
    }

    public static void main(String[] args) {
        // Manual debugging harness; normally run this class as a JUnit test instead.
        // WordCountTest t = new WordCountTest();
        // t.testWordCountMap();
        // t.testWordCountReduce();
    }
}
verify(context) checks for exactly one matching write. If the same key-value pair is written several times, you must check it with verify(context, times(n)); otherwise the verification fails.
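For example, the token b appears twice in the test input "a b c b a a", so its check needs times(2). This extra test method is a sketch, not part of the original tests:

    @Test
    public void testWordCountMapCountsB() throws IOException, InterruptedException {
        WordCount.TokenizerMapper mapper = new WordCount.TokenizerMapper();
        @SuppressWarnings("unchecked")
        WordCount.TokenizerMapper.Context context =
                mock(WordCount.TokenizerMapper.Context.class);
        mapper.map(null, new Text("a b c b a a"), context);
        // "b" is written twice, so a plain verify(context) would fail here:
        verify(context, times(2)).write(new Text("b"), new IntWritable(1));
    }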
To execute the tests, right-click the test file and choose Run As > JUnit Test; the JUnit view shows whether the tests pass.
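If you would rather run the tests outside Eclipse, JUnit 4's JUnitCore can drive them programmatically. A minimal sketch (the class name RunWordCountTest is hypothetical):

package org.apache.hadoop.examples;

import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
import org.junit.runner.notification.Failure;

public class RunWordCountTest {
    public static void main(String[] args) {
        // Run the test class and report any failures on the console.
        Result result = JUnitCore.runClasses(WordCountTest.class);
        for (Failure failure : result.getFailures()) {
            System.out.println(failure.toString());
        }
        System.out.println(result.wasSuccessful() ? "All tests passed." : "Tests failed.");
    }
}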
This example was tested under Hadoop 1.0.3. Mockito is bundled in Hadoop's lib directory as mockito-all-1.8.5.jar, so no extra dependency is needed on that setup.

posted on 2014-11-26 14:19 by 順其自然EVO