tangtb

          Hadoop,SpringMVC,ExtJs,Struts2,Spring,SpringSecurity,Hibernate,Struts
          posts - 25, comments - 88, trackbacks - 0, articles - 0
            BlogJava :: 首頁 :: 新隨筆 :: 聯繫 :: 聚合  :: 管理

          Windows遠程調(diào)試Hadoop

          Posted on 2014-06-03 21:09 tangtb 閱讀(1922) 評論(0)  編輯  收藏 所屬分類: Hadoop

          前置條件

          成功安裝配置Hadoop集群

          成功編譯安裝Hadoop Eclipse插件

          本地有和服務器相同版本的Hadoop安裝包,並已解壓(本例使用hadoop-1.1.2)

          啟動Hadoop集群

          配置hadoop installation directory

          Eclipse選擇Window→Preferences,彈出Preferences窗口,選擇左側Hadoop Map/Reduce選項,右側配置Hadoop installation directory

          配置Map/Reduce Locations

          Eclipse選擇Window→Open Perspective→Map/Reduce,打開Map/Reduce視圖。

          點擊上圖右上角藍色小象,彈出Hadoop Location新增配置窗口,如下圖:

          Location name:自定義Location名稱

          Map/Reduce Master:填寫mapred-site.xml中mapred.job.tracker屬性值

          DFS Master:填寫core-site.xml中fs.default.name屬性值

          User name:服務器端連接hdfs的用戶名

          連接成功,如上圖左側所示,列出了當前HDFS中的目錄。

          新建MapReduce Project

          以NCDC求年最高氣溫為例,將氣溫數據放在/user/hadoop/ncdc/input目錄下,如圖所示:

          創(chuàng)建測試類MaxTempTest,代碼如下:

          package com.hadoop.test;

           

          import java.io.IOException;

           

          import org.apache.hadoop.conf.Configuration;

          import org.apache.hadoop.conf.Configured;

          import org.apache.hadoop.fs.FileSystem;

          import org.apache.hadoop.fs.Path;

          import org.apache.hadoop.io.IntWritable;

          import org.apache.hadoop.io.LongWritable;

          import org.apache.hadoop.io.Text;

          import org.apache.hadoop.mapreduce.Job;

          import org.apache.hadoop.mapreduce.Mapper;

          import org.apache.hadoop.mapreduce.Reducer;

          import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

          import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

          import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

          import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

          import org.apache.hadoop.util.Tool;

          import org.apache.hadoop.util.ToolRunner;

           

          public class MaxTempTest extends Configured implements Tool {

             

              public static class MaxTempMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

                

                 private static final int MISSING = 9999;

           

                 protected void map(LongWritable key, Text value, Context context)

                        throws IOException, InterruptedException {

                     //從輸入文本中解析出年和氣溫

                     String line = value.toString();

                     String year = line.substring(15, 19);

                     int airTemp = 0;

                     if (line.length() > 87) {

                        if (line.charAt(87) == '+') {

                            airTemp = Integer.parseInt(line.substring(88, 92));

                        } else {

                            airTemp = Integer.parseInt(line.substring(87, 92));

                        }

                        String quality = line.substring(92, 93);

                        if (airTemp != MISSING && quality.matches("[01459]")) {

                            context.write(new Text(year), new IntWritable(airTemp));

                        }

                     } else {

                        System.out.println("year: " + year + ", " + context.getInputSplit().toString());

                        context.write(new Text(year), new IntWritable(airTemp));

                     }

                 }

              }

             

              public static class MaxTempReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

                

                 protected void reduce(Text key, java.lang.Iterable<IntWritable> values, Context context) throws IOException ,InterruptedException {

                     //計算最大值

                     int maxValue = Integer.MIN_VALUE;

                     for (IntWritable value : values) {

                        maxValue = Math.max(maxValue, value.get());

                     }

                     context.write(key, new IntWritable(maxValue));

                 }

              }

             

              /**

               * @param args

               */

              public static void main(String[] args) {

                 // TODO Auto-generated method stub

                 try {

                     int exitCode = ToolRunner.run(new MaxTempTest(), args);

                     System.exit(exitCode);

                 } catch (Exception e) {

                     // TODO Auto-generated catch block

                     e.printStackTrace();

                 }

              }

             

              @Override

              public int run(String[] args) throws Exception {

                 // TODO Auto-generated method stub

                

                 if (args.length != 2) {

                     System.out.println("Usage: MaxTempTest <input path> <output path>");

                     System.exit(-1);

                 }

                

                 Configuration config = new Configuration();

                 try {

                     FileSystem hdfs = FileSystem.get(config);

                     Job job = new Job(config, "Max TempTest");

                     //設置輸出KeyValue的類型

                     job.setOutputKeyClass(Text.class);

                     job.setOutputValueClass(IntWritable.class);     

                    

                     job.setJarByClass(MaxTempTest.class);

                     job.setMapperClass(MaxTempMapper.class); //設置Mapper

                     job.setReducerClass(MaxTempReducer.class);    //設置Reducer

                     //設置輸入輸出格式

                     job.setInputFormatClass(TextInputFormat.class);

                     job.setOutputFormatClass(TextOutputFormat.class);

                    

                     Path inputDir = new Path(args[0]);

                     Path outputDir = new Path(args[1]);

                     //如果輸出目錄已經(jīng)存在,刪除該目錄

                     if (hdfs.exists(outputDir)) {

                        hdfs.delete(outputDir, true);

                     }

                     //設置輸入輸出路徑

                     FileInputFormat.setInputPaths(job, inputDir);

                     FileOutputFormat.setOutputPath(job, outputDir);

                     //提交作業(yè)

                     job.waitForCompletion(true);

                 } catch (IOException e) {

                     e.printStackTrace();

                 } catch (ClassNotFoundException e) {

                     // TODO Auto-generated catch block

                     e.printStackTrace();

                 } catch (InterruptedException e) {

                     // TODO Auto-generated catch block

                     e.printStackTrace();

                 }

                 return 0;

              }

          }

          運行測試項目

          在測試類MaxTempTest右鍵→Run As→Run Configurations彈出Run Configurations窗口,右鍵Java Application→New,新建名為MaxTempTest,如下圖所示:

          點右側Arguments配置運行參數(shù),在Program arguments中輸入:

          hdfs://10.120.10.11:9090/user/hadoop/ncdc/input hdfs://10.120.10.11:9090/user/hadoop/ncdc/output

          在測試類MaxTempTest右鍵→Run As→Run on Hadoop彈出Select Hadoop location窗口,如下圖所示:

          選擇“Choose an existing server from the list below”,點擊“Finish”,提交作業(yè)。

          運行錯誤一

          解決方法:

          在hadoop-1.1.2\src\core\org\apache\hadoop\fs目錄下找到FileUtil.java,將該文件復制到自己的測試項目中,包名與FileUtil中的package定義相同,注釋掉類中checkReturnValue方法中的代碼,如下圖所示:

          運行錯誤二

          解決方法:

          在服務器端修改目錄權限,執(zhí)行hadoop fs -chmod -R 777 ./ncdc命令

          運行結果

          如上圖所示,作業運行成功。Eclipse左側“DFS Locations”右鍵刷新“ncdc”目錄,雙擊打開part-r-00000查看運行結果,如下圖所示:

           

           

           

          主站蜘蛛池模板: 张家界市| 崇州市| 祁门县| 翁源县| 乌兰察布市| 老河口市| 台东县| 柘荣县| 郸城县| 广州市| 鹿邑县| 汉寿县| 兴安县| 舒城县| 准格尔旗| 涿鹿县| 特克斯县| 申扎县| 兴文县| 盐亭县| 京山县| 临朐县| 平罗县| 衡山县| 九台市| 兴隆县| 佛山市| 搜索| 正阳县| 门源| 连州市| 大连市| 桃江县| 赫章县| 科技| 张家界市| 美姑县| 资中县| 曲靖市| 景洪市| 富宁县|