
    paulwong

    A variant of WordCount… Hadoop

    A counter for domain names (actually hosts).

    Input: a folder containing a set of text files, one URL per line; each line can be thought of as a record in a database.
    Flow: extract the domain from each URL and add 1 to that domain's count (a quick URI-parsing sketch follows this list).
    Output: domain, count for that domain.
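
    Before looking at the job itself, the extraction step can be sanity-checked in plain Java. The snippet below is a standalone sketch (the class name and sample URLs are made up, not part of the job code) showing how java.net.URI pulls the host out of a URL:

    import java.net.URI;

    public class UriHostDemo {
        public static void main(String[] args) {
            // Hypothetical sample lines, as they might appear in the input files
            String[] urls = {
                    "http://www.example.com/index.html",
                    "https://blog.example.org/2012/09/post?id=42",
                    "not a url"
            };
            for (String url : urls) {
                try {
                    String host = URI.create(url.trim()).getHost();
                    System.out.println(url + " -> " + (host == null ? "(no host)" : host));
                } catch (Exception e) {
                    System.out.println(url + " -> (unparseable)");
                }
            }
        }
    }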

    The code is as follows:
    Mapper
    package com.keseek.hadoop;

    import java.io.IOException;
    import java.net.URI;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.mapred.Mapper;

    public class DomainCountMapper implements
            Mapper<LongWritable, Text, Text, LongWritable> {

        @Override
        public void configure(JobConf arg0) {
            // Init the reusable Text and LongWritable instances
            domain = new Text();
            one = new LongWritable(1);
        }

        @Override
        public void close() throws IOException {
            // Nothing to clean up
        }

        @Override
        public void map(LongWritable key, Text value,
                OutputCollector<Text, LongWritable> output, Reporter reporter)
                throws IOException {
            // Get URL
            String url = value.toString().trim();

            // URL -> Domain && Collect
            domain.set(ParseDomain(url));
            if (domain.getLength() != 0) {
                output.collect(domain, one);
            }
        }

        public String ParseDomain(String url) {
            try {
                URI uri = URI.create(url);
                String host = uri.getHost();
                // getHost() returns null for URLs without a host part; map it to ""
                return host == null ? "" : host;
            } catch (Exception e) {
                return "";
            }
        }

        // Shared Text reused as the output key (domain)
        private Text domain;

        // Constant value 1 emitted for every URL
        private LongWritable one;

    }
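
    Because the old mapred API types are plain interfaces, the mapper can also be exercised locally without launching a job. The harness below is a hypothetical sketch (the demo class name and sample records are invented): it plugs a simple printing OutputCollector into map() so you can see what the mapper emits.

    package com.keseek.hadoop;

    import java.io.IOException;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;

    public class DomainCountMapperDemo {
        public static void main(String[] args) throws IOException {
            DomainCountMapper mapper = new DomainCountMapper();
            mapper.configure(null); // configure() ignores the JobConf, so null is fine here

            // A collector that just prints each emitted (domain, count) pair
            OutputCollector<Text, LongWritable> collector =
                    new OutputCollector<Text, LongWritable>() {
                        @Override
                        public void collect(Text key, LongWritable value) {
                            System.out.println(key + "\t" + value);
                        }
                    };

            // Feed a few made-up input records through map(); the key is the line's byte offset
            mapper.map(new LongWritable(0), new Text("http://www.example.com/a.html"), collector, Reporter.NULL);
            mapper.map(new LongWritable(30), new Text("http://www.example.com/b.html"), collector, Reporter.NULL);
            mapper.map(new LongWritable(60), new Text("no-host-here"), collector, Reporter.NULL);
        }
    }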

    Reducer

    package com.keseek.hadoop;

    import java.io.IOException;
    import java.util.Iterator;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.mapred.Reducer;

    public class DomainCountReducer implements
            Reducer<Text, LongWritable, Text, LongWritable> {

        @Override
        public void configure(JobConf arg0) {
            // No per-task setup needed
        }

        @Override
        public void close() throws IOException {
            // Nothing to clean up
        }

        @Override
        public void reduce(Text key, Iterator<LongWritable> values,
                OutputCollector<Text, LongWritable> output, Reporter reporter)
                throws IOException {
            // Sum the counts for this domain
            long cnt = 0;
            while (values.hasNext()) {
                cnt += values.next().get();
            }

            // Output: domain -> total count
            output.collect(key, new LongWritable(cnt));
        }

    }

    Main

    package com.keseek.hadoop;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.RunningJob;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.hadoop.mapred.TextOutputFormat;

    public class DomainCountMain {

        public static void main(String[] args) throws Exception {
            // Params for input/output paths
            if (args.length != 2) {
                System.out.println("Usage:");
                System.out.println("DomainCountMain.jar <Input_Path> <Output_Path>");
                System.exit(-1);
            }

            // Configure JobConf
            JobConf jobconf = new JobConf(DomainCountMain.class);

            jobconf.setJobName("Domain Counter by Coder4");

            FileInputFormat.setInputPaths(jobconf, new Path(args[0]));
            FileOutputFormat.setOutputPath(jobconf, new Path(args[1]));

            jobconf.setInputFormat(TextInputFormat.class);
            jobconf.setOutputFormat(TextOutputFormat.class);

            jobconf.setMapperClass(DomainCountMapper.class);
            jobconf.setReducerClass(DomainCountReducer.class);
            // The reducer doubles as a combiner: summing partial counts is associative,
            // and its input and output types are identical (Text, LongWritable)
            jobconf.setCombinerClass(DomainCountReducer.class);

            jobconf.setMapOutputKeyClass(Text.class);
            jobconf.setMapOutputValueClass(LongWritable.class);
            jobconf.setOutputKeyClass(Text.class);
            jobconf.setOutputValueClass(LongWritable.class);

            // Run job (runJob already blocks until the job finishes)
            RunningJob run = JobClient.runJob(jobconf);
            run.waitForCompletion();
            if (run.isSuccessful()) {
                System.out.println("<<<DomainCount Main>>> success.");
            } else {
                System.out.println("<<<DomainCount Main>>> error.");
            }
        }

    }
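
    Assuming the three classes are packaged into a jar named DomainCountMain.jar with this class as its main class (the jar name just follows the usage string above), the job would typically be submitted with something like:

    hadoop jar DomainCountMain.jar /user/paul/urls /user/paul/domain_counts

    The input and output paths here are hypothetical HDFS paths. With TextOutputFormat, each result line holds a domain and its count separated by a tab, for example (made-up values for illustration):

    www.example.com    42
    blog.example.org    7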

    posted on 2012-09-08 15:30 by paulwong, filed under: HADOOP / Cloud Computing
