锘??xml version="1.0" encoding="utf-8" standalone="yes"?>亚洲AV无码专区在线观看成人,亚洲国产成人久久三区,亚洲欧洲国产经精品香蕉网http://m.tkk7.com/paulwong/category/53479.htmlzh-cnTue, 23 Apr 2013 21:36:25 GMTTue, 23 Apr 2013 21:36:25 GMT60涓涓狿IG鑴氭湰渚嬪瓙鍒嗘瀽http://m.tkk7.com/paulwong/archive/2013/04/13/397791.htmlpaulwongpaulwongSat, 13 Apr 2013 07:21:00 GMThttp://m.tkk7.com/paulwong/archive/2013/04/13/397791.htmlhttp://m.tkk7.com/paulwong/comments/397791.htmlhttp://m.tkk7.com/paulwong/archive/2013/04/13/397791.html#Feedback0http://m.tkk7.com/paulwong/comments/commentRss/397791.htmlhttp://m.tkk7.com/paulwong/services/trackbacks/397791.html
# Launch one Pig analysis script against the sample access log.
#
# Requires PIG_HOME to be set in the environment; the piggybank jar is
# resolved relative to it and handed to the script as a Pig parameter.
PIGGYBANK_PATH="$PIG_HOME/contrib/piggybank/java/piggybank.jar"
INPUT=pig/input/test-pig-full.txt
# Timestamp suffix so that every run writes to a fresh output directory.
OUTPUT="pig/output/test-pig-output-$(date +%Y%m%d%H%M%S)"
PIGSCRIPT=analyst_status_logs.pig

# Script names listed by the author, presumably alternative values for
# PIGSCRIPT (kept commented out so the shell does not try to execute them):
#analyst_500_404_month.pig
#analyst_500_404_day.pig
#analyst_404_percentage.pig
#analyst_500_percentage.pig
#analyst_unique_path.pig
#analyst_user_logs.pig
#analyst_status_logs.pig


# Each -p passes a NAME=value pair that the script reads as $NAME.
# Quoted so that paths containing spaces survive word splitting.
pig -p PIGGYBANK_PATH="$PIGGYBANK_PATH" -p INPUT="$INPUT" -p OUTPUT="$OUTPUT" "$PIGSCRIPT"


要分析的数据源，LOG 文件
46.20.45.18 - - [25/Dec/2012:23:00:25 +0100] "GET / HTTP/1.0" 302 - "-" "Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)" "-" "-" 46.20.45.18 "" 11011AEC9542DB0983093A100E8733F8 0
46.20.45.18 - - [25/Dec/2012:23:00:25 +0100] "GET /sign-in.jspx HTTP/1.0" 200 3926 "-" "Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)" "-" "-" 46.20.45.18 "" 11011AEC9542DB0983093A100E8733F8 0
69.59.28.19 - - [25/Dec/2012:23:01:25 +0100] "GET / HTTP/1.0" 302 - "-" "Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)" "-" "-" 69.59.28.19 "" 36D80DE7FE52A2D89A8F53A012307B0A 15


PIG脚本：
-- Computes, per month, the percentage of requests that returned HTTP 500.
-- Parameters (supplied with `pig -p`): PIGGYBANK_PATH, INPUT, OUTPUT.

-- Register the piggybank JAR, because DateExtractor is needed.
register '$PIGGYBANK_PATH';

-- Declare short aliases for the date-extraction functions.
DEFINE DATE_EXTRACT_MM
org.apache.pig.piggybank.evaluation.util.apachelogparser.DateExtractor('yyyy-MM');

DEFINE DATE_EXTRACT_DD
org.apache.pig.piggybank.evaluation.util.apachelogparser.DateExtractor('yyyy-MM-dd');

-- pig/input/test-pig-full.txt
-- Load the file named by $INPUT into Pig and name the columns; each record
-- is a flat tuple (a,b,c,...) with one field per regex capture group.
raw_logs = load '$INPUT' USING org.apache.pig.piggybank.storage.MyRegExLoader('^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] "(\\S+) (\\S+) (HTTP[^"]+)" (\\S+) (\\S+) "([^"]*)" "([^"]*)" "(\\S+)" "(\\S+)" (\\S+) "(.*)" (\\S+) (\\S+)')
as (remoteAddr: chararray,
n2: chararray,
n3: chararray,
time: chararray,
method: chararray,
path:chararray,
protocol:chararray,
status: int,
bytes_string: chararray,
referrer: chararray,
browser: chararray,
n10:chararray,
remoteLogname: chararray,
remoteAddr12: chararray,
path2: chararray,
sessionid: chararray,
n15: chararray
);

-- Filter the data: drop requests whose user agent mentions pingdom.
filter_logs = FILTER raw_logs BY not (browser matches '.*pingdom.*');
--item_logs = FOREACH raw_logs GENERATE browser;

--percent 500 logs
-- Project the records down to two fields: status and month.
reitem_percent_500_logs = FOREACH filter_logs GENERATE status,DATE_EXTRACT_MM(time) as month;
-- Group by month; each output record is (group, {bag of (status, month) rows}).
group_month_percent_500_logs = GROUP reitem_percent_500_logs BY (month);
-- Compute the per-group statistic with a nested FOREACH.
final_month_500_logs = FOREACH group_month_percent_500_logs
{
    -- Inside the nested block, reitem_percent_500_logs refers to the bag of
    -- rows belonging to the current group, so this counts the rows whose
    -- month equals the group key.
    total = COUNT(reitem_percent_500_logs);
    -- Filter the same per-group bag down to the rows with status == 500.
    t = filter reitem_percent_500_logs by status== 500; --create a bag which contains only T values
    -- Emit the group key (the month) and the percentage of 500 responses.
    generate flatten(group) as col1, 100*(double)COUNT(t)/(double)total;
}
STORE final_month_500_logs into '$OUTPUT' using PigStorage(',');



paulwong 2013-04-13 15:21 发表评论
]]>
鎶婂懡浠よ涓殑鍊間紶榪汸IG涓?/title><link>http://m.tkk7.com/paulwong/archive/2013/04/10/397645.html</link><dc:creator>paulwong</dc:creator><author>paulwong</author><pubDate>Wed, 10 Apr 2013 07:32:00 GMT</pubDate><guid>http://m.tkk7.com/paulwong/archive/2013/04/10/397645.html</guid><wfw:comment>http://m.tkk7.com/paulwong/comments/397645.html</wfw:comment><comments>http://m.tkk7.com/paulwong/archive/2013/04/10/397645.html#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://m.tkk7.com/paulwong/comments/commentRss/397645.html</wfw:commentRss><trackback:ping>http://m.tkk7.com/paulwong/services/trackbacks/397645.html</trackback:ping><description><![CDATA[<a target="_blank">http://wiki.apache.org/pig/ParameterSubstitution<br /> <br /> <br /> </a> <div> <div style="background-color:#eeeeee;font-size:13px;border:1px solid #CCCCCC;padding-right: 5px;padding-bottom: 4px;padding-left: 4px;padding-top: 4px;width: 98%;word-break:break-all"><!--<br /> <br /> Code highlighting produced by Actipro CodeHighlighter (freeware)<br /> http://www.CodeHighlighter.com/<br /> <br /> -->%pig -param input=/user/paul/sample.txt -param output=/user/paul/output/</div> </div><br /><br />PIG涓幏鍙?br /><div style="background-color:#eeeeee;font-size:13px;border:1px solid #CCCCCC;padding-right: 5px;padding-bottom: 4px;padding-left: 4px;padding-top: 4px;width: 98%;word-break:break-all"><!--<br /><br />Code highlighting produced by Actipro CodeHighlighter (freeware)<br />http://www.CodeHighlighter.com/<br /><br />-->records = LOAD <span style="color: #800080; ">$input</span>;</div><img src ="http://m.tkk7.com/paulwong/aggbug/397645.html" width = "1" height = "1" /><br><br><div align=right><a style="text-decoration:none;" href="http://m.tkk7.com/paulwong/" target="_blank">paulwong</a> 2013-04-10 15:32 <a href="http://m.tkk7.com/paulwong/archive/2013/04/10/397645.html#Feedback" target="_blank" 
style="text-decoration:none;">鍙戣〃璇勮</a></div>]]></description></item><item><title>PIG涓殑鍒嗙粍緇熻鐧懼垎姣?/title><link>http://m.tkk7.com/paulwong/archive/2013/04/10/397642.html</link><dc:creator>paulwong</dc:creator><author>paulwong</author><pubDate>Wed, 10 Apr 2013 06:13:00 GMT</pubDate><guid>http://m.tkk7.com/paulwong/archive/2013/04/10/397642.html</guid><wfw:comment>http://m.tkk7.com/paulwong/comments/397642.html</wfw:comment><comments>http://m.tkk7.com/paulwong/archive/2013/04/10/397642.html#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://m.tkk7.com/paulwong/comments/commentRss/397642.html</wfw:commentRss><trackback:ping>http://m.tkk7.com/paulwong/services/trackbacks/397642.html</trackback:ping><description><![CDATA[<a target="_blank">http://stackoverflow.com/questions/15318785/pig-calculating-percentage-of-total-for-a-field<br /><br /></a><a target="_blank">http://stackoverflow.com/questions/13476642/calculating-percentage-in-a-pig-query</a><img src ="http://m.tkk7.com/paulwong/aggbug/397642.html" width = "1" height = "1" /><br><br><div align=right><a style="text-decoration:none;" href="http://m.tkk7.com/paulwong/" target="_blank">paulwong</a> 2013-04-10 14:13 <a href="http://m.tkk7.com/paulwong/archive/2013/04/10/397642.html#Feedback" target="_blank" style="text-decoration:none;">鍙戣〃璇勮</a></div>]]></description></item><item><title>CombinedLogLoaderhttp://m.tkk7.com/paulwong/archive/2013/04/08/397510.htmlpaulwongpaulwongMon, 08 Apr 2013 03:28:00 GMThttp://m.tkk7.com/paulwong/archive/2013/04/08/397510.htmlhttp://m.tkk7.com/paulwong/comments/397510.htmlhttp://m.tkk7.com/paulwong/archive/2013/04/08/397510.html#Feedback0http://m.tkk7.com/paulwong/comments/commentRss/397510.htmlhttp://m.tkk7.com/paulwong/services/trackbacks/397510.html
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
 * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License at
 * 
 * 
http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License is
 * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and limitations under the License.
 
*/

package org.apache.pig.piggybank.storage.apachelog;

import java.util.regex.Pattern;

import org.apache.pig.piggybank.storage.RegExLoader;

/**
 * CombinedLogLoader is used to load logs based on Apache's combined log format, based on a format like
 * 
 * LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
 * 
 * The log filename ends up being access_log from a line like
 * 
 * CustomLog logs/combined_log combined
 * 
 * Example:
 * 
 * raw = LOAD 'combined_log' USING org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader AS
 * (remoteAddr, remoteLogname, user, time, method, uri, proto, status, bytes, referer, userAgent);
 * 
 
*/

public class CombinedLogLoader extends RegExLoader {
    // 1.2.3.4 - - [30/Sep/2008:15:07:53 -0400] "GET / HTTP/1.1" 200 3190 "-"
    
// "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4; en-us) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1"
    private final static Pattern combinedLogPattern = Pattern
        .compile("^(\\S+)\\s+(\\S+)\\s+(\\S+)\\s+.(\\S+\\s+\\S+).\\s+\"(\\S+)\\s+(.+?)\\s+(HTTP[^\"]+)\"\\s+(\\S+)\\s+(\\S+)\\s+\"([^\"]*)\"\\s+\"(.*)\"$");

    public Pattern getPattern() {
        return combinedLogPattern;
    }
}


paulwong 2013-04-08 11:28 发表评论
]]>
Analyzing Apache logs with Pig http://m.tkk7.com/paulwong/archive/2013/04/08/397489.htmlpaulwongpaulwongSun, 07 Apr 2013 18:06:00 GMThttp://m.tkk7.com/paulwong/archive/2013/04/08/397489.htmlhttp://m.tkk7.com/paulwong/comments/397489.htmlhttp://m.tkk7.com/paulwong/archive/2013/04/08/397489.html#Feedback0http://m.tkk7.com/paulwong/comments/commentRss/397489.htmlhttp://m.tkk7.com/paulwong/services/trackbacks/397489.html

Analyzing log files, churning them and extracting meaningful information is a potential use case in Hadoop. We don’t have to go in for MapReduce programming for these analyses; instead we can go for tools like Pig and Hive for this log analysis. I’d just give you a start off on the analysis part. Let us consider Pig for apache log analysis. Pig has some built in libraries that would help us load the apache log files into pig and also some cleanup operation on string values from crude log files. All the functionalities are available in the piggybank.jar mostly available under pig/contrib/piggybank/java/ directory. As the first step we need to register this jar file with our pig session then only we can use the functionalities in our Pig Latin
1.       Register PiggyBank jar
REGISTER /usr/lib/pig/contrib/piggybank/java/piggybank.jar;
Once we have registered the jar file we need to define a few functionalities to be used in our Pig Latin. For any basic apache log analysis we need a loader to load the log files in a column oriented format in pig, we can create a apache log loader as
2.       Define a log loader
DEFINE ApacheCommonLogLoader org.apache.pig.piggybank.storage.apachelog.CommonLogLoader();
(Piggy Bank has other log loaders as well)
In apache log files the default format of date is ‘dd/MMM/yyyy:HH:mm:ss Z’ . But such a date won’t help us much in case of log analysis we may have to extract date without time stamp. For that we use DateExtractor()
3.       Define Date Extractor
DEFINE DayExtractor org.apache.pig.piggybank.evaluation.util.apachelogparser.DateExtractor('yyyy-MM-dd');
Once we have the required functionalities with us we need to first load the log file into pig
4.       Load apachelog file into pig
--load the log files from hdfs into pig using CommonLogLoader
logs = LOAD '/userdata/bejoys/pig/p01/access.log.2011-01-01' USING ApacheCommonLogLoader AS (ip_address, rfc, userId, dt, request, serverstatus, returnobject, referersite, clientbrowser);
Now we are ready to dive in for the actual log analysis. There would be multiple information you need to extract out of a log; we’d see a few of those common requirements out here
Note: you need to first register the jar, define the classes to be used and load the log files into pig before trying out any of the pig latin below
Requirement 1: Find unique hits per day
PIG Latin
-- Requires the earlier steps: piggybank registered, DayExtractor defined,
-- and the `logs` relation loaded.
--Extracting the day alone and grouping records based on days
-- (GROUP has no AS clause; the key is named in the GENERATE below)
grpd = GROUP logs BY DayExtractor(dt);
--looping through each group to get the unique no of userIds
cntd = FOREACH grpd
{
                -- logs here is the bag of records for the current day
                tempId =  logs.userId;
                uniqueUserId = DISTINCT tempId;
                GENERATE group AS day,COUNT(uniqueUserId) AS cnt;
}
--sorting the processed records based on no of unique user ids in descending order
srtd = ORDER cntd BY cnt desc;
--storing the final result into a hdfs directory
STORE srtd INTO '/userdata/bejoys/pig/ApacheLogResult1';
Requirement 2: Find unique hits to websites (IPs) per day
PIG Latin
-- Requires the earlier steps: piggybank registered, DayExtractor defined,
-- and the `logs` relation loaded.
--Extracting the day alone and grouping records based on days and ip address
-- (GROUP has no AS clause, so the key expressions are left unnamed here)
grpd = GROUP logs BY (DayExtractor(dt),ip_address);
--looping through each group to get the unique no of userIds
cntd = FOREACH grpd
{
                -- logs here is the bag of records for the current (day, ip_address) pair
                tempId =  logs.userId;
                uniqueUserId = DISTINCT tempId;
                -- group is the composite key tuple (day, ip_address)
                GENERATE group AS day_ip,COUNT(uniqueUserId) AS cnt;
}
--sorting the processed records based on no of unique user ids in descending order
srtd = ORDER cntd BY cnt desc;
--storing the final result into a hdfs directory
-- (stray spaces inside the quoted path removed)
STORE srtd INTO '/userdata/bejoys/pig/ApacheLogResult2';
Note: When you use pig latin in grunt shell we need to know a few factors
1.       When we issue a pig statement in grunt and press enter only the semantic check is being done, no execution is triggered.
2.       All the pig statements are executed only after the STORE command is submitted, ie map reduce programs would be triggered only after STORE is submitted
3.       Also in this case you don’t have to load the log files again and again to pig once it is loaded we can use the same for all related operations in that session. Once you are out of the grunt shell the loaded files are lost, you’d have to perform the register and log file loading steps all over again.


paulwong 2013-04-08 02:06 发表评论
]]>
PIG灝忚http://m.tkk7.com/paulwong/archive/2013/04/05/397411.htmlpaulwongpaulwongFri, 05 Apr 2013 13:33:00 GMThttp://m.tkk7.com/paulwong/archive/2013/04/05/397411.htmlhttp://m.tkk7.com/paulwong/comments/397411.htmlhttp://m.tkk7.com/paulwong/archive/2013/04/05/397411.html#Feedback0http://m.tkk7.com/paulwong/comments/commentRss/397411.htmlhttp://m.tkk7.com/paulwong/services/trackbacks/397411.html浠涔堟槸PIG
鏄竴縐嶈璁¤璦錛岄氳繃璁捐鏁版嵁鎬庝箞嫻佸姩錛岀劧鍚庣敱鐩稿簲鐨勫紩鎿庡皢姝ゅ彉鎴怣APREDUCE JOB鍘籋ADOOP涓繍琛屻?/div>
PIG涓嶴QL
涓よ呮湁鐩稿悓涔嬪錛屾墽琛屼竴涓垨澶氫釜璇彞錛岀劧鍚庡嚭鏉ヤ竴浜涚粨鏋溿?/div>
浣嗕笉鍚岀殑鏄紝SQL瑕佸厛鎶婃暟鎹鍒拌〃涓墠鑳芥墽琛岋紝SQL涓嶅叧蹇冧腑闂村浣曞仛錛屽嵆鍙戜竴涓猄QL璇彞榪囧幓錛屽氨鏈夌粨鏋滃嚭鏉ャ?/div>
PIG錛屾棤欏誨鏁版嵁鍒拌〃涓紝浣嗚璁捐鐩村埌鍑虹粨鏋滅殑涓棿榪囩▼錛屾楠ゅ浣曠瓑絳夈?/div>

paulwong 2013-04-05 21:33 发表评论
]]>PIG璧勬簮http://m.tkk7.com/paulwong/archive/2013/04/05/397406.htmlpaulwongpaulwongFri, 05 Apr 2013 10:19:00 GMThttp://m.tkk7.com/paulwong/archive/2013/04/05/397406.htmlhttp://m.tkk7.com/paulwong/comments/397406.htmlhttp://m.tkk7.com/paulwong/archive/2013/04/05/397406.html#Feedback0http://m.tkk7.com/paulwong/comments/commentRss/397406.htmlhttp://m.tkk7.com/paulwong/services/trackbacks/397406.html http://guoyunsky.iteye.com/blog/1317084

http://guoyunsky.iteye.com/category/196632

Hadoop瀛︿範絎旇(9) Pig綆浠?br /> http://www.distream.org/?p=385


[hadoop緋誨垪]Pig鐨勫畨瑁呭拰綆鍗曠ず渚?br /> http://blog.csdn.net/inkfish/article/details/5205999


Hadoop and Pig for Large-Scale Web Log Analysis
http://www.devx.com/Java/Article/48063


Pig瀹炴垬
http://www.cnblogs.com/xuqiang/archive/2011/06/06/2073601.html


[鍘熷垱]Apache Pig涓枃鏁欑▼錛堣繘闃訛級
http://www.codelast.com/?p=4249


鍩轟簬hadoop騫沖彴鐨刾ig璇█瀵筧pache鏃ュ織緋葷粺鐨勫垎鏋?br /> http://goodluck-wgw.iteye.com/blog/1107503


!!Pig璇█
http://hi.baidu.com/cpuramdisk/item/a2980b78caacfa3d71442318


Embedding Pig In Java Programs
http://wiki.apache.org/pig/EmbeddedPig


涓涓猵ig浜嬩緥(REGEX_EXTRACT_ALL, DBStorage錛岀粨鏋滃瓨榪涙暟鎹簱)
http://www.myexception.cn/database/1256233.html


Programming Pig
http://ofps.oreilly.com/titles/9781449302641/index.html


[鍘熷垱]Apache Pig鐨勪竴浜涘熀紜姒傚康鍙婄敤娉曟葷粨錛?錛?br /> http://www.codelast.com/?p=3621


!PIG鎵嬪唽
http://pig.apache.org/docs/r0.11.1/func.html#built-in-functions

paulwong 2013-04-05 18:19 发表评论
]]>
主站蜘蛛池模板: 日木av无码专区亚洲av毛片| 精品亚洲一区二区三区在线播放| 亚洲av日韩av无码| 男女一进一出抽搐免费视频| 亚洲国产成人五月综合网 | 亚洲综合久久综合激情久久| 精品视频一区二区三区免费| 国产亚洲一区二区精品| 中文无码成人免费视频在线观看 | 特级毛片爽www免费版| 亚洲高清最新av网站| 九九免费精品视频在这里| 国产精品亚洲mnbav网站 | 无人视频免费观看免费视频 | 亚洲男女一区二区三区| 在线观看H网址免费入口| 亚洲成人激情小说| 免费欧洲美女牲交视频| 精品久久久久久无码免费| 亚洲av综合avav中文| free哆啪啪免费永久| 亚洲午夜无码久久| 亚洲高清国产拍精品青青草原| 精品国产免费一区二区三区| 亚洲午夜久久久精品影院| 免费av欧美国产在钱| 手机永久免费的AV在线电影网| 久久91亚洲人成电影网站| 麻豆高清免费国产一区| 日韩欧美亚洲国产精品字幕久久久 | 成人福利免费视频| 国产亚洲视频在线观看| 亚洲av无码成h人动漫无遮挡| 国产国产人免费视频成69堂| 羞羞漫画小舞被黄漫免费| 亚洲Av无码精品色午夜| 日本不卡免费新一二三区| 日韩精品久久久久久免费| 亚洲av日韩av永久无码电影| 亚洲色WWW成人永久网址| 野花高清在线观看免费3中文|