企业中对于服务器常规监控都有部署监控软件系统,如常用的zabbix、ganglia、nagios、observer等,但是对于特殊的业务监控,比如日志中出现某些关键字多少次后即报警通知负责人,对某些Http接口心跳监控或结果正确性检测等,这些特定需求运维也需要开发相应的脚本进行支持。一般监控有变更都需要通知运维人员来操作,我们也可以自己开发脚本实现简单的监控。
import commands
import fcntl
import os
import smtplib
import socket
import struct
import time
from email.mime.text import MIMEText
# Recipients of the alert mails; list further addresses as additional,
# comma-separated items.
mailto_list = ['david1228@foxmail.com']
# Log checks to run, keyed by the alert text that is mailed when a check trips.
# Every rule is a dict of string-valued settings:
#   logfile  - path of the log file to inspect
#   readnum  - number of trailing lines of the log to examine
#   kword    - keyword to count within those lines
#   limitnum - threshold the keyword count is compared against
#   sg       - '<' alerts when count < limitnum, '>' alerts when count > limitnum
# NOTE(review): the 'message error' rule uses sg '<' with limitnum '1', i.e. it
# alerts when NO matching line is found -- confirm this is the intended direction.
check_list = {
    'mq:geturl_updatevideo:flush cache OK has a problem, please check!': dict(
        logfile='/home/ldw/logs/geturl/online/geturl_updatevideo.log',
        limitnum='10',
        readnum='200',
        kword='flush cache OK',
        sg='<',
    ),
    'mq:geturl_updatevideo has message error, please check!': dict(
        logfile='/home/ldw/logs/geturl/online/geturl_updatevideo.log',
        limitnum='1',
        readnum='2000',
        kword='message error',
        sg='<',
    ),
}
def get_ip_address(ifname):
    """Return the IPv4 address of a network interface as a dotted string.

    Args:
        ifname: interface name such as ``eth0``, ``eth1`` or ``bond0``
            (a bonded NIC). Text names are encoded to bytes before packing.

    Returns:
        The address produced by ``socket.inet_ntoa`` from the ioctl reply.

    Raises:
        IOError/OSError: if the ioctl fails, e.g. the interface does not exist.
    """
    # struct.pack('256s', ...) needs a byte string; on Python 2 ``bytes`` is
    # ``str`` so plain names pass through untouched.
    if not isinstance(ifname, bytes):
        ifname = ifname.encode('utf-8')

    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        # The name is cut to 15 bytes and padded into a 256-byte request buffer.
        request = struct.pack('256s', ifname[:15])
        reply = fcntl.ioctl(sock.fileno(), 0x8915, request)  # SIOCGIFADDR
    finally:
        # The socket only serves as an ioctl handle; always release it.
        sock.close()
    # Bytes 20..23 of the reply hold the packed IPv4 address.
    return socket.inet_ntoa(reply[20:24])
def send_mail(to_list, sub, content):
    """Send a plain-text mail through the configured SMTP account.

    Uses the module-level ``mail_host``, ``mail_user`` and ``mail_pass``
    settings for the connection and login.

    Args:
        to_list: list of recipient addresses.
        sub: subject line of the mail.
        content: message body; it is also echoed to stdout.

    Returns:
        True when the message was sent without an exception, False otherwise
        (the error text is printed).
    """
    print(content)
    me = mail_user
    # The body may contain Chinese text, so the charset is set explicitly.
    msg = MIMEText(content, _subtype='plain', _charset='gb2312')
    msg['Subject'] = sub
    msg['From'] = me
    # Address lists in mail headers are comma separated (RFC 5322), not ';'.
    msg['To'] = ", ".join(to_list)

    server = None
    try:
        server = smtplib.SMTP()
        server.connect(mail_host)
        server.login(mail_user, mail_pass)
        server.sendmail(me, to_list, msg.as_string())
        return True
    except Exception as e:
        # Best-effort alerting: report the failure and let the caller go on.
        print(str(e))
        return False
    finally:
        # Release the connection on the failure path as well.
        if server is not None:
            server.close()
# SMTP account used to authenticate when sending alert mails.
# NOTE(review): the credentials are hard-coded in the source file; consider
# loading them from a protected location instead.
mail_host = "smtp.126.com"
mail_user = "xyz@126.com"
mail_pass = "xyz"
mail_postfix = "126.com"
# Text appended after the "[ip]" tag when the alert prefix is built at startup.
content = ""
# A log file is only checked if it was modified less than this many seconds ago.
timeddiff = 300
def monitor_list(ethip):
    """Run every rule in ``check_list`` and mail an alert for each one that trips.

    For each rule whose log file exists and was modified less than
    ``timeddiff`` seconds ago, the lines matching ``kword`` among the last
    ``readnum`` lines are counted with a ``tail | grep | wc -l`` pipeline.
    The count is compared with ``limitnum`` in the direction given by ``sg``
    ('<' or '>'); when the comparison holds, a mail is sent to ``mailto_list``.

    Args:
        ethip: text placed at the start of the alert body (the caller passes a
            "[ip]" tag) so the affected server can be identified.
    """
    for k, rule in check_list.items():
        logfile = rule['logfile']
        readnum = rule['readnum']
        limitnum = rule['limitnum']
        kword = rule['kword']
        sg = rule['sg']

        # Only inspect logs that exist and were written to recently.
        if not (os.path.exists(logfile)
                and (time.time() - os.stat(logfile).st_mtime) < timeddiff):
            print(" File has not been updated : " + logfile)
            continue

        # NOTE: the settings are interpolated into the shell command as-is, so
        # the configured path and keyword must be shell-safe.
        cmdstring = ('tail -n ' + readnum + ' ' + logfile +
                     ' | grep "' + kword + '" |wc -l 2>&1')
        cmdstatus, cmdoutput = commands.getstatusoutput(cmdstring)

        try:
            count = int(cmdoutput)
        except ValueError:
            # getstatusoutput captures stderr too, so a failing tail/grep
            # yields non-numeric output; report it and keep checking the
            # remaining rules instead of aborting the whole run.
            print(" Monitor command failed (status " + str(cmdstatus) + "): "
                  + cmdoutput)
            continue

        threshold = int(limitnum)
        if (sg == '<' and count < threshold) or (sg == '>' and count > threshold):
            # Mail body carries the server tag so the problem host is easy to find.
            content = ethip + k + ": " + cmdoutput + "/" + readnum + "\n"
            send_mail(mailto_list, "Monitor Warning!!!", content)
        else:
            print(" Normal monitoring service:" + logfile)
if __name__ == '__main__':
    # Tag alerts with this server's address so the affected host is obvious.
    try:
        host_tag = get_ip_address('bond0')
    except (IOError, OSError):
        # No usable bond0 interface: fall back to the hostname rather than
        # aborting before any check has run.
        host_tag = socket.gethostname()
    ethip = "[" + host_tag + "]\n" + content
    monitor_list(ethip)