企業中對於服務器常規監控都有部署監控軟件系統,如常用的zabbix、ganglia、nagios、observer等,但是對於特殊的業務監控,比如日誌中出現某些關鍵字多少次後即報警通知負責人,對某些HTTP接口心跳監控或結果正確性檢測等,這些特定需求運維也需要開發相應的腳本進行支持。一般監控有變更都需要通知運維人員來操作,我們也可以自己開發腳本實現簡單的監控。
import smtplib
import socket
import fcntl
import struct
import os
import commands
import time
from email.mime.text import MIMEText
# Recipients of the alert mails; one address per list entry.
mailto_list = [
    'david1228@foxmail.com',
]

# Checks to run. Each key is the alert text that monitor_list puts into the
# mail body; each value holds the settings of that check:
#   logfile  - log file to inspect
#   readnum  - number of trailing log lines that are examined
#   kword    - keyword searched for in those lines
#   limitnum - threshold the keyword count is compared against
#   sg       - comparison that raises the alert: '<' (count below the
#              threshold) or '>' (count above the threshold)
# Numeric settings are stored as strings: readnum is concatenated into a
# shell command and limitnum is converted with int() by monitor_list.
check_list = {
    'mq:geturl_updatevideo:flush cache OK has a problem, please check!': {
        'logfile': '/home/ldw/logs/geturl/online/geturl_updatevideo.log',
        'limitnum': '10',
        'readnum': '200',
        'kword': 'flush cache OK',
        'sg': '<',
    },
    # NOTE(review): with sg '<' and limitnum '1' this check alerts when NO
    # 'message error' line is found - confirm that '<' (not '>') is intended.
    'mq:geturl_updatevideo has message error, please check!': {
        'logfile': '/home/ldw/logs/geturl/online/geturl_updatevideo.log',
        'limitnum': '1',
        'readnum': '2000',
        'kword': 'message error',
        'sg': '<',
    },
}
# Look up the IPv4 address of a network interface, given the interface
# name, e.g. eth0, eth1 or bond0 (bonded NICs).
def get_ip_address(ifname):
    """Return the IPv4 address of interface *ifname* as a dotted-quad string.

    ifname may be a byte string or a text string; text is encoded to bytes
    because struct.pack('256s', ...) only accepts bytes.  Only the first 15
    bytes of the name are used.

    Raises IOError (OSError on Python 3) when the ioctl fails, for example
    when the interface does not exist.
    """
    if not isinstance(ifname, bytes):
        ifname = ifname.encode('utf-8')
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        ifreq = fcntl.ioctl(
            s.fileno(),
            0x8915,  # SIOCGIFADDR
            struct.pack('256s', ifname[:15])
        )
    finally:
        # The socket is only a handle for the ioctl; always release it,
        # including when the ioctl raises.
        s.close()
    # Bytes 20..23 of the returned buffer are handed to inet_ntoa.
    return socket.inet_ntoa(ifreq[20:24])
# Send an alert mail.
def send_mail(to_list, sub, content):
    """Mail *content* with subject *sub* to every address in *to_list*.

    The body is also printed to stdout.  The module-level settings
    mail_host, mail_user and mail_pass are used for the SMTP session, and
    mail_user is used as the sender address.

    Returns True when the message was handed to the SMTP server, False when
    any step failed (the error text is printed).
    """
    print(content)
    me = mail_user
    # A charset must be set explicitly so that Chinese text can be sent.
    msg = MIMEText(content, _subtype='plain', _charset='gb2312')
    msg['Subject'] = sub
    msg['From'] = me
    # Address lists in mail headers are comma-separated (RFC 5322).
    msg['To'] = ", ".join(to_list)
    server = None
    try:
        server = smtplib.SMTP()
        server.connect(mail_host)
        server.login(mail_user, mail_pass)
        server.sendmail(me, to_list, msg.as_string())
        return True
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        print(str(e))
        return False
    finally:
        # Close the connection on the failure paths too, not only on success.
        if server is not None:
            server.close()
# SMTP account used to send the alert mails.
# NOTE(review): the credentials are hard-coded in this file.
mail_host = "smtp.126.com"
mail_user = "xyz@126.com"
mail_pass = "xyz"
# NOTE(review): mail_postfix is not referenced by the code visible here.
mail_postfix = "126.com"

# Text appended after the host IP tag when the script is run directly.
content = ""

# A log file is only checked when it was modified less than this many
# seconds ago.
timeddiff = 300
def monitor_list(ethip):
    """Run every check in check_list and mail an alert for each one that trips.

    For each entry, the last `readnum` lines of `logfile` are searched for
    `kword` (tail | grep | wc -l) and the number of matching lines is
    compared with `limitnum` using the `sg` operator ('<' or '>').  When
    the comparison holds, a mail is sent to mailto_list.

    ethip is the text placed at the start of each alert body, in front of
    the check's key.

    A log file that is missing, or was last modified timeddiff seconds or
    more ago, is skipped.  A check whose command output is not a number is
    reported and skipped, so the remaining checks still run.
    """
    for k, cfg in check_list.items():
        logfile = cfg['logfile']
        readnum = cfg['readnum']
        limitnum = cfg['limitnum']
        kword = cfg['kword']
        sg = cfg['sg']

        # Only inspect files modified within the last `timeddiff` seconds.
        # A single stat call avoids failing if the file vanishes (e.g. log
        # rotation) between an existence test and the stat.
        try:
            age = time.time() - os.stat(logfile).st_mtime
        except OSError:
            age = None
        if age is None or not (age < timeddiff):
            print(" File has not been updated : " + logfile)
            continue

        cmdstring = 'tail -n ' + readnum + ' ' + logfile + ' | grep "' + kword + '" |wc -l 2>&1'
        cmdoutput = commands.getstatusoutput(cmdstring)[1]
        # The captured output may contain error text from the pipeline in
        # addition to the count; do not let that abort the other checks.
        try:
            count = int(cmdoutput)
        except ValueError:
            print(" Unexpected command output for " + logfile + ": " + cmdoutput)
            continue

        if (sg == '<' and count < int(limitnum)) or (sg == '>' and count > int(limitnum)):
            # Mail body: carries the server IP so the problem host is easy to find.
            send_mail(mailto_list, "Monitor Warning!!!",
                      ethip + k + ": " + cmdoutput + "/" + readnum + "\n")
        else:
            print(" Normal monitoring service:" + logfile)
# Script entry point: tag alerts with the bracketed IP of bond0 followed by
# a newline and the module-level content text, then run all checks once.
if __name__ == '__main__':
    ethip = "[%s]\n%s" % (get_ip_address('bond0'), content)
    monitor_list(ethip)