当前位置: 首页 > 网络技术 > 网络应用 > 正文

zabbix监控docker

时间:2016-12-29

当然思路和脚本参考了网上的,但网上的那些有好多错误,以下为本人经过更改调试后的。

如有疑问可以联系我 QQ: 279379936,一起改进优化。

Centos6下安装easy_install

# yum install python-setuptools

安装python 的docker模块

# easy_install docker-py

sudo: sorry, you must have a tty to run sudo

使用不同账户执行脚本的时候,sudo经常会碰到 "sudo: sorry, you must have a tty to run sudo" 这个情况,其实修改一下sudo的配置就好了

# vim /etc/sudoers (最好用visudo命令)

注释掉 Defaults requiretty 一行

#Defaults requiretty

意思就是sudo默认需要tty终端。注释掉就可以在后台执行了。

Zabbix客户端的部署:

#vim /opt/zabbix/etc/zabbix_agentd.conf

#docker

UserParameter=docker_discovery[*],cat /opt/zabbix/script/docker_cons.txt    //用来发现宿主机上存活的容器

UserParameter=docker_stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2 //用来监控容器的各种指标,后面的脚本会具体体现,看不懂脚本的请路过。

UserParameter=docker.tomcat.discovery,cat /opt/zabbix/script/docker_tomcat.txt  //用来发现容器启动的tomcat服务

UserParameter=docker.tomcat.stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2 $3  //用来监控容器中tomcat的端口

UserParameter=docker.nginx.discovery,cat /opt/zabbix/script/docker_nginx.txt  //用来发现容器启动的nginx服务

UserParameter=docker.nginx.stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2 $3 //用来监控容器中nginx的端口

监控脚本1,用来监控容器的CPU 内存 网卡,服务端口

#cat /opt/zabbix/script/zabbix_monitor_docker.py

#!/usr/bin/env python

#-*- coding:utf-8 -*-

#email:279379936@qq.com

from docker import Client

import sys

import subprocess

import os

import time

import commands

def _read_interface_bytes(container_name, interface='eth1'):
    """Return the ``(rx_bytes, tx_bytes)`` counters of a container interface.

    Runs ``docker exec <container_name> ifconfig <interface>`` with an
    argument list instead of a shell string, so the container name is never
    interpreted by a shell, and extracts the byte counters in Python.

    Raises:
        RuntimeError: if a byte counter cannot be found in the output.
    """
    import re

    process = subprocess.Popen(
        ['docker', 'exec', container_name, 'ifconfig', interface],
        stdout=subprocess.PIPE, universal_newlines=True)
    output = process.communicate()[0]

    counters = []
    for direction in ('RX', 'TX'):
        # Accepts "RX bytes:123 (...)" as well as
        # "RX packets 4  bytes 123 (...)".
        match = re.search(
            r'%s\s+(?:packets\s+\d+\s+)?bytes[:\s]\s*(\d+)' % direction,
            output)
        if match is None:
            raise RuntimeError('no %s byte counter for %s in container %s'
                               % (direction, interface, container_name))
        counters.append(int(match.group(1)))
    return tuple(counters)


def check_container_stats(container_name, collect_item):
    """Return one metric of a running container.

    Args:
        container_name: name of the container to inspect.
        collect_item: one of ``cpu_total_usage``, ``cpu_system_usage``,
            ``cpu_percent``, ``mem_usage``, ``mem_limit``, ``mem_percent``,
            ``network_rx_bytes`` or ``network_tx_bytes``.

    Returns:
        The CPU items are differences between two consecutive samples of the
        docker stats stream; ``cpu_percent`` and ``mem_percent`` are floats
        rounded to two decimals; the network items are the number of bytes
        counted on ``eth1`` during a one second pause.

    Raises:
        ValueError: if ``collect_item`` is not a supported item.

    The CPU and memory items read the module-level ``docker_client``.
    """
    import json

    network_items = {'network_rx_bytes': 0, 'network_tx_bytes': 1}
    stats_items = ('cpu_total_usage', 'cpu_system_usage', 'cpu_percent',
                   'mem_usage', 'mem_limit', 'mem_percent')

    if collect_item in network_items:
        # Network counters do not need the stats stream at all.
        index = network_items[collect_item]
        old_counters = _read_interface_bytes(container_name)
        time.sleep(1)
        new_counters = _read_interface_bytes(container_name)
        return new_counters[index] - old_counters[index]

    if collect_item not in stats_items:
        raise ValueError('unsupported collect_item: %r' % (collect_item,))

    container_collect = docker_client.stats(container_name)
    try:
        # The first sample is discarded, as in the original script
        # (presumably it is not usable for a delta - TODO confirm).
        next(container_collect)
        # json.loads instead of eval: the payload is JSON, and eval fails on
        # the JSON literals true/false/null.
        old_result = json.loads(next(container_collect))
        new_result = json.loads(next(container_collect))
    finally:
        container_collect.close()

    old_cpu = old_result['cpu_stats']
    new_cpu = new_result['cpu_stats']
    cpu_total_usage = (new_cpu['cpu_usage']['total_usage']
                       - old_cpu['cpu_usage']['total_usage'])
    cpu_system_usage = (new_cpu['system_cpu_usage']
                        - old_cpu['system_cpu_usage'])

    if collect_item == 'cpu_total_usage':
        return cpu_total_usage
    if collect_item == 'cpu_system_usage':
        return cpu_system_usage
    if collect_item == 'cpu_percent':
        if cpu_system_usage == 0:
            # No system time elapsed between the samples; avoid dividing by 0.
            return 0.0
        cpu_num = len(old_cpu['cpu_usage']['percpu_usage'])
        return round(
            (float(cpu_total_usage) / float(cpu_system_usage))
            * cpu_num * 100.0, 2)

    mem_usage = new_result['memory_stats']['usage']
    mem_limit = new_result['memory_stats']['limit']
    if collect_item == 'mem_usage':
        return mem_usage
    if collect_item == 'mem_limit':
        return mem_limit
    # Only 'mem_percent' is left at this point.
    return round(float(mem_usage) / float(mem_limit) * 100.0, 2)

if __name__ == "__main__":
    # Usage:
    #   zabbix_monitor_docker.py <container> <item>        -> prints a metric
    #   zabbix_monitor_docker.py <container> port <port>   -> prints 0 or 1
    # Anything else prints 1.

    # Module-level client read by check_container_stats().
    docker_client = Client(base_url='unix://var/run/docker.sock', version='1.19')

    if len(sys.argv) == 3:
        print(check_container_stats(sys.argv[1], sys.argv[2]))
    elif len(sys.argv) == 4 and sys.argv[2] == 'port':
        container_name = sys.argv[1]
        collect_item = int(sys.argv[3])
        # Argument list instead of a shell string: the container name is
        # never interpreted by a shell.
        process = subprocess.Popen(
            ['/usr/bin/docker', 'exec', container_name, 'netstat', '-ntpul'],
            stdout=subprocess.PIPE, universal_newlines=True)
        listing = process.communicate()[0]
        # Same test as the former "grep <port>": any line containing the
        # port number counts as a hit. 0 = found, 1 = not found.
        found = any(str(collect_item) in line for line in listing.splitlines())
        print('0' if found else '1')
    else:
        print('1')

说明:上面脚本为通过python的docker模块去抓取数据,由于各种原因,有些机器无法安装python模块,可通过下面脚本实现:

# cat /opt/zabbix/script/zabbix_monitor_docker.py

#!/usr/bin/env python

#-*- coding:utf-8 -*-

#email:279379936@qq.com

import sys

import subprocess

import time

import commands

import re

def _find_container_cgroup_dir(cgroup_dir, container_name):
    """Return the sub-directory of ``cgroup_dir`` that belongs to a container.

    The container id is taken from the first column of ``sudo docker ps``;
    the directory is the entry of ``cgroup_dir`` whose name contains that id.

    Raises:
        RuntimeError: if the container is not listed by ``docker ps`` or no
            matching directory exists.
    """
    import os

    process = subprocess.Popen(['sudo', '/usr/bin/docker', 'ps'],
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    listing = process.communicate()[0]

    rows = [line.split() for line in listing.splitlines()
            if line.strip() and container_name in line]
    # Prefer an exact match on the last column (the container name) so that
    # "web" does not pick up "web2"; fall back to the substring match the
    # original grep performed.
    exact_rows = [row for row in rows if row[-1] == container_name]
    chosen = exact_rows or rows
    if not chosen:
        raise RuntimeError('container %s not found in docker ps'
                           % container_name)
    con_id = chosen[0][0]

    for entry in os.listdir(cgroup_dir):
        if con_id in entry:
            return cgroup_dir + '/' + entry
    raise RuntimeError('no directory for container %s (%s) in %s'
                       % (container_name, con_id, cgroup_dir))


def get_memory_container_dir(memory_dir, container_name):
    """Return the memory cgroup directory of ``container_name``.

    Args:
        memory_dir: directory holding one sub-directory per container.
        container_name: name of the container as shown by ``docker ps``.

    Raises:
        RuntimeError: if the container or its directory cannot be found.
    """
    return _find_container_cgroup_dir(memory_dir, container_name)


def get_cpu_container_dir(cpu_dir, container_name):
    """Return the cpu accounting cgroup directory of ``container_name``.

    Args:
        cpu_dir: directory holding one sub-directory per container.
        container_name: name of the container as shown by ``docker ps``.

    Raises:
        RuntimeError: if the container or its directory cannot be found.
    """
    return _find_container_cgroup_dir(cpu_dir, container_name)

def get_cpu_info(container_name):
    """Return one stats sample of a container as a dict.

    Sends ``GET /containers/<container_name>/stats?stream=false`` to
    ``/var/run/docker.sock`` through ``sudo nc -U`` and decodes the response
    line that contains ``read``.

    Args:
        container_name: name of the container to query.

    Raises:
        ValueError: if the selected line is not valid JSON.
    """
    import json

    # NOTE(review): container_name is interpolated into a shell command, so
    # callers must only pass trusted names.
    info = commands.getoutput(
        'echo -ne "GET /containers/%s/stats?stream=false HTTP/1.1\r\n\r\n"'
        '|sudo /usr/bin/nc -U /var/run/docker.sock|grep read' % container_name)
    # json.loads instead of eval: the payload is JSON, and eval fails on the
    # JSON literals true/false/null.
    return json.loads(info)

def _cpu_usage_rates(container_name):
    """Return ``(total_rate, system_rate, cpu_num)`` from two stats samples.

    The rates are the counter differences between two ``get_cpu_info()``
    samples, divided (integer division) by the number of seconds between
    their ``read`` timestamps.
    """
    old_result = get_cpu_info(container_name)
    new_result = get_cpu_info(container_name)

    # Splitting the "read" timestamp on ':' and '.' leaves the seconds
    # field at index 2.
    old_s = int(re.split(r'\:|\.', old_result['read'])[2])
    new_s = int(re.split(r'\:|\.', new_result['read'])[2])
    # Modulo 60 handles a minute wrap-around between the samples; if both
    # samples carry the same second, fall back to the original default of 2.
    time_interval = (new_s - old_s) % 60 or 2

    old_cpu = old_result['cpu_stats']
    new_cpu = new_result['cpu_stats']
    total_rate = (new_cpu['cpu_usage']['total_usage']
                  - old_cpu['cpu_usage']['total_usage']) // time_interval
    system_rate = (new_cpu['system_cpu_usage']
                   - old_cpu['system_cpu_usage']) // time_interval
    cpu_num = len(old_cpu['cpu_usage']['percpu_usage'])
    return total_rate, system_rate, cpu_num


def _read_memory_rss(container_dir):
    """Return the ``rss`` value of ``<container_dir>/memory.stat`` as an int."""
    with open(container_dir + '/memory.stat') as stat_file:
        for line in stat_file:
            fields = line.split()
            # Exact key match: a "^rss" prefix match would also pick up
            # keys such as rss_huge.
            if len(fields) == 2 and fields[0] == 'rss':
                return int(fields[1])
    raise RuntimeError('no rss entry in %s/memory.stat' % container_dir)


def _read_memory_limit(container_dir):
    """Return ``<container_dir>/memory.limit_in_bytes`` as an int."""
    with open(container_dir + '/memory.limit_in_bytes') as limit_file:
        return int(limit_file.read().strip())


def _read_interface_bytes(container_name, interface='eth1'):
    """Return the ``(rx_bytes, tx_bytes)`` counters of a container interface.

    Runs ``sudo docker exec <container_name> ifconfig <interface>`` with an
    argument list (no shell) and extracts the byte counters in Python.

    Raises:
        RuntimeError: if a byte counter cannot be found in the output.
    """
    process = subprocess.Popen(
        ['sudo', '/usr/bin/docker', 'exec', container_name,
         'ifconfig', interface],
        stdout=subprocess.PIPE, universal_newlines=True)
    output = process.communicate()[0]

    counters = []
    for direction in ('RX', 'TX'):
        # Accepts "RX bytes:123 (...)" as well as
        # "RX packets 4  bytes 123 (...)".
        match = re.search(
            r'%s\s+(?:packets\s+\d+\s+)?bytes[:\s]\s*(\d+)' % direction,
            output)
        if match is None:
            raise RuntimeError('no %s byte counter for %s in container %s'
                               % (direction, interface, container_name))
        counters.append(int(match.group(1)))
    return tuple(counters)


def check_container_stats(container_name, collect_item):
    """Return one metric of a running container without the docker module.

    Args:
        container_name: name of the container to inspect.
        collect_item: one of ``cpu_total_usage``, ``cpu_system_usage``,
            ``cpu_percent``, ``mem_usage``, ``mem_limit``, ``mem_percent``,
            ``network_rx_bytes`` or ``network_tx_bytes``.

    Returns:
        The CPU items are per-second rates derived from two stats samples;
        the memory items are read from the container's memory cgroup
        directory (``mem_usage`` and ``mem_limit`` are ints); the percent
        items are floats rounded to two decimals; the network items are the
        number of bytes counted on ``eth1`` during a one second pause.

    Raises:
        ValueError: if ``collect_item`` is not a supported item.

    The memory items read the module-level ``memory_dir``.
    """
    if collect_item in ('cpu_total_usage', 'cpu_system_usage', 'cpu_percent'):
        total_rate, system_rate, cpu_num = _cpu_usage_rates(container_name)
        if collect_item == 'cpu_total_usage':
            return total_rate
        if collect_item == 'cpu_system_usage':
            return system_rate
        if system_rate == 0:
            # No system time elapsed between the samples; avoid dividing by 0.
            return 0.0
        return round(
            (float(total_rate) / float(system_rate)) * cpu_num * 100.0, 2)

    if collect_item in ('mem_usage', 'mem_limit', 'mem_percent'):
        memory_container_dir = get_memory_container_dir(memory_dir,
                                                        container_name)
        if collect_item == 'mem_usage':
            return _read_memory_rss(memory_container_dir)
        if collect_item == 'mem_limit':
            return _read_memory_limit(memory_container_dir)
        mem_usage = _read_memory_rss(memory_container_dir)
        mem_limit = _read_memory_limit(memory_container_dir)
        return round(float(mem_usage) / float(mem_limit) * 100.0, 2)

    if collect_item in ('network_rx_bytes', 'network_tx_bytes'):
        index = 0 if collect_item == 'network_rx_bytes' else 1
        old_counters = _read_interface_bytes(container_name)
        time.sleep(1)
        new_counters = _read_interface_bytes(container_name)
        return new_counters[index] - old_counters[index]

    raise ValueError('unsupported collect_item: %r' % (collect_item,))

if __name__ == "__main__":
    # Usage:
    #   zabbix_monitor_docker.py <container> <item>        -> prints a metric
    #   zabbix_monitor_docker.py <container> port <port>   -> prints 0 or 1
    # Anything else prints 1.

    # memory_dir is read as a module-level name by check_container_stats();
    # cpu_dir is defined alongside it but not used by the code shown here.
    cpu_dir = "/cgroup/cpuacct/docker"
    memory_dir = "/cgroup/memory/docker"

    if len(sys.argv) == 3:
        print(check_container_stats(sys.argv[1], sys.argv[2]))
    elif len(sys.argv) == 4 and sys.argv[2] == 'port':
        container_name = sys.argv[1]
        collect_item = int(sys.argv[3])
        # Argument list instead of a shell string: the container name is
        # never interpreted by a shell.
        process = subprocess.Popen(
            ['sudo', '/usr/bin/docker', 'exec', container_name,
             'netstat', '-ntpul'],
            stdout=subprocess.PIPE, universal_newlines=True)
        listing = process.communicate()[0]
        # Same test as the former "grep <port>": any line containing the
        # port number counts as a hit. 0 = found, 1 = not found.
        found = any(str(collect_item) in line for line in listing.splitlines())
        print('0' if found else '1')
    else:
        print('1')

脚本2,用来发现容器名

# cat discovery_cons.py

#!/usr/bin/env python

# Felix Shang

#QQ: 279379936

import commands

import sys

def docker_s():
    """Return the names of the running containers, or 0 if there are none.

    The names are the last column of ``sudo /usr/bin/docker ps``; the header
    line (the one containing ``CONTAINER ID``) is skipped. Only stdout is
    parsed, so error messages written to stderr never end up in the list.
    """
    import subprocess

    process = subprocess.Popen(['sudo', '/usr/bin/docker', 'ps'],
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    listing = process.communicate()[0]
    cons = [line.split()[-1] for line in listing.splitlines()
            if line.strip() and 'CONTAINER ID' not in line]
    # Callers compare the result against 0 to detect "no containers".
    return cons or 0

if __name__ == "__main__":
    # "discovery_cons.py docker" prints the running containers as a Zabbix
    # low-level discovery document; any other invocation prints nothing.
    wants_docker = len(sys.argv) == 2 and sys.argv[1] == 'docker'
    container_names = docker_s() if wants_docker else 0

    if container_names != 0:
        print('{')
        print('\t"data":[')
        last_index = len(container_names) - 1
        for position, name in enumerate(container_names):
            # Every entry except the last one is followed by a comma.
            if position == last_index:
                print('\t\t{"{#CONTAINERNAME}":"%s"}' % name)
            else:
                print('\t\t{"{#CONTAINERNAME}":"%s"},' % name)
        print('\t]')
        print('}')

脚本3,用来发现容器的服务(tomcat nginx),之前脚本2和脚本3是一个脚本,发现容器时出现好多问题。

# cat /opt/zabbix/script/discovery_docker_service.py

#!/usr/bin/env python

# Felix Shang

#QQ: 279379936

import commands

import sys

def docker_s(path='/opt/zabbix/script/docker_cons.txt'):
    """Return the container names listed in a discovery file, or 0 if none.

    Every line of ``path`` that contains ``CONTAINERNAME`` is split on
    double quotes and its fourth field is taken as the container name, e.g.
    ``{"{#CONTAINERNAME}":"web"}`` yields ``web``.

    Args:
        path: discovery file to read; defaults to the file the original
            script always used.

    Returns:
        A list of names, or 0 when the file is missing, unreadable or lists
        no container.
    """
    cons = []
    try:
        with open(path) as discovery_file:
            for line in discovery_file:
                if 'CONTAINERNAME' not in line:
                    continue
                fields = line.split('"')
                if len(fields) > 3:
                    cons.extend(fields[3].split())
    except IOError:
        # Treat an unreadable file like an empty one.
        return 0
    # Callers compare the result against 0 to detect "no containers".
    return cons or 0

def tomcat_s():
    """Map every discovered container to the ports of its tomcat connectors.

    Returns:
        A dict ``{container_name: [port, ...]}``; containers without a
        running tomcat process map to an empty list.

    Exits the process with status 2 when ``docker_s()`` reports no
    containers.
    """
    cons = docker_s()
    if cons == 0:
        sys.exit(2)

    cons_d = {}
    for con in cons:
        port_list = []
        # The trailing "echo $?" prints the exit status of the last grep:
        # 0 when a java/tomcat process is running in the container.
        stat = commands.getoutput(
            "sudo /usr/bin/docker exec %s ps -ef|grep java|grep tomcat"
            "|grep -v grep>/dev/null;echo $?" % con)
        # Only the last token is the exit status; anything before it would
        # be error output merged in by commands.getoutput().
        if int(stat.split()[-1]) == 0:
            # Prints the text between the first "=" and the word "logging"
            # of each tomcat process line, which the next step uses as the
            # directory holding server.xml.
            tomcat_config_dirs = commands.getoutput(
                "sudo /usr/bin/docker exec %s ps -ef | grep tomcat "
                "|grep -v grep | awk -F= '{print $2}' "
                "| awk -F'logging' '{print $1}'" % con).split()
            for tomcat_config_dir in tomcat_config_dirs:
                tomcat_config_file = tomcat_config_dir + 'server.xml'
                ports = commands.getoutput(
                    """sudo /usr/bin/docker exec %s grep "port=" %s"""
                    """|grep -v "shutdown"|grep -v "AJP"|grep "Connector\""""
                    """|awk -F= '{print $2}'|awk '{print $1}'"""
                    % (con, tomcat_config_file))
                # One quoted port per line; strip the quotes from each one
                # so several connectors in one file are all reported.
                port_list.extend(port.strip('"') for port in ports.split())
        cons_d[con] = port_list
    return cons_d

def nginx_s():
    """Map every discovered container to the ports nginx listens on.

    Returns:
        A dict ``{container_name: [port, ...]}``; containers without a
        running nginx process map to an empty list.

    Exits the process with status 2 when ``docker_s()`` reports no
    containers.
    """
    cons = docker_s()
    if cons == 0:
        sys.exit(2)

    cons_d = {}
    for con in cons:
        port_list = []
        # The trailing "echo $?" prints the exit status of the last grep:
        # 0 when an nginx process is running in the container.
        stat = commands.getoutput(
            "sudo /usr/bin/docker exec %s ps -ef|grep nginx"
            "|grep -v grep>/dev/null;echo $?" % con)
        # Only the last token is the exit status; anything before it would
        # be error output merged in by commands.getoutput().
        if int(stat.split()[-1]) == 0:
            # Takes the text after the last ":" of the 4th netstat column
            # (the port). Lines containing 40080 are excluded, as in the
            # original script (reason not documented - TODO confirm).
            port_list = commands.getoutput(
                "sudo /usr/bin/docker exec %s netstat -ntpul|grep nginx"
                "|grep -v 40080|awk '{print $4}'|awk -F: '{print $NF}'"
                % con).split()
        cons_d[con] = port_list
    return cons_d

def format_discovery(infos, macro):
    """Return the Zabbix low-level discovery text for container ports.

    Args:
        infos: dict mapping a container name to a list of ports.
        macro: name of the port macro without ``{#`` and ``}``, e.g.
            ``CON_TOMCAT_PORT``.

    Returns:
        The document as a string, one ``{#CONTAINERNAME}``/``{#<macro>}``
        entry per line; containers with no ports contribute no entry.
    """
    entries = []
    for con_info in infos:
        for port in infos[con_info]:
            entries.append(
                '\t\t{"{#CONTAINERNAME}":"%s","{#%s}":"%s"}'
                % (con_info, macro, port))

    lines = ['{', '\t"data":[']
    if entries:
        # Every entry except the last one is followed by a comma.
        lines.append(',\n'.join(entries))
    lines.extend(['\t]', '}'])
    return '\n'.join(lines)


if __name__ == "__main__":
    # "discovery_docker_service.py tomcat|nginx" prints the discovery
    # document for that service; any other invocation prints nothing.
    if len(sys.argv) == 2 and sys.argv[1] == 'tomcat':
        print(format_discovery(tomcat_s(), 'CON_TOMCAT_PORT'))
    elif len(sys.argv) == 2 and sys.argv[1] == 'nginx':
        print(format_discovery(nginx_s(), 'CON_NGINX_PORT'))

   #else:

#    help_s()

 

#vim /etc/sudoers   //zabbix_agent是通过zabbix用户执行,通过sudo提权让zabbix用户对脚本有执行权限。

zabbix    ALL=(root) NOPASSWD:/usr/bin/docker,/sbin/fdisk,/usr/sbin/dmidecode,/usr/bin/nc

Zabbix服务端的配置:

导入模板:Template docker, 宿主机关联此模板即可。

报错:

Server获取值报错:[ZBX_NOTSUPPORTED] [Timeout while executing a shell script.]

# vim zabbix_agentd.conf

# 设置超时时间

Timeout=30

本文出自 “鹏哥玩linux” 博客,请务必保留此出处http://pengge.blog.51cto.com/25661/1887209