重写慢日志解析程序,实现打印慢SQL信息及其所属数据库

分组自研的审计平台最近推出慢SQL优化的功能,topN慢SQL可以通过MySQLdumpslow拿到,但由于mysqldumpslow输出的信息不包含数据库,这让程序后续的自动优化变得有些棘手。在观察了MySQL慢日志结构后,决定自己写一个python解析程序,返回的结果比mysqldumpslow解析结果上多出数据库名称这一列:

成都创新互联公司坚持“要么做到,要么别承诺”的工作理念,服务领域包括:成都网站设计、成都网站建设、企业官网、英文网站、手机端网站、网站推广等服务,满足客户于互联网时代的麟游网站设计、移动媒体设计的需求,帮助企业找到有效的互联网解决方案。努力成为您成熟可靠的网络建设合作伙伴!

Count: 15  Time=0.002961s (0.034505s)  Lock=8.8e-05s (0.000767s)  Rows=446 (6690), dbmgr[dbmgr]@10.33.46.179  mysql  --SQL所属数据库
  SHOW GLOBAL VARIABLES;

python版本:2.7
文件名称:slowParse.py --目前仅支持按query time取topN
代码内容:

import os
import sys

def get_sql(slowlog, topN):
    #Slow log 所在目录,请自行替换
        f1 = open("/var/mysql/data3306/" + slowlog, "r")
    flag1 = 1
    flag2 = 0
    sqltext = ""
    slow_sql_all = {}
    sql_info = []
    queryTime_list = []
    locksTime_list = []
    rows_list = []
    logonInfo_list = []
    db = "None"
    rownum = 0
    while 1:
        line = f1.readline()
        rownum += 1
        if not line:
            break
        elif "use " in line and len(line) < 30:
            db = getDB(line.strip())
        elif "# User@Host" in line and flag1 == 1:
            flag2 = 1
            userAndHost = getUserAndHost(line.strip())
            logonInfo = userAndHost[0] + "[" + userAndHost[0] + "]@" + userAndHost[1]
            logonInfo_list.append(logonInfo)
        elif "# Query_time" in line and flag1 == 1 and flag2 == 1:
            execInfo = getExecInfo(line.strip())
            queryTime_list.append(execInfo[0])
            locksTime_list.append(execInfo[1])
            rows_list.append(execInfo[2])
        elif ";" in line and "SET timestamp" not in line and flag1 == 1 and flag2 == 1:
            flag2 = 0
            sqltext = sqltext + line.strip()
            if slow_sql_all.has_key(sqltext):
                tmp = slow_sql_all[sqltext]
                tmp[0].append(queryTime_list[0])
                tmp[1].append(locksTime_list[0])
                tmp[2].append(rows_list[0])
                tmp[3].append(logonInfo_list[0])
            else:
                # count = 1
                # sql_info.append(count)
                sql_info.append(queryTime_list)
                sql_info.append(locksTime_list)
                sql_info.append(rows_list)
                sql_info.append(logonInfo_list)
                sql_info.append(db)
                slow_sql_all[sqltext] = sql_info
            queryTime_list = []
            locksTime_list = []
            rows_list = []
            logonInfo_list = []
            sqltext = ""
            sql_info = []
        elif flag1 == 1 and flag2 == 1 and "# User@Host" not in line and "# Query_time" not in line and "# Time" not in line and "SET timestamp" not in line:
            sqltext = sqltext + line.strip() + " "
    f1.close()

    sqlCombined = {}
    sqlTmp = {}

    for i in slow_sql_all:
        # print i,slow_sql_all[i]
        count = len(slow_sql_all[i][0])
        totalQueryTime = 0
        totalLocksTime = 0
        totalRows = 0
        for j in slow_sql_all[i][0]:
            totalQueryTime += float(j)
        maxQueryTime = float(max(slow_sql_all[i][0]))
        for k in slow_sql_all[i][1]:
            totalLocksTime += float(k)
        maxLocksTime = float(max(slow_sql_all[i][1]))
        for l in slow_sql_all[i][2]:
            totalRows += int(l)
        maxRows = int(max(slow_sql_all[i][2]))
        logonInfo = slow_sql_all[i][3][0]
        db = slow_sql_all[i][4]
        sqlCombined[i] = (
            count, maxQueryTime, totalQueryTime, maxLocksTime, totalLocksTime, maxRows, totalRows, logonInfo, db)
        sqlTmp[i] = maxQueryTime

    sqlTopN = sorted(sqlTmp.items(), key=lambda x: x[1], reverse=True)[:topN]

    #请自行替换生成文件的所在目录
        f2 = open("/var/mysql/data3306/" + slowlog[:-4] + "-top" + str(topN) + ".txt", "w")

    for i in sqlTopN:
        sqltext = i[0]
        count_str = "Count: " + str(sqlCombined[sqltext][0])
        queryTime_str = "Time=" + str(sqlCombined[sqltext][1]) + "s (" + str(sqlCombined[sqltext][2]) + "s)"
        locksTime_str = "Lock=" + str(sqlCombined[sqltext][3]) + "s (" + str(sqlCombined[sqltext][4]) + "s)"
        rows_str = "Rows=" + str(sqlCombined[sqltext][5]) + " (" + str(sqlCombined[sqltext][6]) + "),"
        logonInfo_str = sqlCombined[sqltext][7]
        db_str = sqlCombined[sqltext][8]
        f2.write(
            count_str + "  " + queryTime_str + "  " + locksTime_str + "  " + rows_str + " " + logonInfo_str + "  " + db_str + "\n  " + sqltext + "\n")

    f2.close()

def getDB(line):
    info = line.split(" ")
    db = info[1][:-1]
    return db

def getUserAndHost(line):
    info_list = line.split(" ")
    User = info_list[2].split("[")[0]
    idx = info_list.index("@")
    hostInfo = info_list[idx + 2]
    if hostInfo == "[]":
        Host = "localhost"
    else:
        Host = hostInfo[1:-1]
    return User, Host

def getExecInfo(line):
    info_list = line.split("  ")
    Query_time = info_list[0].split(" ")[2]
    Lock_time = info_list[1].split(" ")[1]
    Rows_sent = info_list[1].split(" ")[3]
    return Query_time, Lock_time, Rows_sent

if __name__ == '__main__':
    filename = str(sys.argv[1])
    topN = int(sys.argv[2])
    get_sql(filename, topN)

使用:

python slowParse.py slow.log 5        --取top 5

文章题目:重写慢日志解析程序,实现打印慢SQL信息及其所属数据库
文章起源:http://pcwzsj.com/article/gsdged.html