python实现根据列表文件自动监测目录完整性

python实现根据列表文件自动监测目录完整性




上一次,我们实现了一个自动计算目录内文件md5并输出列表文件的脚本;这次,我们来实现基于该列表文件进行校验的脚本。

1 基本思路

执行 ./md5print.py -mf Rule/webcheck.rule ../qhjack/ qhjack.md5 后,我们已经得到了所需的列表文件 qhjack.md5。接下来只需要解析该文件,再与最新采集到的信息进行校验即可,如下图:

2 计算md5

计算md5大致上和上次的程序一样,这里直接列出来:

def get_file_md5(file_path):
    """Return the hexadecimal MD5 digest of a file's contents.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The 32-character hexadecimal digest, or None when ``file_path``
        is not an existing regular file.
    """
    if not os.path.isfile(file_path):
        return None
    md5 = hashlib.md5()
    # ``open`` replaces the Python 2-only ``file`` builtin, and ``with``
    # closes the handle even if a read fails part-way through.
    with open(file_path, 'rb') as f:
        # Read in fixed-size chunks so large files are never loaded whole.
        for chunk in iter(lambda: f.read(8096), b''):
            md5.update(chunk)
    return md5.hexdigest()

3 列出所有文件,遍历目录

和上次一样,这里还是直接列出源码:

def list_all_files(rootdir):
    """Recursively collect every regular file found under ``rootdir``.

    Sub-directories are descended into as they are encountered, so the
    result follows the order in which ``os.listdir`` reports entries.

    Args:
        rootdir: Directory to walk.

    Returns:
        A list of file paths, each relative to the current working
        directory.
    """
    collected = []
    for entry in os.listdir(rootdir):
        full_path = os.path.join(rootdir, entry)
        if os.path.isdir(full_path):
            collected += list_all_files(full_path)
        if os.path.isfile(full_path):
            collected.append(os.path.relpath(full_path))
    return collected

4 转换列表的raw数据到结构化列表

为了方便比较,我们采取结构化列表的方式,无论是遍历获取的新的校验值,还是读取列表文件,均需进行转换,相应的转换程序如下:

# Excerpt of the list-parsing step (the first line has lost its indentation).
# `args`, `match`, `checklist`, `newchecklist` and `filelist` are expected to
# be defined by the enclosing code.
file = open(args.list_file,"r")
    if file:
        md5list = file.readlines()
        # Recorded state: each line is split on '|'; field 0 is taken as the
        # md5 and field 1 as the file path.
        for line in md5list:
            linesplit = line.split('|')
            linemd5 = linesplit[0]
            linefile = linesplit[1]
            # Keep the entry when its absolute path matches a rule. There is
            # no break, so an entry is appended once per matching rule.
            for match_str in match:
                if re.match(match_str.strip('\n'),os.path.abspath(linefile)) != None:
                    checklist.append((linemd5,os.path.relpath(linefile.strip('\n'))))

        # Current state: hash every file in `filelist` that matches a rule
        # and store it in the same (md5, relative path) shape.
        for filename in filelist:
            for match_str in match:
                if re.match(match_str.strip('\n'),os.path.abspath(filename)) != None:
                    md5 = get_file_md5(filename)
                    newchecklist.append((md5,os.path.relpath(filename.strip('\n'))))

5 最后

写到这里,基本思路和大部分的实现都已经告诉大家,接下来我将所有脚本的程序发出来,大家可以一起讨论:

import sys,os,hashlib,argparse,re

# Command-line interface. Parsing happens at module level, so ``args`` is a
# global that main() reads.
parse = argparse.ArgumentParser()

# Positional arguments: the directory to scan and the md5 list file to
# verify it against.
parse.add_argument('path')
parse.add_argument('list_file')

# -l: print every created / deleted / changed file.
parse.add_argument(
    '-l', '--list',
    dest='list_info',
    action='store_true',
)
# -ns: suppress the percentage summary (``statistics`` defaults to True).
parse.add_argument(
    '-ns', '--no-statistics',
    dest="statistics",
    action='store_false',
)
# -z: print a bare "1"/"0" result instead of the human-readable message.
parse.add_argument(
    '-z', '--zabbix',
    dest='zabbix',
    action='store_true',
)
# -mf: optional file of regular expressions, one per line, selecting which
# paths take part in the check.
parse.add_argument(
    '-mf', '--match-file',
    dest='match_file',
    nargs='?',
)

args = parse.parse_args()

def list_all_files(rootdir):
    """Recursively collect every regular file found under ``rootdir``.

    Sub-directories are descended into as they are encountered, so the
    result follows the order in which ``os.listdir`` reports entries.

    Args:
        rootdir: Directory to walk.

    Returns:
        A list of file paths, each relative to the current working
        directory.
    """
    collected = []
    for entry in os.listdir(rootdir):
        full_path = os.path.join(rootdir, entry)
        if os.path.isdir(full_path):
            collected += list_all_files(full_path)
        if os.path.isfile(full_path):
            collected.append(os.path.relpath(full_path))
    return collected

def get_file_md5(file_path):
    """Return the hexadecimal MD5 digest of a file's contents.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The 32-character hexadecimal digest, or None when ``file_path``
        is not an existing regular file.
    """
    if not os.path.isfile(file_path):
        return None
    md5 = hashlib.md5()
    # ``open`` replaces the Python 2-only ``file`` builtin, and ``with``
    # closes the handle even if a read fails part-way through.
    with open(file_path, 'rb') as f:
        # Read in fixed-size chunks so large files are never loaded whole.
        for chunk in iter(lambda: f.read(8096), b''):
            md5.update(chunk)
    return md5.hexdigest()

def _load_patterns(match_file):
    """Compile the rules in ``match_file`` (one regex per line).

    Falls back to a single match-everything rule when no file is given or
    the file contains no rules.
    """
    rules = []
    if match_file:
        with open(match_file, "r") as rule_file:
            # Skip blank lines: an empty pattern would match every path.
            rules = [line.rstrip('\r\n') for line in rule_file if line.strip()]
    if not rules:
        rules.append('^.*$')
    return [re.compile(rule) for rule in rules]


def _is_selected(patterns, path):
    """Return True when the absolute form of ``path`` matches any rule."""
    absolute = os.path.abspath(path)
    return any(pattern.match(absolute) for pattern in patterns)


def _read_checklist(list_file, patterns):
    """Parse ``md5|path`` lines of ``list_file`` into a {relpath: md5} dict."""
    recorded = {}
    with open(list_file, "r") as handle:
        for line in handle:
            fields = line.rstrip('\r\n').split('|')
            if len(fields) < 2 or not fields[1]:
                # Blank or malformed line: nothing to verify against.
                continue
            digest, name = fields[0], fields[1]
            if _is_selected(patterns, name):
                recorded[os.path.relpath(name)] = digest
    return recorded


def main():
    """Verify a directory against a previously recorded md5 list file.

    Reads the module-level ``args``: the files under ``args.path`` and the
    entries of ``args.list_file`` are both filtered by the optional match
    rules and then compared by relative path.

    A path only on disk is a creation, a path only in the list file is a
    deletion, and a path present in both with different digests is a
    change. Each affected file counts once, so a simultaneous add and
    remove is no longer reported as a full match, and a modified file is
    no longer reported as both created and deleted.

    Output goes to stdout: per-file details with ``-l``, the percentage
    summary unless ``-ns`` is given, and a final verdict ("1"/"0" with
    ``-z``, a message otherwise).
    """
    patterns = _load_patterns(args.match_file)
    recorded = _read_checklist(args.list_file, patterns)

    # Current on-disk state, keyed the same way as the recorded state.
    current = {}
    for filename in list_all_files(args.path):
        if _is_selected(patterns, filename):
            current[os.path.relpath(filename.strip('\n'))] = get_file_md5(filename)

    # Sorted so the report order is stable between runs.
    created = sorted(set(current) - set(recorded))
    deleted = sorted(set(recorded) - set(current))
    changed = sorted(
        name for name in current
        if name in recorded and current[name] != recorded[name]
    )
    diff_count = len(created) + len(deleted) + len(changed)

    if args.list_info:
        for name in created:
            print("检查到文件创建: %s" % (os.path.abspath(name)))
        for name in deleted:
            print("检查到文件删除: %s" % (os.path.abspath(name)))
        for name in changed:
            print("检查到文件改变:%s" % (os.path.abspath(name)))

    if args.statistics:
        total = len(current)
        if total:
            # Cap at 100%: deletions can push the count above ``total``.
            diff_rate = min(float(diff_count) / total, 1.0) * 100
        else:
            # Nothing on disk to compare: avoid dividing by zero.
            diff_rate = 100.0 if diff_count else 0.0
        print("完整性: %.2f%%" % (100 - diff_rate))
        print("差异率: %.2f%%" % (diff_rate))

    # The verdict uses the uncapped count, so deleting every file is still
    # reported as a difference.
    if diff_count != 0:
        if args.zabbix:
            print("1")
        else:
            print("警告:检测完成,发现文件改变")
    else:
        if args.zabbix:
            print("0")
        else:
            print("检测完成,经检测,完全匹配")
# Run the check only when this file is executed as a script.
if __name__ == "__main__":
    main()

打赏

说点什么

avatar
  订阅  
提醒

扫码二维码快速访问本页

python实现根据列表文件自动监测目录完整性 – 起航天空