今天跟大家分享Python学习教程(Python学习路线):python—收集系统信息 1.1 hashlib模块使用 获取文件的MD5值,和shell下的md5sum一样 方法一:先实例化一个对象,再使用update做校验,最后用hexdigest查看十六进制结果 In [3]: import hashlib In [5]: md5 = hashlib.md5() In [6]: md5.update("a") In [7]: md5.hexdigest() Out[7]: '0cc175b9c0f1b6a831c399e269772661' In [8]: md5.update("b\n") #叠加,实际为ab\n的值 In [9]: md5.hexdigest() Out[9]: 'daa8075d6ac5ff8d0c6d4650adb4ef29' 说明: 对比shell,哈希值一样 [root@huangzp3 python]# echo "ab"|md5sum daa8075d6ac5ff8d0c6d4650adb4ef29 - 与shell下一致,-n #表示不加换行符 [root@huangzp3 python]# echo "a"|md5sum 60b725f10c9c85c70d97880dfe8191b3 - [root@huangzp3 python]# echo -n "a"|md5sum 0cc175b9c0f1b6a831c399e269772661 - 方法二:字符串短时,直接调用 In [10]: hashlib.md5("hello").hexdigest() Out[10]: '5d41402abc4b2a76b9719d911017c592' 脚本: #!/usr/bin/env python import sys import hashlib import codecs def getMd5(f): md5 = hashlib.md5() with codecs. open (f) as fd: while True : data = fd.read( 4096 ) if data: md5.update(data) #每读一次,获取一次md5值,MD5值累加 else : break return md5.hexdigest() if __name__ == "__main__" : try : print getMd5(sys.argv[ 1 ]) except IndexError: print ( "%s follow a argument" % __file__) 运行结果: [root@huangzp3 python]# python 01.py 1.py e796a8f418fa90d7e4f0a162119f114a [root@huangzp3 python]# cat 1.py |md5sum e796a8f418fa90d7e4f0a162119f114a - 1.2 os.walk函数使用 os.walk:迭代目录里文件,每次迭代返回一个三元组,分别是:路径、路径下的目录、路径下的文件 In [17]: walk = os.walk("/root/python/a") In [18]: for i,j,k in walk:print i,j,k /root/python/a ['b'] ['a.txt'] /root/python/a/b ['c'] ['b.txt'] /root/python/a/b/c ['c.txt'] 脚本: #!/usr/bin/env python import sys import hashlib import codecs import os def getMd5(f): md5 = hashlib.md5() with codecs. 
open (f) as fd: while True : data = fd.read( 4096 ) if data: md5.update(data) else : break return md5.hexdigest() walk = os.walk(sys.argv[ 1 ]) s = "" for i,j,k in walk: for fil in k: fa = os.path.join(i,fil) result = getMd5(fa) s += result + " " + fa + "\n" print s, 运行结果: [root@huangzp3 python]# python 02.py /root/python/ b9f6f7a02766b3f9bd28369c6c331218 /root/python/20.py e796a8f418fa90d7e4f0a162119f114a /root/python/1.py 01b1b4f4b2eb7155187c27057e9a4de2 /root/python/1.pyc 1.3 生成器yield 想获取函数产生的值,但又不想让函数就此结束时,可以用yield:它返回当前值并记住函数的执行状态,下次调用时从该处继续。调用时用next方法或者for循环遍历 修改如上脚本: #!/usr/bin/env python import sys import hashlib import os import codecs def getMd5(f): md5 = hashlib.md5() with codecs. open (f) as fd: while True : data = fd.read( 4096 ) if data: md5.update(data) else : break return md5.hexdigest() def fileMd5(topdir): walk = os.walk(topdir) for i,j,k in walk: for fil in k: fa = os.path.join(i,fil) result = getMd5(fa) yield "%s %s" % (result,fa) if __name__ == "__main__" : topdir = sys.argv[ 1 ] md5 = fileMd5(topdir) for lis in md5: print lis 1.4 文件md5值的校验 找出目录中内容相同的文件 脚本: #!/usr/bin/env python import sys import hashlib import os import codecs def getMd5(f): md5 = hashlib.md5() with codecs. 
open (f) as fd: while True : data = fd.read( 4096 ) if data: md5.update(data) else : break return md5.hexdigest() def getDir(topdir): dic = {} walk = os.walk(topdir) for i,j,k in walk: for lis in k: fil = os.path.join(i,lis) md5 = getMd5(fil) if dic.has_key(md5): dic[md5].append(fil) else : dic[md5] = [fil] #dic[md5] = [fn] ,字典的key为MD5,value值为fn return dic if __name__ == "__main__" : result = getDir(sys.argv[ 1 ]) for i,j in result.items(): if len (j)> 1 : print i,j 运行结果: [root@huangzp3 python]# python 04.py /root/python b026324c6904b2a9cb4b88d6d61c81d1 ['./test/a/1.txt', './test/b/2.txt', './test.bak/test/a/1.txt', './test.bak/test/b/2.txt'] 1.5 字典排序 字典是无序的 sorted(可迭代对象,cmp,key=根据什么排序,reverse=是否倒序),返回一个排序的列表 说明:key表示关键字;operator.itemgetter(0):根据字典的key值排序,(1)表示根据字典的value排序;reverse=True倒序 x.iteritems()返回一个迭代器,items与iteritems的区别和range与xrange的区别一样 找出占用空间大的文件 脚本: #!/usr/bin/env python import sys import os import operator def getDic(topdir): dic = {} walk = os.walk(topdir) for i,j,k in walk: for lis in k: fs = os.path.join(i,lis) sz = os.path.getsize(fs) dic[fs] = sz return dic if __name__ == "__main__" : dic = getDic(sys.argv[ 1 ]) sorted_dic = sorted (dic.iteritems(),key = operator.itemgetter( 1 ),reverse = True ) for k,v in sorted_dic[: 10 ]: if v> 1024 : K = int (v) / 1024 if K> 1024 : M = K / 1024 if M> 1024 : v = str (M / 1024 ) + "G" else : v = str (M) + "M" else : v = str (K) + "K" print k, "------>" ,v 运行结果: [root@huangzp3 python]# python 05.py /data/program/ /data/program/mongo/data/data27018/local.2 ------> 511M /data/program/mongo/data/data27018/local.1 ------> 511M /data/program/mongo/data/data27017/local.1 ------> 511M /data/program/mongo/data/data27017/local.2 ------> 511M /data/program/mongo/logs/mongodb-27018.log ------> 488M /data/program/mysql/lib/libmysqld.a ------> 235M /data/program/mongo/data/data27018/journal/j._7 ------> 128M /data/program/mysql/bin/mysqld ------> 95M /data/program/mysql/bin/mysql_client_test_embedded ------> 87M 
/data/program/mysql/bin/mysql_embedded ------> 87M 1.6 python调用外部命令 1)os.system:输出在终端上,捕获不到 In [4]: os.system("ls") 001.sh 10.sh 1.py 2)os.popen:只能捕捉到标准输出,捕捉不到标准错误输出 In [12]: a = os.popen("ls") In [13]: a.read a.read a.readline a.readinto a.readlines 3)os.popen2:返回2个对象,一个标准输入、一个标准输出 In [14]: stdin,stdout = os.popen2("sort") /usr/bin/ipython:1: DeprecationWarning: os.popen2 is deprecated. Use the subprocess module. #!/usr/bin/python2 4)os.popen3:返回3个对象,标准输入、标准输出、标准错误输出 In [16]: stdin,stdout,stderr = os.popen3("ls a") /usr/bin/ipython:1: DeprecationWarning: os.popen3 is deprecated. Use the subprocess module. #!/usr/bin/python2 5)os.popen4:返回2个对象,pipe_in和pipe_out_err 6)subprocess subprocess.call('ls -l --color /root',shell=True) 说明:输出不能捕捉到,类似于os.system;命令不带参数(如不加-l)时,就不用shell=True subprocess.check_call(['mkdir','/tmp/aaa']) 说明:命令返回非零状态时会抛出python异常(CalledProcessError) In [19]: subprocess.call("ls -l",shell=True) total 580868 -rw-r--r-- 1 root root 103 Nov 22 16:23 001.sh -rw-r--r-- 1 root root 375 Nov 1 03:00 100.sh -rw-r--r-- 1 root root 47624178 Nov 1 19:35 100.txt 捕捉异常 脚本: #!/usr/bin/env python import subprocess try : subprocess.check_call( "exit 1" ,shell = True ) #check_call会抛出python异常 except subprocess.CalledProcessError: pass print "hello world" 运行结果: [root@huangzp3 python]# python 06.py hello world |
|
来自: 千锋Python学堂 > 《Python基础教程分享》