- 論壇徽章:
- 0
|
[Python]代碼- #!/usr/bin/env python
- #-*- coding:utf-8-*-
-
- import urllib2
- import re
- import hashlib
- import json
-
- #--------------------------------------------------- 工具 start
def md5(str):
    '''
    Compute the MD5 digest of the given data.

    str -- the data to hash. Byte strings are hashed as-is; text
           (unicode) is encoded as UTF-8 first. The parameter name
           shadows the builtin but is kept so keyword callers still work.

    Returns the digest as a hexadecimal string.
    '''
    data = str
    # hashlib only accepts bytes: encode text explicitly instead of relying
    # on an implicit ASCII conversion, which fails on non-ASCII characters.
    if isinstance(data, type(u'')):
        data = data.encode('utf-8')
    return hashlib.md5(data).hexdigest()
-
def search(regex, content, group=1):
    '''
    Return one group of the first match of a regular expression.

    The pattern is applied with re.DOTALL, so "." also matches newlines.

    regex   -- the regular expression to look for
    content -- the text to search in
    group   -- which group of the match to return (default 1)

    Returns match.group(group) for the first match, or '' when the
    pattern does not match anywhere in content.
    '''
    match = re.search(regex, content, re.DOTALL)
    # re.search signals "no match" with None; test identity, not equality.
    if match is None:
        return ''
    return match.group(group)
-
def findall(regex, content):
    '''
    Collect every match of a regular expression in the given text.

    The pattern is applied with re.DOTALL, so "." also matches newlines.

    regex   -- the regular expression to look for
    content -- the text to search in

    Returns the list produced by the pattern's findall().
    '''
    compiled = re.compile(regex, re.DOTALL)
    return compiled.findall(content)
-
def cleanHtmlTag(content):
    '''
    Strip HTML tags from a piece of text.

    content -- the text to clean; may be empty or None

    Returns content with every "<...>" tag removed and surrounding
    whitespace trimmed. Empty or None input is returned unchanged.
    '''
    # Guard first: there is nothing to clean in an empty value, and
    # re.sub would raise on None.
    if not content:
        return content
    return re.sub(r'<[^>]*?>', '', content).strip()
-
def cleanedSearch(regex, content, group=1):
    '''
    Search for a pattern and clean the HTML tags from what was found.

    regex   -- the regular expression to look for
    content -- the text to search in
    group   -- which group of the match to use (default 1)

    Returns the result of search() after passing it through
    cleanHtmlTag().
    '''
    matched = search(regex, content, group)
    return cleanHtmlTag(matched)
-
def httpGet(url, encoding='gbk'):
    '''
    Send an HTTP GET request and return the response body.

    url      -- the address to fetch
    encoding -- the character encoding of the response (default 'gbk')

    Returns the body re-encoded as a UTF-8 byte string. Bytes that
    cannot be decoded with the given encoding are dropped.
    '''
    response = urllib2.urlopen(url)
    try:
        raw = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()
    return raw.decode(encoding, 'ignore').encode('utf-8')
-
def toJson(dict):
    '''
    Serialize a value to pretty-printed JSON text.

    dict -- the value to serialize

    Returns the JSON text, indented by 4 spaces, with non-ASCII
    characters left unescaped (ensure_ascii=False).
    '''
    options = {'ensure_ascii': False, 'indent': 4}
    return json.dumps(dict, **options)
-
- #--------------------------------------------------- 工具 end
複製代碼 |
|