Python 汉字转拼音、五笔

展开全文

拼音方法一(较全)

Python
# coding: utf-8
class pinyin(object):
    """Convert Chinese characters to toneless pinyin using a lookup table.

    The table is a text file with one entry per line: a key and a value
    separated by a tab.  Keys are matched against the upper-case hexadecimal
    code point of each character (``"%X" % ord(char)``); the value holds one
    or more space-separated readings (presumably like ``ZHONG1 ZHONG4`` --
    confirm against the actual Mandarin.dat).
    """

    def __init__(self, data_path='./Mandarin.dat'):
        """Load the lookup table from *data_path*.

        Raises whatever ``open`` raises when the file cannot be read.
        """
        self.dict = {}
        # The context manager guarantees the file handle is closed.
        with open(data_path) as table:
            for line in table:
                code, tab, readings = line.partition('\t')
                if not tab:
                    # Blank or malformed line (no tab): skip it instead of
                    # crashing on tuple unpacking.
                    continue
                self.dict[code] = readings
        # String inserted between the converted characters by pinyin().
        self.splitter = ''

    def pinyin(self, chars):
        """Return *chars* with every known character replaced by its pinyin.

        For a known character the first listed reading is taken, its last
        character (presumably the tone digit) is dropped and the rest is
        lower-cased.  Characters missing from the table are passed through
        unchanged.  Pieces are joined with ``self.splitter``.

        *chars* is iterated character by character, so pass a unicode string.
        """
        result = []
        for char in chars:
            readings = self.dict.get("%X" % ord(char))
            if readings is None:
                # Not in the table (punctuation, Latin letters, ...).
                result.append(char)
                continue
            first_reading = readings.split(" ")[0].strip()
            result.append(first_reading[:-1].lower())
        return self.splitter.join(result)


if __name__ == "__main__":
    p = pinyin()
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(p.pinyin(u"中国人的一天"))

五笔与拼音

下载库:ChineseCode.dat

Python
# coding: utf-8
class pinyin(object):
    """Look up per-character codes from a tab-separated table (ChineseCode.dat).

    Each usable line must have at least seven tab-separated columns.  Column 0
    is the key, matched against the upper-case hexadecimal code point of a
    character (``"%X" % ord(char)``); column 6 is the stored value.

    NOTE(review): both wb() and pinyin() read the same column (index 6).
    That looks suspicious -- confirm which column of ChineseCode.dat really
    holds the pinyin before relying on pinyin().
    """

    def __init__(self, data_path='./ChineseCode.dat'):
        """Load column 0 -> column 6 of *data_path* into ``self.dict``."""
        self.dict = {}
        # The context manager guarantees the file handle is closed.
        with open(data_path) as table:
            for line in table:
                fields = line.split('\t')
                if len(fields) < 7:
                    # Blank or short line: skip instead of raising IndexError.
                    continue
                self.dict[fields[0]] = fields[6]
        # String inserted between the converted characters.
        self.splitter = ''

    def wb(self, chars):
        """Return the stored code of every character in *chars*.

        Each known character contributes its table value followed by a
        newline; unknown characters are passed through unchanged.  Pieces are
        joined with ``self.splitter``.
        """
        result = []
        for char in chars:
            code = self.dict.get("%X" % ord(char))
            result.append(char if code is None else code + '\n')
        return self.splitter.join(result)

    def pinyin(self, chars):
        """Return a toneless, lower-case reading for every character in *chars*.

        For a known character the table value is split on commas, the first
        item is stripped, its last character (presumably a tone digit) is
        dropped and the rest is lower-cased.  Unknown characters are passed
        through unchanged.  Pieces are joined with ``self.splitter``.
        """
        result = []
        for char in chars:
            value = self.dict.get("%X" % ord(char))
            if value is None:
                result.append(char)
                continue
            first_item = value.split(",")[0].strip()
            result.append(first_item[:-1].lower())
        return self.splitter.join(result)


if __name__ == "__main__":
    p = pinyin()
    # The original demo called p.get_wb(), which does not exist; the method
    # defined above is wb().
    print(p.wb(u"小强"))

合并两者

Python
# coding: utf-8
try:
    import cPickle as pickle  # Python 2: C-accelerated pickle
except ImportError:
    import pickle  # Python 3


def merge_tables(mandarin_path='./Mandarin.dat',
                 code_path='./ChineseCode.dat',
                 out_path='ChineseCode2.dat'):
    """Merge the two lookup tables and pickle the result to *out_path*.

    *mandarin_path* is read as ``key<TAB>value`` lines; each value is
    right-stripped and lower-cased.  For every line of *code_path* whose
    column 0 is a key already present, columns 6 and 2 are appended to that
    entry, tab-separated, giving ``value<TAB>col6<TAB>col2``.  (Column 6 is
    presumably the Wubi code -- confirm against ChineseCode.dat.)

    Progress is printed for matching rows whose line index is a multiple of
    100, as in the original script.

    Returns the merged dictionary that was written.
    """
    merged = {}
    with open(mandarin_path) as source:
        for line in source:
            code, tab, readings = line.partition('\t')
            if not tab:
                # Blank or malformed line: nothing to store.
                continue
            merged[code] = readings.rstrip().lower()

    with open(code_path) as source:
        for index, line in enumerate(source):
            items = line.split('\t')
            if len(items) < 7:
                # Short line: columns 6 and 2 are not both available.
                continue
            key = items[0]
            if key in merged:
                merged[key] = merged[key] + '\t' + items[6] + '\t' + items[2]
                if index % 100 == 0:
                    print(index)

    # Pickle data must be written in binary mode; text mode corrupts it on
    # platforms that translate newlines and is rejected on Python 3.
    with open(out_path, 'wb') as out:
        pickle.dump(merged, out)
    return merged


if __name__ == "__main__":
    merge_tables()

较完整的拼音与五笔

Python
# coding: utf-8
try:
    import cPickle as pickle  # Python 2: C-accelerated pickle
except ImportError:
    import pickle  # Python 3


class CnCode(object):
    """Look up pinyin and Wubi codes for Chinese characters.

    The data is a pickled dictionary.  Keys are matched against the
    upper-case hexadecimal code point of a character (``"%X" % ord(char)``).
    Each value is a tab-separated string: field 0 holds space-separated
    pinyin readings, field 1 (when present) holds space-separated Wubi codes.
    """

    def __init__(self, data_path='./ChineseCode2.dat'):
        """Load the pickled lookup table from *data_path*.

        SECURITY: unpickling can execute arbitrary code.  Only load a data
        file you generated yourself or obtained from a trusted source.
        """
        # Pickle files must be opened in binary mode (required on Python 3
        # and on platforms that translate newlines).
        with open(data_path, 'rb') as handle:
            self.dict = pickle.load(handle)

    def pinyin(self, chars, splitter=' '):
        """Return the toneless pinyin of *chars*, joined with *splitter*.

        For a known character the first reading is taken and its last
        character (presumably the tone digit) is dropped.  Characters missing
        from the table are passed through unchanged.
        """
        result = []
        for char in chars:
            entry = self.dict.get("%X" % ord(char))
            if entry is None:
                result.append(char)
                continue
            first_reading = entry.split("\t")[0].split(" ")[0].strip()
            result.append(first_reading[:-1])
        return splitter.join(result)

    def wb(self, char, splitter=','):
        """Return the Wubi codes of the single character *char*.

        The codes are joined with *splitter*.  If the character is unknown,
        or its entry has no Wubi field, *char* itself is returned.
        """
        key = "%X" % ord(char)
        try:
            codes = self.dict[key].split("\t")[1].strip()
        except (KeyError, IndexError):
            # KeyError: unknown character; IndexError: entry without field 1.
            return char
        return splitter.join(codes.split(" "))


if __name__ == "__main__":
    p = CnCode()
    print(p.pinyin(u"中国人的一天"))
    print(p.wb(u"国"))

为拼音加上声调

下载库:ChineseCode2.dat

Python
view plain
print?
# coding: utf-8
try:
    import cPickle as pickle  # Python 2: C-accelerated pickle
except ImportError:
    import pickle  # Python 3
import re


class CnCode(object):
    """Pinyin (optionally with tone marks) and Wubi lookup for Chinese characters.

    The data is a pickled dictionary.  Keys are matched against the
    upper-case hexadecimal code point of a character (``"%X" % ord(char)``).
    Each value is a tab-separated string: field 0 holds space-separated
    pinyin readings ending in a tone digit, field 1 (when present) holds
    space-separated Wubi codes.

    Original author: HzqGhost (QQ: 313143468).
    """

    # Matches the tone digit at the very end of a reading such as "zhong1".
    _TONE_DIGIT = re.compile(r'[0-9]\Z')

    def __init__(self, data_path='./ChineseCode2.dat'):
        """Load the pickled lookup table from *data_path*.

        SECURITY: unpickling can execute arbitrary code.  Only load a data
        file you generated yourself or obtained from a trusted source.
        """
        # Pickle files must be opened in binary mode (required on Python 3
        # and on platforms that translate newlines).
        with open(data_path, 'rb') as handle:
            self.dict = pickle.load(handle)
        # Finals ("yunmu") list of the original implementation.  Tone
        # placement no longer uses it; kept so existing attribute access
        # keeps working.
        self.yunmu = ('ang', 'eng', 'ing', 'ong', 'an', 'en', 'in', 'un',
                      'ai', 'ei', 'ui', 'ao', 'ou', 'iu', 'ie', 'ue', 'er',
                      'en', 'a', 'o', 'e', 'i', 'u')
        # Plain vowel -> its four tone-marked forms (tones 1-4), space-separated.
        self.sheng = {'a': 'ā á ǎ à', 'o': 'ō ó ǒ ò', 'e': 'ē é ě è',
                      'i': 'ī í ǐ ì', 'u': 'ū ú ǔ ù'}

    def pinyin(self, chars, splitter=' ', issheng=False, isFirstUpper=True):
        """Convert Chinese characters to pinyin.

        chars        -- unicode string to convert
        splitter     -- string placed between the syllables
        issheng      -- if true, return pinyin with tone marks; otherwise the
                        tone digit is simply removed
        isFirstUpper -- if true, capitalise the first letter of each syllable

        Only the first reading of each character is used.  Characters that
        are not in the table are omitted from the result.
        """
        result = []
        for char in chars:
            entry = self.dict.get("%X" % ord(char))
            if entry is None:
                # Unknown characters are dropped, as in the original code.
                continue
            py = entry.split("\t")[0].split(" ")[0].strip()
            if issheng:
                py = self._mark_tone(py)
            else:
                py = self._split_tone(py)[0]
            if isFirstUpper:
                py = py[0:1].upper() + py[1:]
            result.append(py)
        return splitter.join(result)

    def _split_tone(self, py):
        """Split "zhong1" into ("zhong", 1); the tone is None without a digit."""
        if self._TONE_DIGIT.search(py) is None:
            return py, None
        return py[:-1], int(py[-1])

    def _mark_tone(self, py):
        """Replace the trailing tone digit of *py* with a tone mark."""
        base, tone = self._split_tone(py)
        if tone not in (1, 2, 3, 4):
            # Neutral tone (5) or no/unknown digit: written without a mark.
            return base
        index = self._tone_vowel_index(base)
        if index < 0:
            # No markable vowel (e.g. "ng2"): leave the reading untouched.
            return py
        marked = self.sheng[base[index]].split(' ')[tone - 1]
        return base[:index] + marked + base[index + 1:]

    @staticmethod
    def _tone_vowel_index(base):
        """Return the index of the vowel that carries the tone mark, or -1.

        Standard placement: 'a' or 'e' if present; otherwise the 'o' of
        "ou"; otherwise the last vowel of the syllable.
        """
        for vowel in 'ae':
            position = base.find(vowel)
            if position >= 0:
                return position
        position = base.find('ou')
        if position >= 0:
            return position
        for position in range(len(base) - 1, -1, -1):
            if base[position] in 'aeiou':
                return position
        return -1

    def wb(self, char, splitter=','):
        """Return the Wubi codes of the single character *char*.

        The codes are joined with *splitter*.  If the character is unknown,
        or its entry has no Wubi field, *char* itself is returned.
        """
        key = "%X" % ord(char)
        try:
            codes = self.dict[key].split("\t")[1].strip()
        except (KeyError, IndexError):
            # KeyError: unknown character; IndexError: entry without field 1.
            return char
        return splitter.join(codes.split(" "))


if __name__ == "__main__":
    p = CnCode()
    print(p.pinyin(u"中国人的一天我看", ' ', True))
    print(p.wb(u"国"))