拼音方法一(较全)
下载库:Mandarin.dat
Python
03 | def __init__( self , data_path = './Mandarin.dat' ): |
05 | for line in open (data_path): |
06 | k, v = line.split( '\t' ) |
09 | def pinyin( self , chars): |
12 | key = "%X" % ord (char) |
14 | result.append( self . dict [key].split( " " )[ 0 ].strip()[: - 1 ].lower()) |
17 | return self .splitter.join(result) |
18 | if __name__ == "__main__" : |
20 | print p.pinyin(u"中国人的一天" ) |
五笔与拼音
下载库:ChineseCode.dat
Python
03 | def __init__( self , data_path = './ChineseCode.dat' ): |
05 | for line in open (data_path): |
07 | self . dict [v[ 0 ]] = v[ 6 ] |
12 | key = "%X" % ord (char) |
14 | result.append( self . dict [key] + '\n' ) |
17 | return self .splitter.join(result) |
18 | def pinyin( self , chars): |
21 | key = "%X" % ord (char) |
23 | result.append( self . dict [key].split( "," )[ 0 ].strip()[: - 1 ].lower()) |
26 | return self .splitter.join(result) |
27 | if __name__ == "__main__" : |
合并两者
Python
05 | for line in open ( './Mandarin.dat' ): |
06 | k, v = line.split( '\t' ) |
07 | dict [k] = v.rstrip().lower() |
11 | for line in open ( './ChineseCode.dat' ): |
12 | items = line.split( '\t' ) |
17 | dict [key] = dict [key] + '\t' + items[ 6 ] + '\t' + items[ 2 ] |
21 | f = file ( 'ChineseCode2.dat' , 'w' ) |
较完整的拼音与五笔
Python
05 | f = file ( './ChineseCode2.dat' ) |
06 | self . dict = cPickle.load(f) |
09 | def pinyin( self , chars,splitter = ' ' ): |
12 | key = "%X" % ord (char) |
14 | result.append( self . dict [key].split( "\t" )[ 0 ].split( " " )[ 0 ].strip()[: - 1 ]) |
17 | return splitter.join(result) |
19 | def wb( self , char,splitter = ',' ): |
20 | key = "%X" % ord (char) |
22 | result = self . dict [key].split( "\t" )[ 1 ].strip() |
23 | result = splitter.join(result.split( " " )) |
27 | if __name__ == "__main__" : |
29 | print p.pinyin(u"中国人的一天" ) |
为拼音加上声调
ChineseCode2.dat下载
Python
09 | f = file ( './ChineseCode2.dat' ) |
10 | self . dict = cPickle.load(f) |
12 | self .yunmu = ( 'ang' , 'eng' , 'ing' , 'ong' , 'an' , 'en' , 'in' , 'un' , 'ai' , 'ei' , 'ui' , 'ao' , 'ou' , 'iu' , 'ie' , 'ue' , 'er' , 'en' , 'a' , 'o' , 'e' , 'i' , 'u' ) |
13 | self .sheng = { 'a' : 'ā á ǎ à' , 'o' : 'ō ó ǒ ò' , 'e' : 'ē é ě è' , 'i' : 'ī í ǐ ì' , 'u' : 'ū ú ǔ ù' } |
15 | def pinyin( self , chars, splitter = ' ' , issheng = False , isFirstUpper = True ): |
24 | key = "%X" % ord (char) |
27 | if self . dict .has_key(key): |
28 | py = self . dict [key].split( "\t" )[ 0 ].split( " " )[ 0 ].strip() |
32 | t = py[ - 1 :].encode( 'ascii' , 'ignore' ) |
34 | t3 = ( int (t2) - 48 ) % 4 - 1 |
36 | letter = self .sheng[ym[ 0 ]].split( ' ' )[t3] |
43 | py = py[ 0 : 1 ].upper() + py[ 1 :] |
47 | return splitter.join(result) |
49 | def wb( self , char,splitter = ',' ): |
50 | key = "%X" % ord (char) |
52 | result = self . dict [key].split( "\t" )[ 1 ].strip() |
53 | result = splitter.join(result.split( " " )) |
58 | if __name__ == "__main__" : |
60 | print p.pinyin(u"中国人的一天我看" , ' ' , True ) |
|