Android 汉字转换成拼音
package com.example.test;

import android.text.TextUtils;
import android.util.Log;

import java.text.Collator;
import java.util.ArrayList;
import java.util.Locale;

/**
 * An object to convert Chinese character to its corresponding pinyin string.
 * For characters with multiple possible pinyin string, only one is selected
 * according to collator. Polyphone is not supported in this implementation.
 * This class is implemented to achieve the best runtime performance and minimum
 * runtime resources with tolerable sacrifice of accuracy. This implementation
 * highly depends on zh_CN ICU collation data and must be always synchronized
 * with ICU.
 *
 * Currently this file is aligned to zh.txt in ICU 4.6. Taken from the
 * Android 4.2 source.
 */
public class HanziToPinyin {
    private static final String TAG = "HanziToPinyin";

    // Turn on this flag when we want to check internal data structure.
    private static final boolean DEBUG = false;

    /**
     * Unihans array.
     *
     * Each unihans is the first one within same pinyin when collator is zh_CN.
     */
    public static final char[] UNIHANS = { '\u963f', '\u54ce', '\u5b89',
            '\u80ae', '\u51f9', '\u516b', '\u6300', '\u6273', '\u90a6',
            '\u52f9', '\u9642', '\u5954', '\u4f3b', '\u5c44', '\u8fb9',
            '\u706c', '\u618b', '\u6c43', '\u51ab', '\u7676', '\u5cec',
            '\u5693', '\u5072', '\u53c2', '\u4ed3', '\u64a1', '\u518a',
            '\u5d7e', '\u66fd', '\u66fe', '\u5c64', '\u53c9', '\u8286',
            '\u8fbf', '\u4f25', '\u6284', '\u8f66', '\u62bb', '\u6c88',
            '\u6c89', '\u9637', '\u5403', '\u5145', '\u62bd', '\u51fa',
            '\u6b3b', '\u63e3', '\u5ddb', '\u5205', '\u5439', '\u65fe',
            '\u9034', '\u5472', '\u5306', '\u51d1', '\u7c97', '\u6c46',
            '\u5d14', '\u90a8', '\u6413', '\u5491', '\u5446', '\u4e39',
            '\u5f53', '\u5200', '\u561a', '\u6265', '\u706f', '\u6c10',
            '\u55f2', '\u7538', '\u5201', '\u7239', '\u4e01', '\u4e1f',
            '\u4e1c', '\u543a', '\u53be', '\u8011', '\u8968', '\u5428',
            '\u591a', '\u59b8', '\u8bf6', '\u5940', '\u97a5', '\u513f',
            '\u53d1', '\u5e06', '\u531a', '\u98de', '\u5206', '\u4e30',
            '\u8985', '\u4ecf', '\u7d11', '\u4f15', '\u65ee', '\u4f85',
            '\u7518', '\u5188', '\u768b', '\u6208', '\u7ed9', '\u6839',
            '\u522f', '\u5de5', '\u52fe', '\u4f30', '\u74dc', '\u4e56',
            '\u5173', '\u5149', '\u5f52', '\u4e28', '\u5459', '\u54c8',
            '\u548d', '\u4f44', '\u592f', '\u8320', '\u8bc3', '\u9ed2',
            '\u62eb', '\u4ea8', '\u5677', '\u53ff', '\u9f41', '\u4e6f',
            '\u82b1', '\u6000', '\u72bf', '\u5ddf', '\u7070', '\u660f',
            '\u5419', '\u4e0c', '\u52a0', '\u620b', '\u6c5f', '\u827d',
            '\u9636', '\u5dfe', '\u5755', '\u5182', '\u4e29', '\u51e5',
            '\u59e2', '\u5658', '\u519b', '\u5494', '\u5f00', '\u520a',
            '\u5ffc', '\u5c3b', '\u533c', '\u808e', '\u52a5', '\u7a7a',
            '\u62a0', '\u625d', '\u5938', '\u84af', '\u5bbd', '\u5321',
            '\u4e8f', '\u5764', '\u6269', '\u5783', '\u6765', '\u5170',
            '\u5577', '\u635e', '\u808b', '\u52d2', '\u5d1a', '\u5215',
            '\u4fe9', '\u5941', '\u826f', '\u64a9', '\u5217', '\u62ce',
            '\u5222', '\u6e9c', '\u56d6', '\u9f99', '\u779c', '\u565c',
            '\u5a08', '\u7567', '\u62a1', '\u7f57', '\u5463', '\u5988',
            '\u57cb', '\u5ada', '\u7264', '\u732b', '\u4e48', '\u5445',
            '\u95e8', '\u753f', '\u54aa', '\u5b80', '\u55b5', '\u4e5c',
            '\u6c11', '\u540d', '\u8c2c', '\u6478', '\u54de', '\u6bea',
            '\u55ef', '\u62cf', '\u8149', '\u56e1', '\u56d4', '\u5b6c',
            '\u7592', '\u5a1e', '\u6041', '\u80fd', '\u59ae', '\u62c8',
            '\u5b22', '\u9e1f', '\u634f', '\u56dc', '\u5b81', '\u599e',
            '\u519c', '\u7fba', '\u5974', '\u597b', '\u759f', '\u9ec1',
            '\u90cd', '\u5594', '\u8bb4', '\u5991', '\u62cd', '\u7705',
            '\u4e53', '\u629b', '\u5478', '\u55b7', '\u5309', '\u4e15',
            '\u56e8', '\u527d', '\u6c15', '\u59d8', '\u4e52', '\u948b',
            '\u5256', '\u4ec6', '\u4e03', '\u6390', '\u5343', '\u545b',
            '\u6084', '\u767f', '\u4eb2', '\u72c5', '\u828e', '\u4e18',
            '\u533a', '\u5cd1', '\u7f3a', '\u590b', '\u5465', '\u7a63',
            '\u5a06', '\u60f9', '\u4eba', '\u6254', '\u65e5', '\u8338',
            '\u53b9', '\u909a', '\u633c', '\u5827', '\u5a51', '\u77a4',
            '\u637c', '\u4ee8', '\u6be2', '\u4e09', '\u6852', '\u63bb',
            '\u95aa', '\u68ee', '\u50e7', '\u6740', '\u7b5b', '\u5c71',
            '\u4f24', '\u5f30', '\u5962', '\u7533', '\u8398', '\u6552',
            '\u5347', '\u5c38', '\u53ce', '\u4e66', '\u5237', '\u8870',
            '\u95e9', '\u53cc', '\u8c01', '\u542e', '\u8bf4', '\u53b6',
            '\u5fea', '\u635c', '\u82cf', '\u72fb', '\u590a', '\u5b59',
            '\u5506', '\u4ed6', '\u56fc', '\u574d', '\u6c64', '\u5932',
            '\u5fd1', '\u71a5', '\u5254', '\u5929', '\u65eb', '\u5e16',
            '\u5385', '\u56f2', '\u5077', '\u51f8', '\u6e4d', '\u63a8',
            '\u541e', '\u4e47', '\u7a75', '\u6b6a', '\u5f2f', '\u5c23',
            '\u5371', '\u6637', '\u7fc1', '\u631d', '\u4e4c', '\u5915',
            '\u8672', '\u4eda', '\u4e61', '\u7071', '\u4e9b', '\u5fc3',
            '\u661f', '\u51f6', '\u4f11', '\u5401', '\u5405', '\u524a',
            '\u5743', '\u4e2b', '\u6079', '\u592e', '\u5e7a', '\u503b',
            '\u4e00', '\u56d9', '\u5e94', '\u54df', '\u4f63', '\u4f18',
            '\u625c', '\u56e6', '\u66f0', '\u6655', '\u7b60', '\u7b7c',
            '\u5e00', '\u707d', '\u5142', '\u5328', '\u50ae', '\u5219',
            '\u8d3c', '\u600e', '\u5897', '\u624e', '\u635a', '\u6cbe',
            '\u5f20', '\u957f', '\u9577', '\u4f4b', '\u8707', '\u8d1e',
            '\u4e89', '\u4e4b', '\u5cd9', '\u5ea2', '\u4e2d', '\u5dde',
            '\u6731', '\u6293', '\u62fd', '\u4e13', '\u5986', '\u96b9',
            '\u5b92', '\u5353', '\u4e72', '\u5b97', '\u90b9', '\u79df',
            '\u94bb', '\u539c', '\u5c0a', '\u6628', '\u5159', '\u9fc3',
            '\u9fc4', };

    /**
     * Pinyin array.
     *
     * Each pinyin is corresponding to unihans of same offset in the unihans
     * array. Every row holds up to six upper-case ASCII letter codes and is
     * zero-padded; an all-zero row means "no pinyin" for that boundary.
     */
    public static final byte[][] PINYINS = { { 65, 0, 0, 0, 0, 0 },
            { 65, 73, 0, 0, 0, 0 }, { 65, 78, 0, 0, 0, 0 },
            { 65, 78, 71, 0, 0, 0 }, { 65, 79, 0, 0, 0, 0 },
            { 66, 65, 0, 0, 0, 0 }, { 66, 65, 73, 0, 0, 0 },
            { 66, 65, 78, 0, 0, 0 }, { 66, 65, 78, 71, 0, 0 },
            { 66, 65, 79, 0, 0, 0 }, { 66, 69, 73, 0, 0, 0 },
            { 66, 69, 78, 0, 0, 0 }, { 66, 69, 78, 71, 0, 0 },
            { 66, 73, 0, 0, 0, 0 }, { 66, 73, 65, 78, 0, 0 },
            { 66, 73, 65, 79, 0, 0 }, { 66, 73, 69, 0, 0, 0 },
            { 66, 73, 78, 0, 0, 0 }, { 66, 73, 78, 71, 0, 0 },
            { 66, 79, 0, 0, 0, 0 }, { 66, 85, 0, 0, 0, 0 },
            { 67, 65, 0, 0, 0, 0 }, { 67, 65, 73, 0, 0, 0 },
            { 67, 65, 78, 0, 0, 0 }, { 67, 65, 78, 71, 0, 0 },
            { 67, 65, 79, 0, 0, 0 }, { 67, 69, 0, 0, 0, 0 },
            { 67, 69, 78, 0, 0, 0 }, { 67, 69, 78, 71, 0, 0 },
            { 90, 69, 78, 71, 0, 0 }, { 67, 69, 78, 71, 0, 0 },
            { 67, 72, 65, 0, 0, 0 }, { 67, 72, 65, 73, 0, 0 },
            { 67, 72, 65, 78, 0, 0 }, { 67, 72, 65, 78, 71, 0 },
            { 67, 72, 65, 79, 0, 0 }, { 67, 72, 69, 0, 0, 0 },
            { 67, 72, 69, 78, 0, 0 }, { 83, 72, 69, 78, 0, 0 },
            { 67, 72, 69, 78, 0, 0 }, { 67, 72, 69, 78, 71, 0 },
            { 67, 72, 73, 0, 0, 0 }, { 67, 72, 79, 78, 71, 0 },
            { 67, 72, 79, 85, 0, 0 }, { 67, 72, 85, 0, 0, 0 },
            { 67, 72, 85, 65, 0, 0 }, { 67, 72, 85, 65, 73, 0 },
            { 67, 72, 85, 65, 78, 0 }, { 67, 72, 85, 65, 78, 71 },
            { 67, 72, 85, 73, 0, 0 }, { 67, 72, 85, 78, 0, 0 },
            { 67, 72, 85, 79, 0, 0 }, { 67, 73, 0, 0, 0, 0 },
            { 67, 79, 78, 71, 0, 0 }, { 67, 79, 85, 0, 0, 0 },
            { 67, 85, 0, 0, 0, 0 }, { 67, 85, 65, 78, 0, 0 },
            { 67, 85, 73, 0, 0, 0 }, { 67, 85, 78, 0, 0, 0 },
            { 67, 85, 79, 0, 0, 0 }, { 68, 65, 0, 0, 0, 0 },
            { 68, 65, 73, 0, 0, 0 }, { 68, 65, 78, 0, 0, 0 },
            { 68, 65, 78, 71, 0, 0 }, { 68, 65, 79, 0, 0, 0 },
            { 68, 69, 0, 0, 0, 0 }, { 68, 69, 78, 0, 0, 0 },
            { 68, 69, 78, 71, 0, 0 }, { 68, 73, 0, 0, 0, 0 },
            { 68, 73, 65, 0, 0, 0 }, { 68, 73, 65, 78, 0, 0 },
            { 68, 73, 65, 79, 0, 0 }, { 68, 73, 69, 0, 0, 0 },
            { 68, 73, 78, 71, 0, 0 }, { 68, 73, 85, 0, 0, 0 },
            { 68, 79, 78, 71, 0, 0 }, { 68, 79, 85, 0, 0, 0 },
            { 68, 85, 0, 0, 0, 0 }, { 68, 85, 65, 78, 0, 0 },
            { 68, 85, 73, 0, 0, 0 }, { 68, 85, 78, 0, 0, 0 },
            { 68, 85, 79, 0, 0, 0 }, { 69, 0, 0, 0, 0, 0 },
            { 69, 73, 0, 0, 0, 0 }, { 69, 78, 0, 0, 0, 0 },
            { 69, 78, 71, 0, 0, 0 }, { 69, 82, 0, 0, 0, 0 },
            { 70, 65, 0, 0, 0, 0 }, { 70, 65, 78, 0, 0, 0 },
            { 70, 65, 78, 71, 0, 0 }, { 70, 69, 73, 0, 0, 0 },
            { 70, 69, 78, 0, 0, 0 }, { 70, 69, 78, 71, 0, 0 },
            { 70, 73, 65, 79, 0, 0 }, { 70, 79, 0, 0, 0, 0 },
            { 70, 79, 85, 0, 0, 0 }, { 70, 85, 0, 0, 0, 0 },
            { 71, 65, 0, 0, 0, 0 }, { 71, 65, 73, 0, 0, 0 },
            { 71, 65, 78, 0, 0, 0 }, { 71, 65, 78, 71, 0, 0 },
            { 71, 65, 79, 0, 0, 0 }, { 71, 69, 0, 0, 0, 0 },
            { 71, 69, 73, 0, 0, 0 }, { 71, 69, 78, 0, 0, 0 },
            { 71, 69, 78, 71, 0, 0 }, { 71, 79, 78, 71, 0, 0 },
            { 71, 79, 85, 0, 0, 0 }, { 71, 85, 0, 0, 0, 0 },
            { 71, 85, 65, 0, 0, 0 }, { 71, 85, 65, 73, 0, 0 },
            { 71, 85, 65, 78, 0, 0 }, { 71, 85, 65, 78, 71, 0 },
            { 71, 85, 73, 0, 0, 0 }, { 71, 85, 78, 0, 0, 0 },
            { 71, 85, 79, 0, 0, 0 }, { 72, 65, 0, 0, 0, 0 },
            { 72, 65, 73, 0, 0, 0 }, { 72, 65, 78, 0, 0, 0 },
            { 72, 65, 78, 71, 0, 0 }, { 72, 65, 79, 0, 0, 0 },
            { 72, 69, 0, 0, 0, 0 }, { 72, 69, 73, 0, 0, 0 },
            { 72, 69, 78, 0, 0, 0 }, { 72, 69, 78, 71, 0, 0 },
            { 72, 77, 0, 0, 0, 0 }, { 72, 79, 78, 71, 0, 0 },
            { 72, 79, 85, 0, 0, 0 }, { 72, 85, 0, 0, 0, 0 },
            { 72, 85, 65, 0, 0, 0 }, { 72, 85, 65, 73, 0, 0 },
            { 72, 85, 65, 78, 0, 0 }, { 72, 85, 65, 78, 71, 0 },
            { 72, 85, 73, 0, 0, 0 }, { 72, 85, 78, 0, 0, 0 },
            { 72, 85, 79, 0, 0, 0 }, { 74, 73, 0, 0, 0, 0 },
            { 74, 73, 65, 0, 0, 0 }, { 74, 73, 65, 78, 0, 0 },
            { 74, 73, 65, 78, 71, 0 }, { 74, 73, 65, 79, 0, 0 },
            { 74, 73, 69, 0, 0, 0 }, { 74, 73, 78, 0, 0, 0 },
            { 74, 73, 78, 71, 0, 0 }, { 74, 73, 79, 78, 71, 0 },
            { 74, 73, 85, 0, 0, 0 }, { 74, 85, 0, 0, 0, 0 },
            { 74, 85, 65, 78, 0, 0 }, { 74, 85, 69, 0, 0, 0 },
            { 74, 85, 78, 0, 0, 0 }, { 75, 65, 0, 0, 0, 0 },
            { 75, 65, 73, 0, 0, 0 }, { 75, 65, 78, 0, 0, 0 },
            { 75, 65, 78, 71, 0, 0 }, { 75, 65, 79, 0, 0, 0 },
            { 75, 69, 0, 0, 0, 0 }, { 75, 69, 78, 0, 0, 0 },
            { 75, 69, 78, 71, 0, 0 }, { 75, 79, 78, 71, 0, 0 },
            { 75, 79, 85, 0, 0, 0 }, { 75, 85, 0, 0, 0, 0 },
            { 75, 85, 65, 0, 0, 0 }, { 75, 85, 65, 73, 0, 0 },
            { 75, 85, 65, 78, 0, 0 }, { 75, 85, 65, 78, 71, 0 },
            { 75, 85, 73, 0, 0, 0 }, { 75, 85, 78, 0, 0, 0 },
            { 75, 85, 79, 0, 0, 0 }, { 76, 65, 0, 0, 0, 0 },
            { 76, 65, 73, 0, 0, 0 }, { 76, 65, 78, 0, 0, 0 },
            { 76, 65, 78, 71, 0, 0 }, { 76, 65, 79, 0, 0, 0 },
            { 76, 69, 0, 0, 0, 0 }, { 76, 69, 73, 0, 0, 0 },
            { 76, 69, 78, 71, 0, 0 }, { 76, 73, 0, 0, 0, 0 },
            { 76, 73, 65, 0, 0, 0 }, { 76, 73, 65, 78, 0, 0 },
            { 76, 73, 65, 78, 71, 0 }, { 76, 73, 65, 79, 0, 0 },
            { 76, 73, 69, 0, 0, 0 }, { 76, 73, 78, 0, 0, 0 },
            { 76, 73, 78, 71, 0, 0 }, { 76, 73, 85, 0, 0, 0 },
            { 76, 79, 0, 0, 0, 0 }, { 76, 79, 78, 71, 0, 0 },
            { 76, 79, 85, 0, 0, 0 }, { 76, 85, 0, 0, 0, 0 },
            { 76, 85, 65, 78, 0, 0 }, { 76, 85, 69, 0, 0, 0 },
            { 76, 85, 78, 0, 0, 0 }, { 76, 85, 79, 0, 0, 0 },
            { 77, 0, 0, 0, 0, 0 }, { 77, 65, 0, 0, 0, 0 },
            { 77, 65, 73, 0, 0, 0 }, { 77, 65, 78, 0, 0, 0 },
            { 77, 65, 78, 71, 0, 0 }, { 77, 65, 79, 0, 0, 0 },
            { 77, 69, 0, 0, 0, 0 }, { 77, 69, 73, 0, 0, 0 },
            { 77, 69, 78, 0, 0, 0 }, { 77, 69, 78, 71, 0, 0 },
            { 77, 73, 0, 0, 0, 0 }, { 77, 73, 65, 78, 0, 0 },
            { 77, 73, 65, 79, 0, 0 }, { 77, 73, 69, 0, 0, 0 },
            { 77, 73, 78, 0, 0, 0 }, { 77, 73, 78, 71, 0, 0 },
            { 77, 73, 85, 0, 0, 0 }, { 77, 79, 0, 0, 0, 0 },
            { 77, 79, 85, 0, 0, 0 }, { 77, 85, 0, 0, 0, 0 },
            { 78, 0, 0, 0, 0, 0 }, { 78, 65, 0, 0, 0, 0 },
            { 78, 65, 73, 0, 0, 0 }, { 78, 65, 78, 0, 0, 0 },
            { 78, 65, 78, 71, 0, 0 }, { 78, 65, 79, 0, 0, 0 },
            { 78, 69, 0, 0, 0, 0 }, { 78, 69, 73, 0, 0, 0 },
            { 78, 69, 78, 0, 0, 0 }, { 78, 69, 78, 71, 0, 0 },
            { 78, 73, 0, 0, 0, 0 }, { 78, 73, 65, 78, 0, 0 },
            { 78, 73, 65, 78, 71, 0 }, { 78, 73, 65, 79, 0, 0 },
            { 78, 73, 69, 0, 0, 0 }, { 78, 73, 78, 0, 0, 0 },
            { 78, 73, 78, 71, 0, 0 }, { 78, 73, 85, 0, 0, 0 },
            { 78, 79, 78, 71, 0, 0 }, { 78, 79, 85, 0, 0, 0 },
            { 78, 85, 0, 0, 0, 0 }, { 78, 85, 65, 78, 0, 0 },
            { 78, 85, 69, 0, 0, 0 }, { 78, 85, 78, 0, 0, 0 },
            { 78, 85, 79, 0, 0, 0 }, { 79, 0, 0, 0, 0, 0 },
            { 79, 85, 0, 0, 0, 0 }, { 80, 65, 0, 0, 0, 0 },
            { 80, 65, 73, 0, 0, 0 }, { 80, 65, 78, 0, 0, 0 },
            { 80, 65, 78, 71, 0, 0 }, { 80, 65, 79, 0, 0, 0 },
            { 80, 69, 73, 0, 0, 0 }, { 80, 69, 78, 0, 0, 0 },
            { 80, 69, 78, 71, 0, 0 }, { 80, 73, 0, 0, 0, 0 },
            { 80, 73, 65, 78, 0, 0 }, { 80, 73, 65, 79, 0, 0 },
            { 80, 73, 69, 0, 0, 0 }, { 80, 73, 78, 0, 0, 0 },
            { 80, 73, 78, 71, 0, 0 }, { 80, 79, 0, 0, 0, 0 },
            { 80, 79, 85, 0, 0, 0 }, { 80, 85, 0, 0, 0, 0 },
            { 81, 73, 0, 0, 0, 0 }, { 81, 73, 65, 0, 0, 0 },
            { 81, 73, 65, 78, 0, 0 }, { 81, 73, 65, 78, 71, 0 },
            { 81, 73, 65, 79, 0, 0 }, { 81, 73, 69, 0, 0, 0 },
            { 81, 73, 78, 0, 0, 0 }, { 81, 73, 78, 71, 0, 0 },
            { 81, 73, 79, 78, 71, 0 }, { 81, 73, 85, 0, 0, 0 },
            { 81, 85, 0, 0, 0, 0 }, { 81, 85, 65, 78, 0, 0 },
            { 81, 85, 69, 0, 0, 0 }, { 81, 85, 78, 0, 0, 0 },
            { 82, 65, 78, 0, 0, 0 }, { 82, 65, 78, 71, 0, 0 },
            { 82, 65, 79, 0, 0, 0 }, { 82, 69, 0, 0, 0, 0 },
            { 82, 69, 78, 0, 0, 0 }, { 82, 69, 78, 71, 0, 0 },
            { 82, 73, 0, 0, 0, 0 }, { 82, 79, 78, 71, 0, 0 },
            { 82, 79, 85, 0, 0, 0 }, { 82, 85, 0, 0, 0, 0 },
            { 82, 85, 65, 0, 0, 0 }, { 82, 85, 65, 78, 0, 0 },
            { 82, 85, 73, 0, 0, 0 }, { 82, 85, 78, 0, 0, 0 },
            { 82, 85, 79, 0, 0, 0 }, { 83, 65, 0, 0, 0, 0 },
            { 83, 65, 73, 0, 0, 0 }, { 83, 65, 78, 0, 0, 0 },
            { 83, 65, 78, 71, 0, 0 }, { 83, 65, 79, 0, 0, 0 },
            { 83, 69, 0, 0, 0, 0 }, { 83, 69, 78, 0, 0, 0 },
            { 83, 69, 78, 71, 0, 0 }, { 83, 72, 65, 0, 0, 0 },
            { 83, 72, 65, 73, 0, 0 }, { 83, 72, 65, 78, 0, 0 },
            { 83, 72, 65, 78, 71, 0 }, { 83, 72, 65, 79, 0, 0 },
            { 83, 72, 69, 0, 0, 0 }, { 83, 72, 69, 78, 0, 0 },
            { 88, 73, 78, 0, 0, 0 }, { 83, 72, 69, 78, 0, 0 },
            { 83, 72, 69, 78, 71, 0 }, { 83, 72, 73, 0, 0, 0 },
            { 83, 72, 79, 85, 0, 0 }, { 83, 72, 85, 0, 0, 0 },
            { 83, 72, 85, 65, 0, 0 }, { 83, 72, 85, 65, 73, 0 },
            { 83, 72, 85, 65, 78, 0 }, { 83, 72, 85, 65, 78, 71 },
            { 83, 72, 85, 73, 0, 0 }, { 83, 72, 85, 78, 0, 0 },
            { 83, 72, 85, 79, 0, 0 }, { 83, 73, 0, 0, 0, 0 },
            { 83, 79, 78, 71, 0, 0 }, { 83, 79, 85, 0, 0, 0 },
            { 83, 85, 0, 0, 0, 0 }, { 83, 85, 65, 78, 0, 0 },
            { 83, 85, 73, 0, 0, 0 }, { 83, 85, 78, 0, 0, 0 },
            { 83, 85, 79, 0, 0, 0 }, { 84, 65, 0, 0, 0, 0 },
            { 84, 65, 73, 0, 0, 0 }, { 84, 65, 78, 0, 0, 0 },
            { 84, 65, 78, 71, 0, 0 }, { 84, 65, 79, 0, 0, 0 },
            { 84, 69, 0, 0, 0, 0 }, { 84, 69, 78, 71, 0, 0 },
            { 84, 73, 0, 0, 0, 0 }, { 84, 73, 65, 78, 0, 0 },
            { 84, 73, 65, 79, 0, 0 }, { 84, 73, 69, 0, 0, 0 },
            { 84, 73, 78, 71, 0, 0 }, { 84, 79, 78, 71, 0, 0 },
            { 84, 79, 85, 0, 0, 0 }, { 84, 85, 0, 0, 0, 0 },
            { 84, 85, 65, 78, 0, 0 }, { 84, 85, 73, 0, 0, 0 },
            { 84, 85, 78, 0, 0, 0 }, { 84, 85, 79, 0, 0, 0 },
            { 87, 65, 0, 0, 0, 0 }, { 87, 65, 73, 0, 0, 0 },
            { 87, 65, 78, 0, 0, 0 }, { 87, 65, 78, 71, 0, 0 },
            { 87, 69, 73, 0, 0, 0 }, { 87, 69, 78, 0, 0, 0 },
            { 87, 69, 78, 71, 0, 0 }, { 87, 79, 0, 0, 0, 0 },
            { 87, 85, 0, 0, 0, 0 }, { 88, 73, 0, 0, 0, 0 },
            { 88, 73, 65, 0, 0, 0 }, { 88, 73, 65, 78, 0, 0 },
            { 88, 73, 65, 78, 71, 0 }, { 88, 73, 65, 79, 0, 0 },
            { 88, 73, 69, 0, 0, 0 }, { 88, 73, 78, 0, 0, 0 },
            { 88, 73, 78, 71, 0, 0 }, { 88, 73, 79, 78, 71, 0 },
            { 88, 73, 85, 0, 0, 0 }, { 88, 85, 0, 0, 0, 0 },
            { 88, 85, 65, 78, 0, 0 }, { 88, 85, 69, 0, 0, 0 },
            { 88, 85, 78, 0, 0, 0 }, { 89, 65, 0, 0, 0, 0 },
            { 89, 65, 78, 0, 0, 0 }, { 89, 65, 78, 71, 0, 0 },
            { 89, 65, 79, 0, 0, 0 }, { 89, 69, 0, 0, 0, 0 },
            { 89, 73, 0, 0, 0, 0 }, { 89, 73, 78, 0, 0, 0 },
            { 89, 73, 78, 71, 0, 0 }, { 89, 79, 0, 0, 0, 0 },
            { 89, 79, 78, 71, 0, 0 }, { 89, 79, 85, 0, 0, 0 },
            { 89, 85, 0, 0, 0, 0 }, { 89, 85, 65, 78, 0, 0 },
            { 89, 85, 69, 0, 0, 0 }, { 89, 85, 78, 0, 0, 0 },
            { 74, 85, 78, 0, 0, 0 }, { 89, 85, 78, 0, 0, 0 },
            { 90, 65, 0, 0, 0, 0 }, { 90, 65, 73, 0, 0, 0 },
            { 90, 65, 78, 0, 0, 0 }, { 90, 65, 78, 71, 0, 0 },
            { 90, 65, 79, 0, 0, 0 }, { 90, 69, 0, 0, 0, 0 },
            { 90, 69, 73, 0, 0, 0 }, { 90, 69, 78, 0, 0, 0 },
            { 90, 69, 78, 71, 0, 0 }, { 90, 72, 65, 0, 0, 0 },
            { 90, 72, 65, 73, 0, 0 }, { 90, 72, 65, 78, 0, 0 },
            { 90, 72, 65, 78, 71, 0 }, { 67, 72, 65, 78, 71, 0 },
            { 90, 72, 65, 78, 71, 0 }, { 90, 72, 65, 79, 0, 0 },
            { 90, 72, 69, 0, 0, 0 }, { 90, 72, 69, 78, 0, 0 },
            { 90, 72, 69, 78, 71, 0 }, { 90, 72, 73, 0, 0, 0 },
            { 83, 72, 73, 0, 0, 0 }, { 90, 72, 73, 0, 0, 0 },
            { 90, 72, 79, 78, 71, 0 }, { 90, 72, 79, 85, 0, 0 },
            { 90, 72, 85, 0, 0, 0 }, { 90, 72, 85, 65, 0, 0 },
            { 90, 72, 85, 65, 73, 0 }, { 90, 72, 85, 65, 78, 0 },
            { 90, 72, 85, 65, 78, 71 }, { 90, 72, 85, 73, 0, 0 },
            { 90, 72, 85, 78, 0, 0 }, { 90, 72, 85, 79, 0, 0 },
            { 90, 73, 0, 0, 0, 0 }, { 90, 79, 78, 71, 0, 0 },
            { 90, 79, 85, 0, 0, 0 }, { 90, 85, 0, 0, 0, 0 },
            { 90, 85, 65, 78, 0, 0 }, { 90, 85, 73, 0, 0, 0 },
            { 90, 85, 78, 0, 0, 0 }, { 90, 85, 79, 0, 0, 0 },
            { 0, 0, 0, 0, 0, 0 }, { 83, 72, 65, 78, 0, 0 },
            { 0, 0, 0, 0, 0, 0 }, };

    /**
     * First and last Chinese character with known Pinyin according to zh
     * collation
     */
    private static final String FIRST_PINYIN_UNIHAN = "\u963F";
    private static final String LAST_PINYIN_UNIHAN = "\u9FFF";

    private static final Collator COLLATOR = Collator.getInstance(Locale.CHINA);

    private static HanziToPinyin sInstance;
    private final boolean mHasChinaCollator;

    /**
     * One unit of converted output: either a single Han character with its
     * pinyin, or a run of consecutive non-Han characters.
     */
    public static class Token {
        /**
         * Separator between target string for each source char
         */
        public static final String SEPARATOR = " ";

        public static final int LATIN = 1;
        public static final int PINYIN = 2;
        public static final int UNKNOWN = 3;

        public Token() {
        }

        public Token(int type, String source, String target) {
            this.type = type;
            this.source = source;
            this.target = target;
        }

        /**
         * Type of this token: LATIN, PINYIN or UNKNOWN.
         */
        public int type;
        /**
         * Original string before translation.
         */
        public String source;
        /**
         * Translated string of source. For Han, target is corresponding Pinyin.
         * Otherwise target is original string in source.
         */
        public String target;
    }

    protected HanziToPinyin(boolean hasChinaCollator) {
        mHasChinaCollator = hasChinaCollator;
    }

    /**
     * Returns the shared converter, creating it on first use.
     *
     * The instance is enabled only when {@link Locale#CHINA} is among the
     * locales reported by {@link Collator#getAvailableLocales()}; otherwise a
     * disabled instance is created whose {@link #get(String)} always returns
     * an empty list.
     */
    public static HanziToPinyin getInstance() {
        synchronized (HanziToPinyin.class) {
            if (sInstance != null) {
                return sInstance;
            }
            // Check if zh_CN collation data is available
            final Locale[] locales = Collator.getAvailableLocales();
            for (Locale locale : locales) {
                if (locale.equals(Locale.CHINA)) {
                    // Do self validation just once.
                    if (DEBUG) {
                        Log.d(TAG, "Self validation. Result: "
                                + doSelfValidation());
                    }
                    sInstance = new HanziToPinyin(true);
                    return sInstance;
                }
            }
            Log.w(TAG,
                    "There is no Chinese collator, HanziToPinyin is disabled");
            sInstance = new HanziToPinyin(false);
            return sInstance;
        }
    }

    /**
     * Validate if our internal table has some wrong value.
     *
     * @return true when the table looks correct.
     */
    private static boolean doSelfValidation() {
        char lastChar = UNIHANS[0];
        String lastString = Character.toString(lastChar);
        for (char c : UNIHANS) {
            // Skips the first entry (nothing precedes it to compare against).
            if (lastChar == c) {
                continue;
            }
            final String curString = Character.toString(c);
            int cmp = COLLATOR.compare(lastString, curString);
            if (cmp >= 0) {
                Log.e(TAG, "Internal error in Unihan table. "
                        + "The last string \"" + lastString
                        + "\" is greater than current string \"" + curString
                        + "\".");
                return false;
            }
            lastString = curString;
        }
        return true;
    }

    /**
     * Builds the token for a single character.
     *
     * Characters below 256 are LATIN. Other characters are located in
     * {@link #UNIHANS} by collation order; the pinyin of the closest entry
     * that does not sort after the character is used. Characters sorting
     * outside the first/last boundary, or landing on an empty pinyin row,
     * are UNKNOWN with the target equal to the source.
     */
    private Token getToken(char character) {
        Token token = new Token();
        final String letter = Character.toString(character);
        token.source = letter;
        int offset = -1;
        int cmp;
        if (character < 256) {
            token.type = Token.LATIN;
            token.target = letter;
            return token;
        } else {
            cmp = COLLATOR.compare(letter, FIRST_PINYIN_UNIHAN);
            if (cmp < 0) {
                token.type = Token.UNKNOWN;
                token.target = letter;
                return token;
            } else if (cmp == 0) {
                token.type = Token.PINYIN;
                offset = 0;
            } else {
                cmp = COLLATOR.compare(letter, LAST_PINYIN_UNIHAN);
                if (cmp > 0) {
                    token.type = Token.UNKNOWN;
                    token.target = letter;
                    return token;
                } else if (cmp == 0) {
                    token.type = Token.PINYIN;
                    offset = UNIHANS.length - 1;
                }
            }
        }

        token.type = Token.PINYIN;
        if (offset < 0) {
            // Binary search over the collation-ordered boundary characters.
            int begin = 0;
            int end = UNIHANS.length - 1;
            while (begin <= end) {
                offset = (begin + end) >>> 1;
                final String unihan = Character.toString(UNIHANS[offset]);
                cmp = COLLATOR.compare(letter, unihan);
                if (cmp == 0) {
                    break;
                } else if (cmp > 0) {
                    begin = offset + 1;
                } else {
                    end = offset - 1;
                }
            }
        }
        // The last probe sorted after the character: step back to the
        // preceding boundary, whose pinyin group contains the character.
        if (cmp < 0) {
            offset--;
        }
        StringBuilder pinyin = new StringBuilder();
        for (int j = 0; j < PINYINS[offset].length && PINYINS[offset][j] != 0; j++) {
            pinyin.append((char) PINYINS[offset][j]);
        }
        token.target = pinyin.toString();
        if (TextUtils.isEmpty(token.target)) {
            token.type = Token.UNKNOWN;
            token.target = token.source;
        }
        return token;
    }

    /**
     * Convert the input to an array of tokens. The sequence of Latin or
     * Unknown characters without space will be put into a Token. One Hanzi
     * character which has pinyin will be treated as a Token. Space characters
     * only delimit tokens and are not emitted. If there is no China collator,
     * or the input is null or empty, the empty token array is returned.
     */
    public ArrayList<Token> get(final String input) {
        ArrayList<Token> tokens = new ArrayList<Token>();
        if (!mHasChinaCollator || TextUtils.isEmpty(input)) {
            // return empty tokens.
            return tokens;
        }
        final int inputLength = input.length();
        final StringBuilder sb = new StringBuilder();
        int tokenType = Token.LATIN;
        // Go through the input, create a new token when
        // a. Token type changed
        // b. Get the Pinyin of current character.
        // c. current character is space.
        for (int i = 0; i < inputLength; i++) {
            final char character = input.charAt(i);
            if (character == ' ') {
                if (sb.length() > 0) {
                    addToken(sb, tokens, tokenType);
                }
            } else if (character < 256) {
                if (tokenType != Token.LATIN && sb.length() > 0) {
                    addToken(sb, tokens, tokenType);
                }
                tokenType = Token.LATIN;
                sb.append(character);
            } else {
                Token t = getToken(character);
                if (t.type == Token.PINYIN) {
                    if (sb.length() > 0) {
                        addToken(sb, tokens, tokenType);
                    }
                    tokens.add(t);
                    tokenType = Token.PINYIN;
                } else {
                    if (tokenType != t.type && sb.length() > 0) {
                        addToken(sb, tokens, tokenType);
                    }
                    tokenType = t.type;
                    sb.append(character);
                }
            }
        }
        if (sb.length() > 0) {
            addToken(sb, tokens, tokenType);
        }
        return tokens;
    }

    /**
     * Appends the buffered run as one token (source and target are both the
     * buffered text) and clears the buffer.
     */
    private void addToken(final StringBuilder sb,
            final ArrayList<Token> tokens, final int tokenType) {
        String str = sb.toString();
        tokens.add(new Token(tokenType, str, str));
        sb.setLength(0);
    }
}
方法调用:
package com.example.test;
import android.app.Activity;
import android.os.Bundle;
import android.widget.TextView;

import com.example.test.HanziToPinyin.Token;

import java.util.ArrayList;
import java.util.Locale;
/**
 * Demo activity that shows the pinyin of three sample names in a TextView.
 */
public class MainActivity extends Activity {

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        TextView txt = (TextView) findViewById(R.id.txtContent);
        txt.append(getPinYin("令狐冲") + " -- " + getPinYin("张无忌") + " -- "
                + getPinYin("任盈盈"));
    }

    /**
     * Converts Chinese characters to pinyin; other characters are passed
     * through unchanged. The whole result is lower-cased.
     *
     * @param str the text to convert
     * @return the lower-cased concatenation of every token's pinyin (for
     *         pinyin tokens) or original text (for all other tokens); empty
     *         when the converter yields no tokens
     */
    public static String getPinYin(String str) {
        ArrayList<Token> tokens = HanziToPinyin.getInstance().get(str);
        StringBuilder sb = new StringBuilder();
        for (Token token : tokens) {
            if (Token.PINYIN == token.type) {
                sb.append(token.target);
            } else {
                sb.append(token.source);
            }
        }
        // Locale.ROOT keeps the mapping locale-independent: with a Turkish
        // default locale, toLowerCase() would turn 'I' into dotless 'ı'.
        return sb.toString().toLowerCase(Locale.ROOT);
    }
}


浙公网安备 33010602011771号