JavaScript 实现的 Punycode 编码与解码

  1 ; (function (w) {
  2 
  3     function IdnMapping() {
  4 
  5 
  6         /** Highest positive signed 32-bit float value */
  7         var maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1
  8 
  9             /** Bootstring parameters */
 10             base = 36,
 11             tMin = 1,
 12             tMax = 26,
 13             skew = 38,
 14             damp = 700,
 15             initialBias = 72,
 16             initialN = 128, // 0x80
 17             delimiter = '-', // '\x2D'
 18 
 19             /** Regular expressions */
 20             regexPunycode = /^xn--/,
 21             regexNonASCII = /[^\x20-\x7E]/, // unprintable ASCII chars + non-ASCII chars
 22             regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g, // RFC 3490 separators
 23 
 24             /** Error messages */
 25             errors = {
 26                 'overflow': 'Overflow: input needs wider integers to process',
 27                 'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
 28                 'invalid-input': 'Invalid input'
 29             },
 30 
 31             /** Convenience shortcuts */
 32             baseMinusTMin = base - tMin,
 33             floor = Math.floor,
 34             stringFromCharCode = String.fromCharCode,
 35 
 36             /** Temporary variable */
 37             key;
 38 
 39         /*--------------------------------------------------------------------------*/
 40 
 41         /**
 42          * A generic error utility function.
 43          * @private
 44          * @param {String} type The error type.
 45          * @returns {Error} Throws a `RangeError` with the applicable error message.
 46          */
 47         function error(type) {
 48             throw RangeError(errors[type]);
 49         }
 50 
 51         /**
 52          * A generic `Array#map` utility function.
 53          * @private
 54          * @param {Array} array The array to iterate over.
 55          * @param {Function} callback The function that gets called for every array
 56          * item.
 57          * @returns {Array} A new array of values returned by the callback function.
 58          */
 59         function map(array, fn) {
 60             var length = array.length;
 61             var result = [];
 62             while (length--) {
 63                 result[length] = fn(array[length]);
 64             }
 65             return result;
 66         }
 67 
 68         /**
 69          * A simple `Array#map`-like wrapper to work with domain name strings or email
 70          * addresses.
 71          * @private
 72          * @param {String} domain The domain name or email address.
 73          * @param {Function} callback The function that gets called for every
 74          * character.
 75          * @returns {Array} A new string of characters returned by the callback
 76          * function.
 77          */
 78         function mapDomain(string, fn) {
 79             var parts = string.split('@');
 80             var result = '';
 81             if (parts.length > 1) {
 82                 // In email addresses, only the domain name should be punycoded. Leave
 83                 // the local part (i.e. everything up to `@`) intact.
 84                 result = parts[0] + '@';
 85                 string = parts[1];
 86             }
 87             // Avoid `split(regex)` for IE8 compatibility. See #17.
 88             string = string.replace(regexSeparators, '\x2E');
 89             var labels = string.split('.');
 90             var encoded = map(labels, fn).join('.');
 91             return result + encoded;
 92         }
 93 
 94         /**
 95          * Creates an array containing the numeric code points of each Unicode
 96          * character in the string. While JavaScript uses UCS-2 internally,
 97          * this function will convert a pair of surrogate halves (each of which
 98          * UCS-2 exposes as separate characters) into a single code point,
 99          * matching UTF-16.
100          * @see `punycode.ucs2.encode`
101          * @memberOf punycode.ucs2
102          * @name decode
103          * @param {String} string The Unicode input string (UCS-2).
104          * @returns {Array} The new array of code points.
105          */
106         function ucs2decode(string) {
107             var output = [],
108                 counter = 0,
109                 length = string.length,
110                 value,
111                 extra;
112             while (counter < length) {
113                 value = string.charCodeAt(counter++);
114                 if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
115                     // high surrogate, and there is a next character
116                     extra = string.charCodeAt(counter++);
117                     if ((extra & 0xFC00) == 0xDC00) { // low surrogate
118                         output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
119                     } else {
120                         // unmatched surrogate; only append this code unit, in case the next
121                         // code unit is the high surrogate of a surrogate pair
122                         output.push(value);
123                         counter--;
124                     }
125                 } else {
126                     output.push(value);
127                 }
128             }
129             return output;
130         }
131 
132         /**
133          * Creates a string based on an array of numeric code points.
134          * @see `punycode.ucs2.decode`
135          * @memberOf punycode.ucs2
136          * @name encode
137          * @param {Array} codePoints The array of numeric code points.
138          * @returns {String} The new Unicode string (UCS-2).
139          */
140         function ucs2encode(array) {
141             return map(array,
142                 function (value) {
143                     var output = '';
144                     if (value > 0xFFFF) {
145                         value -= 0x10000;
146                         output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
147                         value = 0xDC00 | value & 0x3FF;
148                     }
149                     output += stringFromCharCode(value);
150                     return output;
151                 }).join('');
152         }
153 
154         /**
155          * Converts a basic code point into a digit/integer.
156          * @see `digitToBasic()`
157          * @private
158          * @param {Number} codePoint The basic numeric code point value.
159          * @returns {Number} The numeric value of a basic code point (for use in
160          * representing integers) in the range `0` to `base - 1`, or `base` if
161          * the code point does not represent a value.
162          */
163         function basicToDigit(codePoint) {
164             if (codePoint - 48 < 10) {
165                 return codePoint - 22;
166             }
167             if (codePoint - 65 < 26) {
168                 return codePoint - 65;
169             }
170             if (codePoint - 97 < 26) {
171                 return codePoint - 97;
172             }
173             return base;
174         }
175 
176         /**
177          * Converts a digit/integer into a basic code point.
178          * @see `basicToDigit()`
179          * @private
180          * @param {Number} digit The numeric value of a basic code point.
181          * @returns {Number} The basic code point whose value (when used for
182          * representing integers) is `digit`, which needs to be in the range
183          * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is
184          * used; else, the lowercase form is used. The behavior is undefined
185          * if `flag` is non-zero and `digit` has no uppercase form.
186          */
187         function digitToBasic(digit, flag) {
188             //  0..25 map to ASCII a..z or A..Z
189             // 26..35 map to ASCII 0..9
190             return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5);
191         }
192 
193         /**
194          * Bias adaptation function as per section 3.4 of RFC 3492.
195          * http://tools.ietf.org/html/rfc3492#section-3.4
196          * @private
197          */
198         function adapt(delta, numPoints, firstTime) {
199             var k = 0;
200             delta = firstTime ? floor(delta / damp) : delta >> 1;
201             delta += floor(delta / numPoints);
202             for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {
203                 delta = floor(delta / baseMinusTMin);
204             }
205             return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
206         }
207 
208         /**
209          * Converts a Punycode string of ASCII-only symbols to a string of Unicode
210          * symbols.
211          * @memberOf punycode
212          * @param {String} input The Punycode string of ASCII-only symbols.
213          * @returns {String} The resulting string of Unicode symbols.
214          */
215         function decode(input) {
216             // Don't use UCS-2
217             var output = [],
218                 inputLength = input.length,
219                 out,
220                 i = 0,
221                 n = initialN,
222                 bias = initialBias,
223                 basic,
224                 j,
225                 index,
226                 oldi,
227                 w,
228                 k,
229                 digit,
230                 t,
231                 /** Cached calculation results */
232                 baseMinusT;
233 
234             // Handle the basic code points: let `basic` be the number of input code
235             // points before the last delimiter, or `0` if there is none, then copy
236             // the first basic code points to the output.
237 
238             basic = input.lastIndexOf(delimiter);
239             if (basic < 0) {
240                 basic = 0;
241             }
242 
243             for (j = 0; j < basic; ++j) {
244                 // if it's not a basic code point
245                 if (input.charCodeAt(j) >= 0x80) {
246                     error('not-basic');
247                 }
248                 output.push(input.charCodeAt(j));
249             }
250 
251             // Main decoding loop: start just after the last delimiter if any basic code
252             // points were copied; start at the beginning otherwise.
253 
254             for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {
255 
256                 // `index` is the index of the next character to be consumed.
257                 // Decode a generalized variable-length integer into `delta`,
258                 // which gets added to `i`. The overflow checking is easier
259                 // if we increase `i` as we go, then subtract off its starting
260                 // value at the end to obtain `delta`.
261                 for (oldi = i, w = 1, k = base; /* no condition */; k += base) {
262 
263                     if (index >= inputLength) {
264                         error('invalid-input');
265                     }
266 
267                     digit = basicToDigit(input.charCodeAt(index++));
268 
269                     if (digit >= base || digit > floor((maxInt - i) / w)) {
270                         error('overflow');
271                     }
272 
273                     i += digit * w;
274                     t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
275 
276                     if (digit < t) {
277                         break;
278                     }
279 
280                     baseMinusT = base - t;
281                     if (w > floor(maxInt / baseMinusT)) {
282                         error('overflow');
283                     }
284 
285                     w *= baseMinusT;
286 
287                 }
288 
289                 out = output.length + 1;
290                 bias = adapt(i - oldi, out, oldi == 0);
291 
292                 // `i` was supposed to wrap around from `out` to `0`,
293                 // incrementing `n` each time, so we'll fix that now:
294                 if (floor(i / out) > maxInt - n) {
295                     error('overflow');
296                 }
297 
298                 n += floor(i / out);
299                 i %= out;
300 
301                 // Insert `n` at position `i` of the output
302                 output.splice(i++, 0, n);
303 
304             }
305 
306             return ucs2encode(output);
307         }
308 
309         /**
310          * Converts a string of Unicode symbols (e.g. a domain name label) to a
311          * Punycode string of ASCII-only symbols.
312          * @memberOf punycode
313          * @param {String} input The string of Unicode symbols.
314          * @returns {String} The resulting Punycode string of ASCII-only symbols.
315          */
316         function encode(input) {
317             var n,
318                 delta,
319                 handledCPCount,
320                 basicLength,
321                 bias,
322                 j,
323                 m,
324                 q,
325                 k,
326                 t,
327                 currentValue,
328                 output = [],
329                 /** `inputLength` will hold the number of code points in `input`. */
330                 inputLength,
331                 /** Cached calculation results */
332                 handledCPCountPlusOne,
333                 baseMinusT,
334                 qMinusT;
335 
336             // Convert the input in UCS-2 to Unicode
337             input = ucs2decode(input);
338 
339             // Cache the length
340             inputLength = input.length;
341 
342             // Initialize the state
343             n = initialN;
344             delta = 0;
345             bias = initialBias;
346 
347             // Handle the basic code points
348             for (j = 0; j < inputLength; ++j) {
349                 currentValue = input[j];
350                 if (currentValue < 0x80) {
351                     output.push(stringFromCharCode(currentValue));
352                 }
353             }
354 
355             handledCPCount = basicLength = output.length;
356 
357             // `handledCPCount` is the number of code points that have been handled;
358             // `basicLength` is the number of basic code points.
359 
360             // Finish the basic string - if it is not empty - with a delimiter
361             if (basicLength) {
362                 output.push(delimiter);
363             }
364 
365             // Main encoding loop:
366             while (handledCPCount < inputLength) {
367 
368                 // All non-basic code points < n have been handled already. Find the next
369                 // larger one:
370                 for (m = maxInt, j = 0; j < inputLength; ++j) {
371                     currentValue = input[j];
372                     if (currentValue >= n && currentValue < m) {
373                         m = currentValue;
374                     }
375                 }
376 
377                 // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
378                 // but guard against overflow
379                 handledCPCountPlusOne = handledCPCount + 1;
380                 if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
381                     error('overflow');
382                 }
383 
384                 delta += (m - n) * handledCPCountPlusOne;
385                 n = m;
386 
387                 for (j = 0; j < inputLength; ++j) {
388                     currentValue = input[j];
389 
390                     if (currentValue < n && ++delta > maxInt) {
391                         error('overflow');
392                     }
393 
394                     if (currentValue == n) {
395                         // Represent delta as a generalized variable-length integer
396                         for (q = delta, k = base; /* no condition */; k += base) {
397                             t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
398                             if (q < t) {
399                                 break;
400                             }
401                             qMinusT = q - t;
402                             baseMinusT = base - t;
403                             output.push(
404                                 stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
405                             );
406                             q = floor(qMinusT / baseMinusT);
407                         }
408 
409                         output.push(stringFromCharCode(digitToBasic(q, 0)));
410                         bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);
411                         delta = 0;
412                         ++handledCPCount;
413                     }
414                 }
415 
416                 ++delta;
417                 ++n;
418 
419             }
420             return output.join('');
421         }
422 
423 
424         this.toUnicode = function (input) {
425             return mapDomain(input,
426                 function (string) {
427                     return regexPunycode.test(string)
428                         ? decode(string.slice(4).toLowerCase())
429                         : string;
430                 });
431         }
432 
433 
434         this.toASCII = function (input) {
435             return mapDomain(input,
436                 function (string) {
437                     return regexNonASCII.test(string)
438                         ? 'xn--' + encode(string)
439                         : string;
440                 });
441         }
442 
443 
444     }
445 
446     window.IdnMapping = IdnMapping;
447 })(window);
View Code

使用:

 1  <script>
 2         window.onload = function () {
 3             var idn = new IdnMapping();
 4        //toASCII
 5             var str = idn.toASCII("www.博客园.com");
 6             console.log(str);
 7         
 8        //toUnicode
 9             var str1 = idn.toUnicode(str);
10             console.log(str1);
11         }
12     </script>

 

posted @ 2018-02-06 10:59  b̶i̶n̶g̶.̶  阅读(443)  评论(0)  编辑  收藏  举报