Hi everybody, We have written a convert from Punycode. This is reformed from javascript. Perhaps many non-English systems need to use this feature in domain name resolution. Anyone can use and change the code as needed.
 Class Util.Punycode Extends %RegisteredObject
{

Parameter MaxInt = 2147483647;

/// Bootstring parameters
Parameter Base = 36;

Parameter TMin = 1;

Parameter TMax = 26;

Parameter Skew = 38;

Parameter Damp = 700;

Parameter InitialBias = 72;

Parameter InitialN = 128;

Parameter Delimiter = "-";

/// Regular expressions
Parameter RegexPunycode = "^xn--";

/// non-ASCII chars
Parameter RegexNonASCII = "[^\x00-\x7E]";

/// RFC 3490 separators
Parameter RegexSeparators = "[\x2E\u3002\uFF0E\uFF61]";

/// A generic error utility function.
/// @private
/// @param {String} type The error type.
/// @returns {Error} Throws a `RangeError` with the applicable error message. Method Error(type) [ Private ] {   If (type = "overflow")   {     Throw $$$ERROR("Overflow: input needs wider integers to process")     Quit   }   If (type = "not-basic")   {     Throw $$$ERROR("Illegal input >= 0x80 (not a basic code point")     Quit   }   If (type = "invalid-input")   {     Throw $$$ERROR("invalid-input")     Quit   }   Throw $$$ERROR("Unknown error type: "_type) } /// A generic `Array#map` utility function.
/// @private
/// @param {Array} array The array to iterate over.
/// @param {Function} callback The function that gets called for every array /// item.
/// @returns {Array} A new array of values returned by the callback function. Method Map(list, fnName As %String) As %List [ Private ] {   //#Dim result As %ArrayOfDataTypes   Set output = $LB()   Set length = $LISTLENGTH(list)   For i=length:-1:1   {     Set = $LIST(list, i)     If (fnName = "regexNonASCII")     {       Set matcher = ##class(%Regex.Matcher).%New(..#RegexNonASCII, v)       If (matcher.Locate())       {         Set = "xn--"_..Encode(v)       }     }     Elseif (fnName = "regexPunycode")     {       Set matcher = ##class(%Regex.Matcher).%New(..#RegexPunycode, v)       If (matcher.Locate())       {         Set = $extract(v, 5, *)         Set = ..Decode($ZCONVERT(v, "l"))       }     }     Set $LIST(output, i) = v   }   Quit output } /// A simple `Array#map`-like wrapper to work with domain name strings or email addresses.
/// @private
/// @param {String} domain The domain name or email address.
/// @param {Function} callback The function that gets called for every character.
/// @returns {Array} A new string of characters returned by the callback function. Method MapDomain(string As %String, fnName As %String) As %String [ Private ] {   Set result = ""   If (string["@")   {     // In email addresses, only the domain name should be punycoded. Leave     // the local part (i.e. everything up to `@`) intact.     Set result = $PIECE(string, "@", 1)_"@"     Set string = $PIECE(string, "@", 2)   }   // Avoid `split(regex)` for IE8 compatibility. See #17.   Set matcher = ##class(%Regex.Matcher).%New(..#RegexSeparators, string)   Set string = matcher.ReplaceAll($char($ZHEX("2e")))   Set labels = $LISTFROMSTRING(string, ".")   Set encoded = $LISTTOSTRING(..Map(labels, fnName), ".")   Quit result_encoded } /// Creates an array containing the numeric code points of each Unicode /// character in the string. While JavaScript uses UCS-2 internally, /// this function will convert a pair of surrogate halves (each of which /// UCS-2 exposes as separate characters) into a single code point, /// matching UTF-16.
/// @see `Ucs2encode`
/// @param {String} string The Unicode input string (UCS-2).
/// @returns {Array} The new array of code points. Method Ucs2decode(string As %String) As %ArrayOfDataTypes [ Private ] {   Set output = ##class(%ArrayOfDataTypes).%New()   Set counter = 0   Set length = $length(string)   While (counter < length)   {     Set counter = counter + 1     Set value = $ascii($extract(string, counter))     If ((value >= $zhex("D800")) && (value <= $zhex("DBFF") && (counter < length)))     {       Set counter = counter + 1       Set extra = $ascii($extract(string, counter))       Set tmp = $zboolean(extra, $zhex("FC00"), 1)       If (tmp = $zhex("DC00"))       {         Set = $zboolean(value, $zhex("3FF"), 1) * (2 ** 10) + $zboolean(extra, $zhex("3FF"), 1) + $zhex("10000")         Do output.SetAt(a, (output.Count() + 1))       Else {         Do output.SetAt(value, (output.Count() + 1))         Set counter = counter - 1       }     Else {       Do output.SetAt(value, (output.Count() + 1))     }   }   Quit output } /// Creates a string based on an array of numeric code points.
/// @see `Usc2decode`
/// @param {Array} codePoints The array of numeric code points.
/// @returns {String} The new Unicode string (UCS-2). Method Ucs2encode(array As %ArrayOfDataTypes) As %String [ Private ] {   Set output = ""   Set length = array.Count()   For i=1:1:length   {     Set output = output_$char(array.GetAt(i))   }   Quit output } /// Converts a basic code point into a digit/integer. /// @see `DigitToBasic()`
/// @private
/// @param {Number} codePoint The basic numeric code point value.
/// @returns {Number} The numeric value of a basic code point (for use in /// representing integers) in the range `0` to `base - 1`, or `base` if /// the code point does not represent a value. Method BasicToDigit(codePoint) As %Integer [ Private ] {   Set res = ..#Base   If ((codePoint - $zhex("30")) < $zhex("0A"))   {     Set res = codePoint - $zhex("16")   }   Elseif ((codePoint - $zhex("41")) < $zhex("1A"))   {     Set res = codePoint - $zhex("41")   }   Elseif ((codePoint - $zhex("61")) < $zhex("1A"))   {     Set res = codePoint - $zhex("61")   }   Quit res } /// Converts a digit/integer into a basic code point. /// @see `BasicToDigit()`
/// @private
/// @param {Number} digit The numeric value of a basic code point.
/// @returns {Number} The basic code point whose value (when used for /// representing integers) is `digit`, which needs to be in the range /// `0` to `base - 1`. If `flag` is non-zero, the uppercase form is /// used; else, the lowercase form is used. The behavior is undefined /// if `flag` is non-zero and `digit` has no uppercase form. Method DigitToBasic(digit, flag) As %Integer [ Private ] {   Set result = digit + 22   If (digit < 26)   {     Set result = result + 75   }   Set tmp = 0   If (flag '= 0)   {     Set tmp = 2**5   }   Quit result - tmp } /// Bias adaptation function as per section 3.4 of RFC 3492.
/// https://tools.ietf.org/html/rfc3492#section-3.4
/// @private Method Adapt(delta, numPoints, firstTime As %Boolean) As %Integer [ Private ] {   Set = 0   Set delta1 = delta / ..#Damp \ 1   If ('firstTime)   {     Set delta1 = delta / 2 \ 1   }   Set delta2 = delta1 / numPoints \ 1   Set delta1 = delta1 + delta2   Set baseMinusTMin = ..#Base - ..#TMin   While (delta1 > ((baseMinusTMin * ..#TMax) / 2 \ 1))   {     Set = + ..#Base     Set delta1 = delta1 / baseMinusTMin \ 1   }   Quit (+ ((baseMinusTMin + 1) * delta1 / (delta1 + ..#Skew))) \ 1 } /// Converts a Punycode string of ASCII-only symbols to a string of Unicode /// symbols.
/// @param {String} input The Punycode string of ASCII-only symbols.
/// @returns {String} The resulting string of Unicode symbols. Method Decode(input As %String) As %String [ Private ] {   // Don't use UCS-2.       Set output = ##class(%ArrayOfDataTypes).%New()   Set inputLength = $length(input)   Set = 0   Set = ..#InitialN   Set bias = ..#InitialBias       // Handle the basic code points: let `basic` be the number of input code   // points before the last delimiter, or `0` if there is none, then copy   // the first basic code points to the output.   Set basic = ..LastIndexOf(input, ..#Delimiter) + 1   For j=1:1:(basic-2)   {     Set charcode = $ascii($extract(input, j))     If (charcode >= $zhex("80"))     {       Do ..Error("not-basic")     }     Do output.SetAt(charcode, output.Count() + 1)   }   // Main decoding loop: start just after the last delimiter if any basic code   // points were copied; start at the beginning otherwise.   Set index = 1   If (basic > 1)   {     Set index = basic   }   While (index <= inputLength)   {     // `index` is the index of the next character to be consumed.     // Decode a generalized variable-length integer into `delta`,     // which gets added to `i`. The overflow checking is easier     // if we increase `i` as we go, then subtract off its starting     // value at the end to obtain `delta`.     Set oldi = i     Set = 1     Set = ..#Base     Set con = 1     While (con > 0)     {       If (index > inputLength)       {         Do ..Error("invalid-input")       }       Set digit = ..BasicToDigit($ascii($extract(input, index)))       Set index = index + 1       If ((digit >= ..#Base) || (digit > ((..#MaxInt - i) / \ 1)))       {         Do ..Error("overflow 1")       }       Set = + (digit * w)       Set = ..#TMin       If (> bias)       {         If (>= (bias + ..#TMax))         {           Set = ..#TMax         }         Else         {           Set = - bias         }       }       If (digit < t)       {         Set con = 0         quit       }       Set baseMinusT = ..#Base - t       If (> (..#MaxInt / baseMinusT \ 1))       {         Do ..Error("overflow 2")       }       Set = * baseMinusT       Set = + ..#Base     }     Set out = output.Count() + 1     Set bias = ..Adapt((- oldi), out, (oldi=0))     // `i` was supposed to wrap around from `out` to `0`,     // incrementing `n` each time, so we'll fix that now:     If ((/ out \ 1) > (..#MaxInt - n))     {       Do ..Error("overflow 3")     }     Set = + (/ out \ 1)     Set = - (out * (/ out \ 1))     Set outcount = output.Count()     For id=outcount:-1:i+1     {       Do output.SetAt(output.GetAt(id), id+1)     }     Do output.SetAt(n, i+1)     Set = + 1   }   Set res = ""   Set outcount = output.Count()   For i=1:1:outcount   {     Set = $char(output.GetAt(i))     Set res = res_c   }   Quit res } /// Converts a string of Unicode symbols (e.g. a domain name label) to a /// Punycode string of ASCII-only symbols.
/// @param {String} input The string of Unicode symbols.
/// @returns {String} The resulting Punycode string of ASCII-only symbols. Method Encode(input As %String) As %String [ Private ] {   Set output = ##class(%ArrayOfDataTypes).%New()   // Convert the input in UCS-2 to an array of Unicode code points.   Set inputCodes = ..Ucs2decode(input)   // Cache the length.   Set inputLength = inputCodes.Count()   // Initialize the state.   Set = ..#InitialN   Set delta = 0   Set bias = ..#InitialBias   // Handle the basic code points.   For i=1:1:inputLength   {     Set currentValue = inputCodes.GetAt(i)     If (currentValue < $zhex("80"))     {       Do output.SetAt($char(currentValue), output.Count() + 1)     }   }   Set basicLength = output.Count()   Set handledCPCount = basicLength   // `handledCPCount` is the number of code points that have been handled;   // `basicLength` is the number of basic code points.   // Finish the basic string with a delimiter unless it's empty.   If (basicLength > 0)   {     Do output.SetAt(..#Delimiter, output.Count() + 1)   }   // Main encoding loop   While (handledCPCount < inputLength)   {     // All non-basic code points < n have been handled already. Find the next     // larger one:     Set = ..#MaxInt     For i=1:1:inputLength     {       Set currentValue = inputCodes.GetAt(i)       If ((currentValue >= n) && (currentValue < m))       {         Set = currentValue       }     }     // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,     // but guard against overflow.     Set handledCPCountPlusOne = handledCPCount + 1     If ((m-n) > ((..#MaxInt - delta) / handledCPCountPlusOne \ 1))     {     Do ..Error("overflow 4")     }     Set delta = delta + ((- n) * handledCPCountPlusOne)     Set = m     For i=1:1:inputLength     {       Set currentValue = inputCodes.GetAt(i)       If (currentValue < n)       {         Set delta = delta + 1         If (delta > ..#MaxInt)         {           Do ..Error("overflow 5")         }       }       If (currentValue = n)       {         // Represent delta as a generalized variable-length integer.         Set = delta         Set = ..#Base         Set con = 1         While (con > 0)         {           Set = ..#TMin           If (> bias)           {             If (>= (bias + ..#TMax))             {               Set = ..#TMax             }             Else             {               Set = - bias             }           }           If (< t)           {             Set con = 0             quit           }           Set qMinusT = - t           Set baseMinusT = ..#Base - t           Set qMinusTModeBaseMinusT = qMinusT - ((qMinusT / baseMinusT \ 1) * baseMinusT)           Do output.SetAt($char(..DigitToBasic(+ qMinusTModeBaseMinusT, 0)), output.Count() + 1)           Set = qMinusT / baseMinusT \ 1           Set = + ..#Base         }         Do output.SetAt($char(..DigitToBasic(q, 0)), output.Count() + 1)         Set bias = ..Adapt(delta, handledCPCountPlusOne, handledCPCount = basicLength)         Set delta = 0         Set handledCPCount = handledCPCount + 1       }     }     Set delta = delta + 1     Set = + 1   }   Set res = ""   For i=1:1:output.Count()   {     Set res = res_output.GetAt(i)   }   Quit res } /// Find the position of the last occurrence of a substring in a string.
/// @param {String} str Specifies the string to search
/// @param {String} substr Specifies the string to find
/// @returns {Ingeger} the last position Method LastIndexOf(str As %String, substr As %String) As %Integer [ Private ] {   Set pos = 0   Set tmpstr = str   Set lengthSub = $length(substr)   Do {     Set index = $find(tmpstr, substr)     If (index > 0)     {       Set pos = pos + index - lengthSub       Set tmpstr = $extract(tmpstr, index, *)     }   While (index > 0)   If (pos < 0)   {     Set pos = 0   }   Quit pos } /// Converts a Punycode string representing a domain name or an email address /// to Unicode. Only the Punycoded parts of the input will be converted, i.e. /// it doesn't matter if you call it on a string that has already been /// converted to Unicode.
/// @param {String} input The Punycoded domain name or email address to /// convert to Unicode.
/// @returns {String} The Unicode representation of the given Punycode /// string. ClassMethod ToUnicode(input As %String = "") As %String {   Set ReturnValue=""   If ($L(input)>0)   {     Set Punycode=##class(Util.Punycode).%New()     Set ReturnValue=Punycode.MapDomain(input, "regexPunycode")      Quit ReturnValue } /// Converts a Unicode string representing a domain name or an email address to /// Punycode. Only the non-ASCII parts of the domain name will be converted, /// i.e. it doesn't matter if you call it with a domain that's already in /// ASCII.
/// @param {String} input The domain name or email address to convert, as a /// Unicode string.
/// @returns {String} The Punycode representation of the given domain name or /// email address. ClassMethod ToASCII(input As %String = "") As %String {   Set ReturnValue = ""   If ($L(input)>0)   {     Set Punycode=##class(Util.Punycode).%New()     Set ReturnValue = Punycode.MapDomain(input, "regexNonASCII")   }   Quit ReturnValue } }</pre> </body></html>