<?php
/**
 * IDNA host name encoder.
 *
 * Converts a host name into its ASCII-compatible form: every dot-separated
 * label that contains non-ASCII bytes is Punycode-encoded (RFC 3492) and
 * prefixed with "xn--"; labels that are already ASCII are passed through.
 *
 * Note: nameprep() is currently a pass-through, so no Unicode normalisation
 * or case folding is applied before encoding.
 */
class Requests_IDNAEncoder {
    /**
     * Prefix that marks an ASCII-compatible encoded label.
     *
     * @var string
     */
    const ACE_PREFIX = 'xn--';

    /**#@+
     * Punycode bootstring parameters (values as given in RFC 3492, section 5).
     *
     * @var int
     */
    const BOOTSTRAP_BASE         = 36;
    const BOOTSTRAP_TMIN         = 1;
    const BOOTSTRAP_TMAX         = 26;
    const BOOTSTRAP_SKEW         = 38;
    const BOOTSTRAP_DAMP         = 700;
    const BOOTSTRAP_INITIAL_BIAS = 72;
    const BOOTSTRAP_INITIAL_N    = 128;
    /**#@-*/

    /**
     * Encode a host name, label by label.
     *
     * The input is split on ".", each label is converted with to_ascii(),
     * and the results are joined with "." again.
     *
     * @param string $string Host name, UTF-8 encoded.
     * @return string Host name with every label in ASCII form.
     * @throws Requests_Exception Propagated from to_ascii() for invalid labels.
     */
    public static function encode($string) {
        $parts = explode('.', $string);

        // Write back by key instead of iterating by reference, so that no
        // dangling reference to the last element is left behind.
        foreach ($parts as $index => $part) {
            $parts[$index] = self::to_ascii($part);
        }

        return implode('.', $parts);
    }

    /**
     * Convert a single label to its ASCII form.
     *
     * @param string $string Label, UTF-8 encoded.
     * @return string ASCII label; unchanged if the input was already ASCII.
     * @throws Requests_Exception `idna.provided_too_long` if an ASCII input is 64 bytes or longer.
     * @throws Requests_Exception `idna.prepared_too_long` if the nameprep result is ASCII and 64 bytes or longer.
     * @throws Requests_Exception `idna.provided_is_prefixed` if a non-ASCII label already starts with the ACE prefix.
     * @throws Requests_Exception `idna.encoded_too_long` if the encoded label is 64 bytes or longer.
     * @throws Requests_Exception Propagated from punycode_encode() for invalid UTF-8 / code points.
     */
    public static function to_ascii($string) {
        // Already ASCII: only the length has to be checked.
        if (self::is_ascii($string)) {
            if (strlen($string) < 64) {
                return $string;
            }

            throw new Requests_Exception('Provided string is too long', 'idna.provided_too_long', $string);
        }

        $string = self::nameprep($string);

        // nameprep may have reduced the label to pure ASCII.
        if (self::is_ascii($string)) {
            if (strlen($string) < 64) {
                return $string;
            }

            throw new Requests_Exception('Prepared string is too long', 'idna.prepared_too_long', $string);
        }

        // The ACE prefix is "xn--" in any capitalisation (RFC 3490, section 5).
        // nameprep() does no case folding here, so compare case-insensitively.
        if (strncasecmp($string, self::ACE_PREFIX, strlen(self::ACE_PREFIX)) === 0) {
            throw new Requests_Exception('Provided string begins with ACE prefix', 'idna.provided_is_prefixed', $string);
        }

        $string = self::ACE_PREFIX . self::punycode_encode($string);

        if (strlen($string) < 64) {
            return $string;
        }

        throw new Requests_Exception('Encoded string is too long', 'idna.encoded_too_long', $string);
    }

    /**
     * Check whether a string consists only of bytes 0x00-0x7F.
     *
     * @param string $string String to inspect.
     * @return bool False if the pattern finds a byte outside 0x00-0x7F, true otherwise.
     */
    protected static function is_ascii($string) {
        return preg_match('/(?:[^\\x00-\\x7F])/', $string) !== 1;
    }

    /**
     * Prepare a label for encoding.
     *
     * Currently a stub: the input is returned unchanged.
     *
     * @param string $string Label to prepare.
     * @return string The same label.
     */
    protected static function nameprep($string) {
        return $string;
    }

    /**
     * Decode a UTF-8 byte string into a list of Unicode code points.
     *
     * @param string $input UTF-8 encoded string.
     * @return array List of integer code points, in input order.
     * @throws Requests_Exception `idna.invalidcodepoint` on an invalid lead or
     *                            continuation byte, a truncated sequence, an
     *                            overlong encoding, or a code point in one of
     *                            the rejected ranges.
     */
    protected static function utf8_to_codepoints($input) {
        $codepoints = array();
        $strlen     = strlen($input);

        // $position is deliberately advanced inside the loop body as well,
        // while the continuation bytes of a sequence are consumed.
        for ($position = 0; $position < $strlen; $position++) {
            $value = ord($input[$position]);

            if ((~$value & 0x80) === 0x80) {
                // 0xxxxxxx: single byte.
                $character = $value;
                $length    = 1;
                $remaining = 0;
            } elseif (($value & 0xe0) === 0xc0) {
                // 110xxxxx: lead byte of a two-byte sequence.
                $character = ($value & 0x1f) << 6;
                $length    = 2;
                $remaining = 1;
            } elseif (($value & 0xf0) === 0xe0) {
                // 1110xxxx: lead byte of a three-byte sequence.
                $character = ($value & 0xf) << 12;
                $length    = 3;
                $remaining = 2;
            } elseif (($value & 0xf8) === 0xf0) {
                // 11110xxx: lead byte of a four-byte sequence.
                $character = ($value & 0x7) << 18;
                $length    = 4;
                $remaining = 3;
            } else {
                throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $value);
            }

            if ($remaining > 0) {
                // The whole sequence must fit into the input.
                if ($position + $length > $strlen) {
                    throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character);
                }

                for ($position++; $remaining > 0; $position++) {
                    $value = ord($input[$position]);

                    // Continuation bytes must look like 10xxxxxx.
                    if (($value & 0xc0) !== 0x80) {
                        throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character);
                    }

                    --$remaining;
                    $character |= ($value & 0x3f) << ($remaining * 6);
                }

                // Step back onto the last consumed byte; the outer loop
                // increments again.
                $position--;
            }

            // Encoded with more bytes than the value needs.
            $overlong = ($length > 1 && $character <= 0x7f)
                || ($length > 2 && $character <= 0x7ff)
                || ($length > 3 && $character <= 0xffff);

            // U+xxFFFE / U+xxFFFF and U+FDD0..U+FDEF.
            $noncharacter = (($character & 0xfffe) === 0xfffe)
                || ($character >= 0xfdd0 && $character <= 0xfdef);

            // Remaining rejected ranges: U+D800..U+F8FF, below U+0020,
            // U+007F..U+009F, and everything above U+EFFFD.
            $out_of_range = ($character > 0xd7ff && $character < 0xf900)
                || $character < 0x20
                || ($character > 0x7e && $character < 0xa0)
                || $character > 0xefffd;

            if ($overlong || $noncharacter || $out_of_range) {
                throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character);
            }

            $codepoints[] = $character;
        }

        return $codepoints;
    }

    /**
     * Punycode-encode a UTF-8 string (RFC 3492, section 6.3), without the ACE prefix.
     *
     * @param string $input UTF-8 encoded string.
     * @return string Punycode form: the ASCII characters of the input, a "-"
     *                delimiter if there were any, then the encoded deltas.
     * @throws Requests_Exception Propagated from utf8_to_codepoints() and digit_to_char().
     */
    public static function punycode_encode($input) {
        $output = '';
        $n      = self::BOOTSTRAP_INITIAL_N;
        $delta  = 0;
        $bias   = self::BOOTSTRAP_INITIAL_BIAS;
        $h      = 0; // Number of code points handled so far.

        $codepoints = self::utf8_to_codepoints($input);
        $extended   = array();

        // Copy the basic (ASCII) code points straight to the output and
        // collect the distinct non-basic ones.
        foreach ($codepoints as $char) {
            if ($char < 128) {
                $output .= chr($char);
                $h++;
            } elseif ($char < $n) {
                throw new Requests_Exception('Invalid character', 'idna.character_outside_domain', $char);
            } else {
                $extended[$char] = true;
            }
        }

        $extended = array_keys($extended);
        sort($extended);

        // $b: number of basic code points; a delimiter follows them if any exist.
        $b = $h;
        if ($b > 0) {
            $output .= '-';
        }

        $codepointcount = count($codepoints);

        while ($h < $codepointcount) {
            // $extended is sorted and unique, so the next element is the
            // smallest code point not yet handled.
            $m      = array_shift($extended);
            $delta += ($m - $n) * ($h + 1);
            $n      = $m;

            foreach ($codepoints as $c) {
                if ($c < $n) {
                    $delta++;
                } elseif ($c === $n) {
                    // Emit $delta as a generalised variable-length integer.
                    $q = $delta;
                    for ($k = self::BOOTSTRAP_BASE; ; $k += self::BOOTSTRAP_BASE) {
                        if ($k <= $bias + self::BOOTSTRAP_TMIN) {
                            $t = self::BOOTSTRAP_TMIN;
                        } elseif ($k >= $bias + self::BOOTSTRAP_TMAX) {
                            $t = self::BOOTSTRAP_TMAX;
                        } else {
                            $t = $k - $bias;
                        }

                        if ($q < $t) {
                            break;
                        }

                        $digit   = $t + (($q - $t) % (self::BOOTSTRAP_BASE - $t));
                        $output .= self::digit_to_char($digit);
                        // Integer division; keep $q an int rather than a float.
                        $q = (int) floor(($q - $t) / (self::BOOTSTRAP_BASE - $t));
                    }

                    $output .= self::digit_to_char($q);
                    $bias    = self::adapt($delta, $h + 1, $h === $b);
                    $delta   = 0;
                    $h++;
                }
            }

            $delta++;
            $n++;
        }

        return $output;
    }

    /**
     * Map a digit value to its Punycode character (0-25 => a-z, 26-35 => 0-9).
     *
     * @param int $digit Digit value, 0 to 35.
     * @return string Single character.
     * @throws Requests_Exception `idna.invalid_digit` if the digit is outside 0-35.
     */
    protected static function digit_to_char($digit) {
        if ($digit < 0 || $digit > 35) {
            throw new Requests_Exception(sprintf('Invalid digit %d', $digit), 'idna.invalid_digit', $digit);
        }

        $digits = 'abcdefghijklmnopqrstuvwxyz0123456789';

        return substr($digits, (int) $digit, 1);
    }

    /**
     * Bias adaptation function (RFC 3492, section 6.1).
     *
     * @param int  $delta     Delta that was just encoded.
     * @param int  $numpoints Number of code points handled so far, including the current one.
     * @param bool $firsttime Whether this is the first delta of the string.
     * @return int New bias.
     */
    protected static function adapt($delta, $numpoints, $firsttime) {
        // All divisions below are integer divisions ("div" in the RFC).
        if ($firsttime) {
            $delta = (int) floor($delta / self::BOOTSTRAP_DAMP);
        } else {
            $delta = (int) floor($delta / 2);
        }

        $delta += (int) floor($delta / $numpoints);

        $k   = 0;
        $max = (int) floor((self::BOOTSTRAP_BASE - self::BOOTSTRAP_TMIN) * self::BOOTSTRAP_TMAX / 2);

        while ($delta > $max) {
            $delta = (int) floor($delta / (self::BOOTSTRAP_BASE - self::BOOTSTRAP_TMIN));
            $k    += self::BOOTSTRAP_BASE;
        }

        return $k + (int) floor((self::BOOTSTRAP_BASE - self::BOOTSTRAP_TMIN + 1) * $delta / ($delta + self::BOOTSTRAP_SKEW));
    }
}