1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277
|
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_trieval.go gen_common.go
// http://www.unicode.org/reports/tr46
// Package idna implements IDNA2008 using the compatibility processing
// defined by UTS (Unicode Technical Standard) #46, which defines a standard to
// deal with the transition from IDNA2003.
//
// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
// UTS #46 is defined in http://www.unicode.org/reports/tr46.
// See http://unicode.org/cldr/utility/idna.jsp for a visualization of the
// differences between these two standards.
package idna // import "golang.org/x/text/internal/export/idna"
import (
"errors"
"fmt"
"strings"
"unicode"
"unicode/utf8"
"golang.org/x/text/secure/bidirule"
"golang.org/x/text/unicode/norm"
)
// A Profile defines the configuration of an IDNA mapper.
type Profile struct {
// Transitional selects transitional processing. When it is false, code
// points in the deviation category are treated as valid instead of being
// mapped.
Transitional bool
// IgnoreSTD3Rules, when set, treats the disallowedSTD3Mapped and
// disallowedSTD3Valid categories as mapped and valid, respectively,
// instead of as disallowed.
IgnoreSTD3Rules bool
// IgnoreDNSLength, when set, skips the label and domain name length
// checks that are otherwise performed when converting to ASCII.
IgnoreDNSLength bool
// ErrHandler func(error)
}
// String reports a string with a description of the profile for debugging
// purposes. The string format may change with different versions.
//
// The description starts with "Transitional" or "NonTransitional", depending
// on p.Transitional, and is followed by ":NoSTD3Rules" if p.IgnoreSTD3Rules is
// set.
func (p *Profile) String() string {
	s := "NonTransitional"
	if p.Transitional {
		s = "Transitional"
	}
	if p.IgnoreSTD3Rules {
		s += ":NoSTD3Rules"
	}
	return s
}
var (
// Resolve is the recommended profile for resolving domain names.
// The configuration of this profile may change over time.
// It currently has the same configuration as Transitional.
Resolve = resolve
// Transitional defines a profile that implements the Transitional mapping
// as defined in UTS #46 with no additional constraints.
Transitional = transitional
// NonTransitional defines a profile that implements the NonTransitional
// mapping as defined in UTS #46 with no additional constraints.
NonTransitional = nonTransitional
resolve = &Profile{Transitional: true}
transitional = &Profile{Transitional: true}
nonTransitional = &Profile{}
// TODO: profiles
// V2008: strict IDNA2008
// Registrar: recommended for approving domain names.
)
// TODO: rethink error strategy
var (
// errDisallowed indicates a domain name contains a disallowed rune, that
// is, a rune whose category simplifies to disallowed under the profile
// in use.
errDisallowed = errors.New("idna: disallowed rune")
// errEmptyLabel indicates a label was empty. process reports it for any
// empty label that remains after leading empty labels are stripped, other
// than a single trailing one.
errEmptyLabel = errors.New("idna: empty label")
)
// process implements the algorithm described in section 4 of UTS #46,
// see http://www.unicode.org/reports/tr46.
//
// It maps s rune by rune according to the profile, normalizes the result to
// NFC, splits it into labels on '.', decodes labels carrying the ACE prefix
// and validates every label. If toASCII is set, non-ASCII labels are then
// punycode-encoded and, unless p.IgnoreDNSLength is set, the label and domain
// name lengths are checked.
//
// Processing does not stop at the first problem: the returned error is the
// first one encountered, and the returned string is the processed name even
// when the error is non-nil. The only exception is a name without any labels,
// for which the empty string is returned.
func (p *Profile) process(s string, toASCII bool) (string, error) {
	var (
		b    []byte
		err  error
		k, i int
	)
	// b is only appended to once a rune needs changing; k marks the start of
	// the bytes of s that have not been copied into b yet.
	for i < len(s) {
		v, sz := trie.lookupString(s[i:])
		start := i
		i += sz
		switch p.simplify(info(v).category()) {
		case valid:
			continue
		case disallowed:
			if err == nil {
				err = errDisallowed
			}
			continue
		case mapped, deviation:
			b = append(b, s[k:start]...)
			b = info(v).appendMapping(b, s[start:i])
		case ignored:
			b = append(b, s[k:start]...)
			// drop the rune
		case unknown:
			b = append(b, s[k:start]...)
			b = append(b, "\ufffd"...)
		}
		k = i
	}
	if k == 0 {
		// No changes so far.
		s = norm.NFC.String(s)
	} else {
		b = append(b, s[k:]...)
		if norm.NFC.QuickSpan(b) != len(b) {
			b = norm.NFC.Bytes(b)
		}
		// TODO: the punycode converters requires strings as input.
		s = string(b)
	}
	// TODO(perf): don't split.
	labels := strings.Split(s, ".")
	// Remove leading empty labels
	for len(labels) > 0 && labels[0] == "" {
		labels = labels[1:]
	}
	if len(labels) == 0 {
		return "", errors.New("idna: there are no labels")
	}
	// Find the position of the root label.
	root := len(labels) - 1
	if labels[root] == "" {
		root--
	}
	for i, label := range labels {
		// Empty labels are not okay, unless it is the last.
		if label == "" {
			if i <= root && err == nil {
				err = errEmptyLabel
			}
			continue
		}
		if strings.HasPrefix(label, acePrefix) {
			u, err2 := decode(label[len(acePrefix):])
			if err2 != nil {
				if err == nil {
					err = err2
				}
				// Spec says keep the old label.
				continue
			}
			labels[i] = u
			if err == nil {
				err = p.validateFromPunycode(u)
			}
			if err == nil {
				err = NonTransitional.validate(u)
			}
		} else if err == nil {
			err = p.validate(labels[i])
		}
	}
	if toASCII {
		for i, label := range labels {
			if !ascii(label) {
				a, err2 := encode(acePrefix, label)
				if err == nil {
					err = err2
				}
				labels[i] = a
			}
			n := len(labels[i])
			if !p.IgnoreDNSLength && err == nil && (n == 0 || n > 63) {
				// An empty label is only acceptable in the last position,
				// where it stands for the root.
				if n != 0 || i != len(labels)-1 {
					err = fmt.Errorf("idna: label with invalid length %d", n)
				}
			}
		}
	}
	s = strings.Join(labels, ".")
	if toASCII && !p.IgnoreDNSLength && err == nil {
		// Compute the length of the domain name minus the root label and its
		// dot. Only a trailing dot is excluded; the last non-empty label is
		// part of the name and must be counted.
		n := len(s)
		if n > 0 && s[n-1] == '.' {
			n--
		}
		if len(s) < 1 || n > 253 {
			err = fmt.Errorf("idna: domain name with invalid length %d", n)
		}
	}
	return s, err
}
// acePrefix is the ASCII Compatible Encoding prefix. process treats labels
// that start with it as punycode to be decoded, and passes it to encode when
// converting non-ASCII labels to ASCII.
const acePrefix = "xn--"
// simplify reduces cat to the category the profile acts on:
//   - the disallowedSTD3 categories become mapped or valid when
//     p.IgnoreSTD3Rules is set, and disallowed otherwise;
//   - deviation is kept only for transitional profiles and becomes valid
//     otherwise;
//   - validNV8 and validXV8 become valid.
//
// Any other category is returned unchanged.
func (p *Profile) simplify(cat category) category {
	switch cat {
	case disallowedSTD3Mapped:
		if p.IgnoreSTD3Rules {
			return mapped
		}
		return disallowed
	case disallowedSTD3Valid:
		if p.IgnoreSTD3Rules {
			return valid
		}
		return disallowed
	case deviation:
		if p.Transitional {
			return deviation
		}
		return valid
	case validNV8, validXV8:
		// TODO: handle V2008
		return valid
	default:
		return cat
	}
}
// validateFromPunycode checks a label that was obtained by decoding punycode.
// The label must be in NFC, and every rune in it must have a category that
// simplifies to valid or deviation under p; otherwise an error is returned.
func (p *Profile) validateFromPunycode(s string) error {
	if !norm.NFC.IsNormalString(s) {
		return errors.New("idna: punycode is not normalized")
	}
	for rest := s; len(rest) > 0; {
		v, n := trie.lookupString(rest)
		switch p.simplify(info(v).category()) {
		case valid, deviation:
			// Acceptable in a decoded label.
		default:
			return fmt.Errorf("idna: invalid character %+q in expanded punycode", rest[:n])
		}
		rest = rest[n:]
	}
	return nil
}
// validate validates the criteria from Section 4.1. Item 1, 4, and 6 are
// already implicitly satisfied by the overall implementation.
//
// It returns an error if s is empty, has "--" in its third and fourth
// positions, starts or ends with '-', starts with a rune in unicode.M, or
// fails the Bidi Rule; otherwise it returns nil.
func (p *Profile) validate(s string) error {
	if s == "" {
		// process passes decoded punycode labels here without checking their
		// length, and the byte indexing below would panic on an empty one
		// (presumably what a bare "xn--" label decodes to; confirm against
		// decode).
		return errEmptyLabel
	}
	if len(s) > 4 && s[2] == '-' && s[3] == '-' {
		return errors.New("idna: label starts with ??--")
	}
	if s[0] == '-' || s[len(s)-1] == '-' {
		return errors.New("idna: label may not start or end with '-'")
	}
	// TODO: merge the use of this in the trie.
	r, _ := utf8.DecodeRuneInString(s)
	if unicode.Is(unicode.M, r) {
		return fmt.Errorf("idna: label starts with modifier %U", r)
	}
	if !bidirule.ValidString(s) {
		// No formatting is needed here; passing an argument without a verb
		// would append "%!(EXTRA ...)" to the message.
		return errors.New("idna: label violates Bidi Rule")
	}
	return nil
}
// ToASCII converts a domain or domain label to its ASCII form by running
// process with toASCII set: non-ASCII labels are encoded with the "xn--"
// prefix and, unless p.IgnoreDNSLength is set, label and domain name lengths
// are checked. The returned error, if any, is the first one encountered
// during processing.
func (p *Profile) ToASCII(s string) (string, error) {
return p.process(s, true)
}
// ToUnicode converts a domain or domain label to its Unicode form by running
// process with toASCII unset. Processing is always non-transitional,
// regardless of p.Transitional, but the other options of p (such as
// IgnoreSTD3Rules) are honored. The returned error, if any, is the first one
// encountered during processing.
func (p *Profile) ToUnicode(s string) (string, error) {
	if p == nil {
		// Keep working for a nil receiver by using the default
		// non-transitional profile.
		return NonTransitional.process(s, false)
	}
	// Work on a copy so that p itself is left untouched while only the
	// transitional setting is overridden.
	pp := *p
	pp.Transitional = false
	return pp.process(s, false)
}
// ascii reports whether every byte of s is below utf8.RuneSelf, i.e. whether
// s consists solely of ASCII characters. The empty string is ASCII.
func ascii(s string) bool {
	for _, c := range []byte(s) {
		if c >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
|