File: pattern.go

package info (click to toggle)
golang-x-text 0.0~git20161013.0.c745997-2
links: PTS, VCS
area: main
in suites: stretch, stretch-backports
size: 17,872 kB
sloc: makefile: 14
file content (386 lines) | stat: -rw-r--r-- 9,616 bytes
parent folder | download | duplicates (3)
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package number

import (
	"errors"
	"unicode/utf8"
)

// This file contains a parser for the CLDR number patterns as described in
// http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
//
// The following BNF is derived from this standard.
//
// pattern    := subpattern (';' subpattern)?
// subpattern := affix? number exponent? affix?
// number     := decimal | sigDigits
// decimal    := '#'* '0'* ('.' fraction)? | '#' | '0'
// fraction   := '0'* '#'*
// sigDigits  := '#'* '@' '@'* '#'*
// exponent   := 'E' '+'? '0'* '0'
// padSpec    := '*' \L
//
// Notes:
// - An affix pattern may contain any runes, but runes with special meaning
//   should be escaped.
// - Sequences of digits, '#', and '@' in decimal and sigDigits may have
//   interstitial commas.

// TODO: replace special characters in affixes (-, +, ¤) with control codes.

// Format holds information for formatting numbers. It is designed to hold
// information from CLDR number patterns.
//
// Formats are precompiled for all patterns of all languages. Even though the
// number of patterns is not very large, we want to keep this struct small.
//
// This type is only intended for internal use.
type Format struct {
	// TODO: this struct can be packed a lot better than it is now. Should be
	// possible to make it 32 bytes.

	// Affix is built by ParsePattern as a sequence of length-prefixed strings:
	// the prefix and suffix of the first subpattern, followed by the prefix
	// and suffix of the negative subpattern if there is one. ParsePattern
	// leaves it empty when every affix is empty. NegOffset is the index in
	// Affix at which the negative affixes start. Offset is not written by the
	// parser in this file.
	Affix     string // includes prefix and suffix. First byte is prefix length.
	Offset    uint16 // Offset into Affix for prefix and suffix
	NegOffset uint16 // Offset into Affix for negative prefix and suffix or 0.

	// Multiplier is set by the parser to 100 for a '%' and to 1000 for a '‰'
	// found in an affix and is 0 otherwise. RoundIncrement is accumulated from
	// the decimal digits of the integer and fraction parts of the pattern,
	// read as one number. PadRune is the rune that follows '*' in a pad
	// specifier, or 0 if the pattern has none.
	Multiplier     uint32
	RoundIncrement uint32 // Use Min*Digits to determine scale
	PadRune        rune

	// FormatWidth is incremented once for every rune of the first subpattern
	// that is consumed by the parser's state functions. The runes of a pad
	// specifier are skipped without being counted.
	FormatWidth uint16

	// GroupingSize[0] is the size of the most recently closed digit group and
	// GroupingSize[1] the size of the group closed before it. Flags is a
	// combination of FormatFlag bits.
	GroupingSize [2]uint8
	Flags        FormatFlag

	// Number of digits.
	MinIntegerDigits     uint8
	MaxIntegerDigits     uint8
	MinFractionDigits    uint8
	MaxFractionDigits    uint8
	MinSignificantDigits uint8
	MaxSignificantDigits uint8
	MinExponentDigits    uint8
}

// A FormatFlag is a bit mask for the flag field of a Format.
type FormatFlag uint8

// Flag bits for Format.Flags. The first six constants receive consecutive
// bits (1<<0 through 1<<5) from iota. Within this file only AlwaysExpSign
// (set for a '+' in the exponent) and the pad position values are written by
// the parser.
// NOTE(review): the meaning of AlwaysSign, AlwaysDecimalSeparator and
// ParenthesisForNegative is presumably what their names say; they are not
// used in this file, so confirm against the formatter.
const (
	AlwaysSign FormatFlag = 1 << iota
	AlwaysExpSign
	AlwaysDecimalSeparator
	ParenthesisForNegative // Common pattern. Saves space.

	// The next two bits together encode the position of the pad specifier.
	PadAfterNumber
	PadAfterAffix

	// Pad positions as passed to parsePad, expressed as combinations of the
	// two bits above. PadBeforePrefix is 0, so it sets no bit. PadMask
	// selects both position bits.
	PadBeforePrefix = 0 // Default
	PadAfterPrefix  = PadAfterAffix
	PadBeforeSuffix = PadAfterNumber
	PadAfterSuffix  = PadAfterNumber | PadAfterAffix
	PadMask         = PadAfterNumber | PadAfterAffix
)

// parser holds the state for parsing one subpattern. The embedded Format
// receives the values extracted from the pattern.
type parser struct {
	*Format

	// leadingSharps counts the '#' runes seen in the number state. It is
	// reset to 0 when an '@' is encountered.
	leadingSharps int

	// err is the first error recorded through setError.
	// doNotTerminate is set by a state function to signal that the pattern
	// may not end after the current rune; parse reports errUnexpectedEnd if
	// the input ends while it is set.
	// groupingCount is the number of digit placeholders seen since the last
	// call to updateGrouping, and hasGroup records whether updateGrouping has
	// been called before.
	// buf accumulates the length-prefixed affixes.
	// pos is not referenced by the code visible in this part of the file.
	pos            int
	err            error
	doNotTerminate bool
	groupingCount  uint
	hasGroup       bool
	buf            []byte
}

// setError records err as the result of the parse unless an error has already
// been recorded: the first error wins.
func (p *parser) setError(err error) {
	if p.err != nil {
		return
	}
	p.err = err
}

// updateGrouping closes the digit group that has been counted so far and
// starts a new one.
//
// The first call only marks that a group boundary has been seen. Every later
// call shifts GroupingSize[0] into GroupingSize[1] and stores the size of the
// group just closed in GroupingSize[0], provided that size is below 255.
func (p *parser) updateGrouping() {
	size, seenBoundary := p.groupingCount, p.hasGroup

	// Reset the counters first; the values needed below were saved above.
	p.groupingCount = 0
	p.hasGroup = true

	if !seenBoundary || size >= 255 {
		return
	}
	p.GroupingSize[0], p.GroupingSize[1] = uint8(size), p.GroupingSize[0]
}

// Errors reported by the pattern parser.
var (
	// TODO: more sensible and localizable error messages.
	errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers")
	errInvalidPadSpecifier   = errors.New("format: invalid pad specifier")
	errInvalidQuote          = errors.New("format: invalid quote")
	// An affix is stored with a single length byte, so it may be at most 255
	// (0xFF) bytes long.
	errAffixTooLarge         = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 255 bytes")
	errDuplicatePercentSign  = errors.New("format: duplicate percent sign")
	errDuplicatePermilleSign = errors.New("format: duplicate permille sign")
	errUnexpectedEnd         = errors.New("format: unexpected end of pattern")
)

// ParsePattern extracts formatting information from a CLDR number pattern.
//
// A pattern consists of a subpattern, optionally followed by ';' and a second
// subpattern for negative numbers. Only the affixes of the negative
// subpattern are retained: they are appended to the affixes of the first
// subpattern and NegOffset records where they start. If all affixes are
// empty, Affix is left empty and NegOffset is 0.
//
// ParsePattern returns a nil Format and a non-nil error if either subpattern
// is malformed or if input remains after the last subpattern.
//
// See http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
func ParsePattern(s string) (f *Format, err error) {
	p := parser{Format: &Format{}}

	s = p.parseSubPattern(s)

	if s != "" {
		// Parse negative sub pattern.
		if s[0] != ';' {
			p.setError(errors.New("format: error parsing first sub pattern"))
			return nil, p.err
		}
		neg := parser{Format: &Format{}} // just for extracting the affixes.
		s = neg.parseSubPattern(s[len(";"):])
		// The negative subpattern is handled by its own parser, so its
		// errors must be carried over explicitly. Otherwise, for example, an
		// oversized negative affix would be accepted with a wrapped length
		// byte.
		if neg.err != nil {
			p.setError(neg.err)
		}
		p.NegOffset = uint16(len(p.buf))
		p.buf = append(p.buf, neg.buf...)
	}
	if s != "" {
		p.setError(errors.New("format: spurious characters at end of pattern"))
	}
	if p.err != nil {
		return nil, p.err
	}
	if affix := string(p.buf); affix == "\x00\x00" || affix == "\x00\x00\x00\x00" {
		// No prefix or suffixes.
		p.NegOffset = 0
	} else {
		p.Affix = affix
	}
	return p.Format, nil
}

// parseSubPattern parses a single subpattern: an optional prefix, the number
// part and an optional suffix, with a pad specifier allowed before or after
// either affix. It returns the unparsed remainder of s.
func (p *parser) parseSubPattern(s string) string {
	s = p.parsePad(s, PadBeforePrefix)
	s = p.parseAffix(s)
	s = p.parsePad(s, PadAfterPrefix)

	s = p.parse(p.number, s)
	// The state functions close a digit group only when they see certain
	// runes that follow it. A group that is still open here, because the
	// number ran up to the end of the pattern or was ended by an affix rune
	// in the number state (as in "#,##0" or "#,###%"), has to be closed
	// explicitly or its size is never recorded in GroupingSize.
	if p.groupingCount > 0 {
		p.updateGrouping()
	}

	s = p.parsePad(s, PadBeforeSuffix)
	s = p.parseAffix(s)
	s = p.parsePad(s, PadAfterSuffix)
	return s
}

// parsePad parses an optional pad specifier, a '*' followed by the pad rune,
// at the start of s and returns the remainder of s. If s does not start with
// '*' it is returned unchanged.
//
// f is the pad position to record in Flags when the specifier is accepted.
// Errors are recorded through setError so that an earlier error is never
// overwritten:
//   - errInvalidPadSpecifier if '*' is the last byte of s or is followed by
//     invalid UTF-8;
//   - errMultiplePadSpecifiers if a pad rune has already been recorded.
func (p *parser) parsePad(s string, f FormatFlag) (tail string) {
	if s == "" || s[0] != '*' {
		return s
	}
	if len(s) == 1 {
		// A '*' without a pad rune. Leave it in place: no other parser state
		// consumes '*', so the pattern is rejected either way, but with the
		// more specific error recorded first.
		p.setError(errInvalidPadSpecifier)
		return s
	}
	r, sz := utf8.DecodeRuneInString(s[1:])
	switch {
	case r == utf8.RuneError && sz == 1:
		// Invalid UTF-8 rather than a literal U+FFFD, which decodes with a
		// width of 3.
		p.setError(errInvalidPadSpecifier)
	case p.PadRune != 0:
		p.setError(errMultiplePadSpecifiers)
	default:
		p.Flags |= f
		p.PadRune = r
	}
	return s[1+sz:]
}

// parseAffix parses a prefix or suffix at the start of s and appends it to
// p.buf as a length byte followed by the affix bytes. It returns the
// remainder of s. errAffixTooLarge is recorded if the affix is longer than
// 0xFF bytes.
func (p *parser) parseAffix(s string) string {
	// Reserve the length byte; it is filled in once the affix is known.
	lenPos := len(p.buf)
	p.buf = append(p.buf, 0)

	rest := p.parse(p.affix, s)

	size := len(p.buf) - (lenPos + 1)
	if size > 0xFF {
		p.setError(errAffixTooLarge)
	}
	p.buf[lenPos] = uint8(size)
	return rest
}

// state implements a state transition. It returns the new state. A state
// function may set an error on the parser or may simply return on an incorrect
// token and let the next phase fail.
//
// Returning nil ends the current phase: parse stops and hands the rune that
// caused the nil result, together with the rest of the input, to the caller.
type state func(r rune) state

// parse feeds the runes of s one by one to the state machine that starts in
// state fn. It stops at the first rune for which the current state returns
// nil or after which an error is recorded, and returns the input starting at
// that rune. Every rune consumed before that point adds one to FormatWidth.
//
// If the whole of s is consumed, parse returns "" and records
// errUnexpectedEnd when the last state function asked not to terminate.
func (p *parser) parse(fn state, s string) (tail string) {
	for offset, r := range s {
		// Each state function has to opt in again to forbid termination.
		p.doNotTerminate = false
		fn = fn(r)
		if p.err != nil || fn == nil {
			return s[offset:]
		}
		p.FormatWidth++
	}
	if !p.doNotTerminate {
		return ""
	}
	p.setError(errUnexpectedEnd)
	return ""
}

// affix is the state for the unquoted part of a prefix or suffix.
//
// Digits and the runes '#', '@', '.', '*', ',' and ';' end the affix. A
// single quote switches to the escape state. '%' and '‰' set the multiplier
// to 100 and 1000 respectively, recording an error if a multiplier was set
// before, and are kept as part of the affix like any other rune.
func (p *parser) affix(r rune) state {
	switch r {
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
		'#', '@', '.', '*', ',', ';':
		return nil
	case '\'':
		// The quote has to be closed before the pattern ends; parse reports
		// errUnexpectedEnd otherwise.
		p.doNotTerminate = true
		return p.escape
	case '%':
		if p.Multiplier != 0 {
			p.setError(errDuplicatePercentSign)
		}
		p.Multiplier = 100
	case '\u2030': // ‰ Per mille
		if p.Multiplier != 0 {
			p.setError(errDuplicatePermilleSign)
		}
		p.Multiplier = 1000
		// TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤
	}
	p.buf = append(p.buf, string(r)...)
	return p.affix
}

// escape is the state for the quoted part of an affix. Every rune up to the
// closing quote is copied to the affix verbatim; the closing quote returns to
// the affix state.
func (p *parser) escape(r rune) state {
	switch r {
	case '\'':
		return p.affix
	default:
		p.buf = append(p.buf, string(r)...)
	}
	// Still inside the quotes: the pattern may not end here.
	p.doNotTerminate = true
	return p.escape
}

// number parses a number. The BNF says the integer part should always have
// a '0', but that does not appear to be the case according to the rest of the
// documentation. We will allow having only '#' numbers.
//
// number is the initial state of the number part and handles the leading
// '#' runes. A digit, '@', '.' or 'E' hands over to the integer, sigDigits,
// fraction or exponent state. A ',' is accepted only after at least one '#'.
// Any other rune ends the number.
func (p *parser) number(r rune) state {
	switch r {
	case '#':
		p.groupingCount++
		p.leadingSharps++
	case '@':
		// sigDigits processes this same rune and adds it to groupingCount;
		// counting it here as well would make the group one too large.
		p.leadingSharps = 0
		return p.sigDigits(r)
	case ',':
		if p.leadingSharps == 0 { // no leading commas
			return nil
		}
		p.updateGrouping()
	case 'E':
		p.MaxIntegerDigits = uint8(p.leadingSharps)
		return p.exponent
	case '.': // allow ".##" etc.
		p.updateGrouping()
		return p.fraction
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return p.integer(r)
	default:
		return nil
	}
	return p.number
}

// integer is the state for the digits of the integer part. Every digit adds
// to MinIntegerDigits and to the current digit group, and is appended to
// RoundIncrement.
//
// Any other rune closes the current digit group. 'E' continues with the
// exponent, '.' with the fraction, and ',' stays in the integer part so that
// grouping separators may appear between digits, as the grammar notes at the
// top of this file allow. All other runes end the number.
func (p *parser) integer(r rune) state {
	if !('0' <= r && r <= '9') {
		var next state
		switch r {
		case 'E':
			if p.leadingSharps > 0 {
				p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
			}
			next = p.exponent
		case '.':
			next = p.fraction
		case ',':
			// Interstitial grouping separator: keep reading digits.
			next = p.integer
		}
		p.updateGrouping()
		return next
	}
	p.RoundIncrement = p.RoundIncrement*10 + uint32(r-'0')
	p.groupingCount++
	p.MinIntegerDigits++
	return p.integer
}

// sigDigits is the state for the '@' runes of a significant-digits pattern.
// Each '@' counts towards both the minimum and the maximum number of
// significant digits. A '#' hands over to sigDigitsFinal. Any other rune
// closes the current digit group; 'E' then continues with the exponent and
// everything else ends the number.
func (p *parser) sigDigits(r rune) state {
	if r == '@' {
		p.groupingCount++
		p.MaxSignificantDigits++
		p.MinSignificantDigits++
		return p.sigDigits
	}
	if r == '#' {
		return p.sigDigitsFinal(r)
	}
	p.updateGrouping()
	if r == 'E' {
		return p.normalizeSigDigitsWithExponent()
	}
	return nil
}

// sigDigitsFinal is the state for the '#' runes that follow the '@' runes of
// a significant-digits pattern. Each '#' raises only the maximum number of
// significant digits. Any other rune closes the current digit group; 'E'
// then continues with the exponent and everything else ends the number.
func (p *parser) sigDigitsFinal(r rune) state {
	if r == '#' {
		p.groupingCount++
		p.MaxSignificantDigits++
		return p.sigDigitsFinal
	}
	p.updateGrouping()
	if r != 'E' {
		return nil
	}
	return p.normalizeSigDigitsWithExponent()
}

// normalizeSigDigitsWithExponent rewrites a significant-digits pattern that
// is followed by an exponent as one integer digit plus fraction digits: the
// fraction digit counts become the significant digit counts minus one and
// the significant digit counts are cleared. It returns the exponent state.
func (p *parser) normalizeSigDigitsWithExponent() state {
	minSig, maxSig := p.MinSignificantDigits, p.MaxSignificantDigits

	p.MinSignificantDigits = 0
	p.MaxSignificantDigits = 0

	p.MinIntegerDigits = 1
	p.MaxIntegerDigits = 1
	p.MinFractionDigits = minSig - 1
	p.MaxFractionDigits = maxSig - 1
	return p.exponent
}

// fraction is the state for the runes after the decimal point. A digit
// counts towards both the minimum and the maximum number of fraction digits
// and is appended to RoundIncrement. A '#' raises only the maximum. 'E'
// continues with the exponent, after deriving MaxIntegerDigits when the
// pattern had leading '#' runes. Any other rune ends the number.
func (p *parser) fraction(r rune) state {
	switch {
	case '0' <= r && r <= '9':
		p.RoundIncrement = 10*p.RoundIncrement + uint32(r-'0')
		p.MinFractionDigits++
		p.MaxFractionDigits++
		return p.fraction
	case r == '#':
		p.MaxFractionDigits++
		return p.fraction
	case r == 'E':
		if p.leadingSharps > 0 {
			p.MaxIntegerDigits = p.MinIntegerDigits + uint8(p.leadingSharps)
		}
		return p.exponent
	}
	return nil
}

// exponent is the state for the runes after 'E'. Each '0' adds one to
// MinExponentDigits. A '+' is accepted once, before any digit; it sets
// AlwaysExpSign and forbids the pattern from ending right after it. Any other
// rune ends the number, and an error is recorded if no digit was seen by
// then.
func (p *parser) exponent(r rune) state {
	if r == '0' {
		p.MinExponentDigits++
		return p.exponent
	}
	// A '+' is valid only if the sign mode has not been set and no digit has
	// been read yet; otherwise it is treated like any other terminating rune.
	if r == '+' && p.Flags&AlwaysExpSign == 0 && p.MinExponentDigits == 0 {
		p.Flags |= AlwaysExpSign
		p.doNotTerminate = true
		return p.exponent
	}
	// termination condition
	if p.MinExponentDigits == 0 {
		p.setError(errors.New("format: need at least one digit"))
	}
	return nil
}