File: parser.go

package info (click to toggle)
golang-github-hashicorp-hcl 0.0~git20161215.0.80e628d-1
links: PTS, VCS
area: main
in suites: stretch, stretch-backports
size: 948 kB
sloc: makefile: 22
file content (510 lines) | stat: -rw-r--r-- 12,471 bytes
// Package parser implements a parser for HCL (HashiCorp Configuration
// Language)
package parser

import (
	"errors"
	"fmt"
	"strings"

	"github.com/hashicorp/hcl/hcl/ast"
	"github.com/hashicorp/hcl/hcl/scanner"
	"github.com/hashicorp/hcl/hcl/token"
)

type Parser struct {
	sc *scanner.Scanner

	// Last read token
	tok       token.Token
	commaPrev token.Token

	comments    []*ast.CommentGroup
	leadComment *ast.CommentGroup // last lead comment
	lineComment *ast.CommentGroup // last line comment

	enableTrace bool
	indent      int
	n           int // buffer size (max = 1)
}

func newParser(src []byte) *Parser {
	return &Parser{
		sc: scanner.New(src),
	}
}

// Parse returns the fully parsed source and returns the abstract syntax tree.
func Parse(src []byte) (*ast.File, error) {
	p := newParser(src)
	return p.Parse()
}

var errEofToken = errors.New("EOF token found")

// Parse returns the fully parsed source and returns the abstract syntax tree.
func (p *Parser) Parse() (*ast.File, error) {
	f := &ast.File{}
	var err, scerr error
	p.sc.Error = func(pos token.Pos, msg string) {
		scerr = &PosError{Pos: pos, Err: errors.New(msg)}
	}

	f.Node, err = p.objectList(false)
	if scerr != nil {
		return nil, scerr
	}
	if err != nil {
		return nil, err
	}

	f.Comments = p.comments
	return f, nil
}

// objectList parses a list of items within an object (generally k/v pairs).
// The parameter" obj" tells this whether to we are within an object (braces:
// '{', '}') or just at the top level. If we're within an object, we end
// at an RBRACE.
func (p *Parser) objectList(obj bool) (*ast.ObjectList, error) {
	defer un(trace(p, "ParseObjectList"))
	node := &ast.ObjectList{}

	for {
		if obj {
			tok := p.scan()
			p.unscan()
			if tok.Type == token.RBRACE {
				break
			}
		}

		n, err := p.objectItem()
		if err == errEofToken {
			break // we are finished
		}

		// we don't return a nil node, because might want to use already
		// collected items.
		if err != nil {
			return node, err
		}

		node.Add(n)

		// object lists can be optionally comma-delimited e.g. when a list of maps
		// is being expressed, so a comma is allowed here - it's simply consumed
		tok := p.scan()
		if tok.Type != token.COMMA {
			p.unscan()
		}
	}
	return node, nil
}

func (p *Parser) consumeComment() (comment *ast.Comment, endline int) {
	endline = p.tok.Pos.Line

	// count the endline if it's multiline comment, ie starting with /*
	if len(p.tok.Text) > 1 && p.tok.Text[1] == '*' {
		// don't use range here - no need to decode Unicode code points
		for i := 0; i < len(p.tok.Text); i++ {
			if p.tok.Text[i] == '\n' {
				endline++
			}
		}
	}

	comment = &ast.Comment{Start: p.tok.Pos, Text: p.tok.Text}
	p.tok = p.sc.Scan()
	return
}

func (p *Parser) consumeCommentGroup(n int) (comments *ast.CommentGroup, endline int) {
	var list []*ast.Comment
	endline = p.tok.Pos.Line

	for p.tok.Type == token.COMMENT && p.tok.Pos.Line <= endline+n {
		var comment *ast.Comment
		comment, endline = p.consumeComment()
		list = append(list, comment)
	}

	// add comment group to the comments list
	comments = &ast.CommentGroup{List: list}
	p.comments = append(p.comments, comments)

	return
}

// objectItem parses a single object item
func (p *Parser) objectItem() (*ast.ObjectItem, error) {
	defer un(trace(p, "ParseObjectItem"))

	keys, err := p.objectKey()
	if len(keys) > 0 && err == errEofToken {
		// We ignore eof token here since it is an error if we didn't
		// receive a value (but we did receive a key) for the item.
		err = nil
	}
	if len(keys) > 0 && err != nil && p.tok.Type == token.RBRACE {
		// This is a strange boolean statement, but what it means is:
		// We have keys with no value, and we're likely in an object
		// (since RBrace ends an object). For this, we set err to nil so
		// we continue and get the error below of having the wrong value
		// type.
		err = nil

		// Reset the token type so we don't think it completed fine. See
		// objectType which uses p.tok.Type to check if we're done with
		// the object.
		p.tok.Type = token.EOF
	}
	if err != nil {
		return nil, err
	}

	o := &ast.ObjectItem{
		Keys: keys,
	}

	if p.leadComment != nil {
		o.LeadComment = p.leadComment
		p.leadComment = nil
	}

	switch p.tok.Type {
	case token.ASSIGN:
		o.Assign = p.tok.Pos
		o.Val, err = p.object()
		if err != nil {
			return nil, err
		}
	case token.LBRACE:
		o.Val, err = p.objectType()
		if err != nil {
			return nil, err
		}
	default:
		keyStr := make([]string, 0, len(keys))
		for _, k := range keys {
			keyStr = append(keyStr, k.Token.Text)
		}

		return nil, fmt.Errorf(
			"key '%s' expected start of object ('{') or assignment ('=')",
			strings.Join(keyStr, " "))
	}

	// do a look-ahead for line comment
	p.scan()
	if len(keys) > 0 && o.Val.Pos().Line == keys[0].Pos().Line && p.lineComment != nil {
		o.LineComment = p.lineComment
		p.lineComment = nil
	}
	p.unscan()
	return o, nil
}

// objectKey parses an object key and returns a ObjectKey AST
func (p *Parser) objectKey() ([]*ast.ObjectKey, error) {
	keyCount := 0
	keys := make([]*ast.ObjectKey, 0)

	for {
		tok := p.scan()
		switch tok.Type {
		case token.EOF:
			// It is very important to also return the keys here as well as
			// the error. This is because we need to be able to tell if we
			// did parse keys prior to finding the EOF, or if we just found
			// a bare EOF.
			return keys, errEofToken
		case token.ASSIGN:
			// assignment or object only, but not nested objects. this is not
			// allowed: `foo bar = {}`
			if keyCount > 1 {
				return nil, &PosError{
					Pos: p.tok.Pos,
					Err: fmt.Errorf("nested object expected: LBRACE got: %s", p.tok.Type),
				}
			}

			if keyCount == 0 {
				return nil, &PosError{
					Pos: p.tok.Pos,
					Err: errors.New("no object keys found!"),
				}
			}

			return keys, nil
		case token.LBRACE:
			var err error

			// If we have no keys, then it is a syntax error. i.e. {{}} is not
			// allowed.
			if len(keys) == 0 {
				err = &PosError{
					Pos: p.tok.Pos,
					Err: fmt.Errorf("expected: IDENT | STRING got: %s", p.tok.Type),
				}
			}

			// object
			return keys, err
		case token.IDENT, token.STRING:
			keyCount++
			keys = append(keys, &ast.ObjectKey{Token: p.tok})
		case token.ILLEGAL:
			return keys, &PosError{
				Pos: p.tok.Pos,
				Err: fmt.Errorf("illegal character"),
			}
		default:
			return keys, &PosError{
				Pos: p.tok.Pos,
				Err: fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", p.tok.Type),
			}
		}
	}
}

// object parses any type of object, such as number, bool, string, object or
// list.
func (p *Parser) object() (ast.Node, error) {
	defer un(trace(p, "ParseType"))
	tok := p.scan()

	switch tok.Type {
	case token.NUMBER, token.FLOAT, token.BOOL, token.STRING, token.HEREDOC:
		return p.literalType()
	case token.LBRACE:
		return p.objectType()
	case token.LBRACK:
		return p.listType()
	case token.COMMENT:
		// implement comment
	case token.EOF:
		return nil, errEofToken
	}

	return nil, &PosError{
		Pos: tok.Pos,
		Err: fmt.Errorf("Unknown token: %+v", tok),
	}
}

// objectType parses an object type and returns a ObjectType AST
func (p *Parser) objectType() (*ast.ObjectType, error) {
	defer un(trace(p, "ParseObjectType"))

	// we assume that the currently scanned token is a LBRACE
	o := &ast.ObjectType{
		Lbrace: p.tok.Pos,
	}

	l, err := p.objectList(true)

	// if we hit RBRACE, we are good to go (means we parsed all Items), if it's
	// not a RBRACE, it's an syntax error and we just return it.
	if err != nil && p.tok.Type != token.RBRACE {
		return nil, err
	}

	// No error, scan and expect the ending to be a brace
	if tok := p.scan(); tok.Type != token.RBRACE {
		return nil, fmt.Errorf("object expected closing RBRACE got: %s", tok.Type)
	}

	o.List = l
	o.Rbrace = p.tok.Pos // advanced via parseObjectList
	return o, nil
}

// listType parses a list type and returns a ListType AST
func (p *Parser) listType() (*ast.ListType, error) {
	defer un(trace(p, "ParseListType"))

	// we assume that the currently scanned token is a LBRACK
	l := &ast.ListType{
		Lbrack: p.tok.Pos,
	}

	needComma := false
	for {
		tok := p.scan()
		if needComma {
			switch tok.Type {
			case token.COMMA, token.RBRACK:
			default:
				return nil, &PosError{
					Pos: tok.Pos,
					Err: fmt.Errorf(
						"error parsing list, expected comma or list end, got: %s",
						tok.Type),
				}
			}
		}
		switch tok.Type {
		case token.NUMBER, token.FLOAT, token.STRING, token.HEREDOC:
			node, err := p.literalType()
			if err != nil {
				return nil, err
			}

			// If there is a lead comment, apply it
			if p.leadComment != nil {
				node.LeadComment = p.leadComment
				p.leadComment = nil
			}

			l.Add(node)
			needComma = true
		case token.COMMA:
			// get next list item or we are at the end
			// do a look-ahead for line comment
			p.scan()
			if p.lineComment != nil && len(l.List) > 0 {
				lit, ok := l.List[len(l.List)-1].(*ast.LiteralType)
				if ok {
					lit.LineComment = p.lineComment
					l.List[len(l.List)-1] = lit
					p.lineComment = nil
				}
			}
			p.unscan()

			needComma = false
			continue
		case token.LBRACE:
			// Looks like a nested object, so parse it out
			node, err := p.objectType()
			if err != nil {
				return nil, &PosError{
					Pos: tok.Pos,
					Err: fmt.Errorf(
						"error while trying to parse object within list: %s", err),
				}
			}
			l.Add(node)
			needComma = true
		case token.BOOL:
			// TODO(arslan) should we support? not supported by HCL yet
		case token.LBRACK:
			// TODO(arslan) should we support nested lists? Even though it's
			// written in README of HCL, it's not a part of the grammar
			// (not defined in parse.y)
		case token.RBRACK:
			// finished
			l.Rbrack = p.tok.Pos
			return l, nil
		default:
			return nil, &PosError{
				Pos: tok.Pos,
				Err: fmt.Errorf("unexpected token while parsing list: %s", tok.Type),
			}
		}
	}
}

// literalType parses a literal type and returns a LiteralType AST
func (p *Parser) literalType() (*ast.LiteralType, error) {
	defer un(trace(p, "ParseLiteral"))

	return &ast.LiteralType{
		Token: p.tok,
	}, nil
}

// scan returns the next token from the underlying scanner. If a token has
// been unscanned then read that instead. In the process, it collects any
// comment groups encountered, and remembers the last lead and line comments.
func (p *Parser) scan() token.Token {
	// If we have a token on the buffer, then return it.
	if p.n != 0 {
		p.n = 0
		return p.tok
	}

	// Otherwise read the next token from the scanner and Save it to the buffer
	// in case we unscan later.
	prev := p.tok
	p.tok = p.sc.Scan()

	if p.tok.Type == token.COMMENT {
		var comment *ast.CommentGroup
		var endline int

		// fmt.Printf("p.tok.Pos.Line = %+v prev: %d endline %d \n",
		// p.tok.Pos.Line, prev.Pos.Line, endline)
		if p.tok.Pos.Line == prev.Pos.Line {
			// The comment is on same line as the previous token; it
			// cannot be a lead comment but may be a line comment.
			comment, endline = p.consumeCommentGroup(0)
			if p.tok.Pos.Line != endline {
				// The next token is on a different line, thus
				// the last comment group is a line comment.
				p.lineComment = comment
			}
		}

		// consume successor comments, if any
		endline = -1
		for p.tok.Type == token.COMMENT {
			comment, endline = p.consumeCommentGroup(1)
		}

		if endline+1 == p.tok.Pos.Line && p.tok.Type != token.RBRACE {
			switch p.tok.Type {
			case token.RBRACE, token.RBRACK:
				// Do not count for these cases
			default:
				// The next token is following on the line immediately after the
				// comment group, thus the last comment group is a lead comment.
				p.leadComment = comment
			}
		}

	}

	return p.tok
}

// unscan pushes the previously read token back onto the buffer.
func (p *Parser) unscan() {
	p.n = 1
}

// ----------------------------------------------------------------------------
// Parsing support

func (p *Parser) printTrace(a ...interface{}) {
	if !p.enableTrace {
		return
	}

	const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
	const n = len(dots)
	fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column)

	i := 2 * p.indent
	for i > n {
		fmt.Print(dots)
		i -= n
	}
	// i <= n
	fmt.Print(dots[0:i])
	fmt.Println(a...)
}

func trace(p *Parser, msg string) *Parser {
	p.printTrace(msg, "(")
	p.indent++
	return p
}

// Usage pattern: defer un(trace(p, "..."))
func un(p *Parser) {
	p.indent--
	p.printTrace(")")
}