1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
|
package xmltree
import (
"encoding/xml"
"io"
"golang.org/x/net/html/charset"
"github.com/ChrisTrenkamp/goxpath/tree"
"github.com/ChrisTrenkamp/goxpath/tree/xmltree/xmlbuilder"
"github.com/ChrisTrenkamp/goxpath/tree/xmltree/xmlele"
)
//ParseOptions holds the settings that alter the way the XML decoder works
//and the Node types that are created. ParseXML starts from its own defaults
//(Strict set to true, XMLRoot set to xmlele.Root) and then hands the struct
//to every ParseSettings function it was given.
type ParseOptions struct {
//Strict is copied by ParseXML onto the Strict field of the xml.Decoder.
Strict bool
//XMLRoot is called once by ParseXML to create the builder that the parse
//starts from.
XMLRoot func() xmlbuilder.XMLBuilder
}
//DirectiveParser is an optional interface extended from XMLBuilder that handles
//XML directives. When ParseXML reads an xml.Directive token it checks whether
//the builder it is currently positioned on implements this interface; if so,
//Directive is called with a copy of the directive and the decoder in use.
//Directives are otherwise ignored.
type DirectiveParser interface {
xmlbuilder.XMLBuilder
Directive(xml.Directive, *xml.Decoder)
}
//ParseSettings is a function for setting the ParseOptions you want when
//parsing an XML tree. ParseXML calls each one, in the order given, with a
//pointer to its ParseOptions after the defaults have been filled in.
type ParseSettings func(s *ParseOptions)
//MustParseXML behaves like ParseXML, except that any error ParseXML reports
//is raised as a panic (with the error as the panic value) instead of being
//returned to the caller.
func MustParseXML(r io.Reader, op ...ParseSettings) tree.Node {
	node, parseErr := ParseXML(r, op...)
	if parseErr == nil {
		return node
	}

	panic(parseErr)
}
//ParseXML creates an XMLTree structure from an io.Reader.
//
//The options start out as Strict: true and XMLRoot: xmlele.Root; every
//non-nil ParseSettings in op is then applied in order. If a setting leaves
//XMLRoot nil, the default xmlele.Root is used again so that parsing does not
//panic on a nil function call.
//
//A leading <?xml ...?> processing instruction is skipped and does not become
//a node. An error from reading the very first token is returned as-is with a
//nil Node. Once parsing is under way, io.EOF is treated as the normal end of
//input and reported as a nil error; any other error is returned together
//with the builder that was current when the error occurred.
func ParseXML(r io.Reader, op ...ParseSettings) (tree.Node, error) {
	ov := ParseOptions{
		Strict:  true,
		XMLRoot: xmlele.Root,
	}

	for _, i := range op {
		//A nil setting has nothing to apply; calling it would panic.
		if i != nil {
			i(&ov)
		}
	}

	//A setting may have replaced the whole struct and dropped the root
	//factory; fall back to the default instead of calling a nil function.
	if ov.XMLRoot == nil {
		ov.XMLRoot = xmlele.Root
	}

	dec := xml.NewDecoder(r)
	dec.CharsetReader = charset.NewReaderLabel
	dec.Strict = ov.Strict

	//ordrPos is the running position counter handed to setEle/setNode.
	ordrPos := 1
	xmlTree := ov.XMLRoot()

	t, err := dec.Token()
	if err != nil {
		return nil, err
	}

	//Skip the XML declaration; it is not represented in the tree.
	if head, ok := t.(xml.ProcInst); ok && head.Target == "xml" {
		t, err = dec.Token()
	}

	opts := xmlbuilder.BuilderOpts{
		Dec: dec,
	}

	for err == nil {
		switch xt := t.(type) {
		case xml.StartElement:
			setEle(&opts, xmlTree, xt, &ordrPos)
			xmlTree = xmlTree.CreateNode(&opts)
		case xml.CharData:
			setNode(&opts, xmlTree, xt, tree.NtChd, &ordrPos)
			xmlTree = xmlTree.CreateNode(&opts)
		case xml.Comment:
			setNode(&opts, xmlTree, xt, tree.NtComm, &ordrPos)
			xmlTree = xmlTree.CreateNode(&opts)
		case xml.ProcInst:
			setNode(&opts, xmlTree, xt, tree.NtPi, &ordrPos)
			xmlTree = xmlTree.CreateNode(&opts)
		case xml.EndElement:
			xmlTree = xmlTree.EndElem()
		case xml.Directive:
			//Directives are only forwarded when the current builder opts in.
			if dp, ok := xmlTree.(DirectiveParser); ok {
				dp.Directive(xt.Copy(), dec)
			}
		}

		t, err = dec.Token()
	}

	//Running out of input is the expected way for the loop to end.
	if err == io.EOF {
		err = nil
	}

	return xmlTree, err
}
//setEle prepares opts for creating an element node from the start tag ele.
//
//Attributes of ele are split into namespace declarations (the bare "xmlns"
//attribute and anything in the "xmlns" space), which go into opts.NS, and
//everything else, which is appended to opts.Attrs as pointers into ele.Attr.
//When xmlTree implements tree.NSElem, the namespaces reported by tree.BuildNS
//for it are merged in as well, with this element's own declarations taking
//precedence. Finally opts.AttrStartPos and *ordrPos are advanced past the
//namespace and attribute entries.
func setEle(opts *xmlbuilder.BuilderOpts, xmlTree xmlbuilder.XMLBuilder, ele xml.StartElement, ordrPos *int) {
	defaultNS := xml.Name{Local: "xmlns"}

	opts.NodeType = tree.NtElem
	opts.NodePos = *ordrPos
	opts.Tok = ele
	opts.NS = map[xml.Name]string{}
	//Empty the attribute list but keep its existing capacity.
	opts.Attrs = opts.Attrs[:0:cap(opts.Attrs)]

	for idx := range ele.Attr {
		a := &ele.Attr[idx]
		isNSDecl := a.Name.Space == "xmlns" || a.Name == defaultNS

		if !isNSDecl {
			opts.Attrs = append(opts.Attrs, a)
			continue
		}

		opts.NS[a.Name] = a.Value
	}

	if nsElem, ok := xmlTree.(tree.NSElem); ok {
		//Start from what BuildNS reports, then overlay this element's
		//own declarations.
		merged := make(map[xml.Name]string)

		for _, inherited := range tree.BuildNS(nsElem) {
			merged[inherited.Name] = inherited.Value
		}

		for name, uri := range opts.NS {
			merged[name] = uri
		}

		//An empty default namespace is not carried over from the merge.
		if uri := merged[defaultNS]; uri == "" {
			delete(merged, defaultNS)
		}

		for name, uri := range merged {
			opts.NS[name] = uri
		}

		//Elements directly under the root also get the "xml" prefix binding.
		if xmlTree.GetNodeType() == tree.NtRoot {
			opts.NS[xml.Name{Space: "xmlns", Local: "xml"}] = tree.XMLSpace
		}
	}

	next := len(opts.NS) + len(opts.Attrs) + *ordrPos
	opts.AttrStartPos = next
	*ordrPos = next + 1
}
//setNode prepares opts for creating a non-element node of type nt. The token
//is stored as a copy made with xml.CopyToken, the node takes the current
//value of *ordrPos as its position, and *ordrPos is then incremented by one.
//xmlTree is accepted for symmetry with setEle but is not used.
func setNode(opts *xmlbuilder.BuilderOpts, xmlTree xmlbuilder.XMLBuilder, tok xml.Token, nt tree.NodeType, ordrPos *int) {
	opts.Tok = xml.CopyToken(tok)
	opts.NodeType = nt

	pos := *ordrPos
	opts.NodePos = pos
	*ordrPos = pos + 1
}
|