1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
|
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package parser // import "miniflux.app/v2/internal/reader/parser"
import (
"encoding/xml"
"io"
"unicode"
rxml "miniflux.app/v2/internal/reader/xml"
)
// List of feed formats.
//
// These are the values DetectFeedFormat returns as its first result.
const (
// FormatRDF is reported when an <RDF> start element is found.
FormatRDF = "rdf"
// FormatRSS is reported when an <rss> start element is found.
FormatRSS = "rss"
// FormatAtom is reported when a <feed> start element is found.
FormatAtom = "atom"
// FormatJSON is reported when the first non-whitespace byte of the input is '{'.
FormatJSON = "json"
// FormatUnknown is reported when none of the other formats was recognized.
FormatUnknown = "unknown"
)
// DetectFeedFormat guesses the feed format of the data available from r.
//
// The reader is rewound to its start before detection and again when the
// function returns, so the caller can parse the same data afterwards.
//
// The first result is one of the Format* constants. The second result is a
// version string that is only populated for Atom feeds: "0.3" when the <feed>
// element has a version attribute equal to "0.3", and "1.0" otherwise.
//
// Seek and tokenizer errors are ignored; a nil token simply ends the scan and
// the format is reported as FormatUnknown.
func DetectFeedFormat(r io.ReadSeeker) (string, string) {
	r.Seek(0, io.SeekStart)
	defer r.Seek(0, io.SeekStart)

	// JSON is checked first: it only needs the first non-whitespace byte.
	looksLikeJSON, jsonErr := detectJSONFormat(r)
	if jsonErr == nil && looksLikeJSON {
		return FormatJSON, ""
	}

	// The JSON probe consumed input, so rewind before handing r to the XML decoder.
	r.Seek(0, io.SeekStart)
	dec := rxml.NewXMLDecoder(r)

	for tok, _ := dec.Token(); tok != nil; tok, _ = dec.Token() {
		start, isStart := tok.(xml.StartElement)
		if !isStart {
			continue
		}

		switch start.Name.Local {
		case "rss":
			return FormatRSS, ""
		case "RDF":
			return FormatRDF, ""
		case "feed":
			// Atom 0.3 is only distinguished by an explicit version attribute.
			atomVersion := "1.0"
			for i := range start.Attr {
				if start.Attr[i].Name.Local == "version" && start.Attr[i].Value == "0.3" {
					atomVersion = "0.3"
					break
				}
			}
			return FormatAtom, atomVersion
		}
	}

	return FormatUnknown, ""
}
// detectJSONFormat reports whether the first non-whitespace byte read from r
// is '{', which is taken as the start of a JSON document.
//
// Reading starts at r's current position and proceeds in small chunks until a
// non-whitespace byte is found. The result is:
//   - (true, nil) if that byte is '{';
//   - (false, nil) if it is anything else, or if the input ends (io.EOF)
//     before any non-whitespace byte is seen;
//   - (false, err) if a read returns no bytes together with a non-EOF error.
//
// A short read is not treated as end of input: io.Reader implementations may
// return fewer bytes than requested while more data is still pending, so the
// loop only stops once a read yields zero bytes.
func detectJSONFormat(r io.ReadSeeker) (bool, error) {
	const bufferSize = 32
	buffer := make([]byte, bufferSize)

	for {
		n, err := r.Read(buffer)
		if n == 0 {
			if err == io.EOF {
				return false, nil // No non-whitespace content found
			}
			// Either a genuine read error, or a (0, nil) read; in both cases
			// stop instead of spinning on a reader that makes no progress.
			return false, err
		}

		// Ranging over buffer[:n] keeps the accesses in bounds without an
		// explicit length assertion.
		for _, ch := range buffer[:n] {
			// Skip whitespace characters (space, tab, newline, carriage return, etc.)
			if unicode.IsSpace(rune(ch)) {
				continue
			}
			// First non-whitespace character determines if it's JSON
			return ch == '{', nil
		}
		// Only whitespace so far: keep reading, even after a short read.
	}
}
|