File: format.go

package info (click to toggle)
miniflux 2.2.16-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,188 kB
  • sloc: xml: 4,853; javascript: 1,158; sh: 257; makefile: 161
file content (96 lines) | stat: -rw-r--r-- 2,140 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package parser // import "miniflux.app/v2/internal/reader/parser"

import (
	"encoding/xml"
	"io"
	"unicode"

	rxml "miniflux.app/v2/internal/reader/xml"
)

// List of feed formats.
//
// These are the values DetectFeedFormat returns as its first result.
const (
	// FormatRDF is reported when an XML start element named "RDF" is found.
	FormatRDF     = "rdf"
	// FormatRSS is reported when an XML start element named "rss" is found.
	FormatRSS     = "rss"
	// FormatAtom is reported when an XML start element named "feed" is found.
	FormatAtom    = "atom"
	// FormatJSON is reported when the first non-whitespace byte of the input is '{'.
	FormatJSON    = "json"
	// FormatUnknown is reported when none of the other formats is recognised.
	FormatUnknown = "unknown"
)

// DetectFeedFormat guesses the feed format of the data available from r.
//
// It returns the detected format (one of the Format* constants) and a version
// string. The version is only populated for Atom: "0.3" when the "feed"
// element carries a version attribute equal to "0.3", and "1.0" otherwise.
// Every other result has an empty version.
//
// Detection order: the input is first checked for JSON (first non-whitespace
// byte is '{'); if that does not match, the XML tokens are scanned and the
// first start element named "rss", "feed" or "RDF" decides the format. When
// the token stream ends (or the decoder stops yielding tokens) without a
// match, FormatUnknown is returned.
//
// r is rewound to its start before detection and again when the function
// returns. Errors from Seek are not reported.
func DetectFeedFormat(r io.ReadSeeker) (string, string) {
	r.Seek(0, io.SeekStart)
	defer r.Seek(0, io.SeekStart)

	looksLikeJSON, jsonErr := detectJSONFormat(r)
	if jsonErr == nil && looksLikeJSON {
		return FormatJSON, ""
	}

	// The JSON probe consumed some input; start over for the XML scan.
	r.Seek(0, io.SeekStart)
	dec := rxml.NewXMLDecoder(r)

	// A nil token marks the end of the stream; decoder errors are ignored
	// because they also surface as a nil token here.
	for tok, _ := dec.Token(); tok != nil; tok, _ = dec.Token() {
		start, isStart := tok.(xml.StartElement)
		if !isStart {
			continue
		}

		root := start.Name.Local
		if root == "rss" {
			return FormatRSS, ""
		}
		if root == "RDF" {
			return FormatRDF, ""
		}
		if root == "feed" {
			// Atom defaults to 1.0 unless the element declares version 0.3.
			version := "1.0"
			for _, attribute := range start.Attr {
				if attribute.Name.Local == "version" && attribute.Value == "0.3" {
					version = "0.3"
					break
				}
			}
			return FormatAtom, version
		}
	}

	return FormatUnknown, ""
}

// detectJSONFormat reports whether the first non-whitespace byte read from r
// is '{', which is taken as the signature of a JSON document.
//
// Reading starts at r's current position and proceeds in small chunks until a
// byte that is not whitespace (per unicode.IsSpace applied to the single byte)
// is seen. The result is:
//
//   - (true, nil) if that byte is '{';
//   - (false, nil) if it is anything else, or if the input ends before any
//     non-whitespace byte is found;
//   - (false, err) if a read returns no data together with an error other
//     than io.EOF.
//
// A read that returns no data and no error is treated as end of input so the
// loop cannot spin on a misbehaving reader. The read position of r is left
// wherever the last Read ended; callers that need the data again must seek.
func detectJSONFormat(r io.ReadSeeker) (bool, error) {
	const bufferSize = 32
	buffer := make([]byte, bufferSize)

	for {
		n, err := r.Read(buffer)
		if n == 0 {
			if err == io.EOF {
				return false, nil // Nothing but whitespace before EOF.
			}
			return false, err
		}

		for _, ch := range buffer[:n] {
			// Skip whitespace characters (space, tab, newline, carriage return, etc.)
			if unicode.IsSpace(rune(ch)) {
				continue
			}
			// First non-whitespace character determines if it's JSON
			return ch == '{', nil
		}

		// Only whitespace so far. A short read is NOT proof of EOF: io.Reader
		// is allowed to return fewer bytes than requested, so keep reading
		// until Read itself reports that no more data is available.
	}
}