1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
|
package mimetype
import (
"mime"
"github.com/gabriel-vasile/mimetype/internal/charset"
"github.com/gabriel-vasile/mimetype/internal/magic"
)
// MIME struct holds information about a file format: the string representation
// of the MIME type, the extension and the parent file format.
type MIME struct {
mime string
aliases []string
extension string
// detector receives the raw input and a limit for the number of bytes it is
// allowed to check. It returns whether the input matches a signature or not.
detector magic.Detector
children []*MIME
parent *MIME
}
// String returns the string representation of the MIME type, e.g., "application/zip".
func (m *MIME) String() string {
return m.mime
}
// Extension returns the file extension associated with the MIME type.
// It includes the leading dot, as in ".html". When the file format does not
// have an extension, the empty string is returned.
func (m *MIME) Extension() string {
return m.extension
}
// Parent returns the parent MIME type from the hierarchy.
// Each MIME type has a non-nil parent, except for the root MIME type.
//
// For example, the application/json and text/html MIME types have text/plain as
// their parent because they are text files who happen to contain JSON or HTML.
// Another example is the ZIP format, which is used as container
// for Microsoft Office files, EPUB files, JAR files, and others.
func (m *MIME) Parent() *MIME {
return m.parent
}
// Is checks whether this MIME type, or any of its aliases, is equal to the
// expected MIME type. MIME type equality test is done on the "type/subtype"
// section, ignores any optional MIME parameters, ignores any leading and
// trailing whitespace, and is case insensitive.
func (m *MIME) Is(expectedMIME string) bool {
// Parsing is needed because some detected MIME types contain parameters
// that need to be stripped for the comparison.
expectedMIME, _, _ = mime.ParseMediaType(expectedMIME)
found, _, _ := mime.ParseMediaType(m.mime)
if expectedMIME == found {
return true
}
for _, alias := range m.aliases {
if alias == expectedMIME {
return true
}
}
return false
}
func newMIME(
mime, extension string,
detector magic.Detector,
children ...*MIME) *MIME {
m := &MIME{
mime: mime,
extension: extension,
detector: detector,
children: children,
}
for _, c := range children {
c.parent = m
}
return m
}
func (m *MIME) alias(aliases ...string) *MIME {
m.aliases = aliases
return m
}
// match does a depth-first search on the signature tree. It returns the deepest
// successful node for which all the children detection functions fail.
func (m *MIME) match(in []byte, readLimit uint32) *MIME {
for _, c := range m.children {
if c.detector(in, readLimit) {
return c.match(in, readLimit)
}
}
needsCharset := map[string]func([]byte) string{
"text/plain": charset.FromPlain,
"text/html": charset.FromHTML,
"text/xml": charset.FromXML,
}
// ps holds optional MIME parameters.
ps := map[string]string{}
if f, ok := needsCharset[m.mime]; ok {
if cset := f(in); cset != "" {
ps["charset"] = cset
}
}
return m.cloneHierarchy(ps)
}
// flatten transforms an hierarchy of MIMEs into a slice of MIMEs.
func (m *MIME) flatten() []*MIME {
out := []*MIME{m}
for _, c := range m.children {
out = append(out, c.flatten()...)
}
return out
}
// clone creates a new MIME with the provided optional MIME parameters.
func (m *MIME) clone(ps map[string]string) *MIME {
clonedMIME := m.mime
if len(ps) > 0 {
clonedMIME = mime.FormatMediaType(m.mime, ps)
}
return &MIME{
mime: clonedMIME,
aliases: m.aliases,
extension: m.extension,
}
}
// cloneHierarchy creates a clone of m and all its ancestors. The optional MIME
// parametes are set on the last child of the hierarchy.
func (m *MIME) cloneHierarchy(ps map[string]string) *MIME {
ret := m.clone(ps)
lastChild := ret
for p := m.Parent(); p != nil; p = p.Parent() {
pClone := p.clone(nil)
lastChild.parent = pClone
lastChild = pClone
}
return ret
}
func (m *MIME) lookup(mime string) *MIME {
for _, n := range append(m.aliases, m.mime) {
if n == mime {
return m
}
}
for _, c := range m.children {
if m := c.lookup(mime); m != nil {
return m
}
}
return nil
}
// Extend adds detection for a sub-format. The detector is a function
// returning true when the raw input file satisfies a signature.
// The sub-format will be detected if all the detectors in the parent chain return true.
// The extension should include the leading dot, as in ".html".
func (m *MIME) Extend(detector func(raw []byte, limit uint32) bool, mime, extension string, aliases ...string) {
c := &MIME{
mime: mime,
extension: extension,
detector: detector,
parent: m,
aliases: aliases,
}
mu.Lock()
m.children = append([]*MIME{c}, m.children...)
mu.Unlock()
}
|