File: fields

package info (click to toggle)
recoll 1.43.9-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 16,892 kB
  • sloc: cpp: 104,589; python: 9,603; xml: 7,272; ansic: 6,447; sh: 1,212; perl: 166; makefile: 72
file content (196 lines) | stat: -rw-r--r-- 7,578 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# (C) 2007-2011 J.F.Dockes
# License: GPL V2
#
# Field names configuration. This defines how one may search, e.g. for:
#   author:Hemingway
#
# Important: 
#   - the field names MUST be all lowercase alphabetic ascii here. They can
#     be anycased in the documents.

[prefixes]

#####################################################
# This section defines the prefix with which the terms inside named fields
# will be indexed (in addition to prefix-less indexing for general search).
# ALL prefixes MUST be all ASCII UPPERCASE (NO DIGITS).
#
# The field names should be the canonical ones, not the aliases defined in
# the [aliases] section below. Don't change the ones which are predefined
# here: quite a few are hard-coded in the C++ code. But you can add more
# (for new fields emitted by filters).
#
# Fields can have two relevance boost factors defined, such as in:
# title = S ; wdfinc=10
#  and/or
# title = S ; boost = 10
# The first line would boost the Xapian "within document frequency" of title terms by a factor of
# 10 at indexing time. The second one (NOT CURRENTLY IMPLEMENTED) would automatically boost the
# weight of a title-based field query (e.g. title:mytitle, or caption:mytitle through the alias)
# at query time.
#
# The 'pfxonly' attribute can also be set on entries to express that terms from the field should be
# indexed only with a prefix (in general, field terms are indexed both with and without a prefix).
#
# The 'noterms' attribute is used at query time to avoid adding the field values to the terms to be
# highlighted.
#
# The values defined here in the default "fields" file are mostly also hard-coded in the C++ code.
# They are listed here just for reference and can't be changed.
#
# Also reserved: F (parentid), Q (uniqueid), XE (file ext), XP (for path elements), XSFN, XSFS,
# XXST, XXND, XXPG. Using XX was not a good idea.
#
# Note that you can disable generating terms for the fields: "filename", "containerfilename",
# "rclUnsplitFN", "dir" and "mimetype" by setting their prefix to empty in the local "fields"
# file. If you're not interested in the corresponding searches, this can save a significant amount
# of space.
#
# There was initially some effort made to have some compatibility with Xapian Omega, but it is
# probably irrelevant now, given how differently the indexes are now structured. In any case see
# xapian/applications/omega/docs/termprefixes.rst in the Xapian source for Xapian/Omega prefix
# usage. In a nutshell, Omega reserves all single letters except X, which is the start for
# user-defined prefixes.
#
# Recoll uses a lot of X-starting prefixes, and also some XX ones, so locally defined prefixes
# should start with XY to avoid any possible collision with the predefined ones.
#
# I hereby commit to not using XY for Recoll.
# *** USE XY for beginning your local prefixes *** e.g.:
# myfield = XYMYFIELD

author = A
xapdate = D
keywords= K
xapyearmon = M
title = S ; wdfinc = 10
mtype = T
ext = XE; noterms = 1
rclmd5 = XM
dir = XP ; noterms = 1
abstract = XS
filename = XSFN ; noterms = 1
containerfilename = XCFN ; pfxonly = 1 ; noterms = 1
rclUnsplitFN = XSFS
xapyear = Y
recipient = XTO
rclbes = XB ; noterms = 1
annotation = XA

[values]
###########
## Fields which will be stored in Xapian values, allowing range query
## processing.
# Entries are specified as 'fieldname = valueslot;[px=val1;py=val2...]'.
# Xapian value slots are 32-bit numbers. Numbers below 1000 are reserved
# by Recoll or Xapian. Numbers above are available for user configuration.
# Values have types, which can be 'int' or 'string' at the moment. ints have
# an additional 'len' attribute, which specifies the padding size used for
# sorting (leading zeroes: all Xapian sorting is text-based). 10 is fine
# for an unsigned 32-bit integer.
# myfield = 1001; type=int; len = 10
# mystrfield = 1002; type = string

[stored]
############################
# Some fields are stored in the document data record inside the index and
# can be displayed in result lists. Stored fields do not need to be indexed
# (i.e. to have a prefix in the [prefixes] section). Example: "url"
#
# Some fields are stored by default, don't add them here:
#    caption, mimetype, url
# Only canonical names should be used here, not aliases.
# "rclaptg" is used for viewer specialization (depending on local config)
# "rclbes" defines the backend type (e.g. normal fs, firefox cache). Should
#   probably be hardcoded, don't remove it
abstract=
author=
filename=
keywords=
rclaptg=
rclbes=
recipient=
annotation=

[aliases]
##########################
# This section defines field name aliases or synonyms. Any right-hand side
# value will be turned into the left-hand side canonical name before further
# processing.
#
# The left-hand values in the recoll distribution file are well known and
# must match names used in the C++ code, or even the index data
# record. They can't change! But you can add others.
#
# Filters should only add canonical names to the meta array when indexing,
# not aliases.
abstract = summary dc:summary description xesam:description
author = creator dc:creator xesam:author xesam:creator from
title = caption title dc:title subject
# catg = dc:type contentCategory
dbytes = size xesam:size
dmtime = dc:date dc:datemodified datemodified contentmodified \
       xesam:contentmodified
ext = fileextension xesam:fileextension
# Don't add subject to keywords aliases, it's better to keep it for email
keywords = keyword xesam:keyword tag tags dc:subject xesam:subject \
	 dc:description
mtype = mime mimetype xesam:mimetype contenttype xesam:contenttype dc:format
recipient = to xesam:recipient
url = dc:identifier xesam:url
annotation = pdfannot

##################
# The queryaliases section defines aliases which are used exclusively at
# query time: there is no risk of picking up a random field from a document
# (e.g. an HTML meta field) and indexing it.
[queryaliases]
filename = fn
containerfilename = cfn
annotation = annot pa

[xattrtofields]
######################
# Processing for extended file attributes.
#
# By default, extended attributes are processed as document fields (after
# removing the 'user' prefix from the name on Linux).
#
# You can enter case-sensitive name translations as "xattrname = fieldname".
# Entering an empty translation will disable use of the attribute.
# The values from the extended attributes will extend, not replace, the
# data found in equivalent fields inside the document.
#
# Special case: an extended attribute named "modificationdate" will set the
# "dmtime" (document date) only if it is not set by an internal document
# field (e.g. email "Date:").
#
# As an example, the following maps a quite plausible "tags" extended
# attribute into the "keywords" field.
tags = keywords

# Proposed or standard freedesktop.org extended attributes
xdg.tags = keywords
xdg.comment = abstract

# MacOS Finder comments
com.apple.metadata:kMDItemFinderComment = abstract
# MacOS Finder tags. Not sure how useful it would be to index these.
#com.apple.metadata:_kMDItemUserTags = keywords
com.apple.FinderInfo =

# Some standard attributes are not to be used (by default).
mime_type =
charset =

########################
# Sections reserved for specific filters follow
#

##########################
# Mail filter section. You can specify mail headers to be indexed 
# in addition to the standard ones: (To:, Cc:, From:, Subject:, Date, 
# Message-Id), along with the field name to be used. For this to be useful, 
# the field name should also be listed in the [prefixes] and possibly the
# [stored] sections.
#
# [mail]
# x-my-tag = mymailtag