File: internfile.cpp

package info (click to toggle)
recoll 1.43.12-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 16,952 kB
  • sloc: cpp: 104,785; python: 9,933; xml: 7,314; ansic: 6,447; sh: 1,252; perl: 166; makefile: 72
file content (1212 lines) | stat: -rw-r--r-- 45,190 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
/* Copyright (C) 2004-2025 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "autoconfig.h"

#include <stdio.h>
#include <errno.h>
#include <stdint.h>
#include <fcntl.h>
#include <sys/types.h>
#include "safeunistd.h"

#include <string>
#include <iostream>
#include <map>
#include <memory>

using namespace std;

#include "cstr.h"
#include "internfile.h"
#include "rcldoc.h"
#include "mimetype.h"
#include "log.h"
#include "mimehandler.h"
#include "execmd.h"
#include "pathut.h"
#include "rclconfig.h"
#include "mh_html.h"
#include "fileudi.h"
#include "cancelcheck.h"
#include "copyfile.h"
#include "fetcher.h"
#include "extrameta.h"
#include "uncomp.h"

// The internal path element separator. This can't be the same as the rcldb 
// file to ipath separator : "|"
// We replace it with a control char if it comes out of a filter (ie:
// rclzip.py or rclchm.py can do this). If you want the SOH control char
// inside an ipath, you're out of luck (and a bit weird).
static const string cstr_isep(":");

static const char cchar_colon_repl = '\x01';
static string colon_hide(const string& in)
{
    string out;
    for (string::const_iterator it = in.begin(); it != in.end(); it++) {
        out += *it == ':' ? cchar_colon_repl : *it;
    }
    return out;
}
static string colon_restore(const string& in)
{
    string out;
    for (string::const_iterator it = in.begin(); it != in.end(); it++) {
        out += *it == cchar_colon_repl ? ':' : *it;
    }
    return out;
}

//#define DEBUGMETASTACK
#ifdef DEBUGMETASTACK
static void dumpmap(std::ostream& o, std::map<std::string, std::string> meta)
{
    for (const auto& [key, val] : meta) {
        if (key == cstr_dj_keycontent)
            continue;
        o << key << " -> " << val << " | ";
    }
    o << "\n";
}
#endif // DEBUGMETASTACK

// This is used when the user wants to retrieve a search result doc's parent
// (ie message having a given attachment)
bool FileInterner::getEnclosingUDI(const Rcl::Doc &doc, string& udi)
{
    LOGDEB("FileInterner::getEnclosingUDI(): url [" << doc.url <<"] ipath [" << doc.ipath << "]\n");
    string eipath = doc.ipath;
    string::size_type colon;
    if (eipath.empty())
        return false;
    if ((colon =  eipath.find_last_of(cstr_isep)) != string::npos) {
        eipath.erase(colon);
    } else {
        eipath.erase();
    }
    
    fileUdi::make_udi(url_gpath(doc.idxurl.empty() ? doc.url : doc.idxurl), eipath, udi);
    return true;
}

string FileInterner::getLastIpathElt(const string& ipath)
{
    string::size_type sep;
    if ((sep =  ipath.find_last_of(cstr_isep)) != string::npos) {
        return ipath.substr(sep + 1);
    } else {
        return ipath;
    }
}

bool FileInterner::ipathContains(const string& parent, const string& child)
{
    return child.find(parent) == 0 &&
        child.find(cstr_isep, parent.size()) == parent.size();
}

// Constructor: identify the input file, possibly create an
// uncompressed temporary copy, and create the top filter for the
// uncompressed file type.
//
// Empty handler on return says that we're in error, this will be
// processed by the first call to internfile().
// Split into "constructor calls init()" to allow use from other constructor
FileInterner::FileInterner(const string &fn, const struct PathStat& stp,
                           RclConfig *cnf, int flags, const string *imime)
{
    LOGDEB0("FileInterner::FileInterner(fn=" << fn << ")\n");
    if (fn.empty()) {
        LOGERR("FileInterner::FileInterner: empty file name!\n");
        return;
    }
    initcommon(cnf, flags);
    init(fn, stp, cnf, flags, imime);
}

// Note that we always succeed (set m_ok = true), except in internal
// inconsistency cases (which could just as well abort()).  Errors
// will be detected when internfile() is called. This is done so that
// our caller creates a doc record in all cases (with an error-flagged
// signature), so that the appropriate retry choices can be made. This
// used to not be the case, and was changed because this was the
// simplest way to solve the retry issues (simpler than changing the
// caller in e.g. fsindexer).
void FileInterner::init(const string &f, const struct PathStat& stp,
                        RclConfig *cnf, int flags, const string *imime)
{
    if (f.empty()) {
        LOGERR("FileInterner::init: empty file name!\n");
        return;
    }
    m_fn = f;

    // Compute udi for the input file. This is used by filters which
    // manage some kind of cache.  Indexing by udi makes things easier
    // because they sometimes get a temp as actual input.
    string udi;
    fileUdi::make_udi(f, cstr_null, udi);

    cnf->setKeyDir(path_getfather(m_fn));

    string l_mime;
    bool usfci = false;
    cnf->getConfParam("usesystemfilecommand", &usfci);

    // In general, even when the input mime type is set (when
    // previewing), we can't use it: it's the type for the actual
    // document, but this can be part of a compound document, and
    // we're dealing with the top level file here, or this could be a
    // compressed file. The flag tells us we really can use it
    // (e.g. the web indexer sets it).
    if (flags & FIF_doUseInputMimetype) {
        if (!imime) {
            LOGERR("FileInterner:: told to use null imime\n");
            return;
        }
        l_mime = *imime;
    } else {
        LOGDEB("FileInterner::init fn [" << f << "] mime [" <<
               (imime ? imime->c_str() : "(null)") << "] preview " <<
               m_forPreview << "\n");

        // Run mime type identification in any case (see comment above).
        l_mime = mimetype(m_fn, m_cfg, usfci, stp);

        // If identification fails, try to use the input parameter. This
        // is then normally not a compressed type (it's the mime type from
        // the db), and is only set when previewing, not for indexing
        if (l_mime.empty() && imime)
            l_mime = *imime;
    }

    int64_t docsize = stp.pst_size;
    struct PathStat ucstat = stp;

    if (!l_mime.empty()) {
        // Has mime: check for a compressed file. If so, create a
        // temporary uncompressed file, and rerun the mime type
        // identification, then do the rest with the temp file.
        vector<string>ucmd;
        if (m_cfg->getUncompressor(l_mime, ucmd)) {
            // Check for compressed size limit
            int maxkbs = -1;
            if (!m_cfg->getConfParam("compressedfilemaxkbs", &maxkbs) ||
                maxkbs < 0 || stp.pst_type == PathStat::PST_INVALID ||
                int(stp.pst_size / 1024) < maxkbs) {
                if (!m_uncomp->uncompressfile(m_fn, ucmd, m_tfile)) {
                    m_ok = true;
                    return;
                }
                LOGDEB1("FileInterner:: after ucomp: tfile " << m_tfile <<"\n");
                m_fn = m_tfile;
                // Stat the uncompressed file, mainly to get the size
                if (path_fileprops(m_fn, &ucstat) != 0) {
                    LOGERR("FileInterner: can't stat the uncompressed file[" <<
                           m_fn << "] errno " << errno << "\n");
                    m_ok = true;
                    return;
                } else {
                    docsize = ucstat.pst_size;
                }
                l_mime = mimetype(m_fn, m_cfg, usfci, ucstat);
                if (l_mime.empty() && imime)
                    l_mime = *imime;
            } else {
                LOGINFO("FileInterner:: " << m_fn << " over size limit " << maxkbs << " kbs\n");
            }
        }
    }

    if (l_mime.empty()) {
        // No mime type. We let it through as config may warrant that
        // we index all file names
        LOGDEB0("FileInterner:: no mime: [" << m_fn << "]\n");
    }

    // Get fields computed from extended attributes. We use the
    // original file, not the m_fn which may be the uncompressed temp
    // file
    if (!m_noxattrs)
        reapXAttrs(m_cfg, f, m_XAttrsFields);

    // Gather metadata from external commands as configured.
    reapMetaCmds(m_cfg, f, m_cmdFields);

    m_mimetype = l_mime;

    // Look for appropriate handler (might still return empty)
    RecollFilter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview, f, &ucstat);

    if (!df || df->is_unknown()) {
        // No real handler for this type, for now :( 
        LOGDEB("FileInterner:: unprocessed mime: [" << l_mime << "] [" << f << "]\n");
        if (!df)
            return;
    }
    df->set_property(Dijon::Filter::OPERATING_MODE, m_forPreview ? "view" : "index");
    df->set_property(Dijon::Filter::DJF_UDI, udi);

    df->set_docsize(docsize);
    // Don't process init errors here: doing it later allows indexing
    // the file name of even a totally unparsable file
    (void)df->set_document_file(l_mime, m_fn);
    m_handlers.push_back(df);
    LOGDEB("FileInterner:: init ok " << l_mime << " [" << m_fn << "]\n");
    m_ok = true;
}

// Setup from memory data (ie: out of the web cache). imime needs to be set.
FileInterner::FileInterner(const string &data, RclConfig *cnf, 
                           int flags, const string& imime)
{
    LOGDEB0("FileInterner::FileInterner(data)\n");
    initcommon(cnf, flags);
    init(data, cnf, flags, imime);
}

void FileInterner::init(const string &data, RclConfig *, int, const string& imime)
{
    if (imime.empty()) {
        LOGERR("FileInterner: inmemory constructor needs input mime type\n");
        return;
    }
    m_mimetype = imime;

    // Look for appropriate handler (might still return empty)
    RecollFilter *df = getMimeHandler(m_mimetype, m_cfg, !m_forPreview, m_fn);

    if (!df) {
        // No handler for this type, for now :( if indexallfilenames
        // is set in the config, this normally wont happen (we get mh_unknown)
        LOGDEB("FileInterner:: unprocessed mime [" << m_mimetype << "]\n");
        return;
    }
    df->set_property(Dijon::Filter::OPERATING_MODE, m_forPreview ? "view" : "index");

    df->set_docsize(data.length());
    if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
        (void)df->set_document_string(m_mimetype, data);
    } else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) {
        (void)df->set_document_data(m_mimetype, data.c_str(), data.length());
    } else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
        TempFile temp = dataToTempFile(data, m_mimetype);
        if (temp.ok()) {
            (void)df->set_document_file(m_mimetype, temp.filename());
            m_tmpflgs[m_handlers.size()] = true;
            m_tempfiles.push_back(temp);
        }
    }
    // Don't process init errors here: doing it later allows indexing
    // the file name of even a totally unparsable file
    m_handlers.push_back(df);
    m_ok = true;
}

void FileInterner::initcommon(RclConfig *cnf, int flags)
{
    m_cfg = cnf;
    m_forPreview = ((flags & FIF_forPreview) != 0);
    m_uncomp = new Uncomp(m_forPreview);
    // Initialize handler stack.
    m_handlers.reserve(MAXHANDLERS);
    for (unsigned int i = 0; i < MAXHANDLERS; i++)
        m_tmpflgs[i] = false;
    m_targetMType = cstr_textplain;
    m_noxattrs = false;
    m_cfg->getConfParam("noxattrfields", &m_noxattrs);
    m_direct = false;
}

FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags)
{
    LOGDEB0("FileInterner::FileInterner(idoc)\n");
    initcommon(cnf, flags);

    std::unique_ptr<DocFetcher> fetcher(docFetcherMake(cnf, idoc));
    if (!fetcher) {
        LOGERR("FileInterner:: no backend\n");
        return;
    }
    DocFetcher::RawDoc rawdoc;
    if (!fetcher->fetch(cnf, idoc, rawdoc)) {
        LOGERR("FileInterner:: fetcher failed\n");
        return;
    }
    switch (rawdoc.kind) {
    case DocFetcher::RawDoc::RDK_FILENAME:
        init(rawdoc.data, rawdoc.st, cnf, flags, &idoc.mimetype);
        break;
    case DocFetcher::RawDoc::RDK_DATA:
        init(rawdoc.data, cnf, flags, idoc.mimetype);
        break;
    case DocFetcher::RawDoc::RDK_DATADIRECT:
        // Note: only used for demo with the sample python external
        // mbox indexer at this point. The external program is
        // responsible for all the extraction process.
        init(rawdoc.data, cnf, flags, idoc.mimetype);
        m_direct = true;
        break;
    default:
        LOGERR("FileInterner::FileInterner(idoc): bad rawdoc kind ??\n");
    }
    return;
}

FileInterner::ErrorPossibleCause FileInterner::tryGetReason(RclConfig *cnf,
                                                            const Rcl::Doc& idoc)
{
    LOGDEB0("FileInterner::tryGetReason(idoc)\n");

    std::unique_ptr<DocFetcher> fetcher(docFetcherMake(cnf, idoc));
    if (!fetcher) {
        LOGERR("FileInterner:: no backend\n");
        return FileInterner::FetchNoBackend;
    }
    DocFetcher::Reason fetchreason = fetcher->testAccess(cnf, idoc);
    switch (fetchreason) {
    case DocFetcher::FetchNotExist: return FileInterner::FetchMissing;
    case DocFetcher::FetchNoPerm: return FileInterner::FetchPerm;
    default: return FileInterner::InternfileOther;
    }
}

bool FileInterner::makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig)
{
    std::unique_ptr<DocFetcher> fetcher(docFetcherMake(cnf, idoc));
    if (!fetcher) {
        LOGERR("FileInterner::makesig no backend for doc\n");
        return false;
    }

    bool ret = fetcher->makesig(cnf, idoc, sig);
    return ret;
}

FileInterner::~FileInterner()
{
    for (auto& entry: m_handlers) {
        returnMimeHandler(entry);
    }
    delete m_uncomp;
    // m_tempfiles will take care of itself
}

// Create a temporary file for a block of data (ie: attachment) found
// while walking the internal document tree, with a type for which the
// handler needs an actual file (ie : external script).
TempFile FileInterner::dataToTempFile(const string& dt, const string& mt)
{
    // Create temp file with appropriate suffix for mime type
    TempFile temp(m_cfg->getSuffixFromMimeType(mt));
    if (!temp.ok()) {
        LOGERR("FileInterner::dataToTempFile: cant create tempfile: " <<
               temp.getreason() << "\n");
        return TempFile();
    }
    string reason;
    if (!stringtofile(dt, temp.filename(), reason)) {
        LOGERR("FileInterner::dataToTempFile: stringtofile: " <<reason << "\n");
        return TempFile();
    }
    return temp;
}

// See if the error string is formatted as a missing helper message,
// accumulate helper name if it is. The format of the message is:
// RECFILTERROR HELPERNOTFOUND program1 [program2 ...]
void FileInterner::checkExternalMissing(const string& msg, const string& mt)
{
    LOGDEB2("checkExternalMissing: [" << msg << "]\n");
    if (m_missingdatap && msg.find("RECFILTERROR") == 0) {
        vector<string> verr;
        stringToStrings(msg, verr);
        if (verr.size() > 2) {
            vector<string>::iterator it = verr.begin();
            it++;
            if (*it == "HELPERNOTFOUND") {
                it++;
                for (; it != verr.end(); it++) {
                    m_missingdatap->addMissing(*it, mt);
                }
            }
        }                   
    }
}

void FIMissingStore::getMissingExternal(string& out) 
{
    for (const auto& [prog,tp] : m_typesForMissing) {
        out += string(" ") + prog;
    }
    trimstring(out);
}

void FIMissingStore::getMissingDescription(string& out)
{
    out.erase();
    for (const auto& [prog, tps] : m_typesForMissing) {
        out += prog + " (";
        for (const auto &tp: tps) {
            out += tp + " ";
        }
        trimstring(out);
        out += ")\n";
    }
}

FIMissingStore::FIMissingStore(const string& in)
{
    // The "missing" file is text. Each line defines a missing filter
    // and the list of mime types actually encountered that needed it
    // (see method getMissingDescription())

    vector<string> lines;
    stringToTokens(in, lines, "\n");

    for (const string& line: lines) {
        // Lines from the file are like: 
        //
        // filter name string (mime1 mime2) 
        //
        // We can't be too sure that there will never be a parenthesis
        // inside the filter string as this comes from the filter
        // itself. The list part is safer, so we start from the end.
        string::size_type lastopen = line.find_last_of("(");
        if (lastopen == string::npos)
            continue;
        string::size_type lastclose = line.find_last_of(")");
        if (lastclose == string::npos || lastclose <= lastopen + 1)
            continue;
        string smtypes = line.substr(lastopen+1, lastclose - lastopen - 1);
        vector<string> mtypes;
        stringToTokens(smtypes, mtypes);
        string filter = line.substr(0, lastopen);
        trimstring(filter);
        if (filter.empty())
            continue;

        for (const auto& mtype: mtypes) {
            m_typesForMissing[filter].insert(mtype);
        }
    }
}

// Helper for extracting a value from a map.
static inline bool getKeyValue(const map<string, string>& docdata, const string& key, string& value)
{
    auto it = docdata.find(key);
    if (it != docdata.end()) {
        value = it->second;
        LOGDEB2("getKeyValue: [" << key << "]->[" << value << "]\n");
        return true;
    }
    LOGDEB2("getKeyValue: no value for [" << key << "]\n");
    return false;
}

// Copy most metadata fields from the top filter to the recoll
// doc. Some fields need special processing, because they go into
// struct fields instead of metadata entry, or because we don't want
// to copy them.
bool FileInterner::dijontorcl(Rcl::Doc& doc)
{
    RecollFilter *df = m_handlers.back();
    if (df == 0) {
        //??
        LOGERR("FileInterner::dijontorcl: null top handler ??\n");
        return false;
    }
    for (const auto& ent :  df->get_meta_data()) {
        if (ent.first == cstr_dj_keycontent) {
            doc.text = ent.second;
            if (doc.fbytes.empty()) {
                // It's normally set by walking the filter stack, in
                // collectIpathAndMt, which was called before us.  It
                // can happen that the doc size is still empty at this
                // point if the last container filter is directly
                // returning text/plain content, so that there is no
                // ipath-less filter at the top
                 doc.fbytes = std::to_string(doc.text.length());
                LOGDEB("FileInterner::dijontorcl: fbytes->" << doc.fbytes << "\n");
            }
        } else if (ent.first == cstr_dj_keymd) {
            doc.dmtime = ent.second;
        } else if (ent.first == cstr_dj_keyanc) {
            doc.haschildren = true;
        } else if (ent.first == cstr_dj_keyorigcharset) {
            doc.origcharset = ent.second;
        } else if (ent.first == cstr_dj_keyfn) {
            // Only if not set during the stack walk
            const string *fnp = 0;
            if (!doc.peekmeta(Rcl::Doc::keyfn, &fnp) || fnp->empty()) {
                doc.meta[Rcl::Doc::keyfn] = ent.second;
            }
        } else if (ent.first == cstr_dj_keymd5) {
            // Only if not set during the stack walk: we want the md5
            // from the actual document, not from further conversions,
            // as computed, e.g. by the html to text handler
            const string *val = 0;
            if (!doc.peekmeta(Rcl::Doc::keymd5, &val) || val->empty())
                doc.meta[Rcl::Doc::keymd5] = ent.second;
        } else if (ent.first == cstr_dj_keymt || ent.first == cstr_dj_keycharset) {
            // don't need/want these.
        } else {
            LOGDEB2("dijontorcl: " << m_cfg->fieldCanon(ent.first) << " -> " << ent.second << "\n");
            if (!ent.second.empty()) {
                doc.meta[m_cfg->fieldCanon(ent.first)] = ent.second;
            }
        }
    }
    if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[cstr_dj_keyds].empty()) {
        doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_dj_keyds];
        doc.meta.erase(cstr_dj_keyds);
    }
    return true;
}

static void copymeta(const RclConfig *cfg, Rcl::Doc& doc, const RecollFilter* hp)
{
    // Populating the set must be done in an initializer because the compiler/runtime then
    // guarantees the thread-safety of the constructor. We used to test empty and then build, but
    // this was wrong. Also, as a static local variable, this is initialized on first function call,
    // so we're sure that all cstr_xx strings are ok, because they're initialized at file level.
    static set<string> nocopyfields {
        cstr_dj_keycontent, cstr_dj_keymd,
        cstr_dj_keyanc, cstr_dj_keyorigcharset, cstr_dj_keyfn,
        cstr_dj_keymt, cstr_dj_keycharset, cstr_dj_keyds};
    
    for (const auto& entry : hp->get_meta_data()) {
        if (nocopyfields.find(entry.first) == nocopyfields.end()) {
            doc.addmeta(cfg->fieldCanon(entry.first), entry.second);
        }
    }
    getKeyValue(hp->get_meta_data(), cstr_dj_keymd, doc.dmtime);
}


// Collect the ipath from the filter stack.
// While we're at it, we also set the mimetype and filename,
// which are special properties: we want to get them from the topmost
// doc with an ipath, not the last one which is usually text/plain We
// also set the author and modification time from the last doc which
// has them.
// 
// The stack can contain objects with an ipath element (corresponding
// to actual embedded documents), and, towards the top, elements
// without an ipath element, for format translations of the last doc.
//
// The docsize is fetched from the first element without an ipath
// (first non container). If the last element directly returns
// text/plain so that there is no ipath-less element, the value will
// be set in dijontorcl(). 
// 
// The whole thing is a bit messy but it's not obvious how it should
// be cleaned up as the "inheritance" rules inside the stack are
// actually complicated.
void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const
{
    LOGDEB2("FileInterner::collectIpathAndMT\n");

    // Set to true if any element in the stack sets an ipath. (at least one of
    // the docs is a compound).
    bool hasipath = false;

    // Set the attributes collected from the top file. They can be overriden (not augmented) by
    // internal values while we walk the stack. If this is a top level doc, we will call these again
    // in the end so that they will be this time added to the internal values (because
    // docFieldsFrom... uses addmeta).
    if (!m_noxattrs) {
        docFieldsFromXattrs(m_cfg, m_XAttrsFields, doc);
    }
    docFieldsFromMetaCmds(m_cfg, m_cmdFields, doc);

    // If there is no ipath stack, the mimetype is the one from the
    // file, else we'll change it further down.
    doc.mimetype = m_mimetype;

    string pathelprev;
    for (unsigned int i = 0; i < m_handlers.size(); i++) {
        const map<string, string>& docdata = m_handlers[i]->get_meta_data();
        string ipathel;
        getKeyValue(docdata, cstr_dj_keyipath, ipathel);
        if (!ipathel.empty()) {
            // Non-empty ipath. This stack element is for an
            // actual embedded document, not a format translation.
            hasipath = true;
            doc.ipath += colon_hide(ipathel) + cstr_isep;
            getKeyValue(docdata, cstr_dj_keymt, doc.mimetype);
            getKeyValue(docdata, cstr_dj_keyfn, doc.meta[Rcl::Doc::keyfn]);
        } else {
            // We copy all the metadata from the topmost actual
            // document: either the first if it has no ipath, or the
            // last one with an ipath (before pure format
            // translations). This would allow, for example mh_execm
            // handlers to use setfield() instead of embedding
            // metadata in the HTML meta tags.
            if (i == 0 || !pathelprev.empty()) {
                copymeta(m_cfg, doc, m_handlers[i == 0 ? 0 : i-1]);
            }
            if (doc.fbytes.empty()) {
                doc.fbytes = std::to_string(m_handlers[i]->get_docsize());
                LOGDEB("collectIpath..: fbytes->" << doc.fbytes << endl);
            }
        }
        // We set the author field from the innermost doc which has
        // one: allows finding, e.g. an image attachment having no
        // metadata by a search on the sender name. Only do this for
        // actually embedded documents (avoid replacing values from
        // metacmds for the topmost one). For a topmost doc, author
        // will be merged by dijontorcl() later on. About same for
        // dmtime, but an external value will be replaced, not
        // augmented if dijontorcl() finds an internal value.
        if (hasipath) {
            getKeyValue(docdata, cstr_dj_keyauthor, doc.meta[Rcl::Doc::keyau]);
            getKeyValue(docdata, cstr_dj_keymd, doc.dmtime);
        }
        pathelprev = ipathel;
    }

    if (hasipath) {
        // Trim ending ipath separator
        LOGDEB2("IPATH [" << doc.ipath << "]\n");
        if (doc.ipath.back() ==  cstr_isep[0]) {
            doc.ipath.erase(doc.ipath.end()-1);
        }
    }
}

// Remove handler from stack. Clean up temp file if needed.
void FileInterner::popHandler()
{
    if (m_handlers.empty())
        return;
    size_t i = m_handlers.size() - 1;
    if (m_tmpflgs[i]) {
        m_tempfiles.pop_back();
        m_tmpflgs[i] = false;
    }
    returnMimeHandler(m_handlers.back());
    m_handlers.pop_back();
}

enum addResols {ADD_OK, ADD_CONTINUE, ADD_BREAK, ADD_ERROR};

// Just got document from current top handler. See what type it is,
// and possibly add a filter/handler to the stack
int FileInterner::addHandler()
{
    const map<string, string>& docdata = m_handlers.back()->get_meta_data();
    string charset, mimetype;
    getKeyValue(docdata, cstr_dj_keycharset, charset);
    getKeyValue(docdata, cstr_dj_keymt, mimetype);

    LOGDEB("FileInterner::addHandler: back()  is " << mimetype <<
           " target [" << m_targetMType << "]\n");

    // A handler may send a path pointing to the content in a file instead of returning the data
    bool isdatapath{false};
    {std::string sdp;
        if (getKeyValue(docdata, cstr_dj_content_is_datapath, sdp)) {
            isdatapath = stringToBool(sdp);
        }
    }

    // If we find a document of the target type (text/plain in
    // general), we're done decoding. If we hit text/plain, we're done
    // in any case
    if (!stringicmp(mimetype, m_targetMType) || 
        !stringicmp(mimetype, cstr_textplain)) {
        m_reachedMType = mimetype;
        LOGDEB1("FileInterner::addHandler: target reached\n");
        return ADD_BREAK;
    }

    // We need to stack another handler. Check stack size
    if (m_handlers.size() >= MAXHANDLERS) {
        // Stack too big. Skip this and go on to check if there is
        // something else in the current back()
        LOGERR("FileInterner::addHandler: stack too high\n");
        return ADD_CONTINUE;
    }

    // We must not filter out HTML when it is an intermediate
    // conversion format. We discriminate between e.g. an HTML email
    // attachment (needs filtering) and a result of pdf conversion
    // (must process) by looking at the last ipath element: a
    // conversion will have an empty one (same test as in
    // collectIpathAndMT).
    string ipathel;
    getKeyValue(docdata, cstr_dj_keyipath, ipathel);
    bool dofilter = !m_forPreview &&
        (mimetype.compare(cstr_texthtml) || !ipathel.empty());
    RecollFilter *newflt = getMimeHandler(mimetype, m_cfg, dofilter, m_fn);
    if (!newflt) {
        // If we can't find a handler, this doc can't be handled
        // but there can be other ones so we go on
        LOGINFO("FileInterner::addHandler: no filter for [" << mimetype << "]\n");
        return ADD_CONTINUE;
    }
    newflt->set_property(Dijon::Filter::OPERATING_MODE, m_forPreview ? "view" : "index");
    if (!charset.empty())
        newflt->set_property(Dijon::Filter::DEFAULT_CHARSET, charset);

    // Get current content: we don't use getkeyvalue() here to avoid
    // copying the text, which may be big.
    string ns;
    const string *txt = &ns;
    {
        auto it = docdata.find(cstr_dj_keycontent);
        if (it != docdata.end())
            txt = &it->second;
    }
    bool setres = false;
    newflt->set_docsize(txt->length());
    if (!isdatapath && newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
        setres = newflt->set_document_string(mimetype, *txt);
    } else if (!isdatapath && newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) {
        setres = newflt->set_document_data(mimetype,txt->c_str(), txt->length());
    } else if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
        if (isdatapath && !txt->empty()) {
            setres = newflt->set_document_file(mimetype, *txt);
        } else {
            TempFile temp = dataToTempFile(*txt, mimetype);
            if (temp.ok() && 
                (setres = newflt->set_document_file(mimetype, temp.filename()))) {
                m_tmpflgs[m_handlers.size()] = true;
                m_tempfiles.push_back(temp);
                // Hack here, but really helps perfs: if we happen to
                // create a temp file for, ie, an image attachment, keep
                // it around for preview to use it through get_imgtmp()
                if (!mimetype.compare(0, 6, "image/")) {
                    m_imgtmp = m_tempfiles.back();
                }
            }
        }
    }
    if (!setres) {
        LOGINFO("FileInterner::addHandler: set_doc failed inside [" << m_fn <<
                "]  for mtype " << mimetype << "\n");
    }
    // Add handler and go on, maybe this one will give us text...
    m_handlers.push_back(newflt);
    LOGDEB1("FileInterner::addHandler: added\n");
    return setres ? ADD_OK : ADD_BREAK;
}

// Information and debug after a next_document error
void FileInterner::processNextDocError(Rcl::Doc &doc)
{
    collectIpathAndMT(doc);
    m_reason = m_handlers.back()->get_error();
    checkExternalMissing(m_reason, doc.mimetype);
    LOGERR("FileInterner::internfile: next_document error [" << m_fn <<
           (doc.ipath.empty() ? "" : "|") << doc.ipath << "] " <<
           doc.mimetype << " " << m_reason << "\n");
}

FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, const string& ipath)
{
    LOGDEB("FileInterner::internfile. ipath [" << ipath << "]\n");

    // Get rid of possible image tempfile from older call
    m_imgtmp = TempFile();

    if (m_handlers.size() < 1) {
        // Just means the constructor failed
        LOGDEB("FileInterner::internfile: no handler: constructor failed\n");
        return FIError;
    }

    // Input Ipath vector when retrieving a given subdoc for previewing
    vector<string> vipath;
    if (!ipath.empty() && !m_direct) {
        stringToTokens(ipath, vipath, cstr_isep, true);
        for (auto& entry: vipath) {
            entry = colon_restore(entry);
        }
        if (!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){
            LOGERR("FileInterner::internfile: can't skip\n");
            return FIError;
        }
    }

    // Try to get doc from the topmost handler
    // Security counter: looping happens when we stack one other 
    // handler or when walking the file document tree without finding
    // something to index (typical exemple: email with multiple image
    // attachments and no image filter installed). So we need to be
    // quite generous here, especially because there is another
    // security in the form of a maximum handler stack size.
    int loop = 0;
    while (!m_handlers.empty()) {
        CancelCheck::instance().checkCancel();
        if (loop++ > 1000) {
            LOGERR("FileInterner:: looping!\n");
            return FIError;
        }
        // If there are no more docs at the current top level we pop and
        // see if there is something at the previous one
        if (!m_handlers.back()->has_documents()) {
            // If looking for a specific doc, this is an error. Happens if
            // the index is stale, and the ipath points to the wrong message
            // for exemple (one with less attachments)
            if (m_forPreview) {
                m_reason += "Requested document does not exist. ";
                m_reason += m_handlers.back()->get_error();
                LOGERR("FileInterner: requested document does not exist\n");
                return FIError;
            }
            popHandler();
            continue;
        }

        // While indexing, don't stop on next_document() error. There
        // might be ie an error while decoding an attachment, but we
        // still want to process the rest of the mbox! For preview: fatal.
        if (!m_handlers.back()->next_document()) {
            // Using a temp doc here because else we'd need to pop the
            // last ipath element when we do the pophandler (else the
            // ipath continues to grow in the current doc with each
            // consecutive error). It would be better to have
            // something like ipath.pop(). We do need the MIME type
            Rcl::Doc doc1 = doc;
            processNextDocError(doc1);
            doc.mimetype = doc1.mimetype;
            if (m_forPreview) {
                m_reason += "Requested document does not exist. ";
                m_reason += m_handlers.back()->get_error();
                LOGERR("FileInterner: requested document does not exist\n");
                return FIError;
            }
            popHandler();
            continue;
        }

        // Look at the type for the next document and possibly add
        // handler to stack.
        switch (addHandler()) {
        case ADD_OK: // Just go through: handler has been stacked, use it
            LOGDEB2("addHandler returned OK\n");
            break; 
        case ADD_CONTINUE: 
            // forget this doc and retrieve next from current handler
            // (ipath stays same)
            LOGDEB2("addHandler returned CONTINUE\n");
            continue;
        case ADD_BREAK: 
            // Stop looping: doc type ok, need complete its processing
            // and return it
            LOGDEB2("addHandler returned BREAK\n");
            goto breakloop; // when you have to you have to
        case ADD_ERROR: 
            LOGDEB2("addHandler returned ERROR\n");
            return FIError;
        }

        // If we have an ipath, meaning that we are seeking a specific
        // document (ie: previewing a search result), we may have to
        // seek to the correct entry of a compound doc (ie: archive or
        // mail). When we are out of ipath entries, we stop seeking,
        // the handlers stack may still grow for translation (ie: if
        // the target doc is msword, we'll still stack the
        // word-to-text translator).
        if (!ipath.empty()) {
            if (m_handlers.size() <= vipath.size() &&
                !m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])) {
                LOGERR("FileInterner::internfile: can't skip\n");
                return FIError;
            }
        }
    }
breakloop:
    if (m_handlers.empty()) {
        LOGDEB("FileInterner::internfile: conversion ended with no doc\n");
        return FIError;
    }

#ifdef DEBUGMETASTACK
    for (const auto& h : m_handlers) {
        dumpmap(std::cerr, h->get_meta_data());
    }
#endif // DEBUGMETASTACK
    
    // Compute ipath and significant mimetype.  ipath is returned
    // through doc.ipath. We also retrieve some metadata fields from
    // the ancesters (like date or author). This is useful for email
    // attachments. The values will be replaced by those internal to
    // the document (by dijontorcl()) if any, so the order of calls is
    // important. We used to only do this when indexing, but the aux
    // fields like filename and author may be interesting when
    // previewing too
    collectIpathAndMT(doc);
    if (m_forPreview) {
        doc.mimetype = m_reachedMType;
    }
    // Keep this AFTER collectIpathAndMT
    dijontorcl(doc);

    if (doc.ipath.empty()) {
        // For a top level doc, add (concatenate) the external attributes.
        // See more detail in the comments at the top of collectIpathAndMT.
        if (!m_noxattrs) {
            docFieldsFromXattrs(m_cfg, m_XAttrsFields, doc);
        }
        docFieldsFromMetaCmds(m_cfg, m_cmdFields, doc);
    }
    // Remove spurious commas added by addmeta() from metadata values
    trimmeta(doc.meta);
    trimstring(doc.dmtime, " ,");

    // Fix the bogus mtype used to force mh_text processing of text subdocs
    if (doc.mimetype == "text/plain1") {
        doc.mimetype = "text/plain";
    }
    // Possibly destack so that we can test for FIDone. While doing this
    // possibly set aside an ancestor html text (for the GUI preview)
    while (!m_handlers.empty() && !m_handlers.back()->has_documents()) {
        if (m_forPreview) {
            MimeHandlerHtml *hth = dynamic_cast<MimeHandlerHtml*>(m_handlers.back());
            if (hth) {
                m_html = hth->get_html();
            }
        }
        popHandler();
    }
    if (m_handlers.empty())
        return FIDone;
    else 
        return FIAgain;
}

bool FileInterner::tempFileForMT(TempFile& otemp, RclConfig* cnf, 
                                 const string& mimetype)
{
    TempFile temp(cnf->getSuffixFromMimeType(mimetype));
    if (!temp.ok()) {
        LOGERR("FileInterner::tempFileForMT: can't create temp file\n");
        return false;
    }
    otemp = temp;
    return true;
}

// Static method, creates a FileInterner object to do the job.
bool FileInterner::idocToFile(
    TempFile& otemp, const string& tofile, RclConfig *cnf,
    const Rcl::Doc& idoc, bool uncompress)
{
    LOGDEB("FileInterner::idocToFile\n");

    if (idoc.ipath.empty()) {
        // Because of the mandatory first conversion in the
        // FileInterner constructor, need to use a specific method.
        return topdocToFile(otemp, tofile, cnf, idoc, uncompress);
    }

    // We set FIF_forPreview for consistency with the previous version
    // which determined this by looking at mtype!=null. Probably
    // doesn't change anything in this case.
    FileInterner interner(idoc, cnf, FIF_forPreview);
    interner.setTargetMType(idoc.mimetype);
    return interner.interntofile(otemp, tofile, idoc.ipath, idoc.mimetype);
}

// This is only needed because the FileInterner constructor always performs
// the first conversion, so that we need another approach for accessing the
// original document (targetmtype won't do).
bool FileInterner::topdocToFile(
    TempFile& otemp, const string& tofile,
    RclConfig *cnf, const Rcl::Doc& idoc, bool uncompress)
{
    std::unique_ptr<DocFetcher> fetcher(docFetcherMake(cnf, idoc));
    if (!fetcher) {
        LOGERR("FileInterner::topdocToFile no backend\n");
        return false;
    }
    DocFetcher::RawDoc rawdoc;
    if (!fetcher->fetch(cnf, idoc, rawdoc)) {
        LOGERR("FileInterner::topdocToFile fetcher failed\n");
        return false;
    }
    const char *filename = "";
    TempFile temp;
    if (tofile.empty()) {
        if (!tempFileForMT(temp, cnf, idoc.mimetype)) {
            return false;
        }
        filename = temp.filename();
    } else {
        filename = tofile.c_str();
    }
    string reason;
    switch (rawdoc.kind) {
    case DocFetcher::RawDoc::RDK_FILENAME: {
        string fn(rawdoc.data);
        TempFile temp;
        if (uncompress && isCompressed(fn, cnf)) {
            if (!maybeUncompressToTemp(temp, fn, cnf, idoc)) {
                LOGERR("FileInterner::idocToFile: uncompress failed\n");
                return false;
            }
        }
        fn = temp.ok() ? temp.filename() : rawdoc.data;
        if (!copyfile(fn.c_str(), filename, reason)) {
            LOGERR("FileInterner::idocToFile: copyfile: " << reason << "\n");
            return false;
        }
    }
        break;
    case DocFetcher::RawDoc::RDK_DATA:
    case DocFetcher::RawDoc::RDK_DATADIRECT:
        if (!stringtofile(rawdoc.data, filename, reason)) {
            LOGERR("FileInterner::idocToFile: stringtofile: " << reason <<"\n");
            return false;
        }
        break;
    default:
        LOGERR("FileInterner::FileInterner(idoc): bad rawdoc kind ??\n");
    }

    if (tofile.empty())
        otemp = temp;
    return true;
}

bool FileInterner::interntofile(TempFile& otemp, const string& tofile,
                                const string& ipath, const string& mimetype)
{
    if (!ok()) {
        LOGERR("FileInterner::interntofile: constructor failed\n");
        return false;
    }
    Rcl::Doc doc;
    Status ret = internfile(doc, ipath);
    if (ret == FileInterner::FIError) {
        LOGERR("FileInterner::interntofile: internfile() failed\n");
        return false;
    }

    // Specialcase text/html. This is to work around a bug that will
    // get fixed some day: the internfile constructor always loads the
    // first handler so that at least one conversion is always
    // performed (and the access to the original data may be lost). A
    // common case is an "Open" on an HTML file (we end up
    // with text/plain content). As the HTML version is saved in this
    // case, use it.
    if (!stringlowercmp(cstr_texthtml, mimetype) && !get_html().empty()) {
        doc.text = get_html();
        doc.mimetype = cstr_texthtml;
    }

    const char *filename;
    TempFile temp;
    if (tofile.empty()) {
        if (!tempFileForMT(temp, m_cfg, mimetype)) {
            return false;
        }
        filename = temp.filename();
    } else {
        filename = tofile.c_str();
    }
    string reason;
    if (!stringtofile(doc.text, filename, reason)) {
        LOGERR("FileInterner::interntofile: stringtofile : " << reason << "\n");
        return false;
    }

    if (tofile.empty())
        otemp = temp;
    return true;
}

bool FileInterner::isCompressed(const string& fn, RclConfig *cnf)
{
    LOGDEB("FileInterner::isCompressed: [" << fn << "]\n");
    struct PathStat st;
    if (path_fileprops(fn, &st) < 0) {
        LOGERR("FileInterner::isCompressed: can't stat [" << fn << "]\n");
        return false;
    }
    string l_mime = mimetype(fn, cnf, true, st);
    if (l_mime.empty()) {
        LOGERR("FileInterner::isUncompressed: can't get mime for [" << fn <<
               "]\n");
        return false;
    }

    vector<string> ucmd;
    if (cnf->getUncompressor(l_mime, ucmd)) {
        return true;
    }
    return false;
}

// Static.
bool FileInterner::maybeUncompressToTemp(TempFile& temp, const string& fn, 
                                         RclConfig *cnf, const Rcl::Doc& doc)
{
    LOGDEB("FileInterner::maybeUncompressToTemp: [" << fn << "]\n");
    struct PathStat st;
    if (path_fileprops(fn.c_str(), &st) < 0) {
        LOGERR("FileInterner::maybeUncompressToTemp: can't stat [" <<fn<<"]\n");
        return false;
    }
    string l_mime = mimetype(fn, cnf, true, st);
    if (l_mime.empty()) {
        LOGERR("FileInterner::maybeUncompress.: can't id. mime for [" <<
               fn << "]\n");
        return false;
    }

    vector<string>ucmd;
    if (!cnf->getUncompressor(l_mime, ucmd)) {
        return true;
    }
    // Check for compressed size limit
    int maxkbs = -1;
    if (cnf->getConfParam("compressedfilemaxkbs", &maxkbs) &&
        maxkbs >= 0 && int(st.pst_size / 1024) > maxkbs) {
        LOGINFO("FileInterner:: " << fn << " over size limit " << maxkbs <<
                " kbs\n");
        return false;
    }
    temp = TempFile(cnf->getSuffixFromMimeType(doc.mimetype));
    if (!temp.ok()) {
        LOGERR("FileInterner: cant create temporary file\n");
        return false;
    }

    Uncomp uncomp;
    string uncomped;
    if (!uncomp.uncompressfile(fn, ucmd, uncomped)) {
        return false;
    }

    // uncompressfile choses the output file name, there is good
    // reason for this, but it's not nice here. Have to move, the
    // uncompressed file, hopefully staying on the same dev.
    string reason;
    if (!renameormove(uncomped.c_str(), temp.filename(), reason)) {
        LOGERR("FileInterner::maybeUncompress: move [" << uncomped <<
               "] -> [" << temp.filename() << "] failed: " << reason << "\n");
        return false;
    }
    return true;
}