File: csx-printf.cpp

package info (click to toggle)
libsdsl 2.1.1%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster
  • size: 3,980 kB
  • sloc: cpp: 42,286; makefile: 1,171; ansic: 318; sh: 201; python: 27
file content (75 lines) | stat: -rw-r--r-- 2,855 bytes parent folder | download | duplicates (17)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include <sdsl/suffix_trees.hpp>
#include <iostream>
#include <string>

using namespace sdsl;
using namespace std;

// Row format string handed to csXprintf; main replaces it with argv[2] when
// that argument is given. print_usage shows it as the FORMAT default.
std::string format = "%3I%3S %3s %3P %3p %3L %3B  %T";
// Line printed ahead of the rows produced for each input line; main replaces
// it with argv[3] when that argument is given.
std::string header = "  i SA ISA PSI  LF LCP BWT  TEXT";
// Capacity, in bytes, of the `line` buffer below.
static const std::streamsize BUF_SIZE = 4096;
// Fixed-capacity character buffer of BUF_SIZE bytes.
char line[BUF_SIZE];


// sdsl csa_bitcompressed instantiated with int_alphabet<>; used only as the
// CSA parameter of cst_int_t.
using csa_int_t  = csa_bitcompressed<int_alphabet<>>;
// cst_sct3 with its default template arguments; main builds this type when
// the input mode is '1' (byte sequence).
using cst_byte_t = cst_sct3<>;
// cst_sct3 on top of csa_int_t; main builds this type when the input mode is
// 'd' (sequence of decimal numbers).
using cst_int_t  = cst_sct3<csa_int_t>;

/**
 * Prints the command-line help text to standard output.
 *
 * The text lists the positional arguments (X, FORMAT, HEADER, SENTINEL),
 * shows the current values of the global `format` and `header` strings as
 * the FORMAT/HEADER defaults, and describes every `%` token accepted in the
 * format string.
 *
 * \param command Program name shown in the "Usage:" line (main passes
 *                argv[0]).
 */
void print_usage(const char* command)
{
    // Adjacent string literals with an explicit "\n" per output line are used
    // instead of backslash line continuations. A continuation splices the
    // source lines with no separator, which glues words of the explanatory
    // paragraph together ("string.All", "Tokenswhich", "positivenumber").
    std::cout <<
        "A pretty printer for suffix array/tree members.\n"
        "Transforms each input line into a CST and outputs\n"
        "formatted suffix array/tree members.\n"
        "Usage: " << command << " X \"[FORMAT]\" \"[HEADER]\" \"[SENTINEL]\"\n"
        "X       : Input is interpreted dependent on X.\n"
        "         X=1: byte sequence.\n"
        "         X=d: sequence of decimal numbers.\n"
        "FORMAT  : Format string. Default=`" << format << "`.\n"
        "HEADER  : Header string. Default=`" << header << "`.\n"
        "SENTINEL: Sentinel character. \n"
        "\n"
        "Each line of the output will be formatted according to the format string.\n"
        "All content, except tokens which start with `%` will be copied. Tokens\n"
        "which start with `%` will be replaced as follows (let w be a positive\n"
        "number. setw(w) is used to format single numbers):\n"
        "Token      |  Replacement | Comment\n"
        "-----------------------------------------------------------------------\n"
        " %[w]I     | Row index i.                           |                  \n"
        " %[w]S     | SA[i]                                  |                  \n"
        " %[w]s     | ISA[i]                                 |                  \n"
        " %[w]P     | PSI[i]                                 |                  \n"
        " %[w]p     | LF[i]                                  |                  \n"
        " %[w]L     | LCP[i]                                 | only for CSTs    \n"
        " %[w]B     | BWT[i]                                 |                  \n"
        " %[w[:W]]T | Print min(idx.size(),w) chars of each  |                  \n"
        "           | suffix, each char formatted by setw(W).|                  \n"
        " %%        | %                                      |                  \n";
}

/**
 * Entry point: reads standard input line by line, builds a suffix tree from
 * each line and prints it with csXprintf.
 *
 * Command-line arguments:
 *   argv[1]  Input mode; its first character must be '1' (line is a byte
 *            sequence) or 'd' (line is a sequence of decimal numbers).
 *   argv[2]  Optional; replaces the global `format` string.
 *   argv[3]  Optional; replaces the global `header` string.
 *   argv[4]  Optional; its first character is passed to csXprintf as the
 *            sentinel. Defaults to '$' in byte mode and '0' in decimal mode.
 *
 * For every input line the header is printed, then the csXprintf output,
 * then an empty line.
 *
 * \return 1 after printing the usage text when the mode argument is missing
 *         or unsupported; 0 once the input is exhausted.
 */
int main(int argc, char* argv[])
{
    if (argc < 2 or !('1' == argv[1][0] or 'd' == argv[1][0])) {
        print_usage(argv[0]);
        return 1;
    }
    if (argc > 2) {
        format = argv[2];
    }
    if (argc > 3) {
        header = argv[3];
    }

    // Mode and sentinel do not change between input lines, so resolve them
    // once. The guard above guarantees the mode is either '1' or 'd'.
    const bool byte_mode = ('1' == argv[1][0]);
    const char sentinel  = (argc > 4) ? argv[4][0] : (byte_mode ? '$' : '0');

    // std::getline grows the string to fit the line. Reading into a
    // fixed-size char buffer makes the stream set failbit on a line that does
    // not fit, which would end the loop silently and drop the remaining input.
    std::string input;
    while (std::getline(std::cin, input)) {
        std::cout << header << std::endl;
        if (byte_mode) {
            cst_byte_t cst;
            // Passed as const char* so construct_im treats it as in-memory
            // text; 1 selects byte-wise interpretation.
            construct_im(cst, input.c_str(), 1);
            csXprintf(std::cout, format, cst, sentinel);
        } else {
            cst_int_t cst;
            // 'd' selects interpretation as decimal numbers.
            construct_im(cst, input.c_str(), 'd');
            csXprintf(std::cout, format, cst, sentinel);
        }
        std::cout << std::endl;
    }
    return 0;
}