1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
|
plugin_register token_filters/stop_word
[[0,0.0,0.0],true]
table_create Entries TABLE_NO_KEY
[[0,0.0,0.0],true]
column_create Entries body COLUMN_SCALAR ShortText
[[0,0.0,0.0],true]
table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters TokenFilterStopWord
[[0,0.0,0.0],true]
load --table Entries
[
{"body": "This is a pen"}
]
[[0,0.0,0.0],1]
column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body
[[0,0.0,0.0],true]
column_create Terms is_stop_word COLUMN_SCALAR Bool
[[0,0.0,0.0],true]
load --table Terms
[
{"_key": "is", "is_stop_word": true},
{"_key": "a", "is_stop_word": true}
]
[[0,0.0,0.0],2]
table_tokenize Terms "This is a pen" --index_column index
[
[
0,
0.0,
0.0
],
[
{
"value": "this",
"position": 0,
"force_prefix": false,
"force_prefix_search": false,
"estimated_size": 1
},
{
"value": "pen",
"position": 3,
"force_prefix": false,
"force_prefix_search": false,
"estimated_size": 1
}
]
]
log_level --level debug
[[0,0.0,0.0],true]
select Entries --filter 'body @ "This is a pen"'
[
[
0,
0.0,
0.0
],
[
[
[
1
],
[
[
"_id",
"UInt32"
],
[
"body",
"ShortText"
]
],
[
1,
"This is a pen"
]
]
]
]
#|d| [ii][overlap_token_skip] tid=4 pos=0 estimated_size=1
#|d| [ii][overlap_token_skip] tid=3 pos=3 estimated_size=1
log_level --level notice
[[0,0.0,0.0],true]
|