File: CQInvertedFile.h

package info (click to toggle)
gnuift 0.1.14%2Bds-1
  • links: PTS
  • area: main
  • in suites: stretch
  • size: 5,632 kB
  • ctags: 2,973
  • sloc: cpp: 15,867; sh: 8,281; ansic: 1,812; perl: 1,007; php: 651; makefile: 483; lisp: 344
file content (456 lines) | stat: -rw-r--r-- 11,509 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
/* -*- mode: c++ -*- 
*/
/* 

    GIFT, a flexible content based image retrieval system.
    Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva

     Copyright (C) 2003, 2004 Bayreuth University
      2005 Bamberg University
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/
// -*- mode: c++ -*-

/**
*
* CQInvertedFile.h - each separate Query is 
* one instance of this class, sets everything for one query
*
* @Author: Wolfgang M&uuml;ller, Pruning and some of the bugfixes Henning M&uuml;ller
*          
*
* modification history:
*
* WM 08  99 added things for session management
*           added compiler defines documentation
* HM 030999 created the documentation
* HM 030599 put in the parameters for the pruning
* WM 10  98 creation
*
*
*
* compiler defines used:
* _CINVERTEDFILEQUERY avoids double inclusion
*
*/
#ifndef _CINVERTEDFILEQUERY
#define _CINVERTEDFILEQUERY
#include "libGIFTQuInvertedFile/include/uses-declarations.h"
#include <memory>
#include <map>
#include "libMRML/include/CSelfDestroyPointer.h"
#include "libMRML/include/CRelevanceLevelList.h"
#include "libMRML/include/CIDRelevanceLevelPairList.h"
#include "libGIFTQuInvertedFile/include/CWeightingFunctionPointerList.h"
#include "libGIFTQuInvertedFile/include/CWeightingFunctionPointerHash.h"
#include "libMRML/include/CAlgorithm.h"
#include "libMRML/include/CQuery.h"
#include "libGIFTAcInvertedFile/include/CAcInvertedFile.h"
#include "libGIFTQuInvertedFile/include/CWeightingFunction.h"
#include "libGIFTQuInvertedFile/include/CQueryNormalizer.h"
#include "libGIFTAcInvertedFile/include/WeightingFunctionsAndNormalizers.h"
class CScoreBoard;
class CAcInvertedFile;

#include "libGIFTQuInvertedFile/include/CWeighter.h"

/** A factory for weighting functions with associated normalizers.
    The weighters it hands out are intended to be members of
    CQInvertedFile, where they will be used.

    The factory is a map from a string ID to a CWeighter pointer.
    The map base class is inherited protected, so the container
    interface is not part of the public interface of the factory.

    NOTE(review): a destructor is declared, but neither a copy
    constructor nor an assignment operator. If the destructor deletes
    the mapped pointers (as its comment states), copying a factory
    would presumably lead to a double deletion -- confirm that no
    copies are ever made.
*/
class CWeighterFactory:protected map< string,CWeighter* >{
public:
  /** Clones a weighter from the factory and sets the pointers in
      such a way that normalizers etc. point to each other
      correctly.

      The caller still has to set the accessor on the returned
      weighter. (Open question left by the original author: why not
      set it here at once?)

      @param inID the ID of the weighter to clone; presumably the key
                  under which it is stored in the map -- TODO confirm
      @return the newly cloned weighter.
              NOTE(review): ownership of the returned pointer and the
              behaviour for an unknown ID are not visible in this
              header -- check the implementation.
   */
  CWeighter* newWeighter(const string& inID)const;
  /** Constructor: initializes everything, fills the map etc.
  */
  CWeighterFactory();
  /** Destructor: deletes the content the map points to.
  */
  ~CWeighterFactory();
};

/** The query manager for queries on inverted files.

    The class declares three groups of members:
    - the query evaluation itself (fastQuery, fastQueryByFeature,
      the *ToScore functions and the protected helpers),
    - the blocking of feature groups (mBlockingOn, mBlockingArray and
      the *Block* functions),
    - the pruning configuration (the m*Pruning* members and the
      use... / release... functions).

    NOTE(review): everything declared after the "public:" label,
    including the blocking and pruning data members, is publicly
    accessible.
*/
class CQInvertedFile:public CQuery{

protected:


  /** NOTE(review): undocumented in the original. The name suggests a
      debugging handle to a CAlgorithm -- confirm against the
      implementation. */
  CAlgorithm* mDeb;

  /** You can choose to use only every mModulo-th feature */
  int mModulo;
  /** Every mModulo-th feature, starting at mModuloClass  */
  int mModuloClass;
  /** 
      This is a factory for weighters.
      With this we can find the weighters.
   */
  CWeighterFactory mWeighterFactory;
  /**
     The weighter currently used.
  */
  CSelfDestroyPointer<CWeighter> mWeighter;

  /** Holds the features of the query together with their weights
      (a hash of weighting functions). */
  CSelfDestroyPointer<CWeightingFunctionPointerHash> 
  mQueryFeatureWeighters;

  /**
   *
   * Calculates the score for all the images based on a list of
   * features.
   *
   * @param inoutScoreBoard the score board that is updated
   * @param inFeatures      the list of weighted query features
   * @param lPositive       NOTE(review): presumably selects whether
   *                        positive or negative features are
   *                        processed -- TODO confirm
   * @return NOTE(review): the meaning of the returned value is not
   *         documented in this header
   *
   * @author Wolfgang M&uuml;ller
   */
  double keepScore(CScoreBoard& inoutScoreBoard,
		   const CWeightingFunctionPointerList& inFeatures,
		   bool lPositive)const;

  /**
   *
   * Calculates the results for the images using some sort of
   * pruning.
   *
   * @param inoutScoreBoard            the score board that is updated
   * @param inFeatures                 the list of weighted query
   *                                   features
   * @param inPositive                 NOTE(review): presumably the
   *                                   same meaning as lPositive in
   *                                   keepScore -- TODO confirm
   * @param inDesiredNumberOfDocuments the number of documents the
   *                                   caller wants to retrieve
   *                                   (as the name says; exact use
   *                                   not visible here)
   *
   * @author Henning M&uuml;ller
   */
  double keepScorePruning(CScoreBoard& inoutScoreBoard,
			  const CWeightingFunctionPointerList& inFeatures,
			  bool inPositive,
			  int inDesiredNumberOfDocuments)const;

  /**
   *
   * 
   * @short creates a list of all the features which 
   * are in one or more of the query images.
   *
   * Should be replaced by a version which operates on IDs, not
   * URLs.
   *
   * @param inQuery                  the query images with their
   *                                 relevance levels
   * @param outQueryFeatureWeighters receives the features found
   *
   * @author Wolfgang M&uuml;ller + Pruning: Henning M&uuml;ller/reprogrammed WM 09-10-00
   */
  void buildQueryHash(CRelevanceLevelList& inQuery,
		      CWeightingFunctionPointerHash& 
		      outQueryFeatureWeighters)const;

  /**
   *
   * Builds a list of normalized weighting functions from a hash of
   * weighting functions.
   *
   * @param inPositiveRelevanceSum sum of the positive relevances
   * @param inNegativeRelevanceSum sum of the negative relevances
   * @param inQFW                  the hash of weighting functions
   *                               (passed by non-const reference)
   * @param outQFW                 receives the normalized list
   *
   */
  void buildNormalizedQueryList(double inPositiveRelevanceSum,
				double inNegativeRelevanceSum,
				CWeightingFunctionPointerHash& 
				inQFW,
				CWeightingFunctionPointerList& 
				outQFW)const;

  /**
   *
   * Normalizes a hash of weighting functions.
   *
   * NOTE(review): the original comment was a copy of the one on
   * buildNormalizedQueryList ("building a list ..."). This overload
   * has no output list, so it presumably normalizes inQFW in
   * place -- TODO confirm.
   *
   * @param inPositiveRelevanceSum sum of the positive relevances
   * @param inNegativeRelevanceSum sum of the negative relevances
   * @param inQFW                  the hash of weighting functions
   *
   */
  void buildNormalizedQueryHash(double inPositiveRelevanceSum,
				double inNegativeRelevanceSum,
				CWeightingFunctionPointerHash& 
				inQFW)const;
  
protected:
  /**
   *
   * Initializer, used by both constructors.
   *
   * NOTE(review): only one constructor is declared in this class
   * now; "both" may be a leftover from an earlier version.
   *
   */
  void init();

public:
  /** 
      Finishes the initialisation phase:
      makes the weighting function know who its normalizers are.
  */
  void finishInit();
  /**
   *
   * New constructor, taking as parameter the
   * algorithm structure, which contains all 
   * the algorithm configuration.
   *
   * @param inAccessorAdminCollection the collection of accessor
   *                                  administrators to use
   * @param inAlgorithm               the algorithm configuration
   *
   */
  CQInvertedFile(CAccessorAdminCollection& inAccessorAdminCollection,
		 CAlgorithm& inAlgorithm);
  /**
   *
   * Destructor.
   *
   */
  ~CQInvertedFile();
  /**
   *
   * Sets the algorithm.
   * Same scheme as in setCollection.
   *
   * @param inAlgorithm the algorithm configuration to use
   * @return NOTE(review): presumably true on success -- TODO confirm
   *
   */
  virtual bool setAlgorithm(CAlgorithm& inAlgorithm);


  /**
   *
   * @short a query which returns ID/RelevanceLevel pairs
   * instead of URL/RelevanceLevel pairs.
   *
   * This is faster for merging tasks (which explains the name).
   * Queries for URLs are answered by query (inherited).
   *
   * @param inQuery                     the query as an XML element
   * @param inNumberOfInterestingImages NOTE(review): presumably the
   *                                    number of results wanted --
   *                                    TODO confirm
   * @param inDifferenceToBest          NOTE(review): presumably a
   *                                    cutoff relative to the best
   *                                    score -- TODO confirm
   * @return the list of ID/RelevanceLevel pairs.
   *         NOTE(review): ownership of the returned pointer is not
   *         visible in this header.
   *
   */
  virtual CIDRelevanceLevelPairList* fastQuery(const CXMLElement& inQuery,
					       int inNumberOfInterestingImages,
					       double inDifferenceToBest);
  
  /**
   *
   * Assuming that a correct set of weighting functions has been
   * built by fastQuery (or another function), this function will do
   * the rest.
   *
   * NOTE(review): the original comment speaks of a
   * CWeightingFunctionPointerHash, but the parameter is a
   * CWeightingFunctionPointerList.
   *
   * The parameters inNumberOfInterestingImages and
   * inDifferenceToBest presumably have the same meaning as in
   * fastQuery -- TODO confirm.
   *
   */
  virtual CIDRelevanceLevelPairList* fastQueryByFeature(const  CWeightingFunctionPointerList& inQuery,
							int inNumberOfInterestingImages,
							double inDifferenceToBest);
  
  /**
   *
   * Returns the score for one image in the query.
   * The image is given by its ID.
   *
   * @param inDID   the ID of the image
   * @param inQuery the weighted features of the query
   *
   */
  double DIDToScore(TID inDID,
		    const CWeightingFunctionPointerHash& inQuery)const;


  /**
   * 
   * Returns the score for one image in the query.
   * The image is given by its URL.
   *
   * @param inURL   the URL of the image
   * @param inQuery the weighted features of the query
   *
   **/
  double URLToScore(const string& inURL,
		    const CWeightingFunctionPointerHash& inQuery)const;

  /**
   *
   * Turns the feature list of a document into a score.
   *
   * @param inDID         the ID of the document
   * @param inFeatureList the features of the document
   * @param inQuery       the weighted features of the query
   *
   */
  double FeatureListToScore(TID inDID,
			    const CDocumentFrequencyList& inFeatureList,
			    const CWeightingFunctionPointerHash& inQuery)const;

  /**
   *
   * NOTE(review): undocumented in the original. Judging from the
   * name and the parameters, this presumably builds the normalized
   * hash of weighting functions for a single query element and
   * stores it in outQueryFeatureWeighters -- TODO confirm against
   * the implementation.
   *
   */
  void buildNormalizedQueryHash(const CRelevanceLevel& inQuery,
				CWeightingFunctionPointerHash& 
				outQueryFeatureWeighters)const;


  /* Variables needed for the blocking of special feature groups */

  /** The upper limit for the number of feature groups; this is the
      size of the array used for the blocking. */
  static const int MAXIMUMNUMBEROFEATUREGROUPS=50; 
  /** You need to switch this on to activate the blocking of
      features. */
  bool mBlockingOn;
  /** Array with one entry per feature group, telling whether the
      group is blocked or not. */
  bool mBlockingArray[MAXIMUMNUMBEROFEATUREGROUPS];


  /**
   *
   * activateBlockingFeatures - sets the variable to block groups of features
   *
   */
  void activateBlockingFeatures();

  /**
   *
   * releaseBlockingFeatures - this turns off the blocking of features
   *
   */
  void releaseBlockingFeatures();

  /**
   *
   * featuresBlocked - returns true if the features are blocked
   *
   */
  bool featuresBlocked()const;

  /**
   *
   * blockFeatureGroup - this blocks one special group of features
   *
   * NOTE(review): featureNumber presumably indexes mBlockingArray
   * and so must be below MAXIMUMNUMBEROFEATUREGROUPS; whether the
   * implementation checks the range is not visible here.
   *
   */
  void blockFeatureGroup(const int featureNumber);

  /**
   *
   * unblockFeatureGroup - this releases the blocking of one feature group
   *
   * NOTE(review): same presumed range restriction on featureNumber
   * as for blockFeatureGroup.
   *
   */
  void unblockFeatureGroup(const int featureNumber);

  /**
   *
   * isBlocked - returns true if the feature is blocked and false if not
   *
   */
  bool isBlocked(const int featureNumber)const;



  /* The following are the variables and functions used for the
     pruning process. */

  /** The maximum number of sets which can be used to prune the
      score board; this is the size of mScoreBoardPruningArray.
   */
  static const int MAX_SCOREBOARD_PRUNINGS=10; 

  /** Shows if any pruning is used.
   */
  bool mPruningUsed;

  /** True if the score board is pruned.
   */
  bool mScoreBoardPruningUsed;
  /**
     Parameters for score board pruning.
     @author Henning M&uuml;ller
  */       
  typedef struct{ 
    /**
       @short this marks a fraction of the number of features used
    */
    double stopAfterFeature;
    /**
       Reduce the score board to a factor of the number of images
       you want to retrieve.
    */
    double reduceTo;
  } parameterPruningType;
  /** The score board pruning entries.
      @author Henning M&uuml;ller */
  parameterPruningType 
     mScoreBoardPruningArray[MAX_SCOREBOARD_PRUNINGS];
  /** NOTE(review): presumably the number of entries of
      mScoreBoardPruningArray that are in use -- TODO confirm.
      @author Henning M&uuml;ller */
  int mNumberofUsedScoreBoardPrunings;

  /** True if feature pruning is used. */
  bool mFeaturePruningUsed;
  /** NOTE(review): presumably the percentage of the features that
      is evaluated when feature pruning is on (see
      useFeaturePruning) -- TODO confirm.
      @author Henning M&uuml;ller */
  double mPercentageofFeatures;

  /** True if time pruning is used. */
  bool mTimePruningUsed;
  /** NOTE(review): presumably the time cutoff set by
      useTimePruning; the unit is not visible here -- TODO confirm.
      @author Henning M&uuml;ller */
  double mStoppingTime;

  /** If this variable is used with score board pruning, the
      resulting score board will be evaluated with an uninverted
      file.
      @author Henning M&uuml;ller */
  bool mEvaluateAfterPruning;

  /**
   *
   * Resets all the variables for the pruning.
   *
   * @author Henning M&uuml;ller 
   */
  void releaseAllPrunings();


  /**
   *
   * Activates the feature pruning, which evaluates only
   * a certain percentage of the features.
   *
   * @param percentage the percentage of the features to evaluate
   *
   * @author Henning M&uuml;ller 
   */
  void useFeaturePruning(double percentage);

  /**
   *
   * Sets the feature pruning variables back to their normal status.
   *
   * @author Henning M&uuml;ller 
   */
  void releaseFeaturePruning();

  /**
   *
   * Activates the time pruning with the given cutoff point.
   *
   * @param inTimeCutoffPoint the cutoff point (unit not visible in
   *                          this header)
   *
   * @author Henning M&uuml;ller 
   */
  void useTimePruning(double inTimeCutoffPoint);

  /**
   *
   * Releases the time pruning.
   *
   * @author Henning M&uuml;ller 
   */
  void releaseTimePruning();

  /**
   *
   * Creates one entry in the score board pruning.
   *
   * NOTE(review): the entries are presumably stored in
   * mScoreBoardPruningArray, which holds at most
   * MAX_SCOREBOARD_PRUNINGS entries; what happens when it is full
   * is not visible here.
   *
   * @param inCutAfterFraction presumably stored as stopAfterFeature
   *                           -- TODO confirm
   * @param inReduceToFactor   presumably stored as reduceTo
   *                           -- TODO confirm
   *
   * @author Henning M&uuml;ller 
   */
  void useScoreBoardPruning(double inCutAfterFraction,
			    double    inReduceToFactor);

  /**
   *
   * Releases all the settings for the score board pruning.
   *
   * @author Henning M&uuml;ller 
   */
  void releaseScoreBoardPruning();

  /**
   *
   * Activates the evaluation after the score board pruning.
   *
   * @author Henning M&uuml;ller 
   */
  void useEvaluateAfterPruning();

  /**
   *
   * Releases the evaluation after the score board pruning.
   *
   * @author Henning M&uuml;ller 
   */
  void releaseEvaluateAfterPruning();
  /**
   * For FerDeLance queries.
   *
   * @return a reference to a weighter; presumably the one held in
   *         mWeighter -- TODO confirm
   */
  CWeighter& getWeighter();
}; /* end of class */

#endif