1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
|
//
// DocumentRef.h
//
// DocumentRef: Reference to an indexed document. Keeps track of all
//              information stored about the document, whether it was
//              recorded by the dig or is temporary search information.
//
// Part of the ht://Dig package <https://htdig.sourceforge.net/>
// Copyright (c) 1995-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: DocumentRef.h,v 1.29 2004/05/28 13:15:12 lha Exp $
//
#ifndef _DocumentRef_h_
#define _DocumentRef_h_
#include "htString.h"
#include "List.h"
#include "HtWordList.h"
#include <time.h>
enum ReferenceState
{
Reference_normal,
Reference_not_found,
Reference_noindex,
Reference_obsolete
};
class DocumentRef : public Object
{
public:
//
// Construction/Destruction
//
DocumentRef();
~DocumentRef();
//
// A DocumentRef can read itself from a character string and
// convert itself into a character string
//
void Serialize(String &s);
void Deserialize(String &s);
//
// Access to the members
//
int DocID() {return docID;}
char *DocURL() {return docURL;}
time_t DocTime() {return docTime;}
char *DocTitle() {return docTitle;}
char *DocAuthor() {return docAuthor;}
char *DocHead() {return docHead;}
int DocHeadIsSet() {return docHeadIsSet;}
char *DocMetaDsc() {return docMetaDsc;}
time_t DocAccessed() {return docAccessed;}
int DocLinks() {return docLinks;}
int DocBackLinks() {return docBackLinks;}
List *Descriptions() {return &descriptions;}
ReferenceState DocState() {return docState;}
int DocSize() {return docSize;}
List *DocAnchors() {return &docAnchors;}
double DocScore() {return docScore;}
int DocSig() {return docSig;}
int DocAnchor() {return docAnchor;}
int DocHopCount() {return docHopCount;}
char *DocEmail() {return docEmail;}
char *DocNotification() {return docNotification;}
char *DocSubject() {return docSubject;}
void DocID(int d) {docID = d;}
void DocURL(const char *u) {docURL = u;}
void DocTime(time_t t) {docTime = t;}
void DocTitle(const char *t) {docTitle = t;}
void DocAuthor(const char *a) {docAuthor = a;}
void DocHead(const char *h) {docHeadIsSet = 1; docHead = h;}
void DocMetaDsc(const char *md) {docMetaDsc = md;}
void DocAccessed(time_t t) {docAccessed = t;}
void DocLinks(int l) {docLinks = l;}
void DocBackLinks(int l) {docBackLinks = l;}
void Descriptions(List &l) {descriptions = l;}
void AddDescription(const char *d, HtWordList &words);
void DocState(ReferenceState s) {docState = s;}
void DocState(int s);
void DocSize(int s) {docSize = s;}
void DocSig(int s) {docSig = s;}
void DocAnchors(List &l) {docAnchors = l;}
void AddAnchor(const char *a);
void DocScore(double s) {docScore = s;}
void DocAnchor(int a) {docAnchor = a;}
void DocHopCount(int h) {docHopCount = h;}
void DocEmail(const char *e) {docEmail = e;}
void DocNotification(const char *n) {docNotification = n;}
void DocSubject(const char *s) {docSubject = s;}
void Clear(); // Reset everything
protected:
//
// These values will be stored when serializing
//
// This is the index number of the document in the database.
int docID;
// This is the URL of the document.
String docURL;
// This is the time specified in the document's header
// Usually that's the last modified time, for servers that return it.
time_t docTime;
// This is the time that the last retrieval occurred.
time_t docAccessed;
// This is the stored excerpt of the document, just text.
String docHead;
// This indicates if the stored excerpt of the document has been set.
int docHeadIsSet;
// This is the document-specified description.
// For HTML, that's the META description tag.
String docMetaDsc;
// This is the title of the document.
String docTitle;
// This is the author of the document, as specified in meta information
String docAuthor;
// This is a list of Strings, the text of links pointing to this document.
// (e.g. <a href="docURL">description</a>
List descriptions;
// This is the state of the document--modified, normal, etc.
ReferenceState docState;
// This is the size of the original document.
int docSize;
// This is a count of the links in the document (outgoing links).
int docLinks;
// This is a count of the links to the document (incoming links).
int docBackLinks;
// This is a list of the anchors in the document (i.e. <A NAME=...)
List docAnchors;
// This is a count of the number of hops from start_urls to here.
int docHopCount;
// This is a signature of the document. (e.g. md5sum, checksum...)
// This is currently unused.
long int docSig;
//
// The following values are for the email notification of expiration
//
// This is the email destination for htnotify.
String docEmail;
// This is the date that htnotify should use as comparison.
String docNotification;
// This is the subject of the email sent out by htnotify.
String docSubject;
//
// This is used for searching and is not stored in the database
//
// This is the current score of this document.
double docScore;
// This is the nearest anchor for the search word.
int docAnchor;
};
#endif
|