#include <InvPushIndex.hpp>
Inheritance diagram for InvPushIndex:
Public Methods | |
InvPushIndex () | |
InvPushIndex (const string &prefix, int cachesize=128000000, long maxfilesize=2100000000, DOCID_T startdocid=1) | |
~InvPushIndex () | |
void | setName (const string &prefix) |
Sets the name for this index. The name will be used as the prefix for all files related to this index. | |
bool | beginDoc (const DocumentProps *dp) |
Signals the beginning of a new document; returns true if initialization was successful. | |
bool | addTerm (const Term &t) |
Adds a term to the current document; returns true if the term was added successfully. | |
void | endDoc (const DocumentProps *dp) |
signify the end of current document | |
virtual void | endDoc (const DocumentProps *dp, const string &mgr) |
Signifies the end of the current document and associates it with the given document manager. This doesn't change the manager that was previously set. | |
void | endCollection (const CollectionProps *cp) |
signify the end of this collection. properties passed at the beginning of a collection should be handled by the constructor. | |
void | setDocManager (const string &mgrID) |
set the document manager to use for succeeding documents | |
Protected Methods | |
void | writeTOC (int numinv) |
void | writeDocIDs () |
void | writeCache () |
void | lastWriteCache () |
void | writeDTIDs () |
void | writeDocMgrIDs () |
int | docMgrID (const string &mgr) |
virtual void | doendDoc (const DocumentProps *dp, int mgrid) |
Protected Attributes | |
long | maxfile |
the biggest our file size can be | |
MemCache * | cache |
the main memory handler for building | |
vector< EXDOCID_T > | docIDs |
list of external docids in internal docid order | |
vector< TERM_T > | termIDs |
list of terms in termid order | |
vector< string > | tempfiles |
list of tempfiles we've written to flush cache | |
vector< string > | dtfiles |
list of dt index files | |
vector< string > | docmgrs |
FILE * | writetlookup |
filestream for writing the lookup table to the docterm db | |
ofstream | writetlist |
filestream for writing the list of located terms for each document | |
COUNT_T | tcount |
count of total terms | |
COUNT_T | tidcount |
count of unique terms | |
COUNT_T | dtidcount |
count of unique terms in a current doc | |
string | name |
the prefix name | |
TABLE_T | wordtable |
table of all terms and their doclists | |
map< TERMID_T, COUNT_T > | termlist |
maps of terms and freqs | |
int * | membuf |
memory to use for cache and buffers | |
int | membufsize |
int | curdocmgr |
|
|
|
|
|
|
|
adding a term to the current document, returns true if term was added successfully.
Implements PushIndex. Reimplemented in IncPassagePushIndex, InvFPPushIndex, and InvPassagePushIndex. |
|
the beginning of a new document, returns true if initiation was successful
Implements PushIndex. Reimplemented in IncPassagePushIndex and InvPassagePushIndex. |
|
Returns the internal id of the given docmgr. If it is not already registered, mgr will be added. |
|
Reimplemented in IncPassagePushIndex, InvFPPushIndex, and InvPassagePushIndex. |
|
signify the end of this collection. properties passed at the beginning of a collection should be handled by the constructor.
Implements PushIndex. Reimplemented in InvFPPushIndex. |
|
signify the end of current document and associate with certain document manager. this doesn't change the mgr that was previously set.
|
|
signify the end of current document
Implements PushIndex. |
|
|
|
set the document manager to use for succeeding documents
Implements PushIndex. |
|
sets the name for this index. the name will be the prefix for all files related to this index
|
|
|
|
|
|
|
|
|
|
Reimplemented in InvFPPushIndex. |
|
the biggest our file size can be
|
|
|
|
the main memory handler for building
|
|
list of dt index files
|
|
list of tempfiles we've written to flush cache
|
|
count of unique terms
|
|
|
|
maps of terms and freqs
|
|
memory to use for cache and buffers
|
|
count of unique terms in a current doc
|
|
filestream for writing the list of located terms for each document
|
|
list of terms in termid order
|
|
list of external docids in internal docid order
|
|
table of all terms and their doclists
Reimplemented in InvFPPushIndex. |
|
count of total terms
|
|
the prefix name
|
|
filestream for writing the lookup table to the docterm db
|
|
|