#include <TextHandler.hpp>
Inheritance diagram for TextHandler:
Public Types | |
enum | TokenType { BEGINDOC = 1, ENDDOC = 2, WORDTOK = 3, BEGINTAG = 4, ENDTAG = 5, SYMBOLTOK = 6 } |
Public Methods | |
TextHandler () | |
virtual | ~TextHandler () |
virtual void | setTextHandler (TextHandler *th) |
Set the TextHandler that this TextHandler will pass information on to. | |
virtual TextHandler * | getTextHandler () |
Get the TextHandler that this TextHandler will pass information on to. | |
virtual void | foundToken (TokenType type, char *token=NULL, const char *orig=NULL, PropertyList *properties=NULL) |
virtual char * | handleBeginDoc (char *docno, const char *original, PropertyList *list) |
virtual char * | handleEndDoc (char *token, const char *original, PropertyList *list) |
virtual char * | handleWord (char *word, const char *original, PropertyList *list) |
virtual char * | handleBeginTag (char *tag, const char *original, PropertyList *list) |
Handle a begin tag. | |
virtual char * | handleEndTag (char *tag, const char *original, PropertyList *list) |
Handle an end tag. | |
virtual char * | handleSymbol (char *symbol, const char *original, PropertyList *list) |
virtual void | foundDoc (char *docno) |
Found a document with document number. | |
virtual void | foundDoc (char *docno, const char *original) |
virtual void | foundWord (char *word) |
Found a word. | |
virtual void | foundWord (char *word, const char *original) |
virtual void | foundEndDoc () |
Found end of doc. | |
virtual void | foundSymbol (char *sym) |
Found a symbol. | |
virtual char * | handleDoc (char *docno) |
Handle a doc. | |
virtual char * | handleWord (char *word) |
Handle a word, possibly transforming it. | |
virtual void | handleEndDoc () |
Handle the end of the doc. | |
virtual char * | handleSymbol (char *sym) |
Handle a symbol, possibly transforming it. | |
virtual string | getCategory () |
Return the category of this TextHandler. | |
virtual string | getIdentifier () |
Return a unique identifier for this TextHandler object. | |
Static Public Attributes | |
const string | category = "TextHandler" |
const string | identifier = "TextHandler" |
Protected Attributes | |
TextHandler * | textHandler |
The next textHandler in the chain. | |
string | cat |
string | iden |
char | buffer [MAXWORDSIZE] |
|
|
|
|
|
|
|
|
|
Found a document with document number.
|
|
Found end of doc.
|
|
Found a symbol.
|
|
|
|
|
|
Found a word.
|
|
Return the category of this TextHandler.
|
|
Return a unique identifier for this TextHandler object.
|
|
Get the TextHandler that this TextHandler will pass information on to.
|
|
Handle a doc begin - default implementation calls handleDoc for backwards compatibility. |
|
Handle a begin tag.
Reimplemented in IndriTextHandler, and ElemDocMgr. |
|
Handle a doc.
Reimplemented in DocFreqIndexer, FreqCounter, IndriTextHandler, InvFPTextHandler, KeyfileTextHandler, PropIndexTH, FlattextDocMgr, KeyfileDocMgr, WriterInQueryHandler, and WriterTextHandler. |
|
Handle the end of the doc.
Reimplemented in DocFreqIndexer, IndriTextHandler, FlattextDocMgr, and KeyfileDocMgr. |
|
Handle a doc end - default implementation calls old handleEndDoc for backwards compatibility. |
|
Handle an end tag.
Reimplemented in IndriTextHandler, and ElemDocMgr. |
|
Handle a symbol, possibly transforming it.
Reimplemented in WriterInQueryHandler, StringQuery, and QueryDocument. |
|
Handle a symbol - default implementation calls old handleSymbol for backwards compatibility. |
|
Handle a word, possibly transforming it.
Reimplemented in CtfIndexer, DocFreqIndexer, FreqCounter, InvFPTextHandler, KeyfileTextHandler, QueryTextHandler, KeyfileDocMgr, Stemmer, Stopper, WriterInQueryHandler, WriterTextHandler, StringQuery, DocOffsetParser, and QueryDocument. |
|
Handle a word - default implementation calls old handleWord for backwards compatibility. Reimplemented in IndriTextHandler, PropIndexTH, and BrillPOSTokenizer. |
|
Set the TextHandler that this TextHandler will pass information on to.
|
|
|
|
|
|
|
|
|
|
Reimplemented in ArabicParser, ArabicStemmer, BrillPOSParser, ChineseCharParser, ChineseParser, IdentifinderParser, InqArabicParser, InQueryOpParser, KStemmer, Parser, PorterStemmer, ReutersParser, Stemmer, Stopper, TrecParser, and WebParser. |
|
The next textHandler in the chain.
|