Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

ClusterDB Class Reference

Abstract interface for clustering databases. More...

#include <ClusterDB.hpp>

Inheritance diagram for ClusterDB:

FlatFileClusterDB, KeyfileClusterDB

List of all members.

Public Methods

 ClusterDB (const Index *ind, double threshold=0.25, enum ClusterParam::simTypes simType=ClusterParam::COS, enum ClusterParam::clusterTypes clusterType=ClusterParam::CENTROID, enum ClusterParam::docModes docMode=ClusterParam::DMAX)
 initialize the cluster method

virtual ~ClusterDB ()
 clean up

virtual int countClusters () const=0
 Return number of clusters.

virtual int maxID () const=0
 Return highest cluster ID.

virtual vector< Cluster * > getDocCluster (DOCID_T docId) const=0
 Get the Cluster for the given docId.

virtual Cluster * getCluster (int clusterId) const=0
 Get the Cluster for the given clusterId.

virtual vector< int > getDocClusterId (DOCID_T docId) const=0
 Get the Cluster id for the given docId.

virtual int addToCluster (DOCID_T docId, int clusterId, double score)=0
 Add a document id to a cluster, given the cluster id.

virtual int addToCluster (DOCID_T docId, Cluster *cluster, double score)=0
 Add a document id to a cluster, given the cluster.

virtual int removeFromCluster (DOCID_T docId, int clusterID)=0
 Remove a document id from a cluster, given the cluster id.

virtual int deleteCluster (int clusterID)=0
 Delete a cluster entirely.

virtual int deleteCluster (Cluster *target)=0
 Delete a cluster entirely.

int addCluster (Cluster *oldCluster)
 Add a cluster to the DB (renumbers the cluster id). Returns new id.

virtual vector< int > splitCluster (int cid, int num=2)=0
 split cluster using Cluster::split

virtual int mergeClusters (int cid1, int cid2)=0
 merge two clusters.

virtual void printClusters () const
 pretty print all clusters to the standard output.

virtual int cluster (DOCID_T docId)
 Assign a document to a cluster.

virtual int cluster (DOCID_T docId, double &finalScore)
 Assign a document to a cluster, returning score.

virtual string getKeyWords (int cid, int numTerms=10) const
 Get the top N keywords for a cluster.


Protected Methods

virtual Cluster * newCluster ()=0
 subclass specific cluster initialization.

Cluster * allocateCluster (int clusterID) const
 Uses ClusterFactory to create Cluster objects.


Protected Attributes

const Index * index
 Database containing the collection to operate on.

int numDocs
 Number of documents in the database, reduces calls to db->docCount().

int numTerms
 Number of terms in the database.

double threshold
 threshold for YES/NO decisions

vector< Cluster * > clusters
 the cluster database.

const SimilarityMethod * sim
 Similarity method to use.

ClusterFactory * factory
 Cluster factory.

ThresholdFcn * thresh
 Threshold function for adaptive thresholding.


Detailed Description

Abstract interface for clustering databases.


Constructor & Destructor Documentation

ClusterDB::ClusterDB const Index *  ind,
double    threshold = 0.25,
enum ClusterParam::simTypes    simType = ClusterParam::COS,
enum ClusterParam::clusterTypes    clusterType = ClusterParam::CENTROID,
enum ClusterParam::docModes    docMode = ClusterParam::DMAX
 

initialize the cluster method

ClusterDB::~ClusterDB   [virtual]
 

clean up


Member Function Documentation

int ClusterDB::addCluster Cluster *  oldCluster
 

Add a cluster to the DB (renumbers the cluster id). Returns new id.

virtual int ClusterDB::addToCluster DOCID_T    docId,
Cluster *  cluster,
double    score
[pure virtual]
 

Add a document id to a cluster, given the cluster.

Implemented in FlatFileClusterDB, and KeyfileClusterDB.

virtual int ClusterDB::addToCluster DOCID_T    docId,
int    clusterId,
double    score
[pure virtual]
 

Add a document id to a cluster, given the cluster id.

Implemented in FlatFileClusterDB, and KeyfileClusterDB.

Cluster * ClusterDB::allocateCluster int    clusterID const [protected]
 

Uses ClusterFactory to create Cluster objects.

int ClusterDB::cluster DOCID_T    docId,
double &    finalScore
[virtual]
 

Assign a document to a cluster, returning score.

int ClusterDB::cluster DOCID_T    docId [virtual]
 

Assign a document to a cluster.

virtual int ClusterDB::countClusters   [pure virtual]
 

Return number of clusters.

Implemented in FlatFileClusterDB, and KeyfileClusterDB.

virtual int ClusterDB::deleteCluster Cluster *  target [pure virtual]
 

Delete a cluster entirely.

Implemented in FlatFileClusterDB, and KeyfileClusterDB.

virtual int ClusterDB::deleteCluster int    clusterID [pure virtual]
 

Delete a cluster entirely.

Implemented in FlatFileClusterDB, and KeyfileClusterDB.

virtual Cluster* ClusterDB::getCluster int    clusterId const [pure virtual]
 

Get the Cluster for the given clusterId.

Implemented in FlatFileClusterDB, and KeyfileClusterDB.

virtual vector<Cluster*> ClusterDB::getDocCluster DOCID_T    docId const [pure virtual]
 

Get the Cluster for the given docId.

Implemented in FlatFileClusterDB, and KeyfileClusterDB.

virtual vector<int> ClusterDB::getDocClusterId DOCID_T    docId const [pure virtual]
 

Get the Cluster id for the given docId.

Implemented in FlatFileClusterDB, and KeyfileClusterDB.

string ClusterDB::getKeyWords int    cid,
int    numTerms = 10
const [virtual]
 

Get the top N keywords for a cluster.

virtual int ClusterDB::maxID   [pure virtual]
 

Return highest cluster ID.

Implemented in FlatFileClusterDB, and KeyfileClusterDB.

virtual int ClusterDB::mergeClusters int    cid1,
int    cid2
[pure virtual]
 

merge two clusters.

Implemented in FlatFileClusterDB, and KeyfileClusterDB.

virtual Cluster* ClusterDB::newCluster   [protected, pure virtual]
 

subclass specific cluster initialization.

Implemented in FlatFileClusterDB, and KeyfileClusterDB.

void ClusterDB::printClusters   [virtual]
 

pretty print all clusters to the standard output.

virtual int ClusterDB::removeFromCluster DOCID_T    docId,
int    clusterID
[pure virtual]
 

Remove a document id from a cluster, given the cluster id.

Implemented in FlatFileClusterDB, and KeyfileClusterDB.

virtual vector<int> ClusterDB::splitCluster int    cid,
int    num = 2
[pure virtual]
 

split cluster using Cluster::split

Implemented in FlatFileClusterDB, and KeyfileClusterDB.


Member Data Documentation

vector<Cluster *> ClusterDB::clusters [protected]
 

the cluster database.

ClusterFactory* ClusterDB::factory [protected]
 

Cluster factory.

const Index* ClusterDB::index [protected]
 

Database containing the collection to operate on.

int ClusterDB::numDocs [protected]
 

Number of documents in the database, reduces calls to db->docCount().

int ClusterDB::numTerms [protected]
 

Number of terms in the database.

const SimilarityMethod* ClusterDB::sim [protected]
 

Similarity method to use.

ThresholdFcn* ClusterDB::thresh [protected]
 

Threshold function for adaptive thresholding.

double ClusterDB::threshold [protected]
 

threshold for YES/NO decisions


The documentation for this class was generated from the following files:
Generated on Wed Nov 3 12:59:26 2004 for Lemur Toolkit by doxygen 1.2.18