#include "CLucene/StdHeader.h"
#ifndef _lucene_search_FuzzyQuery_
#define _lucene_search_FuzzyQuery_

#include "CLucene/index/IndexReader.h"
#include "CLucene/index/Term.h"
#include "MultiTermQuery.h"

#ifndef NO_FUZZY_QUERY

namespace lucene{ namespace search{

    /**
     * FilteredTermEnum specialisation that enumerates every term that is
     * similar to a given filter term.
     *
     * Term enumerations are always ordered by Term.compareTo(): each term
     * in the enumeration is greater than all the terms that precede it.
     *
     * Only the declarations live in this header; the member functions are
     * defined elsewhere.
     */
    class FuzzyTermEnum: public FilteredTermEnum {
    private:
        // Comparison state. NOTE(review): presumably updated by
        // termCompare() -- confirm against the implementation.
        double distance;
        bool fieldMatch;
        bool endEnum;

        // The term being searched for, plus its field name and text.
        // NOTE(review): ownership and lifetime of these pointers are not
        // visible from this header -- confirm in the constructor/destructor.
        Term* searchTerm;
        const char_t* field;
        /* DSR:PROPOSED: Why not make text and textlen const too? */
        const char_t* text;
        const int_t textlen;

        // Tuning values for the similarity test; the threshold can be
        // changed through setFuzzyThreshold().
        float_t FUZZY_THRESHOLD;
        float_t SCALE_FACTOR;

    public:
        /**
         * @param reader index reader supplying the terms to enumerate
         * @param term   the filter term that candidates are compared to
         */
        FuzzyTermEnum(IndexReader& reader, Term* term);
        ~FuzzyTermEnum();

        /** Reports whether the enumeration has reached its end. */
        bool EndEnum();

        /** Closes the enumeration. */
        void close();

        /** Sets the fuzzy threshold used by this enumeration. */
        void setFuzzyThreshold(float_t value);

    protected:
        /**
         * Compares the given term with the search term. The comparison
         * uses the Levenshtein distance between the two terms.
         */
        bool termCompare(Term* term) ;

        /** Returns the difference value for the current term. */
        float_t difference();

    /******************************
     * Compute Levenshtein distance
     ******************************/
    private:
        /** Finds and returns the smallest of three integers. */
        static int_t Min(const int_t a, const int_t b, const int_t c);

        /**
         * Scratch array kept as a member so that editDistance does not
         * have to create a new array every time it is called.
         * NOTE(review): eWidth and eHeight are presumably the dimensions
         * currently allocated for e -- confirm in the implementation.
         */
        int_t* e;
        int_t eWidth;
        int_t eHeight;

        /**
         * Computes the Levenshtein distance between two strings.
         *
         * Levenshtein distance, also known as edit distance, measures the
         * similarity of two strings as the number of character deletions,
         * insertions or substitutions required to transform one string
         * into the other.
         *
         * @param s first string
         * @param t second string
         * @param n length of s
         * @param m length of t
         * @return the distance as an integer
         */
        /* DSR:PROPOSED: Why not make args const? (the earlier signature
           took non-const char_t* for s and t) */
        int_t editDistance(const char_t* s, const char_t* t, const int_t n, const int_t m) ;
    };

    /** Implements the fuzzy search query. */
    class FuzzyQuery: public MultiTermQuery {
    private:
        // The term this query matches against.
        Term* fuzzyTerm;

    public:
        /** @param term the term to search for */
        FuzzyQuery(Term* term);
        ~FuzzyQuery();

        /** Prepares the query against the given index reader. */
        void prepare(IndexReader& reader);

        /**
         * Returns a string form of the query for the given field.
         * NOTE(review): who frees the returned buffer is not visible
         * here -- confirm in the implementation.
         */
        const char_t* toString(const char_t* field);

        /** Returns the name of this query type. */
        const char_t* getQueryName() const;
    };

}}
#endif
#endif