#include "CLucene/StdHeader.h"
#include "QueryParser.h"

#include "QueryParserConstants.h"
#include "CLucene/analysis/AnalysisHeader.h"
#include "CLucene/util/Reader.h"
#include "CLucene/search/SearchHeader.h"
#include "CLucene/index/Term.h"
#include "CLucene/search/WildcardQuery.h"
#include "CLucene/search/FuzzyQuery.h"
#include "CLucene/search/PrefixQuery.h"
#include "TokenList.h"
#include "QueryToken.h"
#include "QueryParserBase.h"
#include "Lexer.h"

using namespace lucene::util;
using namespace lucene::index;
using namespace lucene::analysis;
using namespace lucene::search;

namespace lucene{ namespace queryParser{

    QueryParser::~QueryParser(){
        delete[] field;
    }

    QueryParser::QueryParser(const char_t* _field, Analyzer& _analyzer) :
        analyzer(_analyzer),
        field( stringDuplicate(_field) ),
        tokens(NULL)
    {
    }

    // Returns a new Query parsed from the given query string, using the
    // specified default field and analyzer.
    //
    // query    - the query text to parse
    // field    - the default field for query terms
    // analyzer - used to find terms in the query text
    //static
    Query& QueryParser::Parse(const char_t* query, const char_t* field, Analyzer& analyzer)
    {
        QueryParser parser(field, analyzer);
        return parser.Parse(query);
    }

    // Returns a parsed Query instance.
    //
    // query - the query text to parse
    Query& QueryParser::Parse(const char_t* query)
    {
        Reader* r = new StringReader(query);
        Query* ret = NULL;
        _TRY{
            ret = &Parse( *r );
        }_FINALLY (
            delete r;
        );
        return *ret;
    }

    // Returns a parsed Query instance.
    //
    // reader - the Reader whose contents are parsed
    Query& QueryParser::Parse(Reader& reader)
    {
        Lexer lexer(reader);
        tokens = &lexer.Lex();

        if ( tokens->Peek().Type == lucene::queryParser::EOF_ )
            _THROWC( "No query given." );

        return *MatchQuery(field);
    }

    // matches for CONJUNCTION
    // CONJUNCTION ::= <AND> | <OR>
    int_t QueryParser::MatchConjunction()
    {
        switch(tokens->Peek().Type)
        {
        case lucene::queryParser::AND_:
            delete &tokens->Extract();
            return CONJ_AND;
        case lucene::queryParser::OR:
            delete &tokens->Extract();
            return CONJ_OR;
        default:
            return CONJ_NONE;
        }
    }

    // matches for MODIFIER
    // MODIFIER ::= <PLUS> | <MINUS> | <NOT>
    int_t QueryParser::MatchModifier()
    {
        switch(tokens->Peek().Type)
        {
        case lucene::queryParser::PLUS:
            delete &tokens->Extract();
            return MOD_REQ;
        case lucene::queryParser::MINUS:
        case lucene::queryParser::NOT:
            delete &tokens->Extract();
            return MOD_NOT;
        default:
            return MOD_NONE;
        }
    }

    // matches for QUERY
    // QUERY ::= [MODIFIER] CLAUSE (<CONJUNCTION> [MODIFIER] CLAUSE)*
    Query* QueryParser::MatchQuery(const char_t* field)
    {
        VoidList<BooleanClause*> clauses;

        Query* q = NULL;
        Query* firstQuery = NULL;

        int_t mods = MOD_NONE;
        int_t conj = CONJ_NONE;

        mods = MatchModifier();

        // match for CLAUSE
        q = MatchClause(field);
        AddClause(clauses, CONJ_NONE, mods, q);

        if(mods == MOD_NONE)
            firstQuery = q;

        // match for (<CONJUNCTION> [MODIFIER] CLAUSE)*
        while(true)
        {
            if(tokens->Peek().Type == lucene::queryParser::EOF_)
            {
                delete MatchQueryToken(lucene::queryParser::EOF_);
                break;
            }

            if(tokens->Peek().Type == lucene::queryParser::RPAREN)
            {
                //MatchQueryToken(lucene::queryParser::RPAREN);
                break;
            }

            conj = MatchConjunction();
            mods = MatchModifier();

            q = MatchClause(field);
            AddClause(clauses, conj, mods, q);
        }

        // finalize query
        if(clauses.size() == 1 && firstQuery != NULL)
        {
            // a single unmodified clause: return its query directly
            BooleanClause* c = clauses[0];
            c->deleteQuery = false;
            clauses.clear();
            delete c;

            return firstQuery;
        }
        else
        {
            // otherwise wrap all clauses in a BooleanQuery
            BooleanQuery* query = new BooleanQuery();
            for ( uint_t i = 0; i < clauses.size(); i++ )
                query->add(*clauses[i]);
            return query;
        }
    }
    // matches for CLAUSE
    // CLAUSE ::= [TERM <COLON>] ( TERM | (<LPAREN> QUERY <RPAREN>) )
    Query* QueryParser::MatchClause(const char_t* field)
    {
        Query* q = NULL;
        const char_t* sfield = field;
        bool delField = false;

        // match for [TERM <COLON>] - an explicit field prefix overrides the default field
        QueryToken* term = &tokens->Extract();
        if(term->Type == lucene::queryParser::TERM &&
           tokens->Peek().Type == lucene::queryParser::COLON)
        {
            delete MatchQueryToken(lucene::queryParser::COLON);
            sfield = stringDuplicate(term->Value);
            delField = true;
            delete term;
        }
        else
        {
            tokens->Push(term);
        }

        // match for TERM | (<LPAREN> QUERY <RPAREN>)
        if(tokens->Peek().Type == lucene::queryParser::LPAREN)
        {
            delete MatchQueryToken(lucene::queryParser::LPAREN);
            q = MatchQuery(sfield);
            delete MatchQueryToken(lucene::queryParser::RPAREN);
        }
        else
        {
            q = MatchTerm(sfield);
        }

        if ( delField )
            delete[] sfield;
        return q;
    }

    // matches for TERM
    // TERM ::= ( TERM | PREFIXTERM | WILDTERM | NUMBER )
    //              [ <FUZZY> ] [ <CARAT> <NUMBER> [ <FUZZY> ] ]
    //        | ( <RANGEIN> | <RANGEEX> ) [ <CARAT> <NUMBER> ]
    //        | <QUOTED> [ SLOP ] [ <CARAT> <NUMBER> ]
    Query* QueryParser::MatchTerm(const char_t* field)
    {
        QueryToken* term = NULL;
        QueryToken* slop = NULL;
        QueryToken* boost = NULL;

        bool prefix = false;
        bool wildcard = false;
        bool fuzzy = false;
        bool rangein = false;
        Query* q = NULL;

        term = &tokens->Extract();

        switch(term->Type)
        {
        case lucene::queryParser::TERM:
        case lucene::queryParser::NUMBER:
#ifndef NO_PREFIX_QUERY
        case lucene::queryParser::PREFIXTERM:
#endif
#ifndef NO_WILDCARD_QUERY
        case lucene::queryParser::WILDTERM:
#endif
            if(term->Type == lucene::queryParser::PREFIXTERM)
                prefix = true;
            if(term->Type == lucene::queryParser::WILDTERM)
                wildcard = true;

#ifndef NO_FUZZY_QUERY
            if(tokens->Peek().Type == lucene::queryParser::FUZZY)
            {
                delete MatchQueryToken(lucene::queryParser::FUZZY);
                fuzzy = true;
            }
#endif
            if(tokens->Peek().Type == lucene::queryParser::CARAT)
            {
                delete MatchQueryToken(lucene::queryParser::CARAT);
                boost = MatchQueryToken(lucene::queryParser::NUMBER);
#ifndef NO_FUZZY_QUERY
                if(tokens->Peek().Type == lucene::queryParser::FUZZY)
                {
                    delete MatchQueryToken(lucene::queryParser::FUZZY);
                    fuzzy = true;
                }
#endif
            }

            // dummy head so the #ifdef'd else-if chain below always compiles
            if (false)
            {
            }
#ifndef NO_WILDCARD_QUERY
            else if(wildcard)
            {
                Term* t = new Term(field, term->Value);
                q = new WildcardQuery(t);
                t->finalize();
                break;
            }
#endif
#ifndef NO_PREFIX_QUERY
            else if(prefix)
            {
                // strip the trailing '*' before building the prefix term
                char_t* buf = stringDuplicate(term->Value);
                buf[stringLength(buf)-1] = 0;
                Term* t = new Term(field, buf);
                q = new PrefixQuery(t);
                t->finalize();
                delete[] buf;
                break;
            }
#endif
#ifndef NO_FUZZY_QUERY
            else if(fuzzy)
            {
                Term* t = new Term(field, term->Value);
                q = new FuzzyQuery(t);
                t->finalize();
                break;
            }
#endif
            else
            {
                q = GetFieldQuery(field, analyzer, term->Value);
                break;
            }

#ifndef NO_RANGE_QUERY
        case lucene::queryParser::RANGEIN:
        case lucene::queryParser::RANGEEX:
            {
                if(term->Type == lucene::queryParser::RANGEIN)
                    rangein = true;

                if(tokens->Peek().Type == lucene::queryParser::CARAT)
                {
                    delete MatchQueryToken(lucene::queryParser::CARAT);
                    boost = MatchQueryToken(lucene::queryParser::NUMBER);
                }

                // strip the surrounding range delimiters before building the query
                char_t* tmp = stringDuplicate( term->Value + 1 );
                tmp[stringLength(tmp)-1] = 0;
                q = GetRangeQuery(field, analyzer, tmp, rangein);
                delete[] tmp;
            }
            break;
#endif

        case lucene::queryParser::QUOTED:
            if(tokens->Peek().Type == lucene::queryParser::SLOP)
            {
                slop = MatchQueryToken(lucene::queryParser::SLOP);
            }

            if(tokens->Peek().Type == lucene::queryParser::CARAT)
            {
                delete MatchQueryToken(lucene::queryParser::CARAT);
                boost = MatchQueryToken(lucene::queryParser::NUMBER);
            }

            //todo: check term->Value+1 (skips the opening quote)
            q = GetFieldQuery(field, analyzer, term->Value+1);

            if(slop != NULL && q->instanceOf(_T("PhraseQuery")) )
            {
                try
                {
                    // slop->Value+1 skips the leading '~'
                    int_t s = stringToInteger(slop->Value+1);
                    ((PhraseQuery*)q)->setSlop( s );
                }
                catch(...)
                {
                    // ignored: the slop stays at its default
                }
            }
            break;
        } // end of switch

        delete term;

        if(boost != NULL)
        {
            float_t f = 1.0F;
            try
            {
                //TODO: check this
                char_t* tmp;
                f = stringToFloat(boost->Value, &tmp);
                //f = Single.Parse(boost->Value, NumberFormatInfo.InvariantInfo);
            }
            catch(...)
            {
                // ignored: the boost stays at 1.0
            }
            delete boost;

            q->setBoost( f );
        }

        return q;
    }

    // matches a QueryToken of the specified type and returns it,
    // otherwise a parser exception is thrown
    QueryToken* QueryParser::MatchQueryToken(QueryTokenTypes expectedType)
    {
        if(tokens->Count() == 0)
        {
            QueryParserBase::throwParserException(_T("Error: Unexpected end of program"),' ',0,0);
        }

        QueryToken* t = &tokens->Extract();
        if (expectedType != t->Type)
        {
            char_t buf[200];
            stringPrintF(buf, _T("Error: Unexpected QueryToken: %d, expected: %d"), t->Type, expectedType);
            delete t;
            QueryParserBase::throwParserException(buf,' ',0,0);
        }
        return t;
    }
}}
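
// A minimal usage sketch, compiled out: it shows how a caller would normally
// drive the static Parse() overload above. SimpleAnalyzer and the field/query
// strings are assumptions for illustration; any lucene::analysis::Analyzer
// implementation available in the build can be used instead.
#if 0
#include "CLucene/analysis/Analyzers.h"

void exampleParseUsage()
{
    lucene::analysis::SimpleAnalyzer analyzer;

    // parse "title:hello world" with "contents" as the default field
    lucene::search::Query& q = lucene::queryParser::QueryParser::Parse(
        _T("title:hello world"), _T("contents"), analyzer );

    // ... hand q to an IndexSearcher here ...

    delete &q; // assuming the caller owns the returned query
}
#endif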