#include "CLucene/StdHeader.h"
#include "QueryParser.h"
#include "QueryParserConstants.h"
#include "CLucene/analysis/AnalysisHeader.h"
#include "CLucene/util/Reader.h"
#include "CLucene/search/SearchHeader.h"
#include "CLucene/index/Term.h"
#include "CLucene/search/WildcardQuery.h"
#include "CLucene/search/FuzzyQuery.h"
#include "CLucene/search/PrefixQuery.h"
#include "TokenList.h"
#include "QueryToken.h"
#include "QueryParserBase.h"
#include "Lexer.h"
using namespace lucene::util;
using namespace lucene::index;
using namespace lucene::analysis;
using namespace lucene::search;
namespace lucene{ namespace queryParser{
QueryParser::~QueryParser(){
delete[] field;
}
QueryParser::QueryParser(const char_t* _field, Analyzer& _analyzer) :
analyzer(_analyzer),
field( stringDuplicate( _field) ),
tokens(NULL)
{
}
// Parses the given query text with a temporary QueryParser and returns the
// resulting Query.
//
// query    - the query to parse.
// field    - the default field for query terms.
// analyzer - used to find terms in the query text.
//static
Query& QueryParser::Parse(const char_t* query, const char_t* field, Analyzer& analyzer)
{
QueryParser parser(field, analyzer);
return parser.Parse(query);
}
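// A minimal usage sketch (the analyzer class and the ownership of the returned
// Query are assumptions, not defined in this file):
//
//   lucene::analysis::standard::StandardAnalyzer an;
//   Query& q = QueryParser::Parse( _T("title:apache AND +body:lucene"), _T("body"), an );
//   // ... run q against a searcher ...
//   delete &q; // the parsed query is heap-allocated; the caller presumably owns it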
// Returns a parsed Query instance.
//
// query - the query text to be parsed.
Query& QueryParser::Parse(const char_t* query)
{
Reader* r = new StringReader(query);
Query* ret = NULL;
_TRY{
ret = &Parse( *r );
}_FINALLY (
delete r;
);
return *ret;
}
// Returns a parsed Query instance.
//
// reader - the Reader supplying the query text to be parsed.
Query& QueryParser::Parse(Reader& reader)
{
Lexer lexer(reader);
tokens = &lexer.Lex();
if ( tokens->Peek().Type== lucene::queryParser::EOF_ )
_THROWC( "No query given.");
return *MatchQuery(field);
}
// matches for CONJUNCTION
// CONJUNCTION ::= <AND> | <OR>
int_t QueryParser::MatchConjunction()
{
switch(tokens->Peek().Type)
{
case lucene::queryParser::AND_:
delete &tokens->Extract();
return CONJ_AND;
case lucene::queryParser::OR:
delete &tokens->Extract();
return CONJ_OR;
default:
return CONJ_NONE;
}
}
// matches for MODIFIER
// MODIFIER ::= <PLUS> | <MINUS> | <NOT>
int_t QueryParser::MatchModifier()
{
switch(tokens->Peek().Type)
{
case lucene::queryParser::PLUS:
delete &tokens->Extract();
return MOD_REQ;
case lucene::queryParser::MINUS:
case lucene::queryParser::NOT:
delete &tokens->Extract();
return MOD_NOT;
default:
return MOD_NONE;
}
}
// matches for QUERY
// QUERY ::= [MODIFIER] CLAUSE ( <CONJUNCTION> [MODIFIER] CLAUSE )*
Query* QueryParser::MatchQuery(const char_t* field)
{
VoidList<BooleanClause*> clauses;
Query* q = NULL;
Query* firstQuery = NULL;
int_t mods = MOD_NONE;
int_t conj = CONJ_NONE;
mods = MatchModifier();
// match for CLAUSE
q = MatchClause(field);
AddClause(clauses, CONJ_NONE, mods, q);
if(mods == MOD_NONE)
firstQuery = q;
// match for ( <CONJUNCTION> [MODIFIER] CLAUSE )*
while(true)
{
if(tokens->Peek().Type == lucene::queryParser::EOF_)
{
delete MatchQueryToken(lucene::queryParser::EOF_);
break;
}
if(tokens->Peek().Type == lucene::queryParser::RPAREN)
{
//MatchQueryToken(lucene::queryParser::RPAREN);
break;
}
conj = MatchConjunction();
mods = MatchModifier();
q = MatchClause(field);
AddClause(clauses, conj, mods, q);
}
// finalize query: a single clause without modifiers is returned directly,
// otherwise all clauses are wrapped in a BooleanQuery
if(clauses.size() == 1 && firstQuery != NULL)
{
BooleanClause* c = clauses[0];
c->deleteQuery=false;
clauses.clear();
delete c;
return firstQuery;
}
else
{
BooleanQuery* query = new BooleanQuery();
for ( uint_t i=0;i<clauses.size();i++ )
	query->add(*clauses[i]);
return query;
}
}
// matches for CLAUSE
// CLAUSE ::= [TERM <COLON>] ( TERM | <LPAREN> QUERY <RPAREN> )
Query* QueryParser::MatchClause(const char_t* field)
{
Query* q = NULL;
const char_t* sfield = field;
bool delField = false;
// match for [TERM <COLON>]
// (a leading term followed by a colon overrides the default field for this clause)
QueryToken* term = &tokens->Extract();
if(term->Type == lucene::queryParser::TERM &&
tokens->Peek().Type == lucene::queryParser::COLON)
{
delete MatchQueryToken(lucene::queryParser::COLON);
sfield = stringDuplicate(term->Value);
delField= true;
delete term;
}
else
{
tokens->Push(term);
}
// match for
// TERM | <LPAREN> QUERY <RPAREN>
if(tokens->Peek().Type == lucene::queryParser::LPAREN)
{
delete MatchQueryToken(lucene::queryParser::LPAREN);
q = MatchQuery(sfield);
delete MatchQueryToken(lucene::queryParser::RPAREN);
}
else
{
q = MatchTerm(sfield);
}
if ( delField )
delete[] sfield;
return q;
}
// matches for TERM
// TERM ::= TERM | PREFIXTERM | WILDTERM | NUMBER
//          [ <FUZZY> ] [ <CARAT> <NUMBER> [<FUZZY>]]
//
//        | ( <RANGEIN> | <RANGEEX> ) [ <CARAT> <NUMBER> ]
//        | <QUOTED> [SLOP] [ <CARAT> <NUMBER> ]
Query* QueryParser::MatchTerm(const char_t* field)
{
QueryToken* term = NULL;
QueryToken* slop = NULL;
QueryToken* boost = NULL;
bool prefix = false;
bool wildcard = false;
bool fuzzy = false;
bool rangein = false;
Query* q = NULL;
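// pull the next token; its type selects which kind of Query gets built below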
term = &tokens->Extract();
switch(term->Type)
{
case lucene::queryParser::TERM:
case lucene::queryParser::NUMBER:
#ifndef NO_PREFIX_QUERY
case lucene::queryParser::PREFIXTERM:
#endif
#ifndef NO_WILDCARD_QUERY
case lucene::queryParser::WILDTERM:
#endif
if(term->Type == lucene::queryParser::PREFIXTERM)
prefix = true;
if(term->Type == lucene::queryParser::WILDTERM)
wildcard = true;
#ifndef NO_FUZZY_QUERY
if(tokens->Peek().Type == lucene::queryParser::FUZZY)
{
delete MatchQueryToken(lucene::queryParser::FUZZY);
fuzzy = true;
}
#endif
if(tokens->Peek().Type == lucene::queryParser::CARAT)
{
delete MatchQueryToken(lucene::queryParser::CARAT);
boost = MatchQueryToken(lucene::queryParser::NUMBER);
#ifndef NO_FUZZY_QUERY
if(tokens->Peek().Type == lucene::queryParser::FUZZY)
{
delete MatchQueryToken(lucene::queryParser::FUZZY);
fuzzy = true;
}
#endif
}
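// dummy branch so the conditionally compiled else-if blocks below always have
// a preceding branch, whichever optional query types are compiled out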
if (false){
}
#ifndef NO_WILDCARD_QUERY
else if(wildcard){
Term* t = new Term(field, term->Value);
q = new WildcardQuery(t);
t->finalize();
break;
}
#endif
#ifndef NO_PREFIX_QUERY
else if(prefix){
char_t* buf = stringDuplicate(term->Value);
buf[stringLength(buf)-1] = 0;
Term* t = new Term(field, buf);
q = new PrefixQuery(t);
t->finalize();
delete[] buf;
break;
}
#endif
#ifndef NO_FUZZY_QUERY
else if(fuzzy){
Term* t = new Term(field, term->Value);
q = new FuzzyQuery(t);
t->finalize();
break;
}
#endif
else{
q = GetFieldQuery(field, analyzer, term->Value);
break;
}
#ifndef NO_RANGE_QUERY
case lucene::queryParser::RANGEIN:
case lucene::queryParser::RANGEEX:
{
if(term->Type == lucene::queryParser::RANGEIN)
rangein = true;
if(tokens->Peek().Type == lucene::queryParser::CARAT)
{
delete MatchQueryToken(lucene::queryParser::CARAT);
boost = MatchQueryToken(lucene::queryParser::NUMBER);
}
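// drop the first and last characters of the range token (its enclosing brackets)
// before building the range query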
char_t* tmp = stringDuplicate ( term->Value +1);
tmp[stringLength(tmp)-1] = 0;
q = GetRangeQuery(field, analyzer, tmp, rangein);
delete[] tmp;
}
break;
#endif
case lucene::queryParser::QUOTED:
if(tokens->Peek().Type == lucene::queryParser::SLOP)
{
slop = MatchQueryToken(lucene::queryParser::SLOP);
}
if(tokens->Peek().Type == lucene::queryParser::CARAT)
{
delete MatchQueryToken(lucene::queryParser::CARAT);
boost = MatchQueryToken(lucene::queryParser::NUMBER);
}
//todo: check term->Value+1 -- the +1 skips the opening quote of the QUOTED token
q = GetFieldQuery(field, analyzer, term->Value+1);
if(slop != NULL && q->instanceOf(_T("PhraseQuery")) )
{
try
{
int_t s = stringToInteger(slop->Value+1);
((PhraseQuery*)q)->setSlop( s );
}
catch(...)
{
// ignored
}
}
break;
} // end of switch
delete term;
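// a <CARAT> boost was matched above; parse its numeric value and apply it,
// falling back to 1.0 if the number cannot be parsed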
if(boost != NULL)
{
float_t f = 1.0F;
try
{
//TODO: check this
char_t* tmp;
f = stringToFloat(boost->Value, &tmp);
//f = Single.Parse(boost->Value, NumberFormatInfo.InvariantInfo);
}
catch(...)
{
// ignored
}
delete boost;
q->setBoost( f);
}
return q;
}
// matches for a QueryToken of the specified type and returns it;
// otherwise a parser exception is thrown
QueryToken* QueryParser::MatchQueryToken(QueryTokenTypes expectedType)
{
if(tokens->Count() == 0)
{
QueryParserBase::throwParserException(_T("Error: UnExpected End of Program"),' ',0,0);
}
QueryToken* t = &tokens->Extract();
if (expectedType != t->Type)
{
char_t buf[200];
stringPrintF(buf,_T("Error: Unexpected QueryToken: %d, expected: %d"),t->Type,expectedType);
delete t;
QueryParserBase::throwParserException(buf,' ',0,0);
}
return t;
}
}}