#include "CLucene/StdHeader.h"
#include "IndexSearcher.h"

#include "SearchHeader.h"
#include "CLucene/store/Directory.h"
#include "CLucene/document/Document.h"
#include "CLucene/index/IndexReader.h"
#include "CLucene/index/Term.h"
#include "CLucene/util/BitSet.h"
#include "HitQueue.h"

using namespace lucene::index;
using namespace lucene::util;
using namespace lucene::document;

namespace lucene{ namespace search {

/**
 * Stores the collaborators used by collect().
 *
 * @param bs        optional filter bits; NULL means every document is eligible
 * @param hitQueue  queue that receives the ScoreDoc objects created by collect()
 * @param totalhits counter whose element [0] is incremented for each accepted hit
 * @param ndocs     maximum number of entries kept in hitQueue
 * @param minScore  score threshold a hit must reach to be queued
 */
SimpleTopDocsCollector::SimpleTopDocsCollector(const BitSet* bs, HitQueue& hitQueue, int_t* totalhits,
                                               const int_t ndocs, const float_t minScore):
    bits(bs),
    hq(hitQueue),
    nDocs(ndocs),
    totalHits(totalhits),
    ms(minScore)
{
}

/**
 * Counts the hit and, if it scores at least ms, adds it to the hit queue,
 * evicting the lowest entry once the queue holds more than nDocs entries.
 *
 * @param doc   document number
 * @param score score of the document; non-positive scores are ignored
 */
void SimpleTopDocsCollector::collect(const int_t doc, const float_t score) {
    if (score > 0.0f &&                       // ignore zeroed buckets
        (bits==NULL || bits->get(doc))) {     // skip docs not in bits
        totalHits[0]++;
        if (score >= ms) {
            hq.put(new ScoreDoc(doc, score)); // update hit queue
            if (hq.Size() > nDocs) {          // if hit queue overfull
                delete hq.pop();              // remove lowest in hit queue
                // NOTE(review): the admission test above reads `ms`, while this
                // assignment updates `minScore`. Confirm against the class
                // declaration whether the raised threshold is meant to be used.
                minScore = hq.top()->score;   // reset minScore
            }
        }
    }
}

/**
 * Stores the filter bits and the collector that accepted hits are forwarded to.
 *
 * @param bs        filter bits consulted by collect(); may be NULL
 * @param collector receiver of the hits that pass the filter
 */
SimpleFilteredCollector::SimpleFilteredCollector(BitSet* bs, HitCollector& collector):
    bits(bs),
    results(collector)
{
}

/**
 * Forwards the hit to the wrapped collector when the document passes the filter.
 * A NULL bit set is treated as "no filter", matching
 * SimpleTopDocsCollector::collect, instead of being dereferenced.
 *
 * @param doc   document number
 * @param score score of the document
 */
void SimpleFilteredCollector::collect(const int_t doc, const float_t score) {
    if (bits == NULL || bits->get(doc)) {     // skip docs not in bits
        results.collect(doc, score);
    }
}

/**
 * Creates a searcher searching the index in the named directory.
 * The reader is obtained from IndexReader::open(path) and is flagged as owned,
 * so close() will close and delete it.
 */
IndexSearcher::IndexSearcher(const char_t* path):
    reader( IndexReader::open(path) ),
    readerOwner(true)
{
}

/**
 * Creates a searcher searching the provided index.
 * The reader is flagged as not owned, so close() leaves it untouched.
 */
IndexSearcher::IndexSearcher(IndexReader& r):
    reader(r),
    readerOwner(false)
{
}

/** Does nothing; in particular it does not call close(). */
IndexSearcher::~IndexSearcher(){
}

/**
 * Frees resources associated with this Searcher.
 *
 * When the searcher owns its reader, the reader is closed and deleted.
 * NOTE(review): nothing here records that the reader has been released, so a
 * second call on an owning searcher would close and delete it again.
 */
void IndexSearcher::close(){
    if ( readerOwner ){
        reader.close();
        delete &reader;
    }
}

/** Forwards to reader.docFreq(term) and returns its result. */
int_t IndexSearcher::docFreq(const Term& term) const{
    return reader.docFreq(term);
}

/** For use by {@link HitCollector} implementations.
*/ Document& IndexSearcher::doc(const int_t i) { return reader.document(i); } int_t IndexSearcher::maxDoc() const { return reader.MaxDoc(); } TopDocs& IndexSearcher::Search(Query& query, const Filter* filter, const int_t nDocs){ Scorer* scorer = Query::scorer(query, *this, reader); if (scorer == NULL) return *new TopDocs(0, new ScoreDoc*[0],0); const BitSet* bits = filter != NULL ? filter->bits(reader) : NULL; HitQueue& hq = *new HitQueue(nDocs); int_t* totalHits = new int_t[1]; totalHits[0] = 0; SimpleTopDocsCollector hitCol(bits,hq,totalHits,nDocs,0.0f); scorer->score( hitCol, reader.MaxDoc()); ScoreDoc** scoreDocs = new ScoreDoc*[hq.Size()]; int_t scoreDocsLength = hq.Size(); for (int_t i = hq.Size()-1; i >= 0; i--) // put docs in array scoreDocs[i] = hq.pop(); int_t totalHitsInt = totalHits[0]; delete &hq; delete bits; delete[] totalHits; delete scorer; return *new TopDocs(totalHitsInt, scoreDocs, scoreDocsLength); } /** Lower-level search API. * *

{@link HitCollector#collect(int_t,float_t)} is called for every non-zero * scoring document. * *

Applications should only use this if they need all of the * matching documents. The high-level search API ({@link * Searcher#search(Query)}) is usually more efficient, as it skips * non-high-scoring hits. * * @param query to match documents * @param filter if non-NULL, a bitset used to eliminate some documents * @param results to receive hits */ void IndexSearcher::Search(Query& query, const Filter* filter, HitCollector& results){ BitSet* bs = NULL; /* DSR:CL_BUG(1): Original impl reset collector to refer to stack-allocated ** object that was then deleted. */ SimpleFilteredCollector* fc = NULL; /* DSR:CL_BUG(1) */ HitCollector& collector = results; if (filter != NULL) { bs = filter->bits(reader); /* DSR:CL_BUG(1): Old impl: ** SimpleFilteredCollector fc(bs,results); ** collector = fc; ** Upon exiting this block, fc was automatically deleted, leaving ** collector referring to invalid memory. */ fc = new SimpleFilteredCollector(bs, results); } Scorer* scorer = Query::scorer(query, *this, reader); if (scorer != NULL) { /* DSR:CL_BUG(1): Old impl (passed collector, which pointed to deleted ** memory, to scorer->score(...)): ** scorer->score(collector, reader.MaxDoc()); */ scorer->score(fc == NULL ? collector : (HitCollector&)*fc, reader.MaxDoc()); delete scorer; /* DSR:CL_BUG_LEAK(2): scorer was not deleted. */ } delete fc; /* DSR:CL_BUG(1) */ delete bs; } }}