#include "CLucene/StdHeader.h"
#include "SegmentsReader.h"

#include "SegmentInfo.h"
#include "IndexReader.h"
#include "CLucene/document/Document.h"
#include "Terms.h"
#include "SegmentMergeQueue.h"

namespace lucene{ namespace index{

  // ---------------------------------------------------------------------
  // SegmentsTermDocs: iterates the postings of one term across an array of
  // segment readers, adding each segment's start offset to the document
  // numbers it reports.
  // ---------------------------------------------------------------------

  /**
   * @param r     array of segment readers to iterate over (not copied)
   * @param rLen  number of entries in r
   * @param s     start offsets, one per reader (not copied)
   *
   * NOTE(review): the text between the loop header and the body of Doc()
   * was lost when this file was extracted. The constructor body and the
   * destructor below are reconstructed -- confirm against the header and
   * version control.
   */
  SegmentsTermDocs::SegmentsTermDocs(SegmentReader** r, int_t rLen, const int_t* s):
    base(0),
    pointer(0),
    segTermDocs ( new SegmentTermDocs*[rLen] ),
    readers(r),
    readersLength(rLen),
    starts(s)
  {
    // The slots must start out NULL: termDocs(i) creates a per-segment
    // enumerator only when its slot is still NULL.
    for ( int_t i = 0; i < rLen; i++ )
      segTermDocs[i] = NULL;

    // termDocs(i), next() and read() all test these against NULL.
    term = NULL;
    current = NULL;
  }

  // NOTE(review): reconstructed (see constructor note). Left empty, like
  // ~SegmentsTermEnum below; resources are released by close().
  SegmentsTermDocs::~SegmentsTermDocs(){
  }

  /** Document number of the current posting, offset by the segment start.
   *  NOTE(review): the start of this definition was reconstructed. */
  int_t SegmentsTermDocs::Doc() const {
    return base + current->doc;
  }

  /** Frequency reported by the current segment enumerator. */
  int_t SegmentsTermDocs::Freq() const {
    return current->freq;
  }

  /**
   * Positions this enumeration on tterm and rewinds to the first segment.
   * Takes its own reference via tterm->pointer().
   */
  void SegmentsTermDocs::seek( Term* tterm) {
    // Acquire the new reference before releasing the old one so that
    // re-seeking to the same Term object is safe. Previously the old
    // reference was simply overwritten on a second seek().
    Term* previous = term;
    term = tterm->pointer();
    if (previous != NULL)
      previous->finalize();

    base = 0;
    pointer = 0;
    current = NULL;
  }

  /**
   * Advances to the next posting, moving on to the next segment whenever
   * the current one is exhausted.
   * @return false once every segment has been consumed
   */
  bool SegmentsTermDocs::next() {
    if (current != NULL && current->next()) {
      return true;
    } else if (pointer < readersLength) {
      base = starts[pointer];
      current = termDocs(pointer++);
      return next();
    } else
      return false;
  }

  /**
   * Optimized implementation.
   * Bulk-reads postings from the current segment into docs/freqs and adds
   * the segment's start offset to every document number read.
   * @return number of entries read, or 0 when all segments are exhausted
   */
  int_t SegmentsTermDocs::read(int_t docs[], int_t freqs[]) {
    while (true) {
      while (current == NULL) {
        if (pointer < readersLength) {    // try next segment
          base = starts[pointer];
          current = termDocs(pointer++);
        } else {
          return 0;
        }
      }
      int_t end = current->read(docs, freqs);
      if (end == 0) {                     // none left in segment
        current = NULL;
      } else {                            // got some
        int_t b = base;                   // adjust doc numbers
        for (int_t i = 0; i < end; i++)
          docs[i] += b;
        return end;
      }
    }
  }

  /**
   * As yet unoptimized implementation.
   * Calls next() until Doc() is no longer below target.
   * @return false if the postings run out first
   */
  bool SegmentsTermDocs::skipTo(const int_t target) {
    do {
      if (!next())
        return false;
    } while (target > Doc());
    return true;
  }

  /**
   * Closes every per-segment enumerator that was created, frees the slot
   * array and releases the reference taken by seek().
   */
  void SegmentsTermDocs::close() {
    if (segTermDocs != NULL) {
      for (int_t i = 0; i < readersLength; i++) {
        if (segTermDocs[i] != NULL){
          segTermDocs[i]->close();
          // NOTE(review): the enumerators are closed but never deleted
          // here -- confirm who owns the objects returned by
          // SegmentReader::termDocs().
        }
      }
      // Allocated with new[] in the constructor, so it needs delete[].
      delete[] segTermDocs;
      segTermDocs = NULL;
    }

    // term is NULL if seek() was never called.
    if (term != NULL) {
      term->finalize();
      term = NULL;
    }
  }

  /** Obtains an enumerator from the given segment reader. */
  SegmentTermDocs* SegmentsTermDocs::termDocs(const SegmentReader& reader) const {
    SegmentTermDocs& ret = (SegmentTermDocs&)reader.termDocs();
    return &ret;
  }

  /**
   * Returns the enumerator for segment i, creating it on first use and
   * seeking it to the current term.
   * @return NULL when no term has been set
   */
  SegmentTermDocs* SegmentsTermDocs::termDocs(const int_t i) const {
    if (term == NULL)
      return NULL;
    SegmentTermDocs* result = segTermDocs[i];
    if (result == NULL){
      segTermDocs[i] = termDocs(*readers[i]);
      result = segTermDocs[i];
    }
    result->seek(term);
    return result;
  }


  // ---------------------------------------------------------------------
  // SegmentsTermEnum: merges the term enumerations of several segments
  // through a SegmentMergeQueue.
  // ---------------------------------------------------------------------

  /**
   * @param readers        segment readers to enumerate
   * @param readersLength  number of entries in readers
   * @param starts         start offset of each reader
   * @param t              term to start at, or NULL to start from the beginning
   */
  SegmentsTermEnum::SegmentsTermEnum(SegmentReader** readers, const int_t readersLength,
                                     const int_t starts[], const Term* t):
    queue( *new SegmentMergeQueue(readersLength))
  {
    for (int_t i = 0; i < readersLength; i++) {
      SegmentReader* reader = readers[i];
      SegmentTermEnum* termEnum;

      if (t != NULL) {
        termEnum = (SegmentTermEnum*)&reader->getTerms(t);
      } else
        termEnum = (SegmentTermEnum*)&reader->getTerms();

      SegmentMergeInfo* smi = new SegmentMergeInfo(starts[i], *termEnum, *reader);
      /* DSR:CL_BUG_LEAK: termEnum->getTerm() call below leaked a reference;
      ** I ordered it to return an unowned reference instead. */
      if (t == NULL ? smi->next() : termEnum->getTerm(false) != NULL)
        queue.put(smi);                   // initialize queue
      else {
        smi->close();
        delete smi;   // not queued, so nothing else frees it; same close+delete as next()
      }
    }

    if (t != NULL && queue.Size() > 0) {
      SegmentMergeInfo* top = queue.top();
      /* DSR: The getTerm() call below should and does return an owned reference. */
      term = top->termEnum.getTerm();
      docFreq = top->termEnum.DocFreq();
    }else{
      term = NULL;
      docFreq = 0;
    }
  }

  SegmentsTermEnum::~SegmentsTermEnum(){
  }

  /**
   * Advances to the next distinct term across all segments and sums its
   * document frequency over every segment positioned on that term.
   * @return false when the queue is empty
   */
  bool SegmentsTermEnum::next() {
    SegmentMergeInfo* top = queue.top();
    if (top == NULL) {
      // DSR:CL_BUG_LEAK: This call needed to be reenabled.
      // term is NULL when the enumeration was constructed with t == NULL
      // and has not advanced yet, so guard the call as close() does.
      if (term != NULL) {
        term->finalize();
        term = NULL;
      }
      return false;
    }

    // TODO: need this? DSR: Yes. The getTerm method requires the client
    // programmer to indicate whether he owns the returned reference, so
    // we can discard ours right away.
    if (term != NULL)
      term->finalize();

    // NOTE(review): this stores top->term without calling pointer(), yet
    // the value is finalize()d later -- confirm the reference accounting
    // against SegmentMergeInfo.
    term = top->term;
    docFreq = 0;

    while (top != NULL && term->compareTo(*top->term) == 0) {
      queue.pop();                        // don't delete, this is the top
      docFreq += top->termEnum.DocFreq(); // increment freq
      if (top->next())
        queue.put(top);                   // restore queue
      else{
        top->close();                     // done with a segment
        delete top;   // TODO: need this? DSR:CL_BUG_LEAK: Yes; I reenabled it.
        top = NULL;
      }
      top = queue.top();
    }
    return true;
  }

  /**
   * @param pointer  when true (and a term is present) the result of
   *                 term->pointer() is returned; otherwise the stored
   *                 pointer itself
   */
  Term* SegmentsTermEnum::getTerm(const bool pointer) {
    if ( pointer && term!=NULL )
      return term->pointer();
    else
      return term;
  }

  /** Document frequency accumulated for the current term. */
  int_t SegmentsTermEnum::DocFreq() const {
    return docFreq;
  }

  /** Releases the current term (if any), then closes and deletes the queue. */
  void SegmentsTermEnum::close() {
    /* DSR:CL_BUG_LEAK: If this enumeration hasn't actually been exhausted yet,
    ** we need to free this->term. */
    if (term != NULL) {
      term->finalize();
      term = NULL;
    }

    queue.close();
    delete &queue;
  }


  // ---------------------------------------------------------------------
  // SegmentsTermPositions: SegmentsTermDocs plus nextPosition(). Every
  // other member simply forwards to the SegmentsTermDocs implementation.
  // ---------------------------------------------------------------------

  SegmentsTermPositions::SegmentsTermPositions(SegmentReader** r, const int_t rLen, const int_t* s):
    SegmentsTermDocs(r,rLen, s)
  {
  }

  /** Forwards to the current segment enumerator, cast to SegmentTermPositions. */
  int_t SegmentsTermPositions::nextPosition() {
    return ((SegmentTermPositions*)current)->nextPosition();
  }

  void SegmentsTermPositions::seek(Term* term){
    SegmentsTermDocs::seek(term);
  }

  int_t SegmentsTermPositions::Doc() const{
    return SegmentsTermDocs::Doc();
  }

  int_t SegmentsTermPositions::Freq() const{
    return SegmentsTermDocs::Freq();
  }

  int_t SegmentsTermPositions::read(int_t docs[], int_t freqs[]){
    return SegmentsTermDocs::read(docs,freqs);
  }

  bool SegmentsTermPositions::skipTo(const int_t target){
    return SegmentsTermDocs::skipTo(target);
  }

  bool SegmentsTermPositions::next(){
    return SegmentsTermDocs::next();
  }

  void SegmentsTermPositions::close(){
    SegmentsTermDocs::close();
  }

  // NOTE(review): this signature (non-const, reference parameter and
  // reference return) differs from
  // SegmentsTermDocs::termDocs(const SegmentReader&) const, which is what
  // SegmentsTermDocs::termDocs(const int_t) calls. Confirm that
  // `current` really is a SegmentTermPositions when nextPosition() casts it.
  SegmentTermDocs& SegmentsTermPositions::termDocs(SegmentReader& reader) {
    return (SegmentTermDocs&)reader.termPositions();
  }


  // ---------------------------------------------------------------------
  // SegmentsReader: an IndexReader over several SegmentReaders. Global
  // document numbers are mapped to (segment, local number) through the
  // starts array.
  // ---------------------------------------------------------------------

  /**
   * @param directory  passed on to the IndexReader base
   * @param r          array of segment readers
   * @param rLen       number of entries in r
   */
  SegmentsReader::SegmentsReader(Directory& directory, SegmentReader** r, int_t rLen):
    IndexReader(directory),
    readers(r),
    readersLength(rLen),
    maxDoc(0),
    numDocs(-1)
  {
    starts = new int_t[readersLength + 1];     // build starts array
    for (int_t i = 0; i < readersLength; i++) {
      starts[i] = maxDoc;
      maxDoc += readers[i]->MaxDoc();          // compute maxDocs
    }
    starts[readersLength] = maxDoc;
  }

  // NOTE(review): everything after `i<` in the loop header, up to the
  // `NumDocs()` call inside SegmentsReader::NumDocs, was lost when this
  // file was extracted. The loop body here is reconstructed -- confirm
  // against version control, including whether the readers array itself
  // is also released here.
  SegmentsReader::~SegmentsReader(){
    delete[] starts;
    starts = NULL;

    for ( int i = 0; i < readersLength; i++ )
      delete readers[i];
  }

  /**
   * Number of documents, summed over all segment readers and cached in
   * numDocs (-1 marks the cache as invalid; doDelete() resets it).
   * NOTE(review): the signature and the first half of the body are
   * reconstructed (see destructor note); the tail from `NumDocs()`
   * onward is original.
   */
  int_t SegmentsReader::NumDocs() {
    LOCK_MUTEX(NumDocs_LOCK);
    if (numDocs == -1) {                       // check cache
      int_t n = 0;                             // cache miss--recompute
      for (int_t i = 0; i < readersLength; i++)
        n += readers[i]->NumDocs();            // sum from readers
      numDocs = n;
    }
    UNLOCK_MUTEX(NumDocs_LOCK);
    return numDocs;
  }

  /** Sum of MaxDoc() over all segment readers, computed in the constructor. */
  int_t SegmentsReader::MaxDoc() const {
    return maxDoc;
  }

  /** Fetches document n from the segment that contains it. */
  lucene::document::Document& SegmentsReader::document(const int_t n) {
    int_t i = readerIndex(n);                    // find segment num
    return readers[i]->document(n - starts[i]);  // dispatch to segment reader
  }

  /** Asks the segment that contains document n whether it is deleted. */
  bool SegmentsReader::isDeleted(const int_t n) {
    int_t i = readerIndex(n);                    // find segment num
    return readers[i]->isDeleted(n - starts[i]); // dispatch to segment reader
  }

  /**
   * Returns the norms for field, one byte per document up to MaxDoc().
   * The buffer is built on first request and kept in normsCache.
   */
  l_byte_t* SegmentsReader::getNorms(const char_t* field){
    l_byte_t* bytes;

    // Single exit so the lock is released on the cache-hit path as well;
    // the early return used to leave getNorms_LOCK held.
    LOCK_MUTEX(getNorms_LOCK);
    bytes = normsCache.get(field);
    if (bytes == NULL){                          // cache miss
      bytes = new l_byte_t[MaxDoc()];
      // Each reader is handed the shared buffer and its own start offset.
      for (int_t i = 0; i < readersLength; i++)
        readers[i]->getNorms(field, bytes, starts[i]);
      normsCache.put(field, bytes);              // update cache
    }
    UNLOCK_MUTEX(getNorms_LOCK);

    return bytes;
  }

  /** New merged enumeration over all terms; allocated with new. */
  TermEnum& SegmentsReader::getTerms() const {
    return *new SegmentsTermEnum(readers, readersLength, starts, NULL);
  }

  /** New merged enumeration constructed with a starting term; allocated with new. */
  TermEnum& SegmentsReader::getTerms(const Term* term) const {
    return *new SegmentsTermEnum(readers, readersLength, starts, term);
  }

  /** Document frequency of t, summed over all segments. */
  int_t SegmentsReader::docFreq(const Term& t) const {
    int_t total = 0;                             // sum freqs in segments
    for (int_t i = 0; i < readersLength; i++)
      total += readers[i]->docFreq(t);
    return total;
  }

  /** New, unpositioned SegmentsTermDocs; allocated with new. */
  TermDocs& SegmentsReader::termDocs() const {
    TermDocs* ret = (TermDocs*)new SegmentsTermDocs(readers,readersLength, starts);
    return *ret;
  }

  /** New, unpositioned SegmentsTermPositions; allocated with new. */
  TermPositions& SegmentsReader::termPositions() const {
    TermPositions* ret = (TermPositions*)new SegmentsTermPositions(readers,readersLength, starts);
    return *ret;
  }

  /** Deletes document n in its segment and invalidates the numDocs cache. */
  void SegmentsReader::doDelete(const int_t n) {
    LOCK_MUTEX(doDelete_LOCK);
    numDocs = -1;                                // invalidate cache
    int_t i = readerIndex(n);                    // find segment num
    readers[i]->doDelete(n - starts[i]);         // dispatch to segment reader
    UNLOCK_MUTEX(doDelete_LOCK);
  }

  /**
   * Finds the index of the segment reader that holds document n by binary
   * search over starts[0 .. readersLength-1].
   *
   * On an exact match the search advances to the last reader sharing that
   * start offset: a reader whose MaxDoc() is 0 has the same start as its
   * successor, and only the last such reader can contain the document.
   *
   * @return the matching index; when n is not itself a start offset, the
   *         index of the last start below n (-1 if n is below starts[0])
   */
  int_t SegmentsReader::readerIndex(const int_t n) const {
    int_t lo = 0;                                // search starts array
    int_t hi = readersLength - 1;                // for first element less
                                                 // than n, return its index
    while (hi >= lo) {
      int_t mid = (lo + hi) >> 1;
      int_t midValue = starts[mid];
      if (n < midValue)
        hi = mid - 1;
      else if (n > midValue)
        lo = mid + 1;
      else {
        // Skip past empty segments that share this start offset.
        while (mid + 1 < readersLength && starts[mid + 1] == midValue)
          mid++;
        return mid;
      }
    }
    return hi;
  }

  /** Closes every segment reader. */
  void SegmentsReader::doClose() {
    LOCK_MUTEX(doClose_LOCK);
    for (int_t i = 0; i < readersLength; i++){
      readers[i]->close();
    }
    UNLOCK_MUTEX(doClose_LOCK);
  }

}}