#include "CLucene/StdHeader.h"
#include "TermInfosReader.h"
#include "Term.h"
#include "Terms.h"
#include "SegmentTermEnum.h"
#include "CLucene/store/Directory.h"
#include "FieldInfos.h"
#include "TermInfo.h"
#include "TermInfosWriter.h"
#include "CLucene/util/Arrays.h"

namespace lucene{ namespace index {

  // Builds "<segment><ext>" in a freshly allocated buffer of MAX_PATH
  // characters. The caller owns the buffer and must release it with delete[].
  // NOTE(review): stringPrintF is given no length here, so the combined name
  // is presumably expected to fit in MAX_PATH - confirm against its definition.
  const char_t* TermInfosReader::segmentname( const char_t* ext ){
    char_t* buf = new char_t[MAX_PATH];
    stringPrintF(buf, _T("%s%s"), segment, ext);
    return buf;
  }

  // Opens the segment's ".tis" file through a SegmentTermEnum, records the
  // number of terms it reports, and loads the ".tii" index via readIndex().
  TermInfosReader::TermInfosReader(Directory& dir, const char_t* seg, FieldInfos& fis):
    directory (dir),
    segment (seg),
    fieldInfos (fis),
    indexTerms(NULL)
  {
    const char_t* n = segmentname(_T(".tis"));
    _enum = new SegmentTermEnum( directory.openFile( n ), fieldInfos, false);
    delete[] n;

    size = ( _enum->size );
    readIndex();
  }

  // The destructor releases nothing by itself; resources are freed by close().
  TermInfosReader::~TermInfosReader(){
  }

  // Closes and deletes the term enumeration together with its input stream,
  // then releases the in-memory index arrays. Calling close() a second time is
  // a no-op for the index arrays because indexTermsLength is reset to 0.
  void TermInfosReader::close() {
    if (_enum != NULL){
      _enum->close();

      // The input stream was obtained from directory.openFile() in the
      // constructor, so it is closed and deleted here as well.
      _enum->input.close();
      delete &_enum->input;

      _DELETE(_enum);
    }

    // NOTE(review): finalize() presumably drops the reference obtained from
    // indexEnum.getTerm() in readIndex() - confirm against Term.
    for ( int_t i = 0; i < indexTermsLength; i++ ){
      indexTerms[i]->finalize();
      delete indexInfos[i];
    }

    delete[] indexTerms;
    delete[] indexPointers;
    delete[] indexInfos;

    indexTerms = NULL;
    indexPointers = NULL;
    indexInfos = NULL;
    // Without this, a repeated close() would walk the NULL indexTerms array.
    indexTermsLength = 0;
  }

  // Returns the term count read from the ".tis" enumeration at construction.
  int_t TermInfosReader::Size() {
    return size;
  }

  DEFINE_MUTEX(getInt_LOCK);

  // Returns the term at the given position, or NULL when the reader is empty
  // or the enumeration ends before reaching the position.
  // NOTE(review): the result comes from _enum->getTerm(), which presumably
  // hands an owned reference to the caller - confirm against SegmentTermEnum.
  Term* TermInfosReader::get(const int_t position) {
    // Checked before taking the lock so the early return cannot leave the
    // mutex held (same ordering as get(const Term&) and getPosition()).
    if (size == 0)
      return NULL;

    LOCK_MUTEX(getInt_LOCK);

    /* DSR:CL_BUG_LEAK: The _enum->getTerm() call below leaked a reference; I
    ** ordered it to return an unowned reference instead.
    */
    const bool withinCurrentBlock =
         _enum != NULL
      && _enum->getTerm(false) != NULL
      && position >= _enum->position
      && position < (_enum->position + LUCENE_WRITER_INDEX_INTERVAL );

    if (!withinCurrentBlock)
      seekEnum(position / LUCENE_WRITER_INDEX_INTERVAL); // must seek
    // otherwise the cached enumeration is close enough: can avoid seek

    Term* ret = scanEnum(position);

    // Single exit: the mutex is released on the fast path as well.
    UNLOCK_MUTEX(getInt_LOCK);
    return ret;
  }

  // Returns the TermInfo for the given term, or NULL when the reader is empty
  // or the term is not present. A non-NULL result is a new object that the
  // caller must delete (see getTerms(const Term&)).
  TermInfo* TermInfosReader::get(const Term& term){
    if (size == 0)
      return NULL;

    LOCK_MUTEX(getTerm_LOCK);

    // optimize sequential access: first try scanning cached _enum w/o seeking
    bool canScanInPlace = false;
    if ( _enum->getTerm(false) != NULL                      // term is at or past current
      && ((_enum->prev != NULL && term.compareTo(*_enum->prev) > 0)
        || term.compareTo(*_enum->getTerm(false)) >= 0))
    {
      int_t _enumOffset = (_enum->position/LUCENE_WRITER_INDEX_INTERVAL)+1;
      canScanInPlace =
           indexTermsLength == _enumOffset                  // but before end of block
        || term.compareTo(*indexTerms[_enumOffset]) < 0;
    }

    // random-access: must seek
    if (!canScanInPlace)
      seekEnum(getIndexOffset(term));

    TermInfo* ret = scanEnum(term);

    // Single exit: the mutex is released on the no-seek path as well.
    UNLOCK_MUTEX(getTerm_LOCK);
    return ret;
  }

  // Returns the position of the given term in the enumeration, or -1 when the
  // reader is empty or the term is not present.
  int_t TermInfosReader::getPosition(const Term& term) {
    if (size == 0)
      return -1;

    LOCK_MUTEX(getPosition_LOCK);

    int_t indexOffset = getIndexOffset(term);
    seekEnum(indexOffset);

    while(term.compareTo(*_enum->getTerm(false)) > 0 && _enum->next()) {}

    // The result is read while the lock is still held, and the current term is
    // NULL-checked the same way scanEnum(const Term&) does after its scan.
    int_t ret = -1;
    if (_enum->getTerm(false) != NULL && term.compareTo(*_enum->getTerm(false)) == 0)
      ret = _enum->position;

    UNLOCK_MUTEX(getPosition_LOCK);
    return ret;
  }

  // Returns a clone of the enumeration positioned at the start.
  // NOTE(review): the clone is heap-allocated and returned by reference, so
  // the caller presumably takes ownership - confirm against the callers.
  SegmentTermEnum& TermInfosReader::getTerms(){
    SegmentTermEnum* cln;

    LOCK_MUTEX(getTerms_LOCK);
    if (_enum->position != -1)      // if not at start
      seekEnum(0);                  // reset to start

    //TODO2: check this code
    cln = _enum->clone();
    UNLOCK_MUTEX(getTerms_LOCK);

    return *cln;
  }

  // Returns a clone of the enumeration after seeking it towards the given
  // term with get(term).
  // NOTE(review): the clone is heap-allocated and returned by reference, so
  // the caller presumably takes ownership - confirm against the callers.
  SegmentTermEnum& TermInfosReader::getTerms(const Term& term) {
    SegmentTermEnum* cln;

    LOCK_MUTEX(getTermsTerm_LOCK);
    /* DSR:CL_BUG_LEAK: The get(term) call below returns a new object; it was
    ** not deleted in CL 0.8.11.
    */
    delete get(term); // Seek _enum to term; delete the new TermInfo that's returned.
    cln = _enum->clone();
    UNLOCK_MUTEX(getTermsTerm_LOCK);

    return *cln;
  }

  // Loads the segment's ".tii" file into the parallel arrays indexTerms,
  // indexInfos and indexPointers, sized from the count the file reports.
  void TermInfosReader::readIndex() {
    const char_t* buf = segmentname(_T(".tii"));
    InputStream& is = directory.openFile( buf );
    delete[] buf;

    SegmentTermEnum& indexEnum = *new SegmentTermEnum( is, fieldInfos, true);

    _TRY {
      indexTermsLength = indexEnum.size;

      indexTerms = new Term*[indexTermsLength];
      indexInfos = new TermInfo*[indexTermsLength];
      indexPointers = new long_t[indexTermsLength];

      // Bounded by the allocated length so a file holding more entries than
      // its reported size cannot write past the end of the arrays.
      for (int_t i = 0; i < indexTermsLength && indexEnum.next(); i++) {
        indexTerms[i] = indexEnum.getTerm();
        indexInfos[i] = indexEnum.getTermInfo();
        indexPointers[i] = indexEnum.indexPointer;
      }
    } _FINALLY(
      indexEnum.close();
      delete &indexEnum;
      is.close();
      delete &is;
    );
  }

  // Binary-searches indexTerms[] for the given term. Returns the index of an
  // exact match; otherwise returns the final 'hi', i.e. the index of the last
  // entry that compares below the term, which is -1 when the term compares
  // below every entry.
  // NOTE(review): callers pass the result straight to seekEnum(); presumably
  // the first index entry sorts before every real term so -1 never occurs -
  // confirm against the index writer.
  int_t TermInfosReader::getIndexOffset(const Term& term) {
    int_t lo = 0;                   // binary search indexTerms[]
    int_t hi = indexTermsLength - 1;

    while (hi >= lo) {
      int_t mid = (lo + hi) >> 1;
      int_t delta = term.compareTo(*indexTerms[mid]);
      if (delta < 0)
        hi = mid - 1;
      else if (delta > 0)
        lo = mid + 1;
      else
        return mid;
    }
    return hi;
  }

  // Repositions _enum at the index entry with the given offset, passing the
  // stored file pointer, the position just before that entry's block, and the
  // entry's term and term info.
  void TermInfosReader::seekEnum(const int_t indexOffset) {
    _enum->seek( indexPointers[indexOffset],
      (indexOffset * LUCENE_WRITER_INDEX_INTERVAL) - 1,
      *indexTerms[indexOffset],
      *indexInfos[indexOffset]);
  }

  // Advances _enum while its current term compares below the given term.
  // Returns _enum->getTermInfo() on an exact match, NULL otherwise.
  TermInfo* TermInfosReader::scanEnum(const Term& term) {
    while ( term.compareTo(*_enum->getTerm(false) ) > 0 && _enum->next()) {}

    if (_enum->getTerm(false) != NULL && term.compareTo(*_enum->getTerm(false)) == 0)
      return _enum->getTermInfo();
    else
      return NULL;
  }

  // Advances _enum until it reaches the given position. Returns the term at
  // that position, or NULL when the enumeration ends first.
  Term* TermInfosReader::scanEnum(const int_t position) {
    while(_enum->position < position)
      if (!_enum->next())
        return NULL;

    return _enum->getTerm();
  }

}}