#include "CLucene/StdHeader.h"
#include "SegmentHeader.h"

#include "SegmentInfo.h"
#include "FieldInfos.h"
#include "FieldsReader.h"
#include "IndexReader.h"
#include "TermInfosReader.h"
#include "CLucene/util/BitVector.h"
#include "Terms.h"
#include "CLucene/util/VoidMap.h"

using namespace lucene::util;
namespace lucene{ namespace index{

  // Binds a Norm to the input stream it will be read from. The byte array
  // starts out NULL and is filled in on demand by SegmentReader::getNorms().
  Norm::Norm(InputStream& instrm):
    in(instrm),
    bytes(NULL)
  {
  }

  // Intentionally empty: the stream and the byte array are released by
  // SegmentReader::closeNorms(), not here.
  Norm::~Norm(){
  }

  //static
  // Formats a segment file name into `buffer`:
  //   x == -1  ->  "<segment><ext>"
  //   else     ->  "<segment><ext><x>"
  // No length is passed to stringPrintF, so `buffer` must be big enough for
  // the result; callers in this file supply MAX_PATH-sized buffers.
  void SegmentReader::segmentname(char_t* buffer,const char_t* segment, const char_t* ext, const int_t x){
    if ( x== -1 )
      stringPrintF(buffer,_T("%s%s"), segment,ext );
    else
      stringPrintF(buffer,_T("%s%s%d"), segment,ext,x );
  }

  // Same as the buffer-taking overload below, but returns a freshly allocated
  // MAX_PATH-sized array. The caller must release it with delete[].
  char_t* SegmentReader::segmentname(const char_t* ext, const int_t x){
    char_t* buf = new char_t[MAX_PATH];
    segmentname(buf,ext,x);
    return buf ;
  }

  // Formats a file name for this reader's own segment into `buffer`.
  void SegmentReader::segmentname(char_t* buffer,const char_t* ext, const int_t x ){
    SegmentReader::segmentname(buffer,segment,ext,x);
  }

  // Frees everything the constructor allocated or opened. Streams are only
  // deleted here; closing them is done in doClose().
  SegmentReader::~SegmentReader(){
    delete fieldInfos;
    delete fieldsReader;
    delete tis;
    delete freqStream;
    delete proxStream;
    delete[] segment;
    delete deletedDocs;
  }

  // Opens all files of the segment described by `si`.
  //   si       - segment to read; its name is duplicated into `segment`.
  //   closeDir - stored in closeDirectory; when true doClose() also closes
  //              the directory.
  SegmentReader::SegmentReader(SegmentInfo& si, const bool closeDir) :
    IndexReader(si.dir), //init super
    closeDirectory(closeDir),
    deletedDocs(NULL),
    deletedDocsDirty(false),
    segment( stringDuplicate(si.name) ),
    fieldInfos(NULL),
    freqStream( NULL ),
    proxStream( NULL )
  {
    char_t buf[MAX_PATH];

    segmentname(buf, _T(".fnm"));
    fieldInfos = new FieldInfos(directory, buf );

    // make sure that all index files have been read or are kept open
    // so that if an index update removes them we'll still have them
    segmentname(buf, _T(".frq"));
    freqStream = &directory.openFile( buf );

    segmentname(buf, _T(".prx"));
    proxStream= &directory.openFile( buf );

    fieldsReader = new FieldsReader(directory, segment, *fieldInfos);
    tis = new TermInfosReader(directory, segment, *fieldInfos);

    // The deletions bit vector is only loaded when a ".del" file exists.
    if (hasDeletions(si)){
      segmentname(buf, _T(".del"));
      deletedDocs = new lucene::util::BitVector(directory, buf );
    }

    openNorms();
  }

  // Closes the reader. Unless built as CLUCENE_LITE, pending deletions are
  // first written out through SegmentReaderLockWith (see doBody() below)
  // using the directory's "commit.lock". Then the field reader, term infos,
  // frequency/position streams and norms are closed, and finally the
  // directory itself if closeDirectory is set.
  void SegmentReader::doClose() {
    LOCK_MUTEX(doClose_LOCK);

#ifndef CLUCENE_LITE
    if (deletedDocsDirty) {
      LuceneLock* lock = directory.makeLock(_T("commit.lock"));
      SegmentReaderLockWith with ( lock, this );

      LOCK_MUTEX(DIRECTORIES_MUTEX); // in- & inter-process sync
      with.run();
      UNLOCK_MUTEX(DIRECTORIES_MUTEX);

      delete lock;
      deletedDocsDirty = false;
    }
#endif // CLUCENE_LITE

    fieldsReader->close();
    tis->close();

    if (freqStream != NULL)
      freqStream->close();
    if (proxStream != NULL)
      proxStream->close();

    closeNorms();

    if (closeDirectory)
      directory.close();

    UNLOCK_MUTEX(doClose_LOCK);
  }

  //static
  // Returns true when the segment's "<name>.del" file exists in its directory.
  bool SegmentReader::hasDeletions(const SegmentInfo& si) {
    char_t f[MAX_PATH];
    SegmentReader::segmentname(f, si.name,_T(".del"),-1 );
    return si.dir.fileExists( f );
  }

  // Marks document `docNum` as deleted. The bit vector is created on first
  // use, sized to MaxDoc(); the change is only flagged dirty here and is
  // written out by doClose().
  void SegmentReader::doDelete(const int_t docNum) {
    LOCK_MUTEX(doDelete_LOCK);

    if (deletedDocs == NULL)
      deletedDocs = new lucene::util::BitVector(MaxDoc());
    deletedDocsDirty = true;
    deletedDocs->set(docNum);

    UNLOCK_MUTEX(doDelete_LOCK);
  }

  // Builds the list of file names belonging to this segment: the seven fixed
  // extensions, ".del" if that file exists, and one ".f<i>" per indexed field.
  // The list is allocated with new and returned by reference; this function
  // never frees it.
  // NOTE(review): presumably the caller owns and deletes the returned list,
  // and the list frees its strings with delete[] (DELETE_TYPE_DELETE_ARRAY)
  // -- confirm against StringArrayConst.
  StringArrayConst& SegmentReader::files() {
    StringArrayConst& files = *new StringArrayConst(true,DELETE_TYPE_DELETE_ARRAY);

    files.push_back( segmentname(_T(".fnm") ));
    files.push_back( segmentname(_T(".fdx") ));
    files.push_back( segmentname(_T(".fdt") ));
    files.push_back( segmentname(_T(".tii") ));
    files.push_back( segmentname(_T(".tis") ));
    files.push_back( segmentname(_T(".frq") ));
    files.push_back( segmentname(_T(".prx") ));

    // The name is heap-allocated: hand it to the list if the file exists,
    // otherwise free it here.
    const char_t* tmp = segmentname(_T(".del"));
    if (directory.fileExists( tmp ) )
      files.push_back( tmp );
    else
      delete[] tmp;

    for (int_t i = 0; i < fieldInfos->size(); i++) {
      FieldInfo& fi = fieldInfos->fieldInfo(i);
      if (fi.isIndexed)
        files.push_back( segmentname(_T(".f"), i) );
    }

    return files;
  }

  // Returns the term enumeration provided by the TermInfosReader.
  TermEnum& SegmentReader::getTerms() const {
    return tis->getTerms();
  }

  // Returns the TermInfosReader's enumeration for term *t.
  // `t` is dereferenced unconditionally and must not be NULL.
  TermEnum& SegmentReader::getTerms(const Term* t) const {
    return tis->getTerms(*t);
  }

  // Returns stored document number `n` as read by the FieldsReader.
  // Throws (via _THROWC) when `n` is marked deleted.
  lucene::document::Document& SegmentReader::document(const int_t n) {
    lucene::document::Document* ret;

    LOCK_MUTEX(document_LOCK);
    if (isDeleted(n)){
      // Release the mutex before throwing: every other exit path pairs the
      // LOCK with an UNLOCK, and leaving through the exception without it
      // would keep document_LOCK held.
      UNLOCK_MUTEX(document_LOCK);
      _THROWC( "attempt to access a deleted document" );
    }
    ret = &fieldsReader->doc(n);
    UNLOCK_MUTEX(document_LOCK);

    return *ret;
  }

  // Returns true when a deletions bit vector exists and has bit `n` set.
  bool SegmentReader::isDeleted(const int_t n) {
    bool ret;

    LOCK_MUTEX(isDeleted_LOCK);
    ret = (deletedDocs != NULL && deletedDocs->get(n));
    UNLOCK_MUTEX(isDeleted_LOCK);

    return ret;
  }

  // Returns a newly allocated SegmentTermDocs bound to this reader.
  // The object is created with new and never freed here.
  TermDocs& SegmentReader::termDocs() const {
    return *new SegmentTermDocs((void*)this);
  }

  // Returns a newly allocated SegmentTermPositions bound to this reader.
  // The object is created with new and never freed here.
  TermPositions& SegmentReader::termPositions() const {
    return *new SegmentTermPositions((void*)this);
  }

  // Returns the docFreq recorded for term `t`, or 0 when the TermInfosReader
  // has no entry for it. The TermInfo returned by get() is deleted here.
  int_t SegmentReader::docFreq(const Term& t) const {
    TermInfo* ti = tis->get(t);
    if (ti != NULL){
      int_t ret = ti->docFreq;
      delete ti;
      return ret;
    }else
      return 0;
  }

  // Returns MaxDoc() minus the count of the deletions bit vector, if any.
  int_t SegmentReader::NumDocs() {
    int_t n = MaxDoc();
    if (deletedDocs != NULL)
      n -= deletedDocs->Count();
    return n;
  }

  // Returns the size reported by the FieldsReader.
  int_t SegmentReader::MaxDoc() const {
    return fieldsReader->Size();
  }

  // Returns the norm bytes for `field`, or NULL when no Norm is registered
  // under that name. The MaxDoc()-sized array is read on the first request
  // and cached in the Norm; it is freed by closeNorms().
  l_byte_t* SegmentReader::getNorms(const char_t* field) {
    Norm* norm = (Norm*)norms.get(field);
    if (norm == NULL)
      return NULL;

    if (norm->bytes == NULL) {
      l_byte_t* bytes = new l_byte_t[MaxDoc()]; //array
      getNorms(field, bytes, 0);
      norm->bytes = bytes;
    }
    return norm->bytes;
  }

  // Reads the norms of `field` into the caller-supplied `bytes` via
  // readBytes(bytes, offset, MaxDoc()). When the field has no norm stream
  // the array is left untouched. The temporary stream is closed and deleted
  // in the _FINALLY clause.
  // NOTE(review): "use zeros" only holds if the caller zero-filled `bytes`;
  // nothing in this function clears it.
  void SegmentReader::getNorms(const char_t* field, l_byte_t* bytes, const int_t offset) {
    InputStream* _normStream = normStream(field);
    if (_normStream == NULL)
      return; // use zeros in array

    _TRY {
      _normStream->readBytes(bytes, offset, MaxDoc());
    } _FINALLY (
      _normStream->close();
      delete _normStream;
    );
  }

  // Returns a clone of the norm stream for `field`, positioned at offset 0,
  // or NULL when no Norm is registered under that name. The clone is not
  // released here; getNorms() above closes and deletes the one it obtains.
  InputStream* SegmentReader::normStream(const char_t* field) {
    Norm* norm = norms.get(field);
    if (norm == NULL)
      return NULL;

    InputStream* result = &norm->in.clone();
    result->seek(0);
    return result;
  }

  // Opens the "<segment>.f<number>" file of every indexed field and registers
  // a Norm for it in `norms`, keyed by the field name.
  void SegmentReader::openNorms() {
    for (int_t i = 0; i < fieldInfos->size(); i++) {
      FieldInfo& fi = fieldInfos->fieldInfo(i);
      if (fi.isIndexed) {
        char_t f[MAX_PATH];
        segmentname(f, _T(".f"), fi.number);
        //TODO, should fi.name be copied?
        norms.put(fi.name, new Norm( directory.openFile( f ) ));
      }
    }
  }

  // Closes and frees every Norm held in `norms`: its stream, its cached byte
  // array and the Norm object itself.
  // NOTE(review): the map entries are not removed afterwards, so `norms`
  // keeps pointers to deleted objects until it is destroyed -- confirm that
  // nothing reads it after this call.
  void SegmentReader::closeNorms() {
    LOCK_MUTEX(norms_mutex);

    // NOTE(review): `map` appears here without template arguments; confirm
    // it names the concrete map type behind `norms`.
    map::iterator itr = norms.begin();
    while (itr != norms.end()) {
      itr->second->in.close();
      delete &itr->second->in;
      /* DSR:CL_BUG: bytes should be deleted with delete[], not delete. */
      delete[] itr->second->bytes;
      delete itr->second;
      ++itr;
    }

    UNLOCK_MUTEX(norms_mutex);
  }

#ifndef CLUCENE_LITE
  // Writes the reader's deletions bit vector to "<segment>.tmp" and then
  // renames that file to "<segment>.del". Always returns NULL.
  // reader->deletedDocs is dereferenced unconditionally; doClose() only runs
  // this when deletedDocsDirty is set.
  void* SegmentReaderLockWith::doBody() {
    char_t tmpName[MAX_PATH];
    char_t delName[MAX_PATH];
    reader->segmentname(tmpName, _T(".tmp"));
    reader->segmentname(delName, _T(".del"));

    reader->deletedDocs->write(reader->directory, tmpName );
    reader->directory.renameFile( tmpName, delName );

    return NULL;
  }
#endif //CLUCENE_LITE

}}