#include "CLucene/StdHeader.h"
#ifndef CLUCENE_LITE

#include "IndexWriter.h"

#include "CLucene/util/VoidList.h"
#include "CLucene/store/Directory.h"
#include "CLucene/store/RAMDirectory.h"
#include "CLucene/store/Lock.h"
#include "CLucene/document/Document.h"
#include "DocumentWriter.h"
#include "SegmentInfos.h"
#include "SegmentMerger.h"

#ifdef USE_INFO_STREAM
// NOTE(review): the header name was lost in extraction; <iostream> is
// restored because infoStream is written to with operator<< below.
# include <iostream>
#endif

using namespace std;
using namespace lucene::store;
using namespace lucene::util;

namespace lucene{ namespace index {

  /*IndexWriter::IndexWriter():
    segmentInfos (*new SegmentInfos),
    ramDirectory (*new lucene::store::RAMDirectory)
  {
  }*/

  // Releases everything the writer allocated itself: the write lock (if one
  // is still held) and the heap-allocated ramDirectory and segmentInfos that
  // the constructors bound to reference members.
  void IndexWriter::_finalize(){
    if ( writeLock != NULL ){
      writeLock->release(); // release write lock
      _DELETE( writeLock );
    }

    delete &ramDirectory;
    delete &segmentInfos;
  }

  // Destructor: delegates all cleanup to _finalize().
  IndexWriter::~IndexWriter() {
    _finalize();
  }

  // Shared constructor body.
  //
  // Obtains "write.lock" on the target directory, then, while holding
  // "commit.lock" and DIRECTORIES_MUTEX, runs IndexWriterLockWith (which
  // writes a fresh segments file when `create` is true and reads the
  // existing one otherwise).
  //
  // On any failure the writer is finalized before the exception propagates.
  // This is required because the visible callers are constructors: when a
  // constructor throws, the destructor does not run, so nothing else would
  // release the write lock or the heap members.
  //
  // Throws (via _THROWC) if the write lock cannot be obtained; rethrows
  // whatever with.run() throws.
  void IndexWriter::_IndexWriter(const bool create){
    LuceneLock* newLock = directory.makeLock(_T("write.lock"));
    if (!newLock->obtain()){ // obtain write lock
      delete newLock;
      _finalize();
      _THROWC( "Index locked for write or no write access." );
    }
    writeLock = newLock; // save it

    LuceneLock* lock = directory.makeLock(_T("commit.lock"));
    IndexWriterLockWith with ( lock,this,create );

    LOCK_MUTEX(DIRECTORIES_MUTEX); // in- & inter-process sync
    try {
      with.run();
    } catch (...) {
      // Previously an exception here left DIRECTORIES_MUTEX locked, leaked
      // the commit lock object and left write.lock held. Mirror the cleanup
      // done on the lock-obtain failure path above.
      UNLOCK_MUTEX(DIRECTORIES_MUTEX);
      delete lock;
      _finalize();
      throw;
    }
    UNLOCK_MUTEX(DIRECTORIES_MUTEX);

    delete lock;
  }

  // Opens a writer on the index at `path`. The directory is obtained from
  // FSDirectory::getDirectory and is marked as owned (ownDir == true), so
  // close() will always close it.
  IndexWriter::IndexWriter(const char_t* path, lucene::analysis::Analyzer& a, const bool create):
    directory( FSDirectory::getDirectory(path, create) ),
    analyzer(a),
    segmentInfos (*new SegmentInfos),
    ramDirectory (*new lucene::store::RAMDirectory),
    infoStream(NULL),
    writeLock(NULL),
    ownDir(true)
  {
    _IndexWriter ( create );
  }

  // Opens a writer on a caller-supplied directory. The directory is not
  // owned (ownDir == false); close() only closes it when asked to.
  IndexWriter::IndexWriter(lucene::store::Directory& d, lucene::analysis::Analyzer& a, const bool create):
    directory(d),
    analyzer(a),
    segmentInfos (*new SegmentInfos),
    ramDirectory (*new lucene::store::RAMDirectory),
    infoStream(NULL),
    writeLock(NULL),
    ownDir(false)
  {
    _IndexWriter ( create );
  }

  // Body run under the commit lock when a writer is opened: writes a new
  // segments file when creating, otherwise reads the existing one.
  // Always returns NULL.
  void* IndexWriterLockWith::doBody() {
    if (create){
      //todo: something like this
      //if ( !writer->directory.fileExists("narrow_charset") )
      //  writer->directory.createFile("narrow_charset");

      writer->segmentInfos.write(writer->directory);
    }else
      writer->segmentInfos.read(writer->directory);

    return NULL;
  }

  // Body run under the commit lock after a merge: the new segment list is
  // written first, and only then are the merged-away segments deleted.
  // Always returns NULL.
  void* IndexWriterLockWith2::doBody() {
    writer->segmentInfos.write(writer->directory);   // commit before deleting
    writer->deleteSegments(*segmentsToDelete);       // delete now-unused segments
    return NULL;
  }

  // Flushes buffered RAM segments, closes the RAM directory, closes the
  // target directory when `closeDir` is true or the writer owns it, and
  // releases the write lock.
  void IndexWriter::close( const bool closeDir ) {
    LOCK_MUTEX(close_LOCK);

    flushRamSegments();
    ramDirectory.close();

    if ( closeDir || ownDir )
      directory.close();

    if ( writeLock != NULL )
      writeLock->release(); // release write lock
    _DELETE( writeLock );

    UNLOCK_MUTEX(close_LOCK);
  }

  // Returns the sum of docCount over every entry in segmentInfos.
  int_t IndexWriter::docCount() {
    int_t count = 0;

    LOCK_MUTEX(docCount_LOCK);
    for (uint_t i = 0; i < segmentInfos.size(); i++) {
      SegmentInfo& si = segmentInfos.info(i);
      count += si.docCount;
    }
    UNLOCK_MUTEX(docCount_LOCK);

    return count;
  }

  // Adds one document: it is written as a new single-document segment in
  // ramDirectory, registered in segmentInfos, and a merge is triggered if
  // maybeMergeSegments() decides one is due.
  void IndexWriter::addDocument(lucene::document::Document& doc) {
    DocumentWriter* dw = new DocumentWriter(ramDirectory, analyzer, maxFieldLength);
    char_t* segmentName = newSegmentName();
    dw->addDocument(segmentName, doc);

    LOCK_MUTEX(THIS_LOCK);
    segmentInfos.push_back(new SegmentInfo(segmentName, 1, ramDirectory));
    maybeMergeSegments();
    UNLOCK_MUTEX(THIS_LOCK);

    // delete segmentName; newSegmentName() doesn't call new or even malloc.
    //   --RGR
    delete[] segmentName; //the lucene::util::Misc::join uses new[] --BVK
    delete dw;
  }

  // Merges until at most one segment is left, and that segment has no
  // deletions and lives in the target directory. Each pass merges the last
  // mergeFactor segments (or all of them when there are fewer).
  void IndexWriter::optimize() {
    LOCK_MUTEX(optimize_LOCK);

    flushRamSegments();
    while (segmentInfos.size() > 1 ||
        (segmentInfos.size() == 1 &&
          (SegmentReader::hasDeletions(segmentInfos.info(0)) ||
            &segmentInfos.info(0).dir != &directory ))) {
      int_t minSegment = segmentInfos.size() - mergeFactor;
      mergeSegments(minSegment < 0 ? 0 : minSegment);
    }

    UNLOCK_MUTEX(optimize_LOCK);
  }

  // Merges the indexes found in dirs[0 .. dirsLength-1] into this index.
  // optimize() is run before the segments are appended and again afterwards.
  void IndexWriter::addIndexes(Directory** dirs, const int_t dirsLength) {
    LOCK_MUTEX(addIndexes_LOCK);

    optimize();            // start with zero or 1 seg
    for (int_t i = 0; i < dirsLength; i++) {
      /* DSR: Changed SegmentInfos constructor arg (see bug discussion below). */
      SegmentInfos sis(false);
      sis.read( *dirs[i]);
      for (uint_t j = 0; j < sis.size(); j++) {
        /* DSR:CL_BUG:
        ** In CLucene 0.8.11, the next call placed a pointer to a SegmentInfo
        ** object from stack variable $sis into the vector this->segmentInfos.
        ** Then, when the call to optimize() is made just before exiting this
        ** function, $sis had already been deallocated (and has deleted its
        ** member objects), leaving dangling pointers in this->segmentInfos.
        ** I added a SegmentInfos constructor that allowed me to order it not
        ** to delete its members, invoked the new constructor form above for
        ** $sis, and the problem was solved. */
        segmentInfos.push_back(&sis.info(j));    // add each info
      }
    }
    optimize();            // cleanup

    UNLOCK_MUTEX(addIndexes_LOCK);
  }

  // Merges the trailing run of RAM-resident segments into the target
  // directory. Scans backwards from the last segment while segments live in
  // ramDirectory, then decides whether the preceding segment is folded into
  // the same merge.
  void IndexWriter::flushRamSegments() {
    int_t minSegment = segmentInfos.size()-1; //don't make this unsigned...
    int_t docCount = 0;
    while (minSegment >= 0 &&
        &segmentInfos.info(minSegment).dir == &ramDirectory) {
      docCount += segmentInfos.info(minSegment).docCount;
      minSegment--;
    }

    // Step forward again (i.e. leave the preceding segment out of the merge)
    // when there is no preceding segment, when including it would exceed
    // mergeFactor documents, or when the last segment is not a RAM segment.
    if (minSegment < 0 ||        // add one FS segment?
        (docCount + segmentInfos.info(minSegment).docCount) > mergeFactor ||
        !(&segmentInfos.info(segmentInfos.size()-1).dir == &ramDirectory))
      minSegment++;

    if (minSegment >= segmentInfos.size())
      return;                    // none to merge

    mergeSegments(minSegment);
  }

  // Incremental merge policy. For target sizes mergeFactor, mergeFactor^2,
  // ... up to maxMergeDocs: collect the trailing segments smaller than the
  // target and merge them if together they reach the target; stop at the
  // first level where they do not.
  void IndexWriter::maybeMergeSegments() {
    long_t targetMergeDocs = mergeFactor;
    while (targetMergeDocs <= maxMergeDocs) {
      // find segments smaller than current target size
      int_t minSegment = segmentInfos.size();
      int_t mergeDocs = 0;
      while (--minSegment >= 0) {
        SegmentInfo& si = segmentInfos.info(minSegment);
        if (si.docCount >= targetMergeDocs)
          break;
        mergeDocs += si.docCount;
      }

      if (mergeDocs >= targetMergeDocs)    // found a merge to do
        mergeSegments(minSegment+1);
      else
        break;

      targetMergeDocs *= mergeFactor;      // increase target size
    }
  }

  // Merges segments [minSegment, segmentInfos.size()) into one new segment
  // in the target directory, replaces them in segmentInfos with the merged
  // segment, and commits the new segment list under the commit lock
  // (IndexWriterLockWith2 also deletes the now-unused segments).
  void IndexWriter::mergeSegments(const uint_t minSegment) {
    const char_t* mergedName = newSegmentName();
    int_t mergedDocCount = 0;
#ifdef USE_INFO_STREAM
    if (infoStream != NULL)
      *infoStream<< "merging segments" << "\n";
#endif

    SegmentMerger merger(directory, mergedName);
    // NOTE(review): the template argument was lost in extraction; restored
    // as SegmentReader* because that is what is pushed here and read back
    // in deleteSegments(). Confirm against IndexWriter.h.
    lucene::util::VoidList<SegmentReader*> segmentsToDelete;
    for (uint_t i = minSegment; i < segmentInfos.size(); i++) {
      SegmentInfo& si = segmentInfos.info(i);
#ifdef USE_INFO_STREAM
      if ( infoStream != NULL)
        *infoStream << " " << si.name << " (" << si.docCount << " docs)";
#endif
      SegmentReader* reader = new SegmentReader(si,false);
      merger.add(*reader);
      if ((&reader->directory == &this->directory) || // if we own the directory
          (&reader->directory == &this->ramDirectory))
        segmentsToDelete.push_back(reader);           // queue segment for deletion
      mergedDocCount += si.docCount;
    }

    // NOTE(review): the text between "\n into " and " minSegment )" was lost
    // in extraction. The info-stream line, merger.merge() and the loop
    // header below are reconstructed to match the surviving fragments and
    // the Java Lucene original; confirm against upstream CLucene.
#ifdef USE_INFO_STREAM
    if (infoStream != NULL) {
      *infoStream<<"\n into "<<mergedName<<" ("<<mergedDocCount<<" docs)\n";
    }
#endif

    merger.merge();

    // pop old infos & add new
    while ( segmentInfos.size() > minSegment )
      segmentInfos.pop_back();
    segmentInfos.push_back( new SegmentInfo(mergedName, mergedDocCount, directory));

    LuceneLock* lock = directory.makeLock(_T("commit.lock"));
    IndexWriterLockWith2 with ( lock,this,&segmentsToDelete );

    LOCK_MUTEX(DIRECTORIES_MUTEX); // in- & inter-process sync
    // Run the commit inside _TRY/_FINALLY so that a failure no longer leaves
    // DIRECTORIES_MUTEX locked or leaks the lock object and segment name.
    _TRY {
      with.run();
    } _FINALLY(
      UNLOCK_MUTEX(DIRECTORIES_MUTEX);
      delete lock;
      delete[] mergedName; // allocated with new[] by newSegmentName()
    );
  }

  // Deletes the files of the given (already merged) segments. Files listed
  // in the "deletable" file from earlier runs are retried first. Any file in
  // the target directory that still cannot be deleted is recorded via
  // writeDeleteableFiles() for a later attempt.
  // NOTE(review): parameter element type restored as SegmentReader* (lost in
  // extraction); confirm against IndexWriter.h.
  void IndexWriter::deleteSegments(lucene::util::VoidList<SegmentReader*> &segments) {
    StringArrayConst deletable;
    deletable.setDoDelete(DELETE_TYPE_DELETE_ARRAY);

    StringArrayConst &deleteArray = readDeleteableFiles();
    deleteFiles(deleteArray, deletable); // try to delete deleteable
    delete &deleteArray;

    for (uint_t i = 0; i < segments.size(); i++) {
      SegmentReader* reader = segments.at(i);
      StringArrayConst& files = reader->files();
      if (&reader->directory == &this->directory)
        deleteFiles(files, deletable);          // try to delete our files
      else
        deleteFiles(files, reader->directory);  // delete, eg, RAM files

      delete &files;
    }

    writeDeleteableFiles(deletable);            // note files we can't delete
  }

  // Deletes every file in `files` from `directory`. Errors are not caught.
  void IndexWriter::deleteFiles(const StringArrayConst& files, lucene::store::Directory& directory) {
    for (uint_t i = 0; i < files.size(); i++)
      directory.deleteFile( files[i] );
  }

  // Tries to delete every file in `files` from the target directory. When a
  // delete throws and the file still exists, a copy of its name is appended
  // to `deletable` so the delete can be retried later.
  void IndexWriter::deleteFiles(const StringArrayConst& files, StringArrayConst& deletable) {
    for (uint_t i = 0; i < files.size(); i++) {
      const char_t* file = files[i];
      try {
        directory.deleteFile(file);      // try to delete each file
      } catch (THROW_TYPE e) {           // if delete fails
        if (directory.fileExists(file)) {
#ifdef USE_INFO_STREAM
          if (infoStream != NULL)
            *infoStream << e.errstr << "; Will re-try later.\n";
#endif
          deletable.push_back(stringDuplicate(file));    // add to deletable
        }
      }
    }
  }

  // Reads the list of file names stored in the "deletable" file of the
  // target directory. Returns a heap-allocated list (empty when the file
  // does not exist); the caller deletes it.
  StringArrayConst& IndexWriter::readDeleteableFiles() {
    StringArrayConst& result = *new StringArrayConst(true,DELETE_TYPE_DELETE_ARRAY);
    if (!directory.fileExists(_T("deletable")))
      return result;

    InputStream& input = directory.openFile(_T("deletable"));
    _TRY {
      for (int_t i = input.readInt(); i > 0; i--)    // read file names
        result.push_back(input.readString());
    } _FINALLY(
      input.close();
      delete &input;
    );

    return result;
  }

  // Writes `files` (a count followed by each name) to "deleteable.new" and
  // then renames that file to "deletable".
  void IndexWriter::writeDeleteableFiles(StringArrayConst& files) {
    OutputStream& output = directory.createFile(_T("deleteable.new"));
    _TRY {
      output.writeInt(files.size());
      for (uint_t i = 0; i < files.size(); i++)
        output.writeString( files.at(i) );
    } _FINALLY(
      output.close();
      delete &output;
    );

    directory.renameFile(_T("deleteable.new"), _T("deletable"));
  }

  // Returns a newly allocated segment name: "_" followed by the current
  // segmentInfos.counter rendered in radix CHAR_RADIX; the counter is then
  // incremented. The caller releases the result with delete[].
  char_t* IndexWriter::newSegmentName() {
    char_t buf[9];

    LOCK_MUTEX(newSegmentName_LOCK);
    integerToString(segmentInfos.counter++,buf,CHAR_RADIX); //36 is RADIX of 10 digits and 26 letters
    UNLOCK_MUTEX(newSegmentName_LOCK);

    return lucene::util::Misc::join( _T("_"),buf);
  }

}}
#endif