// clucene includes
#include "CLucene.h"
#include "CLucene/util/Reader.h"
#include "CLucene/util/Misc.h"
#include "CLucene/queryParser/MultiFieldQueryParser.h"
#ifdef HIGHLIGHTING
# include "..\contributions\highlighter\QueryTermExtractor.h"
# include "..\contributions\highlighter\QueryScorer.h"
# include "..\contributions\highlighter\Highlighter.h"
# include "..\contributions\highlighter\SimpleHTMLFormatter.h"
#define HIGHLIGHT_HTML_START _T("")
#define HIGHLIGHT_HTML_END _T("")
#define HIGHLIGHT_FRAGMENT_SIZE 100
#define HIGHLIGHT_MAX_NUM_FRAGMENTS 3
#define HIGHLIGHT_SEPARATOR _T("...")
using namespace lucene::search::highlight;
#endif
#include
#include "clucene_dll.h"
using namespace std;
using namespace lucene::index;
using namespace lucene::analysis;
using namespace lucene::analysis::standard;
using namespace lucene::util;
using namespace lucene::store;
using namespace lucene::document;
using namespace lucene::search;
using namespace lucene::queryParser;
#ifdef _UNICODE
//# ifdef USE_NARROW
# define o_char_t char
# define _CONV_AWAY(x) Misc::wideToChar(x)
# define _CONV_BACK(x) Misc::charToWide(x)
# define _COPY_AWAY(src,dest,len) Misc::wideToChar(src,dest,len)
# define _COPY_BACK(src,dest,len) Misc::charToWide(src,dest,len)
//# else
//# endif
#else
//# ifdef USE_WIDE
# define o_char_t wchar_t
# define _CONV_AWAY(x) Misc::charToWide(x)
# define _CONV_BACK(x) Misc::wideToChar(x)
# define _COPY_AWAY(src,dest,len) Misc::charToWide(src,dest,len)
# define _COPY_BACK(src,dest,len) Misc::wideToChar(src,dest,len)
//# else
//# endif
#endif
//private================================
struct Resource{
public:
//searcher
Document* doc;
IndexSearcher* searcher;
Query* query;
Hits* hits;
int hitPos;
int count;
char_t directory[CL_MAX_DIR];
//general
char_t errstr[errlen];
//Directory* dir;
IndexReader* reader;
Resource(const char_t* dir){
doc = NULL;
searcher = NULL;
query = NULL;
hits = NULL;
hitPos = 0;
errstr[0]=0;
reader = NULL;
count=1;
stringNCopy(directory,dir,CL_MAX_DIR);
}
~Resource(){
delete doc;
doc = NULL;
delete reader;
reader = NULL;
}
};
extern "C" {
// File-wide state shared by the API functions below.
// Table of open resources; the functions below index it with the integer handle they are given.
Resource* resources[MAX_CLUCENE_DLL_RESOURCES];
// Tested by getResource before it works with the table.
int resourcesInitialised = 0;
// Plain flag that getResource polls and then sets to guard the table.
// NOTE(review): this is an ordinary bool, not an atomic or an OS lock — confirm it is adequate for the intended threading.
bool resource_mutex = false;
// Error text that is not tied to a particular resource; cleared by resetError.
char_t globalerrstr[errlen];
// One analyzer instance passed to the query parsers and IndexWriter constructions in this file.
StandardAnalyzer analyzer;
// Returns non-zero when 'resource' is within the table bounds and its slot is non-NULL.
// On failure it records a global error unless reportFail is 0.
int isValidResource(const int resource, const int reportFail = 1);
}
#ifdef USE_UNICODE
// Wide-character error reporter; sits inside the surrounding USE_UNICODE conditional.
// A NULL 'error' is ignored.
// NOTE(review): this definition is damaged. The 'for' header below has lost
// the text between "i" and "errstr[i]", the braces no longer balance, and the
// remaining lines write to resources[resource]->errstr although no 'resource'
// parameter is declared here. The missing text presumably held the end of this
// function and the start of a per-resource reporter — restore it from version
// control before building.
void reportGlobalError(const wchar_t* error){
unsigned int i;
if ( error == NULL )
return;
for ( i=0;ierrstr[i]=error[i];
resources[resource]->errstr[i]=0;
//CL_CallBack_Error ( errstr );
}
}
#endif
// Narrow-character error reporter. A NULL 'error' is ignored.
// NOTE(review): this definition is damaged in the same way as the wide-character
// variant: the 'for' header has lost the text between "i" and "errstr[i]", the
// braces no longer balance, and the visible tail writes to
// resources[resource]->errstr without any 'resource' parameter in scope. The
// missing text presumably held the end of this function and the start of a
// per-resource reporter — restore it from version control before building.
void reportGlobalError(const char* error){
unsigned int i;
if ( error == NULL )
return;
for ( i=0;ierrstr[i]=error[i];
resources[resource]->errstr[i]=0;
//CL_CallBack_Error ( errstr );
}
}
// Finds or creates the table entry for directory 'dir' and returns its index.
// Visible outcomes: the index of an existing entry (after incrementing its
// count), the index of a newly allocated Resource, or 0 after recording
// "Max resource limit exceeded".
int getResource(const char_t* dir){
//multithreading mutex lock...
// Poll until the flag is clear, then claim it.
// NOTE(review): the test and the assignment are separate steps on a plain bool,
// so two threads could both pass the loop — confirm whether that matters here.
while ( resource_mutex ){
processSleep(5);
}
resource_mutex = true;
_TRY{
if ( !resourcesInitialised ){
// NOTE(review): the next line is damaged — the text between "i" and
// "directory,dir)==0" has been lost. It presumably held the table
// initialisation, the declaration of 'pos' (used below but not declared in the
// visible text) and the start of the search loop. Restore from version control.
for ( int i=1;idirectory,dir)==0 ){
//directory already found
resources[i]->count++;
resource_mutex = false;
return i;
}
}
if ( pos > 0 ){
//no existing directory found
resources[pos] = new Resource(dir);
resource_mutex = false;
return pos;
}
reportGlobalError(_T("Max resource limit exceeded"));
}_FINALLY(
// Clears the flag on the way out of the _TRY region.
resource_mutex = false;
);
return 0;
}
extern "C" {
// Tells whether 'resource' lies inside the table bounds and refers to a
// non-NULL entry. Returns true (1) when it does. Otherwise returns false (0),
// first recording "Not a valid resource" as the global error when reportFail
// is non-zero.
int isValidResource(const int resource, const int reportFail){
    const bool withinTable = resource > -1 && resource < MAX_CLUCENE_DLL_RESOURCES;
    // The slot is only inspected once the index is known to be in range.
    if ( withinTable && resources[resource] != NULL )
        return true;
    if ( reportFail )
        reportGlobalError(_T("Not a valid resource"));
    return false;
}
void resetError(const int resource){
globalerrstr[0]=0;
if ( resource > 0 && resource < MAX_CLUCENE_DLL_RESOURCES && resources[resource] != NULL )
resources[resource]->errstr[0]=0;
}
#ifndef CLUCENE_LITE
int _addField(const int resource, const char_t* wfield, Reader& reader, const int store, const int index, const int token){
resetError(resource);
int ret = 0;
if ( isValidResource(resource) ){
if ( resources[resource]->doc == NULL ){
CL_New_Document(resource);
}
try{
lucene::document::Field* fld = new lucene::document::Field(wfield,&reader,store!=0,index!=0,token!=0);
resources[resource]->doc->add(*fld);
ret = 1;
}catch(exception e){
reportError( e.what(), resource);
}catch(...){
reportError( _T("Unknown error"), resource);
}
}
return ret;
}
#endif
int _CL_Search(const int resource, Query* q){
try{
CL_ClearSearch(resource);
resources[resource]->query = q;
resources[resource]->searcher = new IndexSearcher(*resources[resource]->reader);
resources[resource]->hits = &resources[resource]->searcher->search(*resources[resource]->query);
return 1;
}catch(exception e){
reportError( e.what(), resource);
}catch(...){
reportError( _T("Unknown error"), resource);
}
return 0;
}
//
//end of private ================================
#ifdef USE_OTHER
// CL_ERRSTR for the alternate character type: the message is fetched into a
// temporary char_t buffer of 'len' elements and then converted into 'pl'
// with _COPY_AWAY.
CLUCENEDLL_API void CL_ERRSTR_OTHER(const int resource, o_char_t* pl, size_t len){
    char_t* nativeText = new char_t[len];

    CL_ERRSTR(resource,nativeText,len);
    _COPY_AWAY(nativeText,pl,len);

    delete[] nativeText;
}
// CL_OPEN for the alternate character type: converts 'dir' into a char_t
// buffer of CL_MAX_DIR elements with _COPY_BACK and forwards to CL_OPEN,
// returning whatever CL_OPEN returns.
CLUCENEDLL_API int CL_OPEN_OTHER(const o_char_t* dir, int create){
    char_t nativeDir[CL_MAX_DIR];
    _COPY_BACK(dir,nativeDir,CL_MAX_DIR);

    const int handle = CL_OPEN(nativeDir,create);
    return handle;
}
// CL_DOCUMENT_INFO for the alternate character type: the description is
// produced in a temporary char_t buffer of 'len' elements and then converted
// into 'info' with _COPY_AWAY.
CLUCENEDLL_API void CL_DOCUMENT_INFO_OTHER(const int resource, o_char_t* info, const int len){
    char_t* nativeInfo = new char_t[len];

    CL_DOCUMENT_INFO(resource,nativeInfo,len);
    _COPY_AWAY(nativeInfo,info,len);

    delete[] nativeInfo;
}
// NOTE(review): this span is damaged. It starts as the alternate-character-type
// wrapper for the flagged multi-field search and, after the garbled 'for' line
// below, continues with the tail of a routine that opens (or creates) an index
// and returns the resource handle. Everything in between — including the end of
// this function and the header of the open routine — is missing. Restore it
// from version control before building.
CLUCENEDLL_API int CL_SEARCHMULTIFIELDSFLAGGED_OTHER(const int resource, const o_char_t* query, const o_char_t** fields, const int fieldsLen, const l_byte_t* flags){
// Convert the query text with _CONV_BACK and allocate one converted pointer per field.
char_t* oquery = _CONV_BACK(query);
char_t** ofields = new char_t*[fieldsLen];
// NOTE(review): text lost between "i" and "reader"; what follows belongs to the open routine.
for ( int i=0;ireader = &IndexReader::open(dir);
return resource;
}else if ( create ){
#ifndef CLUCENE_LITE
// Builds an IndexWriter on the directory (third argument true — presumably
// "create"; confirm), closes it straight away, then opens the reader on the
// same directory.
Directory* d = &FSDirectory::getDirectory( dir,true );
IndexWriter w(*d,analyzer,true);
w.close(false);
resources[resource]->reader = &IndexReader::open(*d);
return resource;
#else
// Lite builds only report that index creation is unavailable.
reportGlobalError("Can't create directory in Lite Mode");
#endif
}
}catch(exception e){
reportGlobalError( e.what());
}catch(...){
reportGlobalError( _T("Unknown error") );
}
}
// Reached on every failure path that is visible here.
return 0;
}
// 'info' must be an initialised char array; a maximum of 'len' will be copied.
// Writes the current document's toString() text into 'info'. When the resource
// has no document, 'info' becomes an empty string and "No document available"
// is recorded. An invalid resource leaves 'info' untouched.
CLUCENEDLL_API void CL_DOCUMENT_INFO(const int resource, char_t* info, const int len){
    if ( !isValidResource(resource) )
        return;

    if ( resources[resource]->doc == NULL ){
        info[0]=0;
        reportError(_T("No document available"), resource);
        return;
    }

    // toString() hands back an array that this function releases after copying.
    const char_t* description = resources[resource]->doc->toString();
    stringNCopy(info,description,len);
    delete[] description;
}
// Parses 'query' against the given fields with per-field 'flags' and runs the
// resulting query on the resource.
// Returns the result of _CL_Search (1 on success), or 0 when the resource is
// invalid or parsing raised an exception (the message is recorded on the resource).
CLUCENEDLL_API int CL_SEARCHMULTIFIELDSFLAGGED(const int resource, const char_t* query, const char_t** fields, const int fieldsLen, const l_byte_t* flags){
    int ret = 0;
    if ( isValidResource(resource) ){
        // The parse call is guarded like the other entry points in this file,
        // so an exception cannot escape through the C interface.
        try{
            Query* q = &MultiFieldQueryParser::Parse(query,fields,fieldsLen,flags,analyzer);
            ret = _CL_Search(resource,q);
        }catch(const exception& e){
            reportError( e.what(), resource);
        }catch(...){
            reportError( _T("Unknown error"), resource);
        }
    }
    return ret;
}
// NOTE(review): this span is damaged. It starts as the multi-field search entry
// point (allocating one flag byte per field) and, after the garbled 'for' line,
// continues with the tail of a routine that copies the current query's
// toString() text into a caller buffer 'pl' of length 'len'. Neither 'pl' nor
// 'len' is declared in the visible text, and 'ret' is never returned. Restore
// the missing text from version control before building.
CLUCENEDLL_API int CL_SEARCHMULTIFIELDS(const int resource, const char_t* qry, const char_t** fields, const int fieldsLen){
int ret = 0;
if ( isValidResource(resource) ){
l_byte_t* flags = new l_byte_t[fieldsLen];
// NOTE(review): text lost between "i" and "query;".
for ( int i=0;iquery;
if ( q != NULL ){
// toString returns an array that is released after copying.
const char_t* buf = q->toString(_T(""));
stringNCopy(pl,buf,len);
delete[] buf;
}
}
}
#ifdef HIGHLIGHTING
// Highlights the terms of the resource's current query inside 'text' and copies
// the result into 'ret' (stringNCopy is given 'ret_len' as its length argument).
//   separator     : passed to getBestFragments to join fragments
//   max_fragments : passed to getBestFragments
//   fragment_size : passed to SimpleFragmenter
//   type          : only the value 1 does anything (SimpleHTMLFormatter with d1 and d2)
//   text_is_filename : currently unused
// Nothing is written to 'ret' when the resource is invalid, no query is stored,
// or 'type' is not 1.
CLUCENEDLL_API void CL_HIGHLIGHT_X(const int resource, const char_t* text, const int text_is_filename, char_t* ret, const size_t ret_len, const char_t* separator, const int max_fragments, const int fragment_size, const int type, const char_t* d1, const char_t* d2){
    if ( !isValidResource(resource) )
        return;

    Query* activeQuery = resources[resource]->query;
    if ( activeQuery == NULL )
        return;

    SimpleAnalyzer simpleAnalyzer;
    if ( type != 1 )
        return;

    Highlighter highlighter(new SimpleHTMLFormatter( d1, d2), new QueryScorer(activeQuery), new SimpleFragmenter(fragment_size));

    //todo: make getBestFragments use a file reader object when text_is_filename is set
    // r = new FileReader(text);
    Reader* source = new StringReader(text);
    TokenStream& tokens = simpleAnalyzer.tokenStream(_T("dummy"), source);

    // Ask for the best fragments, copy them out, then release everything in
    // the same order as before: text, token stream, reader.
    char_t* fragments = highlighter.getBestFragments(&tokens, text, max_fragments, separator);
    stringNCopy(ret,fragments,ret_len);
    delete[] fragments;

    tokens.close();
    delete &tokens;
    delete source;
}
// Convenience form of CL_HIGHLIGHT_X that supplies the HIGHLIGHT_* defaults
// defined at the top of this file and formatter type 1.
CLUCENEDLL_API void CL_HIGHLIGHT(const int resource, const char_t* text, const int text_is_filename, char_t* ret, const size_t ret_len){
    CL_HIGHLIGHT_X(
        resource,
        text,
        text_is_filename,
        ret,
        ret_len,
        HIGHLIGHT_SEPARATOR,
        HIGHLIGHT_MAX_NUM_FRAGMENTS,
        HIGHLIGHT_FRAGMENT_SIZE,
        1,
        HIGHLIGHT_HTML_START,
        HIGHLIGHT_HTML_END);
}
#endif
// Reads 'field' from the current hit and converts it with DateField::stringToTime.
// Returns the converted value, or 0 when the resource is invalid, no search
// results are available, the field is missing, the value cannot be converted,
// or an exception was caught. Each failure other than an invalid resource
// records a message on the resource.
CLUCENEDLL_API type_long CL_GETDATEFIELD(const int resource, const char_t* field){
    type_long ret = 0;
    if ( isValidResource(resource) ){
        if ( resources[resource]->hits == NULL || resources[resource]->searcher == NULL ){
            reportError (_T("No hits or searcher available"), resource);
            return 0;
        }
        try{
            Document& doc = resources[resource]->hits->doc(resources[resource]->hitPos);
            // Check kept from the original. NOTE(review): a reference cannot be
            // null in well-formed C++, so this may be dead code — confirm.
            if ( &doc != NULL ){
                const char_t* val = doc.get(field);
                if ( val != NULL ){
                    try{
                        ret = DateField::stringToTime(val);
                    }catch(...){
                        reportError(_T("Field is not a date"), resource);
                    }
                }else{
                    reportError(_T("Field does not exist"), resource);
                }
            }
        }catch(const exception& e){
            // Caught by reference so a derived exception keeps its own what() text.
            reportError( e.what(), resource);
        }catch(...){
            reportError( _T("Unknown error"), resource);
        }
    }
    return ret;
}
// Copies 'field' of the current hit into a newly allocated array.
// On success *value points to a new char_t array of *value_len + 1 elements
// (allocated with new[]) and *value_len holds the string length; returns 1.
// On failure returns 0; *value_len is 0 whenever the resource was valid, and
// *value is left untouched.
CLUCENEDLL_API int CL_GETFIELD(const int resource, const char_t* field, char_t** value, size_t* value_len){
    int ret = 0;
    if ( isValidResource(resource) ){
        *value_len = 0;
        if ( resources[resource]->hits == NULL || resources[resource]->searcher == NULL ){
            reportError ( _T("No hits or searcher available"), resource);
            return 0;
        }
        try{
            Document& doc = resources[resource]->hits->doc(resources[resource]->hitPos);
            // Check kept from the original. NOTE(review): a reference cannot be
            // null in well-formed C++, so this may be dead code — confirm.
            if ( &doc != NULL ){
                //todo: should be a way of getting value length.. not binary safe
                const char_t* val = doc.get(field); //a reference to that field
                if ( val != NULL ){
                    *value_len = stringLength (val);
                    *value = new char_t[*value_len+1];
                    stringNCopy(*value,val,*value_len+1);
                    ret = 1;
                }else{
                    reportError(_T("Field does not exist"), resource);
                }
            }
        }catch(const exception& e){
            // Caught by reference so a derived exception keeps its own what() text.
            reportError( e.what(), resource);
        }catch(...){
            reportError( _T("Unknown error"), resource);
        }
    }
    return ret;
}
//copy error string. 'pl' will be a maximum length of 'len'
// Copies the resource's error text into 'pl', writing at most 'len' elements
// including the terminator. When 'resource' is not valid the global error text
// is copied instead via CL_ERRSTRGLOBAL. A NULL 'pl' or a zero 'len' is a no-op.
// NOTE(review): the loop header in the previous text was garbled; the bound
// below is rebuilt from the comment above and the surviving fragment of the
// condition — compare against version control.
CLUCENEDLL_API void CL_ERRSTR(const int resource, char_t* pl, size_t len){
    //resetError(resource);
    if ( pl == NULL || len == 0 )
        return;
    if ( isValidResource(resource,false) ){
        const char_t* src = resources[resource]->errstr;
        size_t i=0;
        // i+1 < len leaves room for the terminator and cannot underflow.
        for ( i=0; i+1<len && src[i]!=0; i++ )
            pl[i]=src[i];
        pl[i]=0;
    }else
        CL_ERRSTRGLOBAL(pl,len);
}
//copy global error string. 'pl' will be a maximum length of 'len'
// NOTE(review): this span is damaged. It starts as CL_ERRSTRGLOBAL and, after
// the garbled 'for' line, continues with the tail of a routine that adds a date
// field to the current document. 'resource', 'time', 'field', 'store', 'index',
// 'token' and 'ret' are used below but not declared in the visible text.
// Restore the missing text from version control before building.
CLUCENEDLL_API void CL_ERRSTRGLOBAL(char_t* pl, size_t len){
size_t i=0;
// NOTE(review): text lost between "i" and "doc == NULL".
for ( i=0; idoc == NULL ){
CL_New_Document(resource);
}
try{
// timeToString returns an array that is released once the field has been added.
char_t* value = lucene::document::DateField::timeToString(time);
lucene::document::Field* fld = new lucene::document::Field(field,value,store!=0,index!=0,token!=0);
resources[resource]->doc->add(*fld);
delete[] value;
ret = 1;
}catch(exception e){
reportError( e.what(), resource);
}catch(...){
reportError( _T("Unknown error"), resource);
}
}
return ret;
}
//deletes documents returned from specified query.
//returns number of documents deleted.
//returns -1 if an error occurs
// 'query' is parsed against 'field' with the shared analyzer, searched on the
// resource's reader, and every hit is deleted through that reader. The
// temporary searcher, hits and query are released before returning.
CLUCENEDLL_API int CL_DELETE(const int resource, const char_t* query,const char_t* field){
    int ret = -1;
    if ( isValidResource(resource) ){
        // CL_Optimize can leave the reader NULL after a failure; refuse to dereference it.
        if ( resources[resource]->reader == NULL ){
            reportError( _T("No reader available"), resource);
            return -1;
        }
        Query* q = NULL;
        IndexSearcher *s = NULL;
        Hits* h = NULL;
        try{
            q = &QueryParser::Parse(query,field,analyzer);
            s = new IndexSearcher(*resources[resource]->reader);
            h = &s->search(*q);
            ret = 0;
            // Walk every hit; the loop bound had been garbled to "iLength()".
            for ( int i=0;i<h->Length();i++ ){
                resources[resource]->reader->Delete(h->id(i));
                ret++;
            }
        }catch(const exception& e){
            // Caught by reference so a derived exception keeps its own what() text.
            reportError( e.what(), resource);
            ret = -1;
        }catch(...){
            reportError( _T("Unknown error"), resource);
            ret = -1;
        }
        // Best-effort close, as in CL_ClearSearch, so a failure here cannot
        // skip the deletes below or escape through the C interface.
        try{
            if ( s != NULL )
                s->close();
        }catch(...){
        }
        delete s;
        delete h;
        delete q;
    }
    return ret;
}
#endif
//NON-CHAR SPECIFIC CODE//////////////////////////////
//
//
// NOTE(review): this definition is damaged. The 'for' header has lost the text
// between "i" and "1){", the loop body is missing, and the braces no longer
// balance. It presumably walked the resource table to release entries —
// confirm and restore from version control before building.
CLUCENEDLL_API void CL_Cleanup(){
for ( int i=1;i1){
}
}
}
//reopen the reader to take note of segment changes
// Closes and deletes the resource's reader (when one is open) and opens a new
// one on the stored directory.
// Returns 1 on success, 0 when the resource is invalid or an exception was
// caught (the message is recorded on the resource).
CLUCENEDLL_API int CL_Reload(const int resource){
    resetError(resource);
    if ( isValidResource(resource) ){
        try{
            if ( resources[resource]->reader != NULL ){
                resources[resource]->reader->close();
                delete resources[resource]->reader;
                // Cleared at once so a failing open() below cannot leave a
                // pointer to the deleted reader behind.
                resources[resource]->reader = NULL;
            }
            resources[resource]->reader = &IndexReader::open(resources[resource]->directory);
            return 1;
        }catch(const exception& e){
            // Caught by reference so a derived exception keeps its own what() text.
            reportError( e.what(), resource);
        }catch(...){
            reportError( "Unknown error", resource);
        }
    }
    return 0;
}
// Drops one reference to the resource. When the count falls below 1 the reader
// is closed, the search state is cleared and the entry is deleted and removed
// from the table.
// Returns 1 when the entry was released, the previous count (count + 1) when
// other references remain, and 0 when the resource is invalid or an exception
// was caught while releasing it.
CLUCENEDLL_API int CL_Close(const int resource){
    resetError(resource);
    if ( isValidResource(resource) ){
        resources[resource]->count--;
        if ( resources[resource]->count < 1 ){
            try{
                // The reader can be NULL after a failed CL_Optimize.
                if ( resources[resource]->reader != NULL )
                    resources[resource]->reader->close();
                CL_ClearSearch(resource);
                delete resources[resource];
                resources[resource] = NULL;
                return 1;
            }catch(const exception& e){
                // Caught by reference so a derived exception keeps its own what() text.
                reportError( e.what(), resource);
            }catch(...){
                reportError( "Unknown error", resource);
            }
        }else
            return resources[resource]->count +1;
    }
    return 0;
}
// Discards the resource's search state: closes and deletes the searcher,
// deletes the query and the hits, and rewinds hitPos to 0. Exceptions raised
// while releasing the searcher or the query are swallowed. Does nothing for an
// invalid resource.
CLUCENEDLL_API void CL_ClearSearch(const int resource){
    if ( !isValidResource(resource) )
        return;

    Resource* entry = resources[resource];

    try{
        if ( entry->searcher != NULL ){
            entry->searcher->close();
            _DELETE(entry->searcher);
        }
    }catch(...){
    }

    try{
        if ( entry->query != NULL ){
            _DELETE(entry->query);
        }
    }catch(...){
    }

    _DELETE (entry->hits);
    entry->hitPos = 0;
}
// Number of hits held by the resource's last search.
// Returns 0 for an invalid resource, and 0 after recording "No hits available"
// when no hits are stored.
CLUCENEDLL_API int CL_HitCount(const int resource){
    if ( !isValidResource(resource) )
        return 0;

    if ( resources[resource]->hits != NULL )
        return resources[resource]->hits->Length();

    reportError("No hits available", resource);
    return 0;
}
// Advances the current hit position by one.
// Returns 1 when the new position is still below the hit count, otherwise 0.
// The position is incremented even when it moves past the last hit.
// Returns 0 without moving when the resource is invalid or has no hits or searcher.
CLUCENEDLL_API int CL_NextHit(const int resource){
    if ( !isValidResource(resource) )
        return 0;

    Resource* entry = resources[resource];
    if ( entry->hits == NULL || entry->searcher == NULL ){
        reportError ("No hits or searcher available", resource);
        return 0;
    }

    entry->hitPos++;
    if ( entry->hitPos < entry->hits->Length() )
        return 1;
    return 0;
}
// Moves the current hit position to 'number'.
// Returns 1 when the position was set, matching the success convention of the
// other calls in this file. Returns 0 when the resource is invalid, no search
// results are available, or 'number' is outside [0, hit count); the last two
// cases record a message on the resource.
CLUCENEDLL_API int CL_GotoHit(const int resource, const int number){
    int ret = 0;
    if ( isValidResource(resource) ){
        if ( resources[resource]->hits == NULL || resources[resource]->searcher == NULL ){
            reportError ("No hits or searcher available", resource);
        }else if ( (number < 0) || (number >= resources[resource]->hits->Length()) ){
            // 'number' equal to the hit count is already one past the last hit.
            reportError ("Hit number out of range", resource);
        }else{
            resources[resource]->hitPos = number;
            ret = 1;
        }
    }
    return ret;
}
// Score of the current hit.
// Returns 0 when the resource is invalid, no search results are available, or
// an exception was caught (the last two record a message on the resource).
CLUCENEDLL_API float CL_HitScore(const int resource){
    float ret = 0;
    if ( isValidResource(resource) ){
        if ( resources[resource]->hits == NULL || resources[resource]->searcher == NULL ){
            reportError ( _T("No hits or searcher available"), resource);
            return 0;
        }
        try{
            ret = resources[resource]->hits->score(resources[resource]->hitPos);
        }catch(const exception& e){
            // Caught by reference so a derived exception keeps its own what() text.
            reportError( e.what(), resource);
        }catch(...){
            reportError( _T("Unknown error"), resource);
        }
    }
    return ret;
}
#ifndef CLUCENE_LITE
//resets the current document
// Replaces the resource's document with a fresh empty Document.
// Returns 1 on success, 0 when the resource is invalid or an exception was
// caught (the message is recorded on the resource). If creating the new
// Document fails, the previous document is left in place.
CLUCENEDLL_API int CL_New_Document(const int resource){
    resetError(resource);
    if ( isValidResource(resource) ){
        try{
            // Allocate first and swap afterwards, so a failed allocation never
            // leaves 'doc' pointing at a deleted object.
            Document* fresh = new Document();
            Document* previous = resources[resource]->doc;
            resources[resource]->doc = fresh;
            delete previous;
            return 1;
        }catch(const exception& e){
            // Caught by reference so a derived exception keeps its own what() text.
            reportError( e.what(), resource);
        }catch(...){
            reportError( "Unknown error", resource);
        }
    }
    return 0;
}
// Optimizes the index: releases the reader, runs IndexWriter::optimize on the
// stored directory, then reopens the reader.
// Returns 1 on success, 0 when the resource is invalid or an exception was
// caught (the message is recorded on the resource). After a failure the reader
// may be left NULL.
CLUCENEDLL_API int CL_Optimize(const int resource){
    resetError(resource);
    if ( isValidResource(resource) ){
        try{
            //temporarily close the reader
            // The reader may already be NULL if an earlier optimize failed.
            if ( resources[resource]->reader != NULL ){
                resources[resource]->reader->close();
                delete resources[resource]->reader;
                resources[resource]->reader = NULL;
            }
            IndexWriter w(resources[resource]->directory,analyzer,false);
            w.optimize();
            w.close(false);
            //reopen the reader.
            resources[resource]->reader = &IndexReader::open(resources[resource]->directory);
            return 1;
        }catch(const exception& e){
            // Caught by reference so a derived exception keeps its own what() text.
            reportError( e.what(), resource);
        }catch(...){
            reportError( "Unknown error", resource);
        }
    }
    return 0;
}
//adds the current document
// Writes the resource's current document to the index through a temporary
// IndexWriter, then replaces it with a fresh empty Document whether or not the
// write succeeded.
// Returns 1 when the document was added and the writer closed cleanly.
// Returns 0 when the resource is invalid, there is no document, no reader is
// open, or an exception was caught (the message is recorded on the resource).
CLUCENEDLL_API int CL_Insert_Document(const int resource){
    resetError(resource);
    int ret = 0;
    if ( isValidResource(resource) ){
        if ( resources[resource]->doc == NULL ){
            reportError("No document available", resource);
            return 0;
        }
        IndexWriter* w = NULL;
        try{
            // CL_Optimize can leave the reader NULL after a failure; refuse to dereference it.
            if ( resources[resource]->reader == NULL ){
                reportError("No reader available", resource);
            }else{
                w = new IndexWriter(resources[resource]->reader->directory,analyzer,false);
                w->addDocument(*resources[resource]->doc);
                ret = 1;
            }
        }catch(const exception& e){
            // Caught by reference so a derived exception keeps its own what() text.
            reportError( e.what(), resource);
        }catch(...){
            reportError( "Unknown error", resource);
        }
        // Closing is guarded too, so a failure is reported instead of escaping
        // through the C interface and leaking the writer.
        if ( w != NULL ){
            try{
                w->close(false);
            }catch(const exception& e){
                reportError( e.what(), resource);
                ret = 0;
            }catch(...){
                reportError( "Unknown error", resource);
                ret = 0;
            }
        }
        _DELETE(w);
        //clear current document
        delete resources[resource]->doc;
        // Cleared before reallocating so a failed allocation cannot leave a
        // pointer to the deleted document behind.
        resources[resource]->doc = NULL;
        resources[resource]->doc = new Document();
    }
    return ret;
}
#endif
//
//
//////////////////////////////////////////////////////
}//extern "C"