Skip to content
Snippets Groups Projects
Commit 44e427a8 authored by srosse's avatar srosse
Browse files

OO-1522: use the current index to search for the file document indexer

parent e0d90dae
No related branches found
No related tags found
No related merge requests found
...@@ -28,6 +28,7 @@ package org.olat.search; ...@@ -28,6 +28,7 @@ package org.olat.search;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.olat.core.commons.persistence.SortKey; import org.olat.core.commons.persistence.SortKey;
...@@ -73,6 +74,19 @@ public interface SearchService { ...@@ -73,6 +74,19 @@ public interface SearchService {
int firstResult, int maxReturns, SortKey... orderBy) int firstResult, int maxReturns, SortKey... orderBy)
throws ServiceNotAvailableException, ParseException, QueryException; throws ServiceNotAvailableException, ParseException, QueryException;
/**
* Search a document with the specified resource URL.
* <p>
* Unlike the query based search, this method looks up one single document
* and returns the Lucene document itself.
*
* @param resourceUrl The resource URL of the document to look up
* @return The whole document with all fields, null if not found.
* @throws ServiceNotAvailableException If the search service cannot be used,
*         e.g. because it is disabled
* @throws ParseException Presumably if the lookup query cannot be parsed
*         by Lucene (NOTE(review): confirm against the implementations)
* @throws QueryException NOTE(review): the exact conditions are not visible
*         here, confirm against the implementations
*/
public Document doSearch(String resourceUrl)
throws ServiceNotAvailableException, ParseException, QueryException;
/** /**
* Check a query for similar words. * Check a query for similar words.
* @param query * @param query
......
...@@ -61,6 +61,8 @@ public abstract class AbstractOlatDocument implements Serializable { ...@@ -61,6 +61,8 @@ public abstract class AbstractOlatDocument implements Serializable {
public static final String FILETYPE_FIELD_NAME = "filetype"; public static final String FILETYPE_FIELD_NAME = "filetype";
public static final String RESOURCEURL_FIELD_NAME = "resourceurl"; public static final String RESOURCEURL_FIELD_NAME = "resourceurl";
public static final String RESOURCEURL_MD5_FIELD_NAME = "resourceurlmd";
public static final String AUTHOR_FIELD_NAME = "author"; public static final String AUTHOR_FIELD_NAME = "author";
......
...@@ -35,6 +35,7 @@ import org.apache.lucene.document.Document; ...@@ -35,6 +35,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.olat.core.util.Encoder;
import org.olat.core.util.StringHelper; import org.olat.core.util.StringHelper;
...@@ -88,6 +89,7 @@ public class OlatDocument extends AbstractOlatDocument { ...@@ -88,6 +89,7 @@ public class OlatDocument extends AbstractOlatDocument {
document.add(createTextField(DESCRIPTION_FIELD_NAME,getDescription(), 2)); document.add(createTextField(DESCRIPTION_FIELD_NAME,getDescription(), 2));
document.add(createTextField(CONTENT_FIELD_NAME, getContent(), 0.5f)); document.add(createTextField(CONTENT_FIELD_NAME, getContent(), 0.5f));
document.add(new StringField(RESOURCEURL_FIELD_NAME, getResourceUrl(), Field.Store.YES)); document.add(new StringField(RESOURCEURL_FIELD_NAME, getResourceUrl(), Field.Store.YES));
document.add(new StringField(RESOURCEURL_MD5_FIELD_NAME, Encoder.md5hash(getResourceUrl()), Field.Store.YES));
document.add(new StringField(DOCUMENTTYPE_FIELD_NAME,getDocumentType(), Field.Store.YES)); document.add(new StringField(DOCUMENTTYPE_FIELD_NAME,getDocumentType(), Field.Store.YES));
if(getCssIcon() != null) { if(getCssIcon() != null) {
document.add(new StringField(CSS_ICON,getCssIcon(), Field.Store.YES)); document.add(new StringField(CSS_ICON,getCssIcon(), Field.Store.YES));
......
/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.search.service;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.Callable;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.olat.core.logging.OLog;
import org.olat.core.logging.Tracing;
import org.olat.core.util.Encoder;
import org.olat.search.SearchService;
import org.olat.search.model.AbstractOlatDocument;
/**
 * Callable which looks up, in the current index, the document whose resource
 * URL equals the one given at construction time.
 * <p>
 * The index is queried on the MD5 hash field of the resource URL. Because
 * different URLs could share the same hash, the stored plain resource URL of
 * every hit is compared with the requested one before the complete document
 * is loaded.
 * <p>
 * Initial date: 24.10.2013<br>
 * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
 *
 */
class GetDocumentByCallable implements Callable<Document> {
	private static final OLog log = Tracing.createLoggerFor(GetDocumentByCallable.class);

	/** Maximum number of hits requested from the searcher for one hash value. */
	private static final int MAX_HITS = 500;

	private final String resourceUrl;
	private final SearchServiceImpl searchService;

	/**
	 * @param resourceUrl The resource URL of the document to retrieve
	 * @param searchService The service which provides (and takes back) the index searcher
	 */
	public GetDocumentByCallable(String resourceUrl, SearchServiceImpl searchService) {
		this.resourceUrl = resourceUrl;
		this.searchService = searchService;
	}

	/**
	 * Executes the lookup.
	 *
	 * @return The complete document with all its fields, or null if the index
	 *         doesn't exist, no document matches or an error occurred (errors
	 *         are logged, not propagated)
	 */
	@Override
	public Document call() {
		Document doc = null;
		IndexSearcher searcher = null;
		try {
			if (searchService.existIndex()) {
				searcher = searchService.getIndexSearcher();
				String url = Encoder.md5hash(resourceUrl);
				String queryStr = "+" + AbstractOlatDocument.RESOURCEURL_MD5_FIELD_NAME + ":\"" + url + "\"";
				// The second argument of the parser is the default field name, not the query.
				QueryParser idQueryParser = new QueryParser(SearchService.OO_LUCENE_VERSION,
						AbstractOlatDocument.RESOURCEURL_MD5_FIELD_NAME, new KeywordAnalyzer());
				Query query = idQueryParser.parse(queryStr);

				TopDocs docs = searcher.search(query, MAX_HITS);
				// totalHits counts every match in the index, but scoreDocs only holds
				// the hits actually returned (at most MAX_HITS): iterate over the array.
				int numOfHits = docs.scoreDocs.length;

				// Only the fields needed for the comparison are loaded at first.
				Set<String> retrievedFields = new HashSet<String>();
				retrievedFields.add(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
				retrievedFields.add(AbstractOlatDocument.RESOURCEURL_MD5_FIELD_NAME);

				for (int i = 0; i < numOfHits; i++) {
					int docId = docs.scoreDocs[i].doc;
					Document foundDoc = searcher.doc(docId, retrievedFields);
					String possibleResourceUrl = foundDoc.get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
					if (resourceUrl.equals(possibleResourceUrl)) {
						// Load the document with all its fields and stop searching.
						doc = searcher.doc(docId);
						break;
					}
				}
			}
		} catch (Exception naex) {
			log.error("", naex);
		} finally {
			// Nothing to give back if the index doesn't exist or the searcher
			// could not be acquired.
			if (searcher != null) {
				searchService.releaseIndexSearcher(searcher);
			}
		}
		return doc;
	}
}
\ No newline at end of file
...@@ -28,6 +28,7 @@ package org.olat.search.service; ...@@ -28,6 +28,7 @@ package org.olat.search.service;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.ParseException;
import org.olat.core.commons.persistence.SortKey; import org.olat.core.commons.persistence.SortKey;
import org.olat.core.id.Identity; import org.olat.core.id.Identity;
...@@ -127,4 +128,11 @@ public class SearchServiceDisabled implements SearchService { ...@@ -127,4 +128,11 @@ public class SearchServiceDisabled implements SearchService {
log.error("call doSearch on disabled search service"); log.error("call doSearch on disabled search service");
throw new ServiceNotAvailableException("call doSearch on disabled search service"); throw new ServiceNotAvailableException("call doSearch on disabled search service");
} }
/**
 * The disabled service cannot look up documents: the call is logged as an
 * error and always rejected.
 *
 * @param resourceUrl The resource URL of the document (ignored)
 * @return Never returns normally
 * @throws ServiceNotAvailableException Always
 */
@Override
public Document doSearch(String resourceUrl)
throws ServiceNotAvailableException, ParseException, QueryException {
	final String message = "call doSearch on disabled search service";
	log.error(message);
	throw new ServiceNotAvailableException(message);
}
} }
...@@ -39,6 +39,7 @@ import java.util.concurrent.atomic.AtomicBoolean; ...@@ -39,6 +39,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.MultiReader;
...@@ -231,7 +232,7 @@ public class SearchServiceImpl implements SearchService, GenericEventListener { ...@@ -231,7 +232,7 @@ public class SearchServiceImpl implements SearchService, GenericEventListener {
searchSpellChecker.setSpellCheckEnabled(searchModuleConfig.getSpellCheckEnabled()); searchSpellChecker.setSpellCheckEnabled(searchModuleConfig.getSpellCheckEnabled());
searchSpellChecker.setSearchExecutor(searchExecutor); searchSpellChecker.setSearchExecutor(searchExecutor);
indexer = new Index(searchModuleConfig, searchSpellChecker, mainIndexer, lifeIndexer, coordinatorManager); indexer = new Index(searchModuleConfig, this, searchSpellChecker, mainIndexer, lifeIndexer, coordinatorManager);
indexPath = searchModuleConfig.getFullIndexPath(); indexPath = searchModuleConfig.getFullIndexPath();
permanentIndexPath = searchModuleConfig.getFullPermanentIndexPath(); permanentIndexPath = searchModuleConfig.getFullPermanentIndexPath();
...@@ -335,6 +336,19 @@ public class SearchServiceImpl implements SearchService, GenericEventListener { ...@@ -335,6 +336,19 @@ public class SearchServiceImpl implements SearchService, GenericEventListener {
} }
} }
/**
 * Looks up a single document in the index by submitting a
 * {@link GetDocumentByCallable} to the search executor and waiting for its
 * result.
 * <p>
 * Failures are logged and reported as null; none of the declared checked
 * exceptions is thrown by this implementation.
 *
 * @param queryString The resource URL of the document to look up (it is
 *        handed over to the callable as resource URL)
 * @return The document returned by the callable, or null if the lookup failed
 */
@Override
public Document doSearch(String queryString)
throws ServiceNotAvailableException, ParseException, QueryException {
	try {
		GetDocumentByCallable run = new GetDocumentByCallable(queryString, this);
		Future<Document> futureResults = searchExecutor.submit(run);
		return futureResults.get();
	} catch (InterruptedException e) {
		// Restore the interrupt flag so that the calling thread can still
		// react to the interruption after this method has returned.
		Thread.currentThread().interrupt();
		log.error("", e);
		return null;
	} catch (Exception e) {
		log.error("", e);
		return null;
	}
}
protected BooleanQuery createQuery(String queryString, List<String> condQueries) protected BooleanQuery createQuery(String queryString, List<String> condQueries)
throws ParseException { throws ParseException {
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery();
......
...@@ -26,14 +26,23 @@ ...@@ -26,14 +26,23 @@
package org.olat.search.service.document.file; package org.olat.search.service.document.file;
import java.io.IOException; import java.io.IOException;
import java.util.Date;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import org.olat.core.CoreSpringFactory;
import org.olat.core.logging.OLog; import org.olat.core.logging.OLog;
import org.olat.core.logging.Tracing; import org.olat.core.logging.Tracing;
import org.olat.core.util.vfs.LocalImpl; import org.olat.core.util.vfs.LocalImpl;
import org.olat.core.util.vfs.VFSLeaf; import org.olat.core.util.vfs.VFSLeaf;
import org.olat.search.QueryException;
import org.olat.search.SearchModule; import org.olat.search.SearchModule;
import org.olat.search.SearchService;
import org.olat.search.ServiceNotAvailableException;
import org.olat.search.model.AbstractOlatDocument;
import org.olat.search.service.SearchResourceContext; import org.olat.search.service.SearchResourceContext;
import org.olat.search.service.SearchServiceImpl;
/** /**
* Lucene document mapper. * Lucene document mapper.
...@@ -69,13 +78,13 @@ public class FileDocumentFactory { ...@@ -69,13 +78,13 @@ public class FileDocumentFactory {
private final static String XML_SUFFIX = "xml"; private final static String XML_SUFFIX = "xml";
private final static String TEXT_SUFFIX = "txt tex readme csv"; private final static String TEXT_SUFFIX = "txt tex readme csv";
//as a special parser; //as a special parser;
private static final String IMS_MANIFEST_FILE = "imsmanifest.xml"; private static final String IMS_MANIFEST_FILE = "imsmanifest.xml";
private int excludedFileSizeCount = 0; private int excludedFileSizeCount = 0;
private static SearchModule searchModule; private static SearchModule searchModule;
/** /**
* [used by spring] * [used by spring]
...@@ -92,11 +101,30 @@ public class FileDocumentFactory { ...@@ -92,11 +101,30 @@ public class FileDocumentFactory {
public Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf) public Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf)
throws IOException, DocumentAccessException { throws IOException, DocumentAccessException {
try { try {
Document doc = null;
try {
String resourceUrl = leafResourceContext.getResourceUrl();
SearchService searchService = CoreSpringFactory.getImpl(SearchServiceImpl.class);
Document indexedDoc = searchService.doSearch(resourceUrl);
if(indexedDoc != null) {
String timestamp = indexedDoc.get(AbstractOlatDocument.TIME_STAMP_NAME);
if(timestamp != null) {
Date lastMod = DateTools.stringToDate(timestamp);
Date lastMod2 = new Date(leaf.getLastModified());
if(lastMod2.compareTo(lastMod) < 0) {
return indexedDoc;
}
}
}
} catch (ServiceNotAvailableException | ParseException | QueryException | java.text.ParseException e) {
log.error("", e);
}
String fileName = leaf.getName(); String fileName = leaf.getName();
String suffix = FileTypeDetector.getSuffix(leaf); String suffix = FileTypeDetector.getSuffix(leaf);
if (log.isDebug()) log.debug("suffix=" + suffix); if (log.isDebug()) log.debug("suffix=" + suffix);
Document doc = null;
if (PDF_SUFFIX.indexOf(suffix) >= 0) { if (PDF_SUFFIX.indexOf(suffix) >= 0) {
if(searchModule.getPdfFileEnabled()) { if(searchModule.getPdfFileEnabled()) {
doc = PdfDocument.createDocument(leafResourceContext, leaf); doc = PdfDocument.createDocument(leafResourceContext, leaf);
......
...@@ -37,6 +37,7 @@ import org.olat.core.logging.Tracing; ...@@ -37,6 +37,7 @@ import org.olat.core.logging.Tracing;
import org.olat.core.util.FileUtils; import org.olat.core.util.FileUtils;
import org.olat.core.util.coordinate.CoordinatorManager; import org.olat.core.util.coordinate.CoordinatorManager;
import org.olat.search.SearchModule; import org.olat.search.SearchModule;
import org.olat.search.SearchService;
import org.olat.search.service.spell.SearchSpellChecker; import org.olat.search.service.spell.SearchSpellChecker;
/** /**
...@@ -63,7 +64,7 @@ public class Index { ...@@ -63,7 +64,7 @@ public class Index {
* @param restartInterval Restart interval of full-index in milliseconds. * @param restartInterval Restart interval of full-index in milliseconds.
* @param indexInterval Sleeping time in milliseconds between adding documents to index. * @param indexInterval Sleeping time in milliseconds between adding documents to index.
*/ */
public Index(SearchModule searchModule, SearchSpellChecker spellChecker, MainIndexer mainIndexer, public Index(SearchModule searchModule, SearchService searchService, SearchSpellChecker spellChecker, MainIndexer mainIndexer,
LifeFullIndexer lifeIndexer, CoordinatorManager coordinatorManager) { LifeFullIndexer lifeIndexer, CoordinatorManager coordinatorManager) {
this.spellChecker = spellChecker; this.spellChecker = spellChecker;
this.indexPath = searchModule.getFullIndexPath(); this.indexPath = searchModule.getFullIndexPath();
...@@ -71,7 +72,7 @@ public class Index { ...@@ -71,7 +72,7 @@ public class Index {
this.permanentIndexPath = searchModule.getFullPermanentIndexPath(); this.permanentIndexPath = searchModule.getFullPermanentIndexPath();
this.lifeIndexer = lifeIndexer; this.lifeIndexer = lifeIndexer;
fullIndexer = new OlatFullIndexer(this, searchModule, mainIndexer, coordinatorManager); fullIndexer = new OlatFullIndexer(this, searchModule, searchService, mainIndexer, coordinatorManager);
} }
/** /**
......
...@@ -53,6 +53,7 @@ import org.apache.lucene.index.IndexWriterConfig; ...@@ -53,6 +53,7 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
import org.olat.core.commons.persistence.DBFactory; import org.olat.core.commons.persistence.DBFactory;
...@@ -60,13 +61,17 @@ import org.olat.core.logging.OLog; ...@@ -60,13 +61,17 @@ import org.olat.core.logging.OLog;
import org.olat.core.logging.Tracing; import org.olat.core.logging.Tracing;
import org.olat.core.util.WorkThreadInformations; import org.olat.core.util.WorkThreadInformations;
import org.olat.core.util.coordinate.CoordinatorManager; import org.olat.core.util.coordinate.CoordinatorManager;
import org.olat.search.QueryException;
import org.olat.search.SearchModule; import org.olat.search.SearchModule;
import org.olat.search.SearchService; import org.olat.search.SearchService;
import org.olat.search.ServiceNotAvailableException;
import org.olat.search.model.OlatDocument; import org.olat.search.model.OlatDocument;
import org.olat.search.service.SearchResourceContext; import org.olat.search.service.SearchResourceContext;
/** /**
* Controls the whole generation of a full-index. Runs in own thread. * Controls the whole generation of a full-index. It runs the main index in its own thread.
* The sub-indexers can use a thread pool to parallelize the work.
*
* @author Christian Guretzki * @author Christian Guretzki
*/ */
public class OlatFullIndexer { public class OlatFullIndexer {
...@@ -109,6 +114,7 @@ public class OlatFullIndexer { ...@@ -109,6 +114,7 @@ public class OlatFullIndexer {
private Map<String,Integer> fileTypeCounters; private Map<String,Integer> fileTypeCounters;
private final MainIndexer mainIndexer; private final MainIndexer mainIndexer;
private final SearchService searchService;
private final CoordinatorManager coordinatorManager; private final CoordinatorManager coordinatorManager;
private static final Object indexerWriterBlock = new Object(); private static final Object indexerWriterBlock = new Object();
...@@ -122,11 +128,14 @@ public class OlatFullIndexer { ...@@ -122,11 +128,14 @@ public class OlatFullIndexer {
* @param restartInterval Restart interval in milliseconds. * @param restartInterval Restart interval in milliseconds.
* @param indexInterval Sleep time in milliseconds between adding documents. * @param indexInterval Sleep time in milliseconds between adding documents.
*/ */
public OlatFullIndexer(Index index, SearchModule searchModule, public OlatFullIndexer(Index index, SearchModule searchModule, SearchService searchService,
MainIndexer mainIndexer, CoordinatorManager coordinatorManager) { MainIndexer mainIndexer, CoordinatorManager coordinatorManager) {
this.index = index; this.index = index;
this.mainIndexer = mainIndexer; this.mainIndexer = mainIndexer;
this.searchService = searchService;
this.coordinatorManager = coordinatorManager; this.coordinatorManager = coordinatorManager;
// -1 because the thread pool uses a CallerRunsPolicy, which means the main thread
// will do the work itself if the queue of the pool is full.
if(searchModule.getFolderPoolSize() <= 2) { if(searchModule.getFolderPoolSize() <= 2) {
indexerPoolSize = 1; indexerPoolSize = 1;
} else { } else {
...@@ -336,6 +345,14 @@ public class OlatFullIndexer { ...@@ -336,6 +345,14 @@ public class OlatFullIndexer {
} }
} }
/**
 * Fetches the document indexed under the specified business path from the
 * search service.
 *
 * @param businessPath The value handed to the search service as resource URL
 * @return The document returned by the search service, or null if the
 *         search service threw one of its checked exceptions
 */
public Document getDocument(String businessPath) {
	Document indexedDocument = null;
	try {
		indexedDocument = searchService.doSearch(businessPath);
	} catch (ServiceNotAvailableException | ParseException | QueryException ignored) {
		// best effort: a failed lookup is reported as "no document"
	}
	return indexedDocument;
}
/** /**
* Add a document to the index writer. The document is indexed by a single threaded executor, * Add a document to the index writer. The document is indexed by a single threaded executor,
* Lucene want that write operations happen within a single thread. The access is synchronized * Lucene want that write operations happen within a single thread. The access is synchronized
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment