OO-1522: use the current index to search for the file document indexer

44e427a8 · srosse · e0d90dae · 44e427a8 · 44e427a8 · 44e427a8
Commit 44e427a8 authored 9 years ago by srosse
--- a/src/main/java/org/olat/search/SearchService.java
+++ b/src/main/java/org/olat/search/SearchService.java
@@ -28,6 +28,7 @@ package org.olat.search;
 import java.util.List;
 import java.util.Set;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.util.Version;
 import org.olat.core.commons.persistence.SortKey;
@@ -73,6 +74,19 @@ public interface SearchService {
 			int firstResult, int maxReturns, SortKey... orderBy)
 	throws ServiceNotAvailableException, ParseException, QueryException;
+	/**
+	 * Search a document with the specified resource URL.
+	 * 
+	 * @param resourceUrl The resource URL of the document to look up
+	 * @return The whole document with all fields, null if not found.
+	 * @throws ServiceNotAvailableException If the search service cannot be used, e.g. because it is disabled
+	 * @throws ParseException Presumably if the lookup query cannot be parsed (TODO confirm with the implementations)
+	 * @throws QueryException Presumably if the lookup query cannot be executed (TODO confirm with the implementations)
+	 */
+	public Document doSearch(String resourceUrl)
+			throws ServiceNotAvailableException, ParseException, QueryException;
 	/**
 	 * Check a query for similar words.
 	 * @param query

--- a/src/main/java/org/olat/search/model/AbstractOlatDocument.java
+++ b/src/main/java/org/olat/search/model/AbstractOlatDocument.java
@@ -61,6 +61,8 @@ public abstract class AbstractOlatDocument implements Serializable {
 	public  static final String FILETYPE_FIELD_NAME = "filetype";
 	public  static final String RESOURCEURL_FIELD_NAME = "resourceurl";
+	// Name of the index field which holds the MD5 hash of the resource URL
+	public  static final String RESOURCEURL_MD5_FIELD_NAME = "resourceurlmd";
 	public  static final String AUTHOR_FIELD_NAME = "author";

--- a/src/main/java/org/olat/search/model/OlatDocument.java
+++ b/src/main/java/org/olat/search/model/OlatDocument.java
@@ -35,6 +35,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
+import org.olat.core.util.Encoder;
 import org.olat.core.util.StringHelper;
@@ -88,6 +89,7 @@ public class OlatDocument extends AbstractOlatDocument {
 		document.add(createTextField(DESCRIPTION_FIELD_NAME,getDescription(), 2));
 		document.add(createTextField(CONTENT_FIELD_NAME, getContent(), 0.5f));
 		document.add(new StringField(RESOURCEURL_FIELD_NAME, getResourceUrl(), Field.Store.YES));
+		document.add(new StringField(RESOURCEURL_MD5_FIELD_NAME, Encoder.md5hash(getResourceUrl()), Field.Store.YES));
 		document.add(new StringField(DOCUMENTTYPE_FIELD_NAME,getDocumentType(), Field.Store.YES));
 		if(getCssIcon() != null) {
 			document.add(new StringField(CSS_ICON,getCssIcon(), Field.Store.YES));

--- a/src/main/java/org/olat/search/service/GetDocumentByCallable.java
+++ b/src/main/java/org/olat/search/service/GetDocumentByCallable.java
+/**
+ * <a href="http://www.openolat.org">
+ * OpenOLAT - Online Learning and Training</a><br>
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); <br>
+ * you may not use this file except in compliance with the License.<br>
+ * You may obtain a copy of the License at the
+ * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
+ * <p>
+ * Unless required by applicable law or agreed to in writing,<br>
+ * software distributed under the License is distributed on an "AS IS" BASIS, <br>
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
+ * See the License for the specific language governing permissions and <br>
+ * limitations under the License.
+ * <p>
+ * Initial code contributed and copyrighted by<br>
+ * frentix GmbH, http://www.frentix.com
+ * <p>
+ */
+package org.olat.search.service;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TopDocs;
+import org.olat.core.logging.OLog;
+import org.olat.core.logging.Tracing;
+import org.olat.core.util.Encoder;
+import org.olat.search.SearchService;
+import org.olat.search.model.AbstractOlatDocument;
+/**
+ * Callable which looks up a single document of the index by its resource URL.
+ * The index is queried with the MD5 hash of the resource URL; the plain resource URL
+ * stored in every hit is then compared with the requested one to pick the document.
+ * 
+ * Initial date: 24.10.2013<br>
+ * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
+ *
+ */
+class GetDocumentByCallable implements Callable<Document> {
+	private static final OLog log = Tracing.createLoggerFor(GetDocumentByCallable.class);
+	/** Maximum number of hits fetched from the index for one hash value. */
+	private static final int MAX_HITS = 500;
+	private final String resourceUrl;
+	private final SearchServiceImpl searchService;
+	/**
+	 * @param resourceUrl The resource URL of the document to look up
+	 * @param searchService The service which delivers and takes back the index searcher
+	 */
+	public GetDocumentByCallable(String resourceUrl,  SearchServiceImpl searchService) {
+		this.resourceUrl = resourceUrl;
+		this.searchService = searchService;
+	}
+	/**
+	 * Executes the lookup. Errors are logged and not propagated.
+	 * 
+	 * @return The document with all its fields, or null if there is no index, no
+	 *         document with this resource URL, or if an error occurred
+	 */
+	@Override
+	public Document call() {
+		Document doc = null;
+		IndexSearcher searcher = null;
+		try {
+			if (searchService.existIndex()) {
+				searcher = searchService.getIndexSearcher();
+				String url = Encoder.md5hash(resourceUrl);
+				String queryStr = "+" + AbstractOlatDocument.RESOURCEURL_MD5_FIELD_NAME + ":\"" + url + "\"";
+				// the second argument is the default field of the parser, not the query itself
+				QueryParser idQueryParser = new QueryParser(SearchService.OO_LUCENE_VERSION,
+						AbstractOlatDocument.RESOURCEURL_MD5_FIELD_NAME, new KeywordAnalyzer());
+				Query query = idQueryParser.parse(queryStr);
+				TopDocs docs = searcher.search(query, MAX_HITS);
+				Set<String> retrievedFields = new HashSet<String>();
+				retrievedFields.add(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
+				retrievedFields.add(AbstractOlatDocument.RESOURCEURL_MD5_FIELD_NAME);
+				// totalHits counts all matches, but scoreDocs only holds the hits which
+				// were really fetched (at most MAX_HITS): the array length is the safe bound.
+				// The last matching hit wins, so walk backwards and stop at the first match.
+				for(int i=docs.scoreDocs.length; i-->0; ) {
+					int docId = docs.scoreDocs[i].doc;
+					// load only the URL fields to compare, the full document is loaded on a match
+					Document foundDoc = searcher.doc(docId, retrievedFields);
+					String possibleResourceUrl = foundDoc.get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
+					if(resourceUrl.equals(possibleResourceUrl)) {
+						doc = searcher.doc(docId);
+						break;
+					}
+				}
+			}
+		} catch (Exception naex) {
+			log.error("Cannot retrieve the indexed document with resource URL: " + resourceUrl, naex);
+		} finally {
+			// without index (or if the acquisition failed) there is nothing to give back
+			if (searcher != null) {
+				searchService.releaseIndexSearcher(searcher);
+			}
+		}
+		return doc;
+	}
+}
\ No newline at end of file
--- a/src/main/java/org/olat/search/service/SearchServiceDisabled.java
+++ b/src/main/java/org/olat/search/service/SearchServiceDisabled.java
@@ -28,6 +28,7 @@ package org.olat.search.service;
 import java.util.List;
 import java.util.Set;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.queryparser.classic.ParseException;
 import org.olat.core.commons.persistence.SortKey;
 import org.olat.core.id.Identity;
@@ -127,4 +128,11 @@ public class SearchServiceDisabled implements SearchService {
 		log.error("call doSearch on disabled search service");
 		throw new ServiceNotAvailableException("call doSearch on disabled search service");
 	}
+	/**
+	 * The search service is disabled: the lookup is refused, the call is logged
+	 * as an error and reported to the caller with an exception.
+	 * 
+	 * @param resourceUrl Ignored
+	 * @return Never returns normally
+	 * @throws ServiceNotAvailableException Always
+	 */
+	@Override
+	public Document doSearch(String resourceUrl)
+			throws ServiceNotAvailableException, ParseException, QueryException {
+		final String message = "call doSearch on disabled search service";
+		log.error(message);
+		throw new ServiceNotAvailableException(message);
+	}
 }
--- a/src/main/java/org/olat/search/service/SearchServiceImpl.java
+++ b/src/main/java/org/olat/search/service/SearchServiceImpl.java
@@ -39,6 +39,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiReader;
@@ -231,7 +232,7 @@ public class SearchServiceImpl implements SearchService, GenericEventListener {
 		searchSpellChecker.setSpellCheckEnabled(searchModuleConfig.getSpellCheckEnabled());
 		searchSpellChecker.setSearchExecutor(searchExecutor);
-		indexer = new Index(searchModuleConfig, searchSpellChecker, mainIndexer, lifeIndexer, coordinatorManager);
+		indexer = new Index(searchModuleConfig, this, searchSpellChecker, mainIndexer, lifeIndexer, coordinatorManager);
 		indexPath = searchModuleConfig.getFullIndexPath();	
 		permanentIndexPath = searchModuleConfig.getFullPermanentIndexPath();
@@ -335,6 +336,19 @@ public class SearchServiceImpl implements SearchService, GenericEventListener {
 		}
 	}
+	/**
+	 * Looks up the indexed document of a resource. The lookup is submitted as a task
+	 * to the search executor and this method blocks until the task has finished.
+	 * Errors are logged and not propagated.
+	 * 
+	 * @param queryString The resource URL of the document to look up (it is handed
+	 *        over as resource URL to the lookup task, it is not a query)
+	 * @return The document delivered by the lookup task, or null if the task
+	 *         delivered nothing, failed or was interrupted
+	 */
+	@Override
+	public Document doSearch(String queryString)
+	throws ServiceNotAvailableException, ParseException, QueryException {
+		try {
+			GetDocumentByCallable run = new GetDocumentByCallable(queryString, this);
+			Future<Document> futureResults = searchExecutor.submit(run);
+			return futureResults.get();
+		} catch (InterruptedException e) {
+			// restore the interrupt flag, otherwise the calling thread never sees the interruption
+			Thread.currentThread().interrupt();
+			log.error("Interrupted while retrieving the indexed document: " + queryString, e);
+			return null;
+		} catch (Exception e) {
+			log.error("Cannot retrieve the indexed document: " + queryString, e);
+			return null;
+		}
+	}
 	protected BooleanQuery createQuery(String queryString, List<String> condQueries)
 	throws ParseException {
 		BooleanQuery query = new BooleanQuery();

--- a/src/main/java/org/olat/search/service/document/file/FileDocumentFactory.java
+++ b/src/main/java/org/olat/search/service/document/file/FileDocumentFactory.java
@@ -26,14 +26,23 @@
 package org.olat.search.service.document.file;
 import java.io.IOException;
+import java.util.Date;
+import org.apache.lucene.document.DateTools;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.olat.core.CoreSpringFactory;
 import org.olat.core.logging.OLog;
 import org.olat.core.logging.Tracing;
 import org.olat.core.util.vfs.LocalImpl;
 import org.olat.core.util.vfs.VFSLeaf;
+import org.olat.search.QueryException;
 import org.olat.search.SearchModule;
+import org.olat.search.SearchService;
+import org.olat.search.ServiceNotAvailableException;
+import org.olat.search.model.AbstractOlatDocument;
 import org.olat.search.service.SearchResourceContext;
+import org.olat.search.service.SearchServiceImpl;
 /**
 * Lucene document mapper.
@@ -69,13 +78,13 @@ public class FileDocumentFactory {
 	private final static String XML_SUFFIX = "xml";
 	private final static String TEXT_SUFFIX = "txt tex readme csv";
-  //as a special parser;
+	//as a special parser;
-  private static final String IMS_MANIFEST_FILE = "imsmanifest.xml";
+	private static final String IMS_MANIFEST_FILE = "imsmanifest.xml";
-  private int  excludedFileSizeCount = 0;
+	private int excludedFileSizeCount = 0;
-  private static SearchModule searchModule;
+	private static SearchModule searchModule;
 	/**
 	 * [used by spring]
@@ -92,11 +101,30 @@ public class FileDocumentFactory {
 	public Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf)
 	throws IOException, DocumentAccessException {
 		try {
+			Document doc = null;
+			try {
+				String resourceUrl = leafResourceContext.getResourceUrl();
+				SearchService searchService = CoreSpringFactory.getImpl(SearchServiceImpl.class);
+				Document indexedDoc = searchService.doSearch(resourceUrl);
+				if(indexedDoc != null) {
+					String timestamp = indexedDoc.get(AbstractOlatDocument.TIME_STAMP_NAME);
+					if(timestamp != null) {
+						Date lastMod = DateTools.stringToDate(timestamp);
+						Date lastMod2 = new Date(leaf.getLastModified());
+						if(lastMod2.compareTo(lastMod) < 0) {
+							return indexedDoc;
+						}
+					}
+				}
+			} catch (ServiceNotAvailableException | ParseException | QueryException | java.text.ParseException e) {
+				log.error("", e);
+			}
 			String fileName = leaf.getName();
 			String suffix = FileTypeDetector.getSuffix(leaf);
 			if (log.isDebug()) log.debug("suffix=" + suffix);
-			Document doc = null;
 			if (PDF_SUFFIX.indexOf(suffix) >= 0) {
 				if(searchModule.getPdfFileEnabled()) {
 					doc = PdfDocument.createDocument(leafResourceContext, leaf);

--- a/src/main/java/org/olat/search/service/indexer/Index.java
+++ b/src/main/java/org/olat/search/service/indexer/Index.java
@@ -37,6 +37,7 @@ import org.olat.core.logging.Tracing;
 import org.olat.core.util.FileUtils;
 import org.olat.core.util.coordinate.CoordinatorManager;
 import org.olat.search.SearchModule;
+import org.olat.search.SearchService;
 import org.olat.search.service.spell.SearchSpellChecker;
 /**
@@ -63,7 +64,7 @@ public class Index {
 	 * @param restartInterval Restart interval of full-index in milliseconds.
 	 * @param indexInterval   Sleeping time in milliseconds between adding documents to index.
 	 */
-	public Index(SearchModule searchModule, SearchSpellChecker spellChecker, MainIndexer mainIndexer,
+	public Index(SearchModule searchModule, SearchService searchService, SearchSpellChecker spellChecker, MainIndexer mainIndexer,
 			LifeFullIndexer lifeIndexer, CoordinatorManager coordinatorManager) {
 		this.spellChecker = spellChecker;
 		this.indexPath = searchModule.getFullIndexPath();
@@ -71,7 +72,7 @@ public class Index {
 		this.permanentIndexPath = searchModule.getFullPermanentIndexPath();
 		this.lifeIndexer = lifeIndexer;
-		fullIndexer = new OlatFullIndexer(this, searchModule, mainIndexer, coordinatorManager);
+		fullIndexer = new OlatFullIndexer(this, searchModule, searchService, mainIndexer, coordinatorManager);
 	}
 	/**

--- a/src/main/java/org/olat/search/service/indexer/OlatFullIndexer.java
+++ b/src/main/java/org/olat/search/service/indexer/OlatFullIndexer.java
@@ -53,6 +53,7 @@ import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.LogDocMergePolicy;
 import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.olat.core.commons.persistence.DBFactory;
@@ -60,13 +61,17 @@ import org.olat.core.logging.OLog;
 import org.olat.core.logging.Tracing;
 import org.olat.core.util.WorkThreadInformations;
 import org.olat.core.util.coordinate.CoordinatorManager;
+import org.olat.search.QueryException;
 import org.olat.search.SearchModule;
 import org.olat.search.SearchService;
+import org.olat.search.ServiceNotAvailableException;
 import org.olat.search.model.OlatDocument;
 import org.olat.search.service.SearchResourceContext;
 /**
- * Controls the hole generation of a full-index. Runs in own thread.
+ * Controls the whole generation of a full-index. It runs the main index in its own thread.
+ * The sub-indexers can use a thread pool to parallelize the work.
+ * 
 * @author Christian Guretzki
 */
 public class OlatFullIndexer {
@@ -109,6 +114,7 @@ public class OlatFullIndexer {
 	private Map<String,Integer> fileTypeCounters;
 	private final MainIndexer mainIndexer;
+	private final SearchService searchService;
 	private final CoordinatorManager coordinatorManager;
 	private static final Object indexerWriterBlock = new Object();
@@ -122,11 +128,14 @@ public class OlatFullIndexer {
 	 * @param restartInterval Restart interval in milliseconds.
 	 * @param indexInterval   Sleep time in milliseconds between adding documents.
 	 */
-	public OlatFullIndexer(Index index, SearchModule searchModule,
+	public OlatFullIndexer(Index index, SearchModule searchModule, SearchService searchService,
 			MainIndexer mainIndexer, CoordinatorManager coordinatorManager) {
 		this.index = index;
 		this.mainIndexer = mainIndexer;
+		this.searchService = searchService;
 		this.coordinatorManager = coordinatorManager;
+		// -1 because the thread pool uses a CallerRunsPolicy, which means the main thread
+		// will do the work if the queue of the pool is full.
 		if(searchModule.getFolderPoolSize() <= 2) {
 			indexerPoolSize = 1;
 		} else {
@@ -336,6 +345,14 @@ public class OlatFullIndexer {
 		}
 	}
+	/**
+	 * Retrieves a document via the search service.
+	 * 
+	 * @param businessPath Handed over to the search service as the resource URL of the document
+	 * @return The document delivered by the search service, or null if the lookup failed
+	 */
+	public Document getDocument(String businessPath) {
+		try {
+			return searchService.doSearch(businessPath);
+		} catch (ServiceNotAvailableException | ParseException | QueryException e) {
+			// best effort: the failure is reported but the caller only gets null
+			log.warn("Cannot retrieve the indexed document: " + businessPath, e);
+			return null;
+		}
+	}
 	/**
 	 * Add a document to the index writer. The document is indexed by a single threaded executor,
 	 * Lucene want that write operations happen within a single thread. The access is synchronized