diff --git a/src/main/java/org/olat/search/SearchService.java b/src/main/java/org/olat/search/SearchService.java
index 04f13561e68c6d036fc72d274df7651038334117..ff9e815bf0246f50a01368e7be8b678f89b6b878 100644
--- a/src/main/java/org/olat/search/SearchService.java
+++ b/src/main/java/org/olat/search/SearchService.java
@@ -28,6 +28,7 @@ package org.olat.search;
 import java.util.List;
 import java.util.Set;
 
+import org.apache.lucene.document.Document;
 import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.util.Version;
 import org.olat.core.commons.persistence.SortKey;
@@ -73,6 +74,19 @@ public interface SearchService {
 			int firstResult, int maxReturns, SortKey... orderBy)
 	throws ServiceNotAvailableException, ParseException, QueryException;
 	
+	/**
+	 * Look up the indexed document with the specified resource URL.
+	 * 
+	 * @param resourceUrl The resource URL of the wanted document
+	 * @return The whole document with all fields, null if not found.
+	 * @throws ServiceNotAvailableException If the search service cannot be used, e.g. because it is disabled
+	 * @throws ParseException Presumably if the lookup query cannot be parsed - TODO confirm against the implementations
+	 * @throws QueryException Presumably if the lookup query is rejected - TODO confirm against the implementations
+	 */
+	public Document doSearch(String resourceUrl)
+	throws ServiceNotAvailableException, ParseException, QueryException;
+	
+	
 	/**
 	 * Check a query for similar words.
* @param query diff --git a/src/main/java/org/olat/search/model/AbstractOlatDocument.java b/src/main/java/org/olat/search/model/AbstractOlatDocument.java index dc82d18c6657290a9dd01c59fe30650e366abd20..0db7b59d9284475e91d887b84c4278eea8a9c3e8 100644 --- a/src/main/java/org/olat/search/model/AbstractOlatDocument.java +++ b/src/main/java/org/olat/search/model/AbstractOlatDocument.java @@ -61,6 +61,8 @@ public abstract class AbstractOlatDocument implements Serializable { public static final String FILETYPE_FIELD_NAME = "filetype"; public static final String RESOURCEURL_FIELD_NAME = "resourceurl"; + + public static final String RESOURCEURL_MD5_FIELD_NAME = "resourceurlmd"; public static final String AUTHOR_FIELD_NAME = "author"; diff --git a/src/main/java/org/olat/search/model/OlatDocument.java b/src/main/java/org/olat/search/model/OlatDocument.java index ee790ad0c1bb2c650c463762406d4db0cce7e058..ce534f89d2dc9decfd9e22afbe0e611f4b0181c5 100644 --- a/src/main/java/org/olat/search/model/OlatDocument.java +++ b/src/main/java/org/olat/search/model/OlatDocument.java @@ -35,6 +35,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; +import org.olat.core.util.Encoder; import org.olat.core.util.StringHelper; @@ -88,6 +89,7 @@ public class OlatDocument extends AbstractOlatDocument { document.add(createTextField(DESCRIPTION_FIELD_NAME,getDescription(), 2)); document.add(createTextField(CONTENT_FIELD_NAME, getContent(), 0.5f)); document.add(new StringField(RESOURCEURL_FIELD_NAME, getResourceUrl(), Field.Store.YES)); + document.add(new StringField(RESOURCEURL_MD5_FIELD_NAME, Encoder.md5hash(getResourceUrl()), Field.Store.YES)); document.add(new StringField(DOCUMENTTYPE_FIELD_NAME,getDocumentType(), Field.Store.YES)); if(getCssIcon() != null) { document.add(new StringField(CSS_ICON,getCssIcon(), Field.Store.YES)); diff --git 
a/src/main/java/org/olat/search/service/GetDocumentByCallable.java b/src/main/java/org/olat/search/service/GetDocumentByCallable.java
new file mode 100644
index 0000000000000000000000000000000000000000..8ade7d9eef807b5c55c02392888b6ee8c3a8e490
--- /dev/null
+++ b/src/main/java/org/olat/search/service/GetDocumentByCallable.java
@@ -0,0 +1,112 @@
+/**
+ * <a href="http://www.openolat.org">
+ * OpenOLAT - Online Learning and Training</a><br>
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); <br>
+ * you may not use this file except in compliance with the License.<br>
+ * You may obtain a copy of the License at the
+ * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
+ * <p>
+ * Unless required by applicable law or agreed to in writing,<br>
+ * software distributed under the License is distributed on an "AS IS" BASIS, <br>
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
+ * See the License for the specific language governing permissions and <br>
+ * limitations under the License.
+ * <p>
+ * Initial code contributed and copyrighted by<br>
+ * frentix GmbH, http://www.frentix.com
+ * <p>
+ */
+package org.olat.search.service;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.Callable;
+
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TopDocs;
+import org.olat.core.logging.OLog;
+import org.olat.core.logging.Tracing;
+import org.olat.core.util.Encoder;
+import org.olat.search.SearchService;
+import org.olat.search.model.AbstractOlatDocument;
+
+/**
+ * Looks up a single indexed document by its resource URL. The candidates are selected
+ * with the MD5 hash of the URL and then confirmed against the stored, plain URL.
+ * 
+ * Initial date: 24.10.2013<br>
+ * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
+ *
+ */
+class GetDocumentByCallable implements Callable<Document> {
+	
+	private static final OLog log = Tracing.createLoggerFor(GetDocumentByCallable.class);
+	
+	/** Maximum number of candidates fetched from the index for one MD5 hash. */
+	private static final int MAX_CANDIDATES = 500;
+	
+	private final String resourceUrl;
+	private final SearchServiceImpl searchService;
+	
+	/**
+	 * @param resourceUrl The resource URL of the document to retrieve
+	 * @param searchService The service used to obtain and release the index searcher
+	 */
+	public GetDocumentByCallable(String resourceUrl, SearchServiceImpl searchService) {
+		this.resourceUrl = resourceUrl;
+		this.searchService = searchService;
+	}
+	
+	/**
+	 * Queries the index for the documents carrying the MD5 hash of the resource URL and
+	 * keeps the one whose stored resource URL equals the wanted one. If several documents
+	 * match, the last one wins. Errors are logged and not propagated.
+	 * 
+	 * @return The whole document with all stored fields, or null if the index doesn't
+	 *         exist, no document matches or an error occurred
+	 */
+	@Override
+	public Document call() {
+		Document doc = null;
+		IndexSearcher searcher = null;
+		try {
+			if (searchService.existIndex()) {
+				searcher = searchService.getIndexSearcher();
+				String url = Encoder.md5hash(resourceUrl);
+				String queryStr = "+" + AbstractOlatDocument.RESOURCEURL_MD5_FIELD_NAME + ":\"" + url + "\"";
+				// the second argument of the parser is the default field, not the query string
+				QueryParser idQueryParser = new QueryParser(SearchService.OO_LUCENE_VERSION,
+						AbstractOlatDocument.RESOURCEURL_MD5_FIELD_NAME, new KeywordAnalyzer());
+				Query query = idQueryParser.parse(queryStr);
+				
+				TopDocs docs = searcher.search(query, MAX_CANDIDATES);
+				// iterate over the returned hits only: totalHits counts every match and can
+				// exceed the length of scoreDocs, which is capped by MAX_CANDIDATES
+				int numOfDocs = docs.scoreDocs.length;
+				
+				Set<String> retrievedFields = new HashSet<String>();
+				retrievedFields.add(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
+				retrievedFields.add(AbstractOlatDocument.RESOURCEURL_MD5_FIELD_NAME);
+				
+				for(int i=0; i<numOfDocs; i++) {
+					// load the two URL fields first, the full document only for a confirmed match
+					Document foundDoc = searcher.doc(docs.scoreDocs[i].doc, retrievedFields);
+					String possibleResourceUrl = foundDoc.get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
+					if(resourceUrl.equals(possibleResourceUrl)) {
+						doc = searcher.doc(docs.scoreDocs[i].doc);
+					}
+				}
+			}
+		} catch (Exception naex) {
+			log.error("", naex);
+		} finally {
+			searchService.releaseIndexSearcher(searcher);
+		}
+		return doc;
+	}
+}
\ No newline at end of file
diff --git a/src/main/java/org/olat/search/service/SearchServiceDisabled.java b/src/main/java/org/olat/search/service/SearchServiceDisabled.java
index 72c83d0c65ee3e13df9bb1a67a2a6d0f7e0be624..27b2595720d1be176fc4ea7b6499d6520550b4fe 100644
--- a/src/main/java/org/olat/search/service/SearchServiceDisabled.java
+++ b/src/main/java/org/olat/search/service/SearchServiceDisabled.java
@@ -28,6 +28,7 @@ package org.olat.search.service;
 import java.util.List;
 import java.util.Set;
 
+import org.apache.lucene.document.Document;
 import org.apache.lucene.queryparser.classic.ParseException;
 import org.olat.core.commons.persistence.SortKey;
 import org.olat.core.id.Identity;
@@ -127,4 +128,11 @@ public class SearchServiceDisabled implements SearchService {
 		log.error("call doSearch on disabled search service");
 		throw new ServiceNotAvailableException("call doSearch on disabled search service");
 	}
+
+	@Override
+	public Document doSearch(String resourceUrl)
+			throws ServiceNotAvailableException, ParseException, QueryException {
+		log.error("call doSearch on disabled search service");
+		throw new ServiceNotAvailableException("call doSearch on disabled search service");
+	}
 }
diff --git a/src/main/java/org/olat/search/service/SearchServiceImpl.java b/src/main/java/org/olat/search/service/SearchServiceImpl.java
index 888bf689f23a9e42dc2c18b7de9299b3a5b2a88f..115844ddd6f22c06d214e494a7c87c99998439a1 100644
---
a/src/main/java/org/olat/search/service/SearchServiceImpl.java
+++ b/src/main/java/org/olat/search/service/SearchServiceImpl.java
@@ -39,6 +39,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiReader;
@@ -231,7 +232,7 @@ public class SearchServiceImpl implements SearchService, GenericEventListener {
 		searchSpellChecker.setSpellCheckEnabled(searchModuleConfig.getSpellCheckEnabled());
 		searchSpellChecker.setSearchExecutor(searchExecutor);
 		
-		indexer = new Index(searchModuleConfig, searchSpellChecker, mainIndexer, lifeIndexer, coordinatorManager);
+		indexer = new Index(searchModuleConfig, this, searchSpellChecker, mainIndexer, lifeIndexer, coordinatorManager);
 
 		indexPath = searchModuleConfig.getFullIndexPath();
 		permanentIndexPath = searchModuleConfig.getFullPermanentIndexPath();
@@ -335,6 +336,31 @@ public class SearchServiceImpl implements SearchService, GenericEventListener {
 		}
 	}
 	
+	/**
+	 * Retrieves the indexed document with the given resource URL. The lookup is submitted
+	 * to the search executor and this method blocks until it has completed.
+	 * 
+	 * @param queryString The resource URL of the wanted document (not a Lucene query)
+	 * @return The whole document, or null if it was not found or the lookup failed;
+	 *         failures are logged and not propagated to the caller
+	 */
+	@Override
+	public Document doSearch(String queryString)
+	throws ServiceNotAvailableException, ParseException, QueryException {
+		try {
+			GetDocumentByCallable run = new GetDocumentByCallable(queryString, this);
+			Future<Document> futureResults = searchExecutor.submit(run);
+			return futureResults.get();
+		} catch (Exception e) {
+			if (e instanceof InterruptedException) {
+				// the broad catch would otherwise swallow the interruption of the waiting thread
+				Thread.currentThread().interrupt();
+			}
+			log.error("", e);
+			return null;
+		}
+	}
+	
 	protected BooleanQuery createQuery(String queryString, List<String> condQueries) throws ParseException {
 		BooleanQuery query = new BooleanQuery();
 	
diff --git a/src/main/java/org/olat/search/service/document/file/FileDocumentFactory.java b/src/main/java/org/olat/search/service/document/file/FileDocumentFactory.java
index 97cf3e2cd67ef0b6e19e776abdc2488bf24d177d..c9b2b71333c23bc9321d3ed6b6f6f4bbd982e2f1 100644
---
a/src/main/java/org/olat/search/service/document/file/FileDocumentFactory.java +++ b/src/main/java/org/olat/search/service/document/file/FileDocumentFactory.java @@ -26,14 +26,23 @@ package org.olat.search.service.document.file; import java.io.IOException; +import java.util.Date; +import org.apache.lucene.document.DateTools; import org.apache.lucene.document.Document; +import org.apache.lucene.queryparser.classic.ParseException; +import org.olat.core.CoreSpringFactory; import org.olat.core.logging.OLog; import org.olat.core.logging.Tracing; import org.olat.core.util.vfs.LocalImpl; import org.olat.core.util.vfs.VFSLeaf; +import org.olat.search.QueryException; import org.olat.search.SearchModule; +import org.olat.search.SearchService; +import org.olat.search.ServiceNotAvailableException; +import org.olat.search.model.AbstractOlatDocument; import org.olat.search.service.SearchResourceContext; +import org.olat.search.service.SearchServiceImpl; /** * Lucene document mapper. @@ -69,13 +78,13 @@ public class FileDocumentFactory { private final static String XML_SUFFIX = "xml"; private final static String TEXT_SUFFIX = "txt tex readme csv"; - //as a special parser; - private static final String IMS_MANIFEST_FILE = "imsmanifest.xml"; + //as a special parser; + private static final String IMS_MANIFEST_FILE = "imsmanifest.xml"; - private int excludedFileSizeCount = 0; + private int excludedFileSizeCount = 0; - private static SearchModule searchModule; + private static SearchModule searchModule; /** * [used by spring] @@ -92,11 +101,30 @@ public class FileDocumentFactory { public Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf) throws IOException, DocumentAccessException { try { + Document doc = null; + try { + String resourceUrl = leafResourceContext.getResourceUrl(); + SearchService searchService = CoreSpringFactory.getImpl(SearchServiceImpl.class); + + Document indexedDoc = searchService.doSearch(resourceUrl); + if(indexedDoc != null) { + 
String timestamp = indexedDoc.get(AbstractOlatDocument.TIME_STAMP_NAME); + if(timestamp != null) { + Date lastMod = DateTools.stringToDate(timestamp); + Date lastMod2 = new Date(leaf.getLastModified()); + if(lastMod2.compareTo(lastMod) < 0) { + return indexedDoc; + } + } + } + } catch (ServiceNotAvailableException | ParseException | QueryException | java.text.ParseException e) { + log.error("", e); + } + String fileName = leaf.getName(); String suffix = FileTypeDetector.getSuffix(leaf); if (log.isDebug()) log.debug("suffix=" + suffix); - - Document doc = null; + if (PDF_SUFFIX.indexOf(suffix) >= 0) { if(searchModule.getPdfFileEnabled()) { doc = PdfDocument.createDocument(leafResourceContext, leaf); diff --git a/src/main/java/org/olat/search/service/indexer/Index.java b/src/main/java/org/olat/search/service/indexer/Index.java index 25db3b52e23219d2f168707ad3625f47d01bc006..3ebeb196de6d68cfb85a758574cc380b77c40264 100644 --- a/src/main/java/org/olat/search/service/indexer/Index.java +++ b/src/main/java/org/olat/search/service/indexer/Index.java @@ -37,6 +37,7 @@ import org.olat.core.logging.Tracing; import org.olat.core.util.FileUtils; import org.olat.core.util.coordinate.CoordinatorManager; import org.olat.search.SearchModule; +import org.olat.search.SearchService; import org.olat.search.service.spell.SearchSpellChecker; /** @@ -63,7 +64,7 @@ public class Index { * @param restartInterval Restart interval of full-index in milliseconds. * @param indexInterval Sleeping time in milliseconds between adding documents to index. 
*/ - public Index(SearchModule searchModule, SearchSpellChecker spellChecker, MainIndexer mainIndexer, + public Index(SearchModule searchModule, SearchService searchService, SearchSpellChecker spellChecker, MainIndexer mainIndexer, LifeFullIndexer lifeIndexer, CoordinatorManager coordinatorManager) { this.spellChecker = spellChecker; this.indexPath = searchModule.getFullIndexPath(); @@ -71,7 +72,7 @@ public class Index { this.permanentIndexPath = searchModule.getFullPermanentIndexPath(); this.lifeIndexer = lifeIndexer; - fullIndexer = new OlatFullIndexer(this, searchModule, mainIndexer, coordinatorManager); + fullIndexer = new OlatFullIndexer(this, searchModule, searchService, mainIndexer, coordinatorManager); } /** diff --git a/src/main/java/org/olat/search/service/indexer/OlatFullIndexer.java b/src/main/java/org/olat/search/service/indexer/OlatFullIndexer.java index fbe94ac7127372344606f6c7d78732855edb0b63..0f9b1f7e1f2bb1f25ef8804a187d37415a09bf3d 100644 --- a/src/main/java/org/olat/search/service/indexer/OlatFullIndexer.java +++ b/src/main/java/org/olat/search/service/indexer/OlatFullIndexer.java @@ -53,6 +53,7 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.olat.core.commons.persistence.DBFactory; @@ -60,13 +61,17 @@ import org.olat.core.logging.OLog; import org.olat.core.logging.Tracing; import org.olat.core.util.WorkThreadInformations; import org.olat.core.util.coordinate.CoordinatorManager; +import org.olat.search.QueryException; import org.olat.search.SearchModule; import org.olat.search.SearchService; +import org.olat.search.ServiceNotAvailableException; import org.olat.search.model.OlatDocument; import 
org.olat.search.service.SearchResourceContext;
 
 /**
- * Controls the hole generation of a full-index. Runs in own thread.
+ * Controls the whole generation of a full-index. It runs the main index in its own thread.
+ * The sub-indexers can use a thread pool to parallelize the work.
+ * 
  * @author Christian Guretzki
  */
 public class OlatFullIndexer {
@@ -109,6 +114,7 @@ public class OlatFullIndexer {
 	private Map<String,Integer> fileTypeCounters;
 	
 	private final MainIndexer mainIndexer;
+	private final SearchService searchService;
 	private final CoordinatorManager coordinatorManager;
 	
 	private static final Object indexerWriterBlock = new Object();
@@ -122,11 +128,14 @@ public class OlatFullIndexer {
 	 * @param restartInterval Restart interval in milliseconds.
 	 * @param indexInterval Sleep time in milliseconds between adding documents.
 	 */
-	public OlatFullIndexer(Index index, SearchModule searchModule,
+	public OlatFullIndexer(Index index, SearchModule searchModule, SearchService searchService,
 			MainIndexer mainIndexer, CoordinatorManager coordinatorManager) {
 		this.index = index;
 		this.mainIndexer = mainIndexer;
+		this.searchService = searchService;
 		this.coordinatorManager = coordinatorManager;
+		// -1 because the thread pool uses a CallerRunPolicy, which means the main thread
+		// will do the work if the queue of the pool is full.
 		if(searchModule.getFolderPoolSize() <= 2) {
 			indexerPoolSize = 1;
 		} else {
@@ -336,6 +345,22 @@ public class OlatFullIndexer {
 		}
 	}
 	
+	/**
+	 * Retrieves an already indexed document through the search service.
+	 * 
+	 * @param businessPath Handed to the search service as the resource URL of the document
+	 * @return The indexed document, or null if the search service found none or
+	 *         reported an error (the error is logged)
+	 */
+	public Document getDocument(String businessPath) {
+		try {
+			return searchService.doSearch(businessPath);
+		} catch (ServiceNotAvailableException | ParseException | QueryException e) {
+			log.error("", e);
+			return null;
+		}
+	}
+	
 	/**
 	 * Add a document to the index writer. The document is indexed by a single threaded executor,
 	 * Lucene want that write operations happen within a single thread. The access is synchronized