From 6ff3617585be911c28a32d9e3070f4f60855a922 Mon Sep 17 00:00:00 2001 From: srosse <none@none> Date: Mon, 13 Feb 2017 08:26:18 +0100 Subject: [PATCH] OO-2526: don't write the marker file anymore, make the buffer configurable, add some close session... --- .../java/org/olat/search/SearchModule.java | 8 ++++++- .../document/file/FileDocumentFactory.java | 24 +++++++++---------- .../service/indexer/FolderIndexerWorker.java | 16 ++++++++----- .../search/service/indexer/LeafIndexer.java | 4 ---- .../service/indexer/OlatFullIndexer.java | 6 +---- .../service/indexer/group/GroupIndexer.java | 1 + .../indexer/identity/IdentityIndexer.java | 1 + .../indexer/repository/RepositoryIndexer.java | 6 ++++- .../course/DialogCourseNodeIndexer.java | 4 ---- 9 files changed, 37 insertions(+), 33 deletions(-) diff --git a/src/main/java/org/olat/search/SearchModule.java b/src/main/java/org/olat/search/SearchModule.java index 67ff87bdfd2..2ecf210c992 100644 --- a/src/main/java/org/olat/search/SearchModule.java +++ b/src/main/java/org/olat/search/SearchModule.java @@ -114,7 +114,9 @@ public class SearchModule extends AbstractSpringModule { private String fullPdfTextBufferPath; private long maxFileSize = 10485760; - private double ramBufferSizeMB = 16; + + @Value("${search.ram.buffer.size:16}") + private double ramBufferSizeMB; private boolean useCompoundFile = false; @Autowired @Qualifier("fileSizeSuffixes") @@ -409,6 +411,10 @@ public class SearchModule extends AbstractSpringModule { public double getRAMBufferSizeMB() { return ramBufferSizeMB; } + + public void setRAMBufferSizeMB(double ramBufferSizeMB) { + this.ramBufferSizeMB = ramBufferSizeMB; + } public boolean getUseCompoundFile() { return useCompoundFile; diff --git a/src/main/java/org/olat/search/service/document/file/FileDocumentFactory.java b/src/main/java/org/olat/search/service/document/file/FileDocumentFactory.java index 997adc0b192..e6797d7af4b 100644 --- a/src/main/java/org/olat/search/service/document/file/FileDocumentFactory.java +++ b/src/main/java/org/olat/search/service/document/file/FileDocumentFactory.java @@ -205,16 +205,14 @@ public class FileDocumentFactory { */ public boolean isFileSupported(VFSLeaf leaf) { String fileName = leaf.getName(); - if (fileName != null && fileName.startsWith(".")) { + if (fileName == null || fileName.startsWith(".")) { //don't index all mac os x hidden files return false; } - String suffix; - try { - suffix = FileTypeDetector.getSuffix(leaf); - } catch (DocumentNotImplementedException e) { - return false; + long fileSize = leaf.getSize(); + if(fileSize == 0) { + return false;// don't index empty files } // 1. Check if file is not on fileBlackList @@ -230,20 +228,22 @@ public class FileDocumentFactory { } } + String suffix; + try { + suffix = FileTypeDetector.getSuffix(leaf); + } catch (DocumentNotImplementedException e) { + return false; + } + // 2. Check for certain file-type the file size if (searchModule.getFileSizeSuffixes().contains(suffix)) { long maxFileSize = searchModule.getMaxFileSize(); - if ( (maxFileSize != 0) && (leaf.getSize() > maxFileSize) ) { + if ( (maxFileSize != 0) && (fileSize > maxFileSize) ) { log.info("File too big, exlude from search index. filename=" + fileName); excludedFileSizeCount++; return false; } } - /* 3. Check if suffix is supported - if (supportedSuffixes.indexOf(suffix) >= 0) { - return true; - }*/ - //index all files (index metadatas) return true; } diff --git a/src/main/java/org/olat/search/service/indexer/FolderIndexerWorker.java b/src/main/java/org/olat/search/service/indexer/FolderIndexerWorker.java index 7924b868a80..f54cd28e825 100644 --- a/src/main/java/org/olat/search/service/indexer/FolderIndexerWorker.java +++ b/src/main/java/org/olat/search/service/indexer/FolderIndexerWorker.java @@ -34,10 +34,10 @@ import org.olat.core.CoreSpringFactory; import org.olat.core.commons.persistence.DBFactory; import org.olat.core.logging.OLog; import org.olat.core.logging.Tracing; -import org.olat.core.util.WorkThreadInformations; import org.olat.core.util.vfs.VFSContainer; import org.olat.core.util.vfs.VFSItem; import org.olat.core.util.vfs.VFSLeaf; +import org.olat.core.util.vfs.filters.VFSItemFilter; import org.olat.search.service.SearchResourceContext; import org.olat.search.service.document.file.DocumentAccessException; import org.olat.search.service.document.file.FileDocumentFactory; @@ -86,7 +86,7 @@ public class FolderIndexerWorker implements Callable<Boolean> { throws IOException, InterruptedException { // Items: List of VFSContainer & VFSLeaf String myFilePath = fPath; - for (VFSItem item : cont.getItems()) { + for (VFSItem item : cont.getItems(new SystemFileFilter())) { if (item instanceof VFSContainer) { // ok it is a container go further if (log.isDebug()) log.debug(item.getName() + " is a VFSContainer => go further "); @@ -111,8 +111,6 @@ public class FolderIndexerWorker implements Callable<Boolean> { if (docFactory.isFileSupported(leaf)) { String myFilePath = fPath + "/" + leaf.getName(); leafResourceContext.setFilePath(myFilePath); - WorkThreadInformations.setInfoFiles(myFilePath, leaf); - WorkThreadInformations.set("Index VFSLeaf=" + myFilePath + " at " + leafResourceContext.getResourceUrl()); Document document = docFactory.createDocument(leafResourceContext, leaf); if(document != null) {//document which are disabled return null writer.addDocument(document); @@ -128,8 +126,6 @@ public class FolderIndexerWorker implements Callable<Boolean> { log.warn("IOException: Can not index leaf=" + leaf.getName(), ioEx); } catch (Exception ex) { log.warn("Exception: Can not index leaf=" + leaf.getName(), ex); - } finally { - WorkThreadInformations.unset(); } } @@ -152,4 +148,12 @@ public class FolderIndexerWorker implements Callable<Boolean> { public void setAccessRule(FolderIndexerAccess accessRule) { this.accessRule = accessRule; } + + private static class SystemFileFilter implements VFSItemFilter { + @Override + public boolean accept(VFSItem vfsItem) { + String name = vfsItem.getName(); + return !name.startsWith(".") && !name.equals("__MACOSX"); + } + } } diff --git a/src/main/java/org/olat/search/service/indexer/LeafIndexer.java b/src/main/java/org/olat/search/service/indexer/LeafIndexer.java index 56582d95265..03f6211e693 100644 --- a/src/main/java/org/olat/search/service/indexer/LeafIndexer.java +++ b/src/main/java/org/olat/search/service/indexer/LeafIndexer.java @@ -31,7 +31,6 @@ import java.io.IOException; import org.apache.lucene.document.Document; import org.olat.core.CoreSpringFactory; import org.olat.core.commons.modules.bc.vfs.OlatRootFolderImpl; -import org.olat.core.util.WorkThreadInformations; import org.olat.core.util.vfs.VFSContainer; import org.olat.core.util.vfs.VFSLeaf; import org.olat.search.service.SearchResourceContext; @@ -56,7 +55,6 @@ public abstract class LeafIndexer extends AbstractHierarchicalIndexer { } leafResourceContext.setFilePath(myFilePath); - WorkThreadInformations.set("Index VFSLeaf=" + myFilePath + " at " + leafResourceContext.getResourceUrl()); Document document = CoreSpringFactory.getImpl(FileDocumentFactory.class).createDocument(leafResourceContext, leaf); indexWriter.addDocument(document); } else { @@ -70,8 +68,6 @@ public abstract class LeafIndexer extends AbstractHierarchicalIndexer { throw new InterruptedException(iex.getMessage()); } catch (Exception ex) { logWarn("Exception: Can not index leaf=" + leaf.getName(), ex); - } finally { - WorkThreadInformations.unset(); } } diff --git a/src/main/java/org/olat/search/service/indexer/OlatFullIndexer.java b/src/main/java/org/olat/search/service/indexer/OlatFullIndexer.java index 42cbd8cb9e9..506f1899952 100644 --- a/src/main/java/org/olat/search/service/indexer/OlatFullIndexer.java +++ b/src/main/java/org/olat/search/service/indexer/OlatFullIndexer.java @@ -61,7 +61,6 @@ import org.apache.lucene.store.FSDirectory; import org.olat.core.commons.persistence.DBFactory; import org.olat.core.logging.OLog; import org.olat.core.logging.Tracing; -import org.olat.core.util.WorkThreadInformations; import org.olat.core.util.coordinate.CoordinatorManager; import org.olat.search.QueryException; import org.olat.search.SearchModule; @@ -225,14 +224,12 @@ public class OlatFullIndexer { /** * Create index-writer object. In multi-threaded mode ctreates an array of index-workers. * Start indexing with main-index as root object. Index recursive all elements. - * At the end optimze and close new index. + * At the end optimize and close new index. * The new index is stored in [temporary-index-path]/main * @throws InterruptedException */ private void doIndex() throws InterruptedException{ try { - WorkThreadInformations.setLongRunningTask("indexer"); - if(indexerExecutor == null) { BlockingQueue<Runnable> queue = new LinkedBlockingQueue<Runnable>(2); indexerExecutor = new ThreadPoolExecutor(indexerPoolSize, indexerPoolSize, 0L, TimeUnit.MILLISECONDS, @@ -275,7 +272,6 @@ public class OlatFullIndexer { } catch (IOException e) { log.warn("Can not create IndexWriter, indexname=" + tempIndexPath, e); } finally { - WorkThreadInformations.unsetLongRunningTask("indexer"); DBFactory.getInstance().commitAndCloseSession(); log.debug("doIndex: commit & close session"); diff --git a/src/main/java/org/olat/search/service/indexer/group/GroupIndexer.java b/src/main/java/org/olat/search/service/indexer/group/GroupIndexer.java index 752bdedd116..32a9750d551 100644 --- a/src/main/java/org/olat/search/service/indexer/group/GroupIndexer.java +++ b/src/main/java/org/olat/search/service/indexer/group/GroupIndexer.java @@ -94,6 +94,7 @@ public class GroupIndexer extends AbstractHierarchicalIndexer { logError("Error indexing group=" + businessGroup, err); DBFactory.getInstance().rollbackAndCloseSession(); } + DBFactory.getInstance().commitAndCloseSession(); } long indexTime = System.currentTimeMillis() - startTime; if (isLogDebugEnabled()) logDebug("GroupIndexer finished in " + indexTime + " ms"); diff --git a/src/main/java/org/olat/search/service/indexer/identity/IdentityIndexer.java b/src/main/java/org/olat/search/service/indexer/identity/IdentityIndexer.java index 259a99e783b..256f0f2c0be 100644 --- a/src/main/java/org/olat/search/service/indexer/identity/IdentityIndexer.java +++ b/src/main/java/org/olat/search/service/indexer/identity/IdentityIndexer.java @@ -87,6 +87,7 @@ public class IdentityIndexer extends AbstractHierarchicalIndexer { logWarn("Exception while indexing identity::" + identityKey + ". Skipping this user, try next one.", ex); DBFactory.getInstance().rollbackAndCloseSession(); } + DBFactory.getInstance().commitAndCloseSession(); } if (isLogDebugEnabled()) logDebug("IdentityIndexer finished with counter::" + counter); } diff --git a/src/main/java/org/olat/search/service/indexer/repository/RepositoryIndexer.java b/src/main/java/org/olat/search/service/indexer/repository/RepositoryIndexer.java index 528e8ec3054..ec79e70948f 100644 --- a/src/main/java/org/olat/search/service/indexer/repository/RepositoryIndexer.java +++ b/src/main/java/org/olat/search/service/indexer/repository/RepositoryIndexer.java @@ -112,7 +112,6 @@ public class RepositoryIndexer extends AbstractHierarchicalIndexer { final SearchRepositoryEntryParameters params = new SearchRepositoryEntryParameters(); params.setRoles(roles); - boolean debug = isLogDebugEnabled(); @@ -136,6 +135,10 @@ public class RepositoryIndexer extends AbstractHierarchicalIndexer { logInfo("doIndex: repositoryEntry was deleted while we were indexing. The deleted repositoryEntry was: "+repositoryEntry); continue; } + if(repositoryEntry.getAccess() == RepositoryEntry.DELETED) { + continue; + } + repositoryEntry = reloadedRepositoryEntry; if (debug) { logDebug("Index repositoryEntry=" + repositoryEntry + " counter=" + counter++ + " with ResourceableId=" + repositoryEntry.getOlatResource().getResourceableId()); @@ -169,6 +172,7 @@ public class RepositoryIndexer extends AbstractHierarchicalIndexer { logWarn("Exception=" + ex.getMessage() + " for repo entry " + entryDebug, ex); dbInstance.rollbackAndCloseSession(); } + dbInstance.commitAndCloseSession(); } counter += repositoryList.size(); diff --git a/src/main/java/org/olat/search/service/indexer/repository/course/DialogCourseNodeIndexer.java b/src/main/java/org/olat/search/service/indexer/repository/course/DialogCourseNodeIndexer.java index 102a71a4656..5f69ec1b34b 100644 --- a/src/main/java/org/olat/search/service/indexer/repository/course/DialogCourseNodeIndexer.java +++ b/src/main/java/org/olat/search/service/indexer/repository/course/DialogCourseNodeIndexer.java @@ -40,7 +40,6 @@ import org.olat.core.id.OLATResourceable; import org.olat.core.id.Roles; import org.olat.core.id.context.BusinessControl; import org.olat.core.id.context.ContextEntry; -import org.olat.core.util.WorkThreadInformations; import org.olat.core.util.resource.OresHelper; import org.olat.core.util.vfs.VFSLeaf; import org.olat.core.util.vfs.filters.VFSLeafFilter; @@ -123,7 +122,6 @@ public class DialogCourseNodeIndexer extends DefaultIndexer implements CourseNod leafResourceContext.setFilePath(filename); leafResourceContext.setDocumentType(TYPE_FILE); - WorkThreadInformations.set("Index Dialog VFSLeaf=" + filename + " at " + leafResourceContext.getResourceUrl()); Document document = CoreSpringFactory.getImpl(FileDocumentFactory.class).createDocument(leafResourceContext, leaf); indexWriter.addDocument(document); } else { @@ -137,8 +135,6 @@ public class DialogCourseNodeIndexer extends DefaultIndexer implements CourseNod throw new InterruptedException(iex.getMessage()); } catch (Exception ex) { logWarn("Exception: Can not index leaf=" + leaf.getName(), ex); - } finally { - WorkThreadInformations.unset(); } } -- GitLab