diff --git a/src/main/java/org/olat/core/util/filter/impl/HtmlFilter.java b/src/main/java/org/olat/core/util/filter/impl/HtmlFilter.java index 3efef4f1645a8fff835636bcddea189d4e4f32c1..1ce347a55ab940796ca23a497f013b842e5cad70 100644 --- a/src/main/java/org/olat/core/util/filter/impl/HtmlFilter.java +++ b/src/main/java/org/olat/core/util/filter/impl/HtmlFilter.java @@ -81,7 +81,7 @@ public class HtmlFilter implements Filter { if (in == null) return null; try { HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); - HtmlHandler contentHandler = new HtmlHandler((int)(1000 * 0.66f), false); + HtmlHandler contentHandler = new HtmlHandler(8096, false); parser.setContentHandler(contentHandler); parser.parse(new InputSource(in)); return contentHandler.getContent(); diff --git a/src/main/java/org/olat/search/service/indexer/FolderIndexerWorker.java b/src/main/java/org/olat/search/service/indexer/FolderIndexerWorker.java index bac90bfba6ff48271fd66aa1fe393c796fb68d91..dcd5fea8c617e47122084cccd1ae6a302929bdef 100644 --- a/src/main/java/org/olat/search/service/indexer/FolderIndexerWorker.java +++ b/src/main/java/org/olat/search/service/indexer/FolderIndexerWorker.java @@ -29,10 +29,10 @@ package org.olat.search.service.indexer; import java.io.IOException; import java.util.concurrent.Callable; +import org.apache.logging.log4j.Logger; import org.apache.lucene.document.Document; import org.olat.core.CoreSpringFactory; import org.olat.core.commons.persistence.DBFactory; -import org.apache.logging.log4j.Logger; import org.olat.core.logging.Tracing; import org.olat.core.util.vfs.VFSContainer; import org.olat.core.util.vfs.VFSItem; @@ -86,27 +86,28 @@ public class FolderIndexerWorker implements Callable<Boolean> { throws IOException, InterruptedException { // Items: List of VFSContainer & VFSLeaf String myFilePath = fPath; + boolean debug = log.isDebugEnabled(); for (VFSItem item : cont.getItems(new VFSSystemItemFilter())) { if (item instanceof VFSContainer) { // ok it is a container go further - if (log.isDebugEnabled()) log.debug(item.getName() + " is a VFSContainer => go further "); + if (debug) log.debug("{} is a VFSContainer => go further ", item.getName()); if(aRule.allowed(item)) { doIndexVFSContainer(resourceContext, (VFSContainer)item, writer, myFilePath + "/" + ((VFSContainer)item).getName(), aRule); } } else if (item instanceof VFSLeaf) { // ok it is a file => analyse it - if (log.isDebugEnabled()) log.debug(item.getName() + " is a VFSLeaf => analyse file"); + if (debug) log.debug("{} is a VFSLeaf => analyse file", item.getName() ); if(aRule.allowed(item)) { doIndexVFSLeaf(resourceContext, (VFSLeaf)item, writer, myFilePath); } } else { - log.warn("Unkown element in item-list class=" + item.getClass()); + log.warn("Unkown element in item-list class={}", item.getClass()); } } } protected void doIndexVFSLeaf(SearchResourceContext leafResourceContext, VFSLeaf leaf, OlatFullIndexer writer, String fPath) { - if (log.isDebugEnabled()) log.debug("Analyse VFSLeaf=" + leaf.getName()); + if (log.isDebugEnabled()) log.debug("Analyse VFSLeaf={}", leaf.getName()); try { if (docFactory.isFileSupported(leaf)) { String myFilePath = fPath + "/" + leaf.getName(); @@ -116,16 +117,16 @@ public class FolderIndexerWorker implements Callable<Boolean> { writer.addDocument(document); } } else { - if (log.isDebugEnabled()) log.debug("Documenttype not supported. file=" + leaf.getName()); + if (log.isDebugEnabled()) log.debug("Documenttype not supported. file={}", leaf.getName()); } } catch (DocumentAccessException e) { - if (log.isDebugEnabled()) log.debug("Can not access document." + e.getMessage()); + if (log.isDebugEnabled()) log.debug("Can not access document.", e); } catch (InterruptedException e) { - if (log.isDebugEnabled()) log.debug("InterruptedException: Can not index leaf=" + leaf.getName() + ";" + e.getMessage()); + if (log.isDebugEnabled()) log.debug("InterruptedException: Can not index leaf={}", leaf.getName(), e); } catch (IOException ioEx) { - log.warn("IOException: Can not index leaf=" + leaf.getName(), ioEx); + log.warn("IOException: Can not index leaf={}", leaf.getName(), ioEx); } catch (Exception ex) { - log.warn("Exception: Can not index leaf=" + leaf.getName(), ex); + log.warn("Exception: Can not index leaf={}", leaf.getName(), ex); } } diff --git a/src/main/java/org/olat/search/service/indexer/repository/ImsCPRepositoryIndexer.java b/src/main/java/org/olat/search/service/indexer/repository/ImsCPRepositoryIndexer.java index a6f02ec2edc1499b14cab87862a6ed600928920c..201355be8e172787a8dd296c6556103bd982a36b 100644 --- a/src/main/java/org/olat/search/service/indexer/repository/ImsCPRepositoryIndexer.java +++ b/src/main/java/org/olat/search/service/indexer/repository/ImsCPRepositoryIndexer.java @@ -50,7 +50,7 @@ public class ImsCPRepositoryIndexer extends FolderIndexer { private static final Logger log = Tracing.createLoggerFor(ImsCPRepositoryIndexer.class); // Must correspond with LocalString_xx.properties - // Do not use '_' because we want to seach for certain documenttype and lucene haev problems with '_' + // Do not use '_' because we want to search for certain document types and lucene has problems with '_' public static final String TYPE = "type.repository.entry.imscp"; public static final String ORES_TYPE_CP = ImsCPFileResource.TYPE_NAME; @@ -59,10 +59,7 @@ public class ImsCPRepositoryIndexer extends FolderIndexer { public String getSupportedTypeName() { return ORES_TYPE_CP; } - - /** - * @see org.olat.repository.handlers.RepositoryHandler#supportsDownload() - */ + @Override public void doIndex(SearchResourceContext resourceContext, Object parentObject, OlatFullIndexer indexWriter) throws IOException,InterruptedException { RepositoryEntry repositoryEntry = (RepositoryEntry) parentObject; diff --git a/src/main/java/org/olat/search/service/indexer/repository/ScormRepositoryIndexer.java b/src/main/java/org/olat/search/service/indexer/repository/ScormRepositoryIndexer.java index c5ec9503fd17066c309581ce9d728d84c7a733f2..aefd0ce3b09425a1698527de15984de7fb367596 100644 --- a/src/main/java/org/olat/search/service/indexer/repository/ScormRepositoryIndexer.java +++ b/src/main/java/org/olat/search/service/indexer/repository/ScormRepositoryIndexer.java @@ -21,7 +21,6 @@ package org.olat.search.service.indexer.repository; import java.io.File; import java.io.IOException; -import java.util.ArrayList; import java.util.Date; import java.util.HashSet; import java.util.List; @@ -59,41 +58,26 @@ public class ScormRepositoryIndexer extends FolderIndexer { private static final Logger log = Tracing.createLoggerFor(ScormRepositoryIndexer.class); - public static Set<String> stopWords = new HashSet<>(); + private static final Set<String> stopWords = new HashSet<>(); static { stopWords.add("LOMv1.0"); stopWords.add("yes"); stopWords.add("NA"); } - public static final List<String> forbiddenExtensions = new ArrayList<>(); - static { - forbiddenExtensions.add("LOMv1.0"); - forbiddenExtensions.add(".xsd"); - forbiddenExtensions.add(".js"); - } - public static final Set<String> forbiddenFiles = new HashSet<>(); - static { - forbiddenFiles.add("imsmanifest.xml"); - } - - + public static final String TYPE = "type.repository.entry.scorm"; public static final String ORES_TYPE_SCORM = ScormCPFileResource.TYPE_NAME; public ScormRepositoryIndexer() { // Repository types } - - /** - * - */ + + @Override public String getSupportedTypeName() { return ORES_TYPE_SCORM; } - /** - * @see org.olat.repository.handlers.RepositoryHandler#supportsDownload() - */ + @Override public void doIndex(SearchResourceContext resourceContext, Object parentObject, OlatFullIndexer indexWriter) throws IOException,InterruptedException { if (log.isDebugEnabled()) log.debug("Index Scorm package..."); @@ -123,7 +107,7 @@ public class ScormRepositoryIndexer extends FolderIndexer { private Document createManifestDocument(VFSLeaf fManifest, Element rootElement, SearchResourceContext resourceContext) { IMSMetadataDocument document = new IMSMetadataDocument(); document.setResourceUrl(resourceContext.getResourceUrl()); - if (log.isDebugEnabled()) log.debug("MM: URL=" + document.getResourceUrl()); + if (log.isDebugEnabled()) log.debug("MM: URL={}", document.getResourceUrl()); document.setLastChange(new Date(fManifest.getLastModified())); document.setDocumentType(resourceContext.getDocumentType()); if (StringHelper.containsNonWhitespace(resourceContext.getTitle())) { @@ -134,8 +118,6 @@ public class ScormRepositoryIndexer extends FolderIndexer { document.setParentContextType(resourceContext.getParentContextType()); document.setParentContextName(resourceContext.getParentContextName()); - - StringBuilder sb = new StringBuilder(); collectLangString(sb, rootElement); document.setContent(sb.toString()); @@ -157,21 +139,14 @@ public class ScormRepositoryIndexer extends FolderIndexer { } } - public class ScormFileAccess implements FolderIndexerAccess { - + private static class ScormFileAccess implements FolderIndexerAccess { @Override public boolean allowed(VFSItem item) { - String name = item.getName(); - if(forbiddenFiles.contains(name)) { - return false; - } - - for(String forbiddenExtension:forbiddenExtensions) { - if(name.endsWith(forbiddenExtension)) { - return false; - } + String name = item.getName().toLowerCase(); + if(item instanceof VFSContainer) { + return !name.equals("template"); } - return true; + return name.endsWith(".htm") || name.endsWith(".html"); } } } diff --git a/src/main/java/org/olat/search/service/indexer/repository/course/ScormCourseNodeIndexer.java b/src/main/java/org/olat/search/service/indexer/repository/course/ScormCourseNodeIndexer.java index 9a8bbffd92ffd34b7fb61b2d6fb072ebb2fece5e..dc188f6f05af9d1de6951f27fd8db28680146573 100644 --- a/src/main/java/org/olat/search/service/indexer/repository/course/ScormCourseNodeIndexer.java +++ b/src/main/java/org/olat/search/service/indexer/repository/course/ScormCourseNodeIndexer.java @@ -43,8 +43,8 @@ import org.olat.search.service.indexer.repository.ScormRepositoryIndexer; * @author srosse, stephane.rosse@frentix.com */ public class ScormCourseNodeIndexer extends ScormRepositoryIndexer implements CourseNodeIndexer { - public final static String NODE_TYPE = "type.course.node.scorm"; - private final static String SUPPORTED_TYPE_NAME = "org.olat.course.nodes.ScormCourseNode"; + public static final String NODE_TYPE = "type.course.node.scorm"; + private static final String SUPPORTED_TYPE_NAME = "org.olat.course.nodes.ScormCourseNode"; @Override public void doIndex(SearchResourceContext repositoryResourceContext, ICourse course, CourseNode courseNode, OlatFullIndexer indexWriter)