diff --git a/pom.xml b/pom.xml index 13be36b7ce59bf2a49f219e2c5969d47f010b25a..6d19fd40d44970ae9691120386f3daa9d759b242 100644 --- a/pom.xml +++ b/pom.xml @@ -62,7 +62,7 @@ <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <targetJdk>1.8</targetJdk> - <org.springframework.version>5.1.6.RELEASE</org.springframework.version> + <org.springframework.version>5.1.7.RELEASE</org.springframework.version> <org.hibernate.version>5.4.2.Final</org.hibernate.version> <com.sun.jersey.version>1.19.4</com.sun.jersey.version> <apache.cxf>3.3.2</apache.cxf> @@ -2165,6 +2165,11 @@ <scope>test</scope> </dependency> <!-- core dependencies --> + <dependency> + <groupId>nu.validator</groupId> + <artifactId>htmlparser</artifactId> + <version>1.4.13</version> + </dependency> <dependency> <groupId>net.sourceforge.nekohtml</groupId> <artifactId>nekohtml</artifactId> diff --git a/src/main/java/org/olat/core/gui/components/form/flexible/impl/elements/richText/TextMode.java b/src/main/java/org/olat/core/gui/components/form/flexible/impl/elements/richText/TextMode.java index 8cff3631d97755ee2d60b87b1be8aaf7f333cb2d..eabf6530c240c2cd51c0b66f8aa9ecda63b14cd5 100644 --- a/src/main/java/org/olat/core/gui/components/form/flexible/impl/elements/richText/TextMode.java +++ b/src/main/java/org/olat/core/gui/components/form/flexible/impl/elements/richText/TextMode.java @@ -23,7 +23,6 @@ import java.io.ByteArrayInputStream; import java.util.ArrayList; import java.util.List; -import org.cyberneko.html.parsers.SAXParser; import org.apache.logging.log4j.Logger; import org.olat.core.logging.Tracing; import org.olat.core.util.StringHelper; @@ -32,6 +31,9 @@ import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + /** * * Initial date: 19 juil. 
2017<br> @@ -118,12 +120,9 @@ public enum TextMode { } private static void parse(String text, DefaultHandler handler) throws Exception { - SAXParser parser = new SAXParser(); - parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); - parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); - parser.setProperty("http://cyberneko.org/html/properties/default-encoding", "UTF-8"); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); parser.setContentHandler(handler); - parser.parse(new InputSource(new ByteArrayInputStream(text.getBytes()))); + parser.parseFragment(new InputSource(new java.io.StringReader(text)), ""); } private static final class TextAnalyser extends DefaultHandler { diff --git a/src/main/java/org/olat/core/util/Formatter.java b/src/main/java/org/olat/core/util/Formatter.java index c012fe1f28ba50bea2a68fec20d25c71632894cb..9f7ae71b3617538f4dfdf2984370542191fb6ff0 100644 --- a/src/main/java/org/olat/core/util/Formatter.java +++ b/src/main/java/org/olat/core/util/Formatter.java @@ -50,7 +50,7 @@ import org.apache.commons.lang.time.DurationFormatUtils; import org.olat.core.dispatcher.impl.StaticMediaDispatcher; import org.olat.core.gui.render.StringOutput; import org.olat.core.helpers.Settings; -import org.olat.core.util.filter.impl.NekoHTMLMathScanner; +import org.olat.core.util.filter.impl.HtmlMathScanner; /** * enclosing_type Description: <br> @@ -623,7 +623,7 @@ public class Formatter { public static String formatLatexFormulas(String htmlFragment) { if (htmlFragment == null) return ""; // optimize, reduce jsmath calls on client - if (new NekoHTMLMathScanner().scan(htmlFragment)) { + if (new HtmlMathScanner().scan(htmlFragment)) { // add math wrapper String domid = "mw_" + CodeHelper.getRAMUniqueID(); String elem = htmlFragment.contains("<div") || htmlFragment.contains("<p") ? 
"div" : "span"; diff --git a/src/main/java/org/olat/core/util/StringHelper.java b/src/main/java/org/olat/core/util/StringHelper.java index 349d0c0ed96c0cccea0fb8bb1d7994b6dd8667ea..0d184df22794c496d73917c0177f1b60f4c4bb42 100644 --- a/src/main/java/org/olat/core/util/StringHelper.java +++ b/src/main/java/org/olat/core/util/StringHelper.java @@ -51,7 +51,7 @@ import org.olat.core.id.Identity; import org.olat.core.logging.AssertException; import org.apache.logging.log4j.Logger; import org.olat.core.logging.Tracing; -import org.olat.core.util.filter.impl.NekoHTMLScanner; +import org.olat.core.util.filter.impl.HtmlScanner; import org.olat.core.util.filter.impl.OWASPAntiSamyXSSFilter; import org.olat.user.UserManager; @@ -337,9 +337,7 @@ public class StringHelper { */ public static boolean isHtml(String s) { if (s == null) return false; - - boolean containsHtml = new NekoHTMLScanner().scan(s); - return containsHtml; + return new HtmlScanner().scan(s); } /** diff --git a/src/main/java/org/olat/core/util/filter/FilterFactory.java b/src/main/java/org/olat/core/util/filter/FilterFactory.java index 8100f1cebb6fbae357a7bb71fd133728e31d62d2..681fac8ae174a756181a4d37d6ad9c3ad8ab4627 100644 --- a/src/main/java/org/olat/core/util/filter/FilterFactory.java +++ b/src/main/java/org/olat/core/util/filter/FilterFactory.java @@ -22,7 +22,7 @@ package org.olat.core.util.filter; import org.olat.core.util.filter.impl.AddBaseURLToMediaRelativeURLFilter; import org.olat.core.util.filter.impl.ConditionalHTMLCommentsFilter; -import org.olat.core.util.filter.impl.NekoHTMLFilter; +import org.olat.core.util.filter.impl.HtmlFilter; import org.olat.core.util.filter.impl.OWASPAntiSamyXSSFilter; import org.olat.core.util.filter.impl.OWASPAntiSamyXSSFilter.Variant; import org.olat.core.util.filter.impl.SimpleHTMLTagsFilter; @@ -44,7 +44,7 @@ import org.olat.core.util.filter.impl.XMLValidEntityFilter; public class FilterFactory { // the html tag filter is static, not stateful private static final 
Filter htmlTagsFilter = new SimpleHTMLTagsFilter(); - private static final Filter htmlTagsAndDesescapingFilter = new NekoHTMLFilter(); + private static final Filter htmlTagsAndDesescapingFilter = new HtmlFilter(); private static final Filter conditionalCommentsFilter = new ConditionalHTMLCommentsFilter(); private static final Filter xmlValidCharacterFilter = new XMLValidCharacterFilter(); private static final Filter smileysCssToDataUriFilter = new SmileysCssToDataUriFilter(); diff --git a/src/main/java/org/olat/core/util/filter/impl/NekoHTMLFilter.java b/src/main/java/org/olat/core/util/filter/impl/HtmlFilter.java similarity index 83% rename from src/main/java/org/olat/core/util/filter/impl/NekoHTMLFilter.java rename to src/main/java/org/olat/core/util/filter/impl/HtmlFilter.java index 7ada2bb70b7e910f26b26e1e93710221b686dc6a..3efef4f1645a8fff835636bcddea189d4e4f32c1 100644 --- a/src/main/java/org/olat/core/util/filter/impl/NekoHTMLFilter.java +++ b/src/main/java/org/olat/core/util/filter/impl/HtmlFilter.java @@ -19,7 +19,6 @@ */ package org.olat.core.util.filter.impl; -import java.io.IOException; import java.io.InputStream; import java.io.StringReader; import java.util.Arrays; @@ -27,27 +26,28 @@ import java.util.HashSet; import java.util.Set; import org.apache.logging.log4j.Logger; -import org.cyberneko.html.parsers.SAXParser; import org.olat.core.logging.Tracing; import org.olat.core.util.filter.Filter; import org.olat.core.util.io.LimitedContentWriter; import org.olat.search.service.document.file.FileDocumentFactory; import org.xml.sax.Attributes; import org.xml.sax.InputSource; -import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + /** * Description:<br> - * Filter the HTML code using Neko SAX parser and extract the content. - * Neko parse the HTML entities too and deliver cleaned text. 
+ * Filter the HTML code using the HtmlParser SAX parser and extract the content. + * It parses the HTML entities too and delivers cleaned text. * * <P> * Initial Date: 2 dec. 2009 <br> * @author srosse */ -public class NekoHTMLFilter implements Filter { - private static final Logger log = Tracing.createLoggerFor(NekoHTMLFilter.class); +public class HtmlFilter implements Filter { + private static final Logger log = Tracing.createLoggerFor(HtmlFilter.class); public static final Set<String> blockTags = new HashSet<>(); public static final Set<String> toBeSkippedTags = new HashSet<>(); @@ -66,48 +66,36 @@ public class NekoHTMLFilter implements Filter { if(original.isEmpty()) return ""; try { - SAXParser parser = new SAXParser(); - HTMLHandler contentHandler = new HTMLHandler((int)(original.length() * 0.66f), pretty); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); + HtmlHandler contentHandler = new HtmlHandler((int)(original.length() * 0.66f), pretty); parser.setContentHandler(contentHandler); parser.parse(new InputSource(new StringReader(original))); return contentHandler.toString(); - } catch (SAXException e) { - log.error("", e); - return null; - } catch (IOException e) { - log.error("", e); - return null; } catch (Exception e) { log.error("", e); return null; } } - public NekoContent filter(InputStream in) { + public HtmlContent filter(InputStream in) { if (in == null) return null; try { - SAXParser parser = new SAXParser(); - HTMLHandler contentHandler = new HTMLHandler((int)(1000 * 0.66f), false); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); + HtmlHandler contentHandler = new HtmlHandler((int)(1000 * 0.66f), false); parser.setContentHandler(contentHandler); parser.parse(new InputSource(in)); return contentHandler.getContent(); - } catch (SAXException e) { - log.error("", e); - return null; - } catch (IOException e) { - log.error("", e); - return null; } catch (Exception e) { log.error("", e); return null; } } - 
public static class NekoContent { + public static class HtmlContent { private final String title; private final LimitedContentWriter content; - public NekoContent(String title, LimitedContentWriter content) { + public HtmlContent(String title, LimitedContentWriter content) { this.title = title; this.content = content; } @@ -121,7 +109,7 @@ public class NekoHTMLFilter implements Filter { } } - private static class HTMLHandler extends DefaultHandler { + private static class HtmlHandler extends DefaultHandler { private boolean collect = true; private boolean consumeBlanck = false; private boolean consumeTitle = true; @@ -129,7 +117,7 @@ public class NekoHTMLFilter implements Filter { private final LimitedContentWriter content; private final StringBuilder title; - public HTMLHandler(int size, boolean pretty) { + public HtmlHandler(int size, boolean pretty) { this.pretty = pretty; content = new LimitedContentWriter(size, FileDocumentFactory.getMaxFileSize()); title = new StringBuilder(32); @@ -213,8 +201,8 @@ public class NekoHTMLFilter implements Filter { } } - public NekoContent getContent() { - return new NekoContent(title.toString(), content); + public HtmlContent getContent() { + return new HtmlContent(title.toString(), content); } @Override diff --git a/src/main/java/org/olat/core/util/filter/impl/NekoHTMLMathScanner.java b/src/main/java/org/olat/core/util/filter/impl/HtmlMathScanner.java similarity index 86% rename from src/main/java/org/olat/core/util/filter/impl/NekoHTMLMathScanner.java rename to src/main/java/org/olat/core/util/filter/impl/HtmlMathScanner.java index 5ba6c8fa531c078b9a443816ff88eaa5e31e6749..71642b582aa26c0096aeb89f4b28f4466a0514b9 100644 --- a/src/main/java/org/olat/core/util/filter/impl/NekoHTMLMathScanner.java +++ b/src/main/java/org/olat/core/util/filter/impl/HtmlMathScanner.java @@ -22,7 +22,6 @@ package org.olat.core.util.filter.impl; import java.io.StringReader; import org.apache.logging.log4j.Logger; -import 
org.cyberneko.html.parsers.SAXParser; import org.olat.core.logging.Tracing; import org.olat.core.util.WebappHelper; import org.xml.sax.Attributes; @@ -30,6 +29,9 @@ import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + /** * Detect the tag math and a CSS clas named math * @@ -38,17 +40,15 @@ import org.xml.sax.helpers.DefaultHandler; * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com * */ -public class NekoHTMLMathScanner { +public class HtmlMathScanner { - private static final Logger log = Tracing.createLoggerFor(NekoHTMLMathScanner.class); + private static final Logger log = Tracing.createLoggerFor(HtmlMathScanner.class); public boolean scan(String original) { if (original == null) return false; try { - SAXParser parser = new SAXParser(); - parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); - parser.setFeature("http://cyberneko.org/html/features/balance-tags", false); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); HTMLHandler contentHandler = new HTMLHandler(); parser.setContentHandler(contentHandler); parser.parse(new InputSource(new StringReader(original))); @@ -68,7 +68,7 @@ public class NekoHTMLMathScanner { @Override public void startElement(String uri, String localName, String qName, Attributes attributes) { - if("MATH".equals(localName)) { + if("MATH".equalsIgnoreCase(localName)) { mathFound = true; } else if(attributes != null) { String css = attributes.getValue("class"); diff --git a/src/main/java/org/olat/core/util/filter/impl/NekoHTMLScanner.java b/src/main/java/org/olat/core/util/filter/impl/HtmlScanner.java similarity index 74% rename from src/main/java/org/olat/core/util/filter/impl/NekoHTMLScanner.java rename to src/main/java/org/olat/core/util/filter/impl/HtmlScanner.java index 
5af3dca31628eb14561d4ad3d2b7f2eccf30204f..bc6d76cb10e249f9b80bd1cd697c8b5059f0d4aa 100644 --- a/src/main/java/org/olat/core/util/filter/impl/NekoHTMLScanner.java +++ b/src/main/java/org/olat/core/util/filter/impl/HtmlScanner.java @@ -19,18 +19,20 @@ */ package org.olat.core.util.filter.impl; -import java.io.IOException; import java.io.StringReader; import org.apache.logging.log4j.Logger; -import org.cyberneko.html.parsers.SAXParser; import org.olat.core.logging.Tracing; import org.olat.core.util.StringHelper; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; import org.xml.sax.helpers.DefaultHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + /** * Detect tags. * @@ -38,27 +40,21 @@ import org.xml.sax.helpers.DefaultHandler; * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com * */ -public class NekoHTMLScanner { +public class HtmlScanner { - private static final Logger log = Tracing.createLoggerFor(NekoHTMLScanner.class); + private static final Logger log = Tracing.createLoggerFor(HtmlScanner.class); public boolean scan(String original) { if (original == null) return false; try { - SAXParser parser = new SAXParser(); - parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); - parser.setFeature("http://cyberneko.org/html/features/balance-tags", false); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALLOW); + parser.setCheckingNormalization(false); HTMLHandler contentHandler = new HTMLHandler(); parser.setContentHandler(contentHandler); - parser.parse(new InputSource(new StringReader(original))); + parser.setErrorHandler(contentHandler); + parser.parseFragment(new InputSource(new StringReader(original)), ""); return contentHandler.tagFound(); - } catch (SAXException e) { - log.error("", e); - return false; - } catch (IOException e) { - log.error("", e); - 
return false; } catch (Exception e) { log.error("", e); return false; @@ -72,6 +68,14 @@ public class NekoHTMLScanner { return tagFound; } + @Override + public void error(SAXParseException exception) throws SAXException { + String msg = exception.getMessage(); + if(msg != null && (msg.equals("Stray start tag \u201Chtml\u201D.") || msg.equals("Stray start tag \u201Cbody\u201D."))) { + tagFound = true; + } + } + @Override public void startElement(String uri, String localName, String qName, Attributes attributes) { if(!tagFound && StringHelper.containsNonWhitespace(localName) && isTagAllowed(localName)) { diff --git a/src/main/java/org/olat/core/util/filter/impl/OWASPAntiSamyXSSFilter.java b/src/main/java/org/olat/core/util/filter/impl/OWASPAntiSamyXSSFilter.java index c6c9eebb7d3eb25432bf4442bc92a91cc31dbc97..067a9ebc933541a9dc9d992bc2b949ff192b39d2 100644 --- a/src/main/java/org/olat/core/util/filter/impl/OWASPAntiSamyXSSFilter.java +++ b/src/main/java/org/olat/core/util/filter/impl/OWASPAntiSamyXSSFilter.java @@ -27,7 +27,6 @@ import java.io.StringWriter; import java.io.Writer; import org.apache.logging.log4j.Logger; -import org.cyberneko.html.parsers.SAXParser; import org.olat.core.logging.OLATRuntimeException; import org.olat.core.logging.Tracing; import org.olat.core.util.filter.Filter; @@ -42,6 +41,9 @@ import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + /** * Description:<br> * OWASP AntiSamy XSSFilter @@ -213,7 +215,7 @@ public class OWASPAntiSamyXSSFilter implements Filter { private String cleanHtml(String original) { try { HTMLCleanerHandler handler = new HTMLCleanerHandler(); - SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); parser.setContentHandler(handler); parser.parse(new InputSource(new StringReader(original))); return 
handler.toString(); diff --git a/src/main/java/org/olat/core/util/filter/impl/SimpleHTMLTagsFilter.java b/src/main/java/org/olat/core/util/filter/impl/SimpleHTMLTagsFilter.java index 2b36d64245965be6dbb7799feb60d74e58a6d252..b15b25a3c14a57688a036ebc130e4a4e92ca9478 100644 --- a/src/main/java/org/olat/core/util/filter/impl/SimpleHTMLTagsFilter.java +++ b/src/main/java/org/olat/core/util/filter/impl/SimpleHTMLTagsFilter.java @@ -19,11 +19,9 @@ */ package org.olat.core.util.filter.impl; -import java.io.IOException; import java.io.StringReader; import org.apache.logging.log4j.Logger; -import org.cyberneko.html.parsers.SAXParser; import org.olat.core.logging.Tracing; import org.olat.core.util.StringHelper; import org.olat.core.util.filter.Filter; @@ -31,9 +29,11 @@ import org.olat.core.util.io.LimitedContentWriter; import org.olat.search.service.document.file.FileDocumentFactory; import org.xml.sax.Attributes; import org.xml.sax.InputSource; -import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + /** * Description:<br> * The html tags filter takes a string and filters all HTML tags. 
The filter @@ -60,7 +60,7 @@ public class SimpleHTMLTagsFilter implements Filter { if(original.isEmpty()) return ""; try { - SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); HTMLHandler contentHandler = new HTMLHandler(original.length()); parser.setContentHandler(contentHandler); parser.parse(new InputSource(new StringReader(original))); @@ -68,12 +68,6 @@ public class SimpleHTMLTagsFilter implements Filter { text = text.replace('\u00a0', ' '); text = StringHelper.escapeHtml(text); return text; - } catch (SAXException e) { - log.error("", e); - return null; - } catch (IOException e) { - log.error("", e); - return null; } catch (Exception e) { log.error("", e); return null; @@ -99,7 +93,7 @@ public class SimpleHTMLTagsFilter implements Filter { content.append(" "); } else if("br".equals(elem)) { content.append(" "); - } else if(NekoHTMLFilter.blockTags.contains(elem) && content.length() > 0 && content.charAt(content.length() -1) != ' ' ) { + } else if(HtmlFilter.blockTags.contains(elem) && content.length() > 0 && content.charAt(content.length() -1) != ' ' ) { consumeBlanck = true; } } @@ -124,7 +118,7 @@ public class SimpleHTMLTagsFilter implements Filter { collect = true; } else if("li".equals(elem) || "p".equals(elem)) { content.append(" "); - } else if(NekoHTMLFilter.blockTags.contains(elem) && content.length() > 0 && content.charAt(content.length() -1) != ' ' ) { + } else if(HtmlFilter.blockTags.contains(elem) && content.length() > 0 && content.charAt(content.length() -1) != ' ' ) { consumeBlanck = true; } } diff --git a/src/main/java/org/olat/core/util/mail/manager/MailManagerImpl.java b/src/main/java/org/olat/core/util/mail/manager/MailManagerImpl.java index a8cf3cc7bc91346eb6e0f17d9e67cc07fcac4216..7dce181ea3049890b6422f07b36b558a1949bfc1 100644 --- a/src/main/java/org/olat/core/util/mail/manager/MailManagerImpl.java +++ b/src/main/java/org/olat/core/util/mail/manager/MailManagerImpl.java @@ -93,7 
+93,7 @@ import org.olat.core.logging.Tracing; import org.olat.core.util.Encoder; import org.olat.core.util.StringHelper; import org.olat.core.util.WebappHelper; -import org.olat.core.util.filter.impl.NekoHTMLFilter; +import org.olat.core.util.filter.impl.HtmlFilter; import org.olat.core.util.mail.ContactList; import org.olat.core.util.mail.MailAttachment; import org.olat.core.util.mail.MailBundle; @@ -1772,7 +1772,7 @@ public class MailManagerImpl implements MailManager, InitializingBean { private Multipart createMultipartAlternative(String text) throws MessagingException { - String pureText = new NekoHTMLFilter().filter(text, true); + String pureText = new HtmlFilter().filter(text, true); MimeBodyPart textPart = new MimeBodyPart(); textPart.setText(pureText, "utf-8"); diff --git a/src/main/java/org/olat/core/util/openxml/OpenXMLDocument.java b/src/main/java/org/olat/core/util/openxml/OpenXMLDocument.java index 894527a88ba25e0442bcf89b19c50d069ba90668..4ea05b61e1f2668473ad11ccb910cd707c042110 100644 --- a/src/main/java/org/olat/core/util/openxml/OpenXMLDocument.java +++ b/src/main/java/org/olat/core/util/openxml/OpenXMLDocument.java @@ -44,10 +44,9 @@ import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.apache.commons.io.IOUtils; -import org.cyberneko.html.parsers.SAXParser; +import org.apache.logging.log4j.Logger; import org.olat.core.commons.services.image.ImageUtils; import org.olat.core.commons.services.image.Size; -import org.apache.logging.log4j.Logger; import org.olat.core.logging.Tracing; import org.olat.core.util.StringHelper; import org.olat.core.util.io.ShieldInputStream; @@ -64,6 +63,7 @@ import org.xml.sax.SAXException; import fmath.conversion.ConvertFromLatexToMathML; import fmath.conversion.ConvertFromMathMLToWord; +import nu.validator.htmlparser.sax.HtmlParser; /** * The page are A4 format, with 2.54cm margins on top, bottom, left and right. 
@@ -419,12 +419,10 @@ public class OpenXMLDocument { if(!StringHelper.containsNonWhitespace(html)) return; try { html = cleanUpHTML(html); - SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(); parser.setContentHandler(new HTMLToOpenXMLHandler(this, spacing)); parser.parse(new InputSource(new StringReader(html))); - } catch (SAXException e) { - log.error("", e); - } catch (IOException e) { + } catch (SAXException | IOException e) { log.error("", e); } } @@ -433,13 +431,11 @@ public class OpenXMLDocument { if(!StringHelper.containsNonWhitespace(html)) return; try { html = cleanUpHTML(html); - SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(); Element paragraphEl = getParagraphToAppendTo(newParagraph); parser.setContentHandler(new HTMLToOpenXMLHandler(this, null, paragraphEl, true)); parser.parse(new InputSource(new StringReader(html))); - } catch (SAXException e) { - log.error("", e); - } catch (IOException e) { + } catch (SAXException | IOException e) { log.error("", e); } } @@ -448,20 +444,18 @@ public class OpenXMLDocument { if(!StringHelper.containsNonWhitespace(html)) return; try { html = cleanUpHTML(html); - SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(); Element paragraphEl = getParagraphToAppendTo(newParagraph); handler.setInitialParagraph(paragraphEl); parser.setContentHandler(handler); parser.parse(new InputSource(new StringReader(html))); - } catch (SAXException e) { - log.error("", e); - } catch (IOException e) { + } catch (SAXException | IOException e) { log.error("", e); } } /** - * The Neko HTMl parser has some issues with <p/>. + * The HTML parser has (or had) some issues with <p/>. 
* * @param html The HTML to clean up * @return HTML code which Neko understands diff --git a/src/main/java/org/olat/course/nodes/gta/ui/HTMLZippedMediaResource.java b/src/main/java/org/olat/course/nodes/gta/ui/HTMLZippedMediaResource.java index 27806d5974879aee6be7e055637b526f690d17a5..73b4b2e2a6c8dd4c40a41fa3a9358065e497bb73 100644 --- a/src/main/java/org/olat/course/nodes/gta/ui/HTMLZippedMediaResource.java +++ b/src/main/java/org/olat/course/nodes/gta/ui/HTMLZippedMediaResource.java @@ -31,10 +31,9 @@ import java.util.zip.ZipOutputStream; import javax.servlet.http.HttpServletResponse; -import org.cyberneko.html.parsers.SAXParser; +import org.apache.logging.log4j.Logger; import org.olat.core.gui.media.MediaResource; import org.olat.core.gui.media.ServletUtil; -import org.apache.logging.log4j.Logger; import org.olat.core.logging.Tracing; import org.olat.core.util.FileUtils; import org.olat.core.util.StringHelper; @@ -43,6 +42,9 @@ import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.helpers.DefaultHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + /** * * Initial date: 19.11.2015<br> @@ -128,7 +130,7 @@ public class HTMLZippedMediaResource implements MediaResource { public HTMLHandler filter(File file) { try(InputStream in = new FileInputStream(file); BufferedInputStream bis = new BufferedInputStream(in, FileUtils.BSIZE)) { - SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); HTMLHandler contentHandler = new HTMLHandler(); parser.setContentHandler(contentHandler); parser.parse(new InputSource(bis)); diff --git a/src/main/java/org/olat/ims/qti/QTI12MetadataController.java b/src/main/java/org/olat/ims/qti/QTI12MetadataController.java index 307ed8bee6d493b30e29174701ce08cd21664cfd..5e836fbf6935800a1529e3dff01b4c6118c83798 100644 --- a/src/main/java/org/olat/ims/qti/QTI12MetadataController.java +++ 
b/src/main/java/org/olat/ims/qti/QTI12MetadataController.java @@ -31,7 +31,7 @@ import org.olat.core.gui.control.WindowControl; import org.olat.core.util.Formatter; import org.olat.core.util.StringHelper; import org.olat.core.util.Util; -import org.olat.core.util.filter.impl.NekoHTMLFilter; +import org.olat.core.util.filter.impl.HtmlFilter; import org.olat.ims.qti.editor.ItemNodeTabbedFormController; import org.olat.ims.qti.editor.QTIEditHelper; import org.olat.ims.qti.editor.beecom.objects.ChoiceQuestion; @@ -192,7 +192,7 @@ public class QTI12MetadataController extends FormBasicController { } else { responseSummary = response.getContent().renderAsText(); if(responseSummary.length() > 128) { - responseSummary = new NekoHTMLFilter().filter(responseSummary); + responseSummary = new HtmlFilter().filter(responseSummary); if(responseSummary.length() > 128) { responseSummary = responseSummary.substring(0, 125) + "..."; } diff --git a/src/main/java/org/olat/ims/qti/qpool/QTIExportProcessor.java b/src/main/java/org/olat/ims/qti/qpool/QTIExportProcessor.java index 7198d19135537d3dada150ac90e44e5a3c14a5f6..2cae2d429ec20000077b599de04c2ae65267fce2 100644 --- a/src/main/java/org/olat/ims/qti/qpool/QTIExportProcessor.java +++ b/src/main/java/org/olat/ims/qti/qpool/QTIExportProcessor.java @@ -31,7 +31,7 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; import org.apache.commons.io.IOUtils; -import org.cyberneko.html.parsers.SAXParser; +import org.apache.logging.log4j.Logger; import org.dom4j.Attribute; import org.dom4j.CDATA; import org.dom4j.Document; @@ -41,7 +41,6 @@ import org.dom4j.Element; import org.dom4j.Node; import org.dom4j.io.OutputFormat; import org.dom4j.io.XMLWriter; -import org.apache.logging.log4j.Logger; import org.olat.core.logging.Tracing; import org.olat.core.util.CodeHelper; import org.olat.core.util.FileUtils; @@ -58,6 +57,9 @@ import org.olat.modules.qpool.QuestionItemFull; import org.olat.modules.qpool.manager.QPoolFileStorage; 
import org.xml.sax.InputSource; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + /** * * Initial date: 11.03.2013<br> @@ -279,7 +281,7 @@ public class QTIExportProcessor { */ private List<String> findMaterialInMatText(String content) { try { - SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); QTI12HtmlHandler contentHandler = new QTI12HtmlHandler(); parser.setContentHandler(contentHandler); parser.parse(new InputSource(new StringReader(content))); diff --git a/src/main/java/org/olat/ims/qti/qpool/QTIImportProcessor.java b/src/main/java/org/olat/ims/qti/qpool/QTIImportProcessor.java index 5aa1729f7122ec436d985a9210d3e3bf990e9353..c45f7c354a0b668694d369acdfb8e27a8a2fa9dc 100644 --- a/src/main/java/org/olat/ims/qti/qpool/QTIImportProcessor.java +++ b/src/main/java/org/olat/ims/qti/qpool/QTIImportProcessor.java @@ -43,7 +43,7 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; import org.apache.commons.io.IOUtils; -import org.cyberneko.html.parsers.SAXParser; +import org.apache.logging.log4j.Logger; import org.dom4j.Attribute; import org.dom4j.Document; import org.dom4j.DocumentFactory; @@ -55,7 +55,6 @@ import org.dom4j.io.XMLWriter; import org.olat.core.CoreSpringFactory; import org.olat.core.commons.persistence.DB; import org.olat.core.id.Identity; -import org.apache.logging.log4j.Logger; import org.olat.core.logging.Tracing; import org.olat.core.util.FileUtils; import org.olat.core.util.PathUtils; @@ -88,6 +87,9 @@ import org.springframework.beans.factory.annotation.Autowired; import org.xml.sax.InputSource; import org.xml.sax.SAXException; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + /** * This class is NOT thread-safe * @@ -524,7 +526,7 @@ class QTIImportProcessor { */ protected void findMaterialInMatText(String content, List<String> materialPath) { try { - 
SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); QTI12HtmlHandler contentHandler = new QTI12HtmlHandler(materialPath); parser.setContentHandler(contentHandler); parser.parse(new InputSource(new StringReader(content))); diff --git a/src/main/java/org/olat/ims/qti21/manager/openxml/QTI21WordExport.java b/src/main/java/org/olat/ims/qti21/manager/openxml/QTI21WordExport.java index d06a8478200cb03141f2214d367b4acc1b8c6534..3e296e0754b9c7d8932415d65fb7c0768c72fdeb 100644 --- a/src/main/java/org/olat/ims/qti21/manager/openxml/QTI21WordExport.java +++ b/src/main/java/org/olat/ims/qti21/manager/openxml/QTI21WordExport.java @@ -39,10 +39,9 @@ import java.util.zip.ZipOutputStream; import javax.servlet.http.HttpServletResponse; -import org.cyberneko.html.parsers.SAXParser; +import org.apache.logging.log4j.Logger; import org.olat.core.gui.media.MediaResource; import org.olat.core.gui.translator.Translator; -import org.apache.logging.log4j.Logger; import org.olat.core.logging.Tracing; import org.olat.core.util.StringHelper; import org.olat.core.util.Util; @@ -79,6 +78,7 @@ import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; +import nu.validator.htmlparser.sax.HtmlParser; import uk.ac.ed.ph.jqtiplus.attribute.Attribute; import uk.ac.ed.ph.jqtiplus.node.QtiNode; import uk.ac.ed.ph.jqtiplus.node.content.basic.Block; @@ -420,6 +420,9 @@ public class QTI21WordExport implements MediaResource { break; case "textentryinteraction": startTextEntryInteraction(tag, attributes); + flushText(); + Style[] currentStyles = popStyle(tag); + unsetTextPreferences(currentStyles); break; case "extendedtextinteraction": startExtendedTextInteraction(attributes); @@ -507,9 +510,6 @@ public class QTI21WordExport implements MediaResource { endSimpleChoice(); break; case "textentryinteraction": - flushText(); - Style[] currentStyles = popStyle(tag); - unsetTextPreferences(currentStyles); break; case 
"extendedtextinteraction": //auto closing tag @@ -1108,7 +1108,7 @@ public class QTI21WordExport implements MediaResource { return Collections.emptyList(); } try { - SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(); HTMLToOpenXMLHandler handler = new HTMLToOpenXMLHandler(factory, relPath, wrapEl, false); handler.setMaxWidthCm(widthCm); parser.setContentHandler(handler); diff --git a/src/main/java/org/olat/ims/qti21/model/xml/AssessmentHtmlBuilder.java b/src/main/java/org/olat/ims/qti21/model/xml/AssessmentHtmlBuilder.java index 0daf21a3e4acbfc8111d9520f4823823fad0aff3..f719a6a0446829bcb07e7e937df7cf67902f9700 100644 --- a/src/main/java/org/olat/ims/qti21/model/xml/AssessmentHtmlBuilder.java +++ b/src/main/java/org/olat/ims/qti21/model/xml/AssessmentHtmlBuilder.java @@ -19,7 +19,6 @@ */ package org.olat.ims.qti21.model.xml; -import java.io.ByteArrayInputStream; import java.io.StringReader; import java.util.ArrayList; import java.util.List; @@ -29,11 +28,10 @@ import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.stream.StreamResult; import org.apache.logging.log4j.Logger; -import org.cyberneko.html.parsers.SAXParser; import org.olat.core.gui.render.StringOutput; import org.olat.core.logging.Tracing; import org.olat.core.util.StringHelper; -import org.olat.core.util.filter.impl.NekoHTMLFilter; +import org.olat.core.util.filter.impl.HtmlFilter; import org.w3c.dom.Attr; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -42,6 +40,8 @@ import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.helpers.DefaultHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; import uk.ac.ed.ph.jqtiplus.JqtiExtensionManager; import uk.ac.ed.ph.jqtiplus.exception.QtiModelException; import uk.ac.ed.ph.jqtiplus.node.AbstractNode; @@ -82,7 +82,8 @@ public class AssessmentHtmlBuilder { if(!StringHelper.containsNonWhitespace(html)) return false; try { 
- SAXParser parser = new SAXParser(); + // return always true? Neko send always an html tag -> content true + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); ContentDetectionHandler contentHandler = new ContentDetectionHandler(); parser.setContentHandler(contentHandler); parser.parse(new InputSource(new StringReader(html))); @@ -154,11 +155,12 @@ public class AssessmentHtmlBuilder { } else { htmlFragment = "<p>" + htmlFragment + "</p>"; } - //wrap around <html> to have a root element for neko - Document document = filter("<html>" + htmlFragment + "</html>"); + //root element is an <html> element + Document document = filter(htmlFragment); if(document != null) { Element docElement = document.getDocumentElement(); cleanUpNamespaces(docElement); + // load the elements under the <html> root element parent.getNodeGroups().load(docElement, new HTMLLoadingContext()); } } @@ -174,6 +176,16 @@ public class AssessmentHtmlBuilder { element.removeAttribute("xmlns"); } + // escaped by the HTML parser -> always remove them + Attr schemaLocationAttr = element.getAttributeNode("xsiU00003Aschemalocation"); + if(schemaLocationAttr != null) { + element.removeAttribute("xsiU00003Aschemalocation"); + } + Attr xsiattrEscaped = element.getAttributeNode("xmlnsU00003Axsi"); + if(xsiattrEscaped != null) { + element.removeAttribute("xmlnsU00003Axsi"); + } + for(Node child=element.getFirstChild(); child != null; child = child.getNextSibling()) { if(child instanceof Element) { cleanUpNamespaces((Element)child); @@ -181,19 +193,25 @@ public class AssessmentHtmlBuilder { } } + /** + * The method filters the content of textEntryInteraction (rename it correctly + * and remove TinyMCE attributes) and repackage the video from TinyMCE format + * to an object suited for QTI 2.1. + * + * @param content The content to filter + * @return A document object with an HTML root element with the actual + * blocks under this root element. 
+ */ private Document filter(String content) { try { - SAXParser parser = new SAXParser(); - parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); - parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); - parser.setProperty("http://cyberneko.org/html/properties/default-encoding", "UTF-8"); - + // Alter infoset because of special namespace on tag p + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document document = builder.newDocument(); HtmlToDomBuilderHandler contentHandler = new HtmlToDomBuilderHandler(document); parser.setContentHandler(contentHandler); - parser.parse(new InputSource(new ByteArrayInputStream(content.getBytes()))); + parser.parse(new InputSource(new StringReader(content))); return document; } catch (Exception e) { log.error("", e); @@ -222,9 +240,11 @@ public class AssessmentHtmlBuilder { @Override public void startElement(String uri, String localName, String qName, Attributes attributes) { - if(video) return; + if(video || "head".equalsIgnoreCase(localName) || "body".equalsIgnoreCase(localName)) { + return; + } - if("textentryinteraction".equals(localName)) { + if("textentryinteraction".equalsIgnoreCase(localName)) { localName = qName = "textEntryInteraction"; AttributesImpl attributesCleaned = new AttributesImpl(""); @@ -239,13 +259,16 @@ public class AssessmentHtmlBuilder { } attributes = new AttributesDelegate(attributesCleaned); - } else if("span".equals(localName)) { + super.startElement(uri, localName, qName, attributes); + super.endElement(uri, localName, qName); + return; + } else if("span".equalsIgnoreCase(localName)) { String cssClass = attributes.getValue("class"); if(cssClass != null && "olatFlashMovieViewer".equals(cssClass)) { video = true; return; } - } else if("u".equals(localName)) { + } else 
if("u".equalsIgnoreCase(localName)) { qName = "span"; AttributesImpl underlineAttributes = new AttributesImpl(""); underlineAttributes.addAttributes(attributes); @@ -266,13 +289,16 @@ public class AssessmentHtmlBuilder { @Override public void endElement(String uri, String localName, String qName) { + if("head".equalsIgnoreCase(localName) || "body".equalsIgnoreCase(localName)) { + return; + } if(video) { - if("span".equals(localName)) { + if("span".equalsIgnoreCase(localName)) { String content = scriptBuffer.toString(); String startScript = "BPlayer.insertPlayer("; int start = content.indexOf(startScript); if(start >= 0) { - int end = content.indexOf(")", start); + int end = content.indexOf(')', start); String parameters = content.substring(start + startScript.length(), end); translateToObject(uri, parameters); } @@ -280,10 +306,9 @@ public class AssessmentHtmlBuilder { } return; } - - if("textentryinteraction".equals(localName)) { - localName = qName = "textEntryInteraction"; - } + if("textentryinteraction".equalsIgnoreCase(localName)) { + return; + } super.endElement(uri, localName, qName); } @@ -297,7 +322,7 @@ public class AssessmentHtmlBuilder { String type = array[6].replace("\"", ""); String ooData = parameters.replace("\"", "'"); - AttributesImpl attributes = new AttributesImpl(uri); + AttributesImpl attributes = new AttributesImpl(""); attributes.addAttribute("data", data); attributes.addAttribute("id", id); attributes.addAttribute("class", "olatFlashMovieViewer"); @@ -306,8 +331,8 @@ public class AssessmentHtmlBuilder { attributes.addAttribute("type", type); attributes.addAttribute("data-oo-movie", ooData); - super.startElement(uri, "object", "object", attributes); - super.endElement(uri, "object", "object"); + super.startElement("", "object", "object", attributes); + super.endElement("", "object", "object"); } } @@ -542,7 +567,7 @@ public class AssessmentHtmlBuilder { String elem = localName.toLowerCase(); if("script".equals(elem)) { collect = false; - } 
else if(!NekoHTMLFilter.blockTags.contains(localName)) { + } else if(!HtmlFilter.blockTags.contains(localName)) { content = true; } } @@ -562,7 +587,7 @@ public class AssessmentHtmlBuilder { String elem = localName.toLowerCase(); if("script".equals(elem)) { collect = true; - } else if(!NekoHTMLFilter.blockTags.contains(localName)) { + } else if(!HtmlFilter.blockTags.contains(localName)) { content = true; } } diff --git a/src/main/java/org/olat/ims/qti21/model/xml/TestFeedbackBuilder.java b/src/main/java/org/olat/ims/qti21/model/xml/TestFeedbackBuilder.java index 4c24a728d7df619abee792648024caea3841762e..e54c3541f27a361cde3cf66430f7711947ee4316 100644 --- a/src/main/java/org/olat/ims/qti21/model/xml/TestFeedbackBuilder.java +++ b/src/main/java/org/olat/ims/qti21/model/xml/TestFeedbackBuilder.java @@ -22,7 +22,6 @@ package org.olat.ims.qti21.model.xml; import java.io.StringReader; import java.util.List; -import org.cyberneko.html.parsers.SAXParser; import org.apache.logging.log4j.Logger; import org.olat.core.logging.Tracing; import org.olat.core.util.StringHelper; @@ -32,6 +31,8 @@ import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.helpers.DefaultHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; import uk.ac.ed.ph.jqtiplus.node.expression.Expression; import uk.ac.ed.ph.jqtiplus.node.expression.general.BaseValue; import uk.ac.ed.ph.jqtiplus.node.expression.general.Variable; @@ -235,7 +236,7 @@ public class TestFeedbackBuilder { } try { - SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); HTMLHandler contentHandler = new HTMLHandler(); parser.setContentHandler(contentHandler); parser.parse(new InputSource(new StringReader(value))); diff --git a/src/main/java/org/olat/ims/qti21/pool/QTI12To21Converter.java b/src/main/java/org/olat/ims/qti21/pool/QTI12To21Converter.java index 
cdc84c511d2cabad2b92de005527f6bddd42bda3..9f125c3d47daec53eb457f5c53fa0737d6890b82 100644 --- a/src/main/java/org/olat/ims/qti21/pool/QTI12To21Converter.java +++ b/src/main/java/org/olat/ims/qti21/pool/QTI12To21Converter.java @@ -43,11 +43,10 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; import org.apache.commons.lang.StringEscapeUtils; -import org.cyberneko.html.parsers.SAXParser; +import org.apache.logging.log4j.Logger; import org.olat.core.CoreSpringFactory; import org.olat.core.gui.translator.Translator; import org.olat.core.helpers.Settings; -import org.apache.logging.log4j.Logger; import org.olat.core.logging.Tracing; import org.olat.core.util.FileUtils; import org.olat.core.util.StringHelper; @@ -109,6 +108,8 @@ import org.olat.resource.OLATResource; import org.xml.sax.InputSource; import org.xml.sax.SAXException; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; import uk.ac.ed.ph.jqtiplus.node.AssessmentObject; import uk.ac.ed.ph.jqtiplus.node.content.variable.RubricBlock; import uk.ac.ed.ph.jqtiplus.node.content.xhtml.text.P; @@ -777,7 +778,7 @@ public class QTI12To21Converter { XMLOutputFactory xof = XMLOutputFactory.newInstance(); XMLStreamWriter xtw = xof.createXMLStreamWriter(out); - SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(); QTI12To21HtmlHandler handler = new QTI12To21HtmlHandler(xtw); parser.setContentHandler(handler); parser.parse(new InputSource(new StringReader(trimmedText))); @@ -797,7 +798,7 @@ public class QTI12To21Converter { private void collectMaterial(String content) { try { - SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); QTI12HtmlHandler contentHandler = new QTI12HtmlHandler(materialPath); parser.setContentHandler(contentHandler); parser.parse(new InputSource(new StringReader(content))); diff --git 
a/src/main/java/org/olat/ims/qti21/ui/editor/interactions/FIBEditorController.java b/src/main/java/org/olat/ims/qti21/ui/editor/interactions/FIBEditorController.java index 0cf20c1d59a50afb16ea37432d201f73c52d1b95..167e76b14b60eb0ed9d59738adff5bc1714872d5 100644 --- a/src/main/java/org/olat/ims/qti21/ui/editor/interactions/FIBEditorController.java +++ b/src/main/java/org/olat/ims/qti21/ui/editor/interactions/FIBEditorController.java @@ -22,7 +22,6 @@ package org.olat.ims.qti21.ui.editor.interactions; import java.io.ByteArrayInputStream; import java.io.File; -import org.cyberneko.html.parsers.SAXParser; import org.json.JSONException; import org.json.JSONObject; import org.olat.core.gui.UserRequest; @@ -54,6 +53,9 @@ import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.helpers.DefaultHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + /** * * Initial date: 24.02.2016<br> @@ -331,10 +333,7 @@ public class FIBEditorController extends FormBasicController { private void extractSolution(String content) { try { - SAXParser parser = new SAXParser(); - parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); - parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); - parser.setProperty("http://cyberneko.org/html/properties/default-encoding", "UTF-8"); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); parser.setContentHandler(new SolutionExtractorHandler()); parser.parse(new InputSource(new ByteArrayInputStream(content.getBytes()))); } catch (Exception e) { diff --git a/src/main/java/org/olat/modules/cp/CPPrintMapper.java b/src/main/java/org/olat/modules/cp/CPPrintMapper.java index c5e71d97aedbbc8a2be734c1608aacb2b2a860b0..246bdfc0c4433c280c0a8ed2124d86e01627d763 100644 --- a/src/main/java/org/olat/modules/cp/CPPrintMapper.java +++ b/src/main/java/org/olat/modules/cp/CPPrintMapper.java @@ -34,7 +34,6 
@@ import javax.servlet.http.HttpServletRequest; import org.apache.batik.css.parser.ParseException; import org.apache.batik.css.parser.Parser; import org.apache.logging.log4j.Logger; -import org.cyberneko.html.parsers.SAXParser; import org.olat.core.dispatcher.impl.StaticMediaDispatcher; import org.olat.core.dispatcher.mapper.Mapper; import org.olat.core.gui.components.tree.TreeNode; @@ -51,12 +50,15 @@ import org.olat.core.util.vfs.VFSLeaf; import org.olat.core.util.vfs.VFSMediaResource; import org.xml.sax.InputSource; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + /** * * Description:<br> * Deliver the CP as a single page. All the HTML Pages are parser and the * attribute href/src are rewritten to absolute /olat/m/xxxx urls. The HTML parser - * used is NekoHTML. For CSS there is the same process. We use the Batik CSS + * used is validator.HtmlParser. For CSS there is the same process. We use the Batik CSS * parser for SAC. * * <P> @@ -125,18 +127,18 @@ public class CPPrintMapper implements Mapper { } private MediaResource deliverCompositePage(HttpServletRequest request) { - List<NekoHtmlPageHandler> parsedPages = composePrintPage(selectedNodeIds); + List<HtmlPageHandler> parsedPages = composePrintPage(selectedNodeIds); StringBuilder sb = new StringBuilder(); sb.append("<html><head>"); - for(NekoHtmlPageHandler page:parsedPages) { + for(HtmlPageHandler page:parsedPages) { sb.append("<!-- Header of -->"); sb.append(page.getHeader()).append("\n\n"); } injectJavascriptAndCss(sb); sb.append("</head><body onload='window.focus();window.print()'>"); - for(NekoHtmlPageHandler page:parsedPages) { + for(HtmlPageHandler page:parsedPages) { if(page.isEmpty()) { String title = page.getTitle(); if(StringHelper.containsNonWhitespace(title)) { @@ -165,18 +167,18 @@ public class CPPrintMapper implements Mapper { } } - protected void bodyDecorator(NekoHtmlPageHandler page, StringBuilder sb) { + protected void 
bodyDecorator(HtmlPageHandler page, StringBuilder sb) { sb.append("<!-- Body of ").append(page.getDocument().getName()).append("-->"); sb.append("<div class=\"o_cp_print_page\" style='clear:both; position:relative;page-break-after:always;'>\n"); sb.append(page.getBody()); sb.append("\n</div>"); } - private List<NekoHtmlPageHandler> composePrintPage(List<String> nodeIds) { - List<NekoHtmlPageHandler> pages = new ArrayList<>(); + private List<HtmlPageHandler> composePrintPage(List<String> nodeIds) { + List<HtmlPageHandler> pages = new ArrayList<>(); for(String nodeId:nodeIds) { - NekoHtmlPageHandler parsedPage = null; + HtmlPageHandler parsedPage = null; TreeNode treeNode = ctm.getNodeById(nodeId); String identifierRes = (String)treeNode.getUserObject(); if(StringHelper.containsNonWhitespace(identifierRes)) { @@ -190,15 +192,15 @@ public class CPPrintMapper implements Mapper { } } if(parsedPage == null) { - parsedPage = new NekoHtmlPageHandler(treeNode, null, rootDir, baseUri); + parsedPage = new HtmlPageHandler(treeNode, null, rootDir, baseUri); } pages.add(parsedPage); } return pages; } - private NekoHtmlPageHandler parsePage(String identifierRes, VFSLeaf document, TreeNode node) { - NekoHtmlPageHandler page = new NekoHtmlPageHandler(node, document, rootDir, baseUri); + private HtmlPageHandler parsePage(String identifierRes, VFSLeaf document, TreeNode node) { + HtmlPageHandler page = new HtmlPageHandler(node, document, rootDir, baseUri); int index = identifierRes.lastIndexOf('/'); if(index > 0) { String relativePath = identifierRes.substring(0, index+1); @@ -219,7 +221,7 @@ public class CPPrintMapper implements Mapper { rawContent = FileUtils.load(document.getInputStream(), content.getEncoding()); } - SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); parser.setContentHandler(page); parser.parse(new InputSource(new StringReader(rawContent))); return page; diff --git 
a/src/main/java/org/olat/modules/cp/NekoHtmlPageHandler.java b/src/main/java/org/olat/modules/cp/HtmlPageHandler.java similarity index 89% rename from src/main/java/org/olat/modules/cp/NekoHtmlPageHandler.java rename to src/main/java/org/olat/modules/cp/HtmlPageHandler.java index c213cda15920e9c0577e7993812dcd1cfdce4d6b..edc4e8170444a47fe5905695a077ebcb6f7d1ab4 100644 --- a/src/main/java/org/olat/modules/cp/NekoHtmlPageHandler.java +++ b/src/main/java/org/olat/modules/cp/HtmlPageHandler.java @@ -32,20 +32,19 @@ import org.xml.sax.helpers.DefaultHandler; /** * * Description:<br> - * SAX handler for the NekoParser + * SAX handler for the HtmlParser * * <P> * Initial Date: 18 mars 2011 <br> * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com */ -//fxdiff VCRP-14: print cp -public class NekoHtmlPageHandler extends DefaultHandler { - private final StringBuilder header = new StringBuilder(); - private final StringBuilder body = new StringBuilder(); +public class HtmlPageHandler extends DefaultHandler { + private final StringBuilder header = new StringBuilder(4096); + private final StringBuilder body = new StringBuilder(8192); - private final String HEAD = "HEAD"; - private final String BODY = "BODY"; - private final String SCRIPT = "SCRIPT"; + private static final String HEAD = "head"; + private static final String BODY = "body"; + private static final String SCRIPT = "script"; private boolean pauseOutput; private StringBuilder output; @@ -56,7 +55,7 @@ public class NekoHtmlPageHandler extends DefaultHandler { private final VFSLeaf document; private final VFSContainer rootContainer; - public NekoHtmlPageHandler(TreeNode node, VFSLeaf document, VFSContainer rootDir, String baseUri) { + public HtmlPageHandler(TreeNode node, VFSLeaf document, VFSContainer rootDir, String baseUri) { this.document = document; this.rootContainer = rootDir; this.baseUri = baseUri; @@ -108,11 +107,11 @@ public class NekoHtmlPageHandler extends DefaultHandler { @Override public 
final void startElement(String uri, String localName, String qName, Attributes attributes) { - if (HEAD.equals(localName)) { + if (HEAD.equalsIgnoreCase(localName)) { output = header; - } else if (BODY.equals(localName)) { + } else if (BODY.equalsIgnoreCase(localName)) { output = body; - } else if (SCRIPT.equals(localName)) { + } else if (SCRIPT.equalsIgnoreCase(localName)) { pauseOutput = true; } else if (output != null) { pauseOutput = false; diff --git a/src/main/java/org/olat/modules/fo/QuoteAndTagFilter.java b/src/main/java/org/olat/modules/fo/QuoteAndTagFilter.java index 079f67e7a593838ab1bb55fd75dff9f9b1c4499a..7fcc78057d33d12058056f58984cbb30d594d77b 100644 --- a/src/main/java/org/olat/modules/fo/QuoteAndTagFilter.java +++ b/src/main/java/org/olat/modules/fo/QuoteAndTagFilter.java @@ -22,36 +22,30 @@ package org.olat.modules.fo; import java.io.IOException; import java.io.StringReader; -import org.cyberneko.html.parsers.DOMParser; import org.apache.logging.log4j.Logger; import org.olat.core.logging.Tracing; import org.olat.core.util.StringHelper; import org.olat.core.util.filter.Filter; -import org.w3c.dom.Document; +import org.w3c.dom.DocumentFragment; import org.w3c.dom.Node; import org.xml.sax.InputSource; import org.xml.sax.SAXException; +import nu.validator.htmlparser.dom.HtmlDocumentBuilder; + public class QuoteAndTagFilter implements Filter { private static final Logger log = Tracing.createLoggerFor(QuoteAndTagFilter.class); - /** - * @see org.olat.core.util.filter.Filter#filter(java.lang.String) - */ @Override public String filter(String original) { try { - DOMParser parser = new DOMParser(); - parser.parse(new InputSource(new StringReader(original))); - Document document = parser.getDocument(); + HtmlDocumentBuilder parser = new HtmlDocumentBuilder(); + DocumentFragment document = parser.parseFragment(new InputSource(new StringReader(original)), ""); StringBuilder sb = new StringBuilder(); scanNode(document, sb); return sb.toString(); - } catch 
(SAXException e) { - log.error("", e); - return null; - } catch (IOException e) { + } catch (SAXException | IOException e) { log.error("", e); return null; } diff --git a/src/main/java/org/olat/modules/portfolio/ui/export/ExportBinderAsCPResource.java b/src/main/java/org/olat/modules/portfolio/ui/export/ExportBinderAsCPResource.java index d46183a461ccc1df1a5b79290e47beaa7184c721..233e290962eb704de9e834a7e001112767c74c52 100644 --- a/src/main/java/org/olat/modules/portfolio/ui/export/ExportBinderAsCPResource.java +++ b/src/main/java/org/olat/modules/portfolio/ui/export/ExportBinderAsCPResource.java @@ -40,7 +40,7 @@ import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBException; import javax.xml.bind.Marshaller; -import org.cyberneko.html.parsers.SAXParser; +import org.apache.logging.log4j.Logger; import org.olat.core.CoreSpringFactory; import org.olat.core.commons.services.commentAndRating.CommentAndRatingSecurityCallback; import org.olat.core.commons.services.commentAndRating.ui.UserCommentsController; @@ -63,7 +63,6 @@ import org.olat.core.gui.util.SyntheticUserRequest; import org.olat.core.gui.util.WindowControlMocker; import org.olat.core.helpers.Settings; import org.olat.core.id.OLATResourceable; -import org.apache.logging.log4j.Logger; import org.olat.core.logging.Tracing; import org.olat.core.util.FileUtils; import org.olat.core.util.StringHelper; @@ -110,6 +109,9 @@ import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + /** * * Initial date: 17 août 2017<br> @@ -421,7 +423,7 @@ public class ExportBinderAsCPResource implements MediaResource { public String exportMedia(String html, ZipOutputStream zout) { try { - SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); ExportMedia contentHandler = new ExportMedia(zout); 
parser.setContentHandler(contentHandler); parser.parse(new InputSource(new StringReader(html))); diff --git a/src/main/java/org/olat/modules/portfolio/ui/export/ExportBinderAsPDFResource.java b/src/main/java/org/olat/modules/portfolio/ui/export/ExportBinderAsPDFResource.java index 85f27b8f5f6ea7bd268b698ba480156a3d30a7cf..46f117bb68b9e6353e1d32eae73e869f1c5ec802 100644 --- a/src/main/java/org/olat/modules/portfolio/ui/export/ExportBinderAsPDFResource.java +++ b/src/main/java/org/olat/modules/portfolio/ui/export/ExportBinderAsPDFResource.java @@ -35,7 +35,7 @@ import java.util.UUID; import javax.servlet.http.HttpServletResponse; import org.apache.commons.io.IOUtils; -import org.cyberneko.html.parsers.SAXParser; +import org.apache.logging.log4j.Logger; import org.olat.core.CoreSpringFactory; import org.olat.core.commons.services.pdf.PdfService; import org.olat.core.dispatcher.DispatcherModule; @@ -56,7 +56,6 @@ import org.olat.core.gui.translator.Translator; import org.olat.core.gui.util.SyntheticUserRequest; import org.olat.core.gui.util.WindowControlMocker; import org.olat.core.helpers.Settings; -import org.apache.logging.log4j.Logger; import org.olat.core.logging.Tracing; import org.olat.core.util.FileUtils; import org.olat.core.util.StringHelper; @@ -74,6 +73,9 @@ import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + /** * It makes a flat html file, copy all medias around it * and render it as PDF via phantomjs. 
@@ -238,7 +240,7 @@ public class ExportBinderAsPDFResource implements MediaResource { public String exportMedia(String html, File outputDir) { try { - SAXParser parser = new SAXParser(); + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); ExportMedia contentHandler = new ExportMedia(outputDir); parser.setContentHandler(contentHandler); parser.parse(new InputSource(new StringReader(html))); diff --git a/src/main/java/org/olat/registration/RegistrationManager.java b/src/main/java/org/olat/registration/RegistrationManager.java index 13e7a9a53b4f3d211b19cfd4ce528a32bcad8775..d8e84fb1bf1b832db4b6b4847bfb8e72188a6150 100644 --- a/src/main/java/org/olat/registration/RegistrationManager.java +++ b/src/main/java/org/olat/registration/RegistrationManager.java @@ -63,7 +63,7 @@ import org.olat.core.util.StringHelper; import org.olat.core.util.Util; import org.olat.core.util.WebappHelper; import org.olat.core.util.filter.FilterFactory; -import org.olat.core.util.filter.impl.NekoHTMLFilter; +import org.olat.core.util.filter.impl.HtmlFilter; import org.olat.core.util.i18n.I18nModule; import org.olat.core.util.mail.MailManager; import org.olat.core.util.mail.MailerResult; @@ -603,7 +603,7 @@ public class RegistrationManager implements UserDataDeletable, UserDataExportabl sb.append(translator.translate("disclaimer.paragraph1")) .append("\n") .append(translator.translate("disclaimer.paragraph2")); - String disclaimer = new NekoHTMLFilter().filter(sb.toString(), true); + String disclaimer = new HtmlFilter().filter(sb.toString(), true); out.write(disclaimer); } } catch (IOException e) { diff --git a/src/main/java/org/olat/search/service/document/file/HtmlDocument.java b/src/main/java/org/olat/search/service/document/file/HtmlDocument.java index a1a5573c50384fe4a86e4ab79f50e036cb451f76..ac56baefc8686833b68f1d28d01f934c2195e809 100644 --- a/src/main/java/org/olat/search/service/document/file/HtmlDocument.java +++ 
b/src/main/java/org/olat/search/service/document/file/HtmlDocument.java @@ -32,8 +32,8 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.document.Document; import org.olat.core.gui.util.CSSHelper; import org.olat.core.logging.Tracing; -import org.olat.core.util.filter.impl.NekoHTMLFilter; -import org.olat.core.util.filter.impl.NekoHTMLFilter.NekoContent; +import org.olat.core.util.filter.impl.HtmlFilter; +import org.olat.core.util.filter.impl.HtmlFilter.HtmlContent; import org.olat.core.util.vfs.VFSLeaf; import org.olat.search.service.SearchResourceContext; @@ -64,7 +64,7 @@ public class HtmlDocument extends FileDocument { protected FileContent readContent(VFSLeaf leaf) throws DocumentException { try(InputStream is = leaf.getInputStream()) { // Remove all HTML and Tags - NekoContent output = new NekoHTMLFilter().filter(is); + HtmlContent output = new HtmlFilter().filter(is); if (log.isDebugEnabled()) log.debug("HTML content without tags :" + output); diff --git a/src/main/java/org/olat/search/service/document/file/XmlDocument.java b/src/main/java/org/olat/search/service/document/file/XmlDocument.java index a9b15e4a7482917a3918d5893cbe39193b39ca07..5b6806777a74227bbc4b67f40b52f161997631b8 100644 --- a/src/main/java/org/olat/search/service/document/file/XmlDocument.java +++ b/src/main/java/org/olat/search/service/document/file/XmlDocument.java @@ -33,8 +33,8 @@ import org.apache.lucene.document.Document; import org.olat.core.gui.util.CSSHelper; import org.olat.core.logging.Tracing; import org.olat.core.util.FileUtils; -import org.olat.core.util.filter.impl.NekoHTMLFilter; -import org.olat.core.util.filter.impl.NekoHTMLFilter.NekoContent; +import org.olat.core.util.filter.impl.HtmlFilter; +import org.olat.core.util.filter.impl.HtmlFilter.HtmlContent; import org.olat.core.util.vfs.VFSLeaf; import org.olat.search.service.SearchResourceContext; @@ -61,13 +61,12 @@ public class XmlDocument extends FileDocument { return htmlDocument.getLuceneDocument(); } 
- //fxdiff FXOLAT-97: index run in infinite loop protected FileContent readContent(VFSLeaf leaf) throws IOException { InputStream is = leaf.getInputStream(); // Remove all HTML and Tags - NekoContent output; + HtmlContent output; try { - output = new NekoHTMLFilter().filter(is); + output = new HtmlFilter().filter(is); if (log.isDebugEnabled() ) log.debug("HTML content without tags :" + output); } catch (Exception e) { throw new IOException(e); diff --git a/src/main/java/org/olat/search/ui/SearchLRUCache.java b/src/main/java/org/olat/search/ui/SearchLRUCache.java deleted file mode 100644 index a63d0f1b26bd87e5828f64f42278fb2ee44f8fd3..0000000000000000000000000000000000000000 --- a/src/main/java/org/olat/search/ui/SearchLRUCache.java +++ /dev/null @@ -1,71 +0,0 @@ -/** - * <a href="http://www.openolat.org"> - * OpenOLAT - Online Learning and Training</a><br> - * <p> - * Licensed under the Apache License, Version 2.0 (the "License"); <br> - * you may not use this file except in compliance with the License.<br> - * You may obtain a copy of the License at the - * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a> - * <p> - * Unless required by applicable law or agreed to in writing,<br> - * software distributed under the License is distributed on an "AS IS" BASIS, <br> - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br> - * See the License for the specific language governing permissions and <br> - * limitations under the License. 
- * <p> - * Initial code contributed and copyrighted by<br> - * frentix GmbH, http://www.frentix.com - * <p> - */ -package org.olat.search.ui; - -import org.apache.commons.collections.map.LRUMap; -import org.olat.search.SearchResults; - -/** - * A LRU map - * - * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com - */ -class SearchLRUCache extends LRUMap { - - private static final long serialVersionUID = -6748874217812035135L; - - public SearchLRUCache() { - super(5); - } - - @Override - public SearchResults get(Object key) { - SearchEntry searchEntry = (SearchEntry)super.get(key); - if(searchEntry != null) { - if(searchEntry.isUpToDate()) { - return searchEntry.getSearchResults(); - } - remove(key); - } - return null; - } - - @Override - public Object put(Object key, Object value) { - return super.put(key, new SearchEntry((SearchResults)value)); - } - - public class SearchEntry { - private final SearchResults results; - private long timestamp = System.currentTimeMillis(); - - public SearchEntry(SearchResults results) { - this.results = results; - } - - public SearchResults getSearchResults() { - return results; - } - - public boolean isUpToDate() { - return System.currentTimeMillis() - timestamp < 300000; - } - } -} diff --git a/src/test/java/org/olat/core/util/FormatLatexFormulasTest.java b/src/test/java/org/olat/core/util/FormatLatexFormulasTest.java index 2dbc4a3076fc77278611b6822a1fd04cdd3e4577..4143c54beda0f62407cf8cd218ba9e653a514794 100644 --- a/src/test/java/org/olat/core/util/FormatLatexFormulasTest.java +++ b/src/test/java/org/olat/core/util/FormatLatexFormulasTest.java @@ -60,7 +60,7 @@ public class FormatLatexFormulasTest { { "<span class='math\"'\"></span>", Boolean.FALSE }, { "<span class='math></\"span>", Boolean.FALSE }, { "<span class='math></'span>", Boolean.FALSE }, - { "<span class='math", Boolean.TRUE }, + { "<span class='math", Boolean.FALSE }, { "<span class=math\"", Boolean.FALSE } }); } diff --git 
a/src/test/java/org/olat/core/util/StringHelperTest.java b/src/test/java/org/olat/core/util/StringHelperTest.java index 61185ec8372bdedf1e3d4cbc16b61c2d76d3b566..b90a0cf7ba5c448a30dc3bb4aa508387f8fa5e36 100644 --- a/src/test/java/org/olat/core/util/StringHelperTest.java +++ b/src/test/java/org/olat/core/util/StringHelperTest.java @@ -174,13 +174,13 @@ public class StringHelperTest { Assert.assertFalse(StringHelper.isHtml("http://some.domain:8080/olat/dmz/registration/index.html?key=b67a28bd5e5820155b3ba496ef16d1d9&lang=de")); //good and bad html code - Assert.assertTrue(StringHelper.isHtml("<html><head></head><body>Hello world</body></html>")); Assert.assertTrue(StringHelper.isHtml("Hello <p>world</p>")); Assert.assertTrue(StringHelper.isHtml("<ul><li>Hello<li>world</ul>")); Assert.assertTrue(StringHelper.isHtml("Hello<br>world")); + Assert.assertTrue(StringHelper.isHtml("<html><head></head><body>Hello world</body></html>")); + Assert.assertTrue(StringHelper.isHtml("<html><head></head><body><p>Hello world</p></body></html>")); } - @Test public void formatAsCSVString() { List<String> entries = new ArrayList<>(); diff --git a/src/test/java/org/olat/core/util/filter/impl/NekoHTMLFilterTest.java b/src/test/java/org/olat/core/util/filter/impl/HtmlFilterTest.java similarity index 90% rename from src/test/java/org/olat/core/util/filter/impl/NekoHTMLFilterTest.java rename to src/test/java/org/olat/core/util/filter/impl/HtmlFilterTest.java index a909db60567d1fa48681f0f346ccd79b2ec403f9..580cb01ff8cbcfb7f65a377e5c84b63b150c17d0 100644 --- a/src/test/java/org/olat/core/util/filter/impl/NekoHTMLFilterTest.java +++ b/src/test/java/org/olat/core/util/filter/impl/HtmlFilterTest.java @@ -21,34 +21,21 @@ package org.olat.core.util.filter.impl; import java.io.ByteArrayInputStream; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; -import 
org.olat.core.util.filter.impl.NekoHTMLFilter.NekoContent; +import org.olat.core.util.filter.impl.HtmlFilter.HtmlContent; /** * Description:<br> - * This test case tests the NEko HTML tags filter + * This test case tests the HTML tags filter. * * <P> * Initial Date: 14.07.2009 <br> * @author gnaegi */ -@RunWith(JUnit4.class) -public class NekoHTMLFilterTest{ +public class HtmlFilterTest { - protected NekoHTMLFilter filter; - - @Before public void setup() { - filter = new NekoHTMLFilter(); - } - - @After public void tearDown() { - filter = null; - } + private final HtmlFilter filter = new HtmlFilter(); private void t(String input, String result) { Assert.assertEquals(result, filter.filter(input)); @@ -61,6 +48,7 @@ public class NekoHTMLFilterTest{ public void escaping() { String output = filter.filter("Test ä test"); System.out.println(output); + Assert.assertEquals("Test \u00E4 test", output); } @Test public void testPlainText() { @@ -158,8 +146,8 @@ public class NekoHTMLFilterTest{ p("<a href=\"index.html\">GO HERE</a>", "GO HERE"); t("<a href=\"/go/here/index.html\">GO HERE</a>", "GO HERE"); p("<a href=\"/go/here/index.html\">GO HERE</a>", "GO HERE"); - t("<a href=\"http://www.openolat.org/go/here/index.html\" title=\\\"super site\\\">GO HERE</a>", "GO HERE"); - p("<a href=\"http://www.openolat.org/go/here/index.html\" title=\\\"super site\\\">GO HERE</a>", "http://www.openolat.org/go/here/index.html GO HERE"); + t("<a href=\"http://www.openolat.org/go/here/index.html\" title=\"super site\">GO HERE</a>", "GO HERE"); + p("<a href=\"http://www.openolat.org/go/here/index.html\" title=\"super site\">GO HERE</a>", "http://www.openolat.org/go/here/index.html GO HERE"); t("<a href=\"https://www.openolat.org/go/here/index.html\" title=\"super site\">GO HERE</a>", "GO HERE"); p("<a href=\"https://www.openolat.org/go/here/index.html\" title=\"super site\">GO HERE</a>", "https://www.openolat.org/go/here/index.html GO HERE"); @@ -174,7 +162,7 @@ public class 
NekoHTMLFilterTest{ + "</body></html>"; // Text = 'Dies ist der Test Text' String text = "Hello Neko Test HTML Seite fuer JUnit Test Dies ist der Test\u00A0Text"; // must include '\u00A0' !!! 19.5.2010/cg - NekoContent content = filter.filter(new ByteArrayInputStream(htmlText.getBytes())); + HtmlContent content = filter.filter(new ByteArrayInputStream(htmlText.getBytes())); Assert.assertNotNull(content); Assert.assertEquals("Hello Neko", content.getTitle()); Assert.assertEquals(text, content.getContent()); diff --git a/src/test/java/org/olat/core/util/filter/impl/HtmlMathScannerTest.java b/src/test/java/org/olat/core/util/filter/impl/HtmlMathScannerTest.java new file mode 100644 index 0000000000000000000000000000000000000000..979ef768d005dfff36a5e2489198a239b7564caf --- /dev/null +++ b/src/test/java/org/olat/core/util/filter/impl/HtmlMathScannerTest.java @@ -0,0 +1,84 @@ +/** + * <a href="http://www.openolat.org"> + * OpenOLAT - Online Learning and Training</a><br> + * <p> + * Licensed under the Apache License, Version 2.0 (the "License"); <br> + * you may not use this file except in compliance with the License.<br> + * You may obtain a copy of the License at the + * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a> + * <p> + * Unless required by applicable law or agreed to in writing,<br> + * software distributed under the License is distributed on an "AS IS" BASIS, <br> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br> + * See the License for the specific language governing permissions and <br> + * limitations under the License. 
+ * <p> + * Initial code contributed and copyrighted by<br> + * frentix GmbH, http://www.frentix.com + * <p> + */ +package org.olat.core.util.filter.impl; + +import java.util.Arrays; +import java.util.Collection; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; +import org.olat.core.util.WebappHelper; + +/** + * + * Initial date: 18 mai 2019<br> + * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com + * + */ +@RunWith(Parameterized.class) +public class HtmlMathScannerTest { + + @Parameters + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] { + { true, "<p><span class=\"math\" title=\"%5Coverbrace%7Bx+%5Ccdots+x%5C%2C%7D%5E%7Bk%5Crm%5C%3Btimes%7D%20%5Cqquad%0A%5Cunderbrace%7Bx+y+z%5C%2C%7D_%7B%3E%5C%2C0%7D\">\\overbrace{x+\\cdots+x\\,}^{k\\rm\\;times} \\qquad \\underbrace{x+y+z\\,}_{>\\,0}</span></p>" }, + { true, "\\(math\\)" }, + { true, "Ceci <p>\\[math\\]</p>" }, + { true, "Was ist das <p>$$math$$</p>" }, + { true, "<span><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><semantics><mrow><mi>Ï€</mi><mo>=</mo><mfrac><mi>C</mi><mi>d</mi></mfrac></mrow><annotation encoding=\"SnuggleTeX\">$\\pi = \\frac{C}{d}</annotation></semantics></math>" }, + { false, "math" }, + { false, "<p>math</p>" } + }); + } + + + private final HtmlMathScanner filter = new HtmlMathScanner(); + + private boolean hasMath; + private String input; + private boolean markersSettings; + + public HtmlMathScannerTest(boolean hasMath, String input) { + this.hasMath = hasMath; + this.input = input; + } + + @Before + public void setMathJaxMarkers() { + markersSettings = WebappHelper.isMathJaxMarkers(); + new WebappHelper().setMathJaxMarkers(true); + } + + @After + public void resetMathJaxMarkers() { + new WebappHelper().setMathJaxMarkers(markersSettings); + } + + @Test + 
public void hasMath() { + boolean output = filter.scan(input); + Assert.assertEquals(hasMath, output); + } +} diff --git a/src/test/java/org/olat/core/util/filter/impl/SimpleHTMLTagsFilterTest.java b/src/test/java/org/olat/core/util/filter/impl/SimpleHTMLTagsFilterTest.java index 99703b8ffc133c05e2801de388ebcf61b4f5509d..317e057b79463bd5bc706fb8365c5d063703532a 100644 --- a/src/test/java/org/olat/core/util/filter/impl/SimpleHTMLTagsFilterTest.java +++ b/src/test/java/org/olat/core/util/filter/impl/SimpleHTMLTagsFilterTest.java @@ -92,7 +92,7 @@ public class SimpleHTMLTagsFilterTest { } // Boundary test: this filter does NOT decode HTML entities. Use the - // NekoHTMLFilter if you need this feature! + // HtmlFilter if you need this feature! @Test public void testTagsWithEntities() { t("Gnägi", "Gnägi"); diff --git a/src/test/java/org/olat/ims/qti21/model/xml/AssessmentHtmlBuilderTest.java b/src/test/java/org/olat/ims/qti21/model/xml/AssessmentHtmlBuilderTest.java index a684a8dd086aafd8190dc1deb5aed922addbc97e..83b7a9b8e1370f9acd1dac82b2ecef19fc487163 100644 --- a/src/test/java/org/olat/ims/qti21/model/xml/AssessmentHtmlBuilderTest.java +++ b/src/test/java/org/olat/ims/qti21/model/xml/AssessmentHtmlBuilderTest.java @@ -32,12 +32,15 @@ import uk.ac.ed.ph.jqtiplus.JqtiExtensionManager; import uk.ac.ed.ph.jqtiplus.node.content.ItemBody; import uk.ac.ed.ph.jqtiplus.node.content.basic.Block; import uk.ac.ed.ph.jqtiplus.node.content.basic.TextRun; +import uk.ac.ed.ph.jqtiplus.node.content.xhtml.table.Table; import uk.ac.ed.ph.jqtiplus.node.content.xhtml.text.P; import uk.ac.ed.ph.jqtiplus.node.item.AssessmentItem; import uk.ac.ed.ph.jqtiplus.node.item.interaction.Interaction; import uk.ac.ed.ph.jqtiplus.node.item.interaction.TextEntryInteraction; import uk.ac.ed.ph.jqtiplus.node.item.interaction.choice.SimpleChoice; +import uk.ac.ed.ph.jqtiplus.node.item.interaction.content.Hottext; import uk.ac.ed.ph.jqtiplus.serialization.QtiSerializer; +import 
uk.ac.ed.ph.jqtiplus.utils.QueryUtils; /** * Test the conversion from TinyMCE HTML code to the QTI Works object @@ -62,9 +65,28 @@ public class AssessmentHtmlBuilderTest { String content = new AssessmentHtmlBuilder().flowStaticString(helper.getFlowStatics()); Assert.assertTrue(content.contains(">Hello world<")); } - + + @Test + public void isEmpty() { + String html = "Hello world"; + boolean contains = new AssessmentHtmlBuilder().containsSomething(html); + Assert.assertTrue(contains); + + html = "<p> </p>"; + contains = new AssessmentHtmlBuilder().containsSomething(html); + Assert.assertTrue(contains); + + html = "<p></p>"; + contains = new AssessmentHtmlBuilder().containsSomething(html); + Assert.assertTrue(contains); + + html = " "; + contains = new AssessmentHtmlBuilder().containsSomething(html); + Assert.assertFalse(contains); + } + @Test - public void filter_alt() { + public void appendHtml() throws IOException { String content = "<p>Test <textEntryInteraction responseIdentifier=\"RESPONSE_1\"/> </p>"; AssessmentItem item = new AssessmentItem(); @@ -78,10 +100,112 @@ public class AssessmentHtmlBuilderTest { Assert.assertTrue(interaction instanceof TextEntryInteraction); Assert.assertNotNull(interaction.getResponseIdentifier()); Assert.assertEquals("RESPONSE_1", interaction.getResponseIdentifier().toString()); + + // The serializer can throw some exceptions if it doesn't like the model + // we want to serialize. 
+ StringOutput sb = new StringOutput(); + QtiSerializer qtiSerializer = new QtiSerializer(new JqtiExtensionManager()); + qtiSerializer.serializeJqtiObject(helper, new StreamResult(sb)); + String serializedQti = sb.toString(); + Assert.assertTrue(serializedQti.contains("textEntryInteraction")); + sb.close(); + } + + @Test + public void appendHtml_namespace() throws IOException { + String content = "<p xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns=\"http://www.imsglobal.org/xsd/imsqti_v2p1\" xsi:schemaLocation=\"http://www.imsglobal.org/xsd/imsqti_v2p1 http://www.imsglobal.org/xsd/imsqti_v2p1.xsd\">European rocket <textEntryInteraction responseIdentifier=\"RESPONSE_2\" data-qti-solution=\"Ariane\" openolatType=\"string\" placeholderText=\"ari\"/>New text<textEntryInteraction responseIdentifier=\"RESPONSE_1\" data-qti-solution=\"Falcon9\" openolatType=\"string\" placeholderText=\"falc\"/></p>"; + + AssessmentItem item = new AssessmentItem(); + ItemBody helper = new ItemBody(item); + new AssessmentHtmlBuilder().appendHtml(helper, content); + + List<Interaction> interactions = helper.findInteractions(); + Assert.assertNotNull(interactions); + Assert.assertEquals(2, interactions.size()); + TextEntryInteraction entry1 = (TextEntryInteraction)interactions.get(0); + Assert.assertEquals("RESPONSE_2", entry1.getResponseIdentifier().toString()); + TextEntryInteraction entry2 = (TextEntryInteraction)interactions.get(1); + Assert.assertEquals("RESPONSE_1", entry2.getResponseIdentifier().toString()); + + List<Block> blocks = helper.getBlocks(); + Assert.assertNotNull(blocks); + Assert.assertEquals(1, blocks.size()); + Assert.assertTrue(blocks.get(0) instanceof P); + + // The serializer can throw some exceptions if it doesn't like the model + // we want to serialize. 
+ StringOutput sb = new StringOutput(); + QtiSerializer qtiSerializer = new QtiSerializer(new JqtiExtensionManager()); + qtiSerializer.serializeJqtiObject(helper, new StreamResult(sb)); + String serializedQti = sb.toString(); + Assert.assertTrue(serializedQti.contains("textEntryInteraction")); + sb.close(); + } + + @Test + public void appendHtml_textEntryInteractions() throws IOException { + String content = "<p>Usefull for circles <textentryinteraction responseidentifier=\"RESPONSE_2\" data-qti-solution=\"Pi\" data-qti-solution-empty=\"false\"></textentryinteraction>New <br>text<textentryinteraction responseidentifier=\"RESPONSE_1\" data-qti-solution=\"Ln\" data-qti-solution-empty=\"false\"></textentryinteraction></p>"; + + AssessmentItem item = new AssessmentItem(); + ItemBody helper = new ItemBody(item); + new AssessmentHtmlBuilder().appendHtml(helper, content); + + List<Interaction> interactions = helper.findInteractions(); + Assert.assertNotNull(interactions); + Assert.assertEquals(2, interactions.size()); + TextEntryInteraction entry1 = (TextEntryInteraction)interactions.get(0); + Assert.assertEquals("RESPONSE_2", entry1.getResponseIdentifier().toString()); + TextEntryInteraction entry2 = (TextEntryInteraction)interactions.get(1); + Assert.assertEquals("RESPONSE_1", entry2.getResponseIdentifier().toString()); + + List<Block> blocks = helper.getBlocks(); + Assert.assertNotNull(blocks); + Assert.assertEquals(1, blocks.size()); + Assert.assertTrue(blocks.get(0) instanceof P); + + // The serializer can throw some exceptions if it doesn't like the model + // we want to serialize. 
+ StringOutput sb = new StringOutput(); + QtiSerializer qtiSerializer = new QtiSerializer(new JqtiExtensionManager()); + qtiSerializer.serializeJqtiObject(helper, new StreamResult(sb)); + String serializedQti = sb.toString(); + Assert.assertTrue(serializedQti.contains("textEntryInteraction")); + sb.close(); + } + + @Test + public void appendHtml_textEntryInteractionsAutoClosed() throws IOException { + String content = "<p xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns=\"http://www.imsglobal.org/xsd/imsqti_v2p1\" xsi:schemaLocation=\"http://www.imsglobal.org/xsd/imsqti_v2p1 http://www.imsglobal.org/xsd/imsqti_v2p1.xsd\">Usefull for circles <textEntryInteraction responseIdentifier=\"RESPONSE_2\" data-qti-solution=\"Pi\" openolatType=\"string\" placeholderText=\"314\" />New text<textEntryInteraction responseIdentifier=\"RESPONSE_1\" data-qti-solution=\"Ln\" openolatType=\"string\" placeholderText=\"lognat\" /></p>"; + + AssessmentItem item = new AssessmentItem(); + ItemBody helper = new ItemBody(item); + new AssessmentHtmlBuilder().appendHtml(helper, content); + + List<Interaction> interactions = helper.findInteractions(); + Assert.assertNotNull(interactions); + Assert.assertEquals(2, interactions.size()); + TextEntryInteraction entry1 = (TextEntryInteraction)interactions.get(0); + Assert.assertEquals("RESPONSE_2", entry1.getResponseIdentifier().toString()); + TextEntryInteraction entry2 = (TextEntryInteraction)interactions.get(1); + Assert.assertEquals("RESPONSE_1", entry2.getResponseIdentifier().toString()); + + List<Block> blocks = helper.getBlocks(); + Assert.assertNotNull(blocks); + Assert.assertEquals(1, blocks.size()); + Assert.assertTrue(blocks.get(0) instanceof P); + + // The serializer can throw some exceptions if it doesn't like the model + // we want to serialize. 
+ StringOutput sb = new StringOutput(); + QtiSerializer qtiSerializer = new QtiSerializer(new JqtiExtensionManager()); + qtiSerializer.serializeJqtiObject(helper, new StreamResult(sb)); + String serializedQti = sb.toString(); + Assert.assertTrue(serializedQti.contains("textEntryInteraction")); + sb.close(); } @Test - public void filter() throws IOException { + public void appendHtml_accidentalHtmlRootElement() throws IOException { String content = "<html><p>Test \u00EA<strong><span><img src='img.jpg'></span></strong></p><p>Test 2</p></html>"; AssessmentItem item = new AssessmentItem(); @@ -91,19 +215,73 @@ public class AssessmentHtmlBuilderTest { List<Block> paragraphs = helper.getBlocks(); Assert.assertNotNull(paragraphs); Assert.assertEquals(2, paragraphs.size()); + Assert.assertTrue(paragraphs.get(0) instanceof P); + Assert.assertTrue(paragraphs.get(1) instanceof P); - // The serializer can throw some exceptions if it doens't like the model + // The serializer can throw some exceptions if it doesn't like the model // we want to serialize. 
StringOutput sb = new StringOutput(); QtiSerializer qtiSerializer = new QtiSerializer(new JqtiExtensionManager()); qtiSerializer.serializeJqtiObject(helper, new StreamResult(sb)); String serializedQti = sb.toString(); Assert.assertTrue(serializedQti.contains("img.jpg")); + Assert.assertTrue(serializedQti.contains("Test \u00EA"));// check encoding + Assert.assertTrue(serializedQti.contains("Test 2")); + Assert.assertFalse(serializedQti.contains("<html")); + sb.close(); + } + + @Test + public void appendHtml_table() throws IOException { + String content = "<p>This is a hot<hottext identifier=\"ht2efcb885fc4149bdf3fcf567b44a78\">text</hottext> et un dans une table:</p>\n" + + "<table class=\"b_default\" style=\"height: 109px;\" width=\"440\">\n" + + "<tbody>\n" + + "<tr>\n" + + "<td style=\"width: 146.34375px;\">Ceci</td>\n" + + "<td style=\"width: 146.34375px;\">est</td>\n" + + "<td style=\"width: 146.34375px;\">une</td>\n" + + "</tr>\n" + + "<tr>\n" + + "<td style=\"width: 146.34375px;\">table</td>\n" + + "<td style=\"width: 146.34375px;\">donc</td>\n" + + "<td style=\"width: 146.34375px; border-color: #ed9595; background-color: #edc5c5;\"><hottext identifier=\"ht8096a52c17f663ab9d73b7071609\">table</hottext></td>\n" + + "</tr>\n" + + "</tbody>\n" + + "</table>"; + + AssessmentItem item = new AssessmentItem(); + ItemBody helper = new ItemBody(item); + new AssessmentHtmlBuilder().appendHtml(helper, content); + + List<Block> paragraphs = helper.getBlocks(); + Assert.assertNotNull(paragraphs); + Assert.assertEquals(2, paragraphs.size()); + Assert.assertTrue(paragraphs.get(0) instanceof P); + Assert.assertTrue(paragraphs.get(1) instanceof Table); + + List<Hottext> hottexts = QueryUtils.search(Hottext.class, helper.getBlocks()); + Assert.assertNotNull(hottexts); + Assert.assertEquals(2, hottexts.size()); + Assert.assertEquals("ht2efcb885fc4149bdf3fcf567b44a78", hottexts.get(0).getIdentifier().toString()); + Assert.assertEquals("ht8096a52c17f663ab9d73b7071609", 
hottexts.get(1).getIdentifier().toString()); + + // The serializer can throw some exceptions if it doesn't like the model + // we want to serialize. + StringOutput sb = new StringOutput(); + QtiSerializer qtiSerializer = new QtiSerializer(new JqtiExtensionManager()); + qtiSerializer.serializeJqtiObject(helper, new StreamResult(sb)); + String serializedQti = sb.toString(); + Assert.assertTrue(serializedQti.contains("ht2efcb885fc4149bdf3fcf567b44a78")); + Assert.assertTrue(serializedQti.contains("ht8096a52c17f663ab9d73b7071609")); + Assert.assertTrue(serializedQti.contains("background-color: #edc5c5;")); + Assert.assertTrue(serializedQti.contains("width: 146.34375px;")); + Assert.assertFalse(serializedQti.contains("ns:identifier"));// check namespaced attributes + Assert.assertFalse(serializedQti.contains("ns:style")); sb.close(); } @Test - public void serializeVideo() throws IOException { + public void appendHtml_serializeVideo() throws IOException { String content = "<p><span id=\"olatFlashMovieViewer213060\" class=\"olatFlashMovieViewer\" style=\"display:block;border:solid 1px #000; width:320px; height:240px;\">\n" + "<script src=\"/raw/fx-111111x11/movie/player.js\" type=\"text/javascript\"></script>\n" + "<script type=\"text/javascript\" defer=\"defer\">// <![CDATA[\n" @@ -118,7 +296,7 @@ public class AssessmentHtmlBuilderTest { List<Block> paragraphs = helper.getBlocks(); Assert.assertNotNull(paragraphs); Assert.assertEquals(1, paragraphs.size()); - + StringOutput sb = new StringOutput(); QtiSerializer qtiSerializer = new QtiSerializer(new JqtiExtensionManager()); qtiSerializer.serializeJqtiObject(helper, new StreamResult(sb)); @@ -127,6 +305,7 @@ public class AssessmentHtmlBuilderTest { Assert.assertTrue(serializedQti.contains("object")); Assert.assertFalse(serializedQti.contains("span")); Assert.assertFalse(serializedQti.contains("script")); + Assert.assertFalse(serializedQti.contains("ns:data")); sb.close(); } } diff --git 
a/src/test/java/org/olat/ims/qti21/model/xml/TestFeedbackBuilderTest.java b/src/test/java/org/olat/ims/qti21/model/xml/TestFeedbackBuilderTest.java new file mode 100644 index 0000000000000000000000000000000000000000..ad0e8ffbe7ca384ad7ffb17eddc62d11e06209a4 --- /dev/null +++ b/src/test/java/org/olat/ims/qti21/model/xml/TestFeedbackBuilderTest.java @@ -0,0 +1,68 @@ +/** + * <a href="http://www.openolat.org"> + * OpenOLAT - Online Learning and Training</a><br> + * <p> + * Licensed under the Apache License, Version 2.0 (the "License"); <br> + * you may not use this file except in compliance with the License.<br> + * You may obtain a copy of the License at the + * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a> + * <p> + * Unless required by applicable law or agreed to in writing,<br> + * software distributed under the License is distributed on an "AS IS" BASIS, <br> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br> + * See the License for the specific language governing permissions and <br> + * limitations under the License. 
+ * <p> + * Initial code contributed and copyrighted by<br> + * frentix GmbH, http://www.frentix.com + * <p> + */ +package org.olat.ims.qti21.model.xml; + +import java.util.Arrays; +import java.util.Collection; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + + +/** + * + * Initial date: 18 mai 2019<br> + * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com + * + */ +@RunWith(Parameterized.class) +public class TestFeedbackBuilderTest { + + @Parameters + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] { + { true, "<p>\n </p>" }, + { true, "\n" }, + { true, "<span></span>" }, + { false, "Hello" }, + { false, "<span class='olatFlashMovieViewer'></span>" }, + { false, "<img src='img.jpg' />" }, + { false, "<img src='img.jpg'></img>" }, + { false, "<p>Hello</p>" } + }); + } + + private boolean empty; + private String input; + + public TestFeedbackBuilderTest(boolean empty, String input) { + this.empty = empty; + this.input = input; + } + + @Test + public void isEmpty() { + boolean output = TestFeedbackBuilder.isEmpty(input); + Assert.assertEquals(empty, output); + } +} diff --git a/src/test/java/org/olat/search/service/document/file/HtmlDocumentTest.java b/src/test/java/org/olat/search/service/document/file/HtmlDocumentTest.java new file mode 100644 index 0000000000000000000000000000000000000000..728b0d895be4913895368e63a7cad1e3ed47b735 --- /dev/null +++ b/src/test/java/org/olat/search/service/document/file/HtmlDocumentTest.java @@ -0,0 +1,53 @@ +/** + * <a href="http://www.openolat.org"> + * OpenOLAT - Online Learning and Training</a><br> + * <p> + * Licensed under the Apache License, Version 2.0 (the "License"); <br> + * you may not use this file except in compliance with the License.<br> + * You may obtain a copy of the License at the + * <a 
href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a> + * <p> + * Unless required by applicable law or agreed to in writing,<br> + * software distributed under the License is distributed on an "AS IS" BASIS, <br> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br> + * See the License for the specific language governing permissions and <br> + * limitations under the License. + * <p> + * Initial code contributed and copyrighted by<br> + * frentix GmbH, http://www.frentix.com + * <p> + */ +package org.olat.search.service.document.file; + +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; + +import org.junit.Assert; +import org.junit.Test; +import org.olat.core.util.vfs.VFSLeaf; +import org.olat.test.VFSJavaIOFile; + +/** + * + * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com + */ +public class HtmlDocumentTest { + + @Test + public void testHtmlDocument() + throws DocumentException, DocumentAccessException, URISyntaxException, IOException { + URL url = HtmlDocumentTest.class.getResource("test.html"); + Assert.assertNotNull(url); + + VFSLeaf doc = new VFSJavaIOFile("test.html", new File(url.toURI())); + HtmlDocument document = new HtmlDocument(); + FileContent content = document.readContent(doc); + Assert.assertNotNull(content); + Assert.assertEquals("Hello", content.getTitle().trim()); + String body = content.getContent(); + Assert.assertTrue(body.trim().contains("Hello, this is HTML")); + } + +} diff --git a/src/test/java/org/olat/search/service/document/file/XmlDocumentTest.java b/src/test/java/org/olat/search/service/document/file/XmlDocumentTest.java new file mode 100644 index 0000000000000000000000000000000000000000..75a3e122e8c483646875cbeeaf9f5a495c997ba8 --- /dev/null +++ b/src/test/java/org/olat/search/service/document/file/XmlDocumentTest.java @@ -0,0 +1,52 @@ +/** + * <a href="http://www.openolat.org"> + * OpenOLAT - Online Learning 
and Training</a><br>
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); <br>
+ * you may not use this file except in compliance with the License.<br>
+ * You may obtain a copy of the License at the
+ * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
+ * <p>
+ * Unless required by applicable law or agreed to in writing,<br>
+ * software distributed under the License is distributed on an "AS IS" BASIS, <br>
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
+ * See the License for the specific language governing permissions and <br>
+ * limitations under the License.
+ * <p>
+ * Initial code contributed and copyrighted by<br>
+ * frentix GmbH, http://www.frentix.com
+ * <p>
+ */
+package org.olat.search.service.document.file;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.olat.core.util.vfs.VFSLeaf;
+import org.olat.test.VFSJavaIOFile;
+
+/**
+ * Checks that {@link XmlDocument} returns the text of the test.xml fixture as content.
+ * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
+ */
+public class XmlDocumentTest {
+
+	@Test
+	public void testXmlDocument()
+	throws DocumentException, DocumentAccessException, URISyntaxException, IOException {
+		URL fixtureUrl = XmlDocumentTest.class.getResource("test.xml");
+		Assert.assertNotNull(fixtureUrl);
+		File fixtureFile = new File(fixtureUrl.toURI());
+		VFSLeaf leaf = new VFSJavaIOFile("test.xml", fixtureFile);
+
+		FileContent extracted = new XmlDocument().readContent(leaf);
+		Assert.assertNotNull(extracted);
+		String text = extracted.getContent().trim();
+		Assert.assertTrue(text.contains("This is XML"));
+	}
+
+}
diff --git a/src/test/java/org/olat/search/service/document/file/test.html b/src/test/java/org/olat/search/service/document/file/test.html
index e1153f6bba59fe3fab57b9f5dbe23aabf955292b..b8e9d517dca196d511cedc791640a6148da94699 100644
--- a/src/test/java/org/olat/search/service/document/file/test.html
+++ b/src/test/java/org/olat/search/service/document/file/test.html @@ -3,6 +3,6 @@ <title>Hello</title> </head> <body> - <h4>Hello</h4> + <h4>Hello, this is HTML</h4> </body> </html> \ No newline at end of file diff --git a/src/test/java/org/olat/search/service/document/file/test.xml b/src/test/java/org/olat/search/service/document/file/test.xml new file mode 100644 index 0000000000000000000000000000000000000000..a477f6cbf5143a8f4881c73daa955d2514a2f1ff --- /dev/null +++ b/src/test/java/org/olat/search/service/document/file/test.xml @@ -0,0 +1,4 @@ +<some> + <tag>Hello</tag> + <frentix>This is XML</frentix> +</some> \ No newline at end of file diff --git a/src/test/java/org/olat/search/service/document/file/test2.html b/src/test/java/org/olat/search/service/document/file/test2.html new file mode 100644 index 0000000000000000000000000000000000000000..e1153f6bba59fe3fab57b9f5dbe23aabf955292b --- /dev/null +++ b/src/test/java/org/olat/search/service/document/file/test2.html @@ -0,0 +1,8 @@ +<html> + <head> + <title>Hello</title> + </head> + <body> + <h4>Hello</h4> + </body> +</html> \ No newline at end of file diff --git a/src/test/java/org/olat/test/AllTestsJunit4.java b/src/test/java/org/olat/test/AllTestsJunit4.java index fdc4532321cc62795cd38c6ddc5eaf31f8a49154..c7b09caa2b7ecb441586fd621b344767e0f7b90b 100644 --- a/src/test/java/org/olat/test/AllTestsJunit4.java +++ b/src/test/java/org/olat/test/AllTestsJunit4.java @@ -56,7 +56,8 @@ import org.junit.runners.Suite; org.olat.core.util.filter.impl.XSSFilterParamTest.class, org.olat.core.util.filter.impl.AddBaseURLToMediaRelativeURLFilterTest.class, org.olat.core.util.filter.impl.SimpleHTMLTagsFilterTest.class, - org.olat.core.util.filter.impl.NekoHTMLFilterTest.class, + org.olat.core.util.filter.impl.HtmlFilterTest.class, + org.olat.core.util.filter.impl.HtmlMathScannerTest.class, org.olat.core.util.filter.impl.ConditionalHtmlCommentsFilterTest.class, org.olat.core.util.filter.impl.XMLValidCharacterFilterTest.class, 
org.olat.core.util.filter.impl.XMLValidEntityFilterTest.class, @@ -295,6 +296,7 @@ import org.junit.runners.Suite; org.olat.properties.PropertyTest.class, org.olat.search.service.document.file.FileDocumentFactoryTest.class, org.olat.search.service.indexer.repository.course.SPCourseNodeIndexerTest.class, + org.olat.search.service.document.file.HtmlDocumentTest.class, org.olat.search.service.document.file.PDFDocumentTest.class, org.olat.search.service.document.file.OfficeDocumentTest.class, org.olat.core.commons.services.notifications.manager.NotificationsManagerTest.class, @@ -318,6 +320,7 @@ import org.junit.runners.Suite; org.olat.ims.qti21.model.xml.AssessmentItemBuilderTest.class, org.olat.ims.qti21.model.xml.MultipleChoiceAssessmentItemBuilderTest.class, org.olat.ims.qti21.model.xml.SingleChoiceAssessmentItemBuilderTest.class, + org.olat.ims.qti21.model.xml.TestFeedbackBuilderTest.class, org.olat.ims.qti21.model.xml.HottextAssessmentItemBuilderTest.class, org.olat.ims.qti21.model.xml.AssessmentHtmlBuilderTest.class, org.olat.ims.qti21.model.xml.AssessmentItemPackageTest.class,