From 49aec96208fe571ae43a38418f3f7c14596d7672 Mon Sep 17 00:00:00 2001 From: srosse <none@none> Date: Mon, 23 Apr 2018 13:56:26 +0200 Subject: [PATCH] OO-3441: replace the regex with a real HTML parser to catch all cases --- .../services/csp/ui/CSPLogDataSource.java | 19 +++++ .../csp/ui/CSPLogEntryController.java | 19 +++++ .../java/org/olat/core/util/Formatter.java | 5 +- .../util/filter/impl/NekoHTMLMathScanner.java | 84 +++++++++++++++++++ .../services/csp/manager/CSPManagerTest.java | 19 +++++ .../core/util/FormatLatexFormulasTest.java | 80 ++++++++++++++++++ .../org/olat/core/util/FormatterTest.java | 29 ------- .../java/org/olat/test/AllTestsJunit4.java | 1 + 8 files changed, 225 insertions(+), 31 deletions(-) create mode 100644 src/main/java/org/olat/core/util/filter/impl/NekoHTMLMathScanner.java create mode 100644 src/test/java/org/olat/core/util/FormatLatexFormulasTest.java diff --git a/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogDataSource.java b/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogDataSource.java index b66a56010bf..db69137ca75 100644 --- a/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogDataSource.java +++ b/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogDataSource.java @@ -1,3 +1,22 @@ +/** + * <a href="http://www.openolat.org"> + * OpenOLAT - Online Learning and Training</a><br> + * <p> + * Licensed under the Apache License, Version 2.0 (the "License"); <br> + * you may not use this file except in compliance with the License.<br> + * You may obtain a copy of the License at the + * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a> + * <p> + * Unless required by applicable law or agreed to in writing,<br> + * software distributed under the License is distributed on an "AS IS" BASIS, <br> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
<br> + * See the License for the specific language governing permissions and <br> + * limitations under the License. + * <p> + * Initial code contributed and copyrighted by<br> + * frentix GmbH, http://www.frentix.com + * <p> + */ package org.olat.core.commons.services.csp.ui; import java.util.Collections; diff --git a/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogEntryController.java b/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogEntryController.java index 499a1338f79..30023cca549 100644 --- a/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogEntryController.java +++ b/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogEntryController.java @@ -1,3 +1,22 @@ +/** + * <a href="http://www.openolat.org"> + * OpenOLAT - Online Learning and Training</a><br> + * <p> + * Licensed under the Apache License, Version 2.0 (the "License"); <br> + * you may not use this file except in compliance with the License.<br> + * You may obtain a copy of the License at the + * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a> + * <p> + * Unless required by applicable law or agreed to in writing,<br> + * software distributed under the License is distributed on an "AS IS" BASIS, <br> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br> + * See the License for the specific language governing permissions and <br> + * limitations under the License. 
+ * <p> + * Initial code contributed and copyrighted by<br> + * frentix GmbH, http://www.frentix.com + * <p> + */ package org.olat.core.commons.services.csp.ui; import org.olat.basesecurity.BaseSecurity; diff --git a/src/main/java/org/olat/core/util/Formatter.java b/src/main/java/org/olat/core/util/Formatter.java index 97e9516a7fe..3b172294d01 100644 --- a/src/main/java/org/olat/core/util/Formatter.java +++ b/src/main/java/org/olat/core/util/Formatter.java @@ -50,6 +50,7 @@ import org.apache.commons.lang.time.DurationFormatUtils; import org.olat.core.dispatcher.impl.StaticMediaDispatcher; import org.olat.core.gui.render.StringOutput; import org.olat.core.helpers.Settings; +import org.olat.core.util.filter.impl.NekoHTMLMathScanner; /** * enclosing_type Description: <br> @@ -654,7 +655,7 @@ public class Formatter { public static String formatLatexFormulas(String htmlFragment) { if (htmlFragment == null) return ""; // optimize, reduce jsmath calls on client - if (htmlFragment.contains("<math") || htmlFragment.contains("class='math'") || htmlFragment.contains("class=\"math\"") || classMathPattern.matcher(htmlFragment).matches()) { + if (new NekoHTMLMathScanner().scan(htmlFragment)) { // add math wrapper String domid = "mw_" + CodeHelper.getRAMUniqueID(); String elem = htmlFragment.contains("<div") || htmlFragment.contains("<p") ? 
"div" : "span"; @@ -664,7 +665,7 @@ public class Formatter { sb.append("</").append(elem).append(">"); sb.append("\n<script type='text/javascript'>\n/* <![CDATA[ */\n setTimeout(function() { BFormatter.formatLatexFormulas('").append(domid).append("');}, 100);\n/* ]]> */\n</script>"); return sb.toString(); - } + } return htmlFragment; } diff --git a/src/main/java/org/olat/core/util/filter/impl/NekoHTMLMathScanner.java b/src/main/java/org/olat/core/util/filter/impl/NekoHTMLMathScanner.java new file mode 100644 index 00000000000..68c23ea336c --- /dev/null +++ b/src/main/java/org/olat/core/util/filter/impl/NekoHTMLMathScanner.java @@ -0,0 +1,84 @@ +/** + * <a href="http://www.openolat.org"> + * OpenOLAT - Online Learning and Training</a><br> + * <p> + * Licensed under the Apache License, Version 2.0 (the "License"); <br> + * you may not use this file except in compliance with the License.<br> + * You may obtain a copy of the License at the + * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a> + * <p> + * Unless required by applicable law or agreed to in writing,<br> + * software distributed under the License is distributed on an "AS IS" BASIS, <br> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br> + * See the License for the specific language governing permissions and <br> + * limitations under the License. + * <p> + * Initial code contributed and copyrighted by<br> + * frentix GmbH, http://www.frentix.com + * <p> + */ +package org.olat.core.util.filter.impl; + +import java.io.StringReader; + +import org.cyberneko.html.parsers.SAXParser; +import org.olat.core.logging.OLog; +import org.olat.core.logging.Tracing; +import org.xml.sax.Attributes; +import org.xml.sax.InputSource; +import org.xml.sax.helpers.DefaultHandler; + +/** + * Detect the tag math and a CSS class named math + * + * + * Initial date: 23 avr. 
2018<br> + * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com + * + */ +public class NekoHTMLMathScanner { + + private static final OLog log = Tracing.createLoggerFor(NekoHTMLMathScanner.class); + + public boolean scan(String original) { + if (original == null) return false; + + try { + SAXParser parser = new SAXParser(); + parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); + parser.setFeature("http://cyberneko.org/html/features/balance-tags", false); + HTMLHandler contentHandler = new HTMLHandler(); + parser.setContentHandler(contentHandler); + parser.parse(new InputSource(new StringReader(original))); + return contentHandler.mathFound(); + } catch (Exception e) { + log.error("", e); + return false; + } + } + + private static class HTMLHandler extends DefaultHandler { + private boolean mathFound = false; + + public boolean mathFound() { + return mathFound; + } + + @Override + public void startElement(String uri, String localName, String qName, Attributes attributes) { + if("MATH".equals(localName)) { + mathFound = true; + } else if(attributes != null) { + String css = attributes.getValue("class"); + if(css != null) { + String[] splited = css.split("\\s+"); + for(String split:splited) { + if(split.equals("math")) { + mathFound = true; + } + } + } + } + } + } +} diff --git a/src/test/java/org/olat/core/commons/services/csp/manager/CSPManagerTest.java b/src/test/java/org/olat/core/commons/services/csp/manager/CSPManagerTest.java index bc8c42eaadb..4808b26acb1 100644 --- a/src/test/java/org/olat/core/commons/services/csp/manager/CSPManagerTest.java +++ b/src/test/java/org/olat/core/commons/services/csp/manager/CSPManagerTest.java @@ -1,3 +1,22 @@ +/** + * <a href="http://www.openolat.org"> + * OpenOLAT - Online Learning and Training</a><br> + * <p> + * Licensed under the Apache License, Version 2.0 (the "License"); <br> + * you may not use this file except in compliance with the License.<br> + * You may 
obtain a copy of the License at the + * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a> + * <p> + * Unless required by applicable law or agreed to in writing,<br> + * software distributed under the License is distributed on an "AS IS" BASIS, <br> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br> + * See the License for the specific language governing permissions and <br> + * limitations under the License. + * <p> + * Initial code contributed and copyrighted by<br> + * frentix GmbH, http://www.frentix.com + * <p> + */ package org.olat.core.commons.services.csp.manager; import org.junit.Test; diff --git a/src/test/java/org/olat/core/util/FormatLatexFormulasTest.java b/src/test/java/org/olat/core/util/FormatLatexFormulasTest.java new file mode 100644 index 00000000000..2dbc4a3076f --- /dev/null +++ b/src/test/java/org/olat/core/util/FormatLatexFormulasTest.java @@ -0,0 +1,80 @@ +/** + * <a href="http://www.openolat.org"> + * OpenOLAT - Online Learning and Training</a><br> + * <p> + * Licensed under the Apache License, Version 2.0 (the "License"); <br> + * you may not use this file except in compliance with the License.<br> + * You may obtain a copy of the License at the + * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a> + * <p> + * Unless required by applicable law or agreed to in writing,<br> + * software distributed under the License is distributed on an "AS IS" BASIS, <br> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br> + * See the License for the specific language governing permissions and <br> + * limitations under the License. 
+ * <p> + * Initial code contributed and copyrighted by<br> + * frentix GmbH, http://www.frentix.com + * <p> + */ +package org.olat.core.util; + +import java.util.Arrays; +import java.util.Collection; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * + * Initial date: 23 avr. 2018<br> + * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com + * + */ +@RunWith(Parameterized.class) +public class FormatLatexFormulasTest { + + + @Parameters + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] { + { "<span class='math'></span>", Boolean.TRUE }, + { "<span class='math inline'></span>", Boolean.TRUE }, + { "<div class = \"inline math special\"></div>", Boolean.TRUE }, + { "<span class = math></span>", Boolean.TRUE }, + { "<math></math>", Boolean.TRUE }, + { "<MATH></MATH>", Boolean.TRUE }, + + { "<span class = \"nomath\"></span>", Boolean.FALSE }, + { "<span class='test' id='math'></span>", Boolean.FALSE }, + { "<span class='math\"></span>", Boolean.FALSE }, + // real example of a wiki + { "<h2>Hier den unformatierten Text eingeben</h2><p>Ceci est une page avec LaTeX\n</p><div class=\"math\">x^2</div>", Boolean.TRUE }, + { "<span class=\"math\" title=\"A%20%3D%20%5Cpmatrix%7Ba_%7B11%7D%20%26%20a_%7B12%7D%20%26%20%5Cldots%20%26%20a_%7B1n%7D%5Ccr%0A%20%20%20%20%20%20%20%20%20%20%20%20%20a_%7B21%7D%20%26%20a_%7B22%7D%20%26%20%5Cldots%20%26%20a_%7B2n%7D%5Ccr%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%5Cvdots%20%26%20%5Cvdots%20%26%20%5Cddots%20%26%20%5Cvdots%5Ccr%0A%20%20%20%20%20%20%20%20%20%20%20%20%20a_%7Bm1%7D%20%26%20a_%7Bm2%7D%20%26%20%5Cldots%20%26%20a_%7Bmn%7D%7D%0A%5Cqquad%5Cqquad%0A%5Cpmatrix%7By_1%5Ccr%5Cvdots%5Ccr%20y_k%7D\">A = \\pmatrix{a_{11} & a_{12} & \\ldots & a_{1n}\\cr a_{21} & a_{22} & \\ldots & a_{2n}\\cr \\vdots & \\vdots & \\ddots & \\vdots\\cr a_{m1} & a_{m2} & \\ldots & 
a_{mn}} \\qquad\\qquad \\pmatrix{y_1\\cr\\vdots\\cr y_k}</span></p>\n\"", Boolean.TRUE }, + { "<DIV CLASS='math'></DIV>", Boolean.TRUE }, + + { "<span class='math\"''></span>", Boolean.FALSE }, + { "<span class='math\"'\"></span>", Boolean.FALSE }, + { "<span class='math></\"span>", Boolean.FALSE }, + { "<span class='math></'span>", Boolean.FALSE }, + { "<span class='math", Boolean.TRUE }, + { "<span class=math\"", Boolean.FALSE } + }); + } + + private String text; + private Boolean result; + + public FormatLatexFormulasTest(String text, Boolean result) { + this.text = text; + this.result = result; + } + + @Test + public void formatLatexFormulas() { + Assert.assertEquals(text, result.booleanValue(), Formatter.formatLatexFormulas(text).contains("<script")); + } +} diff --git a/src/test/java/org/olat/core/util/FormatterTest.java b/src/test/java/org/olat/core/util/FormatterTest.java index b794f3f8ef0..60073f1557c 100644 --- a/src/test/java/org/olat/core/util/FormatterTest.java +++ b/src/test/java/org/olat/core/util/FormatterTest.java @@ -132,33 +132,4 @@ public class FormatterTest { Assert.assertEquals("32:23:45", Formatter.formatTimecode(116625000l)); Assert.assertEquals("532:23:45", Formatter.formatTimecode(1916625000l)); } - - @Test - public void formatLatexFormulas() { - Assert.assertTrue(Formatter.formatLatexFormulas("<span class='math'></span>").contains("<script")); - Assert.assertTrue(Formatter.formatLatexFormulas("<span class='math inline'></span>").contains("<script")); - Assert.assertTrue(Formatter.formatLatexFormulas("<div class = \"inline math special\"></div>").contains("<script")); - Assert.assertTrue(Formatter.formatLatexFormulas("<span class = math></span>").contains("<script")); - Assert.assertFalse(Formatter.formatLatexFormulas("<span class = \"nomath\"></span>").contains("<script")); - Assert.assertFalse(Formatter.formatLatexFormulas("<span class='test' id='math'></span>").contains("<script")); - 
Assert.assertFalse(Formatter.formatLatexFormulas("<span class='math\"></span>").contains("<script")); - //wiki - Assert.assertTrue(Formatter.formatLatexFormulas("<DIV CLASS=\"math\"></span>").contains("<script")); - } - - - /** - * This case doesn't test the result of the method but if wrong - * formatted HTML code can produce an infinite loop. - * - */ - @Test - public void formatLatexFormulas_edgeCase() { - Assert.assertFalse(Formatter.formatLatexFormulas("<span class='math\"''></span>").contains("<script")); - Assert.assertFalse(Formatter.formatLatexFormulas("<span class='math\"'\"></span>").contains("<script")); - Assert.assertFalse(Formatter.formatLatexFormulas("<span class='math></\"span>").contains("<script")); - Assert.assertFalse(Formatter.formatLatexFormulas("<span class='math></'span>").contains("<script")); - Assert.assertFalse(Formatter.formatLatexFormulas("<span class='math").contains("<script")); - Assert.assertTrue(Formatter.formatLatexFormulas("<span class=math\"").contains("<script")); - } } diff --git a/src/test/java/org/olat/test/AllTestsJunit4.java b/src/test/java/org/olat/test/AllTestsJunit4.java index dbc1d0dafec..31440a03d97 100644 --- a/src/test/java/org/olat/test/AllTestsJunit4.java +++ b/src/test/java/org/olat/test/AllTestsJunit4.java @@ -65,6 +65,7 @@ import org.junit.runners.Suite; org.olat.core.util.FileUtilsTest.class, org.olat.core.util.FileNameSuffixFilterTest.class, org.olat.core.util.FormatterTest.class, + org.olat.core.util.FormatLatexFormulasTest.class, org.olat.core.util.FormatterHourAndSecondsTest.class, org.olat.core.util.EncoderTest.class, org.olat.core.util.SimpleHtmlParserTest.class, -- GitLab