From 49aec96208fe571ae43a38418f3f7c14596d7672 Mon Sep 17 00:00:00 2001
From: srosse <none@none>
Date: Mon, 23 Apr 2018 13:56:26 +0200
Subject: [PATCH] OO-3441: replace the regex with a real HTML parser to catch
 all cases

---
 .../services/csp/ui/CSPLogDataSource.java     | 19 +++++
 .../csp/ui/CSPLogEntryController.java         | 19 +++++
 .../java/org/olat/core/util/Formatter.java    |  5 +-
 .../util/filter/impl/NekoHTMLMathScanner.java | 84 +++++++++++++++++++
 .../services/csp/manager/CSPManagerTest.java  | 19 +++++
 .../core/util/FormatLatexFormulasTest.java    | 80 ++++++++++++++++++
 .../org/olat/core/util/FormatterTest.java     | 29 -------
 .../java/org/olat/test/AllTestsJunit4.java    |  1 +
 8 files changed, 225 insertions(+), 31 deletions(-)
 create mode 100644 src/main/java/org/olat/core/util/filter/impl/NekoHTMLMathScanner.java
 create mode 100644 src/test/java/org/olat/core/util/FormatLatexFormulasTest.java

diff --git a/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogDataSource.java b/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogDataSource.java
index b66a56010bf..db69137ca75 100644
--- a/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogDataSource.java
+++ b/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogDataSource.java
@@ -1,3 +1,22 @@
+/**
+ * <a href="http://www.openolat.org">
+ * OpenOLAT - Online Learning and Training</a><br>
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); <br>
+ * you may not use this file except in compliance with the License.<br>
+ * You may obtain a copy of the License at the
+ * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
+ * <p>
+ * Unless required by applicable law or agreed to in writing,<br>
+ * software distributed under the License is distributed on an "AS IS" BASIS, <br>
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
+ * See the License for the specific language governing permissions and <br>
+ * limitations under the License.
+ * <p>
+ * Initial code contributed and copyrighted by<br>
+ * frentix GmbH, http://www.frentix.com
+ * <p>
+ */
 package org.olat.core.commons.services.csp.ui;
 
 import java.util.Collections;
diff --git a/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogEntryController.java b/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogEntryController.java
index 499a1338f79..30023cca549 100644
--- a/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogEntryController.java
+++ b/src/main/java/org/olat/core/commons/services/csp/ui/CSPLogEntryController.java
@@ -1,3 +1,22 @@
+/**
+ * <a href="http://www.openolat.org">
+ * OpenOLAT - Online Learning and Training</a><br>
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); <br>
+ * you may not use this file except in compliance with the License.<br>
+ * You may obtain a copy of the License at the
+ * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
+ * <p>
+ * Unless required by applicable law or agreed to in writing,<br>
+ * software distributed under the License is distributed on an "AS IS" BASIS, <br>
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
+ * See the License for the specific language governing permissions and <br>
+ * limitations under the License.
+ * <p>
+ * Initial code contributed and copyrighted by<br>
+ * frentix GmbH, http://www.frentix.com
+ * <p>
+ */
 package org.olat.core.commons.services.csp.ui;
 
 import org.olat.basesecurity.BaseSecurity;
diff --git a/src/main/java/org/olat/core/util/Formatter.java b/src/main/java/org/olat/core/util/Formatter.java
index 97e9516a7fe..3b172294d01 100644
--- a/src/main/java/org/olat/core/util/Formatter.java
+++ b/src/main/java/org/olat/core/util/Formatter.java
@@ -50,6 +50,7 @@ import org.apache.commons.lang.time.DurationFormatUtils;
 import org.olat.core.dispatcher.impl.StaticMediaDispatcher;
 import org.olat.core.gui.render.StringOutput;
 import org.olat.core.helpers.Settings;
+import org.olat.core.util.filter.impl.NekoHTMLMathScanner;
 
 /**
  * enclosing_type Description: <br>
@@ -654,7 +655,7 @@ public class Formatter {
 	public static String formatLatexFormulas(String htmlFragment) {
 		if (htmlFragment == null) return "";
 		// optimize, reduce jsmath calls on client
-		if (htmlFragment.contains("<math") || htmlFragment.contains("class='math'") || htmlFragment.contains("class=\"math\"") || classMathPattern.matcher(htmlFragment).matches()) {
+		if (new NekoHTMLMathScanner().scan(htmlFragment)) {
 			// add math wrapper
 			String domid = "mw_" + CodeHelper.getRAMUniqueID();
 			String elem = htmlFragment.contains("<div") || htmlFragment.contains("<p") ? "div" : "span";
@@ -664,7 +665,7 @@ public class Formatter {
 			sb.append("</").append(elem).append(">");
 			sb.append("\n<script type='text/javascript'>\n/* <![CDATA[ */\n setTimeout(function() { BFormatter.formatLatexFormulas('").append(domid).append("');}, 100);\n/* ]]> */\n</script>");
 			return sb.toString();
-		}			
+		}
 		return htmlFragment;
 	}
 	
diff --git a/src/main/java/org/olat/core/util/filter/impl/NekoHTMLMathScanner.java b/src/main/java/org/olat/core/util/filter/impl/NekoHTMLMathScanner.java
new file mode 100644
index 00000000000..68c23ea336c
--- /dev/null
+++ b/src/main/java/org/olat/core/util/filter/impl/NekoHTMLMathScanner.java
@@ -0,0 +1,84 @@
+/**
+ * <a href="http://www.openolat.org">
+ * OpenOLAT - Online Learning and Training</a><br>
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); <br>
+ * you may not use this file except in compliance with the License.<br>
+ * You may obtain a copy of the License at the
+ * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
+ * <p>
+ * Unless required by applicable law or agreed to in writing,<br>
+ * software distributed under the License is distributed on an "AS IS" BASIS, <br>
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
+ * See the License for the specific language governing permissions and <br>
+ * limitations under the License.
+ * <p>
+ * Initial code contributed and copyrighted by<br>
+ * frentix GmbH, http://www.frentix.com
+ * <p>
+ */
+package org.olat.core.util.filter.impl;
+
+import java.io.StringReader;
+
+import org.cyberneko.html.parsers.SAXParser;
+import org.olat.core.logging.OLog;
+import org.olat.core.logging.Tracing;
+import org.xml.sax.Attributes;
+import org.xml.sax.InputSource;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * Detects whether an HTML fragment contains a math element or an element
+ * whose class attribute lists the CSS class named math.
+ * 
+ * Initial date: 23 avr. 2018<br>
+ * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
+ *
+ */
+public class NekoHTMLMathScanner {
+	
+	private static final OLog log = Tracing.createLoggerFor(NekoHTMLMathScanner.class);
+
+	/**
+	 * @param original The HTML fragment to inspect (can be null)
+	 * @return true if math content was detected; false if not, or if the parsing failed
+	 */
+	public boolean scan(String original) {
+		if (original == null) return false;
+		
+		try {
+			SAXParser parser = new SAXParser();
+			parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
+			parser.setFeature("http://cyberneko.org/html/features/balance-tags", false);
+			HTMLHandler contentHandler = new HTMLHandler();
+			parser.setContentHandler(contentHandler);
+			parser.parse(new InputSource(new StringReader(original)));
+			return contentHandler.mathFound();
+		} catch (Exception e) {
+			log.error("Cannot scan the HTML fragment for math content", e);
+			return false;
+		}
+	}
+
+	/** SAX handler which remembers if a math element or a class attribute listing math was seen. */
+	private static class HTMLHandler extends DefaultHandler {
+		private boolean mathFound = false;
+		
+		public boolean mathFound() {
+			return mathFound;
+		}
+
+		@Override
+		public void startElement(String uri, String localName, String qName, Attributes attributes) {
+			if(mathFound) return; // already detected, later elements cannot change the result
+			if("math".equalsIgnoreCase(localName)) { // independent of the element name case reported by the parser
+				mathFound = true;
+			} else if(attributes != null) {
+				String css = attributes.getValue("class");
+				// the class attribute is a whitespace separated list, math must be one of its tokens
+				mathFound = css != null && java.util.Arrays.asList(css.split("\\s+")).contains("math");
+			}
+		}
+	}
+}
diff --git a/src/test/java/org/olat/core/commons/services/csp/manager/CSPManagerTest.java b/src/test/java/org/olat/core/commons/services/csp/manager/CSPManagerTest.java
index bc8c42eaadb..4808b26acb1 100644
--- a/src/test/java/org/olat/core/commons/services/csp/manager/CSPManagerTest.java
+++ b/src/test/java/org/olat/core/commons/services/csp/manager/CSPManagerTest.java
@@ -1,3 +1,22 @@
+/**
+ * <a href="http://www.openolat.org">
+ * OpenOLAT - Online Learning and Training</a><br>
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); <br>
+ * you may not use this file except in compliance with the License.<br>
+ * You may obtain a copy of the License at the
+ * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
+ * <p>
+ * Unless required by applicable law or agreed to in writing,<br>
+ * software distributed under the License is distributed on an "AS IS" BASIS, <br>
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
+ * See the License for the specific language governing permissions and <br>
+ * limitations under the License.
+ * <p>
+ * Initial code contributed and copyrighted by<br>
+ * frentix GmbH, http://www.frentix.com
+ * <p>
+ */
 package org.olat.core.commons.services.csp.manager;
 
 import org.junit.Test;
diff --git a/src/test/java/org/olat/core/util/FormatLatexFormulasTest.java b/src/test/java/org/olat/core/util/FormatLatexFormulasTest.java
new file mode 100644
index 00000000000..2dbc4a3076f
--- /dev/null
+++ b/src/test/java/org/olat/core/util/FormatLatexFormulasTest.java
@@ -0,0 +1,80 @@
+/**
+ * <a href="http://www.openolat.org">
+ * OpenOLAT - Online Learning and Training</a><br>
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); <br>
+ * you may not use this file except in compliance with the License.<br>
+ * You may obtain a copy of the License at the
+ * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
+ * <p>
+ * Unless required by applicable law or agreed to in writing,<br>
+ * software distributed under the License is distributed on an "AS IS" BASIS, <br>
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
+ * See the License for the specific language governing permissions and <br>
+ * limitations under the License.
+ * <p>
+ * Initial code contributed and copyrighted by<br>
+ * frentix GmbH, http://www.frentix.com
+ * <p>
+ */
+package org.olat.core.util;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Checks for which HTML fragments Formatter.formatLatexFormulas() adds a script element.
+ * 
+ * Initial date: 23 avr. 2018<br>
+ * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
+ */
+@RunWith(Parameterized.class)
+public class FormatLatexFormulasTest {
+	
+	private final String htmlFragment;
+	private final Boolean scriptExpected;
+	
+	public FormatLatexFormulasTest(String text, Boolean result) {
+		this.htmlFragment = text;
+		this.scriptExpected = result;
+	}
+	
+	/** @return rows made of an HTML fragment and the flag telling if a script element is expected */
+	@Parameters
+	public static Collection<Object[]> data() {
+		return Arrays.asList(new Object[][] {
+			{ "<span class='math'></span>", Boolean.TRUE },
+			{ "<span class='math inline'></span>", Boolean.TRUE },
+			{ "<div class = \"inline math special\"></div>", Boolean.TRUE },
+			{ "<span class = math></span>", Boolean.TRUE },
+			{ "<math></math>", Boolean.TRUE },
+			{ "<MATH></MATH>", Boolean.TRUE },
+			
+			{ "<span class = \"nomath\"></span>", Boolean.FALSE },
+			{ "<span class='test' id='math'></span>", Boolean.FALSE },
+			{ "<span class='math\"></span>", Boolean.FALSE },
+			// real example of a wiki
+			{ "&lt;h2&gt;Hier den unformatierten Text eingeben&lt;/h2&gt;<p>Ceci est une page avec LaTeX\n</p><div class=\"math\">x^2</div>", Boolean.TRUE },
+			{ "<span class=\"math\" title=\"A%20%3D%20%5Cpmatrix%7Ba_%7B11%7D%20%26%20a_%7B12%7D%20%26%20%5Cldots%20%26%20a_%7B1n%7D%5Ccr%0A%20%20%20%20%20%20%20%20%20%20%20%20%20a_%7B21%7D%20%26%20a_%7B22%7D%20%26%20%5Cldots%20%26%20a_%7B2n%7D%5Ccr%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%5Cvdots%20%26%20%5Cvdots%20%26%20%5Cddots%20%26%20%5Cvdots%5Ccr%0A%20%20%20%20%20%20%20%20%20%20%20%20%20a_%7Bm1%7D%20%26%20a_%7Bm2%7D%20%26%20%5Cldots%20%26%20a_%7Bmn%7D%7D%0A%5Cqquad%5Cqquad%0A%5Cpmatrix%7By_1%5Ccr%5Cvdots%5Ccr%20y_k%7D\">A = \\pmatrix{a_{11} &amp; a_{12} &amp; \\ldots &amp; a_{1n}\\cr a_{21} &amp; a_{22} &amp; \\ldots &amp; a_{2n}\\cr \\vdots &amp; \\vdots &amp; \\ddots &amp; \\vdots\\cr a_{m1} &amp; a_{m2} &amp; \\ldots &amp; a_{mn}} \\qquad\\qquad \\pmatrix{y_1\\cr\\vdots\\cr y_k}</span></p>\n\"", Boolean.TRUE },
+			{ "<DIV CLASS='math'></DIV>", Boolean.TRUE },
+			
+			{ "<span class='math\"''></span>", Boolean.FALSE },
+			{ "<span class='math\"'\"></span>", Boolean.FALSE },
+			{ "<span class='math></\"span>", Boolean.FALSE },
+			{ "<span class='math></'span>", Boolean.FALSE },
+			{ "<span class='math", Boolean.TRUE },
+			{ "<span class=math\"", Boolean.FALSE }
+		});
+	}
+	
+	@Test
+	public void formatLatexFormulas() {
+		Assert.assertEquals(htmlFragment, scriptExpected.booleanValue(), Formatter.formatLatexFormulas(htmlFragment).contains("<script"));
+	}
+}
diff --git a/src/test/java/org/olat/core/util/FormatterTest.java b/src/test/java/org/olat/core/util/FormatterTest.java
index b794f3f8ef0..60073f1557c 100644
--- a/src/test/java/org/olat/core/util/FormatterTest.java
+++ b/src/test/java/org/olat/core/util/FormatterTest.java
@@ -132,33 +132,4 @@ public class FormatterTest {
 		Assert.assertEquals("32:23:45", Formatter.formatTimecode(116625000l));
 		Assert.assertEquals("532:23:45", Formatter.formatTimecode(1916625000l));
 	}
-	
-	@Test
-	public void formatLatexFormulas() {
-		Assert.assertTrue(Formatter.formatLatexFormulas("<span class='math'></span>").contains("<script"));
-		Assert.assertTrue(Formatter.formatLatexFormulas("<span class='math inline'></span>").contains("<script"));
-		Assert.assertTrue(Formatter.formatLatexFormulas("<div class = \"inline math special\"></div>").contains("<script"));
-		Assert.assertTrue(Formatter.formatLatexFormulas("<span class = math></span>").contains("<script"));
-		Assert.assertFalse(Formatter.formatLatexFormulas("<span class = \"nomath\"></span>").contains("<script"));
-		Assert.assertFalse(Formatter.formatLatexFormulas("<span class='test' id='math'></span>").contains("<script"));
-		Assert.assertFalse(Formatter.formatLatexFormulas("<span class='math\"></span>").contains("<script"));
-		//wiki
-		Assert.assertTrue(Formatter.formatLatexFormulas("<DIV CLASS=\"math\"></span>").contains("<script"));
-	}
-	
-
-	/**
-	 * This case doesn't test the result of the method but if wrong
-	 * formatted HTML code can produce an infinite loop.
-	 * 
-	 */
-	@Test
-	public void formatLatexFormulas_edgeCase() {
-		Assert.assertFalse(Formatter.formatLatexFormulas("<span class='math\"''></span>").contains("<script"));
-		Assert.assertFalse(Formatter.formatLatexFormulas("<span class='math\"'\"></span>").contains("<script"));
-		Assert.assertFalse(Formatter.formatLatexFormulas("<span class='math></\"span>").contains("<script"));
-		Assert.assertFalse(Formatter.formatLatexFormulas("<span class='math></'span>").contains("<script"));
-		Assert.assertFalse(Formatter.formatLatexFormulas("<span class='math").contains("<script"));
-		Assert.assertTrue(Formatter.formatLatexFormulas("<span class=math\"").contains("<script"));
-	}
 }
diff --git a/src/test/java/org/olat/test/AllTestsJunit4.java b/src/test/java/org/olat/test/AllTestsJunit4.java
index dbc1d0dafec..31440a03d97 100644
--- a/src/test/java/org/olat/test/AllTestsJunit4.java
+++ b/src/test/java/org/olat/test/AllTestsJunit4.java
@@ -65,6 +65,7 @@ import org.junit.runners.Suite;
 	org.olat.core.util.FileUtilsTest.class,
 	org.olat.core.util.FileNameSuffixFilterTest.class,
 	org.olat.core.util.FormatterTest.class,
+	org.olat.core.util.FormatLatexFormulasTest.class,
 	org.olat.core.util.FormatterHourAndSecondsTest.class,
 	org.olat.core.util.EncoderTest.class,
 	org.olat.core.util.SimpleHtmlParserTest.class,
-- 
GitLab