Skip to content
Snippets Groups Projects
Commit 49aec962 authored by srosse's avatar srosse
Browse files

OO-3441: replace the regex with a real HTML parser to catch all cases

parent 6e1a021e
No related branches found
No related tags found
No related merge requests found
/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.core.commons.services.csp.ui;
import java.util.Collections;
......
/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.core.commons.services.csp.ui;
import org.olat.basesecurity.BaseSecurity;
......
......@@ -50,6 +50,7 @@ import org.apache.commons.lang.time.DurationFormatUtils;
import org.olat.core.dispatcher.impl.StaticMediaDispatcher;
import org.olat.core.gui.render.StringOutput;
import org.olat.core.helpers.Settings;
import org.olat.core.util.filter.impl.NekoHTMLMathScanner;
/**
* enclosing_type Description: <br>
......@@ -654,7 +655,7 @@ public class Formatter {
public static String formatLatexFormulas(String htmlFragment) {
if (htmlFragment == null) return "";
// optimize, reduce jsmath calls on client
if (htmlFragment.contains("<math") || htmlFragment.contains("class='math'") || htmlFragment.contains("class=\"math\"") || classMathPattern.matcher(htmlFragment).matches()) {
if (new NekoHTMLMathScanner().scan(htmlFragment)) {
// add math wrapper
String domid = "mw_" + CodeHelper.getRAMUniqueID();
String elem = htmlFragment.contains("<div") || htmlFragment.contains("<p") ? "div" : "span";
......@@ -664,7 +665,7 @@ public class Formatter {
sb.append("</").append(elem).append(">");
sb.append("\n<script type='text/javascript'>\n/* <![CDATA[ */\n setTimeout(function() { BFormatter.formatLatexFormulas('").append(domid).append("');}, 100);\n/* ]]> */\n</script>");
return sb.toString();
}
}
return htmlFragment;
}
......
/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.core.util.filter.impl;
import java.io.StringReader;
import org.cyberneko.html.parsers.SAXParser;
import org.olat.core.logging.OLog;
import org.olat.core.logging.Tracing;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Detects whether an HTML fragment contains a {@code math} element or an
 * element whose {@code class} attribute lists the CSS class {@code math}.
 * The fragment is parsed with the NekoHTML SAX parser instead of being
 * matched against a regular expression.
 *
 * Initial date: 23 avr. 2018<br>
 * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
 *
 */
public class NekoHTMLMathScanner {

	private static final OLog log = Tracing.createLoggerFor(NekoHTMLMathScanner.class);

	/**
	 * Parses the given HTML fragment and reports whether math content was seen.
	 *
	 * @param original The HTML fragment to inspect, may be {@code null}
	 * @return {@code true} if a {@code math} element or an element with the
	 *         CSS class {@code math} was found; {@code false} otherwise, also
	 *         if the fragment is {@code null} or the parser threw an exception
	 */
	public boolean scan(String original) {
		if (original == null) {
			return false;
		}

		try {
			SAXParser parser = new SAXParser();
			// the input is a fragment, not a complete document, and tag balancing is switched off
			parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
			parser.setFeature("http://cyberneko.org/html/features/balance-tags", false);
			HTMLHandler contentHandler = new HTMLHandler();
			parser.setContentHandler(contentHandler);
			parser.parse(new InputSource(new StringReader(original)));
			return contentHandler.mathFound();
		} catch (Exception e) {
			// best effort: a fragment which cannot be parsed is reported as "no math"
			log.error("Cannot scan the HTML fragment for math content", e);
			return false;
		}
	}

	/**
	 * SAX handler which remembers if a math element or a math CSS class
	 * was reported by the parser.
	 */
	private static class HTMLHandler extends DefaultHandler {

		private boolean mathFound = false;

		/**
		 * @return {@code true} if math content was detected so far
		 */
		public boolean mathFound() {
			return mathFound;
		}

		@Override
		public void startElement(String uri, String localName, String qName, Attributes attributes) {
			if (mathFound) {
				return; // already detected, no need to inspect further elements
			}

			// NOTE(review): NekoHTML presumably reports element names in upper case by
			// default; the case-insensitive comparison keeps the detection working if
			// the parser is configured differently -- confirm against the parser settings
			if ("MATH".equalsIgnoreCase(localName)) {
				mathFound = true;
			} else if (attributes != null) {
				mathFound = containsMathClass(attributes.getValue("class"));
			}
		}

		/**
		 * Checks if the whitespace separated list of CSS classes contains
		 * exactly the class {@code math}.
		 *
		 * @param css The value of the class attribute, may be {@code null}
		 * @return {@code true} if one of the classes equals {@code math}
		 */
		private static boolean containsMathClass(String css) {
			if (css == null) {
				return false;
			}
			for (String cssClass : css.split("\\s+")) {
				if ("math".equals(cssClass)) {
					return true;
				}
			}
			return false;
		}
	}
}
/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.core.commons.services.csp.manager;
import org.junit.Test;
......
/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.core.util;
import java.util.Arrays;
import java.util.Collection;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
/**
 * Parameterized test of {@code Formatter.formatLatexFormulas(String)}: every
 * data row pairs an HTML fragment with the expectation whether the formatted
 * output contains a {@code <script} element.
 *
 * Initial date: 23 avr. 2018<br>
 * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
 *
 */
@RunWith(Parameterized.class)
public class FormatLatexFormulasTest {

	/**
	 * @return The test rows, each made of the HTML fragment and the expected
	 *         presence of the script in the formatted output
	 */
	@Parameters
	public static Collection<Object[]> data() {
		Object[][] rows = {
			{ "<span class='math'></span>", Boolean.TRUE },
			{ "<span class='math inline'></span>", Boolean.TRUE },
			{ "<div class = \"inline math special\"></div>", Boolean.TRUE },
			{ "<span class = math></span>", Boolean.TRUE },
			{ "<math></math>", Boolean.TRUE },
			{ "<MATH></MATH>", Boolean.TRUE },
			{ "<span class = \"nomath\"></span>", Boolean.FALSE },
			{ "<span class='test' id='math'></span>", Boolean.FALSE },
			{ "<span class='math\"></span>", Boolean.FALSE },
			// real example of a wiki
			{ "&lt;h2&gt;Hier den unformatierten Text eingeben&lt;/h2&gt;<p>Ceci est une page avec LaTeX\n</p><div class=\"math\">x^2</div>", Boolean.TRUE },
			{ "<span class=\"math\" title=\"A%20%3D%20%5Cpmatrix%7Ba_%7B11%7D%20%26%20a_%7B12%7D%20%26%20%5Cldots%20%26%20a_%7B1n%7D%5Ccr%0A%20%20%20%20%20%20%20%20%20%20%20%20%20a_%7B21%7D%20%26%20a_%7B22%7D%20%26%20%5Cldots%20%26%20a_%7B2n%7D%5Ccr%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%5Cvdots%20%26%20%5Cvdots%20%26%20%5Cddots%20%26%20%5Cvdots%5Ccr%0A%20%20%20%20%20%20%20%20%20%20%20%20%20a_%7Bm1%7D%20%26%20a_%7Bm2%7D%20%26%20%5Cldots%20%26%20a_%7Bmn%7D%7D%0A%5Cqquad%5Cqquad%0A%5Cpmatrix%7By_1%5Ccr%5Cvdots%5Ccr%20y_k%7D\">A = \\pmatrix{a_{11} &amp; a_{12} &amp; \\ldots &amp; a_{1n}\\cr a_{21} &amp; a_{22} &amp; \\ldots &amp; a_{2n}\\cr \\vdots &amp; \\vdots &amp; \\ddots &amp; \\vdots\\cr a_{m1} &amp; a_{m2} &amp; \\ldots &amp; a_{mn}} \\qquad\\qquad \\pmatrix{y_1\\cr\\vdots\\cr y_k}</span></p>\n\"", Boolean.TRUE },
			{ "<DIV CLASS='math'></DIV>", Boolean.TRUE },
			// malformed fragments
			{ "<span class='math\"''></span>", Boolean.FALSE },
			{ "<span class='math\"'\"></span>", Boolean.FALSE },
			{ "<span class='math></\"span>", Boolean.FALSE },
			{ "<span class='math></'span>", Boolean.FALSE },
			{ "<span class='math", Boolean.TRUE },
			{ "<span class=math\"", Boolean.FALSE }
		};
		return Arrays.asList(rows);
	}

	private final String htmlFragment;
	private final Boolean scriptExpected;

	/**
	 * @param text The HTML fragment to format
	 * @param result {@code TRUE} if the formatted output must contain the script
	 */
	public FormatLatexFormulasTest(String text, Boolean result) {
		this.htmlFragment = text;
		this.scriptExpected = result;
	}

	/**
	 * Formats the fragment and compares the presence of the script with the
	 * expectation; the fragment itself is used as failure message.
	 */
	@Test
	public void formatLatexFormulas() {
		String formatted = Formatter.formatLatexFormulas(htmlFragment);
		boolean scriptAppended = formatted.contains("<script");
		Assert.assertEquals(htmlFragment, scriptExpected.booleanValue(), scriptAppended);
	}
}
......@@ -132,33 +132,4 @@ public class FormatterTest {
Assert.assertEquals("32:23:45", Formatter.formatTimecode(116625000l));
Assert.assertEquals("532:23:45", Formatter.formatTimecode(1916625000l));
}
/**
 * Checks for a set of well-formed fragments whether the formatter appends
 * the script to the output.
 */
@Test
public void formatLatexFormulas() {
	// fragments with the CSS class math: the script is expected
	String[] withMath = {
		"<span class='math'></span>",
		"<span class='math inline'></span>",
		"<div class = \"inline math special\"></div>",
		"<span class = math></span>"
	};
	for (String fragment : withMath) {
		Assert.assertTrue(Formatter.formatLatexFormulas(fragment).contains("<script"));
	}

	// fragments without the CSS class math: no script is expected
	String[] withoutMath = {
		"<span class = \"nomath\"></span>",
		"<span class='test' id='math'></span>",
		"<span class='math\"></span>"
	};
	for (String fragment : withoutMath) {
		Assert.assertFalse(Formatter.formatLatexFormulas(fragment).contains("<script"));
	}

	//wiki
	String wikiFormatted = Formatter.formatLatexFormulas("<DIV CLASS=\"math\"></span>");
	Assert.assertTrue(wikiFormatted.contains("<script"));
}
/**
 * The purpose of this case is not primarily to check the result of the
 * method, but to make sure that malformed HTML code cannot produce an
 * infinite loop.
 *
 */
@Test
public void formatLatexFormulas_edgeCase() {
	String[] malformedWithoutScript = {
		"<span class='math\"''></span>",
		"<span class='math\"'\"></span>",
		"<span class='math></\"span>",
		"<span class='math></'span>",
		"<span class='math"
	};
	for (String fragment : malformedWithoutScript) {
		Assert.assertFalse(Formatter.formatLatexFormulas(fragment).contains("<script"));
	}

	String unterminated = Formatter.formatLatexFormulas("<span class=math\"");
	Assert.assertTrue(unterminated.contains("<script"));
}
}
......@@ -65,6 +65,7 @@ import org.junit.runners.Suite;
org.olat.core.util.FileUtilsTest.class,
org.olat.core.util.FileNameSuffixFilterTest.class,
org.olat.core.util.FormatterTest.class,
org.olat.core.util.FormatLatexFormulasTest.class,
org.olat.core.util.FormatterHourAndSecondsTest.class,
org.olat.core.util.EncoderTest.class,
org.olat.core.util.SimpleHtmlParserTest.class,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment