diff --git a/src/main/java/org/olat/core/util/filter/impl/NekoHTMLFilter.java b/src/main/java/org/olat/core/util/filter/impl/NekoHTMLFilter.java index 45531c185605060a13f81b54d760964d2d299d97..9a2169475dbf3c0ae497f04f2bc85d1b04fad471 100644 --- a/src/main/java/org/olat/core/util/filter/impl/NekoHTMLFilter.java +++ b/src/main/java/org/olat/core/util/filter/impl/NekoHTMLFilter.java @@ -140,18 +140,37 @@ public class NekoHTMLFilter implements Filter { String elem = localName.toLowerCase(); if(toBeSkippedTags.contains(elem)) { collect = false; - // add a single whitespace before each block element but only if not there is not already a whitespace there } else { if(pretty) { + // emit line breaks so the plain text mirrors how the HTML would be rendered if("li".equals(elem)) { content.append("\u00B7 "); + } else if("ul".equals(elem)) { + // add break before the list start (for correct rendering of first list element) + content.append('\n'); } else if("br".equals(elem)) { content.append('\n'); + } else if("p".equals(elem)) { + // for p tags: line break before the paragraph; the trailing break is appended when the closing tag is handled + content.append('\n'); + } else if("h2".equals(elem) || "h3".equals(elem) || "h4".equals(elem) || "h5".equals(elem) || "h6".equals(elem)) { + // for h2-h6 tags: blank line before; the trailing break is appended when the closing tag is handled. H1, which is usually the start of the page, gets no leading break.
+ content.append("\n\n"); + } + // preserve links + if ("a".equals(elem)) { + String href = attributes.getValue("href"); + // write absolute http(s) URLs only (scheme matched case-insensitively); relative links, anchors and javascript: hrefs are dropped + if (href != null && (href.regionMatches(true, 0, "http://", 0, 7) || href.regionMatches(true, 0, "https://", 0, 8))) { + content.append(href); + content.append(" "); + } } } if("title".equals(elem)) { consumeTitle = true; } + // add a single whitespace before each block element but only if there is not already a whitespace if(blockTags.contains(elem) && content.length() > 0 && content.charAt(content.length() -1) != ' ' ) { consumeBlanck = true; } @@ -162,7 +181,8 @@ public class NekoHTMLFilter implements Filter { public void characters(char[] chars, int offset, int length) { if(collect) { if(consumeBlanck) { - if(content.length() > 0 && content.charAt(content.length() -1) != ' ' && length > 0 && chars[offset] != ' ') { + if(content.length() > 0 && content.charAt(content.length() -1) != ' ' && length > 0 && chars[offset] != ' ' && content.charAt(content.length() -1) != '\n') { + // Add space only if there is not already space and we are not right after a line break content.append(' '); } consumeBlanck = false; @@ -180,7 +200,8 @@ public class NekoHTMLFilter implements Filter { if(toBeSkippedTags.contains(elem)) { collect = true; } else { - if(pretty && ("li".equals(elem) || "p".equals(elem))) { + if(pretty && ("li".equals(elem) || "p".equals(elem) || "h1".equals(elem) || "h2".equals(elem) || "h3".equals(elem) || "h4".equals(elem) || "h5".equals(elem) || "h6".equals(elem) )) { + // start with new line after paragraph, list item and header elements content.append('\n'); } if("title".equals(elem)) { diff --git a/src/test/java/org/olat/core/util/filter/impl/NekoHTMLFilterTest.java b/src/test/java/org/olat/core/util/filter/impl/NekoHTMLFilterTest.java index a814a0bdeb4f2753f8de4e532fb32a1bb31550f3..a909db60567d1fa48681f0f346ccd79b2ec403f9 100644 --- a/src/test/java/org/olat/core/util/filter/impl/NekoHTMLFilterTest.java +++ 
b/src/test/java/org/olat/core/util/filter/impl/NekoHTMLFilterTest.java @@ -53,6 +53,9 @@ public class NekoHTMLFilterTest{ private void t(String input, String result) { Assert.assertEquals(result, filter.filter(input)); } + private void p(String input, String result) { + Assert.assertEquals(result, filter.filter(input, true)); + } @Test public void escaping() { @@ -78,22 +81,39 @@ public class NekoHTMLFilterTest{ @Test public void testBRAndPReplacement() { t("<br>", ""); + p("<br>", "\n"); t("<p>", ""); + p("<p>", "\n\n"); t("<br >", ""); + p("<br >", "\n"); t("<p >", ""); + p("<p >", "\n\n"); t("<br/>", ""); + p("<br/>", "\n"); t("<p/>", ""); + p("<p/>", "\n\n"); t("<br />", ""); + p("<br />", "\n"); t("<p />", ""); + p("<p />", "\n\n"); t("bla<br>bla", "bla bla"); + p("bla<br>bla", "bla\nbla"); t("bla<p>bla", "bla bla"); + p("bla<p>bla", "bla\nbla\n"); t("bla<br >bla", "bla bla"); + p("bla<br >bla", "bla\nbla"); t("bla<p >bla", "bla bla"); + p("bla<p >bla", "bla\nbla\n"); t("bla<br/>bla", "bla bla"); + p("bla<br/>bla", "bla\nbla"); t("bla<p/>bla", "bla bla"); + p("bla<p/>bla", "bla\nbla\n"); // invalid html anyway t("bla<br />bla", "bla bla"); + p("bla<br />bla", "bla\nbla"); t("bla<p />bla", "bla bla"); + p("bla<p />bla", "bla\nbla\n"); // invalid html anyway t("hello<br /> world", "hello world"); + p("hello<br /> world", "hello\n world"); } @Test public void testStyleTags() { @@ -119,11 +139,32 @@ public class NekoHTMLFilterTest{ @Test public void testHtmlText() { String htmlText = "<html><head><meta name=\"generator\" content=\"olat-tinymce-1\"><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"></head><body>" + "<H1>Test HTML Seite fuer JUnit Test</H1>" - + "Dies ist<br />der Test Text" + + "Dies ist<br />der Test Text." + + "<h2>And now something else</h2>Hello, really." + "</body></html>"; // Text = 'Dies ist der Test Text' - String text = "Test HTML Seite fuer JUnit Test Dies ist der Test\u00A0Text"; // must include '\u00A0' !!! 
19.5.2010/cg + String text = "Test HTML Seite fuer JUnit Test Dies ist der Test\u00A0Text. And now something else Hello, really."; // must include '\u00A0' !!! 19.5.2010/cg t(htmlText,text); + p(htmlText,"Test HTML Seite fuer JUnit Test\nDies ist\nder Test\u00A0Text.\n\nAnd now something else\nHello, really."); } + + @Test public void testHTMLLinks() { + t("<a name=\"gugus\">GO HERE</a>", "GO HERE"); + p("<a name=\"gugus\">GO HERE</a>", "GO HERE"); + t("<a href=\"#top\">GO HERE</a>", "GO HERE"); + p("<a href=\"#top\">GO HERE</a>", "GO HERE"); + t("<a href=\"javascript:alert('sdf')\">GO HERE</a>", "GO HERE"); + p("<a href=\"javascript:alert('sdf')\">GO HERE</a>", "GO HERE"); + t("<a href=\"index.html\">GO HERE</a>", "GO HERE"); + p("<a href=\"index.html\">GO HERE</a>", "GO HERE"); + t("<a href=\"/go/here/index.html\">GO HERE</a>", "GO HERE"); + p("<a href=\"/go/here/index.html\">GO HERE</a>", "GO HERE"); + t("<a href=\"http://www.openolat.org/go/here/index.html\" title=\\\"super site\\\">GO HERE</a>", "GO HERE"); + p("<a href=\"http://www.openolat.org/go/here/index.html\" title=\\\"super site\\\">GO HERE</a>", "http://www.openolat.org/go/here/index.html GO HERE"); + t("<a href=\"https://www.openolat.org/go/here/index.html\" title=\"super site\">GO HERE</a>", "GO HERE"); + p("<a href=\"https://www.openolat.org/go/here/index.html\" title=\"super site\">GO HERE</a>", "https://www.openolat.org/go/here/index.html GO HERE"); + + + } @Test public void testHtmlTextAndTitle() {