From 3071feec786afec8a297077c6d5c7c5755b5a3ea Mon Sep 17 00:00:00 2001
From: srosse <none@none>
Date: Wed, 22 Apr 2015 14:29:08 +0200
Subject: [PATCH] OO-1522: limit the maximum size of the indexed content per
 document, use the maxFileSize as variable

---
 .../core/util/filter/impl/NekoHTMLFilter.java |  32 ++--
 .../core/util/io/LimitedContentWriter.java    | 110 ++++++++++++++
 .../olat/core/util/io/ShieldInputStream.java  |   5 +-
 .../org/olat/core/util/vfs/JavaIOItem.java    |  34 +++++
 .../org/olat/core/util/vfs/LocalImpl.java     |   2 +-
 .../org/olat/search/model/OlatDocument.java   |  53 ++++---
 .../AbstractOfficeDocumentComparator.java     |  72 +++++++++
 .../service/document/file/ExcelDocument.java  |  10 +-
 .../document/file/ExcelOOXMLDocument.java     |  63 +++-----
 .../service/document/file/FileContent.java    |  51 +++++++
 .../service/document/file/FileDocument.java   |  21 ---
 .../document/file/FileDocumentFactory.java    |   4 +
 .../service/document/file/HtmlDocument.java   |  28 ++--
 .../service/document/file/OpenDocument.java   |  18 +--
 .../service/document/file/PdfDocument.java    |  47 +++---
 .../document/file/PowerPointDocument.java     | 129 ++++++++--------
 .../file/PowerPointOOXMLDocument.java         |  92 ++++++++----
 .../service/document/file/TextDocument.java   |  99 ++++++------
 .../service/document/file/WordDocument.java   | 141 +++++++++---------
 .../document/file/WordOOXMLDocument.java      | 127 +++++++++-------
 .../document/file/pdf/PdfBoxExtractor.java    |  31 ++--
 .../document/file/utils/SlicedDocument.java   |  87 -----------
 .../file/FileDocumentFactoryTest.java         |   7 +-
 .../document/file/OfficeDocumentTest.java     |  55 ++++++-
 .../document/file/PDFDocumentTest.java        |  17 ++-
 .../java/org/olat/test/VFSJavaIOFile.java     |   8 +-
 26 files changed, 815 insertions(+), 528 deletions(-)
 create mode 100644 src/main/java/org/olat/core/util/io/LimitedContentWriter.java
 create mode 100644 src/main/java/org/olat/core/util/vfs/JavaIOItem.java
 create mode 100644 src/main/java/org/olat/search/service/document/file/AbstractOfficeDocumentComparator.java
 create mode 100644 src/main/java/org/olat/search/service/document/file/FileContent.java
 delete mode 100644 src/main/java/org/olat/search/service/document/file/utils/SlicedDocument.java

diff --git a/src/main/java/org/olat/core/util/filter/impl/NekoHTMLFilter.java b/src/main/java/org/olat/core/util/filter/impl/NekoHTMLFilter.java
index bead7cb6601..3370d64a250 100644
--- a/src/main/java/org/olat/core/util/filter/impl/NekoHTMLFilter.java
+++ b/src/main/java/org/olat/core/util/filter/impl/NekoHTMLFilter.java
@@ -29,6 +29,8 @@ import java.util.Set;
 import org.cyberneko.html.parsers.SAXParser;
 import org.olat.core.logging.LogDelegator;
 import org.olat.core.util.filter.Filter;
+import org.olat.core.util.io.LimitedContentWriter;
+import org.olat.search.service.document.file.FileDocumentFactory;
 import org.xml.sax.Attributes;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
@@ -97,9 +99,9 @@ public class NekoHTMLFilter extends LogDelegator implements Filter {
 	
 	public static class NekoContent {
 		private final String title;
-		private final String content;
+		private final LimitedContentWriter content;
 		
-		public NekoContent(String title, String content) {
+		public NekoContent(String title, LimitedContentWriter content) {
 			this.title = title;
 			this.content = content;
 		}
@@ -109,7 +111,7 @@ public class NekoHTMLFilter extends LogDelegator implements Filter {
 		}
 		
 		public String getContent() {
-			return content;
+			return content.toString();
 		}
 	}
 	
@@ -118,12 +120,12 @@ public class NekoHTMLFilter extends LogDelegator implements Filter {
 		private boolean consumeBlanck = false;
 		private boolean consumeTitle = true;
 		private final boolean pretty;
-		private final StringBuilder sb;
+		private final LimitedContentWriter content;
 		private final StringBuilder title;
 		
 		public HTMLHandler(int size, boolean pretty) {
 			this.pretty = pretty;
-			sb = new StringBuilder(size);
+			content = new LimitedContentWriter(size, FileDocumentFactory.getMaxFileSize());
 			title = new StringBuilder(32);
 		}
 
@@ -136,15 +138,15 @@ public class NekoHTMLFilter extends LogDelegator implements Filter {
 			} else {
 				if(pretty) {
 					if("li".equals(elem)) {
-						sb.append("\u00B7 ");
+						content.append("\u00B7 ");
 					} else if("br".equals(elem)) {
-						sb.append('\n');
+						content.append('\n');
 					}
 				}
 				if("title".equals(elem)) {
 					consumeTitle = true;
 				}
-				if(blockTags.contains(elem) && sb.length() > 0 && sb.charAt(sb.length() -1) != ' ' ) {
+				if(blockTags.contains(elem) && content.length() > 0 && content.charAt(content.length() -1) != ' ' ) {
 					consumeBlanck = true;
 				}
 			}
@@ -154,12 +156,12 @@ public class NekoHTMLFilter extends LogDelegator implements Filter {
 		public void characters(char[] chars, int offset, int length) {
 			if(collect) {
 				if(consumeBlanck) {
-					if(sb.length() > 0 && sb.charAt(sb.length() -1) != ' ' && length > 0 && chars[offset] != ' ') { 
-						sb.append(' ');
+					if(content.length() > 0 && content.charAt(content.length() -1) != ' ' && length > 0 && chars[offset] != ' ') { 
+						content.append(' ');
 					}
 					consumeBlanck = false;
 				}
-				sb.append(chars, offset, length);
+				content.write(chars, offset, length);
 				if(consumeTitle) {
 					title.append(chars, offset, length);
 				}
@@ -173,24 +175,24 @@ public class NekoHTMLFilter extends LogDelegator implements Filter {
 				collect = true;
 			} else {
 				if(pretty && ("li".equals(elem) || "p".equals(elem))) {
-					sb.append('\n');
+					content.append('\n');
 				}
 				if("title".equals(elem)) {
 					consumeTitle = false;
 				}
-				if(blockTags.contains(elem) && sb.length() > 0 && sb.charAt(sb.length() -1) != ' ' ) {
+				if(blockTags.contains(elem) && content.length() > 0 && content.charAt(content.length() -1) != ' ' ) {
 					consumeBlanck = true;
 				}
 			}
 		}
 
 		public NekoContent getContent() {
-			return new NekoContent(title.toString(), sb.toString());
+			return new NekoContent(title.toString(), content);
 		}
 		
 		@Override
 		public String toString() {
-			return sb.toString();
+			return content.toString();
 		}
 	}
 }
diff --git a/src/main/java/org/olat/core/util/io/LimitedContentWriter.java b/src/main/java/org/olat/core/util/io/LimitedContentWriter.java
new file mode 100644
index 00000000000..b81fed8387e
--- /dev/null
+++ b/src/main/java/org/olat/core/util/io/LimitedContentWriter.java
@@ -0,0 +1,110 @@
+/**
+ * <a href="http://www.openolat.org">
+ * OpenOLAT - Online Learning and Training</a><br>
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); <br>
+ * you may not use this file except in compliance with the License.<br>
+ * You may obtain a copy of the License at the
+ * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
+ * <p>
+ * Unless required by applicable law or agreed to in writing,<br>
+ * software distributed under the License is distributed on an "AS IS" BASIS, <br>
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
+ * See the License for the specific language governing permissions and <br>
+ * limitations under the License.
+ * <p>
+ * Initial code contributed and copyrighted by<br>
+ * frentix GmbH, http://www.frentix.com
+ * <p>
+ */
+package org.olat.core.util.io;
+
+import java.io.Writer;
+
+/**
+ * 
+ * Initial date: 21.04.2015<br>
+ * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
+ *
+ */
+public class LimitedContentWriter extends Writer {
+
+	private final StringBuilder sb;
+	private final int maxSize;
+
+	/**
+	 * @param len the initial capacity of the buffer (capped to maxSize)
+	 * @param maxSize the maximum number of characters which are kept
+	 */
+	public LimitedContentWriter(int len, int maxSize) {
+		sb = new StringBuilder(Math.min(len, maxSize));
+		this.maxSize = maxSize;
+	}
+	
+	protected LimitedContentWriter(int len) {
+		this(len, Integer.MAX_VALUE);
+	}
+
+	/**
+	 * Like the append(CharSequence...) methods, cuts a chunk which crosses the
+	 * size limit off at the limit instead of dropping it as a whole.
+	 */
+	@Override
+	public void write(char[] cbuf, int off, int len) {
+		if(accept()) {
+			sb.append(cbuf, off, Math.min(len, maxSize - sb.length()));
+		}
+	}
+	
+	@Override
+	public Writer append(CharSequence seq, int start, int end) {
+		if(accept()) {
+			// compare lengths: "start + remaining room" could overflow an int
+			int length = Math.min(end - start, maxSize - sb.length());
+			sb.append(seq, start, start + length);
+		}
+		return this;
+	}
+
+	/**
+	 * As specified by Writer, null is appended as the four characters "null".
+	 */
+	@Override
+	public Writer append(CharSequence csq) {
+		CharSequence seq = csq == null ? "null" : csq;
+		return append(seq, 0, seq.length());
+	}
+
+	@Override
+	public Writer append(char c) {
+		if(accept()) sb.append(c);
+		return this;
+	}
+	
+	public final boolean accept() {
+		return sb.length() < maxSize;
+	}
+
+	@Override
+	public void flush() {
+		//
+	}
+
+	@Override
+	public void close() {
+		//
+	}
+	
+	public int length() {
+		return sb.length();
+	}
+	
+	public char charAt(int index) {
+		return sb.charAt(index);
+	}
+	
+	@Override
+	public String toString() {
+		return sb.toString();
+	}
+}
diff --git a/src/main/java/org/olat/core/util/io/ShieldInputStream.java b/src/main/java/org/olat/core/util/io/ShieldInputStream.java
index 9b9914bd8ed..117b9d3f279 100644
--- a/src/main/java/org/olat/core/util/io/ShieldInputStream.java
+++ b/src/main/java/org/olat/core/util/io/ShieldInputStream.java
@@ -21,7 +21,6 @@ package org.olat.core.util.io;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.zip.ZipInputStream;
 
 /**
  * It's a wrapper for a ZIP input stream which MUST not be closed
@@ -33,9 +32,9 @@ import java.util.zip.ZipInputStream;
  * @author srosse, stephane.rosse@frentix.com, http://www.frentix
  */
 public class ShieldInputStream extends InputStream {
-	private final ZipInputStream delegate;
+	private final InputStream delegate;
 	
-	public ShieldInputStream(ZipInputStream delegate) {
+	public ShieldInputStream(InputStream delegate) {
 		this.delegate = delegate;
 	}
 	
diff --git a/src/main/java/org/olat/core/util/vfs/JavaIOItem.java b/src/main/java/org/olat/core/util/vfs/JavaIOItem.java
new file mode 100644
index 00000000000..9aa1223d9e0
--- /dev/null
+++ b/src/main/java/org/olat/core/util/vfs/JavaIOItem.java
@@ -0,0 +1,34 @@
+/**
+ * <a href="http://www.openolat.org">
+ * OpenOLAT - Online Learning and Training</a><br>
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); <br>
+ * you may not use this file except in compliance with the License.<br>
+ * You may obtain a copy of the License at the
+ * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
+ * <p>
+ * Unless required by applicable law or agreed to in writing,<br>
+ * software distributed under the License is distributed on an "AS IS" BASIS, <br>
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
+ * See the License for the specific language governing permissions and <br>
+ * limitations under the License.
+ * <p>
+ * Initial code contributed and copyrighted by<br>
+ * frentix GmbH, http://www.frentix.com
+ * <p>
+ */
+package org.olat.core.util.vfs;
+
+import java.io.File;
+
+/**
+ * 
+ * Initial date: 22.04.2015<br>
+ * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
+ *
+ */
+public interface JavaIOItem {
+	/** @return the java.io.File of this item (presumably the file which backs it on disk; confirm with the implementors) */
+	public File getBasefile();
+
+}
diff --git a/src/main/java/org/olat/core/util/vfs/LocalImpl.java b/src/main/java/org/olat/core/util/vfs/LocalImpl.java
index 1ceab961103..48ad07e9433 100644
--- a/src/main/java/org/olat/core/util/vfs/LocalImpl.java
+++ b/src/main/java/org/olat/core/util/vfs/LocalImpl.java
@@ -39,7 +39,7 @@ import org.olat.core.util.vfs.callbacks.VFSSecurityCallback;
  *
  * @author Felix Jost
  */
-public abstract class LocalImpl implements VFSItem {
+public abstract class LocalImpl implements VFSItem, JavaIOItem {
 
 	private File basefile;
 	private VFSContainer parentContainer;
diff --git a/src/main/java/org/olat/search/model/OlatDocument.java b/src/main/java/org/olat/search/model/OlatDocument.java
index 7a2472f421c..ee790ad0c1b 100644
--- a/src/main/java/org/olat/search/model/OlatDocument.java
+++ b/src/main/java/org/olat/search/model/OlatDocument.java
@@ -25,6 +25,7 @@
 
 package org.olat.search.model;
 
+import java.util.Date;
 import java.util.List;
 import java.util.Map.Entry;
 import java.util.StringTokenizer;
@@ -85,7 +86,7 @@ public class OlatDocument extends AbstractOlatDocument {
 		}
 		document.add(createTextField(TITLE_FIELD_NAME,getTitle(), 4));
 		document.add(createTextField(DESCRIPTION_FIELD_NAME,getDescription(), 2));
-		document.add(createTextField(CONTENT_FIELD_NAME,getContent(), 0.5f ) );
+		document.add(createTextField(CONTENT_FIELD_NAME, getContent(), 0.5f));
 		document.add(new StringField(RESOURCEURL_FIELD_NAME, getResourceUrl(), Field.Store.YES));
 		document.add(new StringField(DOCUMENTTYPE_FIELD_NAME,getDocumentType(), Field.Store.YES));
 		if(getCssIcon() != null) {
@@ -95,26 +96,26 @@ public class OlatDocument extends AbstractOlatDocument {
 		document.add(createTextField(AUTHOR_FIELD_NAME,getAuthor(), 2));
     
 		try {
-    	if(getCreatedDate() != null) {
-    		document.add(new StringField(CREATED_FIELD_NAME, DateTools.dateToString(getCreatedDate(), DateTools.Resolution.DAY), Field.Store.YES) );
-    	}
-    }catch (Exception ex) {
-    	// No createdDate set => does not add field
-    }
-    try {
-    	if(getLastChange() != null) {
-    		document.add(new StringField(CHANGED_FIELD_NAME, DateTools.dateToString(getLastChange(), DateTools.Resolution.DAY), Field.Store.YES) );
-    	}
-    }catch (Exception ex) {
-    	// No changedDate set => does not add field
-    }
-    try {
-    	if(getTimestamp() != null) {
-    		document.add(new StringField(TIME_STAMP_NAME, DateTools.dateToString(getTimestamp(), DateTools.Resolution.MILLISECOND), Field.Store.YES) );
-    	}
-    }catch (Exception ex) {
-    	// No changedDate set => does not add field
-    }
+			if(getCreatedDate() != null) {
+				document.add(createDayField(CREATED_FIELD_NAME, getCreatedDate()));
+			}
+		} catch (Exception ex) {
+			// No createdDate set => does not add field
+		}
+		try {
+			if (getLastChange() != null) {
+				document.add(createDayField(CHANGED_FIELD_NAME, getLastChange()));
+			}
+		} catch (Exception ex) {
+			// No changedDate set => does not add field
+		}
+		try {
+			if (getTimestamp() != null) {
+				document.add(createMillisecondField(TIME_STAMP_NAME, getTimestamp()));
+			}
+		} catch (Exception ex) {
+			// No changedDate set => does not add field
+		}
     
 		// Add various metadata
 		if (metadata != null) {
@@ -136,7 +137,7 @@ public class OlatDocument extends AbstractOlatDocument {
 		} else {
 			document.add(new StringField(RESERVED_TO, "public", Field.Store.YES));
 		}
-	  return document;
+		return document;
 	}
 
 	/**
@@ -151,4 +152,12 @@ public class OlatDocument extends AbstractOlatDocument {
 		field.setBoost(boost);
 		return field;
 	}
+	
+	protected static Field createDayField(String fieldName, Date date) {
+		return new StringField(fieldName, DateTools.dateToString(date, DateTools.Resolution.DAY), Field.Store.YES);
+	}
+	
+	protected static Field createMillisecondField(String fieldName, Date date) {
+		return new StringField(fieldName, DateTools.dateToString(date, DateTools.Resolution.MILLISECOND), Field.Store.YES);
+	}
 }
diff --git a/src/main/java/org/olat/search/service/document/file/AbstractOfficeDocumentComparator.java b/src/main/java/org/olat/search/service/document/file/AbstractOfficeDocumentComparator.java
new file mode 100644
index 00000000000..08e7dc53fdd
--- /dev/null
+++ b/src/main/java/org/olat/search/service/document/file/AbstractOfficeDocumentComparator.java
@@ -0,0 +1,72 @@
+/**
+ * <a href="http://www.openolat.org">
+ * OpenOLAT - Online Learning and Training</a><br>
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); <br>
+ * you may not use this file except in compliance with the License.<br>
+ * You may obtain a copy of the License at the
+ * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
+ * <p>
+ * Unless required by applicable law or agreed to in writing,<br>
+ * software distributed under the License is distributed on an "AS IS" BASIS, <br>
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
+ * See the License for the specific language governing permissions and <br>
+ * limitations under the License.
+ * <p>
+ * Initial code contributed and copyrighted by<br>
+ * frentix GmbH, http://www.frentix.com
+ * <p>
+ */
+package org.olat.search.service.document.file;
+
+import java.util.Comparator;
+
+import org.olat.core.util.StringHelper;
+
+/**
+ * 
+ * Initial date: 22.04.2015<br>
+ * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
+ *
+ */
+public abstract class AbstractOfficeDocumentComparator implements Comparator<String> {
+	
+	/**
+	 * Compares two entry names of the form prefix + number + ".xml" by the number.
+	 * Two numbered names are ordered by descending number (same sign as the former
+	 * "pl2 - pl1"), a numbered name comes before an unnumbered one and two
+	 * unnumbered names are equal. NOTE(review): confirm that descending is intended.
+	 *
+	 * @param f1 the first entry name
+	 * @param f2 the second entry name
+	 * @param prefix the part of the names in front of the number
+	 * @return a negative value, zero or a positive value as for Comparator.compare
+	 */
+	protected int comparePosition(String f1, String f2, String prefix) {
+		String p1 = extractPosition(f1, prefix);
+		String p2 = extractPosition(f2, prefix);
+		boolean l1 = p1 != null && StringHelper.isLong(p1);
+		boolean l2 = p2 != null && StringHelper.isLong(p2);
+		if(l1 && l2) {
+			try {
+				// Long.compare cannot overflow, unlike casting the difference to int
+				return Long.compare(Long.parseLong(p2), Long.parseLong(p1));
+			} catch (NumberFormatException e) {
+				return 0;// isLong() accepted a value parseLong() rejects: treat as equal
+			}
+		}
+		if(l1) {
+			return -1;
+		}
+		return l2 ? 1 : 0;
+	}
+
+	/** Returns the text between the prefix and the trailing ".xml", or null if the name is too short or has another suffix. */
+	private static String extractPosition(String name, String prefix) {
+		int end = name.length() - ".xml".length();
+		if(end < prefix.length() || !name.endsWith(".xml")) {
+			return null;
+		}
+		return name.substring(prefix.length(), end);
+	}
+}
diff --git a/src/main/java/org/olat/search/service/document/file/ExcelDocument.java b/src/main/java/org/olat/search/service/document/file/ExcelDocument.java
index 39c36c012ff..7dc32d92c8d 100644
--- a/src/main/java/org/olat/search/service/document/file/ExcelDocument.java
+++ b/src/main/java/org/olat/search/service/document/file/ExcelDocument.java
@@ -38,6 +38,7 @@ import org.apache.poi.ss.usermodel.Cell;
 import org.olat.core.gui.util.CSSHelper;
 import org.olat.core.logging.OLog;
 import org.olat.core.logging.Tracing;
+import org.olat.core.util.io.LimitedContentWriter;
 import org.olat.core.util.vfs.VFSLeaf;
 import org.olat.search.service.SearchResourceContext;
 
@@ -56,8 +57,8 @@ public class ExcelDocument extends FileDocument {
 		//
 	}
 
-	public static Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf) throws IOException, DocumentException,
-			DocumentAccessException {
+	public static Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf)
+	throws IOException, DocumentException, DocumentAccessException {
 		ExcelDocument excelDocument = new ExcelDocument();
 		excelDocument.init(leafResourceContext, leaf);
 		excelDocument.setFileType(FILE_TYPE);
@@ -66,6 +67,7 @@ public class ExcelDocument extends FileDocument {
 		return excelDocument.getLuceneDocument();
 	}
 
+	@Override
 	protected FileContent readContent(VFSLeaf leaf) throws IOException, DocumentException {
 		BufferedInputStream bis = null;
 		int cellNullCounter = 0;
@@ -74,7 +76,8 @@ public class ExcelDocument extends FileDocument {
 
 		try {
 			bis = new BufferedInputStream(leaf.getInputStream());
-			StringBuilder content = new StringBuilder(bis.available());
+
+			LimitedContentWriter content = new LimitedContentWriter(bis.available(), FileDocumentFactory.getMaxFileSize());
 			POIFSFileSystem fs = new POIFSFileSystem(bis);
 			HSSFWorkbook workbook = new HSSFWorkbook(fs);
 
@@ -110,6 +113,7 @@ public class ExcelDocument extends FileDocument {
 							+ sheetNullCounter);
 				}
 			}
+			content.close();
 			return new FileContent(content.toString());
 		} catch (Exception ex) {
 			throw new DocumentException("Can not read XLS Content. File=" + leaf.getName());
diff --git a/src/main/java/org/olat/search/service/document/file/ExcelOOXMLDocument.java b/src/main/java/org/olat/search/service/document/file/ExcelOOXMLDocument.java
index 29f7d7b872d..f1d7baa730d 100644
--- a/src/main/java/org/olat/search/service/document/file/ExcelOOXMLDocument.java
+++ b/src/main/java/org/olat/search/service/document/file/ExcelOOXMLDocument.java
@@ -30,11 +30,10 @@ import org.apache.lucene.document.Document;
 import org.olat.core.gui.util.CSSHelper;
 import org.olat.core.logging.OLog;
 import org.olat.core.logging.Tracing;
-import org.olat.core.util.FileUtils;
+import org.olat.core.util.io.LimitedContentWriter;
 import org.olat.core.util.io.ShieldInputStream;
 import org.olat.core.util.vfs.VFSLeaf;
 import org.olat.search.service.SearchResourceContext;
-import org.olat.search.service.document.file.utils.SlicedDocument;
 import org.xml.sax.Attributes;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
@@ -77,50 +76,38 @@ public class ExcelOOXMLDocument extends FileDocument {
 		Map<String,String> sharedStrings = parseSharedStrings(leaf);
 		//parse sheets
 		String content = parseSheets(sharedStrings, leaf);
-		
 		return new FileContent(content);
 	}
 	
-	
 	private String parseSheets(Map<String,String> sharedStrings, VFSLeaf leaf)  throws IOException, DocumentException {
-		InputStream stream = null;
-		ZipInputStream zip = null;
-		try {
-			stream = leaf.getInputStream();
-			zip = new ZipInputStream(stream);
-			ZipEntry entry = zip.getNextEntry();
+		try(InputStream stream = leaf.getInputStream();
+				ZipInputStream zip = new ZipInputStream(stream)) {
 			
-			SlicedDocument doc = new SlicedDocument();
+			ZipEntry entry = zip.getNextEntry();
+			LimitedContentWriter writer = new LimitedContentWriter(100000, FileDocumentFactory.getMaxFileSize());
 			while (entry != null) {
-				String name = entry.getName();
-				if(name.startsWith(SHEET) && name.endsWith(".xml")) {
-					String position = name.substring(SHEET.length(), name.indexOf(".xml"));
-					
-					OfficeDocumentHandler dh = new OfficeDocumentHandler(sharedStrings);
-					parse(new ShieldInputStream(zip), dh);
-					doc.setContent(Integer.parseInt(position), dh.getContent());
+				if(writer.accept()) {
+					String name = entry.getName();
+					if(name.startsWith(SHEET) && name.endsWith(".xml")) {
+						OfficeDocumentHandler dh = new OfficeDocumentHandler(writer, sharedStrings);
+						parse(new ShieldInputStream(zip), dh);
+					}
 				}
 				entry = zip.getNextEntry();
 			}
-			return doc.toStringAndClear();
+			return writer.toString();
 		} catch (DocumentException e) {
 			throw e;
 		} catch (Exception e) {
 			throw new DocumentException(e.getMessage());
-		} finally {
-			FileUtils.closeSafely(zip);
-			FileUtils.closeSafely(stream);
 		}
 	}
 	
 	private Map<String,String> parseSharedStrings( VFSLeaf leaf) throws IOException, DocumentException {
 		SharedStringsHandler dh = new SharedStringsHandler();
-		
-		InputStream stream = null;
-		ZipInputStream zip = null;
-		try {
-			stream = leaf.getInputStream();
-			zip = new ZipInputStream(stream);
+		try(InputStream stream = leaf.getInputStream();
+			ZipInputStream zip = new ZipInputStream(stream)) {
+
 			ZipEntry entry = zip.getNextEntry();
 			while (entry != null) {
 				String name = entry.getName();
@@ -135,9 +122,6 @@ public class ExcelOOXMLDocument extends FileDocument {
 			throw e;
 		} catch (Exception e) {
 			throw new DocumentException(e.getMessage());
-		} finally {
-			FileUtils.closeSafely(zip);
-			FileUtils.closeSafely(stream);
 		}
 	}
 	
@@ -147,7 +131,7 @@ public class ExcelOOXMLDocument extends FileDocument {
 			parser.setContentHandler(handler);
 			parser.setEntityResolver(handler);
 			try {
-			parser.setFeature("http://xml.org/sax/features/validation", false);
+				parser.setFeature("http://xml.org/sax/features/validation", false);
 			} catch(Exception e) {
 				log.error("Cannot deactivate validation", e);
 			}
@@ -157,21 +141,18 @@ public class ExcelOOXMLDocument extends FileDocument {
 		}
 	}
 	
-	private class OfficeDocumentHandler extends DefaultHandler {
-		private final StringBuilder sb = new StringBuilder();
+	private static class OfficeDocumentHandler extends DefaultHandler {
 		
 		private boolean row = false;
 		private boolean sharedStrings = false;
-		private Map<String,String> strings;
+		private final Map<String,String> strings;
+		private final LimitedContentWriter sb;
 		
-		public OfficeDocumentHandler(Map<String,String> strings) {
+		public OfficeDocumentHandler(LimitedContentWriter sb, Map<String,String> strings) {
+			this.sb = sb;
 			this.strings = strings;
 		}
 
-		public StringBuilder getContent() {
-			return sb;
-		}
-
 		@Override
 		public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
 			if("row".equals(qName)) {
@@ -211,7 +192,7 @@ public class ExcelOOXMLDocument extends FileDocument {
 				if(sb .length() > 0 && sb.charAt(sb.length() - 1) != ' '){
 					sb.append(' ');
 				}
-				sb.append(ch, start, length);
+				sb.write(ch, start, length);
 			}
 		}
 	}
diff --git a/src/main/java/org/olat/search/service/document/file/FileContent.java b/src/main/java/org/olat/search/service/document/file/FileContent.java
new file mode 100644
index 00000000000..14b0eb2b4bb
--- /dev/null
+++ b/src/main/java/org/olat/search/service/document/file/FileContent.java
@@ -0,0 +1,51 @@
+/**
+ * <a href="http://www.openolat.org">
+ * OpenOLAT - Online Learning and Training</a><br>
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); <br>
+ * you may not use this file except in compliance with the License.<br>
+ * You may obtain a copy of the License at the
+ * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
+ * <p>
+ * Unless required by applicable law or agreed to in writing,<br>
+ * software distributed under the License is distributed on an "AS IS" BASIS, <br>
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
+ * See the License for the specific language governing permissions and <br>
+ * limitations under the License.
+ * <p>
+ * Initial code contributed and copyrighted by<br>
+ * frentix GmbH, http://www.frentix.com
+ * <p>
+ */
+package org.olat.search.service.document.file;
+
+
+/**
+ * 
+ * Initial date: 08.04.2015<br>
+ * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
+ *
+ */
+public class FileContent {
+
+	private final String content;
+	private final String title;
+
+	/** Creates a content without title, getTitle() returns null. */
+	public FileContent(String content) {
+		this.title = null;
+		this.content = content;
+	}
+
+	/** Creates a content with the specified title. */
+	public FileContent(String title, String content) {
+		this.content = content;
+		this.title = title;
+	}
+
+	/** @return the title given to the constructor, null if there is none */
+	public String getTitle() { return this.title; }
+
+	/** @return the content given to the constructor */
+	public String getContent() { return this.content; }
+}
diff --git a/src/main/java/org/olat/search/service/document/file/FileDocument.java b/src/main/java/org/olat/search/service/document/file/FileDocument.java
index cd95ddcf678..f3f48312880 100644
--- a/src/main/java/org/olat/search/service/document/file/FileDocument.java
+++ b/src/main/java/org/olat/search/service/document/file/FileDocument.java
@@ -130,25 +130,4 @@ public abstract class FileDocument extends OlatDocument {
 	
 	abstract protected FileContent readContent(VFSLeaf leaf) throws IOException, DocumentException, DocumentAccessException;
 
-	public static class FileContent {
-		private final String title;
-		private final String content;
-		
-		public FileContent(String content) {
-			this(null, content);
-		}
-		
-		public FileContent(String title, String content) {
-			this.title = title;
-			this.content = content;
-		}
-		
-		public String getTitle() {
-			return title;
-		}
-		
-		public String getContent() {
-			return content;
-		}
-	}
 }
diff --git a/src/main/java/org/olat/search/service/document/file/FileDocumentFactory.java b/src/main/java/org/olat/search/service/document/file/FileDocumentFactory.java
index 219ac0c8d91..97cf3e2cd67 100644
--- a/src/main/java/org/olat/search/service/document/file/FileDocumentFactory.java
+++ b/src/main/java/org/olat/search/service/document/file/FileDocumentFactory.java
@@ -85,6 +85,10 @@ public class FileDocumentFactory {
 		searchModule = module;
 	}
 	
+	public static int getMaxFileSize() {// clamped: a bare (int) cast of a larger value wraps around, possibly to a negative limit
+		return (int)Math.min(Integer.MAX_VALUE, searchModule.getMaxFileSize());
+	}
+	
 	public Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf)
 	throws IOException, DocumentAccessException {
 		try {
diff --git a/src/main/java/org/olat/search/service/document/file/HtmlDocument.java b/src/main/java/org/olat/search/service/document/file/HtmlDocument.java
index c2f3b62f8dc..0aa234d4be4 100644
--- a/src/main/java/org/olat/search/service/document/file/HtmlDocument.java
+++ b/src/main/java/org/olat/search/service/document/file/HtmlDocument.java
@@ -32,7 +32,6 @@ import org.apache.lucene.document.Document;
 import org.olat.core.gui.util.CSSHelper;
 import org.olat.core.logging.OLog;
 import org.olat.core.logging.Tracing;
-import org.olat.core.util.FileUtils;
 import org.olat.core.util.filter.impl.NekoHTMLFilter;
 import org.olat.core.util.filter.impl.NekoHTMLFilter.NekoContent;
 import org.olat.core.util.vfs.VFSLeaf;
@@ -53,20 +52,25 @@ public class HtmlDocument extends FileDocument {
 	}
 	
 	public static Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf) throws IOException,DocumentException,DocumentAccessException {
-    HtmlDocument htmlDocument = new HtmlDocument();
-    htmlDocument.init(leafResourceContext,leaf);
-    htmlDocument.setFileType(FILE_TYPE);
-    htmlDocument.setCssIcon(CSSHelper.createFiletypeIconCssClassFor(leaf.getName()));
+		HtmlDocument htmlDocument = new HtmlDocument();
+		htmlDocument.init(leafResourceContext,leaf);
+		htmlDocument.setFileType(FILE_TYPE);
+		htmlDocument.setCssIcon(CSSHelper.createFiletypeIconCssClassFor(leaf.getName()));
 		if (log.isDebug() ) log.debug(htmlDocument.toString());
 		return htmlDocument.getLuceneDocument();
 	}
 	
-	protected FileContent readContent(VFSLeaf leaf) {
-		InputStream is = leaf.getInputStream();
-    // Remove all HTML and &nbsp; Tags
-    NekoContent output = new NekoHTMLFilter().filter(is);
-    if (log.isDebug() ) log.debug("HTML content without tags :" + output);
-  	FileUtils.closeSafely(is);
-		return new FileContent(output.getTitle(), output.getContent());
+	@Override
+	protected FileContent readContent(VFSLeaf leaf) throws DocumentException {
+		try(InputStream is = leaf.getInputStream()) {
+			// Remove all HTML and &nbsp; Tags
+			NekoContent output = new NekoHTMLFilter().filter(is);
+			if (log.isDebug())
+				log.debug("HTML content without tags :" + output);
+	
+			return new FileContent(output.getTitle(), output.getContent());
+		} catch(Exception e) {
+			throw new DocumentException(e.getMessage()); 
+		}
 	}
 }
diff --git a/src/main/java/org/olat/search/service/document/file/OpenDocument.java b/src/main/java/org/olat/search/service/document/file/OpenDocument.java
index 5dc2ae635e9..91f59ec4a18 100644
--- a/src/main/java/org/olat/search/service/document/file/OpenDocument.java
+++ b/src/main/java/org/olat/search/service/document/file/OpenDocument.java
@@ -30,6 +30,7 @@ import org.olat.core.gui.util.CSSHelper;
 import org.olat.core.logging.OLog;
 import org.olat.core.logging.Tracing;
 import org.olat.core.util.FileUtils;
+import org.olat.core.util.io.LimitedContentWriter;
 import org.olat.core.util.io.ShieldInputStream;
 import org.olat.core.util.vfs.VFSLeaf;
 import org.olat.search.service.SearchResourceContext;
@@ -103,7 +104,7 @@ public class OpenDocument extends FileDocument {
 			FileUtils.closeSafely(zip);
 			FileUtils.closeSafely(stream);
 		}
-		return new FileContent(dh.getContent());
+		return new FileContent(dh.toString());
 	}
 	
 	private void parse(InputStream stream, DefaultHandler handler) throws DocumentException {
@@ -112,9 +113,8 @@ public class OpenDocument extends FileDocument {
 			parser.setContentHandler(handler);
 			parser.setEntityResolver(handler);
 			try {
-			parser.setFeature("http://xml.org/sax/features/validation", false);
+				parser.setFeature("http://xml.org/sax/features/validation", false);
 			} catch(Exception e) {
-				e.printStackTrace();
 				//cannot desactivate validation
 			}
 			parser.parse(new InputSource(stream));
@@ -125,15 +125,7 @@ public class OpenDocument extends FileDocument {
 	
 	private class OpenDocumentHandler extends DefaultHandler {
 
-		private final StringBuilder sb = new StringBuilder();
-		
-		public OpenDocumentHandler() {
-			//
-		}
-
-		public String getContent() {
-			return sb.toString();
-		}
+		private final LimitedContentWriter sb = new LimitedContentWriter(5000, FileDocumentFactory.getMaxFileSize());
 
 		@Override
 		public InputSource resolveEntity(String publicId, String systemId) {
@@ -150,7 +142,7 @@ public class OpenDocument extends FileDocument {
 			if(sb .length() > 0 && sb.charAt(sb.length() - 1) != ' '){
 				sb.append(' ');
 			}
-			sb.append(ch, start, length);
+			sb.write(ch, start, length);
 		}
 	}
 }
diff --git a/src/main/java/org/olat/search/service/document/file/PdfDocument.java b/src/main/java/org/olat/search/service/document/file/PdfDocument.java
index 65be5390978..ad174712e23 100644
--- a/src/main/java/org/olat/search/service/document/file/PdfDocument.java
+++ b/src/main/java/org/olat/search/service/document/file/PdfDocument.java
@@ -25,9 +25,9 @@
 
 package org.olat.search.service.document.file;
 
-import java.io.BufferedInputStream;
+import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileInputStream;
+import java.io.FileReader;
 import java.io.IOException;
 
 import org.apache.lucene.document.Document;
@@ -35,7 +35,7 @@ import org.olat.core.CoreSpringFactory;
 import org.olat.core.gui.util.CSSHelper;
 import org.olat.core.logging.OLog;
 import org.olat.core.logging.Tracing;
-import org.olat.core.util.FileUtils;
+import org.olat.core.util.io.LimitedContentWriter;
 import org.olat.core.util.vfs.VFSLeaf;
 import org.olat.search.service.SearchResourceContext;
 import org.olat.search.service.SearchServiceFactory;
@@ -48,11 +48,11 @@ import org.olat.search.service.document.file.pdf.PdfExtractor;
  */
 public class PdfDocument extends FileDocument {
 	private static final long serialVersionUID = 6432923202585881794L;
-	private static OLog log = Tracing.createLoggerFor(PdfDocument.class);
+	private static final OLog log = Tracing.createLoggerFor(PdfDocument.class);
 
 	public final static String FILE_TYPE = "type.file.pdf";
 	
-  private boolean externalIndexer;
+	private boolean externalIndexer;
 	private String pdfTextBufferPath;
 	private String filePath;
 
@@ -119,21 +119,32 @@ public class PdfDocument extends FileDocument {
 
 	private FileContent getPdfTextFromBuffer(File pdfTextFile) throws IOException {
 		if (log.isDebug()) log.debug("readContent from text file start...");
-		BufferedInputStream bis = null;
-		try {
-			bis = new BufferedInputStream(new FileInputStream(pdfTextFile));
-			String text = FileUtils.load(bis, "utf-8");
-			if (log.isDebug()) log.debug("readContent from text file done.");
+		// Decode as UTF-8 like the former FileUtils.load(bis, "utf-8") did. FileReader uses the platform
+		// charset, which can garble the text and hide the NBSP|NBSP title separator searched below.
+		try(BufferedReader br = new BufferedReader(new java.io.InputStreamReader(
+					new java.io.FileInputStream(pdfTextFile), java.nio.charset.StandardCharsets.UTF_8));
+				LimitedContentWriter sb = new LimitedContentWriter(5000, FileDocumentFactory.getMaxFileSize())) {
+			//search the title
+			char[] cbuf = new char[4096];
+			int length = br.read(cbuf);
+			int indexSep = 0;
+			String title = "";
 			
-			int indexSep = text.indexOf("\u00A0|\u00A0");
-			if(indexSep > 0) {
-				return new FileContent(text.substring(0, indexSep), text.substring(indexSep + 3, text.length()));
-			}
-			return new FileContent(text);
-		} finally {
-			if (bis != null) {
-				bis.close();
+			if(length > 0) {
+				String firstChunk = new String(cbuf, 0, length);
+				indexSep = firstChunk.indexOf("\u00A0|\u00A0");
+				if(indexSep > 0) {
+					title = firstChunk.substring(0, indexSep);
+					sb.append(firstChunk.substring(indexSep + 3));
+				} else {
+					sb.append(firstChunk);
+				}
+				while((length = br.read(cbuf)) > 0) {
+					sb.write(cbuf, 0, length);
+				}
 			}
+	
+			return new FileContent(title, sb.toString());
 		}
 	}
 
diff --git a/src/main/java/org/olat/search/service/document/file/PowerPointDocument.java b/src/main/java/org/olat/search/service/document/file/PowerPointDocument.java
index 2b179c7b072..ccca99d9c89 100644
--- a/src/main/java/org/olat/search/service/document/file/PowerPointDocument.java
+++ b/src/main/java/org/olat/search/service/document/file/PowerPointDocument.java
@@ -26,10 +26,9 @@
 package org.olat.search.service.document.file;
 
 import java.io.BufferedInputStream;
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
+import java.io.Writer;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -42,6 +41,7 @@ import org.apache.poi.util.LittleEndian;
 import org.olat.core.gui.util.CSSHelper;
 import org.olat.core.logging.OLog;
 import org.olat.core.logging.Tracing;
+import org.olat.core.util.io.LimitedContentWriter;
 import org.olat.core.util.vfs.VFSLeaf;
 import org.olat.search.service.SearchResourceContext;
 
@@ -67,85 +67,74 @@ public class PowerPointDocument extends FileDocument {
 		if (log.isDebug()) log.debug(powerPointDocument.toString());
 		return powerPointDocument.getLuceneDocument();
 	}
-	
 
-	
-  public FileContent readContent(VFSLeaf leaf) throws IOException,DocumentException {
-  	BufferedInputStream bis = null;
-  	OutputStream oStream = null;
-  	if (log.isDebug()) log.debug("read PPT Content of leaf=" + leaf.getName());
-  	try {
-    	bis = new BufferedInputStream(leaf.getInputStream());
-      oStream = new ByteArrayOutputStream();
-    	extractText(bis, oStream);
-    	String content = oStream.toString();
-      return new FileContent(removeUnvisibleChars(content));			
+	@Override
+	public FileContent readContent(VFSLeaf leaf) throws IOException,DocumentException {
+		if (log.isDebug()) log.debug("read PPT Content of leaf=" + leaf.getName());
+		try (BufferedInputStream bis = new BufferedInputStream(leaf.getInputStream())) {
+			LimitedContentWriter oStream = new LimitedContentWriter(100000, FileDocumentFactory.getMaxFileSize());
+			extractText(bis, oStream);
+			return new FileContent(oStream.toString());			
 		} catch (Exception e) {
-			throw new DocumentException("Can not read PPT Content. File=" + leaf.getName() );
-		} finally {
-			if (bis != null) {
-				bis.close();
-			}
-			if (oStream != null) {
-				oStream.close();
-			}
+			throw new DocumentException("Can not read PPT Content. File=" + leaf.getName(), e);
 		}
-  }
-
-  /**
-   * Remove unvisible chars form input string.
-   * @param inputString
-   * @return Return filtered string
-   */
-  private String removeUnvisibleChars(String inputString) {
-	  Pattern p = Pattern.compile("[^a-zA-Z0-9\n\r!&#<>{}]");
-	  Matcher m = p.matcher(inputString);
-	  String output = m.replaceAll(" ");
-	  return output;
-  }
-  
+	}
 
-  private void extractText(InputStream inStream, OutputStream stream ) throws IOException {
-    POIFSReader r = new POIFSReader();
-    /* Register a listener for *all* documents. */
-    r.registerListener(new MyPOIFSReaderListener(stream));
-    r.read(inStream);
-  }
+	private void extractText(InputStream inStream, Writer outWriter) throws IOException {
+		POIFSReader r = new POIFSReader();
+		/* Register a listener for *all* documents. */
+		r.registerListener(new MyPOIFSReaderListener(outWriter));
+		r.read(inStream);
+	}
 
-  private class MyPOIFSReaderListener implements POIFSReaderListener {
-	  private final OutputStream oStream;
+	private static class MyPOIFSReaderListener implements POIFSReaderListener {
+		private final Writer oStream;
 	
-	  public MyPOIFSReaderListener(OutputStream oStream) {
+		public MyPOIFSReaderListener(Writer oStream) {
 			this.oStream = oStream;
 		}
 	
+		@Override
 		public void processPOIFSReaderEvent(POIFSReaderEvent event) {
-      int errorCounter = 0;
+			int errorCounter = 0;
       
-      try {
-        DocumentInputStream dis = null;
-        dis = event.getStream();
+			try {
+				DocumentInputStream dis = event.getStream();
         
-        byte btoWrite[] = new byte[dis.available()];
-        dis.read(btoWrite, 0, dis.available());
-        for (int i = 0; i < btoWrite.length - 20; i++) {
-          long type = LittleEndian.getUShort(btoWrite, i + 2);
-          long size = LittleEndian.getUInt(btoWrite, i + 4);
-          if (type == 4008) {
-            try {
-          	  oStream.write(btoWrite, i + 4 + 1, (int) size + 3);
-            } catch( IndexOutOfBoundsException ex) {
-              errorCounter++;
-            }
-          }
-        }
-      } catch (Exception ex) {
-      	// FIXME:chg: Remove general Exception later, for now make it run 
-        log.warn("Can not read PPT content.", ex);
-      }
-      if (errorCounter > 0) {
-      	if (log.isDebug()) log.debug("Could not parse ppt properly. There were " + errorCounter + " IndexOutOfBoundsException");
-      }
-    }
+				byte btoWrite[] = new byte[dis.available()];
+				dis.read(btoWrite, 0, dis.available());
+				for (int i = 0; i < btoWrite.length - 20; i++) {
+					long type = LittleEndian.getUShort(btoWrite, i + 2);
+					long size = LittleEndian.getUInt(btoWrite, i + 4);
+					if (type == 4008) {
+						try {
+							String chunk = new String(btoWrite, i + 4 + 1, (int)(size + 3));
+							oStream.write(removeUnvisibleChars(chunk));
+						} catch( IndexOutOfBoundsException ex) {
+							errorCounter++;
+						}
+					}
+				}
+			} catch (Exception ex) {
+				// Remove general Exception later, for now make it run 
+				log.warn("Can not read PPT content.", ex);
+			}
+			if (errorCounter > 0 && log.isDebug()) {
+				log.debug("Could not parse ppt properly. There were " + errorCounter + " IndexOutOfBoundsException");
+			}
+		}
+		
+		/** Compiled once instead of per chunk: matches every char that is not in the kept set. */
+		private static final Pattern UNVISIBLE_CHARS = Pattern.compile("[^a-zA-Z0-9\n\r!&#<>{}]");
+		/**
+		 * Replaces every invisible char of the input string by a blank.
+		 * 
+		 * @param inputString The text chunk to filter
+		 * @return The filtered string
+		 */
+		private String removeUnvisibleChars(String inputString) {
+			Matcher m = UNVISIBLE_CHARS.matcher(inputString);
+			return m.replaceAll(" ");
+		}
 	}
 }
diff --git a/src/main/java/org/olat/search/service/document/file/PowerPointOOXMLDocument.java b/src/main/java/org/olat/search/service/document/file/PowerPointOOXMLDocument.java
index 2212ea25733..907bf19e63a 100644
--- a/src/main/java/org/olat/search/service/document/file/PowerPointOOXMLDocument.java
+++ b/src/main/java/org/olat/search/service/document/file/PowerPointOOXMLDocument.java
@@ -19,25 +19,31 @@
  */
 package org.olat.search.service.document.file;
 
+import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.List;
 import java.util.zip.ZipEntry;
-import java.util.zip.ZipInputStream;
+import java.util.zip.ZipFile;
 
 import org.apache.lucene.document.Document;
 import org.olat.core.gui.util.CSSHelper;
 import org.olat.core.logging.OLog;
 import org.olat.core.logging.Tracing;
-import org.olat.core.util.FileUtils;
+import org.olat.core.util.io.LimitedContentWriter;
 import org.olat.core.util.io.ShieldInputStream;
+import org.olat.core.util.vfs.JavaIOItem;
 import org.olat.core.util.vfs.VFSLeaf;
 import org.olat.search.service.SearchResourceContext;
-import org.olat.search.service.document.file.utils.SlicedDocument;
 import org.xml.sax.InputSource;
 import org.xml.sax.XMLReader;
 import org.xml.sax.helpers.DefaultHandler;
 import org.xml.sax.helpers.XMLReaderFactory;
 
+import edu.emory.mathcs.backport.java.util.Collections;
+
 /**
  * 
  * Description:<br>
@@ -54,8 +60,8 @@ public class PowerPointOOXMLDocument extends FileDocument {
 	public final static String POWERPOINT_FILE_TYPE = "type.file.ppt";
 	private static final String SLIDE = "ppt/slides/slide";
 
-	public static Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf) throws IOException, DocumentException,
-			DocumentAccessException {
+	public static Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf)
+	throws IOException, DocumentException, DocumentAccessException {
 		PowerPointOOXMLDocument officeDocument = new PowerPointOOXMLDocument();
 		officeDocument.init(leafResourceContext, leaf);
 		officeDocument.setFileType(POWERPOINT_FILE_TYPE);
@@ -68,36 +74,41 @@ public class PowerPointOOXMLDocument extends FileDocument {
 
 	@Override
 	public FileContent readContent(VFSLeaf leaf) throws IOException, DocumentException {
-		SlicedDocument doc = new SlicedDocument();
 		
-		InputStream stream = null;
-		ZipInputStream zip = null;
-		try {
-			stream = leaf.getInputStream();
-
-			zip = new ZipInputStream(stream);
-			ZipEntry entry = zip.getNextEntry();
-			while (entry != null) {
+		// ZipFile is opened on a java.io.File: fail with a DocumentException instead of a ClassCastException
+		if(!(leaf instanceof JavaIOItem)) throw new DocumentException("Not a java.io based leaf: " + leaf.getName());
+		File file = ((JavaIOItem)leaf).getBasefile();
+		LimitedContentWriter writer = new LimitedContentWriter(100000, FileDocumentFactory.getMaxFileSize());
+		try(ZipFile wordFile = new ZipFile(file)) {
+			List<String> contents = new ArrayList<>();
+			for(Enumeration<? extends ZipEntry> entriesEnumeration=wordFile.entries(); entriesEnumeration.hasMoreElements(); ) {
+				ZipEntry entry = entriesEnumeration.nextElement();
 				String name = entry.getName();
 				if(name.startsWith(SLIDE) && name.endsWith(".xml")) {
-					int lastIndex = name.indexOf(".xml");
-					String position = name.substring(SLIDE.length(), lastIndex);
-					
-					OfficeDocumentHandler dh = new OfficeDocumentHandler();
+					contents.add(name);
+				}
+			}
+			// java.util.Collections is spelled out: the unqualified name is bound to the edu.emory backport import
+			if(contents.size() > 1) {
+				java.util.Collections.sort(contents, new PowerPointDocumentComparator());
+			}
+			
+			for(String content:contents) {
+				if(writer.accept()) {
+					ZipEntry entry = wordFile.getEntry(content);
+					InputStream zip = wordFile.getInputStream(entry);
+					OfficeDocumentHandler dh = new OfficeDocumentHandler(writer);
 					parse(new ShieldInputStream(zip), dh);
-					doc.setContent(Integer.parseInt(position), dh.getContent());
+					zip.close();
 				}
-				entry = zip.getNextEntry();
 			}
+			
 		} catch (DocumentException e) {
 			throw e;
 		} catch (Exception e) {
-			throw new DocumentException(e.getMessage());
+			throw new DocumentException(e.getMessage(), e);
-		} finally {
-			FileUtils.closeSafely(zip);
-			FileUtils.closeSafely(stream);
 		}
-		return new FileContent(doc.toStringAndClear());
+		return new FileContent(writer.toString());
 	}
 	
 	private void parse(InputStream stream, DefaultHandler handler) throws DocumentException {
@@ -106,7 +117,7 @@ public class PowerPointOOXMLDocument extends FileDocument {
 			parser.setContentHandler(handler);
 			parser.setEntityResolver(handler);
 			try {
-			parser.setFeature("http://xml.org/sax/features/validation", false);
+				parser.setFeature("http://xml.org/sax/features/validation", false);
 			} catch(Exception e) {
 				log.error("Cannot deactivate validation", e);
 			}
@@ -116,11 +127,11 @@ public class PowerPointOOXMLDocument extends FileDocument {
 		}
 	}
 	
-	private class OfficeDocumentHandler extends DefaultHandler {
-		private final StringBuilder sb = new StringBuilder();
+	private static class OfficeDocumentHandler extends DefaultHandler {
+		private final LimitedContentWriter sb;
 
-		public StringBuilder getContent() {
-			return sb;
+		public OfficeDocumentHandler(LimitedContentWriter sb) {
+			this.sb = sb;
 		}
 
 		@Override
@@ -133,7 +144,28 @@ public class PowerPointOOXMLDocument extends FileDocument {
 			if(sb .length() > 0 && sb.charAt(sb.length() - 1) != ' '){
 				sb.append(' ');
 			}
-			sb.append(ch, start, length);
+			sb.write(ch, start, length);
 		}
 	}
+	
+	public static class PowerPointDocumentComparator extends AbstractOfficeDocumentComparator  {
+
+		@Override
+		public int compare(String f1, String f2) {
+			int c = 0;
+			if(f1.startsWith(SLIDE) && f2.startsWith(SLIDE)) {
+				c = comparePosition(f1, f2, SLIDE);
+			} else if(f1.startsWith(SLIDE)) {
+				c = 1;
+			} else if(f2.startsWith(SLIDE)) {
+				c = -1;
+			}
+			
+			if(c == 0) {
+				c = f1.compareTo(f2);
+			}
+			return -c;
+		}
+		
+	}
 }
\ No newline at end of file
diff --git a/src/main/java/org/olat/search/service/document/file/TextDocument.java b/src/main/java/org/olat/search/service/document/file/TextDocument.java
index b024b964353..90fd8a18f75 100644
--- a/src/main/java/org/olat/search/service/document/file/TextDocument.java
+++ b/src/main/java/org/olat/search/service/document/file/TextDocument.java
@@ -1,45 +1,46 @@
 /**
-* OLAT - Online Learning and Training<br>
-* http://www.olat.org
-* <p>
-* Licensed under the Apache License, Version 2.0 (the "License"); <br>
-* you may not use this file except in compliance with the License.<br>
-* You may obtain a copy of the License at
-* <p>
-* http://www.apache.org/licenses/LICENSE-2.0
-* <p>
-* Unless required by applicable law or agreed to in writing,<br>
-* software distributed under the License is distributed on an "AS IS" BASIS, <br>
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
-* See the License for the specific language governing permissions and <br>
-* limitations under the License.
-* <p>
-* Copyright (c) since 2004 at Multimedia- & E-Learning Services (MELS),<br>
-* University of Zurich, Switzerland.
-* <hr>
-* <a href="http://www.openolat.org">
-* OpenOLAT - Online Learning and Training</a><br>
-* This file has been modified by the OpenOLAT community. Changes are licensed
-* under the Apache 2.0 license as the original file.
-*/
+ * OLAT - Online Learning and Training<br>
+ * http://www.olat.org
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); <br>
+ * you may not use this file except in compliance with the License.<br>
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing,<br>
+ * software distributed under the License is distributed on an "AS IS" BASIS, <br>
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
+ * See the License for the specific language governing permissions and <br>
+ * limitations under the License.
+ * <p>
+ * Copyright (c) since 2004 at Multimedia- & E-Learning Services (MELS),<br>
+ * University of Zurich, Switzerland.
+ * <hr>
+ * <a href="http://www.openolat.org">
+ * OpenOLAT - Online Learning and Training</a><br>
+ * This file has been modified by the OpenOLAT community. Changes are licensed
+ * under the Apache 2.0 license as the original file.
+ */
 
 package org.olat.search.service.document.file;
 
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
-import java.io.StringWriter;
 import java.io.Writer;
 
 import org.apache.lucene.document.Document;
 import org.olat.core.gui.util.CSSHelper;
 import org.olat.core.logging.OLog;
 import org.olat.core.logging.Tracing;
+import org.olat.core.util.io.LimitedContentWriter;
 import org.olat.core.util.vfs.VFSLeaf;
 import org.olat.search.service.SearchResourceContext;
 
 /**
  * Lucene document mapper.
+ * 
  * @author Christian Guretzki
  */
 public class TextDocument extends FileDocument {
@@ -47,48 +48,42 @@ public class TextDocument extends FileDocument {
 	private static final OLog log = Tracing.createLoggerFor(TextDocument.class);
 
 	public final static String FILE_TYPE = "type.file.text";
-	/**
-	 * Limit the maximal size of the content read to index.
-	 */
-	public static int MAX_SIZE = 200000;
 
 	public TextDocument() {
 		//
 	}
-	
-	public static Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf) throws IOException,DocumentException,DocumentAccessException {
-    TextDocument textDocument = new TextDocument();
-    textDocument.init(leafResourceContext,leaf);
-    textDocument.setFileType(FILE_TYPE);
-		textDocument.setCssIcon(CSSHelper.createFiletypeIconCssClassFor(leaf.getName()));
-    if (log.isDebug() ) log.debug(textDocument.toString());
+
+	public static Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf)
+	throws IOException, DocumentException, DocumentAccessException {
+		
+		TextDocument textDocument = new TextDocument();
+		textDocument.init(leafResourceContext, leaf);
+		textDocument.setFileType(FILE_TYPE);
+		textDocument.setCssIcon(CSSHelper.createFiletypeIconCssClassFor(leaf
+				.getName()));
+		if (log.isDebug())
+			log.debug(textDocument.toString());
 		return textDocument.getLuceneDocument();
 	}
-	
+
 	@Override
 	protected FileContent readContent(VFSLeaf leaf) throws IOException {
-    StringWriter out = new StringWriter();
-    InputStreamReader in = new InputStreamReader(leaf.getInputStream());
+		LimitedContentWriter out = new LimitedContentWriter((int)leaf.getSize(), FileDocumentFactory.getMaxFileSize());
+		// try-with-resources: the reader (and the leaf stream it wraps) was never closed otherwise
-		try {
+		try(InputStreamReader in = new InputStreamReader(leaf.getInputStream())) {
 			copy(in, out);
 		} catch (Exception e) {
 			log.error("", e);
 		}
-  	return new FileContent(out.toString());
+		return new FileContent(out.toString());
 	}
-	
-	private void copy(Reader input, Writer output)
-	throws IOException {
+
+	private void copy(Reader input, Writer output) throws IOException {
 		char[] buffer = new char[4096];
-		
-		int count = 0;
-    int n = 0;
-    while (-1 != (n = input.read(buffer))) {
-        output.write(buffer, 0, n);
-        count += n;
-        if(count >= MAX_SIZE) {
-        	break;
-        }
-    }
+
+		int n = 0;
+		while (-1 != (n = input.read(buffer))) {
+			output.write(buffer, 0, n);
+		}
 	}
 }
diff --git a/src/main/java/org/olat/search/service/document/file/WordDocument.java b/src/main/java/org/olat/search/service/document/file/WordDocument.java
index 9cb083602eb..41ef3c11b95 100644
--- a/src/main/java/org/olat/search/service/document/file/WordDocument.java
+++ b/src/main/java/org/olat/search/service/document/file/WordDocument.java
@@ -1,32 +1,33 @@
 /**
-* OLAT - Online Learning and Training<br>
-* http://www.olat.org
-* <p>
-* Licensed under the Apache License, Version 2.0 (the "License"); <br>
-* you may not use this file except in compliance with the License.<br>
-* You may obtain a copy of the License at
-* <p>
-* http://www.apache.org/licenses/LICENSE-2.0
-* <p>
-* Unless required by applicable law or agreed to in writing,<br>
-* software distributed under the License is distributed on an "AS IS" BASIS, <br>
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
-* See the License for the specific language governing permissions and <br>
-* limitations under the License.
-* <p>
-* Copyright (c) since 2004 at Multimedia- & E-Learning Services (MELS),<br>
-* University of Zurich, Switzerland.
-* <hr>
-* <a href="http://www.openolat.org">
-* OpenOLAT - Online Learning and Training</a><br>
-* This file has been modified by the OpenOLAT community. Changes are licensed
-* under the Apache 2.0 license as the original file.
-*/
+ * OLAT - Online Learning and Training<br>
+ * http://www.olat.org
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); <br>
+ * you may not use this file except in compliance with the License.<br>
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing,<br>
+ * software distributed under the License is distributed on an "AS IS" BASIS, <br>
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
+ * See the License for the specific language governing permissions and <br>
+ * limitations under the License.
+ * <p>
+ * Copyright (c) since 2004 at Multimedia- & E-Learning Services (MELS),<br>
+ * University of Zurich, Switzerland.
+ * <hr>
+ * <a href="http://www.openolat.org">
+ * OpenOLAT - Online Learning and Training</a><br>
+ * This file has been modified by the OpenOLAT community. Changes are licensed
+ * under the Apache 2.0 license as the original file.
+ */
 
 package org.olat.search.service.document.file;
 
 import java.io.BufferedInputStream;
 import java.io.IOException;
+import java.io.Writer;
 import java.util.Iterator;
 
 import org.apache.lucene.document.Document;
@@ -37,11 +38,13 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.olat.core.gui.util.CSSHelper;
 import org.olat.core.logging.OLog;
 import org.olat.core.logging.Tracing;
+import org.olat.core.util.io.LimitedContentWriter;
 import org.olat.core.util.vfs.VFSLeaf;
 import org.olat.search.service.SearchResourceContext;
 
 /**
  * Lucene document mapper.
+ * 
  * @author Christian Guretzki
  */
 public class WordDocument extends FileDocument {
@@ -53,68 +56,72 @@ public class WordDocument extends FileDocument {
 	public WordDocument() {
 		//
 	}
-	
-	public static Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf) throws IOException,DocumentException,DocumentAccessException {
-    WordDocument wordDocument = new WordDocument();
-    wordDocument.init(leafResourceContext,leaf);
-    wordDocument.setFileType(FILE_TYPE);
+
+	public static Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf)
+	throws IOException, DocumentException, DocumentAccessException {
+		WordDocument wordDocument = new WordDocument();
+		wordDocument.init(leafResourceContext, leaf);
+		wordDocument.setFileType(FILE_TYPE);
 		wordDocument.setCssIcon(CSSHelper.createFiletypeIconCssClassFor(leaf.getName()));
-		if (log.isDebug()) log.debug(wordDocument.toString());
+		if (log.isDebug())
+			log.debug(wordDocument.toString());
 		return wordDocument.getLuceneDocument();
 	}
-	
-	protected FileContent readContent(VFSLeaf leaf) throws IOException,DocumentException {
+
+	@Override
+	protected FileContent readContent(VFSLeaf leaf) throws IOException,
+			DocumentException {
 		BufferedInputStream bis = null;
-		StringBuilder sb = new StringBuilder();
-    try {
-    	bis = new BufferedInputStream(leaf.getInputStream());
-    	POIFSFileSystem filesystem = new POIFSFileSystem(bis);
-    	Iterator<?> entries = filesystem.getRoot().getEntries();
-      while (entries.hasNext()) {
-      	Entry entry = (Entry) entries.next();
-        String name = entry.getName();
-        if (!(entry instanceof DocumentEntry)) {
-        	// Skip directory entries
-        } else if ("WordDocument".equals(name)) {
-        	collectWordDocument(filesystem, sb);
-        }
-      }
+		LimitedContentWriter sb = new LimitedContentWriter((int)leaf.getSize(), FileDocumentFactory.getMaxFileSize());
+		try {
+			bis = new BufferedInputStream(leaf.getInputStream());
+			POIFSFileSystem filesystem = new POIFSFileSystem(bis);
+			Iterator<?> entries = filesystem.getRoot().getEntries();
+			while (entries.hasNext()) {
+				Entry entry = (Entry) entries.next();
+				String name = entry.getName();
+				if (!(entry instanceof DocumentEntry)) {
+					// Skip directory entries
+				} else if ("WordDocument".equals(name)) {
+					collectWordDocument(filesystem, sb);
+				}
+			}
 			return new FileContent(sb.toString());
 		} catch (Exception e) {
-			log.warn("could not read in word document: " + leaf + " please check, that this is not an docx/rtf/html file!");
+			log.warn("could not read in word document: " + leaf
+					+ " please check, that this is not an docx/rtf/html file!");
-			throw new DocumentException(e.getMessage());
+			throw new DocumentException(e.getMessage(), e);
 		} finally {
 			if (bis != null) {
-			  bis.close();
+				bis.close();
 			}
 		}
 	}
-	
-	private void collectWordDocument(POIFSFileSystem filesystem, StringBuilder sb) 
-		throws IOException {
+
+	private void collectWordDocument(POIFSFileSystem filesystem, Writer sb) throws IOException {
 		WordExtractor extractor = new WordExtractor(filesystem);
-    addTextIfAny(sb, extractor.getHeaderText());
-    for (String paragraph : extractor.getParagraphText()) {
-        sb.append(paragraph).append(' ');
-    }
+		addTextIfAny(sb, extractor.getHeaderText());
+		for (String paragraph : extractor.getParagraphText()) {
+			sb.append(paragraph).append(' ');
+		}
 
-    for (String paragraph : extractor.getFootnoteText()) {
-        sb.append(paragraph).append(' ');
-    }
+		for (String paragraph : extractor.getFootnoteText()) {
+			sb.append(paragraph).append(' ');
+		}
 
-    for (String paragraph : extractor.getCommentsText()) {
-        sb.append(paragraph).append(' ');
-    }
+		for (String paragraph : extractor.getCommentsText()) {
+			sb.append(paragraph).append(' ');
+		}
 
-    for (String paragraph : extractor.getEndnoteText()) {
-        sb.append(paragraph).append(' ');
-    }
-    addTextIfAny(sb, extractor.getFooterText());
+		for (String paragraph : extractor.getEndnoteText()) {
+			sb.append(paragraph).append(' ');
+		}
+		addTextIfAny(sb, extractor.getFooterText());
 	}
-    
-	private void addTextIfAny(StringBuilder sb, String text) {
+
+	private void addTextIfAny(Writer sb, String text) throws IOException {
 		if (text != null && text.length() > 0) {
 			sb.append(text).append(' ');
-    }
+		}
 	}
 }
diff --git a/src/main/java/org/olat/search/service/document/file/WordOOXMLDocument.java b/src/main/java/org/olat/search/service/document/file/WordOOXMLDocument.java
index 310580c26d0..1cccd55e0d2 100644
--- a/src/main/java/org/olat/search/service/document/file/WordOOXMLDocument.java
+++ b/src/main/java/org/olat/search/service/document/file/WordOOXMLDocument.java
@@ -19,26 +19,31 @@
  */
 package org.olat.search.service.document.file;
 
+import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.List;
 import java.util.zip.ZipEntry;
-import java.util.zip.ZipInputStream;
+import java.util.zip.ZipFile;
 
 import org.apache.lucene.document.Document;
 import org.olat.core.gui.util.CSSHelper;
 import org.olat.core.logging.OLog;
 import org.olat.core.logging.Tracing;
-import org.olat.core.util.FileUtils;
-import org.olat.core.util.StringHelper;
+import org.olat.core.util.io.LimitedContentWriter;
 import org.olat.core.util.io.ShieldInputStream;
+import org.olat.core.util.vfs.JavaIOItem;
 import org.olat.core.util.vfs.VFSLeaf;
 import org.olat.search.service.SearchResourceContext;
-import org.olat.search.service.document.file.utils.SlicedDocument;
 import org.xml.sax.InputSource;
 import org.xml.sax.XMLReader;
 import org.xml.sax.helpers.DefaultHandler;
 import org.xml.sax.helpers.XMLReaderFactory;
 
+import edu.emory.mathcs.backport.java.util.Collections;
+
 /**
  * 
  * Description:<br>
@@ -55,6 +60,7 @@ public class WordOOXMLDocument extends FileDocument {
 	public final static String WORD_FILE_TYPE = "type.file.word";
 	private static final String HEADER = "word/header";
 	private static final String FOOTER = "word/footer";
+	private static final String DOCUMENT = "word/document.xml";
 
 	public static Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf)
 	throws IOException, DocumentException, DocumentAccessException {
@@ -71,58 +77,47 @@ public class WordOOXMLDocument extends FileDocument {
 
 	@Override
 	public FileContent readContent(VFSLeaf leaf) throws IOException, DocumentException {
-		SlicedDocument doc = new SlicedDocument();
 		
-		InputStream stream = null;
-		ZipInputStream zip = null;
-		try {
-			stream = leaf.getInputStream();
+		File file = ((JavaIOItem)leaf).getBasefile();
 
-			zip = new ZipInputStream(stream);
-			ZipEntry entry = zip.getNextEntry();
-			while (entry != null) {
+		LimitedContentWriter writer = new LimitedContentWriter(100000, FileDocumentFactory.getMaxFileSize());
+		// Only the document, header and footer parts of the zip archive are read into the writer.
+		try(ZipFile wordFile = new ZipFile(file)) {
+			// first pass: remember the names of the parts to extract
+			List<String> contents = new ArrayList<>();
+			for(Enumeration<? extends ZipEntry> entriesEnumeration=wordFile.entries(); entriesEnumeration.hasMoreElements(); ) {
+				ZipEntry entry = entriesEnumeration.nextElement();
 				String name = entry.getName();
 				if(name.endsWith("word/document.xml")) {
-					OfficeDocumentHandler dh = new OfficeDocumentHandler();
-					parse(new ShieldInputStream(zip), dh);
-					doc.setContent(0, dh.getContent());
+					contents.add(name);
 				} else if(name.startsWith(HEADER) && name.endsWith(".xml")) {
-					String position = name.substring(HEADER.length(), name.indexOf(".xml"));
-					if(StringHelper.isLong(position)) {
-						try {
-							OfficeDocumentHandler dh = new OfficeDocumentHandler();
-							parse(new ShieldInputStream(zip), dh);
-							doc.setHeader(Integer.parseInt(position), dh.getContent());
-						} catch (NumberFormatException e) {
-							log.warn("", e);
-							//if position not a position, go head
-						}
-					}
+					contents.add(name);
 				} else if(name.startsWith(FOOTER) && name.endsWith(".xml")) {
-					String position = name.substring(FOOTER.length(), name.indexOf(".xml"));
-					if(StringHelper.isLong(position)) {
-						try {
-							OfficeDocumentHandler dh = new OfficeDocumentHandler();
-							parse(new ShieldInputStream(zip), dh);
-							doc.setFooter(Integer.parseInt(position), dh.getContent());
-						} catch (NumberFormatException e) {
-							log.warn("", e);
-							//if position not a position, go head
-						}
-					}
+					contents.add(name);
 				}
-				entry = zip.getNextEntry();
 			}
+			// JDK sort, fully qualified because this file imports the backport-util-concurrent Collections
+			if(contents.size() > 1) {
+				java.util.Collections.sort(contents, new WordDocumentComparator());
+			}
+			// second pass: extract the sorted parts while the writer still accepts content
+			for(String content:contents) {
+				if(writer.accept()) {
+					ZipEntry entry = wordFile.getEntry(content);
+					// try-with-resources closes the part stream even when parsing fails
+					try(InputStream zip = wordFile.getInputStream(entry)) {
+						parse(new ShieldInputStream(zip), new OfficeDocumentHandler(writer));
+					}
+				}
+			}
+			
 		} catch (DocumentException e) {
 			throw e;
 		} catch (Exception e) {
-			e.printStackTrace();
 			throw new DocumentException(e.getMessage());
-		} finally {
-			FileUtils.closeSafely(zip);
-			FileUtils.closeSafely(stream);
 		}
-		return new FileContent(doc.toStringAndClear());
+		
+		return new FileContent(writer.toString());
 	}
 	
 	private void parse(InputStream stream, DefaultHandler handler) throws DocumentException {
@@ -131,7 +126,7 @@ public class WordOOXMLDocument extends FileDocument {
 			parser.setContentHandler(handler);
 			parser.setEntityResolver(handler);
 			try {
-			parser.setFeature("http://xml.org/sax/features/validation", false);
+				parser.setFeature("http://xml.org/sax/features/validation", false);
 			} catch(Exception e) {
 				log.error("Cannot deactivate validation", e);
 			}
@@ -141,11 +136,11 @@ public class WordOOXMLDocument extends FileDocument {
 		}
 	}
 	
-	private class OfficeDocumentHandler extends DefaultHandler {
-		private final StringBuilder sb = new StringBuilder();
-
-		public StringBuilder getContent() {
-			return sb;
+	private static class OfficeDocumentHandler extends DefaultHandler {
+		private final LimitedContentWriter sb;
+		
+		public OfficeDocumentHandler(LimitedContentWriter sb) {
+			this.sb = sb;
 		}
 
 		@Override
@@ -153,7 +148,39 @@ public class WordOOXMLDocument extends FileDocument {
 			if(sb .length() > 0 && sb.charAt(sb.length() - 1) != ' '){
 				sb.append(' ');
 			}
-			sb.append(ch, start, length);
+			sb.write(ch, start, length);
+		}
+	}
+	
+	public static class WordDocumentComparator extends AbstractOfficeDocumentComparator {
+		/** Sorts header parts first, then the main document, then footer parts; the result computed in c is negated on return. */
+		@Override
+		public int compare(String f1, String f2) {
+			int c = 0;
+			if(f1.endsWith(DOCUMENT)) {
+				if(f2.startsWith(HEADER)) {
+					c = -1;
+				} else if(f2.startsWith(FOOTER)) {
+					c = 1;
+				}
+			} else if(f1.startsWith(HEADER)) {
+				if(f2.endsWith(DOCUMENT) || f2.startsWith(FOOTER)) { // same document test as for f1, keeps compare() symmetric
+					c = 1;
+				} else if(f2.startsWith(HEADER)) {
+					c = comparePosition(f1, f2, HEADER);
+				}
+			} else if(f1.startsWith(FOOTER)) {
+				if(f2.endsWith(DOCUMENT) || f2.startsWith(HEADER)) { // same document test as for f1, keeps compare() symmetric
+					c = -1;
+				} else if(f2.startsWith(FOOTER)) {
+					c = comparePosition(f1, f2, FOOTER);
+				}
+			}
+			
+			if(c == 0) {
+				c = f1.compareTo(f2);
+			}
+			return -c;
+		}
+	}
 }
\ No newline at end of file
diff --git a/src/main/java/org/olat/search/service/document/file/pdf/PdfBoxExtractor.java b/src/main/java/org/olat/search/service/document/file/pdf/PdfBoxExtractor.java
index 31b2d622b73..82d7b3b507f 100644
--- a/src/main/java/org/olat/search/service/document/file/pdf/PdfBoxExtractor.java
+++ b/src/main/java/org/olat/search/service/document/file/pdf/PdfBoxExtractor.java
@@ -21,18 +21,19 @@ package org.olat.search.service.document.file.pdf;
 
 import java.io.BufferedInputStream;
 import java.io.File;
-import java.io.FileOutputStream;
+import java.io.FileWriter;
 import java.io.IOException;
 
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.util.PDFTextStripper;
 import org.olat.core.logging.OLog;
 import org.olat.core.logging.Tracing;
-import org.olat.core.util.FileUtils;
 import org.olat.core.util.StringHelper;
+import org.olat.core.util.io.LimitedContentWriter;
 import org.olat.core.util.vfs.VFSLeaf;
 import org.olat.search.service.document.file.DocumentAccessException;
-import org.olat.search.service.document.file.FileDocument.FileContent;
+import org.olat.search.service.document.file.FileContent;
+import org.olat.search.service.document.file.FileDocumentFactory;
 
 /**
  * 
@@ -52,11 +53,14 @@ public class PdfBoxExtractor implements PdfExtractor {
 	}
 	
 	private void storePdfTextInBuffer(FileContent pdfText, File pdfTextFile) throws IOException {
-		FileOutputStream out = new FileOutputStream(pdfTextFile);
-		if(StringHelper.containsNonWhitespace(pdfText.getTitle())) {
-			FileUtils.save(out, pdfText.getTitle() + "\u00A0|\u00A0" + pdfText.getContent(), "utf-8");
-		} else {
-			FileUtils.save(out, pdfText.getContent(), "utf-8");
+		// Encode the buffer file explicitly as UTF-8: FileWriter would use the platform default charset.
+		java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
+		try(java.io.Writer out = new java.io.OutputStreamWriter(new java.io.FileOutputStream(pdfTextFile), utf8)) {
+			if(StringHelper.containsNonWhitespace(pdfText.getTitle())) {
+				out.write(pdfText.getTitle());
+				out.write("\u00A0|\u00A0");
+			}
+			out.write(pdfText.getContent());
 		}
 	}
 	
@@ -72,13 +76,19 @@ public class PdfBoxExtractor implements PdfExtractor {
 					document.decrypt("");
 				} catch (Exception e) {
 					log.warn("PDF is encrypted. Can not read content file=" + leaf.getName());
-					return new FileContent(leaf.getName(), leaf.getName());
+					LimitedContentWriter writer = new LimitedContentWriter(128, FileDocumentFactory.getMaxFileSize());
+					writer.append(leaf.getName());
+					writer.close();
+					return new FileContent(leaf.getName(), writer.toString());
 				}
 			}	
 			String title = getTitle(document);
 			if (log.isDebug()) log.debug("readContent PDDocument loaded");
 			PDFTextStripper stripper = new PDFTextStripper();
-			return new FileContent(title, stripper.getText(document));
+			LimitedContentWriter writer = new LimitedContentWriter(50000, FileDocumentFactory.getMaxFileSize());
+			stripper.writeText(document, writer);
+			writer.close();
+			return new FileContent(title, writer.toString());
 		} finally {
 			if (document != null) {
 			  document.close();
@@ -95,5 +105,4 @@ public class PdfBoxExtractor implements PdfExtractor {
 		}
 		return null;
 	}
-
 }
diff --git a/src/main/java/org/olat/search/service/document/file/utils/SlicedDocument.java b/src/main/java/org/olat/search/service/document/file/utils/SlicedDocument.java
deleted file mode 100644
index 95fba7bc4da..00000000000
--- a/src/main/java/org/olat/search/service/document/file/utils/SlicedDocument.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/**
- * <a href="http://www.openolat.org">
- * OpenOLAT - Online Learning and Training</a><br>
- * <p>
- * Licensed under the Apache License, Version 2.0 (the "License"); <br>
- * you may not use this file except in compliance with the License.<br>
- * You may obtain a copy of the License at the
- * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
- * <p>
- * Unless required by applicable law or agreed to in writing,<br>
- * software distributed under the License is distributed on an "AS IS" BASIS, <br>
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
- * See the License for the specific language governing permissions and <br>
- * limitations under the License.
- * <p>
- * Initial code contributed and copyrighted by<br>
- * frentix GmbH, http://www.frentix.com
- * <p>
- */
-package org.olat.search.service.document.file.utils;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * 
- * Utility class to help reorder the slides/sheets/fotters/headers of XML 
- * documents which are sliced in different numbered XML files
- * 
- * <P>
- * Initial Date:  5 nov. 2012<br>
- * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
- */
-public class SlicedDocument {
-
-	private List<StringBuilder> headers = new ArrayList<StringBuilder>();
-	private List<StringBuilder> documents = new ArrayList<StringBuilder>();
-	private List<StringBuilder> footers = new ArrayList<StringBuilder>();
-	
-	private int size;
-
-	public void setHeader(int index, StringBuilder doc) {
-		ensureSize(headers, index);
-		headers.set(index, doc);
-		size += doc.length();
-	}
-	
-	public void setContent(int index, StringBuilder doc) {
-		ensureSize(documents, index);
-		documents.set(index, doc);
-		size += doc.length();
-	}
-	
-	public void setFooter(int index, StringBuilder doc) {
-		ensureSize(footers, index);
-		footers.set(index, doc);
-		size += doc.length();
-	}
-	
-	private final void ensureSize(List<StringBuilder> list, int index) {
-		if(list.size() <= index) {
-			for(int i=list.size(); i< (index+20); i++) {
-				list.add(null);
-			}
-		}
-	}
-	
-	private final List<StringBuilder> toStringAndClear(StringBuilder content, List<StringBuilder> list) {
-		if(list != null && !list.isEmpty()) {
-			for(StringBuilder document:list) {
-				if(document != null) {
-					content.append(document).append('\n');
-				}
-			}
-			list.clear();
-		}
-		return null;
-	}
-
-	public String toStringAndClear() {
-		StringBuilder content = new StringBuilder(size + 100);
-		headers = toStringAndClear(content, headers);
-		documents = toStringAndClear(content, documents);
-		footers = toStringAndClear(content, footers);
-		return content.toString();
-	}
-}
diff --git a/src/test/java/org/olat/search/service/document/file/FileDocumentFactoryTest.java b/src/test/java/org/olat/search/service/document/file/FileDocumentFactoryTest.java
index 8ef724ff0be..d3f9663c2f6 100644
--- a/src/test/java/org/olat/search/service/document/file/FileDocumentFactoryTest.java
+++ b/src/test/java/org/olat/search/service/document/file/FileDocumentFactoryTest.java
@@ -94,9 +94,9 @@ public class FileDocumentFactoryTest extends OlatTestCase {
 		assertTrue("xml must be supported", fileDocumentFactory.isFileSupported(new LocalFileImpl(new File("test.xml"))));
 	}
 	
-	private VFSLeaf getVFSFile(String name) {
+	private VFSLeaf getVFSFile(String filename) {
 		try {
-			URL url = FileDocumentFactoryTest.class.getResource(name);
+			URL url = FileDocumentFactoryTest.class.getResource(filename);
 			File file = new File(url.toURI());
 			return new LocalFileImpl(file);
 		} catch (URISyntaxException e) {
@@ -105,7 +105,8 @@ public class FileDocumentFactoryTest extends OlatTestCase {
 		}
 	}
 
-	@Test public void testCreateHtmlDocument() {
+	@Test
+	public void testCreateHtmlDocument() {
 		String filePath = "SearchTestFolder";
 		String htmlFileName = "test.html";
 		String htmlText = "<html><head><meta name=\"generator\" content=\"olat-tinymce-1\"><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"></head><body>"
diff --git a/src/test/java/org/olat/search/service/document/file/OfficeDocumentTest.java b/src/test/java/org/olat/search/service/document/file/OfficeDocumentTest.java
index 2928c1c4f2d..3d91b7f4ada 100644
--- a/src/test/java/org/olat/search/service/document/file/OfficeDocumentTest.java
+++ b/src/test/java/org/olat/search/service/document/file/OfficeDocumentTest.java
@@ -23,12 +23,16 @@ import java.io.File;
 import java.io.IOException;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
 
 import org.junit.Assert;
 import org.junit.Test;
 import org.olat.core.util.vfs.VFSLeaf;
-import org.olat.search.service.document.file.FileDocument.FileContent;
+import org.olat.test.OlatTestCase;
 import org.olat.test.VFSJavaIOFile;
+import org.springframework.beans.factory.annotation.Autowired;
 
 /**
  * Test the low memory text extractor for OpenXML (Microsoft Office XML)
@@ -36,7 +40,10 @@ import org.olat.test.VFSJavaIOFile;
  * 
  * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
  */
-public class OfficeDocumentTest {
+public class OfficeDocumentTest extends OlatTestCase {
+	
+	@Autowired
+	private FileDocumentFactory fileDocumentFactory;
 	
 	@Test
 	public void testWordOpenXMLDocument() throws IOException, DocumentException, DocumentAccessException, URISyntaxException {
@@ -52,6 +59,28 @@ public class OfficeDocumentTest {
 		Assert.assertTrue(body.contains("They prefer to start writing a document at home in desktop or laptop computer"));
 	}
 	
+	@Test
+	public void testWordOOXMLDocumentComparator() {
+		List<String> partNames = new ArrayList<>();
+		partNames.add("word/document.xml");
+		partNames.add("word/header1.xml");
+		partNames.add("word/footer3.xml");
+		partNames.add("word/footer.xml");
+		partNames.add("word/footer14.xml");
+		partNames.add("word/header4.xml");
+		partNames.add("word/header25.xml");
+		// sort the unordered part names with the comparator under test
+		Collections.sort(partNames, new WordOOXMLDocument.WordDocumentComparator());
+		// expected order: the numbered headers, the document, then the footers
+		Assert.assertEquals("word/header1.xml", partNames.get(0));
+		Assert.assertEquals("word/header4.xml", partNames.get(1));
+		Assert.assertEquals("word/header25.xml", partNames.get(2));
+		Assert.assertEquals("word/document.xml", partNames.get(3));
+		Assert.assertEquals("word/footer.xml", partNames.get(4));
+		Assert.assertEquals("word/footer3.xml", partNames.get(5));
+		Assert.assertEquals("word/footer14.xml", partNames.get(6));
+	}
+	
 	@Test
 	public void testWordDocument() throws IOException, DocumentException, DocumentAccessException, URISyntaxException {
 		URL docUrl = OfficeDocumentTest.class.getResource("Test_word_indexing.doc");
@@ -106,6 +135,28 @@ public class OfficeDocumentTest {
 		Assert.assertTrue(body.contains("Here is some text"));
 	}
 	
+	@Test
+	public void testPowerPointOOXMLDocumentComparator() {
+		List<String> entryNames = new ArrayList<>();
+		entryNames.add("word/dru.xml");
+		entryNames.add("ppt/slides/slide9.xml");
+		entryNames.add("ppt/slides/slide6.xml");
+		entryNames.add("ppt/slides/slide25.xml");
+		entryNames.add("ppt/slides/slide.xml");
+		entryNames.add("ppt/slides/slide12.xml");
+		entryNames.add("ppt/slides/slide3.xml");
+		// sort the unordered entry names with the comparator under test
+		Collections.sort(entryNames, new PowerPointOOXMLDocument.PowerPointDocumentComparator());
+		// expected order: the slides by ascending number, the non-slide entry last
+		Assert.assertEquals("ppt/slides/slide.xml", entryNames.get(0));
+		Assert.assertEquals("ppt/slides/slide3.xml", entryNames.get(1));
+		Assert.assertEquals("ppt/slides/slide6.xml", entryNames.get(2));
+		Assert.assertEquals("ppt/slides/slide9.xml", entryNames.get(3));
+		Assert.assertEquals("ppt/slides/slide12.xml", entryNames.get(4));
+		Assert.assertEquals("ppt/slides/slide25.xml", entryNames.get(5));
+		Assert.assertEquals("word/dru.xml", entryNames.get(6));
+	}
+	
 	@Test
 	public void testPowerPointDocument() throws IOException, DocumentException, DocumentAccessException, URISyntaxException {
 		URL docUrl = OfficeDocumentTest.class.getResource("Test_ppt_indexing.ppt");
diff --git a/src/test/java/org/olat/search/service/document/file/PDFDocumentTest.java b/src/test/java/org/olat/search/service/document/file/PDFDocumentTest.java
index 97aacdf5a0f..12530e878c9 100644
--- a/src/test/java/org/olat/search/service/document/file/PDFDocumentTest.java
+++ b/src/test/java/org/olat/search/service/document/file/PDFDocumentTest.java
@@ -20,6 +20,7 @@
 package org.olat.search.service.document.file;
 
 import java.io.File;
+import java.io.IOException;
 import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.UUID;
@@ -27,7 +28,6 @@ import java.util.UUID;
 import org.junit.Assert;
 import org.junit.Test;
 import org.olat.core.util.vfs.VFSLeaf;
-import org.olat.search.service.document.file.FileDocument.FileContent;
 import org.olat.test.OlatTestCase;
 import org.olat.test.VFSJavaIOFile;
 
@@ -38,7 +38,8 @@ import org.olat.test.VFSJavaIOFile;
 public class PDFDocumentTest extends OlatTestCase {
 	
 	@Test
-	public void testPDFDocument() throws DocumentException, DocumentAccessException, URISyntaxException {
+	public void testPDFDocument()
+	throws DocumentException, DocumentAccessException, URISyntaxException, IOException {
 		URL pdfUrl = PDFDocumentTest.class.getResource("Test_pdf_indexing.pdf");
 		Assert.assertNotNull(pdfUrl);
 
@@ -48,11 +49,13 @@ public class PDFDocumentTest extends OlatTestCase {
 		FileContent content =	document.readContent(doc);
 		Assert.assertNotNull(content);
 		Assert.assertEquals("Test pdf indexing", content.getTitle());
-		Assert.assertEquals("Un petit texte en franÃ§ais", content.getContent().trim());
+		String body = content.getContent();
+		Assert.assertEquals("Un petit texte en franÃ§ais", body.trim());
 	}
 	
 	@Test
-	public void testPDFDocumentCaching() throws DocumentException, DocumentAccessException, URISyntaxException {
+	public void testPDFDocumentCaching()
+	throws DocumentException, DocumentAccessException, URISyntaxException, IOException {
 		URL pdfUrl = PDFDocumentTest.class.getResource("Test_pdf_indexing.pdf");
 		Assert.assertNotNull(pdfUrl);
 
@@ -64,12 +67,14 @@ public class PDFDocumentTest extends OlatTestCase {
 		FileContent contentIndexed =	document.readContent(doc);
 		Assert.assertNotNull(contentIndexed);
 		Assert.assertEquals("Test pdf indexing", contentIndexed.getTitle());
-		Assert.assertEquals("Un petit texte en franÃ§ais", contentIndexed.getContent().trim());
+		String bodyIndexed = contentIndexed.getContent();
+		Assert.assertEquals("Un petit texte en franÃ§ais", bodyIndexed.trim());
 		
 		//take from the cache
 		FileContent contentCached =	document.readContent(doc);
 		Assert.assertNotNull(contentCached);
 		Assert.assertEquals("Test pdf indexing", contentCached.getTitle());
-		Assert.assertEquals("Un petit texte en franÃ§ais", contentCached.getContent().trim());
+		String cachedBody = contentCached.getContent();
+		Assert.assertEquals("Un petit texte en franÃ§ais", cachedBody.trim());
 	}
 }
\ No newline at end of file
diff --git a/src/test/java/org/olat/test/VFSJavaIOFile.java b/src/test/java/org/olat/test/VFSJavaIOFile.java
index b4a1313f7ef..d8ba423bae1 100644
--- a/src/test/java/org/olat/test/VFSJavaIOFile.java
+++ b/src/test/java/org/olat/test/VFSJavaIOFile.java
@@ -29,6 +29,7 @@ import org.olat.core.util.vfs.VFSConstants;
 import org.olat.core.util.vfs.VFSContainer;
 import org.olat.core.util.vfs.VFSItem;
 import org.olat.core.util.vfs.VFSLeaf;
+import org.olat.core.util.vfs.JavaIOItem;
 import org.olat.core.util.vfs.VFSStatus;
 import org.olat.core.util.vfs.callbacks.VFSSecurityCallback;
 
@@ -39,7 +40,7 @@ import org.olat.core.util.vfs.callbacks.VFSSecurityCallback;
  * 
  * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
  */
-public class VFSJavaIOFile implements VFSLeaf {
+public class VFSJavaIOFile implements VFSLeaf, JavaIOItem {
 
 	private final String name;
 	private final File file;
@@ -58,6 +59,11 @@ public class VFSJavaIOFile implements VFSLeaf {
 		return file != null && file.exists();
 	}
 
+	@Override
+	public File getBasefile() {
+		return file;
+	}
+
 	@Override
 	public VFSItem resolve(String path) {
 		return null;
-- 
GitLab