diff --git a/src/main/java/org/olat/search/service/document/file/PdfDocument.java b/src/main/java/org/olat/search/service/document/file/PdfDocument.java index 04d4051571b580a52a6bad993673240451454e92..75bb1c748c8179cc84b072e4079f7bba6257fad0 100644 --- a/src/main/java/org/olat/search/service/document/file/PdfDocument.java +++ b/src/main/java/org/olat/search/service/document/file/PdfDocument.java @@ -64,6 +64,11 @@ public class PdfDocument extends FileDocument { pdfTextBufferPath = SearchServiceFactory.getService().getSearchModuleConfig().getPdfTextBufferPath(); } + public PdfDocument(boolean pdfTextBuffering, String pdfTextBufferPath) { + this.pdfTextBuffering = pdfTextBuffering; + this.pdfTextBufferPath = pdfTextBufferPath; + } + public static Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf) throws IOException,DocumentException,DocumentAccessException { PdfDocument textDocument = new PdfDocument(); textDocument.setFilePath(getPdfTextTmpFilePath(leafResourceContext)); @@ -128,10 +133,12 @@ public class PdfDocument extends FileDocument { String dirPath = fullPdfTextTmpFilePath.substring(0,lastSlash); File dirFile = new File(dirPath); dirFile.mkdirs(); + + FileOutputStream out = new FileOutputStream(pdfTextFile); if(StringHelper.containsNonWhitespace(pdfText.getTitle())) { - FileUtils.save(new FileOutputStream(pdfTextFile), pdfText.getTitle() + "\u00A0|\u00A0" + pdfText.getContent(), "utf-8"); + FileUtils.save(out, pdfText.getTitle() + "\u00A0|\u00A0" + pdfText.getContent(), "utf-8"); } else { - FileUtils.save(new FileOutputStream(pdfTextFile), pdfText.getContent(), "utf-8"); + FileUtils.save(out, pdfText.getContent(), "utf-8"); } } diff --git a/src/test/java/org/olat/search/service/document/file/PDFDocumentTest.java b/src/test/java/org/olat/search/service/document/file/PDFDocumentTest.java new file mode 100644 index 0000000000000000000000000000000000000000..555429f97b17a3aedb97064795ba7bb5a4751e63 --- /dev/null +++ b/src/test/java/org/olat/search/service/document/file/PDFDocumentTest.java @@ -0,0 +1,75 @@ +/** + * <a href="http://www.openolat.org"> + * OpenOLAT - Online Learning and Training</a><br> + * <p> + * Licensed under the Apache License, Version 2.0 (the "License"); <br> + * you may not use this file except in compliance with the License.<br> + * You may obtain a copy of the License at the + * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a> + * <p> + * Unless required by applicable law or agreed to in writing,<br> + * software distributed under the License is distributed on an "AS IS" BASIS, <br> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br> + * See the License for the specific language governing permissions and <br> + * limitations under the License. + * <p> + * Initial code contributed and copyrighted by<br> + * frentix GmbH, http://www.frentix.com + * <p> + */ +package org.olat.search.service.document.file; + +import java.io.File; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.UUID; + +import junit.framework.Assert; + +import org.junit.Test; +import org.olat.core.util.vfs.VFSLeaf; +import org.olat.search.service.document.file.FileDocument.FileContent; +import org.olat.test.VFSJavaIOFile; + +/** + * + * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com + */ +public class PDFDocumentTest { + + @Test + public void testPDFDocument() throws DocumentException, DocumentAccessException, URISyntaxException { + URL pdfUrl = PDFDocumentTest.class.getResource("Test_pdf_indexing.pdf"); + Assert.assertNotNull(pdfUrl); + + VFSLeaf doc = new VFSJavaIOFile("Test_1.pdf", new File(pdfUrl.toURI())); + String temp = System.getProperty("java.io.tmpdir"); + PdfDocument document = new PdfDocument(false, temp); + FileContent content = document.readContent(doc); + Assert.assertNotNull(content); + Assert.assertEquals("Test pdf indexing", content.getTitle()); + Assert.assertEquals("Un petit texte en français", content.getContent().trim()); + } + + @Test + public void testPDFDocumentCaching() throws DocumentException, DocumentAccessException, URISyntaxException { + URL pdfUrl = PDFDocumentTest.class.getResource("Test_pdf_indexing.pdf"); + Assert.assertNotNull(pdfUrl); + + VFSLeaf doc = new VFSJavaIOFile(UUID.randomUUID().toString() + ".pdf", new File(pdfUrl.toURI())); + String temp = System.getProperty("java.io.tmpdir"); + PdfDocument document = new PdfDocument(false, temp); + + //index the pdf + FileContent contentIndexed = document.readContent(doc); + Assert.assertNotNull(contentIndexed); + Assert.assertEquals("Test pdf indexing", contentIndexed.getTitle()); + Assert.assertEquals("Un petit texte en français", contentIndexed.getContent().trim()); + + //take from the cache + FileContent contentCached = document.readContent(doc); + Assert.assertNotNull(contentCached); + Assert.assertEquals("Test pdf indexing", contentCached.getTitle()); + Assert.assertEquals("Un petit texte en français", contentCached.getContent().trim()); + } +} \ No newline at end of file diff --git a/src/test/java/org/olat/search/service/document/file/Test_pdf_indexing.pdf b/src/test/java/org/olat/search/service/document/file/Test_pdf_indexing.pdf new file mode 100644 index 0000000000000000000000000000000000000000..16d8cdcb0e9df6dffa044b6778dac33582abf4ee Binary files /dev/null and b/src/test/java/org/olat/search/service/document/file/Test_pdf_indexing.pdf differ diff --git a/src/test/java/org/olat/test/AllTestsJunit4.java b/src/test/java/org/olat/test/AllTestsJunit4.java index 15495ea596476e3c00db7975b77901618500e796..42fefc422c43ad24695d55c08459a3e1f4a07640 100644 --- a/src/test/java/org/olat/test/AllTestsJunit4.java +++ b/src/test/java/org/olat/test/AllTestsJunit4.java @@ -98,6 +98,7 @@ import org.junit.runners.Suite; org.olat.modules.wiki.versioning.diff.CookbookDiffTest.class,//ok org.olat.properties.PropertyTest.class,//ok org.olat.search.service.document.file.FileDocumentFactoryTest.class, + org.olat.search.service.document.file.PDFDocumentTest.class, org.olat.catalog.CatalogManagerTest.class,//ok org.olat.bookmark.BookmarkManagerTest.class,//ok org.olat.notifications.NotificationsManagerTest.class,//fail diff --git a/src/test/java/org/olat/test/VFSJavaIOFile.java b/src/test/java/org/olat/test/VFSJavaIOFile.java new file mode 100644 index 0000000000000000000000000000000000000000..5fd92fb877d721b2ad424f2a03b15604c1f38452 --- /dev/null +++ b/src/test/java/org/olat/test/VFSJavaIOFile.java @@ -0,0 +1,140 @@ +/** + * <a href="http://www.openolat.org"> + * OpenOLAT - Online Learning and Training</a><br> + * <p> + * Licensed under the Apache License, Version 2.0 (the "License"); <br> + * you may not use this file except in compliance with the License.<br> + * You may obtain a copy of the License at the + * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a> + * <p> + * Unless required by applicable law or agreed to in writing,<br> + * software distributed under the License is distributed on an "AS IS" BASIS, <br> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br> + * See the License for the specific language governing permissions and <br> + * limitations under the License. + * <p> + * Initial code contributed and copyrighted by<br> + * frentix GmbH, http://www.frentix.com + * <p> + */ +package org.olat.test; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.olat.core.util.vfs.VFSConstants; +import org.olat.core.util.vfs.VFSContainer; +import org.olat.core.util.vfs.VFSItem; +import org.olat.core.util.vfs.VFSLeaf; +import org.olat.core.util.vfs.VFSStatus; +import org.olat.core.util.vfs.callbacks.VFSSecurityCallback; + +/** + * An implementation of the VFSLEaf for a pure java.io.File with + * an absolute path. + * + * + * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com + */ +public class VFSJavaIOFile implements VFSLeaf { + + private final String name; + private final File file; + + public VFSJavaIOFile(String name, File file) { + this.name = name; + this.file = file; + } + + @Override + public VFSItem resolve(String path) { + return null; + } + + @Override + public VFSContainer getParentContainer() { + return null; + } + + @Override + public void setParentContainer(VFSContainer parentContainer) { + // + } + + @Override + public VFSStatus rename(String newname) { + return VFSConstants.NO; + } + + @Override + public VFSStatus delete() { + return VFSConstants.NO; + } + + @Override + public VFSStatus canRename() { + return VFSConstants.NO; + } + + @Override + public VFSStatus canDelete() { + return VFSConstants.NO; + } + + @Override + public String getName() { + return name == null ? file.getName() : name; + } + + @Override + public long getLastModified() { + return file.lastModified(); + } + + @Override + public VFSStatus canCopy() { + return VFSConstants.NO; + } + + @Override + public VFSStatus canWrite() { + return VFSConstants.NO; + } + + @Override + public VFSSecurityCallback getLocalSecurityCallback() { + return null; + } + + @Override + public void setLocalSecurityCallback(VFSSecurityCallback secCallback) { + // + } + + @Override + public boolean isSame(VFSItem vfsItem) { + return false; + } + + @Override + public InputStream getInputStream() { + try { + return new FileInputStream(file); + } catch (FileNotFoundException e) { + return null; + } + } + + @Override + public long getSize() { + return file.length(); + } + + @Override + public OutputStream getOutputStream(boolean append) { + return null; + } +} \ No newline at end of file