Skip to content
Snippets Groups Projects
Commit 0eca5eda authored by srosse's avatar srosse
Browse files

OO-336: add a unit test for the PDF extractor, covering both the extraction itself and the caching mechanism

parent 8be6fb8c
No related branches found
No related tags found
No related merge requests found
...@@ -64,6 +64,11 @@ public class PdfDocument extends FileDocument { ...@@ -64,6 +64,11 @@ public class PdfDocument extends FileDocument {
pdfTextBufferPath = SearchServiceFactory.getService().getSearchModuleConfig().getPdfTextBufferPath(); pdfTextBufferPath = SearchServiceFactory.getService().getSearchModuleConfig().getPdfTextBufferPath();
} }
/**
 * Creates a PDF document whose text-buffer settings are supplied by the
 * caller and stored as given in the instance fields.
 *
 * @param pdfTextBuffering value stored in the {@code pdfTextBuffering} field
 * @param pdfTextBufferPath value stored in the {@code pdfTextBufferPath} field
 */
public PdfDocument(boolean pdfTextBuffering, String pdfTextBufferPath) {
	this.pdfTextBufferPath = pdfTextBufferPath;
	this.pdfTextBuffering = pdfTextBuffering;
}
public static Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf) throws IOException,DocumentException,DocumentAccessException { public static Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf) throws IOException,DocumentException,DocumentAccessException {
PdfDocument textDocument = new PdfDocument(); PdfDocument textDocument = new PdfDocument();
textDocument.setFilePath(getPdfTextTmpFilePath(leafResourceContext)); textDocument.setFilePath(getPdfTextTmpFilePath(leafResourceContext));
...@@ -128,10 +133,12 @@ public class PdfDocument extends FileDocument { ...@@ -128,10 +133,12 @@ public class PdfDocument extends FileDocument {
String dirPath = fullPdfTextTmpFilePath.substring(0,lastSlash); String dirPath = fullPdfTextTmpFilePath.substring(0,lastSlash);
File dirFile = new File(dirPath); File dirFile = new File(dirPath);
dirFile.mkdirs(); dirFile.mkdirs();
FileOutputStream out = new FileOutputStream(pdfTextFile);
if(StringHelper.containsNonWhitespace(pdfText.getTitle())) { if(StringHelper.containsNonWhitespace(pdfText.getTitle())) {
FileUtils.save(new FileOutputStream(pdfTextFile), pdfText.getTitle() + "\u00A0|\u00A0" + pdfText.getContent(), "utf-8"); FileUtils.save(out, pdfText.getTitle() + "\u00A0|\u00A0" + pdfText.getContent(), "utf-8");
} else { } else {
FileUtils.save(new FileOutputStream(pdfTextFile), pdfText.getContent(), "utf-8"); FileUtils.save(out, pdfText.getContent(), "utf-8");
} }
} }
......
/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.search.service.document.file;
import java.io.File;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.UUID;
import junit.framework.Assert;
import org.junit.Test;
import org.olat.core.util.vfs.VFSLeaf;
import org.olat.search.service.document.file.FileDocument.FileContent;
import org.olat.test.VFSJavaIOFile;
/**
*
* @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
*/
public class PDFDocumentTest {

	/** Name of the sample PDF, resolved relative to this test class. */
	private static final String SAMPLE_PDF = "Test_pdf_indexing.pdf";

	/**
	 * Reads the sample PDF once and checks the extracted title and text.
	 */
	@Test
	public void testPDFDocument() throws DocumentException, DocumentAccessException, URISyntaxException {
		VFSLeaf leaf = sampleLeaf("Test_1.pdf");
		PdfDocument pdfDocument = newDocument();

		assertSampleContent(pdfDocument.readContent(leaf));
	}

	/**
	 * Reads the same leaf twice through one {@link PdfDocument} instance and
	 * expects identical title and text from both reads. The leaf gets a random
	 * name so every run uses a fresh one.
	 */
	@Test
	public void testPDFDocumentCaching() throws DocumentException, DocumentAccessException, URISyntaxException {
		VFSLeaf leaf = sampleLeaf(UUID.randomUUID().toString() + ".pdf");
		PdfDocument pdfDocument = newDocument();

		// first read: the pdf is indexed
		FileContent indexed = pdfDocument.readContent(leaf);
		assertSampleContent(indexed);

		// second read: expected to be taken from the cache
		// NOTE(review): the document is created with pdfTextBuffering=false, so it is
		// unclear from this test alone whether the second read really hits a cache;
		// confirm against PdfDocument.readContent.
		FileContent cached = pdfDocument.readContent(leaf);
		assertSampleContent(cached);
	}

	/**
	 * Wraps the sample PDF in a {@link VFSJavaIOFile} carrying the given name.
	 * Asserts that the resource is found before converting its URL to a file.
	 */
	private static VFSLeaf sampleLeaf(String leafName) throws URISyntaxException {
		URL resource = PDFDocumentTest.class.getResource(SAMPLE_PDF);
		Assert.assertNotNull(resource);
		File pdfFile = new File(resource.toURI());
		return new VFSJavaIOFile(leafName, pdfFile);
	}

	/**
	 * Builds a {@link PdfDocument} with the buffering flag set to {@code false}
	 * and the JVM temporary directory as buffer path.
	 */
	private static PdfDocument newDocument() {
		String tmpDir = System.getProperty("java.io.tmpdir");
		return new PdfDocument(false, tmpDir);
	}

	/**
	 * Checks that the content is present and carries the title and the
	 * (trimmed) text expected from the sample PDF.
	 */
	private static void assertSampleContent(FileContent extracted) {
		Assert.assertNotNull(extracted);
		Assert.assertEquals("Test pdf indexing", extracted.getTitle());
		Assert.assertEquals("Un petit texte en français", extracted.getContent().trim());
	}
}
\ No newline at end of file
File added
...@@ -98,6 +98,7 @@ import org.junit.runners.Suite; ...@@ -98,6 +98,7 @@ import org.junit.runners.Suite;
org.olat.modules.wiki.versioning.diff.CookbookDiffTest.class,//ok org.olat.modules.wiki.versioning.diff.CookbookDiffTest.class,//ok
org.olat.properties.PropertyTest.class,//ok org.olat.properties.PropertyTest.class,//ok
org.olat.search.service.document.file.FileDocumentFactoryTest.class, org.olat.search.service.document.file.FileDocumentFactoryTest.class,
org.olat.search.service.document.file.PDFDocumentTest.class,
org.olat.catalog.CatalogManagerTest.class,//ok org.olat.catalog.CatalogManagerTest.class,//ok
org.olat.bookmark.BookmarkManagerTest.class,//ok org.olat.bookmark.BookmarkManagerTest.class,//ok
org.olat.notifications.NotificationsManagerTest.class,//fail org.olat.notifications.NotificationsManagerTest.class,//fail
......
/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.test;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.OutputStream;
import org.olat.core.util.vfs.VFSConstants;
import org.olat.core.util.vfs.VFSContainer;
import org.olat.core.util.vfs.VFSItem;
import org.olat.core.util.vfs.VFSLeaf;
import org.olat.core.util.vfs.VFSStatus;
import org.olat.core.util.vfs.callbacks.VFSSecurityCallback;
/**
* An implementation of VFSLeaf backed by a plain java.io.File with
* an absolute path.
*
*
* @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
*/
public class VFSJavaIOFile implements VFSLeaf {

	/** Display name; may be {@code null}, in which case the file's own name is used. */
	private final String name;
	/** The java.io file every read-only query is answered from. */
	private final File file;

	/**
	 * @param name name reported by {@link #getName()}, or {@code null} to fall back to the file name
	 * @param file the underlying file
	 */
	public VFSJavaIOFile(String name, File file) {
		this.name = name;
		this.file = file;
	}

	/** Path resolution is not supported: always returns {@code null}. */
	@Override
	public VFSItem resolve(String path) {
		return null;
	}

	/** This leaf has no parent: always returns {@code null}. */
	@Override
	public VFSContainer getParentContainer() {
		return null;
	}

	/** Ignored; the given container is not stored. */
	@Override
	public void setParentContainer(VFSContainer parentContainer) {
		// intentionally empty
	}

	/** Renaming is not performed: always returns {@link VFSConstants#NO}. */
	@Override
	public VFSStatus rename(String newname) {
		return VFSConstants.NO;
	}

	/** Deleting is not performed: always returns {@link VFSConstants#NO}. */
	@Override
	public VFSStatus delete() {
		return VFSConstants.NO;
	}

	/** @return always {@link VFSConstants#NO} */
	@Override
	public VFSStatus canRename() {
		return VFSConstants.NO;
	}

	/** @return always {@link VFSConstants#NO} */
	@Override
	public VFSStatus canDelete() {
		return VFSConstants.NO;
	}

	/**
	 * @return the name given at construction, or the name of the underlying
	 *         file when no name was given
	 */
	@Override
	public String getName() {
		if (name != null) {
			return name;
		}
		return file.getName();
	}

	/** @return the last-modified timestamp of the underlying file */
	@Override
	public long getLastModified() {
		return file.lastModified();
	}

	/** @return always {@link VFSConstants#NO} */
	@Override
	public VFSStatus canCopy() {
		return VFSConstants.NO;
	}

	/** @return always {@link VFSConstants#NO} */
	@Override
	public VFSStatus canWrite() {
		return VFSConstants.NO;
	}

	/** No security callback is held: always returns {@code null}. */
	@Override
	public VFSSecurityCallback getLocalSecurityCallback() {
		return null;
	}

	/** Ignored; the given callback is not stored. */
	@Override
	public void setLocalSecurityCallback(VFSSecurityCallback secCallback) {
		// intentionally empty
	}

	/** @return always {@code false}, whatever item is passed in */
	@Override
	public boolean isSame(VFSItem vfsItem) {
		return false;
	}

	/**
	 * Opens a new stream on the underlying file.
	 *
	 * @return a fresh {@link FileInputStream}, or {@code null} when the file
	 *         cannot be found
	 */
	@Override
	public InputStream getInputStream() {
		InputStream stream;
		try {
			stream = new FileInputStream(file);
		} catch (FileNotFoundException notFound) {
			// a missing file is reported to the caller as null, not as an exception
			stream = null;
		}
		return stream;
	}

	/** @return the length of the underlying file */
	@Override
	public long getSize() {
		return file.length();
	}

	/** Writing is not supported: always returns {@code null}. */
	@Override
	public OutputStream getOutputStream(boolean append) {
		return null;
	}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment