Skip to content
Snippets Groups Projects
Commit ec9c9b25 authored by srosse
Browse files

OO-3295: allow reloading qpool index if deleted, tune pdf extractor

parent a640616f
No related branches found
No related tags found
No related merge requests found
...@@ -64,7 +64,7 @@ public class PdfBoxExtractor implements PdfExtractor { ...@@ -64,7 +64,7 @@ public class PdfBoxExtractor implements PdfExtractor {
} }
} }
private FileContent extractTextFromPdf(VFSLeaf leaf) throws IOException, DocumentAccessException { private FileContent extractTextFromPdf(VFSLeaf leaf) throws IOException {
if (log.isDebug()) log.debug("readContent from pdf starts..."); if (log.isDebug()) log.debug("readContent from pdf starts...");
try(BufferedInputStream bis = new BufferedInputStream(leaf.getInputStream()); try(BufferedInputStream bis = new BufferedInputStream(leaf.getInputStream());
...@@ -72,6 +72,8 @@ public class PdfBoxExtractor implements PdfExtractor { ...@@ -72,6 +72,8 @@ public class PdfBoxExtractor implements PdfExtractor {
String title = getTitle(document); String title = getTitle(document);
if (log.isDebug()) log.debug("readContent PDDocument loaded"); if (log.isDebug()) log.debug("readContent PDDocument loaded");
PDFTextStripper stripper = new PDFTextStripper(); PDFTextStripper stripper = new PDFTextStripper();
stripper.setSortByPosition(true);
stripper.setSuppressDuplicateOverlappingText(true);
LimitedContentWriter writer = new LimitedContentWriter(50000, FileDocumentFactory.getMaxFileSize()); LimitedContentWriter writer = new LimitedContentWriter(50000, FileDocumentFactory.getMaxFileSize());
stripper.writeText(document, writer); stripper.writeText(document, writer);
writer.close(); writer.close();
...@@ -88,7 +90,7 @@ public class PdfBoxExtractor implements PdfExtractor { ...@@ -88,7 +90,7 @@ public class PdfBoxExtractor implements PdfExtractor {
return new FileContent(leaf.getName(), writer.toString()); return new FileContent(leaf.getName(), writer.toString());
} catch(Exception e) { } catch(Exception e) {
log.error("", e); log.error("", e);
return null; return new FileContent("", "");
} }
} }
......
...@@ -66,7 +66,7 @@ public class PdfExternalExtractor implements PdfExtractor { ...@@ -66,7 +66,7 @@ public class PdfExternalExtractor implements PdfExtractor {
return; return;
} }
List<String> cmds = new ArrayList<String>(); List<String> cmds = new ArrayList<>();
cmds.add(searchModule.getPdfExternalIndexerCmd()); cmds.add(searchModule.getPdfExternalIndexerCmd());
cmds.add(((LocalFileImpl)document).getBasefile().getAbsolutePath()); cmds.add(((LocalFileImpl)document).getBasefile().getAbsolutePath());
cmds.add(bufferFile.getAbsolutePath()); cmds.add(bufferFile.getAbsolutePath());
......
...@@ -334,9 +334,15 @@ public class JmsIndexer implements MessageListener, LifeFullIndexer, ConfigOnOff ...@@ -334,9 +334,15 @@ public class JmsIndexer implements MessageListener, LifeFullIndexer, ConfigOnOff
} }
private DirectoryReader getReader() throws IOException { private DirectoryReader getReader() throws IOException {
DirectoryReader newReader = DirectoryReader.openIfChanged(reader); if(reader == null) {
if(newReader != null) { File tempIndexDir = new File(permanentIndexPath);
reader = newReader; Directory indexPath = FSDirectory.open(tempIndexDir.toPath());
reader = DirectoryReader.open(indexPath);
} else {
DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
if(newReader != null) {
reader = newReader;
}
} }
return reader; return reader;
} }
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment