Skip to content
Snippets Groups Projects
Commit 2f773884 authored by srosse's avatar srosse
Browse files

OO-834: delegate the search to a pool to contain the Lucene bad habit to use...

OO-834: delegate the search to a pool to contain Lucene's bad habit of using ThreadLocals, update Lucene to 4.5.1
parent bcf46a0a
No related branches found
No related tags found
No related merge requests found
Showing
with 392 additions and 177 deletions
......@@ -1969,27 +1969,27 @@
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.1.0</version>
<version>4.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>4.1.0</version>
<version>4.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.1.0</version>
<version>4.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>4.1.0</version>
<version>4.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-suggest</artifactId>
<version>4.1.0</version>
<version>4.5.1</version>
</dependency>
<!-- End lucene dependencies -->
......
......@@ -40,7 +40,7 @@ import org.olat.core.id.Roles;
*/
public interface SearchService {
public static final Version OO_LUCENE_VERSION = Version.LUCENE_41;
public static final Version OO_LUCENE_VERSION = Version.LUCENE_45;
/**
*
......
......@@ -23,6 +23,17 @@
<constructor-arg index="3" ref="schedulerFactoryBean"/>
<property name="lifeIndexer" ref="jmsIndexer"/>
<property name="metadataFields" ref="SearchMetadataFieldsProvider" />
<property name="searchExecutor" ref="searchExecutor" />
</bean>
<bean id="searchExecutor" class="org.springframework.core.task.support.ExecutorServiceAdapter">
<constructor-arg index="0" ref="searchSpringExecutor" />
</bean>
<bean id="searchSpringExecutor" class="org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor">
<property name="corePoolSize" value="2" />
<property name="maxPoolSize" value="3" />
<property name="queueCapacity" value="100" />
</bean>
<bean id="org.olat.search.service.disabled" class="org.olat.search.service.SearchServiceDisabled"
......
/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.search.service;
import java.util.List;
import java.util.concurrent.Callable;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.olat.core.commons.persistence.DBFactory;
import org.olat.core.id.Identity;
import org.olat.core.id.Roles;
import org.olat.core.logging.OLog;
import org.olat.core.logging.Tracing;
import org.olat.search.SearchResults;
import org.olat.search.ServiceNotAvailableException;
import org.olat.search.service.searcher.SearchResultsImpl;
/**
 * Task that runs one full-text query against the Lucene index and wraps the
 * hits in a {@link SearchResults}. It implements {@link Callable} so that the
 * search can be handed to an executor instead of running on the caller's thread.
 * <p>
 * The task never throws: every failure is logged and reported as a
 * <code>null</code> result.
 *
 * Initial date: 24.10.2013<br>
 * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
 *
 */
class SearchCallable implements Callable<SearchResults> {
	private static final OLog log = Tracing.createLoggerFor(SearchCallable.class);

	// All parameters are captured once at construction time and never change.
	private final String queryString;
	private final List<String> condQueries;
	private final Identity identity;
	private final Roles roles;
	private final int firstResult;
	private final int maxResults;
	private final boolean doHighlighting;
	private final SearchServiceImpl searchService;

	/**
	 * @param queryString The query as typed by the user
	 * @param condQueries Additional query strings combined with the main query by the search service
	 * @param identity The identity passed on to the search results
	 * @param roles The roles passed on to the search results
	 * @param firstResult Index of the first hit to return
	 * @param maxResults Maximum number of hits to return
	 * @param doHighlighting true to let the results highlight the matches
	 * @param searchService The service which provides the index searcher, the query builder, the main indexer and the analyzer
	 */
	public SearchCallable(String queryString, List<String> condQueries, Identity identity, Roles roles,
			int firstResult, int maxResults, boolean doHighlighting, SearchServiceImpl searchService) {
		this.queryString = queryString;
		this.condQueries = condQueries;
		this.identity = identity;
		this.roles = roles;
		this.firstResult = firstResult;
		this.maxResults = maxResults;
		this.doHighlighting = doHighlighting;
		this.searchService = searchService;
	}

	/**
	 * Execute the search.
	 *
	 * @return The search results, or <code>null</code> if the index does not
	 *         exist or the search failed for any reason (the cause is logged)
	 */
	@Override
	public SearchResults call() {
		try {
			boolean debug = log.isDebug();
			if (!searchService.existIndex()) {
				// Nothing to search in: report it once and give up, instead of
				// throwing an exception only to catch and log it again below.
				log.warn("Index does not exist, can't search for queryString: "+queryString);
				return null;
			}

			if(debug) log.debug("queryString=" + queryString);
			IndexSearcher searcher = searchService.getIndexSearcher();
			BooleanQuery query = searchService.createQuery(queryString, condQueries);
			if(debug) log.debug("query=" + query);

			// Only the Lucene search itself is timed
			long startTime = System.currentTimeMillis();
			int n = SearchServiceFactory.getService().getSearchModuleConfig().getMaxHits();
			TopDocs docs = searcher.search(query, n);
			long queryTime = System.currentTimeMillis() - startTime;
			if(debug) log.debug("hits.length()=" + docs.totalHits);

			SearchResultsImpl searchResult = new SearchResultsImpl(searchService.getMainIndexer(), searcher, docs, query,
					searchService.getAnalyzer(), identity, roles, firstResult, maxResults, doHighlighting, false);
			searchResult.setQueryTime(queryTime);
			searchResult.setNumberOfIndexDocuments(docs.totalHits);
			return searchResult;
		} catch (Exception ex) {
			// Keep the query in the message, a bare stack trace doesn't tell which search failed
			log.error("Search failed for queryString: " + queryString, ex);
			return null;
		} finally {
			// Commit and close the DB session whatever the outcome.
			// NOTE(review): presumably needed because the executing thread outlives this task - confirm
			DBFactory.getInstance().commitAndCloseSession();
		}
	}
}
\ No newline at end of file
/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.search.service;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.olat.core.commons.persistence.DBFactory;
import org.olat.core.commons.persistence.SortKey;
import org.olat.core.logging.OLog;
import org.olat.core.logging.Tracing;
import org.olat.core.util.StringHelper;
import org.olat.search.ServiceNotAvailableException;
import org.olat.search.model.AbstractOlatDocument;
/**
 * Task that runs one query against the Lucene index, optionally sorted, and
 * returns the database keys stored in the matching documents. It implements
 * {@link Callable} so that the search can be handed to an executor instead of
 * running on the caller's thread.
 * <p>
 * The task never throws and never returns <code>null</code>: every failure is
 * logged and reported as an empty list.
 *
 * Initial date: 24.10.2013<br>
 * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
 *
 */
class SearchOrderByCallable implements Callable<List<Long>> {
	private static final OLog log = Tracing.createLoggerFor(SearchOrderByCallable.class);

	// All parameters are captured once at construction time and never change.
	private final String queryString;
	private final List<String> condQueries;
	private final SortKey[] orderBy;
	private final int firstResult;
	private final int maxResults;
	private final SearchServiceImpl searchService;

	/**
	 * @param queryString The query as typed by the user
	 * @param condQueries Additional query strings combined with the main query by the search service
	 * @param orderBy The sort keys (field name and direction), can be null or empty for the default order
	 * @param firstResult Index of the first hit to return
	 * @param maxResults Maximum number of keys to return
	 * @param searchService The service which provides the index searcher and the query builder
	 */
	public SearchOrderByCallable(String queryString, List<String> condQueries, SortKey[] orderBy,
			int firstResult, int maxResults, SearchServiceImpl searchService) {
		this.queryString = queryString;
		this.condQueries = condQueries;
		this.orderBy = orderBy;
		this.firstResult = firstResult;
		this.maxResults = maxResults;
		this.searchService = searchService;
	}

	/**
	 * Execute the search.
	 *
	 * @return The database keys of the hits in the requested window, in result
	 *         order. The list is empty if the index does not exist or if the
	 *         search failed for any reason (the cause is logged).
	 */
	@Override
	public List<Long> call() {
		try {
			if (!searchService.existIndex()) {
				// Nothing to search in: report it once and give up, instead of
				// throwing an exception only to catch and log it again below.
				log.warn("Index does not exist, can't search for queryString: "+queryString);
				return new ArrayList<Long>();
			}

			log.info("queryString=" + queryString);
			IndexSearcher searcher = searchService.getIndexSearcher();
			BooleanQuery query = searchService.createQuery(queryString, condQueries);

			int n = SearchServiceFactory.getService().getSearchModuleConfig().getMaxHits();
			TopDocs docs;
			if(orderBy != null && orderBy.length > 0 && orderBy[0] != null) {
				SortField[] sortFields = new SortField[orderBy.length];
				for(int i=0; i<orderBy.length; i++) {
					sortFields[i] = new SortField(orderBy[i].getKey(), SortField.Type.STRING_VAL, orderBy[i].isAsc());
				}
				docs = searcher.search(query, n, new Sort(sortFields));
			} else {
				docs = searcher.search(query, n);
			}

			// Never read past the hits which were really returned, and treat a
			// negative offset as "start at the first hit".
			int numOfDocs = Math.min(Math.min(n, docs.totalHits), docs.scoreDocs.length);
			int start = Math.max(0, firstResult);

			// Only the database key is needed, don't load the other stored fields
			Set<String> retrievedFields = new HashSet<String>();
			retrievedFields.add(AbstractOlatDocument.DB_ID_NAME);

			List<Long> res = new ArrayList<Long>();
			for (int i=start; i<numOfDocs && res.size() < maxResults; i++) {
				Document doc = searcher.doc(docs.scoreDocs[i].doc, retrievedFields);
				String dbKeyStr = doc.get(AbstractOlatDocument.DB_ID_NAME);
				if(StringHelper.containsNonWhitespace(dbKeyStr)) {
					res.add(Long.parseLong(dbKeyStr));
				}
			}
			return res;
		} catch (Exception ex) {
			// Keep the query in the message, a bare stack trace doesn't tell which search failed
			log.error("Search failed for queryString: " + queryString, ex);
			return new ArrayList<Long>();
		} finally {
			// Commit and close the DB session whatever the outcome.
			// NOTE(review): presumably needed because the executing thread outlives this task - confirm
			DBFactory.getInstance().commitAndCloseSession();
		}
	}
}
\ No newline at end of file
......@@ -30,13 +30,13 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
......@@ -46,9 +46,6 @@ import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.olat.core.CoreSpringFactory;
......@@ -73,7 +70,6 @@ import org.olat.search.service.indexer.Index;
import org.olat.search.service.indexer.LifeFullIndexer;
import org.olat.search.service.indexer.MainIndexer;
import org.olat.search.service.searcher.JmsSearchProvider;
import org.olat.search.service.searcher.SearchResultsImpl;
import org.olat.search.service.spell.SearchSpellChecker;
import org.quartz.JobDetail;
import org.quartz.Scheduler;
......@@ -104,6 +100,8 @@ public class SearchServiceImpl implements SearchService {
/** Counts number of search queries since last restart. */
private long queryCount = 0;
private Object createIndexSearcherLock = new Object();
private ExecutorService searchExecutor;
private String fields[] = {
AbstractOlatDocument.TITLE_FIELD_NAME, AbstractOlatDocument.DESCRIPTION_FIELD_NAME,
......@@ -133,6 +131,10 @@ public class SearchServiceImpl implements SearchService {
searchProvider.setSearchService(this);
}
/**
 * Set the executor to which this service submits its search tasks.
 * @param searchExecutor The executor service
 */
public void setSearchExecutor(ExecutorService searchExecutor) {
this.searchExecutor = searchExecutor;
}
/**
* [used by Spring]
* @param lifeIndexer
......@@ -141,6 +143,14 @@ public class SearchServiceImpl implements SearchService {
this.lifeIndexer = lifeIndexer;
}
/**
 * @return The main indexer held by this service
 */
protected MainIndexer getMainIndexer() {
return mainIndexer;
}
/**
 * @return The analyzer held by this service
 */
protected Analyzer getAnalyzer() {
return analyzer;
}
/**
* Start the job indexer
*/
......@@ -189,6 +199,7 @@ public class SearchServiceImpl implements SearchService {
searchSpellChecker.setIndexPath(searchModuleConfig.getFullIndexPath());
searchSpellChecker.setSpellDictionaryPath(searchModuleConfig.getSpellCheckDictionaryPath());
searchSpellChecker.setSpellCheckEnabled(searchModuleConfig.getSpellCheckEnabled());
searchSpellChecker.setSearchExecutor(searchExecutor);
indexer = new Index(searchModuleConfig, searchSpellChecker, mainIndexer, lifeIndexer);
......@@ -218,35 +229,14 @@ public class SearchServiceImpl implements SearchService {
int firstResult, int maxResults, boolean doHighlighting)
throws ServiceNotAvailableException, ParseException {
try {
if (!existIndex()) {
log.warn("Index does not exist, can't search for queryString: "+queryString);
throw new ServiceNotAvailableException("Index does not exist");
}
log.info("queryString=" + queryString);
IndexSearcher searcher = getIndexSearcher();
BooleanQuery query = createQuery(queryString, condQueries);
if (log.isDebug()) log.debug("query=" + query);
long startTime = System.currentTimeMillis();
int n = SearchServiceFactory.getService().getSearchModuleConfig().getMaxHits();
TopDocs docs = searcher.search(query, n);
long queryTime = System.currentTimeMillis() - startTime;
if (log.isDebug()) log.debug("hits.length()=" + docs.totalHits);
SearchResultsImpl searchResult = new SearchResultsImpl(mainIndexer, searcher, docs, query, analyzer, identity, roles, firstResult, maxResults, doHighlighting, false);
searchResult.setQueryTime(queryTime);
searchResult.setNumberOfIndexDocuments(docs.totalHits);
queryCount++;
return searchResult;
} catch (ServiceNotAvailableException naex) {
// pass exception
throw new ServiceNotAvailableException(naex.getMessage());
} catch (ParseException pex) {
throw new ParseException("can not parse query=" + queryString);
} catch (Exception ex) {
log.warn("Exception in search", ex);
throw new ServiceNotAvailableException(ex.getMessage());
SearchCallable run = new SearchCallable(queryString, condQueries, identity, roles, firstResult, maxResults, doHighlighting, this);
Future<SearchResults> futureResults = searchExecutor.submit(run);
SearchResults results = futureResults.get();
queryCount++;
return results;
} catch (Exception e) {
log.error("", e);
return null;
}
}
......@@ -255,54 +245,18 @@ public class SearchServiceImpl implements SearchService {
int firstResult, int maxResults, SortKey... orderBy)
throws ServiceNotAvailableException, ParseException, QueryException {
try {
if (!existIndex()) {
log.warn("Index does not exist, can't search for queryString: "+queryString);
throw new ServiceNotAvailableException("Index does not exist");
}
log.info("queryString=" + queryString);
IndexSearcher searcher = getIndexSearcher();
BooleanQuery query = createQuery(queryString, condQueries);
int n = SearchServiceFactory.getService().getSearchModuleConfig().getMaxHits();
TopDocs docs;
if(orderBy != null && orderBy.length > 0 && orderBy[0] != null) {
SortField[] sortFields = new SortField[orderBy.length];
for(int i=0; i<orderBy.length; i++) {
sortFields[i] = new SortField(orderBy[i].getKey(), SortField.Type.STRING_VAL, orderBy[i].isAsc());
}
Sort sort = new Sort(sortFields);
docs = searcher.search(query, n, sort);
} else {
docs = searcher.search(query, n);
}
int numOfDocs = Math.min(n, docs.totalHits);
Set<String> retrievedFields = new HashSet<String>();
retrievedFields.add(AbstractOlatDocument.DB_ID_NAME);
List<Long> res = new ArrayList<Long>();
for (int i=firstResult; i<numOfDocs && res.size() < maxResults; i++) {
Document doc = searcher.doc(docs.scoreDocs[i].doc, retrievedFields);
String dbKeyStr = doc.get(AbstractOlatDocument.DB_ID_NAME);
if(StringHelper.containsNonWhitespace(dbKeyStr)) {
res.add(Long.parseLong(dbKeyStr));
}
}
queryCount++;
return res;
} catch (ServiceNotAvailableException naex) {
// pass exception
throw new ServiceNotAvailableException(naex.getMessage());
} catch (ParseException pex) {
throw new ParseException("can not parse query=" + queryString);
} catch (Exception ex) {
log.warn("Exception in search", ex);
throw new ServiceNotAvailableException(ex.getMessage());
SearchOrderByCallable run = new SearchOrderByCallable(queryString, condQueries, orderBy, firstResult, maxResults, this);
Future<List<Long>> futureResults = searchExecutor.submit(run);
List<Long> results = futureResults.get();
queryCount++;
return results;
} catch (Exception e) {
log.error("", e);
return new ArrayList<Long>(1);
}
}
private BooleanQuery createQuery(String queryString, List<String> condQueries)
protected BooleanQuery createQuery(String queryString, List<String> condQueries)
throws ParseException {
BooleanQuery query = new BooleanQuery();
if(StringHelper.containsNonWhitespace(queryString)) {
......@@ -383,7 +337,7 @@ public class SearchServiceImpl implements SearchService {
return true;
}
private IndexSearcher getIndexSearcher() throws ServiceNotAvailableException, IOException {
protected IndexSearcher getIndexSearcher() throws ServiceNotAvailableException, IOException {
if(searcher == null) {
synchronized (createIndexSearcherLock) {//o_clusterOK by:fj if service is only configured on one vm, which is recommended way
if (searcher == null) {
......@@ -454,7 +408,7 @@ public class SearchServiceImpl implements SearchService {
* Check if index exist.
* @return true : Index exists.
*/
private boolean existIndex()
protected boolean existIndex()
throws IOException {
try {
File indexFile = new File(searchModuleConfig.getFullIndexPath());
......
......@@ -24,7 +24,6 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.olat.core.logging.OLog;
import org.olat.core.logging.Tracing;
......@@ -39,38 +38,38 @@ public class IndexWriterHolder {
private static final OLog log = Tracing.createLoggerFor(IndexWriterHolder.class);
private Directory indexPath;
private IndexWriterConfig config;
private JmsIndexer indexer;
private AtomicInteger counter = new AtomicInteger(0);
private IndexWriter writerRef;
public IndexWriterHolder(Directory indexPath, IndexWriterConfig config)
public IndexWriterHolder(Directory indexPath, JmsIndexer indexer)
throws IOException {
this.indexPath = indexPath;
this.config = config;
this.indexer = indexer;
}
public synchronized void ensureIndexExists() {
if(!DirectoryReader.indexExists(indexPath)) {
IndexWriter writer = null;
try {
IndexWriter writer = null;
try {
if(!DirectoryReader.indexExists(indexPath)) {
writer = getAndLock();
} catch (IOException e) {
log.error("", e);
} finally {
release(writer);
}
} catch (IOException e) {
log.error("", e);
} finally {
release(writer);
}
}
public synchronized IndexWriter getAndLock() throws IOException {
if(writerRef == null) {
long start = System.currentTimeMillis();
IndexWriter indexWriter = new IndexWriter(indexPath, config);
IndexWriter indexWriter = new IndexWriter(indexPath, indexer.newIndexWriterConfig());
if(!DirectoryReader.indexExists(indexPath)) {
indexWriter.commit();//make sure it exists
}
System.out.println("Opening writer takes (ms): " + (System.currentTimeMillis() - start));
log.info("Opening writer takes (ms): " + (System.currentTimeMillis() - start));
writerRef = indexWriter;
}
counter.incrementAndGet();
......@@ -86,7 +85,7 @@ public class IndexWriterHolder {
indexWriter.commit();
indexWriter.close();
writerRef = null;
System.out.println("Close writer takes (ms): " + (System.currentTimeMillis() - start));
log.info("Close writer takes (ms): " + (System.currentTimeMillis() - start));
}
} catch (IOException e) {
log.error("", e);
......
......@@ -83,7 +83,6 @@ public class JmsIndexer implements MessageListener, LifeFullIndexer {
private IndexWriterHolder permanentIndexWriter;
private double ramBufferSizeMB;
private boolean useCompoundFile;
private boolean indexingNode;
private List<LifeIndexer> indexers = new ArrayList<LifeIndexer>();
......@@ -91,7 +90,6 @@ public class JmsIndexer implements MessageListener, LifeFullIndexer {
public JmsIndexer(SearchModule searchModuleConfig) {
indexingNode = searchModuleConfig.isSearchServiceEnabled();
ramBufferSizeMB = searchModuleConfig.getRAMBufferSizeMB();
useCompoundFile = searchModuleConfig.getUseCompoundFile();
permanentIndexPath = searchModuleConfig.getFullPermanentIndexPath();
}
......@@ -173,7 +171,7 @@ public class JmsIndexer implements MessageListener, LifeFullIndexer {
File tempIndexDir = new File(permanentIndexPath);
Directory indexPath = FSDirectory.open(tempIndexDir);
if(indexingNode) {
permanentIndexWriter = new IndexWriterHolder (indexPath, newIndexWriterConfig());
permanentIndexWriter = new IndexWriterHolder (indexPath, this);
permanentIndexWriter.ensureIndexExists();
}
reader = DirectoryReader.open(indexPath);
......@@ -184,7 +182,6 @@ public class JmsIndexer implements MessageListener, LifeFullIndexer {
public LogMergePolicy newLogMergePolicy() {
LogMergePolicy logmp = new LogDocMergePolicy();
logmp.setUseCompoundFile(useCompoundFile);
logmp.setCalibrateSizeByDeletes(true);
logmp.setMergeFactor(INDEX_MERGE_FACTOR);
return logmp;
......
......@@ -100,7 +100,6 @@ public class OlatFullIndexer {
private MainIndexer mainIndexer;
private double ramBufferSizeMB;
private boolean useCompoundFile;
/**
*
......@@ -117,7 +116,6 @@ public class OlatFullIndexer {
numberIndexWriter = searchModuleConfig.getNumberIndexWriter();
documentsPerInterval = searchModuleConfig.getDocumentsPerInterval();
ramBufferSizeMB = searchModuleConfig.getRAMBufferSizeMB();
useCompoundFile = searchModuleConfig.getUseCompoundFile();
fullIndexerStatus = new FullIndexerStatus(numberIndexWriter);
stopIndexing = true;
documentQueue = new Vector<Document>();
......@@ -148,7 +146,6 @@ public class OlatFullIndexer {
public LogMergePolicy newLogMergePolicy() {
LogMergePolicy logmp = new LogDocMergePolicy();
logmp.setUseCompoundFile(useCompoundFile);
logmp.setCalibrateSizeByDeletes(true);
logmp.setMergeFactor(INDEX_MERGE_FACTOR);
return logmp;
......
/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.search.service.spell;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import org.apache.lucene.search.spell.SpellChecker;
import org.olat.core.logging.OLog;
import org.olat.core.logging.Tracing;
/**
 * Task that asks the spell checker for words similar to a query. It implements
 * {@link Callable} so that the lookup can be handed to an executor instead of
 * running on the caller's thread.
 *
 * Initial date: 24.10.2013<br>
 * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
 *
 */
class CheckCallable implements Callable<Set<String>> {
	private static final OLog log = Tracing.createLoggerFor(CheckCallable.class);

	private final String query;
	private final SearchSpellChecker spellCheckerService;

	/**
	 * @param query The word to find suggestions for
	 * @param spellCheckerService The service which provides the Lucene spell checker
	 */
	public CheckCallable(String query, SearchSpellChecker spellCheckerService) {
		this.query = query;
		this.spellCheckerService = spellCheckerService;
	}

	/**
	 * Look up the suggestions.
	 *
	 * @return The suggested words, sorted and without duplicates. The set is
	 *         empty if no spell checker is available or if reading the spell
	 *         index failed with an I/O error.
	 */
	@Override
	public Set<String> call() throws Exception {
		try {
			SpellChecker checker = spellCheckerService.getSpellChecker();
			if (checker == null) {
				// no spell checker available, nothing to suggest
				return new HashSet<String>();
			}

			String[] similarWords = checker.suggestSimilar(query, 5);
			// a sorted set drops the duplicates
			Set<String> suggestions = new TreeSet<String>();
			for (int i = 0; i < similarWords.length; i++) {
				suggestions.add(similarWords[i]);
			}
			return suggestions;
		} catch (IOException e) {
			log.warn("Can not spell check",e);
			return new HashSet<String>();
		}
	}
}
......@@ -27,8 +27,10 @@ package org.olat.search.service.spell;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
......@@ -36,13 +38,9 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.olat.core.logging.OLog;
......@@ -68,10 +66,14 @@ public class SearchSpellChecker {
private String spellDictionaryPath;
private SpellChecker spellChecker;
private boolean isSpellCheckEnabled = true;
private ExecutorService searchExecutor;
public SearchSpellChecker() {
//called by Spring
//
}
/**
 * Set the executor to which the spell check tasks are submitted.
 * @param searchExecutor The executor service
 */
public void setSearchExecutor(ExecutorService searchExecutor) {
this.searchExecutor = searchExecutor;
}
/**
......@@ -82,66 +84,31 @@ public class SearchSpellChecker {
*/
public Set<String> check(String query) {
try {
if(spellChecker==null) { //lazy initialization
try {
synchronized(spellDictionaryPath) {//o_clusterOK by:pb if service is only configured on one vm, which is recommended way
File spellDictionaryFile = new File(spellDictionaryPath);
Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);
if (spellChecker==null && DirectoryReader.indexExists(spellIndexDirectory) && isSpellCheckEnabled ) {
spellChecker = new SpellChecker(spellIndexDirectory);
spellChecker.setAccuracy(0.7f);
}
}
} catch (IOException e) {
log.warn("Can not initialze SpellChecker",e);
}
}
if (spellChecker != null) {
String[] words = spellChecker.suggestSimilar(query,5);
// Remove dublicate
Set<String> filteredList = new TreeSet<String>();
for (String word : words) {
filteredList.add(word);
}
return filteredList;
}
} catch (IOException e) {
CheckCallable run = new CheckCallable(query, this);
Future<Set<String>> futureResults = searchExecutor.submit(run);
return futureResults.get();
} catch (Exception e) {
log.warn("Can not spell check",e);
return null;
return new HashSet<String>();
}
return null;
}
public Set<String> directCheck(String query) {
try {
File indexPathIO = new File(indexPath);
Directory spellIndexDirectory = FSDirectory.open(indexPathIO);
DirectoryReader indexReader = DirectoryReader.open(spellIndexDirectory);
String[] fields = new String[]{ OlatDocument.TITLE_FIELD_NAME, OlatDocument.DESCRIPTION_FIELD_NAME, OlatDocument.AUTHOR_FIELD_NAME, OlatDocument.CONTENT_FIELD_NAME};
Set<String> filteredList = new TreeSet<String>();
for(String field:fields) {
Term term = new Term(field, query);
DirectSpellChecker spellChecker = new DirectSpellChecker();
spellChecker.setAccuracy(0.7f);
spellChecker.setMaxInspections(10);
spellChecker.setMaxEdits(2);
//spellChecker.setDistance(new NGramDistance(4));
SuggestWord[] words = spellChecker.suggestSimilar(term, 5, indexReader, SuggestMode.SUGGEST_ALWAYS, 0.7f);
for (SuggestWord word : words) {
filteredList.add(word.string);
protected SpellChecker getSpellChecker() {
if(spellChecker==null) { //lazy initialization
try {
synchronized(spellDictionaryPath) {//o_clusterOK by:pb if service is only configured on one vm, which is recommended way
File spellDictionaryFile = new File(spellDictionaryPath);
Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);
if (spellChecker==null && DirectoryReader.indexExists(spellIndexDirectory) && isSpellCheckEnabled ) {
spellChecker = new SpellChecker(spellIndexDirectory);
spellChecker.setAccuracy(0.7f);
}
}
} catch (IOException e) {
log.warn("Can not initialze SpellChecker",e);
}
return filteredList;
} catch (IOException e) {
e.printStackTrace();
return null;
}
return spellChecker;
}
/**
......@@ -157,37 +124,43 @@ public class SearchSpellChecker {
if (log.isDebug()) startSpellIndexTime = System.currentTimeMillis();
Directory indexDir = FSDirectory.open(new File(indexPath));
indexReader = DirectoryReader.open(indexDir);
// 1. Create content spellIndex
// 1. Create content spellIndex
File spellDictionaryFile = new File(spellDictionaryPath);
FSDirectory contentSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + CONTENT_PATH));//true
SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
Dictionary contentDictionary = new LuceneDictionary(indexReader, OlatDocument.CONTENT_FIELD_NAME);
Analyzer analyzer = new StandardAnalyzer(SearchService.OO_LUCENE_VERSION);
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
IndexWriterConfig contentIndexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
contentSpellChecker.indexDictionary(contentDictionary, contentIndexWriterConfig, true);
contentSpellChecker.indexDictionary(contentDictionary, indexWriterConfig, true);
// 2. Create title spellIndex
FSDirectory titleSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + TITLE_PATH));//true
SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
Dictionary titleDictionary = new LuceneDictionary(indexReader, OlatDocument.TITLE_FIELD_NAME);
titleSpellChecker.indexDictionary(titleDictionary, indexWriterConfig, true);
IndexWriterConfig titleIndexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
titleSpellChecker.indexDictionary(titleDictionary, titleIndexWriterConfig, true);
// 3. Create description spellIndex
FSDirectory descriptionSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + DESCRIPTION_PATH));//true
SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
Dictionary descriptionDictionary = new LuceneDictionary(indexReader, OlatDocument.DESCRIPTION_FIELD_NAME);
descriptionSpellChecker.indexDictionary(descriptionDictionary, indexWriterConfig, true);
IndexWriterConfig descIndexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
descriptionSpellChecker.indexDictionary(descriptionDictionary, descIndexWriterConfig, true);
// 4. Create author spellIndex
FSDirectory authorSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + AUTHOR_PATH));//true
SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
Dictionary authorDictionary = new LuceneDictionary(indexReader, OlatDocument.AUTHOR_FIELD_NAME);
authorSpellChecker.indexDictionary(authorDictionary, indexWriterConfig, true);
IndexWriterConfig authorIndexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
authorSpellChecker.indexDictionary(authorDictionary, authorIndexWriterConfig, true);
// Merge all part spell indexes (content,title etc.) to one common spell index
Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);//true
//clean up the main index
IndexWriter merger = new IndexWriter(spellIndexDirectory, indexWriterConfig);
IndexWriterConfig spellIndexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
IndexWriter merger = new IndexWriter(spellIndexDirectory, spellIndexWriterConfig);
merger.deleteAll();
merger.commit();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment