Implement trivial custom analyzer

This commit is contained in:
coolneng 2021-01-09 06:20:06 +01:00
parent 6405617858
commit c61932d99d
Signed by: coolneng
GPG Key ID: 9893DA236405AF57
1 changed file with 26 additions and 12 deletions

View File

@ -7,8 +7,12 @@ import java.io.Reader;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.text.ParseException; import java.text.ParseException;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
@ -17,20 +21,29 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
import org.json.simple.JSONArray; import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue; import org.json.simple.JSONValue;
public class Indexer { public class Indexer {
IndexWriter index; IndexWriter index;
String folderPath; String folderPath;
Map<String, Analyzer> analyzerPerField; PerFieldAnalyzerWrapper customAnalyzer;
Indexer(String folderPath) throws IOException, ParseException { Indexer(String folderPath) throws IOException, ParseException {
this.folderPath = folderPath; this.folderPath = folderPath;
analyzerPerField = new HashMap<>(); customAnalyzer = createAnalyzer();
createIndex(folderPath);
} }
public JSONArray parseJSONFile(String filePath) throws IOException, ParseException { PerFieldAnalyzerWrapper createAnalyzer() {
Map<String, Analyzer> analyzerPerField = new HashMap<>();
analyzerPerField.put("title", new EnglishAnalyzer());
analyzerPerField.put("abstract", new EnglishAnalyzer());
PerFieldAnalyzerWrapper customAnalyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(),
analyzerPerField);
return customAnalyzer;
}
JSONArray parseJSONFile(String filePath) throws IOException, ParseException {
InputStream jsonFile = getClass().getResourceAsStream(filePath); InputStream jsonFile = getClass().getResourceAsStream(filePath);
Reader readerJson = new InputStreamReader(jsonFile); Reader readerJson = new InputStreamReader(jsonFile);
Object fileObjects = JSONValue.parse(readerJson); Object fileObjects = JSONValue.parse(readerJson);
@ -38,25 +51,26 @@ public class Indexer {
return arrayObjects; return arrayObjects;
} }
public void openIndex() throws IOException { void openIndex() throws IOException {
Directory dir = FSDirectory.open(Paths.get(folderPath)); Directory dir = FSDirectory.open(Paths.get(folderPath));
Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(customAnalyzer);
IndexWriterConfig config = new IndexWriterConfig(analyzer);
config.setOpenMode(OpenMode.CREATE_OR_APPEND); config.setOpenMode(OpenMode.CREATE_OR_APPEND);
index = new IndexWriter(dir, config); index = new IndexWriter(dir, config);
} }
public void addDocuments(JSONArray jsonObjects) throws IOException { void addDocuments(JSONArray jsonObjects) throws IOException {
Document doc = new Document(); for (JSONObject object : (List<JSONObject>) jsonObjects) {
index.addDocument(doc); Document doc = new Document();
index.addDocument(doc);
}
} }
public void commitChanges() throws IOException { void commitChanges() throws IOException {
index.commit(); index.commit();
index.close(); index.close();
} }
public void createIndex(String folderPath) throws IOException, ParseException { void createIndex() throws IOException, ParseException {
JSONArray jsonObjects = parseJSONFile(folderPath); JSONArray jsonObjects = parseJSONFile(folderPath);
openIndex(); openIndex();
addDocuments(jsonObjects); addDocuments(jsonObjects);