Replace json-simple with GSON

This commit is contained in:
coolneng 2021-01-11 18:54:40 +01:00
parent 725be7993e
commit 61193050db
Signed by: coolneng
GPG Key ID: 9893DA236405AF57
4 changed files with 68 additions and 18 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
data
data-test
target
output

View File

@ -30,9 +30,9 @@
<version>8.6.3</version>
</dependency>
<dependency>
<groupId>com.googlecode.json-simple</groupId>
<artifactId>json-simple</artifactId>
<version>1.1.1</version>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.6</version>
</dependency>
</dependencies>

View File

@ -19,16 +19,16 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue;
import com.google.gson.Gson;
public class Indexer {
IndexWriter index;
@ -57,13 +57,12 @@ public class Indexer {
return files;
}
JSONArray parseJSONFile(File file) throws IOException {
Paper parseJSONFile(File file) throws IOException {
InputStream jsonFile = new FileInputStream(file);
Reader readerJson = new InputStreamReader(jsonFile);
Object fileObject = JSONValue.parse(readerJson);
JSONArray arrayObject = new JSONArray();
arrayObject.add(fileObject);
return arrayObject;
Gson gson = new Gson();
Paper data = gson.fromJson(readerJson, Paper.class);
return data;
}
void createIndex() throws IOException {
@ -73,11 +72,16 @@ public class Indexer {
index = new IndexWriter(dir, config);
}
void addDocuments(JSONArray jsonObjects) throws IOException {
for (JSONObject object : (List<JSONObject>) jsonObjects) {
Document doc = new Document();
index.addDocument(doc);
void addDocument(Paper paper) throws IOException {
Document doc = new Document();
doc.add(new StringField("document_id", paper.paper_id, Field.Store.YES));
doc.add(new TextField("title", paper.metadata.title, Field.Store.YES));
for (Author author : paper.metadata.authors) {
String authorName = author.first + " " + author.middle + " " + author.last;
authorName = authorName.replaceAll("\\p{P}", "");
doc.add(new TextField("authors", authorName, Field.Store.YES));
}
index.addDocument(doc);
}
void commitChanges() throws IOException {
@ -88,8 +92,8 @@ public class Indexer {
/**
 * Creates the index, parses every entry of {@code files} into a {@link Paper}, adds each
 * one as a document, and finally calls {@code commitChanges()}.
 *
 * @throws IOException if creating the index, reading a file, or writing a document fails
 * @throws ParseException declared by the signature; no call visible in this method body
 *         obviously raises it (TODO confirm whether it is still needed)
 */
void populateIndex() throws IOException, ParseException {
    createIndex();
    for (File file : files) {
        // Parse and index in one step; each file yields exactly one document.
        addDocument(parseJSONFile(file));
    }
    commitChanges();
}

View File

@ -0,0 +1,45 @@
package org.RI.P2;
import java.util.List;
/**
 * Data holder for an author's affiliation, referenced from {@code Author.affiliation}.
 * Fields are package-private and carry no behavior. With Gson's default naming the field
 * names double as the JSON keys, so they must not be renamed.
 */
class Affiliation {
String laboratory;
String institution;
}
/**
 * Data holder for a postal location, referenced from {@code Author.location}.
 * With Gson's default naming the field names double as the JSON keys (note the camelCase
 * {@code postCode}), so they must not be renamed.
 */
class Location {
String postCode;
String settlement;
String region;
String country;
}
/**
 * Data holder for one entry of {@code Metadata.authors}.
 * With Gson's default naming the field names double as the JSON keys.
 */
class Author {
String first;
// A list, not a single string: an author may carry several middle names or initials.
List<String> middle;
String last;
String suffix;
Affiliation affiliation;
// NOTE(review): declared here as a direct member of the author object. If the input JSON
// nests "location" inside "affiliation" instead, Gson will leave this field null — confirm
// against the data files.
Location location;
String email;
}
/**
 * Data holder for the {@code metadata} member of a {@link Paper}: the title and the
 * list of authors. Both fields are read by {@code Indexer.addDocument}.
 */
class Metadata {
String title;
List<Author> authors;
}
/** One element of {@code Paper._abstract}; holds a single block of text. */
class Abstract {
String text;
}
/** One element of {@code Paper.body_text}; holds a single block of text. */
class Body_Text {
String text;
}
public class Paper {
String paper_id;
Metadata metadata;
List<Abstract> _abstract;
List<Body_Text> body_text;
}