Replace json-simple with GSON

This commit is contained in:
coolneng 2021-01-11 18:54:40 +01:00
parent 725be7993e
commit 61193050db
Signed by: coolneng
GPG Key ID: 9893DA236405AF57
4 changed files with 68 additions and 18 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
data data
data-test
target target
output output

View File

@ -30,9 +30,9 @@
<version>8.6.3</version> <version>8.6.3</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.googlecode.json-simple</groupId> <groupId>com.google.code.gson</groupId>
<artifactId>json-simple</artifactId> <artifactId>gson</artifactId>
<version>1.1.1</version> <version>2.8.6</version>
</dependency> </dependency>
</dependencies> </dependencies>

View File

@ -19,16 +19,16 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
import org.json.simple.JSONArray; import com.google.gson.Gson;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue;
public class Indexer { public class Indexer {
IndexWriter index; IndexWriter index;
@ -57,13 +57,12 @@ public class Indexer {
return files; return files;
} }
JSONArray parseJSONFile(File file) throws IOException { Paper parseJSONFile(File file) throws IOException {
InputStream jsonFile = new FileInputStream(file); InputStream jsonFile = new FileInputStream(file);
Reader readerJson = new InputStreamReader(jsonFile); Reader readerJson = new InputStreamReader(jsonFile);
Object fileObject = JSONValue.parse(readerJson); Gson gson = new Gson();
JSONArray arrayObject = new JSONArray(); Paper data = gson.fromJson(readerJson, Paper.class);
arrayObject.add(fileObject); return data;
return arrayObject;
} }
void createIndex() throws IOException { void createIndex() throws IOException {
@ -73,11 +72,16 @@ public class Indexer {
index = new IndexWriter(dir, config); index = new IndexWriter(dir, config);
} }
void addDocuments(JSONArray jsonObjects) throws IOException { void addDocument(Paper paper) throws IOException {
for (JSONObject object : (List<JSONObject>) jsonObjects) {
Document doc = new Document(); Document doc = new Document();
index.addDocument(doc); doc.add(new StringField("document_id", paper.paper_id, Field.Store.YES));
doc.add(new TextField("title", paper.metadata.title, Field.Store.YES));
for (Author author : paper.metadata.authors) {
String authorName = author.first + " " + author.middle + " " + author.last;
authorName = authorName.replaceAll("\\p{P}", "");
doc.add(new TextField("authors", authorName, Field.Store.YES));
} }
index.addDocument(doc);
} }
void commitChanges() throws IOException { void commitChanges() throws IOException {
@ -88,8 +92,8 @@ public class Indexer {
void populateIndex() throws IOException, ParseException { void populateIndex() throws IOException, ParseException {
createIndex(); createIndex();
for (File file : files) { for (File file : files) {
JSONArray jsonObjects = parseJSONFile(file); Paper paper = parseJSONFile(file);
addDocument(jsonObjects); addDocument(paper);
} }
commitChanges(); commitChanges();
} }

View File

@ -0,0 +1,45 @@
package org.RI.P2;
import java.util.List;
class Affiliation {
String laboratory;
String institution;
}
/**
 * Postal/geographic location referenced from an {@link Author}.
 *
 * <p>Plain data holder with no behaviour. Field names are kept exactly as
 * declared because Gson's default mapping matches JSON keys to field names.
 */
class Location {

    /** Postal code. */
    String postCode;

    /** Settlement name (presumably the city or town; confirm against input data). */
    String settlement;

    /** Region name. */
    String region;

    /** Country name. */
    String country;
}
/**
 * One author entry of a paper, as listed in {@link Metadata#authors}.
 *
 * <p>Plain data holder with no behaviour. Field names are kept exactly as
 * declared because Gson's default mapping matches JSON keys to field names.
 */
class Author {

    /** First name. */
    String first;

    /** Middle names; a list, unlike the other name parts which are single strings. */
    List<String> middle;

    /** Last name. */
    String last;

    /** Name suffix. */
    String suffix;

    /** Affiliation of the author. */
    Affiliation affiliation;

    // NOTE(review): confirm that "location" is a direct key of the author
    // object in the input JSON and not nested inside "affiliation"; if it is
    // nested, this field will never be populated.
    /** Location associated with the author. */
    Location location;

    /** Contact e-mail address. */
    String email;
}
/**
 * Bibliographic metadata of a {@link Paper}: its title and author list.
 *
 * <p>Plain data holder with no behaviour. Field names are kept exactly as
 * declared because Gson's default mapping matches JSON keys to field names.
 */
class Metadata {

    /** Title of the paper. */
    String title;

    /** Authors of the paper, one entry per author. */
    List<Author> authors;
}
/**
 * A single entry of a paper's abstract; element type of the abstract list
 * held by {@link Paper}.
 *
 * <p>Plain data holder with no behaviour.
 */
class Abstract {

    /** Text content of this entry (presumably one paragraph; confirm against input data). */
    String text;
}
/**
 * A single entry of a paper's body text; element type of
 * {@link Paper#body_text}.
 *
 * <p>Plain data holder with no behaviour.
 */
class Body_Text {

    /** Text content of this entry (presumably one paragraph; confirm against input data). */
    String text;
}
/**
 * Root object of a parsed paper JSON file.
 *
 * <p>Plain data holder with no behaviour. Field names are kept exactly as
 * declared because Gson's default mapping matches JSON keys to field names,
 * so renaming a field changes which JSON key populates it.
 */
public class Paper {

    /** Identifier of the paper. */
    String paper_id;

    /** Title and author information. */
    Metadata metadata;

    // NOTE(review): with Gson's default field naming this field is only
    // populated from a JSON key literally named "_abstract". If the input key
    // is "abstract" (a reserved word in Java, hence the underscore), the field
    // stays null; confirm against the data and consider
    // @SerializedName("abstract").
    /** Abstract entries of the paper. */
    List<Abstract> _abstract;

    /** Body text entries of the paper. */
    List<Body_Text> body_text;
}