Replace json-simple with GSON
This commit is contained in:
parent
725be7993e
commit
61193050db
|
@ -1,3 +1,4 @@
|
||||||
data
|
data
|
||||||
|
data-test
|
||||||
target
|
target
|
||||||
output
|
output
|
||||||
|
|
6
pom.xml
6
pom.xml
|
@ -30,9 +30,9 @@
|
||||||
<version>8.6.3</version>
|
<version>8.6.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.googlecode.json-simple</groupId>
|
<groupId>com.google.code.gson</groupId>
|
||||||
<artifactId>json-simple</artifactId>
|
<artifactId>gson</artifactId>
|
||||||
<version>1.1.1</version>
|
<version>2.8.6</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
|
|
@ -19,16 +19,16 @@ import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
||||||
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
|
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.FSDirectory;
|
import org.apache.lucene.store.FSDirectory;
|
||||||
import org.json.simple.JSONArray;
|
import com.google.gson.Gson;
|
||||||
import org.json.simple.JSONObject;
|
|
||||||
import org.json.simple.JSONValue;
|
|
||||||
|
|
||||||
public class Indexer {
|
public class Indexer {
|
||||||
IndexWriter index;
|
IndexWriter index;
|
||||||
|
@ -57,13 +57,12 @@ public class Indexer {
|
||||||
return files;
|
return files;
|
||||||
}
|
}
|
||||||
|
|
||||||
JSONArray parseJSONFile(File file) throws IOException {
|
Paper parseJSONFile(File file) throws IOException {
|
||||||
InputStream jsonFile = new FileInputStream(file);
|
InputStream jsonFile = new FileInputStream(file);
|
||||||
Reader readerJson = new InputStreamReader(jsonFile);
|
Reader readerJson = new InputStreamReader(jsonFile);
|
||||||
Object fileObject = JSONValue.parse(readerJson);
|
Gson gson = new Gson();
|
||||||
JSONArray arrayObject = new JSONArray();
|
Paper data = gson.fromJson(readerJson, Paper.class);
|
||||||
arrayObject.add(fileObject);
|
return data;
|
||||||
return arrayObject;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void createIndex() throws IOException {
|
void createIndex() throws IOException {
|
||||||
|
@ -73,11 +72,16 @@ public class Indexer {
|
||||||
index = new IndexWriter(dir, config);
|
index = new IndexWriter(dir, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
void addDocuments(JSONArray jsonObjects) throws IOException {
|
void addDocument(Paper paper) throws IOException {
|
||||||
for (JSONObject object : (List<JSONObject>) jsonObjects) {
|
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
index.addDocument(doc);
|
doc.add(new StringField("document_id", paper.paper_id, Field.Store.YES));
|
||||||
|
doc.add(new TextField("title", paper.metadata.title, Field.Store.YES));
|
||||||
|
for (Author author : paper.metadata.authors) {
|
||||||
|
String authorName = author.first + " " + author.middle + " " + author.last;
|
||||||
|
authorName = authorName.replaceAll("\\p{P}", "");
|
||||||
|
doc.add(new TextField("authors", authorName, Field.Store.YES));
|
||||||
}
|
}
|
||||||
|
index.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
void commitChanges() throws IOException {
|
void commitChanges() throws IOException {
|
||||||
|
@ -88,8 +92,8 @@ public class Indexer {
|
||||||
void populateIndex() throws IOException, ParseException {
|
void populateIndex() throws IOException, ParseException {
|
||||||
createIndex();
|
createIndex();
|
||||||
for (File file : files) {
|
for (File file : files) {
|
||||||
JSONArray jsonObjects = parseJSONFile(file);
|
Paper paper = parseJSONFile(file);
|
||||||
addDocument(jsonObjects);
|
addDocument(paper);
|
||||||
}
|
}
|
||||||
commitChanges();
|
commitChanges();
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
package org.RI.P2;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
 * Plain data holder for an author's affiliation (referenced from Author.affiliation).
 * It has no behavior of its own; the fields are package-private and, when the enclosing
 * Paper is deserialized by Gson, are filled from the JSON keys of the same name.
 */
class Affiliation {
|
||||||
|
String laboratory;
|
||||||
|
String institution;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Plain data holder for a postal location (referenced from Author.location).
 * All parts are kept as strings and no validation is performed here.
 */
class Location {
|
||||||
|
String postCode;
|
||||||
|
String settlement;
|
||||||
|
String region;
|
||||||
|
String country;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Plain data holder for one entry of Metadata.authors.
 * Any field may be null when the corresponding JSON key is absent, so callers that build
 * a display name from first/middle/last need to handle missing parts.
 * NOTE(review): location is declared here, next to affiliation; if the input data nests
 * the location inside the affiliation object this field will never be filled — confirm
 * against a sample file.
 */
class Author {
|
||||||
|
String first;
|
||||||
|
List<String> middle; // a list, unlike first and last
|
||||||
|
String last;
|
||||||
|
String suffix;
|
||||||
|
Affiliation affiliation;
|
||||||
|
Location location;
|
||||||
|
String email;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Plain data holder for Paper.metadata: the paper's title and its list of authors.
 * Both values are read by the indexer when it builds the "title" and "authors" fields.
 */
class Metadata {
|
||||||
|
String title;
|
||||||
|
List<Author> authors;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * One element of the Paper._abstract list; carries only that element's text.
 */
class Abstract {
|
||||||
|
String text;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * One element of the Paper.body_text list; carries only that element's text.
 */
class Body_Text {
|
||||||
|
String text;
|
||||||
|
}
|
||||||
|
|
||||||
|
public class Paper {
|
||||||
|
String paper_id;
|
||||||
|
Metadata metadata;
|
||||||
|
List<Abstract> _abstract;
|
||||||
|
List<Body_Text> body_text;
|
||||||
|
}
|
Loading…
Reference in New Issue