Refactor addDocument function

This commit is contained in:
coolneng 2021-01-11 20:20:54 +01:00
parent 3b4d7f1408
commit 5139f0a38d
Signed by: coolneng
GPG Key ID: 9893DA236405AF57
1 changed file with 23 additions and 11 deletions

View File

@@ -79,28 +79,40 @@ public class Indexer {
index = new IndexWriter(dir, config); index = new IndexWriter(dir, config);
} }
// NOTE(review): these rows look like a flattened side-by-side diff. Where a row
// holds two statements, the first is the removed (old addDocument) text and the
// second is the added (new populatePaperMetadata) text; rows with a single
// statement belong to only one side.
//
// New helper populatePaperMetadata: iterates over paper.metadata.authors and
// appends, for every author, the name, the affiliation institution and the
// email to the caller-supplied builders. Nothing is returned; the builders are
// mutated in place.
void addDocument(Paper paper) throws IOException { void populatePaperMetadata(Paper paper, StringBuilder authors, StringBuilder institutions, StringBuilder emails) {
// Removed side only: the old addDocument created the Document and the three
// builders inline before looping over the authors.
Document doc = new Document();
doc.add(new StringField("document_id", paper.paper_id, Field.Store.YES));
doc.add(new TextField("title", paper.metadata.title, Field.Store.YES));
StringBuilder authors = new StringBuilder();
StringBuilder institutions = new StringBuilder();
StringBuilder emails = new StringBuilder();
for (Author author : paper.metadata.authors) { for (Author author : paper.metadata.authors) {
// The added side appends a trailing " " so consecutive author names do not run
// together in the accumulated string.
String authorName = author.first + " " + author.middle + " " + author.last; String authorName = author.first + " " + author.middle + " " + author.last + " ";
// Strips every character in the \p{P} (punctuation) class from the name.
authorName = authorName.replaceAll("\\p{P}", ""); authorName = authorName.replaceAll("\\p{P}", "");
authors.append(authorName); authors.append(authorName);
// NOTE(review): institution and email are appended with no separator, unlike
// authorName above - confirm whether adjacent values are meant to be fused.
institutions.append(author.affiliation.institution); institutions.append(author.affiliation.institution);
emails.append(author.email); emails.append(author.email);
} }
}
/**
 * Concatenates the text of every abstract segment of {@code paper} into
 * {@code fullAbstract}.
 *
 * <p>A single space is appended after each segment so that the last word of
 * one segment and the first word of the next stay separate; this mirrors the
 * trailing space appended to author names in {@code populatePaperMetadata}.
 *
 * @param paper        paper whose {@code abstr} entries are read
 * @param fullAbstract builder that receives the concatenated text (mutated in place)
 */
void populateFullAbstract(Paper paper, StringBuilder fullAbstract) {
    for (Abstract abstr : paper.abstr) {
        // Without the separator, "...end." + "Next..." would be joined as "end.Next".
        fullAbstract.append(abstr.text).append(' ');
    }
}
// New helper populateDocumentFields: fills the supplied Document with the
// fields derived from one Paper. Rows holding two statements show the old text
// followed by the new text of the same diff row.
//
// Fields added, in order: "document_id" (StringField, stored), "title"
// (TextField, stored), "authors" (TextField, stored), "institution", "emails"
// and "abstract" (TextField, Field.Store.NO).
void populateDocumentFields(Paper paper, Document doc) {
doc.add(new StringField("document_id", paper.paper_id, Field.Store.YES));
doc.add(new TextField("title", paper.metadata.title, Field.Store.YES));
StringBuilder authors = new StringBuilder();
StringBuilder institutions = new StringBuilder();
StringBuilder emails = new StringBuilder();
// The builders are filled by populatePaperMetadata and read back just below.
populatePaperMetadata(paper, authors, institutions, emails);
doc.add(new TextField("authors", authors.toString(), Field.Store.YES)); doc.add(new TextField("authors", authors.toString(), Field.Store.YES));
doc.add(new TextField("institution", institutions.toString(), Field.Store.NO)); doc.add(new TextField("institution", institutions.toString(), Field.Store.NO));
doc.add(new TextField("emails", emails.toString(), Field.Store.NO)); doc.add(new TextField("emails", emails.toString(), Field.Store.NO));
StringBuilder fullAbstract = new StringBuilder(); StringBuilder fullAbstract = new StringBuilder();
// Old side: inline loop over paper.abstr (this row and the next two).
// New side: the loop is replaced by a call to populateFullAbstract.
for (Abstract abstr : paper.abstr) { populateFullAbstract(paper, fullAbstract);
fullAbstract.append(abstr.text);
}
doc.add(new TextField("abstract", fullAbstract.toString(), Field.Store.NO)); doc.add(new TextField("abstract", fullAbstract.toString(), Field.Store.NO));
}
// New addDocument: creates an empty Document, delegates field population to
// populateDocumentFields, then hands the document to the IndexWriter held in
// `index`. Declared to throw IOException.
// The last two rows repeat the statement because they show the same diff row
// on both the old and the new side.
void addDocument(Paper paper) throws IOException {
Document doc = new Document();
populateDocumentFields(paper, doc);
index.addDocument(doc); index.addDocument(doc);
} }