From ecbe3349ce129a56c567da69537d53ffcf857c31 Mon Sep 17 00:00:00 2001 From: coolneng Date: Sun, 25 Oct 2020 23:40:20 +0100 Subject: [PATCH] Implement poorly word frequency reader --- .gitignore | 1 + TODO.org | 5 +- src/main/java/org/RI/P1/AnalyzeDirectory.java | 10 +++ src/main/java/org/RI/P1/FileData.java | 67 +++++++++++++++++++ 4 files changed, 81 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 73df60d..7cb494d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ target .classpath .project .settings +output diff --git a/TODO.org b/TODO.org index 5a305ab..20fd916 100644 --- a/TODO.org +++ b/TODO.org @@ -1,6 +1,4 @@ * P1 -** TODO Write to a file all word occurrences and frequencies -Sorted in a decreasing manner ** TODO Plot word frequencies With gnuplot, with documents of at least 3 different languages. We'll fit this to the Booth and Federowicz equation @@ -9,3 +7,6 @@ CLOSED: [2020-10-25 Sun 19:58] | filename | type | encoding | language | ** DONE Extract all URLs CLOSED: [2020-10-25 Sun 22:14] +** DONE Write to a file all word occurrences and frequencies +CLOSED: [2020-10-25 Sun 23:40] +Sorted in a decreasing manner diff --git a/src/main/java/org/RI/P1/AnalyzeDirectory.java b/src/main/java/org/RI/P1/AnalyzeDirectory.java index fbfd025..5849504 100644 --- a/src/main/java/org/RI/P1/AnalyzeDirectory.java +++ b/src/main/java/org/RI/P1/AnalyzeDirectory.java @@ -23,6 +23,7 @@ public class AnalyzeDirectory { System.out.println("Usage: AnalyzeDirectory