add corpus parsing

master
dfsek 1 year ago
parent 79f4cb9b91
commit 57ca8dde51
  1. 40
      corpus/src/main/java/com/dfsek/corpus/Read.java

@ -0,0 +1,40 @@
package com.dfsek.corpus;
import io.vavr.collection.HashMap;
import io.vavr.collection.List;
import io.vavr.collection.Map;
import io.vavr.collection.Stream;
import org.apache.commons.io.IOUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.util.Scanner;
import static io.vavr.API.*;
/**
 * Command-line entry point that loads the bundled {@code /corpus.8vim} resource, parses it into
 * a map from key to the list of values recorded for that key, and then answers interactive
 * lookups read from standard input.
 */
public class Read {
    /** Character separating the key from the value on each corpus line. */
    private static final char SEPARATOR = ':';

    /**
     * Parses the corpus, reports how long parsing took, then repeatedly prompts for a key and
     * prints the lookup result until standard input is exhausted.
     *
     * @param args ignored
     * @throws IOException if the corpus resource cannot be read
     */
    public static void main(String... args) throws IOException {
        System.out.println("Parsing probabilities...");
        // NOTE(review): the platform default charset makes decoding machine-dependent; confirm
        // the encoding of corpus.8vim and consider pinning it explicitly.
        String corpus = IOUtils.resourceToString("/corpus.8vim", Charset.defaultCharset());

        long startNanos = System.nanoTime();
        Map<String, List<String>> probabilities = parseCorpus(corpus);
        long elapsedNanos = System.nanoTime() - startNanos;
        System.out.println("Done (" + ((double) elapsedNanos / 1000000) + "ms).");

        try (Scanner input = new Scanner(System.in)) {
            while (true) {
                System.out.print("Start: ");
                // Stop cleanly at end of input instead of letting nextLine() throw
                // NoSuchElementException when stdin is closed or piped.
                if (!input.hasNextLine()) {
                    break;
                }
                String start = input.nextLine();
                // Map.get returns an Option, so unknown keys print as None rather than failing.
                System.out.println(probabilities.get(start));
            }
        }
    }

    /**
     * Builds the lookup table from the raw corpus text.
     *
     * <p>Each line is split at its first {@code ':'}: the text before it is the key and the text
     * after it (separator excluded) is one value. Values of lines sharing a key are accumulated
     * in encounter order. Lines that contain no separator, such as blank lines, are skipped.
     *
     * @param in full corpus text, one entry per line
     * @return map from key to every value recorded for that key
     */
    private static Map<String, List<String>> parseCorpus(String in) {
        Map<String, List<String>> empty = HashMap.empty();
        return Stream.ofAll(new BufferedReader(new StringReader(in)).lines())
                // A line without a separator cannot be split; substring(0, -1) would throw.
                .filter(line -> line.indexOf(SEPARATOR) >= 0)
                .foldLeft(empty, (table, line) -> {
                    int sep = line.indexOf(SEPARATOR);
                    String key = line.substring(0, sep);
                    // sep + 1 so the stored value does not carry the leading separator.
                    String value = line.substring(sep + 1);
                    return table.put(key, List(value), List::appendAll);
                });
    }
}
Loading…
Cancel
Save