1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
|
package tim.prune.function.filesleuth.extract;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.TimeZone;
import java.util.regex.Pattern;
import tim.prune.function.filesleuth.data.LocationFilter;
import tim.prune.function.filesleuth.data.TrackContents;
/**
 * Responsible for extracting text from text/csv files.
 * Each line of the file is split into fields at commas, semicolons and tabs,
 * and every field which looks like a word is added to the track contents.
 */
public class TextFileExtractor implements ContentExtractor
{
	/** Separator between fields: one or more commas, semicolons or tabs (compiled once, not per line) */
	private static final Pattern FIELD_SEPARATORS = Pattern.compile("[,;\\t]+");

	/** File to read the text from */
	private final File _file;

	/**
	 * Constructor
	 * @param inFile text file to extract from
	 */
	public TextFileExtractor(File inFile) {
		_file = inFile;
	}

	/**
	 * Read the file line by line and collect the word-like fields.
	 * Reading is best-effort: if the file cannot be opened or a read fails part-way,
	 * whatever was collected up to that point is returned and no exception is thrown.
	 * @param inTimezone timezone passed on to the created contents object
	 * @return contents object holding the strings found in the file (never null)
	 */
	@Override
	public TrackContents getContents(TimeZone inTimezone)
	{
		TrackContents contents = new TrackContents(inTimezone);
		// Note: FileReader(File) decodes the bytes using the default charset
		try (BufferedReader reader = new BufferedReader(new FileReader(_file)))
		{
			// Use readLine() rather than lines(): a failure inside the lines() stream is
			// reported as an UncheckedIOException, which would bypass the catch below
			String line;
			while ((line = reader.readLine()) != null) {
				processLine(line, contents);
			}
		}
		catch (IOException ignored) {
			// Deliberately ignored: an unreadable file just gives fewer (or no) strings
		}
		return contents;
	}

	/**
	 * Split the given line into bits and add fields to the contents object
	 * @param inLine single line of text from the file, null or blank lines are skipped
	 * @param inContents contents object to which the word-like fields are added
	 */
	private void processLine(String inLine, TrackContents inContents)
	{
		if (inLine == null) {
			return;
		}
		String line = inLine.trim();
		if (line.equals("")) {
			return;
		}
		// Only the line as a whole is trimmed, the individual fields are passed on as they are
		for (String field : FIELD_SEPARATORS.split(line)) {
			if (looksLikeWord(field)) {
				inContents.addString(field);
			}
		}
	}

	/**
	 * @param inText text of a single field
	 * @return true if the text contains at least two letters (not just numbers).
	 *         The letters don't have to be next to each other.
	 */
	private static boolean looksLikeWord(String inText)
	{
		boolean foundLetter = false;
		for (int i=0; i<inText.length(); i++)
		{
			if (Character.isAlphabetic(inText.charAt(i)))
			{
				if (foundLetter) {
					// this is the second letter, so no need to look any further
					return true;
				}
				foundLetter = true;
			}
		}
		return false;
	}

	/**
	 * @param inFilter location filter (not used)
	 * @return always false for this kind of file
	 */
	@Override
	public boolean matchesFilter(LocationFilter inFilter)
	{
		// Not possible because we don't get coordinates from text files, only xml
		return false;
	}
}
|