Answer the question
In order to leave comments, you need to log in
How does lemmatization work in Apache OpenNLP?
This library is designed for text processing. I need to extract the lemma of each word. The repository is already connected to my project.
Here is the code of the class that apparently handles lemmatization:
/**
 * Dictionary-backed lemmatizer: maps a (word, POS tag) pair to its lemma using
 * a tab-separated dictionary loaded once at construction time.
 *
 * <p>Each dictionary line is expected to hold at least three tab-separated
 * fields: {@code word}, {@code postag} and {@code lemma}, in that order.
 * Because lookups lowercase the word for every tag not listed in
 * {@link #constantTags}, dictionary entries for those tags only match when the
 * dictionary word is stored in lowercase.
 */
public class SimpleLemmatizer implements DictionaryLemmatizer {

  /** POS tags whose words are looked up, and returned, with their original casing. */
  public final Set<String> constantTags = new HashSet<String>(Arrays.asList("NNP","NP00000"));

  /** Maps the two-element key {@code [word, postag]} to the lemma. */
  private final HashMap<List<String>,String> dictMap;

  /**
   * Loads the lemma dictionary from the given stream.
   *
   * <p>Lines with fewer than three tab-separated fields (for example blank
   * lines) are skipped. If reading fails part-way, the stack trace is printed
   * and the entries read so far are kept. The stream is not closed here.
   *
   * @param dictionary stream of tab-separated {@code word, postag, lemma} lines
   */
  public SimpleLemmatizer(InputStream dictionary) {
    dictMap = new HashMap<List<String>,String>();
    // NOTE(review): decodes with the platform default charset -- confirm that
    // this matches the encoding of the dictionary files in use.
    BufferedReader breader = new BufferedReader(new InputStreamReader(dictionary));
    String line;
    try {
      while ((line = breader.readLine()) != null) {
        String[] elems = line.split("\t");
        if (elems.length < 3) {
          // Malformed or blank line: skip it instead of failing with
          // ArrayIndexOutOfBoundsException.
          continue;
        }
        dictMap.put(Arrays.asList(elems[0],elems[1]),elems[2]);
      }
    } catch (IOException e) {
      // Best effort: the dictionary may be only partially loaded at this point.
      e.printStackTrace();
    }
  }

  /**
   * Builds the dictionary lookup key for a word and its POS tag.
   *
   * @param word the surface form
   * @param postag the POS tag of the word
   * @return the key {@code [word, postag]}; the word is lowercased unless the
   *         tag is one of {@link #constantTags}
   */
  private List<String> getDictKeys(String word, String postag) {
    String keyWord = constantTags.contains(postag) ? word : StringUtil.toLowerCase(word);
    return Arrays.asList(keyWord, postag);
  }

  /**
   * Returns the lemma for a word with the given POS tag.
   *
   * <p>Resolution order: the dictionary entry if one exists; otherwise the
   * word itself when the tag is in {@link #constantTags} or the word contains
   * no characters that upper-casing would change; otherwise the lowercased
   * word.
   *
   * @param word the surface form to lemmatize
   * @param postag the POS tag of the word
   * @return the lemma, never taken from the dictionary as {@code null}
   */
  public String lemmatize(String word, String postag) {
    String dictLemma = dictMap.get(getDictKeys(word, postag));
    if (dictLemma != null) {
      return dictLemma;
    }
    // Compare by value: identity (==) only works when toUpperCase() happens to
    // return the very same String instance, which is not guaranteed.
    if (constantTags.contains(postag) || word.toUpperCase().equals(word)) {
      return word;
    }
    return StringUtil.toLowerCase(word);
  }
}
Answer the question
In order to leave comments, you need to log in
Didn't find what you were looking for?
Ask your question
731 491 924 answers to any question