I need to write a program for text lemmatization (different forms of words). Since I'm going to be using different lemmatization libraries and comparing them I've decided to use Strategy Pattern.
My idea is to wrap everything to single class and, depending on lemmatization function, only change my lemmatize method.
Here's my class:
import re
import types
create_bound_method = types.MethodType
class Lemmatizator(object):
def __init__(self, filename=None, lemmatization=None):
if lemmatization and filename:
self.filename = filename
self.lemmatize = create_bound_method(lemmatization, self)
def _get_text(self):
with open(f'texts/{self.filename}.txt', 'r') as file:
self.text = file.read()
def _split_to_unique(self):
text = re.sub(r'[^\w\s]', '', self.text)
split_text = re.split(r'\s', text)
self.unique_words = set(split_text)
return self.unique_words
def lemmatize(self):
return 'Lemmatize function or text are not found'
Then I'm creating my lemmatize method:
def nltk_lemmatization(self):
words = {}
for word in self.unique_words:
if word:
words[word] = {
'noun': wnl.lemmatize(word),
'adverb': wnl.lemmatize(word, pos='r'),
'adjective': wnl.lemmatize(word, pos='a'),
'verb': wnl.lemmatize(word, pos='v')
}
return words
And trying to apply it:
nltk_lem = Lemmatizator('A Christmas Carol in Prose', nltk_lemmatization)
nltk_lem.lemmatize()
But I receive the following error:
for word in self.unique_words:
AttributeError: 'Lemmatizator' object has no attribute 'unique_words'
what's wrong?
Aucun commentaire:
Enregistrer un commentaire