Slackbot
05/28/2023, 12:36 PM — larme (shenyang)
05/28/2023, 6:39 PM — Prashant Godhani
05/28/2023, 6:48 PM — Prashant Godhani
05/28/2023, 6:48 PM — larme (shenyang)
# 05/29/2023, 1:39 PM — larme (shenyang)
import bentoml
from bentoml.io import JSON, Text
from flair.data import Sentence
from flair.models import SequenceTagger
import torch
class NERRunnable(bentoml.Runnable):
    """BentoML runnable that wraps the Flair English NER sequence tagger."""

    # BUGFIX: must be a tuple of resource names. The original value was a
    # Slack-mangled URL string, and ("...") without a trailing comma is a
    # plain parenthesized string, not a one-element tuple.
    SUPPORTED_RESOURCES = ("nvidia.com/gpu",)

    def __init__(self):
        # Load the (large) pretrained tagger once per runner worker.
        self.model = SequenceTagger.load("flair/ner-english-large")

    @bentoml.Runnable.method(batchable=False)
    def inference(self, texts):
        """Run NER over each text in *texts*.

        Returns a list with one entry per input text; each entry is a list
        of dicts with keys 'entity_group', 'score', and 'word'.
        """
        ner_results = []
        for text in texts:
            sentence = Sentence(text)
            # predict() annotates the Sentence in place.
            self.model.predict(sentence)
            sentence_ner_result = []
            for entity in sentence.get_spans('ner'):
                sentence_ner_result.append({
                    'entity_group': entity.tag,
                    'score': entity.score,
                    'word': entity.text,
                })
            ner_results.append(sentence_ner_result)
        return ner_results
# Create a runner from the runnable; BentoML schedules its worker processes.
runner = bentoml.Runner(NERRunnable)
# HTTP service "test-flair" that dispatches API calls to the NER runner.
svc = bentoml.Service("test-flair", runners=[runner])
@svc.api(input=JSON(), output=JSON())
def inference(d):
    """JSON endpoint: expects a payload like {"text": ...}.

    Forwards the single text to the batch-oriented runner method (hence the
    one-element list) and returns the runner's result as JSON.
    """
    return runner.inference.run([d["text"]])
And I didn't observe an OOM problem. Could you give me some input examples to reproduce this problem?