Paulo Moreira
class BinaryModelRunnable(bentoml.Runnable):
    """BentoML Runnable that serves a binary-classification PyTorch model.

    NOTE(review): the original paste contained Slack link-mangling
    ("<http://nvidia.com/gpu|nvidia.com/gpu>" and
    "<http://self.binary_model.to|self.binary_model.to>(device)") which is
    not valid Python; restored to the intended tokens.
    """

    # "nvidia.com/gpu" is BentoML's resource identifier for GPU scheduling.
    SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
    SUPPORTS_CPU_MULTI_THREADING = True

    def __init__(self):
        """Load the binary model and move it onto the target device.

        Relies on module-level names ``binary_model`` (a BentoML model
        reference) and ``device`` — presumably defined earlier in this
        file; confirm against the full script.
        """
        self.binary_model = bentoml.pytorch.load_model(binary_model)
        self.binary_model.to(device)
        # Inference-only runner: disable dropout/batch-norm training behavior.
        self.binary_model.eval()

    @bentoml.Runnable.method(batchable=True, batch_dim=0)
    def predict(self, ids, mask):
        """Run a batched forward pass and return sigmoid probabilities.

        Args:
            ids: batched token-id tensor (batch along dim 0 — BentoML
                concatenates requests on ``batch_dim=0``).
            mask: matching attention-mask tensor.

        Returns:
            ``np.ndarray`` of per-sample sigmoid scores.
        """
        # no_grad: skip autograd graph construction during inference.
        with torch.no_grad():
            output = self.binary_model(ids=ids, mask=mask)
        # Original built an intermediate Python list via extend()/tolist()
        # and re-wrapped it with np.array; convert directly instead.
        # (Debug print() calls of the raw ids/batch size were removed.)
        return np.asarray(torch.sigmoid(output).cpu().numpy().tolist())
binary_runner = bentoml.Runner(BinaryModelRunnable, name= "binary_runnable", models=[binary_model], max_batch_size=4, max_latency_ms=10000)
I have had this problem for some time, but no one has been able to help me. Could someone please help?