# sanghyub lee
# 05/04/2023, 4:42 AM
import os
import sys
import bentoml
import mlflow
import boto3
import torch
import typing as t
from bentoml.io import Text, JSON
from datetime import datetime
# AWS credentials are read from the environment; a missing variable raises
# KeyError as soon as this module is imported.
accessKey = os.environ['AWS_ACCESS_KEY_ID']
secretKey = os.environ['AWS_SECRET_ACCESS_KEY']
myRegion = 'ap-northeast-2'
start_time = datetime.now()

client = boto3.client(
    's3',
    aws_access_key_id=accessKey,
    aws_secret_access_key=secretKey,
    region_name=myRegion,
)

# Download the model weights into the working directory at import time.
# The destination must be the plain file name: the chat-link markup that had
# crept into this literal contained '/' characters and therefore pointed at a
# nested path that does not exist.
client.download_file('bucket_name', 'weights/best_both_123.pt', 'best.pt')
class YoloV5Runnable(bentoml.Runnable):
    """BentoML runnable that serves a custom YOLOv5 model loaded via torch.hub.

    The weights are read from ``best.pt`` in the current working directory.
    """

    # Resource identifiers must be the plain names ("nvidia.com/gpu", "cpu");
    # the chat-link markup previously embedded in the GPU entry made it an
    # unrecognisable string.
    SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
    SUPPORTS_CPU_MULTI_THREADING = True

    def __init__(self):
        """Load the model, move it to the selected device and time the load."""
        start_time = datetime.now()
        print("Torch Available : ", torch.cuda.is_available())
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # force_reload=True asks torch.hub for a fresh copy of the
        # ultralytics/yolov5 repository instead of its cached one, so every
        # runner start needs network access.
        self.model = torch.hub.load(
            'ultralytics/yolov5',
            'custom',
            path='best.pt',
            force_reload=True,
        )
        self.model.to(self.device)
        # Confidence threshold applied by the hub model to its detections.
        self.model.conf = 0.85
        time_elapsed = datetime.now() - start_time
        print('load Time elapsed (hh:mm:ss.ms) {}'.format(time_elapsed))

    @bentoml.Runnable.method(batchable=False)
    def predict(self, img):
        """Run the model on ``img`` and return its detections as a JSON string.

        Args:
            img: Input forwarded unchanged to the model. The service passes a
                text value here -- presumably an image path or URL; confirm
                against the callers.

        Returns:
            The ``xyxy`` detections of the first result, serialized with
            ``DataFrame.to_json(orient="records")``.
        """
        return self.model(img).pandas().xyxy[0].to_json(orient="records")
# Build the runner around the YOLOv5 runnable. The cast has no runtime
# effect; it only tells static type checkers which runner type to assume.
yolov5runner = t.cast(
    "RunnerImpl",
    bentoml.Runner(YoloV5Runnable),
)

# The service that exposes the runner; API endpoints are registered on it.
svc = bentoml.Service(
    "ric_service",
    runners=[yolov5runner],
)
@svc.api(input=Text(), output=JSON())
def inference(img):
    """Forward the text input to the YOLOv5 runner and return its result.

    Args:
        img: Text payload of the request, handed to the runner unchanged.

    Returns:
        Whatever ``yolov5runner.predict.run`` returns for ``img``.
    """
    # NOTE(review): the runner's predict returns an already-serialized JSON
    # string while the output descriptor is JSON(); confirm the response is
    # not JSON-encoded twice.
    return yolov5runner.predict.run(img)