FastNN Client
FastNNClient
FastNN client class for pushing requests to the Triton Inference Server
Usage:
>>> client = FastNNClient(url="127.0.0.1:8000", model_name="distilbert-squad", model_version="1")
>>> client.request(batch=batch)
Parameters:
- url - String URL of the Triton Inference Server. Defaults to 127.0.0.1:8000
- model_name - String name of the model in the model_repository directory
- model_version - String model version name
- client_type - String selecting the protocol, either "grpc" or "http". Defaults to "grpc"
- verbose - Bool for log verbosity
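A more complete, hedged sketch of a round trip is below. The batch would normally come from a FastNN Processor dataloader; the input shapes and the output names "start_logits"/"end_logits" are assumptions about the served model's configuration, and 8000/8001 are Triton's default HTTP/gRPC ports.

```python
# A minimal sketch, not the library's documented flow: a hand-built batch stands in
# for the tensors a FastNN Processor dataloader would produce.
import torch
from fastnn.client import FastNNClient

client = FastNNClient(
    url="127.0.0.1:8001",  # Triton's default gRPC port; HTTP defaults to 8000
    model_name="distilbert-squad",
    model_version="1",
    client_type="grpc",
)

# Tuple of torch tensors, ordered to match the model's declared inputs.
# The two-input layout, shapes, and dtypes are assumptions about the model config.
batch = (
    torch.randint(0, 30000, (1, 384), dtype=torch.int64),  # token ids (assumed)
    torch.ones((1, 384), dtype=torch.int64),                # attention mask (assumed)
)

response = client.request(batch=batch)
# Output tensor names depend on the Triton model configuration; these are assumptions.
start_logits = response.as_numpy("start_logits")
end_logits = response.as_numpy("end_logits")
```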
Source code in fastnn/client.py
class FastNNClient:
"""FastNN client class for pushing requests to the Triton Inference Server
Usage:
```python
>>> client = FastNNClient(url="127.0.0.1:8000", model_name="distilbert-squad", model_version="1")
>>> client.request(batch=batch)
```
**Parameters:**
* **url** - String URL of the Triton Inference Server. Defaults to 127.0.0.1:8000
* **model_name** - String name of model in `model_repository` directory
* **model_version** - String model version name
* **client_type** - String selecting the protocol, either "grpc" or "http". Defaults to "grpc"
* **verbose** - Bool for log verbosity
"""
def __init__(
self,
url: str = "127.0.0.1:8000",
model_name: str = "distilbert-squad",
model_version: str = "1",
client_type: str = "grpc",
verbose: bool = False,
):
if client_type == "grpc":
self.client_pkg = grpc
elif client_type == "http":
self.client_pkg = http
else:
ValueError("Paramater 'client_type' must be either grpc or http")
self.url = url
self.model_name = model_name
self.model_version = model_version
self.client_type = client_type
self.triton_client = self.client_pkg.InferenceServerClient(
url=url, verbose=verbose
)
self.model_metadata = self.triton_client.get_model_metadata(
model_name=model_name, model_version=model_version
)
self.model_config = self.triton_client.get_model_config(
model_name=model_name, model_version=model_version
)
def request(
self, batch: Tuple[torch.Tensor], binary_data: bool = False
) -> InferResult:
"""Runs a request with the `batch` input that can be generated from a FastNN `Processor`
* **batch** - Tuple of torch tensors, typically batch inputs from a dataloader
"""
if self.client_type == "grpc":
return self.request_grpc(batch=batch, binary_data=binary_data)
elif self.client_type == "http":
return self.request_http(batch=batch, binary_data=binary_data)
else:
ValueError("Paramater 'client_type' must be either grpc or http")
def request_http(
self, batch: Tuple[torch.Tensor], binary_data: bool = False
) -> InferResult:
"""Runs an http request with the `batch` input that can be generated from a FastNN `Processor`
* **batch** - Tuple of torch tensors, typically batch inputs from a dataloader
"""
inputs_metadata = self.model_metadata["inputs"]
outputs_metadata = self.model_metadata["outputs"]
# Assert batch input matches triton model metadata
assert len(batch) == len(inputs_metadata)
inputs = []
for i, metadata in enumerate(inputs_metadata):
inp = self.client_pkg.InferInput(
metadata["name"], tuple(batch[i].shape), metadata["datatype"]
)
inp.set_data_from_numpy(batch[i].cpu().numpy(), binary_data=binary_data)
inputs.append(inp)
outputs = []
for i, metadata in enumerate(outputs_metadata):
out = self.client_pkg.InferRequestedOutput(
metadata["name"], binary_data=binary_data
)
outputs.append(out)
response = self.triton_client.infer(
model_name=self.model_name,
model_version=self.model_version,
inputs=inputs,
outputs=outputs,
)
return response
def request_grpc(
self, batch: Tuple[torch.Tensor], binary_data: bool = False
) -> InferResult:
"""Runs a grpc request with the `batch` input that can be generated from a FastNN `Processor`
* **batch** - Tuple of torch tensors, typically batch inputs from a dataloader
"""
inputs_metadata = self.model_metadata.inputs
outputs_metadata = self.model_metadata.outputs
# Assert batch input matches triton model metadata
assert len(batch) == len(inputs_metadata)
inputs = []
for i, metadata in enumerate(inputs_metadata):
inp = self.client_pkg.InferInput(
metadata.name, tuple(batch[i].shape), metadata.datatype
)
inp.set_data_from_numpy(batch[i].cpu().numpy())
inputs.append(inp)
outputs = []
for i, metadata in enumerate(outputs_metadata):
out = self.client_pkg.InferRequestedOutput(metadata.name)
outputs.append(out)
response = self.triton_client.infer(
model_name=self.model_name,
model_version=self.model_version,
inputs=inputs,
outputs=outputs,
)
return response
request(self, batch, binary_data=False)
Runs a request with the batch input that can be generated from a FastNN Processor.
- batch - Tuple of torch tensors, typically batch inputs from a dataloader
Source code in fastnn/client.py
def request(
self, batch: Tuple[torch.Tensor], binary_data: bool = False
) -> InferResult:
"""Runs a request with the `batch` input that can be generated from a FastNN `Processor`
* **batch** - Tuple of torch tensors, typically batch inputs from a dataloader
"""
if self.client_type == "grpc":
return self.request_grpc(batch=batch, binary_data=binary_data)
elif self.client_type == "http":
return self.request_http(batch=batch, binary_data=binary_data)
else:
ValueError("Paramater 'client_type' must be either grpc or http")
request_grpc(self, batch, binary_data=False)
Runs a grpc request with the batch input that can be generated from a FastNN Processor.
- batch - Tuple of torch tensors, typically batch inputs from a dataloader
Source code in fastnn/client.py
def request_grpc(
self, batch: Tuple[torch.Tensor], binary_data: bool = False
) -> InferResult:
"""Runs a grpc request with the `batch` input that can be generated from a FastNN `Processor`
* **batch** - Tuple of torch tensors, typically batch inputs from a dataloader
"""
inputs_metadata = self.model_metadata.inputs
outputs_metadata = self.model_metadata.outputs
# Assert batch input matches triton model metadata
assert len(batch) == len(inputs_metadata)
inputs = []
for i, metadata in enumerate(inputs_metadata):
inp = self.client_pkg.InferInput(
metadata.name, tuple(batch[i].shape), metadata.datatype
)
inp.set_data_from_numpy(batch[i].cpu().numpy())
inputs.append(inp)
outputs = []
for i, metadata in enumerate(outputs_metadata):
out = self.client_pkg.InferRequestedOutput(metadata.name)
outputs.append(out)
response = self.triton_client.infer(
model_name=self.model_name,
model_version=self.model_version,
inputs=inputs,
outputs=outputs,
)
return response
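The gRPC client returns model metadata as protobuf messages, which is why this method reads inputs and outputs through attribute access (.name, .datatype) rather than dictionary keys. A hedged sketch of inspecting that metadata before sending anything, with the same assumed model as above:

```python
from fastnn.client import FastNNClient

client = FastNNClient(url="127.0.0.1:8001", model_name="distilbert-squad",
                      model_version="1", client_type="grpc")

# Protobuf metadata: attribute access, mirroring request_grpc above.
for inp in client.model_metadata.inputs:
    print(inp.name, inp.datatype, list(inp.shape))
for out in client.model_metadata.outputs:
    print(out.name, out.datatype)
```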
request_http(self, batch, binary_data=False)
Runs an http request with the batch input that can be generated from a FastNN Processor.
- batch - Tuple of torch tensors, typically batch inputs from a dataloader
Source code in fastnn/client.py
def request_http(
self, batch: Tuple[torch.Tensor], binary_data: bool = False
) -> InferResult:
"""Runs an http request with the `batch` input that can be generated from a FastNN `Processor`
* **batch** - Tuple of torch tensors, typically batch inputs from a dataloader
"""
inputs_metadata = self.model_metadata["inputs"]
outputs_metadata = self.model_metadata["outputs"]
# Assert batch input matches triton model metadata
assert len(batch) == len(inputs_metadata)
inputs = []
for i, metadata in enumerate(inputs_metadata):
inp = self.client_pkg.InferInput(
metadata["name"], tuple(batch[i].shape), metadata["datatype"]
)
inp.set_data_from_numpy(batch[i].cpu().numpy(), binary_data=binary_data)
inputs.append(inp)
outputs = []
for i, metadata in enumerate(outputs_metadata):
out = self.client_pkg.InferRequestedOutput(
metadata["name"], binary_data=binary_data
)
outputs.append(out)
response = self.triton_client.infer(
model_name=self.model_name,
model_version=self.model_version,
inputs=inputs,
outputs=outputs,
)
return response
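The HTTP client returns the same model metadata as parsed JSON, hence the dictionary lookups above, and binary_data switches the tensor payloads between JSON arrays and Triton's binary-data extension. A hedged sketch, with the same assumptions about the batch as earlier:

```python
import torch
from fastnn.client import FastNNClient

client = FastNNClient(url="127.0.0.1:8000", model_name="distilbert-squad",
                      model_version="1", client_type="http")

# JSON metadata: dictionary lookups, mirroring request_http above.
for inp in client.model_metadata["inputs"]:
    print(inp["name"], inp["datatype"], inp["shape"])

# Assumed two-input batch; shapes and dtypes depend on the served model.
batch = (
    torch.randint(0, 30000, (1, 384), dtype=torch.int64),
    torch.ones((1, 384), dtype=torch.int64),
)

# binary_data=True sends and requests tensors as binary payloads instead of JSON arrays.
response = client.request_http(batch=batch, binary_data=True)
```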