To support an array of integers for the `input` field in the embedding API, follow these steps:
`OpenAIEmbeddingInput` schema in `pymodels.py`:

```python
class OpenAIEmbeddingInput(BaseModel):
    input: Union[  # type: ignore
        conlist(  # type: ignore
            Annotated[Union[str, int], INPUT_STRING],
            **ITEMS_LIMIT,
        ),
        Annotated[Union[str, int], INPUT_STRING],
        list[list[int]],
        list[int],
    ]
    model: str = "default/not-specified"
    user: Optional[str] = None
```
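With that change, all four OpenAI input shapes should pass validation. A quick sketch, assuming the modified class is importable from `infinity_emb.fastapi_schemas.pymodels` (the token IDs below are arbitrary placeholders):

```python
from infinity_emb.fastapi_schemas.pymodels import OpenAIEmbeddingInput

OpenAIEmbeddingInput(input="hello world")         # plain string
OpenAIEmbeddingInput(input=["hello", "world"])    # list of strings
OpenAIEmbeddingInput(input=[1212, 318, 257])      # list of integers
OpenAIEmbeddingInput(input=[[1212, 318], [257]])  # list of lists of integers
```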
`_embeddings` endpoint in `infinity_server.py`:

```python
@app.post(
    f"{url_prefix}/embeddings",
    response_model=OpenAIEmbeddingResult,
    response_class=responses.ORJSONResponse,
    dependencies=route_dependencies,
    operation_id="embeddings",
)
async def _embeddings(data: OpenAIEmbeddingInput):
    engine = _resolve_engine(data.model)
    try:
        # Normalize every accepted input shape to a flat list before embedding.
        if isinstance(data.input, (str, int)):
            data.input = [data.input]
        elif all(isinstance(i, int) for i in data.input):
            # list[int] -> list of decimal strings
            data.input = [str(i) for i in data.input]
        elif all(isinstance(i, list) and all(isinstance(j, int) for j in i) for i in data.input):
            # list[list[int]] -> flattened list of decimal strings
            data.input = [str(i) for sublist in data.input for i in sublist]

        logger.debug("[📥] Received request with %s inputs ", len(data.input))
        start = time.perf_counter()
        embedding, usage = await engine.embed(sentences=data.input)
        duration = (time.perf_counter() - start) * 1000
        logger.debug("[✔️] Done in %s ms", duration)

        return OpenAIEmbeddingResult.to_embeddings_response(
            embeddings=embedding,
            model=engine.engine_args.served_model_name,
            usage=usage,
        )
    except ModelNotDeployedError as ex:
        raise errors.OpenAIException(
            f"ModelNotDeployedError: model=`{data.model}` does not support `embed`. Reason: {ex}",
            code=status.HTTP_400_BAD_REQUEST,
        )
    except Exception as ex:
        raise errors.OpenAIException(
            f"InternalServerError: {ex}",
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
```
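Note what that normalization does with token-ID input, which is what the closing comment further down addresses: the integers are handed to the model as their decimal string forms, and nested lists are flattened into a single flat list. A standalone sketch of just that logic (illustration only, not part of the proposed patch):

```python
def normalize(inp):
    # Mirrors the branches in the proposed _embeddings handler.
    if isinstance(inp, (str, int)):
        return [inp]
    if all(isinstance(i, int) for i in inp):
        return [str(i) for i in inp]
    if all(isinstance(i, list) and all(isinstance(j, int) for j in i) for i in inp):
        return [str(i) for sublist in inp for i in sublist]
    return inp

normalize([1212, 318, 257])      # -> ["1212", "318", "257"]  (decimal strings, not decoded text)
normalize([[1212, 318], [257]])  # -> ["1212", "318", "257"]  (flattened across sublists)
```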
`openapi.json`:

```json
"OpenAIEmbeddingInput": {
  "properties": {
    "input": {
      "anyOf": [
        {
          "items": {
            "type": "string",
            "maxLength": 122880
          },
          "type": "array",
          "maxItems": 2048,
          "minItems": 1
        },
        {
          "type": "string",
          "maxLength": 122880
        },
        {
          "items": {
            "type": "integer"
          },
          "type": "array",
          "maxItems": 2048,
          "minItems": 1
        },
        {
          "items": {
            "items": {
              "type": "integer"
            },
            "type": "array"
          },
          "type": "array",
          "maxItems": 2048,
          "minItems": 1
        }
      ],
      "title": "Input"
    },
    "model": {
      "type": "string",
      "title": "Model",
      "default": "default/not-specified"
    },
    "user": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "title": "User"
    }
  },
  "type": "object",
  "required": [
    "input"
  ],
  "title": "OpenAIEmbeddingInput"
}
```
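Since FastAPI derives the OpenAPI document from the Pydantic models, an alternative to hand-editing `docs/assets/openapi.json` is to regenerate it from the app. A minimal sketch, assuming a module-level `app` object is importable (the actual way infinity constructs its app may differ):

```python
import json

from infinity_emb.infinity_server import app  # assumption: module-level FastAPI app

with open("docs/assets/openapi.json", "w") as f:
    json.dump(app.openapi(), f, indent=2)
```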
Affected files: `/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py`, `/libs/infinity_emb/infinity_emb/infinity_server.py`, `/docs/assets/openapi.json`
OpenAI uses a different tokenizer, so the token IDs would require detokenizing and re-tokenizing. This is not desired and has been discussed in other issues. Therefore closed.
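For context on that reasoning: honoring token-ID input would mean decoding the IDs with the serving model's own tokenizer before embedding, and IDs produced by OpenAI's tokenizer (tiktoken) would not map to the same text. A rough sketch of that decoding step, assuming a Hugging Face tokenizer (model name and IDs are placeholders):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("BAAI/bge-small-en-v1.5")  # placeholder model
text = tok.decode([1212, 318, 257, 1332], skip_special_tokens=True)
# `text` is only meaningful if these IDs came from this same tokenizer;
# IDs from OpenAI's tokenizer would decode to unrelated text here.
```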
System Info
infinity 0.0.51 in Python 3.11, Ubuntu.
As described in the OpenAI API reference ( https://platform.openai.com/docs/api-reference/embeddings/create ), the request argument `input` can be a string or an array:

- string: The string that will be turned into an embedding.
- array of strings: The array of strings that will be turned into an embedding.
- array of integers: The array of integers that will be turned into an embedding.
- array of arrays of integers: The array of arrays containing integers that will be turned into an embedding.
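For illustration, the four shapes expressed with the official `openai` Python client (model name and token IDs are placeholders, not taken from the issue; requires an `OPENAI_API_KEY` in the environment):

```python
from openai import OpenAI

client = OpenAI()

client.embeddings.create(model="text-embedding-3-small", input="hello world")           # string
client.embeddings.create(model="text-embedding-3-small", input=["hello", "world"])      # array of strings
client.embeddings.create(model="text-embedding-3-small", input=[1212, 318])             # array of integers
client.embeddings.create(model="text-embedding-3-small", input=[[1212, 318], [257]])    # array of arrays of integers
```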
Reproduction
This can be reproduced with the following request:
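The exact payload from the report is not preserved here; a request of this shape (an array of token IDs as `input`; host, port, URL prefix, and model name are placeholders) exercises the unsupported case:

```python
import requests

resp = requests.post(
    "http://localhost:7997/embeddings",  # adjust host/port/prefix to your deployment
    json={"model": "BAAI/bge-small-en-v1.5", "input": [[1212, 318, 257, 1332]]},
)
print(resp.status_code, resp.text)
```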
OpenAI can return an embedding for such a request, but infinity_emb throws an error.
Expected behavior
Support arrays of integers as `input` so that infinity is compatible with the OpenAI embeddings API.