togethercomputer / together-python

The Official Python Client for Together's API
https://pypi.org/project/together/
Apache License 2.0
31 stars 7 forks source link

Add AsyncComplete to do LLM completions asynchronously #68

Closed clam004 closed 9 months ago

clam004 commented 9 months ago

Issue # https://linear.app/together-ai/issue/ENG-388/async-class-for-python-library

Describe your changes

import os
import asyncio
import time
import random
import string

import together

from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')

model = "togethercomputer/llama-2-7b"
prompt = "one 2 three 4"

# Synchronous completion: blocks until the API responds.
output = together.Complete.create(
  prompt = prompt, 
  model = model, 
  max_tokens = 8,
)

print(output['output']['choices'][0]['text'])

# Asynchronous completion. A bare `await` is only legal inside an async
# function (or a notebook/REPL with top-level await support), so in a plain
# script we wrap the call and drive it with asyncio.run().
async def _async_demo():
    return await together.AsyncComplete.create(
      prompt = prompt, 
      model = model, 
      max_tokens = 8,
    )

output = asyncio.run(_async_demo())

print(output.choices[0].text)

Testing below shows that the non-blocking (asynchronous) version completes the same batch of requests faster:

# Synchronous version
def sync_example(prompt, model, max_tokens):
    """Run one blocking completion and return the generated text."""
    response = together.Complete.create(prompt=prompt, model=model, max_tokens=max_tokens)
    # The sync API returns a nested dict; the text lives under output/choices.
    return response['output']['choices'][0]['text']

# Asynchronous version
async def async_example(prompt, model, max_tokens):
    """Run one non-blocking completion and return the generated text."""
    response = await together.AsyncComplete.create(prompt=prompt, model=model, max_tokens=max_tokens)
    # The async API returns an object with attribute access rather than a dict.
    return response.choices[0].text

# Function to run multiple requests concurrently
async def run_concurrent_requests(prompt="one 2 three 4", model="togethercomputer/llama-2-7b", max_tokens=8, num_requests=9):
    """Benchmark sequential vs. concurrent completions against the same model.

    Fires `num_requests` completions twice — first one-at-a-time via the
    blocking client, then all at once via asyncio.gather — and prints the
    elapsed time and results for each run.
    """
    def generate_random_prompt():
        # Perturb the prompt with a random inserted character so each request
        # is distinct. NOTE: the two independent randint() calls may duplicate
        # or drop a slice of the prompt; that is harmless for this benchmark.
        # This helper does no awaiting, so it is a plain (non-async) function.
        random_char = random.choice(string.ascii_letters + ' ')
        return prompt[:random.randint(0, len(prompt))] + random_char + prompt[random.randint(0, len(prompt)):]

    # Synchronous run: each request blocks until the previous one finishes.
    # perf_counter() is monotonic and meant for interval timing, unlike time().
    start_time_sync = time.perf_counter()
    sync_results = [sync_example(generate_random_prompt(), model, max_tokens) for _ in range(num_requests)]
    sync_elapsed = time.perf_counter() - start_time_sync
    print(f"Synchronous execution time: {sync_elapsed} seconds")

    # Asynchronous run: all requests are in flight concurrently.
    start_time_async = time.perf_counter()
    async_results = await asyncio.gather(
        *[async_example(generate_random_prompt(), model, max_tokens) for _ in range(num_requests)]
    )
    async_elapsed = time.perf_counter() - start_time_async
    print(f"Asynchronous execution time: {async_elapsed} seconds")

    # Print the results
    print("\nSynchronous Results:")
    for result in sync_results:
        print(result)

    print("\nAsynchronous Results:")
    for result in async_results:
        print(result)

# Run the concurrent requests with default arguments
asyncio.run(run_concurrent_requests())

Synchronous execution time: 3.3800830841064453 seconds
Asynchronous execution time: 0.6449248790740967 seconds