Closed ArtemGr closed 2 years ago
Here's also a sample of tokenizing in Python:
git clone --depth=1 git@github.com:huggingface/transformers.git
pip3 install --upgrade regex packaging tqdm sacremoses filelock numpy huggingface_hub tokenizers
cd src
cat > qwe.py
import transformers
from transformers import GPT2Tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
print(tokenizer.encode("hello world"))
^D
python3 qwe.py
Of note also is the cURL example that demonstrates how the REST generation currently works:
NAIK=access-key-generated-by-novelai-api-from-login
cargo install jql
NAIT=$(curl -s https://api.novelai.net/user/login -H 'Content-Type: application/json' -d "{\"key\": \"$NAIK\"}" | jql -r '"accessToken"')
curl -s https://api.novelai.net/ai/generate -H "Authorization: Bearer $NAIT" -H 'Content-Type: application/json' -d '{"input": "hello world", "model": "genji-python-6b", "parameters": {"temperature": 0.72, "max_length": 40, "min_length": 1, "top_k": 0, "top_p": 0.725, "tail_free_sampling": 1, "repetition_penalty": 1.13125, "repetition_penalty_range": 2048, "repetition_penalty_slope": 0.18, "repetition_penalty_frequency": 0, "repetition_penalty_presence": 0, "generate_until_sentence": true, "use_cache": false, "use_string": true, "return_full_text": true, "prefix": "vanilla", "order": [0, 1, 2, 3]}}' | jql -r '"output"'
Closing as the likelihood of returning to this diminishes.
The code I'm currently using is simply
def h2rest(host, path, payload, headers=None):
    """POST *payload* as JSON to https://host+path and return the response.

    host -- hostname, e.g. 'api.novelai.net'
    path -- request path, e.g. '/ai/generate'
    payload -- JSON-serializable request body
    headers -- optional extra headers, merged over the Content-Type default

    On a non-success status a sanitized excerpt of the body is logged.
    """
    import http.client
    import json
    import re
    # `headers=None` instead of a mutable `{}` default (shared across calls).
    extra = headers or {}
    conn = http.client.HTTPSConnection(host)
    # Dict union `|` requires Python 3.9+; caller headers win on conflict.
    conn.request('POST', path,
                 headers={'Content-Type': 'application/json'} | extra,
                 body=json.dumps(payload))
    resp = conn.getresponse()
    # BUG FIX: original used `and`, which can never be true (a status cannot
    # be both < 200 and > 201), so errors were silently ignored. `or` flags
    # every status outside the 200..201 success range.
    if resp.status < 200 or 201 < resp.status:
        try:
            body = resp.read()  # renamed: don't shadow the `bytes` builtin
            # Replace unexpected characters so the log line stays readable.
            why = re.sub(r'[^\w<> =/!\.]', '?', body.decode())
            if 234 < len(why):
                why = why[:234]
        except Exception:
            why = ''
        log(f"{resp.status}; “{why}”")
    return resp
def nai(prompt):
    """Send *prompt* to NovelAI's euterpe-v2 model and return the generated text.

    The bearer token is read from the NAIT environment variable
    ("Authorization: Bearer $NAIT", as extracted from browser requests).
    """
    token = os.environ['NAIT']
    # Sampling configuration, mirroring what the NovelAI web UI sends.
    sampling = {
        "generate_until_sentence": False,
        "max_length": 55,
        "min_length": 1,
        # Adding "num_logprobs": 10 would match "Enable Token Probabilities" in the UI.
        "order": [2, 1, 3, 0],
        "prefix": 'vanilla',
        "repetition_penalty_frequency": 0,
        "repetition_penalty_presence": 0,
        "repetition_penalty_range": 2048,
        "repetition_penalty_slope": 0.09,
        "repetition_penalty": 1.09375,
        "return_full_text": True,
        "tail_free_sampling": 0.925,
        "temperature": 0.666,
        "top_k": 0,
        "top_p": 0.925,
        "use_cache": False,
        "use_string": True,
    }
    request_body = {
        'input': prompt,
        'model': 'euterpe-v2',
        "parameters": sampling,
    }
    resp = h2rest('api.novelai.net', '/ai/generate', request_body,
                  {'Authorization': f"Bearer {token}"})
    assert resp.headers['Content-Type'].startswith('application/json')
    return json.loads(resp.read())['output']
I didn't know at first that "use_string" makes things this simple, so I delved a bit into the tokenizing.