karmaakabane2005 opened 1 year ago
%cd /content
!apt-get -y install -qq aria2

!git clone -b v2.5 https://github.com/camenduru/text-generation-webui
%cd /content/text-generation-webui
!pip install -q -r requirements.txt

!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/Llama-2-13b-chat-hf/resolve/main/model-00001-of-00003.safetensors -d /content/text-generation-webui/models/Llama-2-13b-chat-hf -o model-00001-of-00003.safetensors
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/Llama-2-13b-chat-hf/resolve/main/model-00002-of-00003.safetensors -d /content/text-generation-webui/models/Llama-2-13b-chat-hf -o model-00002-of-00003.safetensors
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/Llama-2-13b-chat-hf/resolve/main/model-00003-of-00003.safetensors -d /content/text-generation-webui/models/Llama-2-13b-chat-hf -o model-00003-of-00003.safetensors
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/Llama-2-13b-chat-hf/raw/main/model.safetensors.index.json -d /content/text-generation-webui/models/Llama-2-13b-chat-hf -o model.safetensors.index.json
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/Llama-2-13b-chat-hf/raw/main/special_tokens_map.json -d /content/text-generation-webui/models/Llama-2-13b-chat-hf -o special_tokens_map.json
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/Llama-2-13b-chat-hf/resolve/main/tokenizer.model -d /content/text-generation-webui/models/Llama-2-13b-chat-hf -o tokenizer.model
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/Llama-2-13b-chat-hf/raw/main/tokenizer_config.json -d /content/text-generation-webui/models/Llama-2-13b-chat-hf -o tokenizer_config.json
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/Llama-2-13b-chat-hf/raw/main/config.json -d /content/text-generation-webui/models/Llama-2-13b-chat-hf -o config.json
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/Llama-2-13b-chat-hf/raw/main/generation_config.json -d /content/text-generation-webui/models/Llama-2-13b-chat-hf -o generation_config.json
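If you would rather not spell out nine aria2c calls, the same files can be fetched with the huggingface_hub library instead. A minimal sketch of that alternative (assuming huggingface_hub is available, which the webui's requirements install):

# Sketch: fetch the same files from 4bit/Llama-2-13b-chat-hf with huggingface_hub,
# as an alternative to the aria2c commands above.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="4bit/Llama-2-13b-chat-hf",
    local_dir="/content/text-generation-webui/models/Llama-2-13b-chat-hf",
    allow_patterns=[
        "model-*.safetensors",           # the three weight shards
        "model.safetensors.index.json",  # shard index
        "tokenizer.model",
        "tokenizer_config.json",
        "special_tokens_map.json",
        "config.json",
        "generation_config.json",
    ],
)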
!echo "dark_theme: true" > /content/settings.yaml !echo "chat_style: wpp" >> /content/settings.yaml
%cd /content/text-generation-webui !python server.py --share --settings /content/settings.yaml --load-in-8bit --model /content/text-generation-webui/models/Llama-2-13b-chat-hf
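The --share flag prints a public Gradio link for the UI. If you also want to call the model over HTTP rather than through the UI, webui versions from this era could additionally be launched with the legacy --api flag, which served a /api/v1/generate endpoint on port 5000. A hedged client sketch, assuming that flag and the legacy endpoint shape:

# Sketch of a client for text-generation-webui's legacy blocking API.
# Assumes server.py was launched with the extra --api flag; the endpoint
# and payload follow the 2023-era legacy API, which later versions replaced.
import requests

payload = {
    "prompt": "USER: Hello\nASSISTANT:",
    "max_new_tokens": 200,
    "temperature": 0.5,
}
r = requests.post("http://127.0.0.1:5000/api/v1/generate", json=payload)
print(r.json()["results"][0]["text"])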
To run things in a Docker container instead, create the container with this command:
docker run -p 8888:8888 -p 5000:5000 -e HUGGINGFACEHUB_API_TOKEN=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 aishwaryaprabhat/llama-in-a-container:v1
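The main.py file below then has to get inside the container; one way is docker cp. A sketch, where the container name "llama-container" and the destination path are assumptions (check docker ps for the real name):

# Sketch: copy main.py into the running container.
# "llama-container" and the destination path are assumptions; run
# `docker ps` to find the actual container name.
import subprocess

subprocess.run(
    ["docker", "cp", "main.py", "llama-container:/home/llamauser/main.py"],
    check=True,
)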
Here is the Python file to copy (main.py):
# Author: Leandro
# Description: expose a small HTTP API around a local Llama model
import os

from flask import Flask, request, jsonify
from llama_cpp import Llama

app = Flask(__name__)

# Point at the GGML file the container already holds in its Hugging Face cache.
os.environ["LLAMA_MODEL_PATH"] = "/home/llamauser/.cache/huggingface/hub/models--TheBloke--Llama-2-13B-chat-GGML/snapshots/3140827b4dfcb6b562cd87ee3d7f07109b014dd0/llama-2-13b-chat.ggmlv3.q5_1.bin"

llama_model_path = os.environ.get("LLAMA_MODEL_PATH")
if llama_model_path is None:
    raise ValueError("LLAMA_MODEL_PATH environment variable is not set")

llama_model = Llama(
    model_path=llama_model_path,
    n_threads=os.cpu_count(),
    n_batch=512,
)

# GET /post?prompt=... returns the model's reply as JSON.
@app.route('/post', methods=['GET'])
def post_prompt():
    prompt = request.args.get('prompt', '')
    if not prompt:
        return jsonify({"error": "Missing 'prompt' parameter"}), 400

    prompt_template = f'''SYSTEM: You are a helpful, respectful and honest assistant. Always answer helpfully.
USER: {prompt}
ASSISTANT:
'''
    response = llama_model(
        prompt=prompt_template,
        max_tokens=256,
        temperature=0.5,
        top_p=0.95,
        repeat_penalty=1.2,
        top_k=150,
        echo=True,  # echo=True returns the prompt along with the completion
    )
    response = response["choices"][0]["text"]

    # Because of echo=True, the output contains the full prompt; keep only
    # the part after "ASSISTANT:", which is the model's actual reply.
    start_index = response.find("ASSISTANT:")
    assistant_response = response[start_index + len("ASSISTANT:"):].strip()

    print(response)
    return jsonify({"response": assistant_response})

if __name__ == '__main__':
    app.run(debug=False, host='0.0.0.0', port=5000)
Then, on the command line inside the container:

pip install flask
python3 main.py
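Once main.py is running, the endpoint can be exercised from the host, since the docker run above publishes port 5000. For example:

# Query the /post endpoint defined in main.py; port 5000 is published
# by the `docker run -p 5000:5000` command above.
import requests

r = requests.get(
    "http://localhost:5000/post",
    params={"prompt": "What is the capital of France?"},
)
print(r.json()["response"])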
PROBLEM SOLVED
Changing the local model to a hosted model using a Google server