Closed mikelonestone closed 5 months ago
Hi @mikelonestone, the attached example shows how you can use the RAG view and add data. Let me know if you have additional questions. Attachment: document-qa-results.json
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from zeno_client import ZenoClient, ZenoMetric\n",
"import pandas as pd\n",
"import json\n",
"import os\n",
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv(override=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Read the results explicitly as UTF-8: without an encoding argument open() falls back\n",
"# to the platform locale encoding, which can fail on non-ASCII text in the JSON file.\n",
"with open(\"document-qa-results.json\", \"r\", encoding=\"utf-8\") as f:\n",
"    data = json.load(f)\n",
"\n",
"# One row per record: the \"question\" column is taken from each record's \"data\" field,\n",
"# and the positional index is reused as the \"id\" column.\n",
"data_df = pd.DataFrame({\"question\": [d[\"data\"] for d in data]})\n",
"data_df[\"id\"] = data_df.index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The API key is read from the environment instead of being hardcoded in the notebook.\n",
"zeno_api_key = os.environ[\"ZENO_API_KEY\"]\n",
"client = ZenoClient(zeno_api_key)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Successfully updated project.\n",
"Access your project at https://hub.zenoml.com/project/cabreraalex/Document%20QA\n"
]
}
],
"source": [
"# Layout of one retrieved passage: score, markdown reference and passage text.\n",
"retrieved_element_view = {\n",
"    \"type\": \"vstack\",\n",
"    \"keys\": {\n",
"        \"score\": {\"type\": \"text\", \"label\": \"score: \"},\n",
"        \"reference\": {\"type\": \"markdown\"},\n",
"        \"text\": {\"type\": \"text\", \"label\": \"text: \"},\n",
"    },\n",
"}\n",
"\n",
"# The output view stacks the answer and the bordered list of retrieved passages.\n",
"output_view = {\n",
"    \"type\": \"vstack\",\n",
"    \"keys\": {\n",
"        \"answer\": {\"type\": \"text\"},\n",
"        \"retrieved\": {\n",
"            \"type\": \"list\",\n",
"            \"elements\": retrieved_element_view,\n",
"            \"border\": True,\n",
"        },\n",
"    },\n",
"}\n",
"\n",
"# Each metric is a mean over the column that shares its name.\n",
"metric_names = [\"accuracy\", \"exact_match\", \"substring_match\", \"f1\", \"rougel\"]\n",
"mean_metrics = [\n",
"    ZenoMetric(name=metric_name, type=\"mean\", columns=[metric_name])\n",
"    for metric_name in metric_names\n",
"]\n",
"\n",
"project = client.create_project(\n",
"    name=\"Document QA\",\n",
"    view={\n",
"        \"data\": {\"type\": \"text\"},\n",
"        \"label\": {\"type\": \"text\"},\n",
"        \"output\": output_view,\n",
"    },\n",
"    description=\"Document-grounded question answering with Wikipedia\",\n",
"    metrics=mean_metrics,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Upload the questions: \"id\" is the id column and \"question\" is the data column.\n",
"project.upload_dataset(\n",
"    data_df,\n",
"    id_column=\"id\",\n",
"    data_column=\"question\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Per-answer evaluation scores that are copied into columns of the same name.\n",
"METRIC_COLUMNS = [\"accuracy\", \"exact_match\", \"substring_match\", \"f1\", \"rougel\"]\n",
"\n",
"\n",
"def format_system_output(record):\n",
"    \"\"\"Serialize one result record into the JSON string stored in the \"output\" column.\n",
"\n",
"    Only the first entry of record[\"output\"] and the first passage of its\n",
"    \"retrieved\" list are used.\n",
"\n",
"    Args:\n",
"        record: one element of the loaded results list.\n",
"\n",
"    Returns:\n",
"        A JSON string with the keys \"answer\" and \"retrieved\"; \"retrieved\" is a\n",
"        one-element list holding \"reference\", \"text\" and \"score\".\n",
"\n",
"    Raises:\n",
"        KeyError, IndexError: if the record lacks the fields read here.\n",
"    \"\"\"\n",
"    result = record[\"output\"][0]\n",
"    passage = result[\"retrieved\"][0]\n",
"    page_id = passage[\"reference\"]\n",
"    # An f-string accepts str and int page ids alike; \"+\" concatenation raises\n",
"    # TypeError when the id is not a string.\n",
"    reference_link = f\"[{page_id}](https://en.wikipedia.org/?curid={page_id})\"\n",
"    return json.dumps(\n",
"        {\n",
"            \"answer\": result[\"answer\"],\n",
"            \"retrieved\": [\n",
"                {\n",
"                    \"reference\": reference_link,\n",
"                    \"text\": passage[\"text\"],\n",
"                    \"score\": passage[\"score\"],\n",
"                }\n",
"            ],\n",
"        }\n",
"    )\n",
"\n",
"\n",
"# Column order: \"output\" first, then the metrics in METRIC_COLUMNS order, then \"id\".\n",
"output_columns = {\"output\": [format_system_output(d) for d in data]}\n",
"for metric in METRIC_COLUMNS:\n",
"    output_columns[metric] = [d[\"output\"][0][\"answer_evaluation\"][metric] for d in data]\n",
"\n",
"output_df = pd.DataFrame(output_columns)\n",
"# The positional index is reused as the \"id\" column.\n",
"output_df[\"id\"] = output_df.index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Upload output_df as the system named \"Llama-2 BM25\".\n",
"project.upload_system(\n",
"    output_df,\n",
"    name=\"Llama-2 BM25\",\n",
"    id_column=\"id\",\n",
"    output_column=\"output\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "compare",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sorry, I forgot to close the issue. I just had to delete and recreate the project (before that, the data view specification wasn't being used, and the data was displayed as plain text instead of being formatted).
Is it possible to get an example of upload_system with the RAG view data example? https://hub.zenoml.com/playground?params=eyJzYW1wbGUiOiJyYWcifQ==
I'm not sure how to pass the output column within the DataFrame.