Azure / azure-search-vector-samples

A repository of code samples for Vector search capabilities in Azure AI Search.
https://azure.microsoft.com/products/search
MIT License
690 stars 285 forks source link

Azure AI.Search is giving SSL: CERTIFICATE_VERIFY_FAILED certificate verify failed: unable to get local issuer certificate (_ssl.c:997) Error #238

Closed dhaksr closed 1 month ago

dhaksr commented 1 month ago

I am trying to use the provided examples on a corporate-provided laptop, and I think the error is happening because the certificate cannot be downloaded on that laptop. Is there any workaround or alternative available? The issue happens on both the office network and my personal network. However, it doesn't happen on my personal laptop (without any additional steps). So I am requesting a way to bypass the SSL verification.

Environment used - Visual Studio Code/ Windows 10 Pro/ venv

The error I am getting is:

` File "c:\00WorkFiles\AzureOpenSearch.venv\lib\site-packages\streamlit\runtime\scriptrunner\script_runner.py", line 600, in _run_script exec(code, module.__dict__) File "C:\00WorkFiles\AzureOpenSearch\azure-try3.py", line 195, in main() File "C:\00WorkFiles\AzureOpenSearch\azure-try3.py", line 189, in main
response, citation = perform_search(query) File "C:\00WorkFiles\AzureOpenSearch\azure-try3.py", line 152, in perform_search for result in results: File "c:\00WorkFiles\AzureOpenSearch.venv\lib\site-packages\azure\search\documents_paging.py", line 54, in next return next(self._page_iterator)

.... error azure.core.exceptions.ServiceRequestError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:997) `

The code in question is attached..

import os
import ssl

import streamlit as st
from azure.storage.blob import BlobServiceClient
from azure.search.documents.indexes import SearchIndexerClient
from azure.search.documents.indexes.models import (
    SearchIndexerDataContainer,
    SearchIndexerDataSourceConnection,
    SplitSkill,
    InputFieldMappingEntry,
    OutputFieldMappingEntry,
    AzureOpenAIEmbeddingSkill,
    SearchIndexerIndexProjections,
    SearchIndexerIndexProjectionSelector,
    SearchIndexerIndexProjectionsParameters,
    IndexProjectionMode,
    SearchIndexerSkillset,
    SearchIndexer,
    FieldMapping,
)
from azure.search.documents import SearchClient
from azure.search.documents.models import (
    VectorizableTextQuery,
    QueryType,
    QueryCaptionType,
    QueryAnswerType,
)
from openai import AzureOpenAI
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import ResourceExistsError

# SECURITY WARNING: this disables TLS certificate verification for the whole
# process, leaving every HTTPS call open to man-in-the-middle attacks. It is a
# workaround for "CERTIFICATE_VERIFY_FAILED" behind a corporate TLS-inspecting
# proxy. Prefer trusting the corporate root CA instead (e.g. point
# SSL_CERT_FILE / REQUESTS_CA_BUNDLE at the CA bundle) — see
# Azure/azure-sdk-for-python#20883.
ssl._create_default_https_context = ssl._create_unverified_context

# Azure configuration — all settings come from environment variables; a
# missing variable fails fast with KeyError at startup.
endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]
credential = os.environ["AZURE_SEARCH_ADMIN_KEY"]
index_name = os.environ["AZURE_SEARCH_INDEX"]
blob_connection_string = os.environ["BLOB_CONNECTION_STRING"]
blob_container_name = os.environ["BLOB_CONTAINER_NAME"]
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
azure_openai_key = os.environ["AZURE_OPENAI_KEY"]
azure_openai_embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"]
azure_openai_chatgpt_deployment = os.environ["AZURE_OPENAI_CHATGPT_DEPLOYMENT"]

# Set up the Azure OpenAI client used by perform_search for chat completions.
openai_client = AzureOpenAI(
    api_key=azure_openai_key,
    api_version="2023-03-15-preview",
    azure_endpoint=azure_openai_endpoint,
)

def upload_to_azure(file):
    """Upload *file* to Blob Storage and build the search ingestion pipeline.

    Creates (or updates) the data source, skillset, and indexer for
    ``index_name``, then runs the indexer so the document is chunked,
    embedded, and indexed.

    Args:
        file: an uploaded file object (e.g. Streamlit ``UploadedFile``)
            exposing ``.name`` and a readable stream.

    Returns:
        None. If a blob with the same name already exists, a message is
        written to the Streamlit UI and nothing else happens.

    Raises:
        azure.core.exceptions.HttpResponseError (and subclasses) on any
        failed service call.
    """
    # Connect to Blob Storage and make sure the target container exists.
    blob_service_client = BlobServiceClient.from_connection_string(blob_connection_string)
    container_client = blob_service_client.get_container_client(blob_container_name)
    if not container_client.exists():
        container_client.create_container()

    # Skip the upload (and the rest of the pipeline) if the blob already exists.
    blob_client = container_client.get_blob_client(file.name)
    if not blob_client.exists():
        # Upload the PDF to Blob Storage
        blob_client.upload_blob(file)
    else:
        st.write(f"The document '{file.name}' already exists in the blob container.")
        return

    # Create a data source pointing the indexer at the blob container.
    # One SearchIndexerClient is reused for every indexer-plane call below
    # (the original constructed three identical clients).
    indexer_client = SearchIndexerClient(endpoint, AzureKeyCredential(credential))
    container = SearchIndexerDataContainer(name=blob_container_name)
    data_source_connection = SearchIndexerDataSourceConnection(
        name=f"{index_name}-blob",
        type="azureblob",
        connection_string=blob_connection_string,
        container=container,
    )
    data_source = indexer_client.create_or_update_data_source_connection(data_source_connection)

    # Create a skillset: split documents into overlapping pages, then embed
    # each page with Azure OpenAI.
    skillset_name = f"{index_name}-skillset"
    split_skill = SplitSkill(
        description="Split skill to chunk documents",
        text_split_mode="pages",
        context="/document",
        maximum_page_length=2000,
        page_overlap_length=500,
        inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
        outputs=[OutputFieldMappingEntry(name="textItems", target_name="pages")],
    )
    embedding_skill = AzureOpenAIEmbeddingSkill(
        description="Skill to generate embeddings via Azure OpenAI",
        context="/document/pages/*",
        resource_uri=azure_openai_endpoint,
        deployment_id=azure_openai_embedding_deployment,
        api_key=azure_openai_key,
        inputs=[InputFieldMappingEntry(name="text", source="/document/pages/*")],
        outputs=[OutputFieldMappingEntry(name="embedding", target_name="vector")],
    )
    # Project each chunk into its own search document, keyed back to the
    # parent via "parent_id"; parent documents themselves are not indexed.
    index_projections = SearchIndexerIndexProjections(
        selectors=[
            SearchIndexerIndexProjectionSelector(
                target_index_name=index_name,
                parent_key_field_name="parent_id",
                source_context="/document/pages/*",
                mappings=[
                    InputFieldMappingEntry(name="chunk", source="/document/pages/*"),
                    InputFieldMappingEntry(name="vector", source="/document/pages/*/vector"),
                    InputFieldMappingEntry(name="title", source="/document/metadata_storage_name"),
                ],
            )
        ],
        parameters=SearchIndexerIndexProjectionsParameters(
            projection_mode=IndexProjectionMode.SKIP_INDEXING_PARENT_DOCUMENTS
        ),
    )
    skillset = SearchIndexerSkillset(
        name=skillset_name,
        description="Skillset to chunk documents and generating embeddings",
        skills=[split_skill, embedding_skill],
        index_projections=index_projections,
    )
    indexer_client.create_or_update_skillset(skillset)

    # Create an indexer wiring the data source to the skillset, then run it.
    indexer_name = f"{index_name}-indexer"
    indexer = SearchIndexer(
        name=indexer_name,
        description="Indexer to index documents and generate embeddings",
        skillset_name=skillset_name,
        target_index_name=index_name,
        data_source_name=data_source.name,
        field_mappings=[FieldMapping(source_field_name="metadata_storage_name", target_field_name="title")],
    )
    indexer_client.create_or_update_indexer(indexer)
    indexer_client.run_indexer(indexer_name)

def perform_search(query):
    """Answer *query* with RAG: hybrid semantic search, then chat completion.

    Runs a combined keyword + vector semantic search against the index,
    takes the single best chunk, and asks the chat deployment to answer
    the question grounded in that chunk.

    Args:
        query: the user's natural-language question.

    Returns:
        A ``(answer, citation)`` tuple. ``citation`` is
        ``"Reference: <title>"`` for a hit, or ``None`` (with a fallback
        message as the answer) when no document matched.
    """
    search_client = SearchClient(endpoint, index_name, AzureKeyCredential(credential))
    # VectorizableTextQuery lets the service vectorize the query text itself.
    vector_query = VectorizableTextQuery(text=query, k_nearest_neighbors=1, fields="vector", exhaustive=True)

    results = search_client.search(
        search_text=query,
        vector_queries=[vector_query],
        select=["parent_id", "chunk_id", "chunk", "title"],
        query_type=QueryType.SEMANTIC,
        semantic_configuration_name='my-semantic-config',
        query_caption=QueryCaptionType.EXTRACTIVE,
        query_answer=QueryAnswerType.EXTRACTIVE,
        top=1,
    )

    # Keep only the first (best-ranked) hit; top=1 means at most one anyway.
    relevant_chunk = None
    title = None
    for result in results:
        relevant_chunk = result["chunk"]
        title = result["title"]
        break

    if relevant_chunk:
        # Ground the chat completion in the retrieved chunk via the system prompt.
        response = openai_client.chat.completions.create(
            model=azure_openai_chatgpt_deployment,
            messages=[
                {"role": "system", "content": f"You are an AI assistant that helps answer questions based on the given context. Here is the relevant context:\n\n{relevant_chunk}"},
                {"role": "user", "content": query}
            ]
        )

        # Return the assistant's response and the citation
        return response.choices[0].message.content, f"Reference: {title}"
    else:
        return "Related documents not found.", None

def main():
    """Streamlit entry point: PDF upload UI plus Q&A over indexed documents."""
    st.title("RAG Chatbot")

    uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

    if uploaded_file is not None:
        try:
            upload_to_azure(uploaded_file)
            st.write("PDF uploaded and vectorized successfully!")
        except ResourceExistsError:
            # Raised when the blob already exists; not a failure for the user.
            st.write(f"The document '{uploaded_file.name}' already exists in the blob container.")
        except Exception as e:
            # Surface any other failure in the UI rather than crashing the app.
            st.write(f"An error occurred while uploading and vectorizing the document: {str(e)}")

    query = st.text_input("Ask a question about the documents")

    if query:
        response, citation = perform_search(query)
        st.write("Assistant:", response)
        if citation:
            st.write("Citation:", citation)


# GitHub markdown stripped the dunder underscores in the original paste
# ('if name == "main"'); restored to the standard entry-point guard.
if __name__ == "__main__":
    main()

mattgotteiner commented 1 month ago

Can you please try following the steps here? https://github.com/Azure/azure-sdk-for-python/issues/20883

If this doesn't work - please open an issue on the azure-sdk-for-python repository.

dhaksr commented 1 month ago

Can you please try following the steps here? Azure/azure-sdk-for-python#20883

If this does work - please open an issue on the azure-sdk-for-python repository.

Thanks Matt. This does fix the problem. I don't understand the second part as to why I should open an issue in the azure-sdk repo. Can you please give more details?

mattgotteiner commented 1 month ago

Can you please try following the steps here? Azure/azure-sdk-for-python#20883 If this does work - please open an issue on the azure-sdk-for-python repository.

Thanks Matt. This does fix the problem. I dont understand the second part as to why I should open issue in the azure sdk repo. Can you please give more details.

sorry typo - "doesn't". Fixed