microsoft / MLOpsPython

MLOps using Azure ML Services and Azure DevOps
MIT License
1.21k stars 1.1k forks source link

Fail to run Azure Docker Image #351

Open virvirlopez opened 3 years ago

virvirlopez commented 3 years ago

For over a year we have been using the mlops-azure container (https://hub.docker.com/_/microsoft-mlops-python) to create our scoring images. On top of this image we install these requirements:

azureml-sdk
cryptography==3.3.1
flake8
flake8_formatter_junit_xml
pytest==4.3.0
python-dotenv>=0.10.3
requests>=2.2
numpy==1.19.5
pandas==1.1.5
scikit-learn==0.24.1
nltk==3.5
fuzzywuzzy==0.18.0
inflection==0.5.1
python-Levenshtein==0.12.0
seaborn
retrying
pymysql
pyyaml==5.3.1
pydantic==1.7.3
yamlloader==0.5.5

The code that we are running is:

import os
import glob
import sys
import yaml

from azureml.core import Workspace
from azureml.core.model import Model, InferenceConfig
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.authentication import ServicePrincipalAuthentication
from dotenv import load_dotenv

load_dotenv()

# Service principal credentials and the target subscription.
TENANT_ID = os.getenv("TENANT_ID")
APP_ID = os.getenv("SP_APP_ID")
APP_SECRET = os.getenv("SP_APP_SECRET")
SUBSCRIPTION_ID = os.getenv("SUBSCRIPTION_ID")

# Container registry settings used for the base image.
REGISTRY_CONTAINER_IMAGE = os.getenv("REGISTRY_CONTAINER_IMAGE")
REGISTRY_CONTAINER_USERNAME = os.getenv("REGISTRY_CONTAINER_USERNAME")
REGISTRY_CONTAINER_PASSWORD = os.getenv("REGISTRY_CONTAINER_PASSWORD")
REGISTRY_BASE_IMAGE = os.getenv("REGISTRY_BASE_IMAGE")

# Registered model to package.
MODEL_NAME = os.getenv("MODEL_NAME")
MODEL_VERSION = os.getenv("MODEL_VERSION")

# Workspace and resource group names are both derived from BASE_NAME; the
# suffixes are only appended when BASE_NAME is set to a non-empty value.
WORKSPACE_NAME = RESOURCE_GROUP = os.getenv("BASE_NAME")
if WORKSPACE_NAME:
    WORKSPACE_NAME = f"{WORKSPACE_NAME}-AML-WS"
    RESOURCE_GROUP = f"{RESOURCE_GROUP}-AML-RG"

def load_requirements_into_conda_yml(conda_yml: str):
    """Merge the entries of ``requirements.txt`` into a conda YAML file.

    Reads ``requirements.txt`` from the current working directory, appends
    every non-empty requirement that is not already listed to the ``pip``
    section of the conda file, and rewrites the conda file in place.

    Args:
        conda_yml: Path of the conda dependencies YAML file to update.

    Raises:
        yaml.YAMLError: If the conda file cannot be parsed. The error is
            printed and then re-raised; continuing would only fail later on
            an unbound variable.
    """
    # Use a context manager so the requirements file handle is always closed.
    with open("requirements.txt", "r") as req_file:
        # Strip each line so stray whitespace (e.g. "\r" from CRLF files)
        # never ends up inside a package specifier.
        requirements = [line.strip() for line in req_file.read().split("\n")]

    print(requirements)

    # Load conda dependencies
    with open(conda_yml, "r") as stream:
        try:
            conda_yaml = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)
            raise

    # The pip section is expected to be the second entry of "dependencies".
    pip_packages = conda_yaml["dependencies"][1]["pip"]

    # Append only the requirements that are not listed yet; because the list
    # is extended in place, duplicates inside requirements.txt are skipped too.
    for requirement in requirements:
        if requirement and requirement not in pip_packages:
            pip_packages.append(requirement)

    # Save the new requirements
    with open(conda_yml, "w") as outfile:
        yaml.dump(conda_yaml, outfile, default_flow_style=False)

# The first command-line argument is the directory the package is saved to;
# resolve it to an absolute path and create it if it does not exist yet.
output_dir = os.path.abspath(sys.argv[1])
os.makedirs(output_dir, exist_ok=True)

# Authenticate with the service principal read from the environment.
SP_AUTH = ServicePrincipalAuthentication(
    tenant_id=TENANT_ID,
    service_principal_id=APP_ID,
    service_principal_password=APP_SECRET,
)

# Look up the workspace and the registered model version to package.
ws = Workspace.get(
    name=WORKSPACE_NAME,
    auth=SP_AUTH,
    subscription_id=SUBSCRIPTION_ID,
    resource_group=RESOURCE_GROUP,
)
model = Model(workspace=ws, name=MODEL_NAME, version=MODEL_VERSION)

# The data-tools wheel is downloaded into src/score and its exact file name
# is not known in advance, so collect every wheel found in that directory.
os.chdir("./src")
files_whl = list(glob.glob("score/*.whl"))

# Merge requirements.txt into the scoring conda file, then load the result.
CONDA_YAML = "score/conda_dependencies.yml"
load_requirements_into_conda_yml(conda_yml=CONDA_YAML)
conda_dep = CondaDependencies(conda_dependencies_file_path=CONDA_YAML)
conda_dep.add_conda_package("pip==20.2.4")

# Registering the private wheels is best-effort: a failure must not abort the
# packaging, but the cause is reported so the problem can be diagnosed.
try:
    for file_whl in files_whl:
        whl_url = Environment.add_private_pip_wheel(workspace=ws, file_path=file_whl, exist_ok=True)
        conda_dep.add_pip_package(whl_url)
except Exception as exc:
    print(f"Not able to add the wheel from data tools: {exc!r}")

# Environment the scoring package is built from.
myenv = Environment(name="myenv")

# Docker settings: enable Docker and use the base image from the environment.
myenv.docker.enabled = True
myenv.docker.base_image = REGISTRY_BASE_IMAGE

# Registry address and credentials for the base image.
myenv.docker.base_image_registry.address = REGISTRY_CONTAINER_IMAGE
myenv.docker.base_image_registry.username = REGISTRY_CONTAINER_USERNAME
myenv.docker.base_image_registry.password = REGISTRY_CONTAINER_PASSWORD

# Python settings: dependencies come from the conda specification built
# earlier rather than being user-managed.
myenv.python.user_managed_dependencies = False
myenv.python.conda_dependencies = conda_dep

myenv.inferencing_stack_version = "latest"

# Scoring entry point and source directory, relative to the current working
# directory (changed to ./src earlier in the script).
inference_config = InferenceConfig(
    entry_script="score/score.py",
    source_directory=".",
    environment=myenv,
)

# Request a package for the model with generate_dockerfile=True.
package = Model.package(
    workspace=ws,
    inference_config=inference_config,
    models=[model],
    generate_dockerfile=True,
)

package.wait_for_creation(show_output=True)

if package.state != "Succeeded":
    # The message must be an f-string; otherwise the literal text
    # "{package.state}" is reported instead of the actual state.
    raise Exception(f"package creation status: {package.state}")

# Write the package contents to the directory given on the command line.
package.save(output_dir)

The Docker version is 20.10.2, running on Ubuntu 18.

The command to run the docker image is:

# Run the packaging script inside the scoring image: the code and package
# directories are bind-mounted at /code and /package, /code is the working
# directory, and the settings are passed in as environment variables.
# NOTE(review): $(SP_APP_SECRET) and $(SCORING_IMAGE) use $(...) while the
# other values use $VAR; in a plain shell $(...) is command substitution.
# Presumably this is pipeline macro syntax expanded before the shell runs - confirm.
docker run \
  -v $code_dir:/code \
  -v $package_dir:/package \
  -w=/code \
  -e BASE_NAME=$BASE_NAME \
  -e SP_APP_ID=$SP_APP_ID \
  -e SP_APP_SECRET=$(SP_APP_SECRET) \
  -e SUBSCRIPTION_ID=$SUBSCRIPTION_ID \
  -e TENANT_ID=$TENANT_ID \
  -e REGISTRY_CONTAINER_IMAGE=$REGISTRY_CONTAINER_IMAGE \
  -e REGISTRY_CONTAINER_PASSWORD=$REGISTRY_CONTAINER_PASSWORD \
  -e REGISTRY_CONTAINER_USERNAME=$REGISTRY_CONTAINER_USERNAME \
  -e REGISTRY_BASE_IMAGE=$SCORING_IMAGE \
  -e MODEL_NAME=$MODEL_NAME  \
  -e MODEL_VERSION=$MODEL_VERSION \
  $(SCORING_IMAGE) \
 python ml_service/util/create_scoring_package.py /package

The error that we are receiving is:

Failed to create Docker client. Is Docker running/installed?
When you deploy locally, we download a dockerfile
execute docker build on it, and docker run the built container for you
Error: Error while fetching server API version: ('Connection aborted.', FileNotFoundError(2, 'No such file or directory'))

Unable to log into Docker registry (is Docker installed and running?). Building the saved Dockerfile may fail when pulling the base image. To login manually, or on another machine, use the credentials returned by package.get_container_registry()

Thank you so much!