amygdala / tensorflow-workshop

This repo contains materials for use in a TensorFlow workshop.

Error in prediction on Cloud model #65

Open vkgpt11 opened 7 years ago

vkgpt11 commented 7 years ago

I am getting the following error when running prediction after deploying the model to the cloud. On my local machine:

C:\Program Files (x86)\Google\Cloud SDK>gcloud ml-engine predict --model Deep_Wide --version v4 --json-instances C:\Users\vikas\PycharmProjects\TensorflowUScensusData\test.json { "error": "Prediction failed: Error processing input: Incompatible types: 0 vs. float64" }

The same error occurs if I run it in the cloud:

vkg_vikas_gupta@vikas-sapref:~$ gcloud ml-engine predict --model Deep_Wide --version v5 --json-instances data/test.json { "error": "Prediction failed: Error processing input: Incompatible types: 0 vs. float64" }
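For reference, this kind of "Incompatible types" message suggests a dtype mismatch between what the exported graph expects and what the prediction request supplies. A minimal sketch (assuming TF 1.x; the sample line and values are made up) of how tf.decode_csv infers each column's dtype from record_defaults:

import tensorflow as tf

# An integer default like [0] yields an int32 column, [" "] yields a
# string column, and [0.0] yields a float32 column.
line = tf.constant("39, State-gov,77516.0")
age, workclass, fnlwgt = tf.decode_csv(
    line, record_defaults=[[0], [" "], [0.0]])
print(age.dtype)        # <dtype: 'int32'>
print(workclass.dtype)  # <dtype: 'string'>
print(fnlwgt.dtype)     # <dtype: 'float32'>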

vkgpt11 commented 7 years ago

My code:


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

import tensorflow as tf

tf.logging.set_verbosity(tf.logging.ERROR)
print("Using Tensorflow version %s" % (tf.__version__))

CATEGORICAL_COLUMNS = ["workclass","education","marital_status","occupation",
                       "relationship","race","gender","native_country"]
# Columns of the input csv file
COLUMNS=["age","workclass","fnlwgt","education","education_num","marital_status","occupation",
         "relationship","race","gender","captial_gain","capital_loss",
         "hours_per_week","native_country","income_bracket"]

# Feature columns for input into the model
FEATURE_COLUMNS=["age","workclass","fnlwgt","education","education_num","marital_status","occupation",
                 "relationship","race","gender","captial_gain","capital_loss",
                 "hours_per_week","native_country"]

BATCH_SIZE = 40

def generate_input_fn(filename, batch_size=BATCH_SIZE):
    def _input_fn():
        filename_queue = tf.train.string_input_producer([filename])
        reader = tf.TextLineReader()
        # reads out batch_size number of lines
        key, value = reader.read_up_to(filename_queue,
                                       num_records=batch_size)
        # record_defaults should match the datatypes of each respective column
        record_defaults = [[0],[" "],[0],[" "],[0],
                           [" "], [" "], [" "], [" "], [" "],
                           [0],[0],[0], [" "], [" "]]
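        # NOTE: tf.decode_csv infers each column's dtype from these defaults,
        # so an integer default like [0] produces an integer tensor, while
        # the serving input function below declares continuous columns as
        # tf.float32.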
        # decode the CSV data that was just read out
        columns = tf.decode_csv(value, record_defaults=record_defaults)

        # features is a dictionary that maps from column names to tensors.
        # income_bracket is the last column of the data; note that it is
        # the label, which is separated out below.
        all_columns = dict(zip(COLUMNS, columns))

        # Save the income_bracket column as our labels
        # dict.pop returns the popped array of income_bracket values
        income_bracket = all_columns.pop('income_bracket')

        # remove the fnlwgt key, which is not used
        all_columns.pop('fnlwgt', "fnlwgt key not found")

        # the remaining columns are our features
        features = all_columns

        # Sparse categorical features must be represented with an additional
        # dimension. There is no additional work needed for the continuous
        # columns; see the docs for tf.SparseTensor for more info.
        for feature_name in CATEGORICAL_COLUMNS:
            features[feature_name] = tf.expand_dims(
                features[feature_name], -1)

        # convert ">50K: to 1 and "<=50K to 0
        labels = tf.to_int32(tf.equal(income_bracket," >50K"))
        assert isinstance(labels, object)
        return features, labels
    return _input_fn

print('input function configured')

# sparse base columns
gender = tf.contrib.layers.sparse_column_with_keys(column_name="gender",
                                                   keys=["female", "male"])

race = tf.contrib.layers.sparse_column_with_keys(column_name="race",
                                                 keys=["Amer-Indian-Eskimo",
                                                       "Asian-Pac-Islander",
                                                       "Black", "Other",
                                                       "White"])
education = tf.contrib.layers.sparse_column_with_hash_bucket(
    "education", hash_bucket_size=1000)

marital_status = tf.contrib.layers.sparse_column_with_hash_bucket(
    "marital_status", hash_bucket_size=100)

workclass = tf.contrib.layers.sparse_column_with_hash_bucket(
    "workclass", hash_bucket_size=100)

relationship = tf.contrib.layers.sparse_column_with_hash_bucket(
    "relationship", hash_bucket_size=100)

occupation = tf.contrib.layers.sparse_column_with_hash_bucket(
    "occupation", hash_bucket_size=1000)

native_country = tf.contrib.layers.sparse_column_with_hash_bucket(
    "native_country", hash_bucket_size=1000)

print('Sparse columns configured')

age = tf.contrib.layers.real_valued_column("age")
education_num = tf.contrib.layers.real_valued_column("education_num")
capital_gain = tf.contrib.layers.real_valued_column("capital_gain")
capital_loss = tf.contrib.layers.real_valued_column("capital_loss")
hours_per_week = tf.contrib.layers.real_valued_column("hours_per_week")
print('Continuous columns configured')

wide_columns = [gender, race, native_country,
               education, occupation, workclass,
               marital_status, relationship]
# age_buckets,
# education_occupation,
# age_race_occupation,country_occupation]

deep_columns = [
    tf.contrib.layers.embedding_column(workclass, dimension=8),
    tf.contrib.layers.embedding_column(education, dimension=8),
    tf.contrib.layers.embedding_column(marital_status, dimension=8),
    tf.contrib.layers.embedding_column(gender, dimension=8),
    tf.contrib.layers.embedding_column(relationship, dimension=8),
    tf.contrib.layers.embedding_column(race, dimension=8),
    tf.contrib.layers.embedding_column(native_country, dimension=8),
    tf.contrib.layers.embedding_column(occupation, dimension=8),
    age,
    education_num,
    capital_gain,
    capital_loss,
    hours_per_week
]

print('wide and deep columns configured')

def create_model_dir(model_type):
    return 'models/model_' + model_type + '_' + str(int(time.time()))

# If new_model = False, pass in the desired model_dir

def get_model(model_type, new_model=False, model_dir=None):
    if new_model or model_dir is None:
        model_dir = create_model_dir(model_type)
    print("Model_directory=%s" % model_dir)

    m = None

    # Linear Classifier
    if model_type == 'WIDE':
        m = tf.contrib.learn.LinearClassifier(
            model_dir=model_dir,
            feature_columns=wide_columns)
    # Deep neural net classifier
    if model_type == 'DEEP':
        m = tf.contrib.learn.DNNClassifier(
            model_dir=model_dir,
            feature_columns=deep_columns)
    if model_type == 'WIDE_AND_DEEP':
        m = tf.contrib.learn.DNNLinearCombinedClassifier(
            model_dir=model_dir,
            linear_feature_columns=wide_columns,
            dnn_feature_columns=deep_columns,
            dnn_hidden_units=[100, 70, 50, 25])

    print('estimator built')

    return m, model_dir

MODEL_TYPE = 'WIDE_AND_DEEP'
#MODEL_TYPE = 'DEEP'
model_dir = create_model_dir(model_type=MODEL_TYPE)
m, model_dir = get_model(model_type=MODEL_TYPE, model_dir=model_dir)

# gsutil cp gs://cloudml-public/census/data/adult.data.csv C:\Users\vikas\PycharmProjects\TensorflowUScensusData

# gsutil cp gs://cloudml-public/census/data/adult.test.csv C:\Users\vikas\PycharmProjects\TensorflowUScensusData

train_file = "adult.data.csv"

train_steps = 1000
m.fit(input_fn=generate_input_fn(train_file, BATCH_SIZE),
      steps=train_steps)
print('fit done')

test_file = "adult.test.csv"
results = m.evaluate(input_fn=generate_input_fn(test_file), steps=100)
print("evaluate done")
print(results)
print('Accuracy: %s' % results['accuracy'])

from tensorflow.contrib.learn.python.learn.utils import input_fn_utils

def column_to_dtype(column):
    if column in CATEGORICAL_COLUMNS:
        return tf.string
    else:
        return tf.float32

def serving_input_fn():
    feature_placeholders = {
        column: tf.placeholder(column_to_dtype(column), [None])
        for column in FEATURE_COLUMNS
    }
    # DNNLinearCombinedClassifier expects rank-2 tensors, but the serving
    # inputs should be rank 1 so that clients can send scalar values;
    # expand_dims adds the extra dimension.
    features = {
        key: tf.expand_dims(tensor, -1)
        for key, tensor in feature_placeholders.items()
    }
    return input_fn_utils.InputFnOps(
        features,             # input into the graph
        None,                 # no labels are needed at serving time
        feature_placeholders  # tensors populated from the request
    )

export_folder = m.export_savedmodel(
    export_dir_base=model_dir + "/export/",
    input_fn=serving_input_fn
)

print('model exported successfully to {}'.format(export_folder))
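One way to narrow down the type error (a suggestion, not something from this thread) is to inspect the exported model's serving signature and compare the declared input dtypes with the values sent in test.json. A sketch, assuming TF 1.x and that export_folder is the SavedModel directory returned above ('serving_default' is the default signature key):

from tensorflow.python.saved_model import loader, tag_constants

# export_savedmodel may return the path as bytes; normalize it to str.
export_dir = (export_folder.decode()
              if isinstance(export_folder, bytes) else export_folder)

with tf.Session(graph=tf.Graph()) as sess:
    meta_graph = loader.load(sess, [tag_constants.SERVING], export_dir)
    signature = meta_graph.signature_def['serving_default']
    for name, tensor_info in signature.inputs.items():
        # Each input's dtype must match the type of the corresponding
        # JSON value in the prediction request.
        print(name, tf.DType(tensor_info.dtype))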
vkgpt11 commented 7 years ago

gcloud ml-engine predict --model Deep_Wide --version v10 --json-instances data/test.json

{"age": 25, "workclass": " Private", "education": " 11th", "education_num": 7, "marital_status": " Never-married", "occupation": " Machine-op-inspct", "relationship": " Own-child", "race": " Black", "gender": " Male", "capital_gain": 12, "capital_loss": 3, "hours_per_week": 40, "native_country": " United-States"}

listentojohan commented 7 years ago

Have you been able to get this to work? If so, what steps?

vkgpt11 commented 7 years ago

No, it has not worked.

listentojohan commented 7 years ago

Just got it to work. I had to remove line breaks from the JSON file; everything has to be contained on a single line. Does that help?
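For anyone else hitting this: --json-instances expects newline-delimited JSON, i.e. one complete instance per line. A minimal way to write such a file (reusing the instance from the comment above; the file name is just an example):

import json

instance = {"age": 25, "workclass": " Private", "education": " 11th",
            "education_num": 7, "marital_status": " Never-married",
            "occupation": " Machine-op-inspct", "relationship": " Own-child",
            "race": " Black", "gender": " Male", "capital_gain": 12,
            "capital_loss": 3, "hours_per_week": 40,
            "native_country": " United-States"}

# json.dumps emits a single line, so each instance stays on its own line.
with open("test.json", "w") as f:
    f.write(json.dumps(instance) + "\n")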