vkgpt11 opened 7 years ago
My code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.ERROR)
print("Using Tensorflow version %s" % (tf.__version__))

CATEGORICAL_COLUMNS = ["workclass", "education", "marital_status", "occupation",
                       "relationship", "race", "gender", "native_country"]

# Columns of the input csv file
COLUMNS = ["age", "workclass", "fnlwgt", "education", "education_num",
           "marital_status", "occupation", "relationship", "race", "gender",
           "captial_gain", "capital_loss", "hours_per_week", "native_country",
           "income_bracket"]

# Feature columns for input into the model
FEATURE_COLUMNS = ["age", "workclass", "fnlwgt", "education", "education_num",
                   "marital_status", "occupation", "relationship", "race",
                   "gender", "captial_gain", "capital_loss", "hours_per_week",
                   "native_country"]

BATCH_SIZE = 40
def generate_input_fn(filename, batch_size=BATCH_SIZE):
    def _input_fn():
        filename_queue = tf.train.string_input_producer([filename])
        reader = tf.TextLineReader()
        # reads out batch_size number of lines
        key, value = reader.read_up_to(filename_queue,
                                       num_records=batch_size)
        # record_defaults should match the datatypes of each respective column
        record_defaults = [[0], [" "], [0], [" "], [0],
                           [" "], [" "], [" "], [" "], [" "],
                           [0], [0], [0], [" "], [" "]]
        # decode the csv data that was just read out
        columns = tf.decode_csv(value, record_defaults=record_defaults)
        # features is a dictionary that maps from column names to tensors;
        # income_bracket, the last column of the data, holds the labels
        all_columns = dict(zip(COLUMNS, columns))
        # save the income_bracket column as our labels
        # (dict.pop returns the popped array of income_bracket values)
        income_bracket = all_columns.pop('income_bracket')
        # remove the fnlwgt key, which is not used
        all_columns.pop('fnlwgt', "fnlwgt key not found")
        # the remaining columns are our features
        features = all_columns
        # sparse categorical features must be represented with an additional
        # dimension; there is no additional work needed for the continuous
        # columns (see the docs for tf.SparseTensor for more info)
        for feature_name in CATEGORICAL_COLUMNS:
            features[feature_name] = tf.expand_dims(
                features[feature_name], -1)
        # convert " >50K" to 1 and " <=50K" to 0
        labels = tf.to_int32(tf.equal(income_bracket, " >50K"))
        return features, labels
    return _input_fn

print('input function configured')
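# Note: TextLineReader and string_input_producer are queue-based readers;
# the tf.contrib.learn estimators used below start the required queue
# runners internally during fit() and evaluate(), so no manual
# tf.train.start_queue_runners call is needed in this script.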
# sparse base columns
gender = tf.contrib.layers.sparse_column_with_keys(column_name="gender",
                                                   keys=["female", "male"])
race = tf.contrib.layers.sparse_column_with_keys(column_name="race",
                                                 keys=["Amer-Indian-Eskimo",
                                                       "Asian-Pac-Islander",
                                                       "Black", "Other",
                                                       "White"])
education = tf.contrib.layers.sparse_column_with_hash_bucket(
    "education", hash_bucket_size=1000)
marital_status = tf.contrib.layers.sparse_column_with_hash_bucket(
    "marital_status", hash_bucket_size=100)
workclass = tf.contrib.layers.sparse_column_with_hash_bucket(
    "workclass", hash_bucket_size=100)
relationship = tf.contrib.layers.sparse_column_with_hash_bucket(
    "relationship", hash_bucket_size=100)
occupation = tf.contrib.layers.sparse_column_with_hash_bucket(
    "occupation", hash_bucket_size=1000)
native_country = tf.contrib.layers.sparse_column_with_hash_bucket(
    "native_country", hash_bucket_size=1000)

print('Sparse columns configured')
age = tf.contrib.layers.real_valued_column("age")
education_num = tf.contrib.layers.real_valued_column("education_num")
captial_gain = tf.contrib.layers.real_valued_column("captial_gain")
capital_loss = tf.contrib.layers.real_valued_column("capital_loss")
hours_per_week = tf.contrib.layers.real_valued_column("hours_per_week")

print('Continuous columns configured')

wide_columns = [gender, race, native_country,
                education, occupation, workclass,
                marital_status, relationship]
                # age_buckets,
                # education_occupation,
                # age_race_occupation, country_occupation]

deep_columns = [
    tf.contrib.layers.embedding_column(workclass, dimension=8),
    tf.contrib.layers.embedding_column(education, dimension=8),
    tf.contrib.layers.embedding_column(marital_status, dimension=8),
    tf.contrib.layers.embedding_column(gender, dimension=8),
    tf.contrib.layers.embedding_column(relationship, dimension=8),
    tf.contrib.layers.embedding_column(race, dimension=8),
    tf.contrib.layers.embedding_column(native_country, dimension=8),
    tf.contrib.layers.embedding_column(occupation, dimension=8),
    age,
    education_num,
    captial_gain,
    capital_loss,
    hours_per_week,
]

print('wide and deep columns configured')
def create_model_dir(model_type):
    return 'models/model_' + model_type + '_' + str(int(time.time()))

# If new_model = False, pass in the desired model_dir
def get_model(model_type, new_model=False, model_dir=None):
    if new_model or model_dir is None:
        model_dir = create_model_dir(model_type)
    print("Model directory = %s" % model_dir)
    m = None
    # linear classifier
    if model_type == 'WIDE':
        m = tf.contrib.learn.LinearClassifier(
            model_dir=model_dir,
            feature_columns=wide_columns)
    # deep neural net classifier
    elif model_type == 'DEEP':
        m = tf.contrib.learn.DNNClassifier(
            model_dir=model_dir,
            feature_columns=deep_columns)
    elif model_type == 'WIDE_AND_DEEP':
        m = tf.contrib.learn.DNNLinearCombinedClassifier(
            model_dir=model_dir,
            linear_feature_columns=wide_columns,
            dnn_feature_columns=deep_columns,
            dnn_hidden_units=[100, 70, 50, 25])
    print('estimator built')
    return m, model_dir

MODEL_TYPE = 'WIDE_AND_DEEP'
# MODEL_TYPE = 'DEEP'
model_dir = create_model_dir(model_type=MODEL_TYPE)
m, model_dir = get_model(model_type=MODEL_TYPE, model_dir=model_dir)
# gsutil cp gs://cloudml-public/census/data/adult.data.csv C:\Users\vikas\PycharmProjects\TensorflowUScensusData
# gsutil cp gs://cloudml-public/census/data/adult.test.csv C:\Users\vikas\PycharmProjects\TensorflowUScensusData
train_file = "adult.data.csv"
train_steps = 1000

m.fit(input_fn=generate_input_fn(train_file, BATCH_SIZE),
      steps=train_steps)
print('fit done')

test_file = "adult.test.csv"
results = m.evaluate(input_fn=generate_input_fn(test_file), steps=100)
print("evaluate done")
print(results)
print('Accuracy: %s' % results['accuracy'])
from tensorflow.contrib.learn.python.learn.utils import input_fn_utils

def column_to_dtype(column):
    if column in CATEGORICAL_COLUMNS:
        return tf.string
    else:
        return tf.float32

def serving_input_fn():
    feature_placeholders = {
        column: tf.placeholder(column_to_dtype(column), [None])
        for column in FEATURE_COLUMNS
    }
    # DNNLinearCombinedClassifier expects rank-2 tensors, but the serving
    # input should be rank 1 so that scalars can be sent to the server,
    # so add the extra dimension here
    features = {
        key: tf.expand_dims(tensor, -1)
        for key, tensor in feature_placeholders.items()
    }
    return input_fn_utils.InputFnOps(
        features,              # input into the graph
        None,                  # no labels at serving time
        feature_placeholders   # tensor input converted from the request
    )

export_folder = m.export_savedmodel(
    export_dir_base=model_dir + "/export/",
    input_fn=serving_input_fn
)
print('model exported successfully to {}'.format(export_folder))
gcloud ml-engine predict --model Deep_Wide --version v10 --json-instances data/test.json
{"age": 25, "workclass": " Private", "education": " 11th", "education_num": 7, "marital_status": " Never-married", "occupation": " Machine-op-inspct", "relationship": " Own-child", "race": " Black", "gender": " Male", "capital_gain": 12, "capital_loss": 3, "hours_per_week": 40, "native_country": " United-States"}
Have you been able to get this to work? If so, what steps?
No, it has not worked.
Just got it to work. I had to remove the line breaks from the JSON file; everything has to be contained on a single line. Does that help?
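For reference, gcloud ml-engine predict --json-instances expects newline-delimited JSON, so each line of the file must be one complete instance object. Using the instance from the original post, test.json should be a single line like:

{"age": 25, "workclass": " Private", "education": " 11th", "education_num": 7, "marital_status": " Never-married", "occupation": " Machine-op-inspct", "relationship": " Own-child", "race": " Black", "gender": " Male", "capital_gain": 12, "capital_loss": 3, "hours_per_week": 40, "native_country": " United-States"}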
I am getting the following error when doing prediction after deploying the model in the cloud.

Locally:

C:\Program Files (x86)\Google\Cloud SDK>gcloud ml-engine predict --model Deep_Wide --version v4 --json-instances C:\Users\vikas\PycharmProjects\TensorflowUScensusData\test.json
{ "error": "Prediction failed: Error processing input: Incompatible types: 0 vs. float64" }

And the same happens if I run it in the cloud:

vkg_vikas_gupta@vikas-sapref:~$ gcloud ml-engine predict --model Deep_Wide --version v5 --json-instances data/test.json
{ "error": "Prediction failed: Error processing input: Incompatible types: 0 vs. float64" }
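One possible cause worth checking, though this is untested guesswork: FEATURE_COLUMNS in the code above spells the column "captial_gain" while test.json sends "capital_gain", and FEATURE_COLUMNS still contains "fnlwgt" even though the input function drops it. Either mismatch leaves the exported serving graph with a placeholder the prediction request never fills. A sketch of a serving_input_fn limited to the columns the model actually consumes (SERVING_COLUMNS is a new helper introduced here; the JSON keys would still need to match these names and dtypes exactly):

SERVING_COLUMNS = [c for c in FEATURE_COLUMNS if c != "fnlwgt"]

def serving_input_fn():
    # one rank-1 placeholder per served column; every key in the JSON
    # request must match one of these names (and its dtype) exactly
    feature_placeholders = {
        column: tf.placeholder(column_to_dtype(column), [None])
        for column in SERVING_COLUMNS
    }
    # the estimator expects rank-2 tensors, so add the extra dimension
    features = {
        key: tf.expand_dims(tensor, -1)
        for key, tensor in feature_placeholders.items()
    }
    return input_fn_utils.InputFnOps(features, None, feature_placeholders)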