Closed p2004r closed 7 years ago
And mx.io.CSVIter() is not working either:
> system("head data/ml-100k/data.y.csv")
"score"
3
3
1
2
1
4
2
5
3
> system("head data/ml-100k/data.x.csv")
"user","item"
196,242
186,302
22,377
244,51
166,346
298,474
115,265
253,465
305,451
> mx.io.CSVIter("./data/ml-100k/data.x.csv", c(1,2), "./data/ml-100k/data.y.csv", c(1,1))
Error in mx.varg.io.CSVIter(list(...)) : basic_string::resize
> traceback()
4: stop(list(message = "basic_string::resize", call = mx.varg.io.CSVIter(list(...)),
cppstack = NULL))
3: .External(list(name = "InternalFunction_invoke", address = <pointer: 0x1347f10>,
dll = list(name = "Rcpp", path = "/usr/local/lib/R/site-library/Rcpp/libs/Rcpp.so",
dynamicLookup = TRUE, handle = <pointer: 0x1581520>,
info = <pointer: 0x7f4fba6a0d80>), numParameters = -1L),
<pointer: 0x39c9fb0>, alist)
2: mx.varg.io.CSVIter(list(...))
1: mx.io.CSVIter("./data/ml-100k/data.x.csv", c(1, 2), "./data/ml-100k/data.y.csv",
c(1, 1))
>
Manually, I can get the data and the labels out of an array iterator:
> X <- mx.io.arrayiter(data=DF_mat_x, label=t(data.matrix(data.frame(score=DF_y))))
> Z <- mxnet:::mx.model.init.iter(X, NULL, batch.size = 120, is.train = TRUE)
> Z$iter.next()
[1] TRUE
> Z$value()
$data
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
[1,] 196 186 22 244 166 298 115 253 305 6 62 286 200 210
[2,] 242 302 377 51 346 474 265 465 451 86 257 1014 222 40
[,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
[1,] 224 303 122 194 291 234 119 167 299 291 308 95
[2,] 29 785 387 274 1042 1184 392 486 144 118 1 546
[,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
[1,] 38 102 63 160 50 301 225 290 97 157 181 278
[2,] 95 768 277 234 246 98 193 88 194 274 1081 603
[,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
[1,] 276 7 10 284 201 276 287 246 242 249 99 178
[2,] 796 32 16 304 979 564 327 201 1137 241 4 332
[,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62]
[1,] 251 81 260 25 59 72 87 290 42 292 115 20
[2,] 100 432 322 181 196 679 384 143 423 515 20 288
[,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74]
[1,] 201 13 246 138 167 60 57 223 189 243 92 246
[2,] 219 526 919 26 232 427 304 274 512 15 1049 416
[,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86]
[1,] 194 241 178 254 293 127 225 299 225 276 291 222
[2,] 165 690 248 1444 5 229 237 229 480 54 144 366
[,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98]
[1,] 267 42 11 95 8 162 87 279 145 119 62 62
[2,] 518 403 111 625 338 25 1016 154 275 1153 498 382
[,99] [,100] [,101] [,102] [,103] [,104] [,105] [,106] [,107] [,108]
[1,] 28 135 32 90 286 293 216 166 250 271
[2,] 209 23 294 382 208 685 144 328 496 132
[,109] [,110] [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118]
[1,] 160 265 198 42 168 110 58 90 271 62
[2,] 174 118 498 96 151 307 144 648 346 21
[,119] [,120] [,121] [,122] [,123] [,124] [,125] [,126] [,127] [,128]
[1,] 279 237 94 128 298 44 264 194 72 222
[2,] 832 514 789 485 317 195 200 385 195 750
$label
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
[1,] 3 3 1 2 1 4 2 5 3 3 2 5 5 3
[,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
[1,] 3 3 5 2 4 2 4 4 4 2 4 2
[,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
[1,] 5 2 4 5 3 4 4 4 3 4 1 5
[,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
[1,] 1 4 4 4 2 3 5 5 5 5 5 3
[,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62]
[1,] 4 2 4 5 5 2 4 5 5 4 3 1
[,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74]
[1,] 4 3 4 5 1 5 5 4 4 3 1 3
[,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86]
[1,] 4 2 4 3 3 5 5 3 5 3 5 4
[,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98]
[1,] 5 3 4 4 4 4 4 5 2 5 4 3
[,99] [,100] [,101] [,102] [,103] [,104] [,105] [,106] [,107] [,108]
[1,] 4 4 3 5 4 3 4 5 4 5
[,109] [,110] [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118]
[1,] 5 4 3 5 5 4 4 4 4 3
[,119] [,120] [,121] [,122] [,123] [,124] [,125] [,126] [,127] [,128]
[1,] 3 4 4 3 4 5 5 2 5 5
And the model does not see the data
## model
user <- mx.symbol.Variable("user")
item <- mx.symbol.Variable("item")
score <- mx.symbol.Variable("label")
## user feature lookup
user1 <- mx.symbol.Embedding(user, input_dim = max_user, output_dim = k, name="user1")
## item feature lookup
item1 <- mx.symbol.Embedding(item, input_dim = max_item, output_dim = k, name="item1")
## predict by the inner product, which is elementwise product and then sum
pred <- user1 * item1
pred1 <- mx.symbol.sum_axis(pred, axis = 1, name="pred1")
pred2 <- mx.symbol.Flatten(pred1, name="pred2")
## loss layer
pred3 <- mx.symbol.LinearRegressionOutput(data=pred2, label=score, name="pred3")
devices <- mx.cpu()
mx.set.seed(123)
mx.model.FeedForward.create(pred3, X = mx.io.arrayiter(data=DF_mat_x, label=t(data.matrix(data.frame(score=DF_y)))),
ctx=devices, num.round=10, array.batch.size=10,
verbose=T, #array.layout="rowmajor",
initializer=mx.init.uniform(0.07), learning.rate=0.07,
eval.metric=mx.metric.rmse, momentum = 0.9,
epoch.end.callback=mx.callback.log.train.metric(1))
[18:47:02] /root/mxnet/dmlc-core/include/dmlc/logging.h:235: [18:47:02] src/symbol/symbol.cc:155: Symbol.InferShapeKeyword argument name data not found.
Candidate arguments:
[0]user
[1]user1_weight
[2]item
[3]item1_weight
[4]label
Error in symbol$infer.shape(list(...)) : basic_string::resize
> traceback()
6: stop(list(message = "basic_string::resize", call = symbol$infer.shape(list(...)),
cppstack = NULL))
5: .External(list(name = "CppMethod__invoke_notvoid", address = <pointer: 0x11caef0>,
dll = list(name = "Rcpp", path = "/usr/local/lib/R/site-library/Rcpp/libs/Rcpp.so",
dynamicLookup = TRUE, handle = <pointer: 0x1404520>,
info = <pointer: 0x7f348a598d80>), numParameters = -1L),
<pointer: 0x3835820>, <pointer: 0x3836540>, .pointer, ...)
4: symbol$infer.shape(list(...))
3: mx.symbol.infer.shape(symbol, data = input.shape)
2: mx.model.init.params(symbol, input.shape, initializer, mx.cpu())
1: mx.model.FeedForward.create(pred3, X = mx.io.arrayiter(data = DF_mat_x,
label = t(data.matrix(data.frame(score = DF_y)))), ctx = devices,
num.round = 10, array.batch.size = 10, verbose = T, initializer = mx.init.uniform(0.07),
learning.rate = 0.07, eval.metric = mx.metric.rmse, momentum = 0.9,
epoch.end.callback = mx.callback.log.train.metric(1))
Original example uses
user = mx.nd.array(user)
item = mx.nd.array(item)
score = mx.nd.array(score)
return mx.io.NDArrayIter(data={'user':user,'item':item},label={'score':score},
batch_size=batch_size, shuffle=True)
How to do it properly in R?
The workaround does not work. Can you fix my solution?
How to use the operator mx.symbol.slice_axis() correctly?
## Second attempt: feed a single "data" input and split it into the user and
## item parts inside the graph with mx.symbol.slice_axis().
## k is used below as the embedding output dimension.
k <- 64
## model
data <- mx.symbol.Variable("data")
###user <- mx.symbol.Variable("user")
## NOTE(review): begin and end are equal in both slices below. The run of this
## script aborts with "Check failed: (param.begin) < (param.end)", so the
## operator rejects begin == end. A one-element slice would presumably be
## begin=0, end=1 (and begin=1, end=2 for the item) -- TODO confirm, together
## with which axis actually separates user from item for the chosen
## array.layout.
user <- mx.symbol.slice_axis(data=data, axis=0, begin=0, end=0, name="user")
###item <- mx.symbol.Variable("item")
item <- mx.symbol.slice_axis(data=data, axis=0, begin=1, end=1, name="item")
score <- mx.symbol.Variable("label")
## user feature lookup
user1 <- mx.symbol.Embedding(data=user, input_dim = max_user, output_dim = k, name="user1")
## item feature lookup
item1 <- mx.symbol.Embedding(data=item, input_dim = max_item, output_dim = k, name="item1")
## predict by the inner product, which is elementwise product and then sum
pred <- user1 * item1
pred1 <- mx.symbol.sum_axis(pred, axis = 1, name="pred1")
pred2 <- mx.symbol.Flatten(pred1, name="pred2")
## loss layer
pred3 <- mx.symbol.LinearRegressionOutput(data=pred2, label=score, name="pred3")
devices <- mx.cpu()
mx.set.seed(123)
## NOTE(review): max_user, max_item, DF_mat_x and DF_y are not defined in this
## snippet. Unlike the first attempt, array.layout="rowmajor" is passed
## explicitly here.
mx.model.FeedForward.create(pred3, X = mx.io.arrayiter(data=DF_mat_x, label=t(data.matrix(data.frame(score=DF_y)))),
ctx=devices, num.round=10, array.batch.size=10,
verbose=T, array.layout="rowmajor",
initializer=mx.init.uniform(0.07), learning.rate=0.07,
eval.metric=mx.metric.rmse, momentum = 0.9,
epoch.end.callback=mx.callback.log.train.metric(1))
Error
> mx.model.FeedForward.create(pred3, X = mx.io.arrayiter(data=DF_mat_x, label=t(data.matrix(data.frame(score=DF_y)))),
+ ctx=devices, num.round=10, array.batch.size=10,
+ verbose=T, array.layout="rowmajor",
+ initializer=mx.init.uniform(0.07), learning.rate=0.07,
+ eval.metric=mx.metric.rmse, momentum = 0.9,
+ epoch.end.callback=mx.callback.log.train.metric(1))
[11:52:15] /root/mxnet/dmlc-core/include/dmlc/logging.h:235: [11:52:15] src/operator/./matrix_op-inl.h:620: Check failed: (param.begin) < (param.end)
Error in symbol$infer.shape(list(...)) : basic_string::resize
> traceback()
6: stop(list(message = "basic_string::resize", call = symbol$infer.shape(list(...)),
cppstack = NULL))
5: .External(list(name = "CppMethod__invoke_notvoid", address = <pointer: 0x182b480>,
dll = list(name = "Rcpp", path = "/usr/local/lib/R/site-library/Rcpp/libs/Rcpp.so",
dynamicLookup = TRUE, handle = <pointer: 0x2412840>,
info = <pointer: 0x7f177895cd80>), numParameters = -1L),
<pointer: 0x3646130>, <pointer: 0x359f480>, .pointer, ...)
4: symbol$infer.shape(list(...))
3: mx.symbol.infer.shape(symbol, data = input.shape)
2: mx.model.init.params(symbol, input.shape, initializer, mx.cpu())
1: mx.model.FeedForward.create(pred3, X = mx.io.arrayiter(data = DF_mat_x,
label = t(data.matrix(data.frame(score = DF_y)))), ctx = devices,
num.round = 10, array.batch.size = 10, verbose = T, array.layout = "rowmajor",
initializer = mx.init.uniform(0.07), learning.rate = 0.07,
eval.metric = mx.metric.rmse, momentum = 0.9, epoch.end.callback = mx.callback.log.train.metric(1))
@p2004r The proper way is to define your own iterator like below. You can find a small demo in #6673
# CustomIter: a data iterator that pairs two existing iterators and presents
# them as one iterator with two named inputs.
#
# Fields:
#   iter1  iterator whose `$data` becomes the `user` input and whose `$label`
#          becomes the `label` of every batch.
#   iter2  iterator whose `$data` becomes the `item` input; its label is
#          ignored.
#
# Both iterators are advanced and reset together, so they are expected to
# yield the same number of batches in the same order.
CustomIter <- setRefClass("CustomIter", fields = c("iter1", "iter2"),
  contains = "Rcpp_MXArrayDataIter",
  methods = list(
    # Store the two wrapped iterators.
    initialize = function(iter1, iter2) {
      .self$iter1 <- iter1
      .self$iter2 <- iter2
      .self
    },
    # Return the current batch as list(user, item, label).
    value = function() {
      # Fetch iter1's batch once; both `user` and `label` come from it.
      first_batch <- .self$iter1$value()
      second_batch <- .self$iter2$value()
      list(user = first_batch$data,
           item = second_batch$data,
           label = first_batch$label)
    },
    # Advance both iterators; TRUE only if both still have a batch.
    iter.next = function() {
      # Step both unconditionally (no short-circuit) so they stay in lockstep,
      # then report success only when neither one is exhausted. Returning just
      # the second result would hide an exhausted first iterator.
      has_first <- .self$iter1$iter.next()
      has_second <- .self$iter2$iter.next()
      isTRUE(has_first) && isTRUE(has_second)
    },
    # Rewind both iterators to their first batch.
    reset = function() {
      .self$iter1$reset()
      .self$iter2$reset()
    },
    # Padding count of the current batch, as reported by iter1.
    num.pad = function() {
      .self$iter1$num.pad()
    },
    # Forward finalisation to both wrapped iterators.
    finalize = function() {
      .self$iter1$finalize()
      .self$iter2$finalize()
    }
  )
)
# Build one array iterator per model input. Column 1 of DF feeds the first
# iterator, column 2 the second, and column 3 is passed as the label to both
# (CustomIter only reads the label from the first one).
# NOTE(review): DF and k come from outside this snippet; k is reused here as
# the batch size -- confirm that is intended.
user_iter <- mx.io.arrayiter(data = DF[, 1], label = DF[, 3], batch.size = k)
item_iter <- mx.io.arrayiter(data = DF[, 2], label = DF[, 3], batch.size = k)
# Combine them into a single iterator yielding user / item / label batches.
train_iter <- CustomIter$new(user_iter, item_iter)
Thank you.
Fixed in #6673
I rewrote an example available here (link to data in document) https://github.com/dmlc/mxnet/blob/master/example/recommenders/demo1-MF.ipynb
But I get an error:
Attempts to change the dimensions of the data do not help.
Environment info
Operating System: Ubuntu 14.04
Package used: R
MXNet commit hash (`git rev-parse HEAD`):
~/mxnet# git rev-parse HEAD
9d066eebfb4f42f75e18ea8925c28d5526153576
R `sessionInfo()`:
R version 3.3.2 (2016-10-31) Platform: x86_64-pc-linux-gnu (64-bit) Running under: Ubuntu 14.04.5 LTS
locale: [1] C
attached base packages: [1] stats graphics grDevices utils datasets methods base