torch / tds

Torch C data structures
Other
80 stars 25 forks source link

How to save tds.Vec()? #31

Open hfxunlp opened 7 years ago

hfxunlp commented 7 years ago

I have 700M+ of JSON data and want to save it as a tds.Vec(). The data loads into a tds.Vec() correctly, but when I call torch.save(), LuaJIT crashes. Here is my code:

local tds = require("tds")

--- Load a line-delimited JSON file into nested tds.Vec containers.
--
-- Every line of the file is decoded with `json.decode`, and the decoded value
-- is unpacked into three parts (id, qd, td). `id` and `td` are iterated as
-- arrays of arrays and `qd` as a single array; each inner array is turned into
-- a torch.IntTensor reshaped to (#array x 1).
--
-- @param fname path of the file to read
-- @return a tds.Vec holding one tds.Vec(id_vec, qd_tensor, td_vec) per line
-- @raise an error naming the file when it cannot be opened
function conjson(fname)
    print("load:"..fname)

    -- Fail early with a readable message; without this check an unreadable
    -- path only shows up later as "attempt to index a nil value".
    local file, open_err = io.open(fname)
    if not file then
        error("cannot open "..tostring(fname)..": "..tostring(open_err))
    end

    -- Convert an array of integer arrays into a tds.Vec of column tensors.
    local function convt(tin)
        local rsv = tds.Vec()
        for i, v in ipairs(tin) do
            rsv[i] = torch.IntTensor(v):reshape(#v, 1)
        end
        return rsv
    end

    local rs = tds.Vec()
    local curd = 1
    -- file:lines() yields one line at a time without the trailing newline,
    -- exactly like repeated file:read("*l"), and leaves the handle open.
    for lind in file:lines() do
        local data = json.decode(lind)
        local id, qd, td = unpack(data)
        rs[curd] = tds.Vec(convt(id), torch.IntTensor(qd):reshape(#qd, 1), convt(td))
        curd = curd + 1
    end
    file:close()
    print("loaded")
    return rs
end

-- Build the nested tds.Vec from the JSON-lines file, then serialize it to disk.
local train_set = conjson("train.data")
torch.save("train.t7", train_set)

and it gives this output:

load:train.data
loaded  
/usr/local/bin/luajit: not enough memory