H2O is an Open Source, Distributed, Fast & Scalable Machine Learning Platform: Deep Learning, Gradient Boosting (GBM) & XGBoost, Random Forest, Generalized Linear Modeling (GLM with Elastic Net), K-Means, PCA, Generalized Additive Models (GAM), RuleFit, Support Vector Machine (SVM), Stacked Ensembles, Automatic Machine Learning (AutoML), etc.
H2O version: 3.26.0.6
{code:java}
# Script: generate a 190M-row single-column integer frame in H2O, export it
# to CSV, then repeatedly re-import that CSV, row-bind it onto an accumulated
# frame and de-duplicate the result, printing the row count before and after
# each de-duplication.
# NOTE(review): this appears to be a reproduction script for an issue report;
# the expected vs. actual behaviour is not stated in the script itself.

library(h2o)
h2o.init()

# Path to synthetic data
synthetic_csv <- "/temp/synthetic_190M_rows_py_export.csv"

# Create synthetic data with 190M rows
synth <- h2o.createFrame(
  rows = 190000000,
  cols = 1,
  randomize = TRUE,
  value = 0,
  real_range = 100,
  categorical_fraction = 0,
  factors = 100,
  integer_fraction = 1,
  integer_range = 999999999999999,
  binary_fraction = 0,
  binary_ones_fraction = 0,
  time_fraction = 0,
  string_fraction = 0,
  missing_fraction = 0,
  response_factors = 2,
  has_response = FALSE,
  seed = 0
)

# Export synthetic data
h2o.exportFile(synth, path = synthetic_csv)

# Base dataframe
bindrow <- h2o.importFile(path = synthetic_csv)

# Passes 1..11: every pass performs the same five steps, so they are driven
# by a loop instead of being written out eleven times.
n_passes <- 11L
for (pass in seq_len(n_passes)) {
  message("pass ", pass)

  # Re-import the exported CSV and append it to the accumulated frame.
  df1 <- h2o.importFile(path = synthetic_csv)
  bindrow <- h2o.rbind(bindrow, df1)
  # print() is required: values are not auto-printed inside a for loop.
  print(h2o.nrow(bindrow))

  # Keep only the unique values of the first column, then report the count.
  bindrow <- h2o.unique(bindrow[, 1])
  print(h2o.nrow(bindrow))
}
{code}