Closed info-rchitect closed 7 years ago
Hey, sorry for the late response. Can you please show the initial data and the entire process? I've tried starting from the DataFrame you showed, but everything seems to work:
# Minimal reproduction attempt: a frame with six columns of 100.0 values and
# four two-element index entries such as [:ds1, :testA].
df = Daru::DataFrame.new({
total_test: [100.0] * 4,
total_test1: [100.0] * 4,
total_pass: [100.0] * 4,
total_pass1: [100.0] * 4,
test_yield: [100.0] * 4,
test_yield1: [100.0] * 4
},
index: [[:ds1, :testA], [:ds1, :testB], [:ds1, :testC], [:ds1, :testD]])
# => #<Daru::DataFrame(4x6)>
# test_yield test_yield total_pass total_pass total_test total_test
# ds1 testA 100.0 100.0 100.0 100.0 100.0 100.0
# testB 100.0 100.0 100.0 100.0 100.0 100.0
# testC 100.0 100.0 100.0 100.0 100.0 100.0
# testD 100.0 100.0 100.0 100.0 100.0 100.0
# Build the column to add: four :ds1 values in a Vector whose index is taken
# from the frame's own index entries.
col_name = :dataset
col_data = [:ds1] * 4
v = Daru::Vector.new(col_data, index: df.index.to_a)
# => #<Daru::Vector(4)>
# ds1 testA ds1
# testB ds1
# testC ds1
# testD ds1
# Assign the vector under :dataset; the inspected frame below reports 4x7.
df[col_name] = v
df
# => #<Daru::DataFrame(4x7)>
# test_yield test_yield total_pass total_pass total_test total_test dataset
# ds1 testA 100.0 100.0 100.0 100.0 100.0 100.0 ds1
# testB 100.0 100.0 100.0 100.0 100.0 100.0 ds1
# testC 100.0 100.0 100.0 100.0 100.0 100.0 ds1
# testD 100.0 100.0 100.0 100.0 100.0 100.0 ds1
Hi,
Here is the code I use in my spec tests:
# Input data: eight rows, one per (test, amg) combination for dataset :ds1.
# NOTE(review): as posted this is a plain Hash literal, yet pivot_table is
# called on it below; presumably it is wrapped in Daru::DataFrame.new in the
# real spec -- confirm.
df = {
dataset: [:ds1, :ds1, :ds1, :ds1, :ds1, :ds1, :ds1, :ds1],
test: %w{testA testB testC testD testA testB testC testD},
amg: %w{amg1 amg1 amg1 amg1 amg2 amg2 amg2 amg2},
total_tested: [100, 100, 100, 100, 100, 100, 100, 100],
total_pass: [98, 79, 99, 86, 97, 92, 90, 85],
test_yield: [0.98, 0.79, 0.99, 0.86, 0.97, 0.92, 0.90, 0.85]
}
# Pivot configuration: group_columns are passed as index:, split_by_columns as
# vectors:, and split_columns as values:.
group_columns = [:dataset, :test]
split_by_columns = [:amg]
split_columns = [:total_tested, :total_pass, :test_yield]
df_pivot = df.pivot_table(index: group_columns, vectors: split_by_columns, values: split_columns)
# Collect the elements of every index entry into per-column arrays, keyed by
# the group column at the same position. The block default creates a fresh
# array for each new key. (The `row` block parameter is unused.)
group_column_data = Hash.new {|h,k| h[k] = []}
df_pivot.index.each_with_index do |index_data, row|
index_data.each_with_index do |d, i|
group_column_data[group_columns[i]] << d
end
end
# Map each pivoted column name to its parts joined with '_'.
column_rename_map = {}.tap do |rename_hash|
df_pivot.vectors.to_a.each do |col_name|
rename_hash[col_name] = col_name.join('_')
end
end
# NOTE(review): the return value is discarded, so this relies on
# rename_vectors modifying df_pivot in place -- confirm against the Daru
# version in use.
df_pivot.rename_vectors(column_rename_map)
# Add each collected group column to the pivoted frame as a Vector built on
# the frame's own index entries.
group_column_data.each do |col_name, col_data|
df_pivot[col_name] = Daru::Vector.new(col_data, index: df_pivot.index.to_a, name: col_name)
end
Thanks for the help!
Hm. I've tried it on latest Daru:
require 'daru'
# Standalone reproduction: the same eight rows of data, wrapped in a
# Daru::DataFrame.
df = Daru::DataFrame.new({
dataset: [:ds1, :ds1, :ds1, :ds1, :ds1, :ds1, :ds1, :ds1],
test: %w{testA testB testC testD testA testB testC testD},
amg: %w{amg1 amg1 amg1 amg1 amg2 amg2 amg2 amg2},
total_tested: [100, 100, 100, 100, 100, 100, 100, 100],
total_pass: [98, 79, 99, 86, 97, 92, 90, 85],
test_yield: [0.98, 0.79, 0.99, 0.86, 0.97, 0.92, 0.90, 0.85]
})
# Pivot configuration: group_columns are passed as index:, split_by_columns as
# vectors:, and split_columns as values:.
group_columns = [:dataset, :test]
split_by_columns = [:amg]
split_columns = [:total_tested, :total_pass, :test_yield]
df_pivot = df.pivot_table(index: group_columns, vectors: split_by_columns, values: split_columns)
# Collect the elements of every index entry into per-column arrays, keyed by
# the group column at the same position. (The `row` block parameter is unused.)
group_column_data = Hash.new {|h,k| h[k] = []}
df_pivot.index.each_with_index do |index_data, row|
index_data.each_with_index do |d, i|
group_column_data[group_columns[i]] << d
end
end
# Map each pivoted column name to its parts joined with '_'.
column_rename_map = {}.tap do |rename_hash|
df_pivot.vectors.to_a.each do |col_name|
rename_hash[col_name] = col_name.join('_')
end
end
# NOTE(review): the return value is discarded, so this relies on
# rename_vectors modifying df_pivot in place -- confirm against the Daru
# version in use.
df_pivot.rename_vectors(column_rename_map)
# Add each collected group column to the pivoted frame as a Vector built on
# the frame's own index entries.
group_column_data.each do |col_name, col_data|
df_pivot[col_name] = Daru::Vector.new(col_data, index: df_pivot.index.to_a, name: col_name)
end
# Print the resulting frame's inspect output.
p df_pivot
It printed this:
#<Daru::DataFrame(4x8)>
total_test total_test total_pass total_pass test_yield test_yield dataset test
ds1 testA 100.0 100.0 98.0 97.0 0.98 0.97 ds1 testA
testB 100.0 100.0 79.0 92.0 0.79 0.92 ds1 testB
testC 100.0 100.0 99.0 90.0 0.99 0.9 ds1 testC
testD 100.0 100.0 86.0 85.0 0.86 0.85 ds1 testD
I wonder what I am missing here :(
Thanks @zverok. I too am able to run it as a standalone script, but when I run the same code within my application it fails. After scrubbing the source code on GitHub I found the issue: I suspect a find-and-replace operation somehow ended up changing the 'flatten' method to 'flatten_columns'. Sorry for the false issue. The good news is that I will now be able to submit my first PR.
Hi,
I pivoted a dataframe and now want to add in some columns that were lost in the pivot. I keep getting an error that I don't understand, any help is appreciated.
thx in advance