clockworklabs / SpacetimeDB

Multiplayer at the speed of light
https://spacetimedb.com
Other
4.41k stars 110 forks source link

Optimize integrate_generated_columns #1895

Closed coolreader18 closed 1 week ago

coolreader18 commented 4 weeks ago

Description of Changes

Not really important, just something I noticed could be cleaner.

Comparison of the LLVM IR generated for integrate_generated_columns, for the following struct:

pub struct TestE {
    #[primary_key]
    #[auto_inc]
    id: u64,
    #[index(btree)]
    name: String,
}
LLVM IR before this change:

```ll
; ::integrate_generated_columns
; Function Attrs: noinline nounwind nonlazybind
define internal fastcc void @"::integrate_generated_columns"(ptr noalias nocapture noundef align 8 dereferenceable(32) %_row, ptr noalias noundef nonnull readonly align 1 %0, i64 noundef %1) unnamed_addr #5 personality ptr @rust_eh_personality {
start:
  %self1.i.i = alloca %"core::result::Result<&[u8], spacetimedb_sats::buffer::DecodeError>", align 8
  %e.i = alloca %"spacetimedb_sats::buffer::DecodeError", align 8
  %_generated_cols = alloca { ptr, i64 }, align 8
  store ptr %0, ptr %_generated_cols, align 8
  %2 = getelementptr inbounds i8, ptr %_generated_cols, i64 8
  store i64 %1, ptr %2, align 8
  %3 = getelementptr inbounds i8, ptr %_row, i64 24
  %_8 = load i64, ptr %3, align 8, !noundef !27
  %4 = icmp eq i64 %_8, 0
  br i1 %4, label %bb1, label %bb4

bb1:                                              ; preds = %start
  call void @llvm.lifetime.start.p0(i64 40, ptr nonnull %self1.i.i), !noalias !2900
; call <&[u8] as spacetimedb_sats::buffer::BufReader>::get_slice
  call void @"<&[u8] as spacetimedb_sats::buffer::BufReader>::get_slice"(ptr noalias nocapture noundef nonnull sret([40 x i8]) align 8 dereferenceable(40) %self1.i.i, ptr noalias noundef nonnull align 8 dereferenceable(16) %_generated_cols, i64 noundef 8) #19, !noalias !2907
  %5 = load i64, ptr %self1.i.i, align 8, !range !2697, !noalias !2900, !noundef !27
  %6 = icmp eq i64 %5, -9223372036854775805
  %7 = getelementptr inbounds i8, ptr %self1.i.i, i64 8
  %v.0.i.i = load ptr, ptr %7, align 8, !noalias !2900
  %8 = getelementptr inbounds i8, ptr %self1.i.i, i64 16
  %v.1.i.i = load i64, ptr %8, align 8, !noalias !2900
  br i1 %6, label %bb6.i.i, label %bb2.i

bb6.i.i:                                          ; preds = %bb1
  call void @llvm.lifetime.end.p0(i64 40, ptr nonnull %self1.i.i), !noalias !2900
  call void @llvm.experimental.noalias.scope.decl(metadata !2908)
  call void @llvm.experimental.noalias.scope.decl(metadata !2911)
  %_3.not.i.i.i = icmp eq i64 %v.1.i.i, 8
  br i1 %_3.not.i.i.i, label %"core::result::Result::unwrap.exit", label %bb1.i.i.i

bb1.i.i.i:                                        ; preds = %bb6.i.i
; call core::slice::::copy_from_slice::len_mismatch_fail
  call void @"core::slice::::copy_from_slice::len_mismatch_fail"(i64 noundef 8, i64 noundef %v.1.i.i, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc_825ed530fe0ece941722f5f5e2ef3a35) #20, !noalias !2913
  unreachable

bb2.i:                                            ; preds = %bb1
  %e.sroa.7.0.self1.sroa_idx.i.i = getelementptr inbounds i8, ptr %self1.i.i, i64 24
  %_6.sroa.10.0.e.i.sroa_idx = getelementptr inbounds i8, ptr %e.i, i64 24
  call void @llvm.lifetime.start.p0(i64 40, ptr nonnull %e.i), !noalias !2914
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %_6.sroa.10.0.e.i.sroa_idx, ptr noundef nonnull align 8 dereferenceable(16) %e.sroa.7.0.self1.sroa_idx.i.i, i64 16, i1 false), !noalias !27
  call void @llvm.lifetime.end.p0(i64 40, ptr nonnull %self1.i.i), !noalias !2900
  %9 = ptrtoint ptr %v.0.i.i to i64
  store i64 %5, ptr %e.i, align 8, !noalias !2918
  %_6.sroa.6.0.e.i.sroa_idx = getelementptr inbounds i8, ptr %e.i, i64 8
  store i64 %9, ptr %_6.sroa.6.0.e.i.sroa_idx, align 8, !noalias !2918
  %_6.sroa.9.0.e.i.sroa_idx = getelementptr inbounds i8, ptr %e.i, i64 16
  store i64 %v.1.i.i, ptr %_6.sroa.9.0.e.i.sroa_idx, align 8, !noalias !2918
; call core::result::unwrap_failed
  call void @core::result::unwrap_failed(ptr noalias noundef nonnull readonly align 1 @alloc_00ae4b301f7fab8ac9617c03fcbd7274, i64 noundef 43, ptr noundef nonnull align 1 %e.i, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @vtable.2, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc_579c227a4403128ce5fbcc2607f38818) #20, !noalias !2919
  unreachable

"core::result::Result::unwrap.exit":              ; preds = %bb6.i.i
  %buf.sroa.0.0.copyload6.i6.i = load i64, ptr %v.0.i.i, align 1, !alias.scope !2920, !noalias !2907
  store i64 %buf.sroa.0.0.copyload6.i6.i, ptr %3, align 8
  br label %bb4

bb4:                                              ; preds = %start, %"core::result::Result::unwrap.exit"
  ret void
}
```
LLVM IR after this change:

```ll
; ::integrate_generated_columns
; Function Attrs: noinline nounwind nonlazybind
define internal fastcc void @"::integrate_generated_columns"(ptr noalias nocapture noundef align 8 dereferenceable(32) %__row, ptr noalias nocapture noundef nonnull readonly align 1 %0, i64 noundef %1) unnamed_addr #5 personality ptr @rust_eh_personality {
start:
  %_4 = getelementptr inbounds i8, ptr %__row, i64 24
  %_2.i = load i64, ptr %_4, align 8, !alias.scope !2799, !noalias !2802, !noundef !27
  %_0.i = icmp eq i64 %_2.i, 0
  br i1 %_0.i, label %bb2.i, label %spacetimedb::table::SequenceTrigger::maybe_decode_into.exit

bb2.i:                                            ; preds = %start
  %_5.i.i = icmp ult i64 %1, 8
  br i1 %_5.i.i, label %bb10.i, label %bb11.i

bb11.i:                                           ; preds = %bb2.i
  %2 = load i64, ptr %0, align 1, !noalias !2805
  store i64 %2, ptr %_4, align 8, !alias.scope !2809, !noalias !2802
  br label %spacetimedb::table::SequenceTrigger::maybe_decode_into.exit

bb10.i:                                           ; preds = %bb2.i
; call spacetimedb::table::sequence_decode_error
  tail call void @spacetimedb::table::sequence_decode_error() #20
  unreachable

spacetimedb::table::SequenceTrigger::maybe_decode_into.exit: ; preds = %start, %bb11.i
  ret void
}
```

Expected complexity level and risk

1

bfops commented 2 weeks ago

It sounds like @cloutiertyler wants to hold off on merging this for now, at least while we're assembling RC1