Count the number of records by iterating eleveldb if no state file exists

mocchira commented 7 years ago

This is the fix for https://github.com/leo-project/leofs/issues/731. With this PR, leo_backend_db can keep track of the exact number of records even without a state file.

yosukehara commented 7 years ago

WIP

yosukehara commented 7 years ago

@mocchira I wonder how much time it takes to restart backend_db_eleveldb's processes. We need to benchmark that.

mocchira commented 7 years ago

@yosukehara Valid concern. I will paste the results from my dev environment; however, it runs on VirtualBox (disk I/O is very slow), so another test on a real server is needed. Please run the test on a real machine whose spec is close to the ones in production.

mocchira commented 7 years ago

benched with the following patch.

diff --git a/test/leo_backend_db_api_tests.erl b/test/leo_backend_db_api_tests.erl
index ffbbe87..9191a8f 100644
--- a/test/leo_backend_db_api_tests.erl
+++ b/test/leo_backend_db_api_tests.erl
@@ -67,7 +67,7 @@ backend_db_test_() ->
                            fun all_eleveldb_/1,
                            fun all_ets_/1,
                            fun first_/1,
-                           fun count_/1,
+                           %fun count_/1,
                            fun compact_/1
                           ]]}.

@@ -254,14 +254,18 @@ first_(_) ->
     ?assertEqual(DelCount, length(TestData)),
     ok.

-count_(_) ->
+count_() ->
     Id = ?TEST_INSTANCE_NAME4,
     Path = ?PATH4,
     Key = <<"key">>,
     Val = <<"val">>,

     ok = leo_backend_db_api:new(Id, 1, ?BACKEND_DB_LEVELDB, Path),
-    _TestData = [leo_backend_db_api:put(Id, <<Key/binary, Idx>>, Val) || Idx <- lists:seq($a, $z)],
+    Start = os:timestamp(),
+    _TestData = [leo_backend_db_api:put(Id, <<Key/binary, Idx:32/integer>>, Val) || Idx <- lists:seq(1, 100000)],
+    End = os:timestamp(),
+    Diff = timer:now_diff(End, Start),
+    ?debugVal(Diff),
     %% restart and confirm the number of items through count/1
     Ret1 = supervisor:terminate_child(leo_backend_db_sup, Id),
     ?debugVal(Ret1),
@@ -277,14 +281,21 @@ count_(_) ->
     end,
     StateFilePath = lists:append([Path_1, "_", atom_to_list(Id), ".state"]),
     ?debugVal(StateFilePath),
-    file:delete(StateFilePath),
+    %%file:delete(StateFilePath),
     timer:sleep(300),
+    Start2 = os:timestamp(),
     ok = leo_backend_db_api:new(Id, 1, ?BACKEND_DB_LEVELDB, Path),
+    End2 = os:timestamp(),
+    Diff2 = timer:now_diff(End2, Start2),
+    ?debugVal(Diff2),
     Count = leo_backend_db_api:count(Id),
     ?debugVal(Count),
-    ?assertEqual(26, Count),
+    ?assertEqual(100000, Count),
     ok.

+count_test_() ->
+    {timeout, 60, fun count_/0}.
+
 delete_all(Id) ->
     delete_all(Id, leo_backend_db_api:first(Id), 0).

The result with a state file

10,000 records take 100-500ms
100,000 records take 2-5secs
200,000 records take 2-5secs
300,000 records take 4-13secs

The result with the state file removed (uncommenting file:delete causes the new code to run)

10,000 records take 100-500ms
100,000 records take 2-5secs
200,000 records take 2-5secs
300,000 records take 4-14secs

Based on these results, the overhead of the new code, which iterates over all records to count them, seems to be negligible.

yosukehara commented 7 years ago

@mocchira Thanks for sharing. In this week, we'll benchmark it.

mocchira commented 7 years ago

@yosukehara Please also test the case with fewer records but fragmented (not compacted) data. Reference: https://github.com/leo-project/leofs/issues/731#issuecomment-303895148

This test can be done as follows:

create lots of records in leo_mq (via delete-bucket or rebalance with lots of objects)
wait for finishing the above task
send SIGTERM to leo_storage
restart leo_storage and measure how long it takes to start and how many resources it consumes

leo-project / leo_backend_db

Count the number of records by iterating eleveldb if no state file exists #10