intel-cloud / cosbench

a benchmark tool for cloud object storage service
Other
574 stars 240 forks source link

hashcheck=true not embedding the hashes inside objects for s3. #406

Open karasjoh000 opened 3 years ago

karasjoh000 commented 3 years ago

Hash checks are enabled (`hashcheck=true`) in the workload below:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Following the example from https://github.com/intel-cloud/cosbench/blob/master/release/conf/hashcheck.xml -->
<!--
  S3 workload with hashcheck=true set on every write and read operation.
  Stages are listed in this order: cleanup, dispose, init, prepare, main.
  NOTE(review): r(...), s(...) and c(...) are COSBench selector expressions;
  confirm their exact semantics against the COSBench user guide.
-->
<workload name="19-1GB" description="sample benchmark for s3">
  <!-- Access key, secret key and endpoint host are redacted. The empty angle-bracket
       placeholders are escaped because a literal "<" is not allowed in an attribute value. -->
  <storage type="s3" config="accesskey=&lt;&gt;;secretkey=&lt;&gt;;endpoint=http://&lt;&gt;;path_style_access=true" />
  <workflow>
    <!-- Housekeeping stages, all targeting containers with prefix "ssds3". -->
    <workstage name="cleanup">
      <work type="cleanup" workers="30" config="cprefix=ssds3;containers=r(1,2);objects=r(1,8000)" />
    </workstage>
    <workstage name="dispose">
      <work type="dispose" workers="1" config="cprefix=ssds3;containers=r(1,2)" />
    </workstage>
    <workstage name="init">
      <work type="init" workers="1" config="cprefix=ssds3;containers=r(1,2)" />
    </workstage>
    <!-- Earlier variant using the built-in "prepare" work type; kept for reference, disabled. -->
    <!-- <workstage name="prepare" division="object">
      <work type="prepare" workers="30" config="hashcheck=true;cprefix=ssds3;containers=r(1,2);objects=r(1,1000);sizes=c(10)MB" />
    </workstage> -->
    <!-- Active prepare stage: populates objects 1..40 through a plain "write" operation
         (19 workers, bounded by totalBytes) instead of the "prepare" work type. -->
    <workstage name="prepare" division="object">
      <work name="main" workers="19" totalBytes="200000000000" division="object">
        <operation type="write" ratio="100" config="hashcheck=true;cprefix=ssds3;containers=s(1,2);objects=s(1,40);sizes=c(1)GB" />
      </work>
    </workstage>

    <!-- Main stage: 50/50 mix of writes to objects 41..80 and reads of objects 1..40
         (the object range written by the prepare stage above). -->
    <workstage name="main">
      <work name="main" workers="19" totalBytes="400000000000" division="object">
        <operation type="write" ratio="50" config="hashcheck=true;cprefix=ssds3;containers=s(1,2);objects=s(41,80);sizes=c(1)GB" />
        <operation type="read" ratio="50" config="hashcheck=true;cprefix=ssds3;containers=s(1,2);objects=s(1,40)" />
      </work>
    </workstage>
  </workflow>
</workload>

But the objects have no hash stored at the tail (there is no trailing `!!!!<hash>!!!!` marker):

# Download one of the written objects and inspect its last 100 bytes for a hash marker.
# NOTE(review): "awse" is presumably a local alias/wrapper for (or a typo of) the "aws" CLI; confirm.
awse s3 cp s3://ssds31/myobjects1 check1
# download: s3://ssds31/myobjects1 to ./check1
# Print the final 100 bytes of the downloaded object.
tail -c 100 check1
# vgbcwguswthozwkuojbqfozebtgulkcrtbuqthgkveeaqwvtmfmrvjwplhnebpbhjpblxvtqerwpliodlguxepaamhwgmoiahfdy
# The output above is only lowercase letters; no "!!!!" delimiters appear in the tail.

This is against Ceph RGW. Note that I applied the suggestion from #320 by adding `-Dcom.amazonaws.services.s3.disableGetObjectMD5Validation=true`;
otherwise, the parallel reads and writes did not work at all.