Closed Blinue closed 7 months ago
我用下面的代码测试了 CNG 和 Windows.Security.Cryptography (以下简称 WSC) 的哈希性能。发现 CNG 比 WSC 快了一个数量级。
#define NOMINMAX
#define WIN32_NO_STATUS
#include <iostream>
#include <Windows.h>
#include <winrt/Windows.Security.Cryptography.Core.h>
#include <winrt/Windows.Storage.Streams.h>
#include <bcrypt.h>
#include <ntstatus.h>
// Runs `func` exactly once and returns the elapsed wall time of that single
// call in whole microseconds, measured with std::chrono::steady_clock.
template<typename Fn>
static int Measure(const Fn& func) {
    using Clock = std::chrono::steady_clock;

    const Clock::time_point begin = Clock::now();
    func();
    const Clock::duration elapsed = Clock::now() - begin;

    const auto micros = std::chrono::duration_cast<std::chrono::microseconds>(elapsed);
    return static_cast<int>(micros.count());
}
static void Win32Hash(const std::vector<uint8_t>& data, std::array<uint8_t, 16>& hash) {
BCRYPT_ALG_HANDLE hAlg = NULL;
BCryptOpenAlgorithmProvider(&hAlg, BCRYPT_MD5_ALGORITHM, nullptr, 0);
uint32_t hashObjSize = 0;
ULONG result;
BCryptGetProperty(hAlg, BCRYPT_OBJECT_LENGTH, (PUCHAR)&hashObjSize, sizeof(hashObjSize), &result, 0);
std::unique_ptr<uint8_t[]> hashObj = std::make_unique<uint8_t[]>(hashObjSize);
BCRYPT_HASH_HANDLE hHash = NULL;
BCryptCreateHash(hAlg, &hHash, hashObj.get(), hashObjSize, NULL, 0, 0);
for (size_t i = 0; i < data.size(); i += 100) {
BCryptHashData(hHash, (PUCHAR)data.data() + i, std::min(ULONG(100), ULONG(data.size() - i)), 0);
}
BCryptFinishHash(hHash, hash.data(), (ULONG)hash.size(), 0);
BCryptDestroyHash(hHash);
BCryptCloseAlgorithmProvider(hAlg, 0);
}
// Local declaration of a COM interface (derives from IUnknown) with a single
// method that hands out a raw pointer through an out-parameter.
// NOTE(review): the GUID presumably matches the system IBufferByteAccess IID
// declared in <robuffer.h> — confirm before changing it.
struct __declspec(uuid("905a0fef-bc53-11df-8c49-001e4fc686da")) IBufferByteAccess : IUnknown {
// Stores the address of the underlying bytes in *value and returns an HRESULT.
virtual HRESULT __stdcall Buffer(void** value) = 0;
};
// IBuffer implementation that wraps an existing memory block without copying
// it. Only the pointer and the size are stored; the object does not own or
// free the memory, so the caller must keep the block alive while the buffer
// is in use.
struct CustomBuffer : winrt::implements<CustomBuffer, winrt::Windows::Storage::Streams::IBuffer, IBufferByteAccess> {
    CustomBuffer(void* address, uint32_t size) : _address{ address }, _size{ size } {}

    // IBuffer: capacity and length both report the full size of the wrapped block.
    uint32_t Capacity() const { return _size; }
    uint32_t Length() const { return _size; }

    // IBuffer: changing the length is not supported.
    void Length(uint32_t /*length*/) { throw winrt::hresult_not_implemented(); }

    // IBufferByteAccess: exposes the wrapped address.
    HRESULT __stdcall Buffer(void** value) final {
        *value = _address;
        return S_OK;
    }

    void* _address;
    uint32_t _size;
};
static void WinRTHash(const std::vector<uint8_t>& data, std::array<uint8_t, 16>& hash) {
using namespace winrt;
using namespace Windows::Security::Cryptography::Core;
using namespace Windows::Storage::Streams;
HashAlgorithmProvider hashAlgProvider = HashAlgorithmProvider::OpenAlgorithm(HashAlgorithmNames::Md5());
CryptographicHash hasher = hashAlgProvider.CreateHash();
for (size_t i = 0; i < data.size(); i += 100) {
IBuffer buffer = make<CustomBuffer>((void*)(data.data() + i), std::min(100u, uint32_t(data.size() - i)));
hasher.Append(buffer);
}
IBuffer hashBuffer = hasher.GetValueAndReset();
assert(hashBuffer.Length() == hash.size());
std::memcpy(hash.data(), hashBuffer.data(), hash.size());
}
// Formats a 16-byte digest as a lowercase hexadecimal string.
//
// Each input byte produces two characters (high nibble first), in array
// order, so the returned string always has 32 characters.
static std::string HexHash(std::array<uint8_t, 16> hash) {
    static constexpr char kHexDigits[] = "0123456789abcdef";

    // Two output characters per byte: the string must be twice the digest
    // length, otherwise the writes below run past the end of the string and
    // the result is truncated.
    std::string result(hash.size() * 2, '\0');

    char* out = &result[0];
    for (const uint8_t byte : hash) {
        *out++ = kHexDigits[(byte >> 4) & 0xf];
        *out++ = kHexDigits[byte & 0xf];
    }
    return result;
}
// Benchmark driver: fills a 100000-byte buffer with std::rand() output, hashes
// it once through Win32Hash and once through WinRTHash, and prints each digest
// in hex together with the time reported by Measure.
int main() {
    std::vector<uint8_t> data(100000);
    for (uint8_t& byte : data) {
        byte = static_cast<uint8_t>(std::rand());
    }

    // Filled by whichever hash function ran last.
    std::array<uint8_t, 16> digest;

    const int win32Micros = Measure([&data, &digest]() {
        Win32Hash(data, digest);
    });
    std::cout << "Win32: " << HexHash(digest) << " " << win32Micros << "us\n";

    const int winrtMicros = Measure([&data, &digest]() {
        WinRTHash(data, digest);
    });
    std::cout << "WinRT: " << HexHash(digest) << " " << winrtMicros << "us" << std::endl;
}
代码中 CustomBuffer 用于将内存块传给 WinRT 而避免额外复制。
测试结果如下:
数据量 (KiB) | CNG (us) | WSC (us) |
---|---|---|
1 | 229 | 2736 |
10 | 239 | 2671 |
100 | 488 | 2978 |
1000 | 1983 | 5531 |
WSC 如此慢的主要原因是它需要加载多个 DLL,包括 kernel.appcore.dll、bcryptprimitives.dll、clbcatq.dll、CryptoWinRT.dll 和 WinTypes.dll,而 CNG 只需加载 bcryptprimitives.dll。由于 WSC 同样加载了 bcryptprimitives.dll,可以推断它只是对 CNG 的一层较慢的封装。
接下来先预热一次来比较真正的执行性能。
更新 Measure 函数:
// Returns the average wall time of one call to `func`, in whole microseconds.
// `func` is invoked 11 times in total: one untimed warm-up call followed by
// 10 timed calls whose durations are summed and divided by 10.
template<typename Fn>
static int Measure(const Fn& func) {
    using Clock = std::chrono::steady_clock;
    constexpr int kTimedRuns = 10;

    // Warm-up: the first call is not timed, so its cost is left out of the average.
    func();

    std::chrono::nanoseconds accumulated{};
    int remaining = kTimedRuns;
    while (remaining-- > 0) {
        const auto begin = Clock::now();
        func();
        accumulated += Clock::now() - begin;
    }

    const auto totalMicros = std::chrono::duration_cast<std::chrono::microseconds>(accumulated).count();
    return static_cast<int>(totalMicros / kTimedRuns);
}
测试结果如下:
数据量 (KiB) | CNG (us) | WSC (us) |
---|---|---|
1 | 2 | 4 |
10 | 17 | 28 |
100 | 165 | 253 |
1000 | 1640 | 2530 |
初始化 CNG 需要约 300us,而 WSC 需要约 3000us。即使排除初始化开销、只比较预热后的执行性能,WSC 仍比 CNG 慢约 50%。
结论:避免使用 WSC。事实上,如果有 Win32 替代,应避免使用任何 WinRT API。
WIL 包含了实用的包装器来管理 Win32 资源、调用函数等,可以大幅简化我们的代码。很多自定义类在 WIL 有替代品。
其他更改: