Here's a script to test this issue. Only tested in Chrome.
import { fireproof } from '@fireproof/core'
// Global Test Configuration
const DB_NAME = 'storage-test'
const DOC_SIZE_MB = 1
const NUM_DOCS = 5
const TEST_PATTERN = 'random' // 'random', 'sequential', or 'repeating'
const TEST_TYPE = 'multiple' // 'multiple' or 'cycle'
const db = fireproof(DB_NAME)
// Helper to measure IndexedDB size
async function getIDBSize() {
  const estimate = await navigator.storage.estimate()
  return {
    usage: (estimate.usage / (1024 * 1024)).toFixed(2),
    quota: (estimate.quota / (1024 * 1024)).toFixed(2)
  }
}
// Measure data size in memory
function getSize(data) {
  const bytes = new TextEncoder().encode(JSON.stringify(data)).length
  return {
    bytes,
    mb: (bytes / (1024 * 1024)).toFixed(2)
  }
}
// Generate data patterns
function generatePattern(pattern, length) {
  switch (pattern) {
    case 'random': {
      // getRandomValues rejects requests over 65536 bytes, so fill in chunks
      const chunk = 65536
      let data = ''
      for (let i = 0; i < length; i += chunk) {
        const size = Math.min(chunk, length - i)
        const array = new Uint8Array(size)
        crypto.getRandomValues(array)
        data += String.fromCharCode.apply(null, array)
      }
      return btoa(data)
    }
    case 'sequential':
      return btoa(Array.from(
        { length },
        (_, i) => String.fromCharCode(i % 256)
      ).join(''))
    case 'repeating': {
      const template = 'x'.repeat(1024)
      let result = ''
      while (result.length < length) {
        result += template
      }
      return btoa(result.slice(0, length))
    }
    default:
      throw new Error('Invalid pattern type')
  }
}
// Generate test document
function generateDocument() {
  const targetBytes = DOC_SIZE_MB * 1024 * 1024
  // base64 inflates data by ~4/3, so generate 3/4 of the target size
  const rawBytes = Math.ceil(targetBytes * 0.75)
  const doc = {
    _id: 'large-file',
    message: 'Test Document',
    timestamp: new Date().toISOString(),
    pattern: TEST_PATTERN,
    largeData: generatePattern(TEST_PATTERN, rawBytes)
  }
  return { doc, size: getSize(doc) }
}
// Log progress with storage info
async function logProgress(phase, index) {
  const { usage, quota } = await getIDBSize()
  console.log(
    `[${phase}] ${index}/${NUM_DOCS}`,
    `Storage: ${usage}MB / ${quota}MB`,
    `(${((usage / quota) * 100).toFixed(1)}%)`
  )
}
// Test multiple writes
async function testMultipleWrites() {
  console.log('🚀 Starting Multiple Writes Test\n', {
    DB_NAME,
    DOC_SIZE_MB,
    NUM_DOCS,
    TEST_PATTERN,
    TEST_TYPE
  })
  for (let i = 0; i < NUM_DOCS; i++) {
    const { doc, size } = generateDocument()
    await db.put(doc)
    console.log(`Wrote ${size.mb}MB document`)
    await logProgress('Write', i + 1)
  }
}
// Test write/delete cycle
async function testWriteDeleteCycle() {
  console.log('🔄 Starting Write/Delete Cycle Test\n', {
    DB_NAME,
    DOC_SIZE_MB,
    NUM_DOCS,
    TEST_PATTERN,
    TEST_TYPE
  })
  for (let i = 0; i < NUM_DOCS; i++) {
    const { doc } = generateDocument()
    await db.put(doc)
    await logProgress('Write', i + 1)
    await db.del(doc._id)
    await logProgress('Delete', i + 1)
  }
}
// Main test runner
async function runTest() {
  try {
    const startSize = await getIDBSize()
    console.log('Initial Storage:', `${startSize.usage}MB / ${startSize.quota}MB`)
    if (TEST_TYPE === 'multiple') {
      await testMultipleWrites()
    } else if (TEST_TYPE === 'cycle') {
      await testWriteDeleteCycle()
    }
    const endSize = await getIDBSize()
    console.log('\nFinal Storage:', `${endSize.usage}MB / ${endSize.quota}MB`)
  } catch (error) {
    console.error('❌ Test Error:', error)
  }
}
runTest()
I'm also running into additional errors in the console after reloading the page a few times, and they then persist across subsequent reloads:
{
  "module": "Loader",
  "level": "error",
  "cid": "bafyreieeqnzvk242r6tjbwizinxw6wx3cpbbbgt3s66r6fnown5ic4bdwq",
  "msg": "block not in reader"
}
Thanks for this -- I will try running it. FYI, deletion is async: the storage engine assumes there may be readers on slower connections, so cleanup in the actual write sequence is deferred until a configurable threshold of updates (100 by default) has accumulated. An update here means a put, delete, or bulk operation.
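A minimal sketch of that pattern, assuming nothing about Fireproof's real internals (the class and field names here are hypothetical, for illustration only):

// Illustrative only, not Fireproof's actual code: every update (put,
// delete, or bulk) appends to the log, and stale data is only cleaned
// up once a configurable number of updates has accumulated, so deleted
// blocks linger until the next compaction pass.
class AppendOnlyStore {
  constructor(threshold = 100) {
    this.threshold = threshold
    this.log = []                  // append-only update log
    this.updatesSinceCompact = 0
  }
  update(op) {
    this.log.push(op)              // deletes are appended too, not removed
    if (++this.updatesSinceCompact >= this.threshold) this.compact()
  }
  compact() {
    // Rewrite only the live state; everything else becomes reclaimable
    const live = new Map()
    for (const op of this.log) {
      if (op.deleted) live.delete(op._id)
      else live.set(op._id, op)
    }
    this.log = [...live.values()]
    this.updatesSinceCompact = 0
  }
}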
So clearly this is gonna suck if you are modifying large binaries. (BTW, you can put a Uint8Array directly on the document, no need to btoa it.)
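For example, a sketch of the direct-bytes approach, reusing the db and DOC_SIZE_MB globals from the script above (the chunked fill is only there because getRandomValues rejects requests over 65536 bytes):

// Sketch: store raw bytes on the document instead of base64 text,
// per the suggestion above
async function putBinaryDoc() {
  const bytes = new Uint8Array(DOC_SIZE_MB * 1024 * 1024)
  for (let i = 0; i < bytes.length; i += 65536) {
    // getRandomValues caps each call at 65536 bytes
    crypto.getRandomValues(bytes.subarray(i, i + 65536))
  }
  await db.put({ _id: 'large-file', largeData: bytes })
}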
That's why we have a _files API: https://github.com/fireproof-storage/fireproof/blob/c4462c8a9c0ca436070ae6d6b90c41a318dd315d/tests/fireproof/ledger.test.ts#L412
When you use files, the data is replicated lazily, so new connecting users will only download the newest version of each file, and only when they access it.
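Going by the linked test, usage looks roughly like the sketch below; treat the exact shape as an inference and check the test itself for the authoritative version.

// Rough shape of the _files API, inferred from the linked ledger test
const content = new Uint8Array(1024 * 1024)
const file = new File([content], 'large-file.bin', { type: 'application/octet-stream' })
await db.put({ _id: 'with-attachment', _files: { 'large-file.bin': file } })

// File content is fetched lazily, only when .file() is called
const doc = await db.get('with-attachment')
const loaded = await doc._files['large-file.bin'].file()
console.log(loaded.size)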
To really win on your benchmark for reclaiming space, we need to delete local _file CARs when they are replaced by a newer version... We don't do that yet, but we'd love a patch that enables that behavior.
Also, you might want to take a look at this benchmark (it's a Node script). It demonstrates the behavior of large databases and compaction over time... you'll see why we eventually need multi-level compaction. On very long ledgers, compaction eventually overwhelms runtime performance, but that's fixable in the architecture; we just need to write the code: https://github.com/dstanesc/fireproof-playground
Here's the tip of the iceberg regarding performance optimization of document storage: https://github.com/fireproof-storage/fireproof/issues/125
@jchris Thanks for describing how the updates work—that’s super interesting and cool! Also, thanks for pointing out the btoa and _files issues—woopsidaisy! 😊
Could you create an issue for implementing CAR file space reclamation, perhaps (if it's easy enough) with paths to the files that would need to be modified?
Issue
IndexedDB storage size does not decrease after document deletion. The database continues to grow even when documents are deleted immediately after they are written.
Question: Is there a way to disable version history for specific documents or collections? This might be needed for large files where we don't want to retain previous versions.
Reproduction
Run the script above (set TEST_TYPE = 'cycle' for the write/delete case).
Expected
Reported storage usage drops, or at least stops growing, after documents are deleted.
Actual
Storage usage grows with every write; deletions do not reclaim any space.
Environment
Chrome, @fireproof/core