davedoesdev / webm-muxer.js

WebM muxer using libwebm and webm-tools compiled to Wasm
http://rawgit.davedoesdev.com/davedoesdev/webm-muxer.js/main/demo.html
MIT License
54 stars 5 forks source link

Video is partly seekable #26

Closed onthegit closed 2 years ago

onthegit commented 2 years ago

Hello, the generated video cannot be seeked properly in most players, including Firefox.

When trying to seek it blocks the seek and then after 3-4 seconds it starts playing, and sometimes it does not play at all after seeking.

davedoesdev commented 2 years ago

Hi- Recording in Chrome and then playing back in Firefox seems to seek fine for me using the demo. Could you see if files produced using the demo seek properly please? Thanks

onthegit commented 2 years ago

Hello, I am using the ts-ebml package directly. Are there any other changes made to EBML.Reader or EBML.encoder() in your EBML.js?

or only makeMetadataSeekable is changed in EBML.js?

onthegit commented 2 years ago
// Log the values carried by every "duration" event emitted by the reader.
reader.addListener("duration", (event) => {
  const { timecodeScale, duration } = event;
  console.log('timecodeScale', timecodeScale);
  console.log('duration', duration);
});

The output is:

timecodeScale 1000000
 duration 1000
 duration 95
 duration -79
 duration 1000
 duration 630
 duration -613
 duration 284

The reported duration goes negative. Could that be the problem?

onthegit commented 2 years ago

Notice that CueTime is 0 all the time. This is what mkvinfo gives:

+ Cues at 281
| + Cue point at 287
|  + Cue time: 00:00:00.000000000 at 289
|  + Cue track positions at 292
|   + Cue track: 1 at 294
|   + Cue cluster position: 4271 at 297
| + Cue point at 301
|  + Cue time: 00:00:00.000000000 at 303
|  + Cue track positions at 306
|   + Cue track: 1 at 308
|   + Cue cluster position: 1243997 at 311
| + Cue point at 316
|  + Cue time: 00:00:00.000000000 at 318
|  + Cue track positions at 321
|   + Cue track: 1 at 323
|   + Cue cluster position: 2482883 at 326
| + Cue point at 331
|  + Cue time: 00:00:00.000000000 at 333
|  + Cue track positions at 336
|   + Cue track: 1 at 338
|   + Cue cluster position: 2671552 at 341
| + Cue point at 346
|  + Cue time: 00:00:00.000000000 at 348
|  + Cue track positions at 351
|   + Cue track: 1 at 353
|   + Cue cluster position: 3652132 at 356
| + Cue point at 361
|  + Cue time: 00:00:00.013000000 at 363
|  + Cue track positions at 366
|   + Cue track: 1 at 368
|   + Cue cluster position: 4332716 at 371
| + Cue point at 376
|  + Cue time: 00:00:00.000000000 at 378
|  + Cue track positions at 381
|   + Cue track: 1 at 383
|   + Cue cluster position: 4415649 at 386
| + Cue point at 391
|  + Cue time: 00:00:00.010000000 at 393
|  + Cue track positions at 396
|   + Cue track: 1 at 398
|   + Cue cluster position: 4698693 at 401
| + Cue point at 406
|  + Cue time: 00:00:00.000000000 at 408
|  + Cue track positions at 411
|   + Cue track: 1 at 413
|   + Cue cluster position: 5730201 at 416
| + Cue point at 421
|  + Cue time: 00:00:00.000000000 at 423
|  + Cue track positions at 426
|   + Cue track: 1 at 428
|   + Cue cluster position: 6486149 at 431
| + Cue point at 436
|  + Cue time: 00:00:00.000000000 at 438
|  + Cue track positions at 441
|   + Cue track: 1 at 443
|   + Cue cluster position: 7053657 at 446
| + Cue point at 451
|  + Cue time: 00:00:00.000000000 at 453
|  + Cue track positions at 456
|   + Cue track: 1 at 458
|   + Cue cluster position: 7349578 at 461
| + Cue point at 466
|  + Cue time: 00:00:00.000000000 at 468
|  + Cue track positions at 471
|   + Cue track: 1 at 473
|   + Cue cluster position: 8368502 at 476
davedoesdev commented 2 years ago

Must be something missing from ts-ebml as my saved files show:

|+ Cues at 281
| + Cue point at 287
|  + Cue time: 00:00:00.000000000 at 289
|  + Cue track positions at 292
|   + Cue track: 1 at 294
|   + Cue cluster position: 1199 at 297
| + Cue point at 301
|  + Cue time: 00:00:01.000000000 at 303
|  + Cue track positions at 307
|   + Cue track: 1 at 309
|   + Cue cluster position: 242144 at 312
| + Cue point at 317
|  + Cue time: 00:00:01.032000000 at 319
|  + Cue track positions at 323
|   + Cue track: 1 at 325
|   + Cue cluster position: 278124 at 328
| + Cue point at 333
|  + Cue time: 00:00:02.032000000 at 335
|  + Cue track positions at 339
|   + Cue track: 1 at 341
|   + Cue cluster position: 582338 at 344
| + Cue point at 349
|  + Cue time: 00:00:02.068000000 at 351
|  + Cue track positions at 355
|   + Cue track: 1 at 357
|   + Cue cluster position: 586529 at 360
| + Cue point at 365
|  + Cue time: 00:00:03.068000000 at 367
|  + Cue track positions at 371
|   + Cue track: 1 at 373
|   + Cue cluster position: 901084 at 376
davedoesdev commented 2 years ago

Here's what I changed:

--- EBML.js 2022-06-27 08:59:01.698780952 +0100
+++ /webm-muxer.js/EBML.js  2022-06-05 07:21:24.812827564 +0100
@@ -1,3 +1,5 @@
+// This is https://github.com/muaz-khan/RecordRTC/blob/master/libs/EBML.js
+// Copyright and licence: https://github.com/legokichi/ts-ebml
 (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.EBML = f()}})(function(){var define,module,exports;return (function(){function r(e,n,t){function o(i,f){if(!n[i]){if(!e[i]){var c="function"==typeof require&&require;if(!f&&c)return c(i,!0);if(u)return u(i,!0);var a=new Error("Cannot find module '"+i+"'");throw a.code="MODULE_NOT_FOUND",a}var p=n[i]={exports:{}};e[i][0].call(p.exports,function(r){var n=e[i][1][r];return o(n||r)},p,p.exports,r,e,n,t)}return n[i].exports}for(var u="function"==typeof require&&require,i=0;i<t.length;i++)o(t[i]);return o}return r})()({1:[function(require,module,exports){
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
@@ -938,7 +940,7 @@
  * @param duration - Duration (TimecodeScale)
  * @param cues - cue points for clusters
  */
-function makeMetadataSeekable(originalMetadata, duration, cuesInfo) {
+function makeMetadataSeekable(originalMetadata, duration, cuesInfo, cuesOffset, cuesPosition) {
     // extract the header, we can reuse this as-is
     var header = extractElement("EBML", originalMetadata);
     var headerSize = encodedSizeOfEbml(header);
@@ -977,8 +979,14 @@
         // SeekHead starts at 0
         var infoStart = seekHeadSize; // Info comes directly after SeekHead
         var tracksStart = infoStart + infoSize; // Tracks comes directly after Info
-        var cuesStart = tracksStart + tracksSize; // Cues starts directly after 
-        var newMetadataSize = cuesStart + cuesSize; // total size of metadata  
+        var cuesStart, newMetadataSize;
+        if (cuesPosition) {
+            cuesStart = cuesPosition - segmentContentStartPos;
+            newMetadataSize = tracksStart + tracksSize;
+        } else {
+            cuesStart = tracksStart + tracksSize; // Cues starts directly after
+            newMetadataSize = cuesStart + cuesSize; // total size of metadata
+        }
         // This is the offset all CueClusterPositions should be adjusted by due to the metadata size changing.
         var sizeDifference = newMetadataSize - originalMetadataSize;
         // console.error(`infoStart: ${infoStart}, infoSize: ${infoSize}`);
@@ -1016,8 +1024,12 @@
             //console.error(`CueClusterPosition: ${CueClusterPosition}, Corrected to: ${CueClusterPosition - segmentContentStartPos}  , offset by ${sizeDifference} to become ${(CueClusterPosition - segmentContentStartPos) + sizeDifference - segmentContentStartPos}`);
             // EBMLReader returns CueClusterPosition with absolute byte offsets. The Cues section expects them as offsets from the first level 1 element of the Segment, so we need to adjust it.
             CueClusterPosition -= segmentContentStartPos;
-            // We also need to adjust to take into account the change in metadata size from when EBMLReader read the original metadata.
-            CueClusterPosition += sizeDifference;
+            if (cuesOffset) {
+                CueClusterPosition += cuesOffset;
+            } else {
+                // We also need to adjust to take into account the change in metadata size from when EBMLReader read the original metadata.
+                CueClusterPosition += sizeDifference;
+            }
             cues.push({ name: "CueClusterPosition", type: "u", data: createUIntBuffer(CueClusterPosition) });
             cues.push({ name: "CueTrackPositions", type: "m", isEnd: true });
             cues.push({ name: "CuePoint", type: "m", isEnd: true });
@@ -1048,14 +1060,16 @@
         { name: "Segment", type: "m", isEnd: false, unknownSize: true },
         seekHead,
         info,
-        tracks,
-        cues
+        tracks
     ]);
+    if (!cuesPosition) {
+        finalMetadata = finalMetadata.concat(cues);
+    }
     var result = new EBMLEncoder_1.default().encode(finalMetadata);
     //printElementIds(finalMetadata);
     //console.error(`Final metadata buffer size: ${result.byteLength}`);
     //console.error(`Final metadata buffer size without header and segment: ${result.byteLength-segmentContentStartPos}`);
-    return result;
+    return cuesPosition ? [result, new EBMLEncoder_1.default().encode(cues)] : result;
 }
 exports.makeMetadataSeekable = makeMetadataSeekable;
 /**
onthegit commented 2 years ago

@davedoesdev thanks for your reply. I think it could be due to bug in ts-ebml package, I will need to investigate more. https://github.com/legokichi/ts-ebml/issues/16

onthegit commented 2 years ago

@davedoesdev here is the function that uses the ts-ebml package:

/**
 * Builds new, seekable metadata (EBML header, Segment start, SeekHead, Info,
 * Tracks and optionally Cues) from previously read metadata.
 *
 * @param originalMetadata - Array of decoded EBML elements; the last entry's
 *   `dataEnd` is used as the end of the original metadata.
 * @param duration - Value written as an 8-byte float `Duration` into `Info`
 *   (any existing `Duration` element is removed first).
 * @param cuesInfo - Array of `{ CueTrack, CueClusterPosition, CueTime }`
 *   objects, one per cue point. `CueClusterPosition` is expected as an
 *   absolute byte offset; it is rebased onto the segment content start here.
 * @param cuesOffset - Optional. When truthy, added to every rebased
 *   `CueClusterPosition` instead of the computed metadata size difference.
 * @param cuesPosition - Optional. When truthy, the SeekHead points at
 *   `cuesPosition - segmentContentStartPos` and the Cues element is NOT
 *   appended to the metadata but encoded separately.
 *   NOTE(review): presumably the absolute file offset where the caller will
 *   write the Cues - confirm against the caller.
 * @returns The encoded metadata, or `[encodedMetadata, encodedCues]` when
 *   `cuesPosition` is truthy.
 * @throws {Error} If the SeekHead/Cues sizes do not stabilise within 10 iterations.
 */
export function makeMetadataSeekable(originalMetadata, duration, cuesInfo, cuesOffset, cuesPosition) {
  // Emits the three elements of one SeekHead entry pointing at `position`.
  // Buffer.from replaces the deprecated `new Buffer(array)` constructor.
  const seekEntry = (idBytes, position) => [
    { name: "Seek", type: "m", isEnd: false },
    { name: "SeekID", type: "b", data: Buffer.from(idBytes) },
    { name: "SeekPosition", type: "u", data: createUIntBuffer(position) },
    { name: "Seek", type: "m", isEnd: true }
  ];

  // The EBML header can be reused as-is.
  const header = extractElement("EBML", originalMetadata);
  const headerSize = encodedSizeOfEbml(header);
  // After the header comes the Segment open tag, which in this implementation is always
  // 12 bytes (4 byte id, 8 byte 'unknown length'). Segment content starts after that, and
  // all SeekPositions and CueClusterPositions must be relative to segmentContentStartPos.
  const segmentContentStartPos = headerSize + 12;
  // Original metadata size, expressed relative to segmentContentStartPos.
  const originalMetadataSize = originalMetadata[originalMetadata.length - 1].dataEnd - segmentContentStartPos;

  // Segment info: drop any existing Duration element and insert our own.
  const info = extractElement("Info", originalMetadata);
  removeElement("Duration", info);
  info.splice(1, 0, { name: "Duration", type: "f", data: createFloatBuffer(duration, 8) });
  const infoSize = encodedSizeOfEbml(info);

  // Track info is reused as-is.
  const tracks = extractElement("Tracks", originalMetadata);
  const tracksSize = encodedSizeOfEbml(tracks);

  let seekHeadSize = 47; // Initial best guess, but could be slightly larger if the Cues element is huge.
  let seekHead = [];
  let cuesSize = 5 + cuesInfo.length * 15; // Very rough initial approximation.
  let cues = [];
  let lastSizeDifference = -1;

  // The size of the SeekHead and Cues elements depends on how many bytes the offsets can be
  // encoded in, and the offsets depend on the size of those elements, so iterate until the
  // size difference is the same on two consecutive passes.
  const maxIterations = 10;
  for (let i = 0; i < maxIterations; i++) {
    // SeekHead starts at 0.
    const infoStart = seekHeadSize; // Info comes directly after SeekHead
    const tracksStart = infoStart + infoSize; // Tracks comes directly after Info
    let cuesStart;
    let newMetadataSize;
    if (cuesPosition) {
      // Cues live outside the metadata, so they do not contribute to its size.
      cuesStart = cuesPosition - segmentContentStartPos;
      newMetadataSize = tracksStart + tracksSize;
    } else {
      cuesStart = tracksStart + tracksSize; // Cues start directly after Tracks
      newMetadataSize = cuesStart + cuesSize; // total size of metadata
    }
    // Offset all CueClusterPositions must be adjusted by due to the metadata size changing.
    const sizeDifference = newMetadataSize - originalMetadataSize;

    // (Re)create the SeekHead element.
    seekHead = [].concat(
      [{ name: "SeekHead", type: "m", isEnd: false }],
      seekEntry([0x15, 0x49, 0xA9, 0x66], infoStart), // Info
      seekEntry([0x16, 0x54, 0xAE, 0x6B], tracksStart), // Tracks
      seekEntry([0x1C, 0x53, 0xBB, 0x6B], cuesStart), // Cues
      [{ name: "SeekHead", type: "m", isEnd: true }]
    );
    seekHeadSize = encodedSizeOfEbml(seekHead);

    // (Re)create the Cues element.
    cues = [{ name: "Cues", type: "m", isEnd: false }];
    for (const { CueTrack, CueClusterPosition, CueTime } of cuesInfo) {
      cues.push({ name: "CuePoint", type: "m", isEnd: false });
      cues.push({ name: "CueTime", type: "u", data: createUIntBuffer(CueTime) });
      cues.push({ name: "CueTrackPositions", type: "m", isEnd: false });
      cues.push({ name: "CueTrack", type: "u", data: createUIntBuffer(CueTrack) });
      // The reader reports absolute byte offsets; Cues expect offsets from the first
      // level 1 element of the Segment. Then shift either by the caller-supplied offset or
      // by the change in metadata size since the original metadata was read.
      const clusterPosition = (CueClusterPosition - segmentContentStartPos) +
        (cuesOffset ? cuesOffset : sizeDifference);
      cues.push({ name: "CueClusterPosition", type: "u", data: createUIntBuffer(clusterPosition) });
      cues.push({ name: "CueTrackPositions", type: "m", isEnd: true });
      cues.push({ name: "CuePoint", type: "m", isEnd: true });
    }
    cues.push({ name: "Cues", type: "m", isEnd: true });
    cuesSize = encodedSizeOfEbml(cues);

    // Converged: the size difference matches the previous iteration.
    if (lastSizeDifference === sizeDifference) {
      break;
    }
    lastSizeDifference = sizeDifference;
    if (i === maxIterations - 1) {
      throw new Error("Failed to converge to a stable metadata size");
    }
  }

  let finalMetadata = [].concat(
    header,
    { name: "Segment", type: "m", isEnd: false, unknownSize: true },
    seekHead,
    info,
    tracks
  );
  if (!cuesPosition) {
    finalMetadata = finalMetadata.concat(cues);
  }
  const result = new EMBL.Encoder().encode(finalMetadata);
  return cuesPosition ? [result, new EMBL.Encoder().encode(cues)] : result;
}
onthegit commented 2 years ago

I checked all the lines, and all the changes you made in EBML.js are incorporated in this function, so I don't know why the CueTime is always 0.

onthegit commented 2 years ago

console.log on the cues - these come from ts-ebml reader: reader.cues - the cues that are sent to makeMetadataSeekable ->

0: {CueTrack: 1, CueClusterPosition: 223, CueTime: 0}
1: {CueTrack: 1, CueClusterPosition: 1268172, CueTime: 0}
2: {CueTrack: 1, CueClusterPosition: 2497280, CueTime: 0}
3: {CueTrack: 1, CueClusterPosition: 2686581, CueTime: 0}
4: {CueTrack: 1, CueClusterPosition: 3681639, CueTime: 0}
5: {CueTrack: 1, CueClusterPosition: 4374611, CueTime: 13}
6: {CueTrack: 1, CueClusterPosition: 4544810, CueTime: 6}
7: {CueTrack: 1, CueClusterPosition: 4830256, CueTime: 0}
8: {CueTrack: 1, CueClusterPosition: 5866258, CueTime: 3}
9: {CueTrack: 1, CueClusterPosition: 6662265, CueTime: 0}
10: {CueTrack: 1, CueClusterPosition: 6930436, CueTime: 0}
11: {CueTrack: 1, CueClusterPosition: 7247562, CueTime: 0}
12: {CueTrack: 1, CueClusterPosition: 8404353, CueTime: 0}
onthegit commented 2 years ago

Found the problem. Patches submitted to both ts-ebml and here: https://github.com/davedoesdev/webm-muxer.js/pull/27

Now the video is seekable and CueTimes are correct.

davedoesdev commented 2 years ago

@onthegit good catch.

I'm considering changing to https://github.com/node-ebml/node-ebml (see #28 ).