shaka-project / shaka-player

JavaScript player library / DASH & HLS client / MSE-EME player
Apache License 2.0
7.19k stars 1.34k forks source link

Combining MPD files shaving milliseconds from the videos #6732

Closed smriti0302 closed 4 months ago

smriti0302 commented 5 months ago

I'm combining multiple MPD files into one so that I can play them seamlessly in Shaka Player. This is done as follows:

const fs = require("node:fs/promises");
const fsync = require("fs");
const { DOMParser, XMLSerializer } = require("@xmldom/xmldom");
const NAMESPACE = "urn:mpeg:dash:schema:mpd:2011";

/**
 * Combines several DASH manifests (MPD files) into a single multi-Period
 * manifest.
 *
 * The first file supplies the root <MPD> element; the first <Period> of every
 * following file is appended to that root. For each file, the Period receives
 * a `duration` (copied from that file's `mediaPresentationDuration`) and an
 * `id`, and every SegmentTemplate URL is prefixed with `<file.name>/`.
 *
 * Each entry of `files` is read through two properties: `file` (the path
 * handed to `fs.readFile`) and `name` (used as the Period id and as the URL
 * prefix).
 *
 * NOTE(review): only the first <Period> of each input manifest is used, while
 * the whole `mediaPresentationDuration` of that manifest is assigned to it.
 * Inputs with more than one Period are therefore presumably unsupported.
 */
class MPDCombiner {
  /**
   * @param {Array<{name: string, file: string}>} files - Manifests to combine, in playback order.
   * @param {string} output - Path the combined manifest is written to.
   */
  constructor(files, output) {
    this.files = files;
    this.parser = null;
    this.serializer = null;
    this.output = output;
    this.newLineNode = null;
    this.parent = null;
    this.rootDoc = null;
    this.totalDuration = 0;
    // Period ids already written to the combined manifest; Period@id has to
    // be unique within one MPD.
    this.usedPeriodIds = new Set();
  }

  /**
   * Clears the inputs, the output path and everything accumulated while
   * combining. The cached parser, serializer and newline node are kept.
   */
  resetState() {
    this.files = [];
    this.output = null;

    this.parent = null;
    this.rootDoc = null;
    this.totalDuration = 0;
    this.usedPeriodIds = new Set();
  }

  /**
   * Replaces the list of input files.
   * @param {Array<{name: string, file: string}>} files
   */
  addFiles(files) {
    this.files = files;
  }

  /**
   * Sets the path the combined manifest is written to.
   * @param {string} output
   */
  setOutput(output) {
    this.output = output;
  }

  /**
   * Returns the shared DOMParser, creating it on first use.
   * @returns {DOMParser}
   */
  getParser() {
    if (!this.parser) {
      this.parser = new DOMParser();
    }

    return this.parser;
  }

  /**
   * Returns the shared XMLSerializer, creating it on first use.
   * @returns {XMLSerializer}
   */
  getSerializer() {
    if (!this.serializer) {
      this.serializer = new XMLSerializer();
    }

    return this.serializer;
  }

  /**
   * Returns a fresh whitespace text node used to separate appended Periods.
   * The template node is created once (from the first document passed in)
   * and cloned on every call.
   * @param {Document} doc - Document used to create the template node.
   * @returns {Text}
   */
  getNewLineNode(doc) {
    if (!this.newLineNode) {
      this.newLineNode = doc.createTextNode("\t\n\t");
    }
    return this.newLineNode.cloneNode();
  }

  /**
   * Returns `baseId` if it has not been used for a Period yet, otherwise
   * `baseId-2`, `baseId-3`, ... The returned id is recorded as used.
   * @param {string} baseId - Preferred id (the file name).
   * @returns {string} An id not yet present in the combined manifest.
   */
  reservePeriodId(baseId) {
    let candidate = baseId;
    let suffix = 2;

    while (this.usedPeriodIds.has(candidate)) {
      candidate = `${baseId}-${suffix}`;
      suffix += 1;
    }

    this.usedPeriodIds.add(candidate);
    return candidate;
  }

  /**
   * Stamps the first <Period> under `rootElement` with a `duration` (the
   * manifest's `mediaPresentationDuration`) and a unique `id` derived from
   * `file.name`, and adds that duration to `this.totalDuration`.
   *
   * @param {Element} rootElement - The <MPD> element of one manifest.
   * @param {{name: string}} file - Descriptor of the manifest being processed.
   * @returns {Element|undefined} The modified Period, or undefined when no
   *   root element was supplied.
   * @throws {Error} When the manifest has no Period or no
   *   `mediaPresentationDuration` attribute.
   */
  modifyPeriodAttrib(rootElement, file) {
    if (!rootElement) {
      console.error("Root element not provided");
      return;
    }

    const periods = rootElement.getElementsByTagNameNS(NAMESPACE, "Period");

    if (periods.length < 1) {
      console.error("[MPDCombiner] No Period elements found in the manifest");
      throw new Error("No Period elements found in the manifest");
    }

    const period = periods[0];

    const durationString = rootElement.getAttribute(
      "mediaPresentationDuration"
    );

    if (!durationString) {
      console.error(
        "[MPDCombiner] Duration attribute not found in MPD manifest"
      );
      throw new Error("Duration attribute not found in MPD manifest");
    }

    this.totalDuration += this.getTimeFromDurationString(durationString);

    period.setAttribute("duration", durationString);
    // The same file name may appear several times in the input list; reusing
    // it verbatim would give several Periods the same id.
    period.setAttribute("id", this.reservePeriodId(file.name));

    return period;
  }

  /**
   * Prefixes the `initialization` and `media` URL templates of every
   * <SegmentTemplate> under `rootElement` with `<file.name>/`. Attributes
   * that are absent or empty are left untouched instead of being written as
   * a bare prefix.
   *
   * @param {Element} rootElement - The <MPD> element of one manifest.
   * @param {{name: string}} file - Descriptor of the manifest being processed.
   * @throws {Error} When the manifest contains no SegmentTemplate element.
   */
  modifySegmentTemplate(rootElement, file) {
    const segmentTemplates = rootElement.getElementsByTagNameNS(
      NAMESPACE,
      "SegmentTemplate"
    );

    if (segmentTemplates.length < 1) {
      console.error(
        "[MPDCombiner] No SegmentTemplate elements found in the manifest"
      );
      throw new Error("No SegmentTemplate elements found in the manifest");
    }

    for (let i = 0; i < segmentTemplates.length; i++) {
      const segmentTemplate = segmentTemplates[i];

      for (const attribute of ["initialization", "media"]) {
        const value = segmentTemplate.getAttribute(attribute);

        if (!value) {
          // Missing attribute: nothing to prefix.
          continue;
        }

        segmentTemplate.setAttribute(attribute, `${file.name}/${value}`);
      }
    }
  }

  /**
   * Converts an xs:duration string (e.g. "PT1H1M1.1S", "P1DT2H") to seconds.
   *
   * Years and months have no fixed length; they are counted as 365 and 30
   * days respectively, which is an approximation.
   *
   * @param {string} duration - Duration string to parse.
   * @returns {number} Duration in seconds, or 0 (after logging) when the
   *   input is missing, not a string, or not a valid duration.
   */
  getTimeFromDurationString(duration) {
    if (!duration) {
      console.error("Duration string not provided");
      return 0;
    }

    if (typeof duration !== "string") {
      console.error("Invalid duration string:", duration);
      return 0;
    }

    // Date part (Y, M, D) before "T", time part (H, M, S) after it. Only the
    // seconds component may carry a fraction.
    const pattern =
      /^P(?:(\d+)Y)?(?:(\d+)M)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:\.\d+)?|\.\d+)S)?)?$/;
    const match = pattern.exec(duration.trim());

    if (!match) {
      console.error("Invalid duration string:", duration);
      return 0;
    }

    const [years, months, days, hours, minutes, seconds] = match
      .slice(1)
      .map((component) => (component === undefined ? 0 : Number(component)));

    const totalDays = years * 365 + months * 30 + days;

    return totalDays * 86400 + hours * 3600 + minutes * 60 + seconds;
  }

  /**
   * Converts a number of seconds to a duration string such as
   * "PT1H1M1.100S". Components that are zero are omitted; seconds are
   * written with three decimals.
   *
   * The value is rounded to whole milliseconds before it is split into
   * hours, minutes and seconds, so rounding can never produce a seconds
   * component of "60.000".
   *
   * @param {number} time - Duration in seconds.
   * @returns {string} The duration string, or "PT0S" when `time` is zero,
   *   missing, not a finite non-negative number, or rounds to 0 ms.
   */
  getDurationStringFromTime(time) {
    if (!time) {
      console.error("Time not provided");
      return "PT0S";
    }

    if (typeof time !== "number" || !Number.isFinite(time) || time < 0) {
      console.error("Invalid time:", time);
      return "PT0S";
    }

    const totalMs = Math.round(time * 1000);

    if (totalMs === 0) {
      return "PT0S";
    }

    const hours = Math.floor(totalMs / 3600000);
    const minutes = Math.floor((totalMs % 3600000) / 60000);
    const secondsMs = totalMs % 60000;

    let duration = "PT";

    if (hours > 0) {
      duration += `${hours}H`;
    }

    if (minutes > 0) {
      duration += `${minutes}M`;
    }

    if (secondsMs > 0) {
      duration += `${(secondsMs / 1000).toFixed(3)}S`;
    }

    return duration;
  }

  /**
   * Reads every input manifest, merges them into one document and writes
   * the result to `this.output`.
   *
   * Errors raised while reading, parsing or writing are logged and not
   * rethrown, so the returned promise never rejects.
   *
   * @returns {Promise<number|undefined>} Elapsed time in milliseconds, or
   *   undefined when the inputs are invalid or an error was caught.
   */
  async getCombinedManifest() {
    if (!this.files) {
      console.error("[MPDCombiner] No files provided");
      return;
    }

    if (!this.output) {
      console.error("[MPDCombiner] No output file provided");
      return;
    }

    if (this.files.length < 2) {
      console.error("[MPDCombiner] No files found to combine");
      return;
    }

    try {
      const start = new Date();

      // The reads are independent of each other, so issue them together;
      // the manifests are still processed strictly in input order below.
      const manifests = await Promise.all(
        this.files.map((file) => fs.readFile(file.file, "utf8"))
      );

      this.parseInitialManifest(this.files[0], manifests[0]);

      for (let i = 1; i < manifests.length; i++) {
        this.appendManifestToParent(this.files[i], manifests[i]);
      }

      this.parent.setAttribute(
        "mediaPresentationDuration",
        this.getDurationStringFromTime(this.totalDuration)
      );

      const combinedManifest = this.getSerializer().serializeToString(
        this.parent
      );

      const finalManifest = `<?xml version="1.0" ?>\n${combinedManifest}`;

      await fs.writeFile(this.output, finalManifest);

      const end = new Date();
      console.log("Time taken:", end - start, "ms");

      return end - start;
    } catch (err) {
      console.error("[MPDCombiner] Error :", err);
    }
  }

  /**
   * Parses the first manifest and makes its <MPD> element the root of the
   * combined document. Starts a fresh accumulation: the running total
   * duration and the set of used Period ids are cleared first, so that
   * combining twice with the same instance does not inflate the result.
   *
   * @param {{name: string}} file - Descriptor of the first manifest.
   * @param {string} mpdFileString - XML text of the first manifest.
   * @throws {Error} When no <MPD> element is found, or when the Period or
   *   SegmentTemplate processing fails.
   */
  parseInitialManifest(file, mpdFileString) {
    console.log("[MPDCombiner] Parsing initial manifest");

    this.totalDuration = 0;
    this.usedPeriodIds = new Set();

    const doc = this.getParser().parseFromString(mpdFileString, "text/xml");
    this.rootDoc = doc;

    const mpds = doc.getElementsByTagNameNS(NAMESPACE, "MPD");

    if (mpds.length < 1) {
      console.error("[MPDCombiner] No MPD elements found in the manifest");
      throw new Error("No MPD elements found in the manifest");
    }

    this.parent = mpds[0];

    if (!this.parent) {
      console.error(
        "[MPDCombiner] Parent MPD element not found in MPD manifest"
      );
      throw new Error("Parent MPD element not found in MPD manifest");
    }

    this.modifyPeriodAttrib(this.parent, file);

    this.modifySegmentTemplate(this.parent, file);

    console.log("[MPDCombiner] Initial manifest parsed successfully");
  }

  /**
   * Parses one further manifest and appends its first <Period> (preceded by
   * a comment holding the file name) to the combined root element.
   *
   * A manifest without an <MPD> element is logged and skipped rather than
   * aborting the whole run.
   *
   * NOTE(review): the Period and the helper nodes belong to the appended
   * manifest's document and are attached to the root document without
   * importNode/adoptNode; this relies on the DOM implementation accepting
   * cross-document appendChild — confirm for the xmldom version in use.
   *
   * @param {{name: string}} file - Descriptor of the manifest to append.
   * @param {string} mpdFileString - XML text of the manifest to append.
   * @throws {Error} When the Period or SegmentTemplate processing fails.
   */
  appendManifestToParent(file, mpdFileString) {
    const doc = this.getParser().parseFromString(mpdFileString, "text/xml");

    const mpds = doc.getElementsByTagNameNS(NAMESPACE, "MPD");

    if (mpds.length < 1) {
      console.error("No MPD elements found in the manifest");
      return;
    }

    const root = mpds[0];

    const period = this.modifyPeriodAttrib(root, file);

    this.modifySegmentTemplate(root, file);

    this.parent.appendChild(this.getNewLineNode(doc));
    this.parent.appendChild(doc.createComment(file.name));
    this.parent.appendChild(this.getNewLineNode(doc));
    this.parent.appendChild(period);
    this.parent.appendChild(this.getNewLineNode(doc));
  }
}

// Driver: load the list of manifests from new_files.json and combine them
// into ./to_upload.mpd.
const output = "./to_upload.mpd";
const files = JSON.parse(fsync.readFileSync("new_files.json"));

// getCombinedManifest() logs its own errors, so the promise is not awaited.
new MPDCombiner(files, output).getCombinedManifest();

This returns an MPD file of the following format.

<?xml version="1.0" ?>
<MPD mediaPresentationDuration="PT24.000S" minBufferTime="PT5.00S" profiles="urn:mpeg:dash:profile:isoff-live:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:cenc="urn:mpeg:cenc:2013" xmlns:mspr="urn:microsoft:playready">
  <!-- Created with Bento4 mp4-dash.py, VERSION=2.0.0-641 -->
  <Period duration="PT8.000S" id="feet_apart_arms_down_ai_and_non_ai_drm">
    <!-- Video -->
    <AdaptationSet maxHeight="1080" maxWidth="1920" mimeType="video/mp4" segmentAlignment="true" startWithSAP="1">
      <!-- MPEG Common Encryption -->
      <ContentProtection cenc:default_KID="(kid here)" schemeIdUri="urn:mpeg:dash:mp4protection:2011" value="cenc"/>
      <!-- PlayReady -->
      <ContentProtection schemeIdUri="urn:uuid:(uuid here)" value="2.0">
        <mspr:pro>(some text here)</mspr:pro>
        <cenc:pssh>(some text here)</cenc:pssh>
      </ContentProtection>
      <SegmentTemplate duration="5000" initialization="feet_apart_arms_down_ai_and_non_ai_drm/$RepresentationID$/init.mp4" media="feet_apart_arms_down_ai_and_non_ai_drm/$RepresentationID$/seg-$Number$.m4s" startNumber="1" timescale="1000"/>
      <Representation bandwidth="10415584" codecs="avc1.64002A" frameRate="50" height="1080" id="video/avc1/1" scanType="progressive" width="1920"/>
      <Representation bandwidth="4142673" codecs="avc1.64002A" frameRate="50" height="1080" id="video/avc1/2" scanType="progressive" width="1920"/>
      <Representation bandwidth="1589566" codecs="avc1.64002A" frameRate="50" height="1080" id="video/avc1/3" scanType="progressive" width="1920"/>
    </AdaptationSet>
    <!-- Audio -->
    <AdaptationSet lang="en" mimeType="audio/mp4" segmentAlignment="true" startWithSAP="1">
      <!-- MPEG Common Encryption -->
      <ContentProtection cenc:default_KID="(kid here)" schemeIdUri="urn:mpeg:dash:mp4protection:2011" value="cenc"/>
      <!-- PlayReady -->
      <ContentProtection schemeIdUri="urn:uuid:(uuid here)" value="2.0">
        <mspr:pro>(some text here)</mspr:pro>
        <cenc:pssh>(some text here)</cenc:pssh>
      </ContentProtection>
      <SegmentTemplate duration="5000" initialization="feet_apart_arms_down_ai_and_non_ai_drm/$RepresentationID$/init.mp4" media="feet_apart_arms_down_ai_and_non_ai_drm/$RepresentationID$/seg-$Number$.m4s" startNumber="1" timescale="1000"/>
      <Representation audioSamplingRate="48000" bandwidth="139455" codecs="mp4a.40.2" id="audio/en/mp4a.40.2">
        <AudioChannelConfiguration schemeIdUri="urn:mpeg:mpegB:cicp:ChannelConfiguration" value="2"/>
      </Representation>
    </AdaptationSet>
  </Period>

  <!--feet_apart_arms_down_ai_and_non_ai_drm--> 
  <Period duration="PT8.000S" id="feet_apart_arms_down_ai_and_non_ai_drm">
    <!-- Video -->
    <AdaptationSet maxHeight="1080" maxWidth="1920" mimeType="video/mp4" segmentAlignment="true" startWithSAP="1">
      <!-- MPEG Common Encryption -->
      <ContentProtection cenc:default_KID="(kid here)" schemeIdUri="urn:mpeg:dash:mp4protection:2011" value="cenc"/>
      <!-- PlayReady -->
      <ContentProtection schemeIdUri="urn:uuid:(uuid here)" value="2.0">
        <mspr:pro>(some text here)</mspr:pro>
        <cenc:pssh>(some text here)</cenc:pssh>
      </ContentProtection>
      <SegmentTemplate duration="5000" initialization="feet_apart_arms_down_ai_and_non_ai_drm/$RepresentationID$/init.mp4" media="feet_apart_arms_down_ai_and_non_ai_drm/$RepresentationID$/seg-$Number$.m4s" startNumber="1" timescale="1000"/>
      <Representation bandwidth="10415584" codecs="avc1.64002A" frameRate="50" height="1080" id="video/avc1/1" scanType="progressive" width="1920"/>
      <Representation bandwidth="4142673" codecs="avc1.64002A" frameRate="50" height="1080" id="video/avc1/2" scanType="progressive" width="1920"/>
      <Representation bandwidth="1589566" codecs="avc1.64002A" frameRate="50" height="1080" id="video/avc1/3" scanType="progressive" width="1920"/>
    </AdaptationSet>
    <!-- Audio -->
    <AdaptationSet lang="en" mimeType="audio/mp4" segmentAlignment="true" startWithSAP="1">
      <!-- MPEG Common Encryption -->
      <ContentProtection cenc:default_KID="(kid here)" schemeIdUri="urn:mpeg:dash:mp4protection:2011" value="cenc"/>
      <!-- PlayReady -->
      <ContentProtection schemeIdUri="urn:uuid:(uuid here)" value="2.0">
        <mspr:pro>(some text here)</mspr:pro>
        <cenc:pssh>(some text here)</cenc:pssh>
      </ContentProtection>
      <SegmentTemplate duration="5000" initialization="feet_apart_arms_down_ai_and_non_ai_drm/$RepresentationID$/init.mp4" media="feet_apart_arms_down_ai_and_non_ai_drm/$RepresentationID$/seg-$Number$.m4s" startNumber="1" timescale="1000"/>
      <Representation audioSamplingRate="48000" bandwidth="139455" codecs="mp4a.40.2" id="audio/en/mp4a.40.2">
        <AudioChannelConfiguration schemeIdUri="urn:mpeg:mpegB:cicp:ChannelConfiguration" value="2"/>
      </Representation>
    </AdaptationSet>
  </Period> 

  <!--feet_apart_arms_down_ai_and_non_ai_drm--> 
  <Period duration="PT8.000S" id="feet_apart_arms_down_ai_and_non_ai_drm">
    <!-- Video -->
    <AdaptationSet maxHeight="1080" maxWidth="1920" mimeType="video/mp4" segmentAlignment="true" startWithSAP="1">
      <!-- MPEG Common Encryption -->
      <ContentProtection cenc:default_KID="(kid here)" schemeIdUri="urn:mpeg:dash:mp4protection:2011" value="cenc"/>
      <!-- PlayReady -->
      <ContentProtection schemeIdUri="urn:uuid:(uuid here)" value="2.0">
        <mspr:pro>(some text here)</mspr:pro>
        <cenc:pssh>(some text here)</cenc:pssh>
      </ContentProtection>
      <SegmentTemplate duration="5000" initialization="feet_apart_arms_down_ai_and_non_ai_drm/$RepresentationID$/init.mp4" media="feet_apart_arms_down_ai_and_non_ai_drm/$RepresentationID$/seg-$Number$.m4s" startNumber="1" timescale="1000"/>
      <Representation bandwidth="10415584" codecs="avc1.64002A" frameRate="50" height="1080" id="video/avc1/1" scanType="progressive" width="1920"/>
      <Representation bandwidth="4142673" codecs="avc1.64002A" frameRate="50" height="1080" id="video/avc1/2" scanType="progressive" width="1920"/>
      <Representation bandwidth="1589566" codecs="avc1.64002A" frameRate="50" height="1080" id="video/avc1/3" scanType="progressive" width="1920"/>
    </AdaptationSet>
    <!-- Audio -->
    <AdaptationSet lang="en" mimeType="audio/mp4" segmentAlignment="true" startWithSAP="1">
      <!-- MPEG Common Encryption -->
      <ContentProtection cenc:default_KID="(kid here)" schemeIdUri="urn:mpeg:dash:mp4protection:2011" value="cenc"/>
      <!-- PlayReady -->
      <ContentProtection schemeIdUri="urn:uuid:(uuid here)" value="2.0">
        <mspr:pro>(some text here)</mspr:pro>
        <cenc:pssh>(some text here)</cenc:pssh>
      </ContentProtection>
      <SegmentTemplate duration="5000" initialization="feet_apart_arms_down_ai_and_non_ai_drm/$RepresentationID$/init.mp4" media="feet_apart_arms_down_ai_and_non_ai_drm/$RepresentationID$/seg-$Number$.m4s" startNumber="1" timescale="1000"/>
      <Representation audioSamplingRate="48000" bandwidth="139455" codecs="mp4a.40.2" id="audio/en/mp4a.40.2">
        <AudioChannelConfiguration schemeIdUri="urn:mpeg:mpegB:cicp:ChannelConfiguration" value="2"/>
      </Representation>
    </AdaptationSet>
  </Period> 
</MPD>

The issue I'm facing is that when I play the combined MPD file in Shaka Player, a few milliseconds are cut from the start of each merged video (i.e., each Period). I'm not sure why this is happening.

For reference, here is a normal MPD file for one video:

<?xml version="1.0" ?>
<MPD mediaPresentationDuration="PT24.000S" minBufferTime="PT5.00S" profiles="urn:mpeg:dash:profile:isoff-live:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:cenc="urn:mpeg:cenc:2013" xmlns:mspr="urn:microsoft:playready">
  <!-- Created with Bento4 mp4-dash.py, VERSION=2.0.0-641 -->
  <Period>
    <!-- Video -->
    <AdaptationSet maxHeight="1080" maxWidth="1920" mimeType="video/mp4" segmentAlignment="true" startWithSAP="1">
      <!-- MPEG Common Encryption -->
      <ContentProtection cenc:default_KID="(kid here)" schemeIdUri="urn:mpeg:dash:mp4protection:2011" value="cenc"/>
      <!-- PlayReady -->
      <ContentProtection schemeIdUri="urn:uuid:(uuid here)" value="2.0">
        <mspr:pro>(some text here)=</mspr:pro>
        <cenc:pssh>(some text here)</cenc:pssh>
      </ContentProtection>
      <SegmentTemplate duration="5000" initialization="$RepresentationID$/init.mp4" media="$RepresentationID$/seg-$Number$.m4s" startNumber="1" timescale="1000"/>
      <Representation bandwidth="9616772" codecs="avc1.64002A" frameRate="50" height="1080" id="video/avc1/1" scanType="progressive" width="1920"/>
      <Representation bandwidth="3649888" codecs="avc1.64002A" frameRate="50" height="1080" id="video/avc1/2" scanType="progressive" width="1920"/>
      <Representation bandwidth="1416417" codecs="avc1.64002A" frameRate="50" height="1080" id="video/avc1/3" scanType="progressive" width="1920"/>
    </AdaptationSet>
    <!-- Audio -->
    <AdaptationSet lang="en" mimeType="audio/mp4" segmentAlignment="true" startWithSAP="1">
      <!-- MPEG Common Encryption -->
      <ContentProtection cenc:default_KID="(kid here)" schemeIdUri="urn:mpeg:dash:mp4protection:2011" value="cenc"/>
      <!-- PlayReady -->
      <ContentProtection schemeIdUri="urn:uuid:(uuid here)" value="2.0">
        <mspr:pro>(some text here)=</mspr:pro>
        <cenc:pssh>(some text here)</cenc:pssh>
      </ContentProtection>
      <SegmentTemplate duration="5000" initialization="$RepresentationID$/init.mp4" media="$RepresentationID$/seg-$Number$.m4s" startNumber="1" timescale="1000"/>
      <Representation audioSamplingRate="48000" bandwidth="141315" codecs="mp4a.40.2" id="audio/en/mp4a.40.2">
        <AudioChannelConfiguration schemeIdUri="urn:mpeg:mpegB:cicp:ChannelConfiguration" value="2"/>
      </Representation>
    </AdaptationSet>
  </Period>
</MPD>

Any ideas on why this is happening and how I can fix it would be appreciated. Thanks in advance!

avelad commented 4 months ago

Can you provide us with the url of the final file so we can see what is happening? Thanks!

shaka-bot commented 4 months ago

Closing due to inactivity. If this is still an issue for you or if you have further questions, the OP can ask shaka-bot to reopen it by including @shaka-bot reopen in a comment.