astefanutti / decktape

PDF exporter for HTML presentations
MIT License
2.16k stars 175 forks source link

Feature Request: Option to Name Files from Slide Headers #343

Closed ullenboom closed 1 week ago

ullenboom commented 1 month ago

An option to automatically name files based on the slide headers would be helpful.

ullenboom commented 1 month ago

I've created a simple Node.js script for my reveal.js slides for anyone interested in an easy solution.

import fs from "fs/promises";
import path from "path";
import { load } from "cheerio";

// Path of the reveal.js HTML file; it is handed to parseRevealJsHtml(), which
// reads it and collects the text of every `section h2` element.
// NOTE: the value is elided in this snippet and must be filled in before running.
const REVEAL_JS_PATH = ;
// Directory containing the screenshots; its entries are listed by
// getSortedScreenshotFiles() and renamed in place by renameFile().
// NOTE: the value is elided in this snippet and must be filled in before running.
const SCREENSHOT_OUTPUT_PATH = ;

/**
 * Reads an HTML file and collects the trimmed text of every `<h2>` nested
 * inside a `<section>`, in document order.
 *
 * @param {string} path - Location of the HTML file to read (UTF-8).
 * @returns {Promise<string[]>} One trimmed heading text per matched `<h2>`.
 */
async function parseRevealJsHtml(path) {
    const html = await fs.readFile(path, "utf8");
    const $ = load(html);
    const headings = [];
    $("section h2").each((_, heading) => {
        headings.push($(heading).text().trim());
    });
    return headings;
}

/**
 * Lists the entries of a directory ordered by the first run of digits found
 * in each name (e.g. `slide_2.png` before `slide_10.png`).
 *
 * Entries whose name contains no digit cannot be placed in the slide
 * sequence, so they are left out of the result instead of raising a
 * TypeError (stray files such as `.DS_Store` would otherwise abort the run).
 *
 * @param {string} path - Directory containing the screenshot files.
 * @returns {Promise<string[]>} File names sorted by ascending numeric index.
 */
async function getSortedScreenshotFiles(path) {
    const filenames = await fs.readdir(path);
    return filenames
        .map((filename) => {
            const digits = filename.match(/\d+/);
            // Parse once per file (not once per comparison) and always in base 10.
            return { filename, index: digits ? Number.parseInt(digits[0], 10) : NaN };
        })
        .filter(({ index }) => !Number.isNaN(index))
        .sort((a, b) => a.index - b.index)
        .map(({ filename }) => filename);
}

/**
 * Combines several arrays position by position.
 *
 * The result has one entry per element of the FIRST array; each entry is an
 * array holding the element at that position from every input. Inputs
 * shorter than the first contribute `undefined`; extra elements of longer
 * inputs are ignored.
 *
 * @param {...Array} arrays - Arrays to combine; at least one is required.
 * @returns {Array<Array>} Tuples of same-position elements.
 */
function zip(...arrays) {
    const [first] = arrays;
    const tupleAt = (position) => arrays.map((source) => source[position]);
    return first.map((_, position) => tupleAt(position));
}

/**
 * Renames a file inside SCREENSHOT_OUTPUT_PATH to a sanitized, lower-cased
 * `.png` name derived from the given text.
 *
 * Every character other than an ASCII letter or digit is replaced by `_`.
 * A more permissive character class such as `[^a-z0-9\-._()[\]]` could be
 * used instead to keep dashes, dots, underscores, parentheses and brackets.
 *
 * @param {string} oldFilename - Current file name, relative to SCREENSHOT_OUTPUT_PATH.
 * @param {string} newFilename - Raw text to derive the new base name from.
 * @returns {Promise<void>} Settles once the rename has completed.
 */
async function renameFile(oldFilename, newFilename) {
    const sanitizedBase = newFilename.replace(/[^a-z0-9]/gi, '_').toLowerCase();
    const sourcePath = path.join(SCREENSHOT_OUTPUT_PATH, oldFilename);
    const targetPath = path.join(SCREENSHOT_OUTPUT_PATH, `${sanitizedBase}.png`);
    await fs.rename(sourcePath, targetPath);
}

/**
 * Script entry point: pairs each slide heading with the screenshot at the
 * same position and renames the screenshot after the heading.
 *
 * Pairs where either side is missing or empty are skipped. Renames run one
 * at a time, in slide order.
 */
(async () => {
    // Promise.all keeps both reads concurrent while observing both promises
    // at once, so a failure in either one is reported through the catch
    // below rather than surfacing as an unhandled rejection.
    const [screenshotFilenames, h2Contents] = await Promise.all([
        getSortedScreenshotFiles(SCREENSHOT_OUTPUT_PATH),
        parseRevealJsHtml(REVEAL_JS_PATH),
    ]);
    for (const [h2Content, oldFilename] of zip(h2Contents, screenshotFilenames)) {
        if (h2Content && oldFilename) {
            await renameFile(oldFilename, h2Content);
        }
    }
})().catch((error) => {
    // Report the failure explicitly and signal it through the exit code.
    console.error(error);
    process.exitCode = 1;
});
ullenboom commented 1 month ago

Ultimately, a Java solution turned out to be much faster to implement. For those interested:

import com.microsoft.playwright.Page;
import com.microsoft.playwright.Playwright;
import java.nio.file.Path;

/**
 * Captures one PNG screenshot per reveal.js slide with Playwright and names
 * each file after the first heading (h1-h6) found on the slide.
 */
public class JavaDeckTape {
  /** Address of the presentation to open (value elided in this snippet; fill it in). */
  static final String SOURCE_URL = ...;
  /** Folder the screenshot paths are resolved against. */
  static final Path OUTPUT_FOLDER = Path.of( "screenshots" );

  /**
   * Opens the presentation, then for each slide reported by
   * {@code Reveal.getTotalSlides()} takes a screenshot and calls
   * {@code Reveal.next()}.
   *
   * @param args command-line arguments (unused)
   */
  public static void main( String[] args ) {
    try ( var playwright = Playwright.create() ) {
      var browser = playwright.chromium().launch();
      var page = browser.newContext().newPage();
      page.navigate( SOURCE_URL );

      // Go through Number instead of casting the Object straight to int, so the
      // conversion does not depend on which Number subtype evaluate() hands back.
      int slideCount = ( (Number) page.evaluate( "() => Reveal.getTotalSlides()" ) ).intValue();
      System.out.println( "Total slides: " + slideCount );

      var options = new Page.ScreenshotOptions();
      // Names already written, lower-cased so that names differing only in case
      // are also treated as duplicates (they collide on case-insensitive file systems).
      var usedFilenames = new java.util.HashSet<String>();

      // NOTE(review): Reveal.next() may step through fragments before moving to
      // the next slide — confirm slideCount iterations reach every slide.
      for ( int index = 1; index <= slideCount; index++ ) {
        var slideHeader = page.evaluate( """
            () => {
              const slide = Reveal.getCurrentSlide();
              const header = slide.querySelector("h1, h2, h3, h4, h5, h6");
              return header ? header.textContent.trim() : "No header found";
            }""" ).toString();
        var baseName = slideHeader.replaceAll( "[^a-zA-Z0-9.-]+", "_" );

        // Slides sharing a header (or lacking one) would otherwise map to the
        // same path and overwrite each other; the first keeps the plain name,
        // later ones get "-2", "-3", ...
        var filename = baseName;
        for ( int duplicate = 2;
              !usedFilenames.add( filename.toLowerCase( java.util.Locale.ROOT ) );
              duplicate++ ) {
          filename = baseName + "-" + duplicate;
        }

        options.setPath( OUTPUT_FOLDER.resolve( "screenshot-" + filename + ".png" ) );
        page.screenshot( options );
        page.evaluate( "() => Reveal.next()" );
      }

      browser.close();
    }
  }
}
astefanutti commented 1 week ago

Thanks a lot for the report. It's in essence similar to #69, and the solution you've shared would be a great basis for a PR.

Let me close this and track the work in #69.