Open Niceblueman opened 7 months ago
Hi! 👋
Firstly, thanks for your work on this project! 🙂
Today I used patch-package to patch slate-transcript-editor@0.1.6-alpha.19 for the project I'm working on.
slate-transcript-editor@0.1.6-alpha.19
Here is the diff that solved my problem:
```diff
diff --git a/node_modules/slate-transcript-editor/util/export-adapters/slate-to-dpe/update-timestamps/plain-text-align-to-slate.js b/node_modules/slate-transcript-editor/util/export-adapters/slate-to-dpe/update-timestamps/plain-text-align-to-slate.js
index be190f0..25a9686 100644
--- a/node_modules/slate-transcript-editor/util/export-adapters/slate-to-dpe/update-timestamps/plain-text-align-to-slate.js
+++ b/node_modules/slate-transcript-editor/util/export-adapters/slate-to-dpe/update-timestamps/plain-text-align-to-slate.js
@@ -1,8 +1,81 @@
-import { alignSTT } from 'stt-align-node';
+// import { alignSTT } from 'stt-align-node';
 import { shortTimecode } from '../../../timecode-converter';
 import countWords from '../../../count-words';
 import generatePreviousTimingsUpToCurrent from '../../../dpe-to-slate/generate-previous-timings-up-to-current';
 
+function alignSTT(sttWords, transcriptText, start, end) {
+  const sttWordsList = sttWords.words;
+  const opCodes = calculateDiff(sttWordsList, transcriptText);
+  const transcriptWords = convertRefTextToList(transcriptText);
+  const alignedResults = alignRefTextWithSTT(
+    opCodes,
+    sttWordsList,
+    transcriptWords,
+    start,
+    end
+  );
+  return alignedResults;
+}
+
+// Function to calculate the difference between two arrays of words
+function calculateDiff(array1, array2) {
+  const opCodes = [];
+
+  // Iterate over the arrays and find the differences
+  let i = 0;
+  let j = 0;
+
+  while (i < array1.length && j < array2.length) {
+    if (array1[i] === array2[j]) {
+      opCodes.push(['equal', i, i + 1, j, j + 1]);
+      i++;
+      j++;
+    } else {
+      opCodes.push(['delete', i, i + 1, j, j]);
+      i++;
+    }
+  }
+  // Handle remaining elements in array1
+  while (i < array1.length) {
+    opCodes.push(['delete', i, i + 1, j, j]);
+    i++;
+  }
+
+  // Handle remaining elements in array2
+  while (j < array2.length) {
+    opCodes.push(['insert', i, i, j, j + 1]);
+    j++;
+  }
+
+  return opCodes;
+}
+
+// Function to convert a text string to a list of words
+function convertRefTextToList(text) {
+  return text.split(/\s+/);
+}
+
+// Function to align reference text with STT output based on calculated diff
+function alignRefTextWithSTT(opCodes, sttWords, refWords, start, end) {
+  let alignedResults = [];
+
+  for (const op of opCodes) {
+    const [tag, i1, i2, j1, j2] = op;
+
+    if (tag === 'equal') {
+      alignedResults.push(...sttWords.slice(i1, i2));
+    } else if (tag === 'delete') {
+      alignedResults.push(...Array(i2 - i1).fill(''));
+    } else if (tag === 'insert') {
+      alignedResults.push(...refWords.slice(j1, j2));
+    }
+  }
+
+  // Trim the result based on the specified start and end indices
+  alignedResults = alignedResults.slice(start, end);
+
+  return alignedResults;
+}
 const createSlateContentFromSlateJsParagraphs = (currentContent, newEntities) => {
   // Update entites to block structure.
   const updatedBlockArray = [];
```
This issue body was partially generated by patch-package.
Hi! 👋
Firstly, thanks for your work on this project! 🙂
Today I used patch-package to patch
slate-transcript-editor@0.1.6-alpha.19
for the project I'm working on. Here is the diff that solved my problem:
This issue body was partially generated by patch-package.