Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 40 additions & 32 deletions packages/stt-adapters/speechmatics/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,6 @@

import generateEntitiesRanges from '../generate-entities-ranges/index.js';

/**
 * Groups the words list from speechmatics into paragraphs based on punctuation.
 * A paragraph is closed after every word whose `punct` contains `.`, `?` or `!`.
 * Words that follow the last sentence-ending punctuation are kept as a final
 * paragraph instead of being dropped.
 * @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.
 * @todo As this function is also used in the bbc-kaldi adapter, should it be refactored into its own file?
 * @param {array} words - array of words objects from speechmatics transcript
 * @returns {array} list of paragraphs, each `{ words: [...], text: [...] }`
 */
const groupWordsInParagraphs = (words) => {
  const results = [];
  let paragraph = { words: [], text: [] };

  words.forEach((word) => {
    paragraph.words.push(word);
    paragraph.text.push(word.punct);

    // sentence-ending punctuation closes the current paragraph
    if (/[.?!]/.test(word.punct)) {
      results.push(paragraph);
      paragraph = { words: [], text: [] };
    }
  });

  // flush words that were not followed by sentence-ending punctuation
  if (paragraph.words.length > 0) {
    results.push(paragraph);
  }

  return results;
};

/**
* Determines the speaker of a paragraph by comparing the start time of the paragraph with
* the speaker times.
Expand All @@ -41,14 +14,49 @@ const groupWordsInParagraphs = (words) => {
const getSpeaker = (start, speakers) => {
  // `start` may be passed as a numeric string, so parse it once up front
  // to guarantee a numeric (not lexicographic) comparison below.
  const segmentStart = parseFloat(start);

  // Object.values keeps the tolerance of a for...in loop: a missing
  // speakers list simply falls through to 'UNK' instead of throwing.
  const match = Object.values(speakers || {}).find(
    (speaker) => segmentStart >= speaker.start && segmentStart < speaker.end
  );

  // 'UNK' marks a start time that falls inside no speaker segment
  return match ? match.name : 'UNK';
};

/**
 * Groups the words list from speechmatics into paragraphs. A new paragraph is started
 * when the speaker changes, or when the current paragraph already holds more than
 * `maxParagraphWords` words and the previous word ended a sentence (`.`, `?` or `!`).
 * @param {array} words - array of words objects from speechmatics transcript
 * @param {array} speakers - array of speaker objects from speechmatics transcript
 * @param {int} maxParagraphWords - number of words which trigger a paragraph break
 * @returns {array} list of paragraphs, each `{ words: [...], text: [...], speaker }`;
 * empty when there are no words
 */
const groupWordsInParagraphs = (words, speakers, maxParagraphWords) => {
  const results = [];

  // nothing to group; also avoids reading `start` of a non-existent first word
  if (!words || words.length === 0) {
    return results;
  }

  let paragraph = { words: [], text: [], speaker: '' };
  let oldSpeaker = getSpeaker(words[0].start, speakers);
  let sentenceEnd = false;

  words.forEach((word) => {
    const newSpeaker = getSpeaker(word.start, speakers);
    // only break on length right after a sentence end, so sentences stay intact
    const isTooLong = paragraph.words.length > maxParagraphWords && sentenceEnd;

    if (newSpeaker !== oldSpeaker || isTooLong) {
      // the finished paragraph belongs to the speaker that was active until now
      paragraph.speaker = oldSpeaker;
      results.push(paragraph);
      oldSpeaker = newSpeaker;
      // reset paragraph
      paragraph = { words: [], text: [], speaker: '' };
    }

    paragraph.words.push(word);
    paragraph.text.push(word.punct);
    sentenceEnd = /[.?!]/.test(word.punct);
  });

  // flush the paragraph that was still open when the words ran out
  paragraph.speaker = oldSpeaker;
  results.push(paragraph);

  return results;
};

/**
* Speechmatics treats punctuation marks as separate words. This function merges each punctuation mark with
* the previous word and adjusts the total duration of the word.
Expand Down Expand Up @@ -89,21 +97,21 @@ const speechmaticsToDraft = (speechmaticsJson) => {
tmpSpeakers = speechmaticsJson.speakers;
tmpSpeakers = tmpSpeakers.map((element) => {
return ({
start: element.time,
end: (parseFloat(element.time) + parseFloat(element.duration)).toString(),
start: parseFloat(element.time),
end: (parseFloat(element.time) + parseFloat(element.duration)),
name: element.name,
});
});

const wordsByParagraphs = groupWordsInParagraphs(tmpWords);
const wordsByParagraphs = groupWordsInParagraphs(tmpWords, tmpSpeakers, 150);

wordsByParagraphs.forEach((paragraph) => {
const paragraphStart = paragraph.words[0].start;
const draftJsContentBlockParagraph = {
text: paragraph.text.join(' '),
type: 'paragraph',
data: {
speaker: getSpeaker(paragraphStart, tmpSpeakers),
speaker: paragraph.speaker,
words: paragraph.words,
start: paragraphStart
},
Expand Down
Loading