Answer the question
In order to leave comments, you need to log in
How do I correctly transcribe an audio file with speaker separation (diarization) in Google Cloud Speech-to-Text?
I'm trying to transcribe a test audio file with the text split by speaker (the two sides of the conversation) using Google Cloud.
The audio itself is recognized, but the result is not separated by speaker. Here is the relevant piece of PHP code:
<?php
// # Includes the autoloader for libraries installed with composer
require __DIR__ . '/vendor/autoload.php';
use Google\Cloud\Speech\V1\RecognitionAudio;
use Google\Cloud\Speech\V1\RecognitionConfig;
use Google\Cloud\Speech\V1\RecognitionConfig\AudioEncoding;
use Google\Cloud\Speech\V1\SpeakerDiarizationConfig;
use Google\Cloud\Speech\V1\SpeechClient;
// Transcribes a local WAV file with Google Cloud Speech-to-Text (V1) and
// prints, for each recognized word, the speaker the service assigned it to.
//
// Fix over the original script: the diarization settings were being passed to
// setEnableSeparateRecognitionPerChannel() / setAudioChannelCount(), which
// control multi-channel recognition, not speaker diarization. Diarization has
// to be requested through a SpeakerDiarizationConfig attached with
// setDiarizationConfig(); otherwise WordInfo::getSpeakerTag() carries no
// speaker information.

// Point the client library at the service-account key file.
putenv("GOOGLE_APPLICATION_CREDENTIALS=" . __DIR__ . "/путь до моего json-файла");

$audioFile = __DIR__ . '/test_call.wav';
$encoding = AudioEncoding::LINEAR16;
$sampleRateHertz = 8000;
$languageCode = 'ru-RU';
$enableSpeakerDiarization = true;
$diarizationSpeakerCount = 2;
$model = 'phone_call';

// Read the whole audio file into memory; stop early if it cannot be read
// instead of sending an empty request to the API.
$content = file_get_contents($audioFile);
if ($content === false) {
    fwrite(STDERR, 'Unable to read audio file: ' . $audioFile . PHP_EOL);
    exit(1);
}

// Wrap the raw bytes as the audio payload of the request.
$audio = (new RecognitionAudio())
    ->setContent($content);

// Ask for speaker diarization with a fixed number of speakers
// (min == max == $diarizationSpeakerCount).
$diarizationConfig = (new SpeakerDiarizationConfig())
    ->setEnableSpeakerDiarization($enableSpeakerDiarization)
    ->setMinSpeakerCount($diarizationSpeakerCount)
    ->setMaxSpeakerCount($diarizationSpeakerCount);

// NOTE(review): the audio channel count is intentionally no longer set from
// the speaker count. If test_call.wav is actually a multi-channel recording,
// add ->setAudioChannelCount(<channels>) back with the real channel count —
// confirm against the file.
$config = (new RecognitionConfig())
    ->setEncoding($encoding)
    ->setSampleRateHertz($sampleRateHertz)
    ->setLanguageCode($languageCode)
    ->setDiarizationConfig($diarizationConfig)
    ->setModel($model);

// Create the speech client.
$client = new SpeechClient();
// $client->useApplicationDefaultCredentials();

try {
    // Start the asynchronous recognize operation and block until it finishes.
    $operation = $client->longRunningRecognize($config, $audio);
    $operation->pollUntilComplete();

    if ($operation->operationSucceeded()) {
        $response = $operation->getResult();
        $results = $response->getResults();

        // Each result is for a consecutive portion of the audio. Iterate
        // through them to print the transcript of the entire file.
        foreach ($results as $result) {
            $alternatives = $result->getAlternatives();
            if (count($alternatives) === 0) {
                continue;
            }
            $mostLikely = $alternatives[0];
            printf('Transcript: %s' . PHP_EOL, $mostLikely->getTranscript());
            printf('Confidence: %s' . PHP_EOL, $mostLikely->getConfidence());
        }

        // With diarization enabled, the word list of the LAST result holds
        // the words of the whole audio together with their speaker tags, so
        // speaker attribution is read from there rather than from every
        // partial result.
        $resultCount = count($results);
        if ($resultCount > 0) {
            $lastAlternatives = $results[$resultCount - 1]->getAlternatives();
            if (count($lastAlternatives) > 0) {
                foreach ($lastAlternatives[0]->getWords() as $wordInfo) {
                    printf(
                        'word: %s speaker_tag: %d' . PHP_EOL,
                        $wordInfo->getWord(),
                        $wordInfo->getSpeakerTag()
                    );
                }
            }
        }
    } else {
        print_r($operation->getError());
    }
} finally {
    // Release the client's resources even if the request throws.
    $client->close();
}
Answer the question
In order to leave comments, you need to log in
Didn't find what you were looking for?
Ask your question
731 491 924 answers to any question