You are reading help file online using chmlib.com
|
This document is intended to help developers of speech recognition (SR) applications use the Microsoft speech recognition and audio APIs to persist or store the WAV audio recognized by an SR engine. The topics covered include:
The following are typical scenarios that would need to store the wav audio recognized by the SR engine:
Follow these basic steps to retrieve and store recognized wav audio:
Note: Error handling is omitted for brevity
{
    // Sample flow: enable audio retention on a shared recognition context, wait
    // for one dictation recognition, then copy the retained audio for that
    // recognition into a wav file on disk.
    // Note: error handling is omitted for brevity ("Check hr" marks each spot
    // where a real application should test the HRESULT).
    HRESULT hr = S_OK;
    CComPtr<ISpRecoContext> cpRecoContext;
    CComPtr<ISpRecoGrammar> cpRecoGrammar;
    CComPtr<ISpRecoResult> cpRecoResult;
    CComPtr<ISpStreamFormat> cpStreamFormat;
    CSpEvent spEvent;

    // format for storing the audio
    const SPSTREAMFORMAT spFormat = SPSF_22kHz8BitMono;
    CSpStreamFormat Fmt(spFormat, &hr);
    // Check hr

    // Create shared recognition context for receiving events
    hr = cpRecoContext.CoCreateInstance(CLSID_SpSharedRecoContext);
    // Check hr

    // Create a grammar
    hr = cpRecoContext->CreateGrammar(NULL, &cpRecoGrammar);
    // Check hr

    // Load dictation
    hr = cpRecoGrammar->LoadDictation(NULL, SPLO_STATIC);
    // Check hr

    // Enable audio retention in the SAPI runtime, and set the retained audio format
    hr = cpRecoContext->SetAudioOptions(SPAO_RETAIN_AUDIO, &Fmt.FormatId(), Fmt.WaveFormatExPtr());
    // Check hr

    // activate dictation
    hr = cpRecoGrammar->SetDictationState(SPRS_ACTIVE);
    // Check hr

    // wait 15 seconds for an event to occur (specifically, the default event, recognition)
    hr = cpRecoContext->WaitForNotifyEvent(15000);
    if (S_OK == hr)
    {
        // retrieve the event from the recognition context
        hr = spEvent.GetFrom(cpRecoContext);
        if (S_OK == hr)
        {
            // verify that the event is a recognition event
            if (SPEI_RECOGNITION == spEvent.eEventId)
            {
                // store the recognition result pointer
                cpRecoResult = spEvent.RecoResult();
                // release recognition result pointer in event object
                spEvent.Clear();
            }
        }
    }

    // deactivate dictation (only processing one recognition in sample code)
    hr = cpRecoGrammar->SetDictationState(SPRS_INACTIVE);
    // Check hr

    // unload dictation
    hr = cpRecoGrammar->UnloadDictation();
    // Check hr

    // if recognition received, and result stored then store the audio
    if (cpRecoResult)
    {
        // get stream pointer to recognized audio
        // Note: specifying 0 for the start element and element count defaults to the
        // entire recognized audio stream. Correction UI may only need a subset of the
        // audio for playback
        hr = cpRecoResult->GetAudio(0, 0, &cpStreamFormat);
        // Check hr

        // basic SAPI-stream for file-based storage
        CComPtr<ISpStream> cpStream;
        ULONG cbWritten = 0;

        // create file on hard-disk for storing recognized audio, and specify audio
        // format as the retained audio format
        hr = SPBindToFile(L"c:\\recognized_audio.wav", SPFM_CREATE_ALWAYS, &cpStream, &Fmt.FormatId(), Fmt.WaveFormatExPtr(), SPFEI_ALL_EVENTS);
        // Check hr

        // for logging purposes, the app can retrieve the recognized audio stream
        // length in bytes. Queried once, before the copy loop, because nothing in the
        // loop uses it. STATFLAG_NONAME asks the stream not to return a name string,
        // which the caller would otherwise be responsible for freeing.
        STATSTG stats;
        hr = cpStreamFormat->Stat(&stats, STATFLAG_NONAME);
        // Check hr

        // Continuously transfer data between the two streams until no more data is
        // found (i.e. end of stream)
        // Note: only transfer 1000 bytes at a time to avoid creating large chunks of
        // data at one time
        const ULONG cbChunk = 1000;
        BYTE bBuffer[cbChunk];
        while (TRUE)
        {
            // initialized so the end-of-stream test below never reads an
            // indeterminate value when Read fails without setting it
            ULONG cbRead = 0;

            // request one chunk of data from the input stream
            hr = cpStreamFormat->Read(bBuffer, cbChunk, &cbRead);

            // if data was returned...
            if (SUCCEEDED(hr) && cbRead > 0)
            {
                // then transfer/write the audio to the file-based stream
                hr = cpStream->Write(bBuffer, cbRead, &cbWritten);
                // Check hr
            }

            // since there is no more data being added to the input stream, if the
            // read request returned less than expected, the end of stream was
            // reached, so break data transfer loop
            if (cbRead < cbChunk)
            {
                break;
            }
        }

        // explicitly close the file-based stream to flush file data and allow app to
        // immediately use the file (done here, while cpStream is still in scope)
        hr = cpStream->Close();
    }
}
Scripting code is similar to Visual Basic.
Option Explicit

' Shared recognition context; declared WithEvents so this form receives its SR events
Private WithEvents RecoContext As SpSharedRecoContext
' Grammar created from the recognition context
Private Grammar As ISpeechRecoGrammar
' Application startup: create the recognition context and grammar, turn on
' audio retention, then load and activate dictation
Private Sub Form_Load()
    ' New shared recognition context (an inproc context works similarly)
    Set RecoContext = New SpSharedRecoContext

    With RecoContext
        ' Grammar belonging to this context
        Set Grammar = .CreateGrammar

        ' Ask SAPI to retain the recognized audio
        .RetainedAudio = SRAORetainAudio

        ' Optional: choose a lower-quality retained audio format for smaller size
        ' .RetainedAudioFormat = ???
    End With

    With Grammar
        ' Load dictation, then make it active
        .DictationLoad
        .DictationSetState SGDSActive
    End With
End Sub
' Recognition event was received: copy the recognized audio into a wav file,
' then deactivate dictation and shut down (the sample handles one recognition only)
Private Sub RecoContext_Recognition(ByVal StreamNumber As Long, ByVal StreamPosition As Variant, ByVal RecognitionType As SpeechLib.SpeechRecognitionType, ByVal Result As SpeechLib.ISpeechRecoResult)
    ' Number of bytes requested from the audio stream per read
    Const ChunkSize As Long = 1000

    ' Create new file-based stream for audio storage
    Dim FileStream As New SpFileStream

    ' Variable for accessing the recognized audio stream
    Dim AudioStream As SpMemoryStream

    ' Retrieve recognized audio from result object
    ' Note: application can also retrieve smaller portions of the audio stream by
    ' specifying a starting phrase element and phrase element length
    Set AudioStream = Result.Audio

    ' Setup the file-based stream format with the same format as the audio stream format
    Set FileStream.Format = AudioStream.Format

    ' Create a file on the hard-disk for storing the recognized audio
    FileStream.Open "c:\recognized_audio.wav", SSFMCreateForWrite

    Dim Buffer As Variant ' Buffer for storing stream data
    Dim lRead As Long ' Amount of data read from the stream
    Dim lWritten As Long ' Amount of data written to the stream

    ' Continuously transfer data between the two streams until no more data is
    ' found (i.e. end of stream)
    ' Note: only transfer ChunkSize bytes at a time to avoid creating large chunks
    ' of data at one time
    Do While True
        ' read one chunk of stream data
        lRead = AudioStream.Read(Buffer, ChunkSize)

        ' if data was retrieved, then transfer/write it to the file-based stream
        If (lRead > 0) Then
            lWritten = FileStream.Write(Buffer)
        End If

        ' Since the input stream will not increase in size, the number of bytes read
        ' will only be less than requested if there is no more data to be transferred
        If lRead < ChunkSize Then
            Exit Do ' exit if no more data
        End If
    Loop

    ' close the file-based stream
    ' Note: The stream will be closed automatically when the object is released, but
    ' explicit closing enables app to immediately use the file stream's data
    FileStream.Close

    ' Sample code will deactivate and unload dictation, then shutdown after one recognition
    Grammar.DictationSetState SGDSInactive
    Grammar.DictationUnload
    Unload Me ' shutdown app
End Sub
You are reading help file online using chmlib.com
|