Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add partial results support to the gpu batch recognizer #1554

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions c/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
CFLAGS=-I../src
LDFLAGS=-L../src -lvosk -ldl -lpthread -Wl,-rpath,../src

all: test_vosk test_vosk_speaker
all: test_vosk_gpu_batch test_vosk test_vosk_speaker

test_vosk_gpu_batch: test_vosk_gpu_batch.o
gcc $^ -o $@ $(LDFLAGS)

test_vosk: test_vosk.o
gcc $^ -o $@ $(LDFLAGS)
Expand All @@ -13,4 +16,4 @@ test_vosk_speaker: test_vosk_speaker.o
gcc $(CFLAGS) -c -o $@ $<

clean:
rm -f *.o *.a test_vosk test_vosk_speaker
rm -f *.o *.a test_vosk_gpu_batch test_vosk test_vosk_speaker
35 changes: 35 additions & 0 deletions c/test_vosk_gpu_batch.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#include <vosk_api.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>

/*
 * Demo driver for the GPU batch recognizer.
 *
 * Loads the model from the "model" directory, streams "test.wav" to a
 * batch recognizer in 3200-byte chunks and, after each chunk has been
 * processed, prints the front result if one is available or the current
 * partial result otherwise.
 *
 * Returns 0 on success and 1 if the model, the recognizer or the input
 * file could not be set up.
 */
int main() {
    FILE *wavin;
    char buf[3200];
    size_t nread;

    vosk_gpu_init();

    VoskBatchModel *model = vosk_batch_model_new("model");
    if (model == NULL) {
        fprintf(stderr, "Failed to create the batch model\n");
        return 1;
    }

    VoskBatchRecognizer *recognizer = vosk_batch_recognizer_new(model, 16000.0);
    if (recognizer == NULL) {
        fprintf(stderr, "Failed to create the batch recognizer\n");
        vosk_batch_model_free(model);
        return 1;
    }

    wavin = fopen("test.wav", "rb");
    if (wavin == NULL) {
        perror("test.wav");
        vosk_batch_recognizer_free(recognizer);
        vosk_batch_model_free(model);
        return 1;
    }

    /* Skip the first 44 bytes (presumably the WAV header - confirm for
     * files with extra RIFF chunks). */
    if (fseek(wavin, 44, SEEK_SET) != 0) {
        perror("fseek");
        fclose(wavin);
        vosk_batch_recognizer_free(recognizer);
        vosk_batch_model_free(model);
        return 1;
    }

    /* Loop on the fread() result instead of feof(): feof() only becomes
     * true after a read has already failed, which would feed one extra
     * empty chunk to the recognizer. */
    while ((nread = fread(buf, 1, sizeof(buf), wavin)) > 0) {
        vosk_batch_recognizer_accept_waveform(recognizer, buf, (int)nread);

        /* Wait until the submitted audio has been consumed. */
        while (vosk_batch_recognizer_get_pending_chunks(recognizer) > 0) {
            usleep(1000);
        }

        const char *result = vosk_batch_recognizer_front_result(recognizer);
        if (result != NULL && result[0] != '\0') {
            printf("%s\n", result);
            vosk_batch_recognizer_pop(recognizer);
        } else {
            /* The partial result is NULL when the library is built
             * without CUDA; never hand NULL to printf("%s"). */
            const char *partial = vosk_batch_recognizer_partial_result(recognizer);
            if (partial != NULL) {
                printf("%s\n", partial);
            }
        }
    }

    fclose(wavin);
    vosk_batch_recognizer_free(recognizer);
    vosk_batch_model_free(model);
    return 0;
}
36 changes: 27 additions & 9 deletions src/batch_recognizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ BatchRecognizer::BatchRecognizer(BatchModel *model, float
resampler_ = new LinearResample(
sample_frequency, 16000.0f,
std::min(sample_frequency / 2, 16000.0f / 2), 6);

partial_result_ = "{\n \"partial\" : \"\"\n}";
}

BatchRecognizer::~BatchRecognizer() {
Expand Down Expand Up @@ -114,27 +116,38 @@ void BatchRecognizer::SetNLSML(bool nlsml)

void BatchRecognizer::AcceptWaveform(const char *data, int len)
{
#define KALDI_BESTPATH_LOGS_ON 0

uint64_t id = id_;
if (!callbacks_set_) {
// Define the callback for results.
#if 0
model_->cuda_pipeline_->SetBestPathCallback(
// Define the callbacks for results.
model_->cuda_pipeline_->SetBestPathCallback(
id,
[&, id](const std::string &str, bool partial,
bool endpoint_detected) {
bool endpoint_detected) {
if (partial) {
KALDI_LOG << "id #" << id << " [partial] : " << str << ":";
#if KALDI_BESTPATH_LOGS_ON
KALDI_LOG << "id #" << id << " [partial] : " << str << ":";
#endif
partial_result_ = "{\n \"partial\" : \"" + str + "\"\n}"; // json-like partial result format
}

if (endpoint_detected) {
KALDI_LOG << "id #" << id << " [endpoint detected]";
#if KALDI_BESTPATH_LOGS_ON
KALDI_LOG << "id #" << id << " [endpoint detected]";
#endif
if(!partial) {
partial_result_ = "{\n \"partial\" : \"\"\n}"; // clear partial result
}
}

if (!partial) {
KALDI_LOG << "id #" << id << " : " << str;
#if KALDI_BESTPATH_LOGS_ON
KALDI_LOG << "id #" << id << " : " << str;
#endif
}
});
#endif
});

model_->cuda_pipeline_->SetLatticeCallback(
id,
[&, id](SegmentedLatticeCallbackParams& params) {
Expand Down Expand Up @@ -188,6 +201,11 @@ const char* BatchRecognizer::FrontResult()
return results_.front().c_str();
}

// Gives access to the latest partial hypothesis kept in partial_result_.
// The returned pointer refers to storage owned by that member, so it is
// only usable until the member is next modified.
// NOTE(review): partial_result_ is assigned inside the best-path callback;
// if the pipeline runs that callback on another thread, this read is
// unsynchronized - confirm and guard if needed.
const char *BatchRecognizer::PartialResult()
{
    const std::string &latest = partial_result_;
    return latest.c_str();
}

void BatchRecognizer::Pop()
{
if (results_.empty()) {
Expand Down
2 changes: 2 additions & 0 deletions src/batch_recognizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class BatchRecognizer {
void AcceptWaveform(const char *data, int len);
int GetNumPendingChunks();
const char *FrontResult();
const char *PartialResult();
void Pop();
void FinishStream();
void SetNLSML(bool nlsml);
Expand All @@ -48,6 +49,7 @@ class BatchRecognizer {
bool nlsml_;
float sample_frequency_;
std::queue<std::string> results_;
std::string partial_result_;
LinearResample *resampler_;
kaldi::Vector<BaseFloat> buffer_;
};
Expand Down
8 changes: 8 additions & 0 deletions src/vosk_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,14 @@ void vosk_batch_recognizer_pop(VoskBatchRecognizer *recognizer)
#endif
}

// C entry point that forwards to BatchRecognizer::PartialResult().
// When HAVE_CUDA is not enabled there is no batch recognizer to query and
// NULL is returned instead.
const char *vosk_batch_recognizer_partial_result(VoskBatchRecognizer *recognizer)
{
#if !HAVE_CUDA
    return NULL;
#else
    BatchRecognizer *impl = reinterpret_cast<BatchRecognizer *>(recognizer);
    return impl->PartialResult();
#endif
}

int vosk_batch_recognizer_get_pending_chunks(VoskBatchRecognizer *recognizer)
{
Expand Down
3 changes: 3 additions & 0 deletions src/vosk_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,9 @@ const char *vosk_batch_recognizer_front_result(VoskBatchRecognizer *recognizer);
/** Release and free first retrieved result */
void vosk_batch_recognizer_pop(VoskBatchRecognizer *recognizer);

/** Return the current partial result of the batch recognizer.
 *
 *  The result is a JSON-like string of the form
 *  <pre>
 *  {
 *    "partial" : "..."
 *  }
 *  </pre>
 *  with an empty "partial" value when no partial hypothesis is stored.
 *
 *  @param recognizer batch recognizer to query
 *  @returns the partial result string, or NULL when the library was built
 *           without CUDA support */
const char *vosk_batch_recognizer_partial_result(VoskBatchRecognizer *recognizer);

/** Get amount of pending chunks for more intelligent waiting */
int vosk_batch_recognizer_get_pending_chunks(VoskBatchRecognizer *recognizer);

Expand Down