From 067a410a3401ede95118bb5bb3c11ddd329680c8 Mon Sep 17 00:00:00 2001 From: Naomi Carrigan Date: Fri, 7 Feb 2025 15:46:45 -0800 Subject: [PATCH 1/4] feat: support nova 3 and keyterms in listen --- pkg/client/interfaces/v1/types-prerecorded.go | 1 + pkg/client/interfaces/v1/types-stream.go | 1 + pkg/client/listen/v1/rest/client.go | 11 +++++++++++ pkg/client/listen/v1/websocket/new_using_callbacks.go | 5 +++++ pkg/client/listen/v1/websocket/new_using_chan.go | 5 +++++ 5 files changed, 23 insertions(+) diff --git a/pkg/client/interfaces/v1/types-prerecorded.go b/pkg/client/interfaces/v1/types-prerecorded.go index 788b4a82..943d1651 100644 --- a/pkg/client/interfaces/v1/types-prerecorded.go +++ b/pkg/client/interfaces/v1/types-prerecorded.go @@ -31,6 +31,7 @@ type PreRecordedTranscriptionOptions struct { FillerWords bool `json:"filler_words,omitempty" schema:"filler_words,omitempty"` Intents bool `json:"intents,omitempty" schema:"intents,omitempty"` Keywords []string `json:"keywords,omitempty" schema:"keywords,omitempty"` + Keyterms []string `json:"keyterms,omitempty" schema:"keyterms,omitempty"` Language string `json:"language,omitempty" schema:"language,omitempty"` Measurements bool `json:"measurements,omitempty" schema:"measurements,omitempty"` Model string `json:"model,omitempty" schema:"model,omitempty"` diff --git a/pkg/client/interfaces/v1/types-stream.go b/pkg/client/interfaces/v1/types-stream.go index c9804233..bb8239cd 100644 --- a/pkg/client/interfaces/v1/types-stream.go +++ b/pkg/client/interfaces/v1/types-stream.go @@ -25,6 +25,7 @@ type LiveTranscriptionOptions struct { FillerWords bool `json:"filler_words,omitempty" schema:"filler_words,omitempty"` InterimResults bool `json:"interim_results,omitempty" schema:"interim_results,omitempty"` Keywords []string `json:"keywords,omitempty" schema:"keywords,omitempty"` + Keyterms []string `json:"keyterms,omitempty" schema:"keyterms,omitempty"` Language string `json:"language,omitempty" 
schema:"language,omitempty"` Model string `json:"model,omitempty" schema:"model,omitempty"` Multichannel bool `json:"multichannel,omitempty" schema:"multichannel,omitempty"` diff --git a/pkg/client/listen/v1/rest/client.go b/pkg/client/listen/v1/rest/client.go index 87c367f0..710e253b 100644 --- a/pkg/client/listen/v1/rest/client.go +++ b/pkg/client/listen/v1/rest/client.go @@ -16,6 +16,7 @@ import ( "net/http" "net/url" "os" + "strings" klog "k8s.io/klog/v2" @@ -75,6 +76,11 @@ Output parameters: func (c *Client) DoFile(ctx context.Context, filePath string, req *interfaces.PreRecordedTranscriptionOptions, resBody interface{}) error { klog.V(6).Infof("prerecorded.DoFile() ENTER\n") + if len(req.Keyterms) > 0 && !strings.HasPrefix(req.Model, "nova-3") { + klog.V(1).Info("Keyterms are only supported with nova-3 models.") + return nil + } + // file? fileInfo, err := os.Stat(filePath) if err != nil || errors.Is(err, os.ErrNotExist) { @@ -116,6 +122,11 @@ Output parameters: func (c *Client) DoStream(ctx context.Context, src io.Reader, options *interfaces.PreRecordedTranscriptionOptions, resBody interface{}) error { klog.V(6).Infof("prerecorded.DoStream() ENTER\n") + if len(options.Keyterms) > 0 && !strings.HasPrefix(options.Model, "nova-3") { + klog.V(1).Info("Keyterms are only supported with nova-3 models.") + return nil + } + uri, err := version.GetPrerecordedAPI(ctx, c.Options.Host, c.Options.APIVersion, c.Options.Path, options) if err != nil { klog.V(1).Infof("GetPrerecordedAPI failed. 
Err: %v\n", err) diff --git a/pkg/client/listen/v1/websocket/new_using_callbacks.go b/pkg/client/listen/v1/websocket/new_using_callbacks.go index 8c5b1580..3b532a5c 100644 --- a/pkg/client/listen/v1/websocket/new_using_callbacks.go +++ b/pkg/client/listen/v1/websocket/new_using_callbacks.go @@ -6,6 +6,7 @@ package websocketv1 import ( "context" + "strings" klog "k8s.io/klog/v2" @@ -69,6 +70,10 @@ func NewUsingCallbackWithCancel(ctx context.Context, ctxCancel context.CancelFun if apiKey != "" { cOptions.APIKey = apiKey } + if len(tOptions.Keyterms) > 0 && !strings.HasPrefix(tOptions.Model, "nova-3") { + klog.V(1).Info("Keyterms are only supported with nova-3 models.") + return nil, nil + } err := cOptions.Parse() if err != nil { klog.V(1).Infof("ClientOptions.Parse() failed. Err: %v\n", err) diff --git a/pkg/client/listen/v1/websocket/new_using_chan.go b/pkg/client/listen/v1/websocket/new_using_chan.go index 9ec73c23..b3f824d1 100644 --- a/pkg/client/listen/v1/websocket/new_using_chan.go +++ b/pkg/client/listen/v1/websocket/new_using_chan.go @@ -6,6 +6,7 @@ package websocketv1 import ( "context" + "strings" klog "k8s.io/klog/v2" @@ -69,6 +70,10 @@ func NewUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, a if apiKey != "" { cOptions.APIKey = apiKey } + if len(tOptions.Keyterms) > 0 && !strings.HasPrefix(tOptions.Model, "nova-3") { + klog.V(1).Info("Keyterms are only supported with nova-3 models.") + return nil, nil + } err := cOptions.Parse() if err != nil { klog.V(1).Infof("ClientOptions.Parse() failed. 
Err: %v\n", err) From 635bc1e06be2b4031a8c04e27b9abfe003d6c8b6 Mon Sep 17 00:00:00 2001 From: Naomi Carrigan Date: Fri, 7 Feb 2025 15:52:29 -0800 Subject: [PATCH 2/4] feat: support nova 3 and keyterms in agent --- pkg/client/agent/v1/websocket/new_using_chan.go | 5 +++++ pkg/client/interfaces/v1/types-agent.go | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pkg/client/agent/v1/websocket/new_using_chan.go b/pkg/client/agent/v1/websocket/new_using_chan.go index 23662356..8ef57dc8 100644 --- a/pkg/client/agent/v1/websocket/new_using_chan.go +++ b/pkg/client/agent/v1/websocket/new_using_chan.go @@ -6,6 +6,7 @@ package websocketv1 import ( "context" + "strings" klog "k8s.io/klog/v2" @@ -69,6 +70,10 @@ func NewUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, a if apiKey != "" { cOptions.APIKey = apiKey } + if len(*tOptions.Agent.Listen.Keyterms) > 0 && !strings.HasPrefix(tOptions.Agent.Listen.Model, "nova-3") { + klog.V(1).Info("Keyterms are only supported with nova-3 models.") + return nil, nil + } err := cOptions.Parse() if err != nil { klog.V(1).Infof("ClientOptions.Parse() failed. 
Err: %v\n", err) diff --git a/pkg/client/interfaces/v1/types-agent.go b/pkg/client/interfaces/v1/types-agent.go index 376fcfcc..5c4a9ffc 100644 --- a/pkg/client/interfaces/v1/types-agent.go +++ b/pkg/client/interfaces/v1/types-agent.go @@ -35,7 +35,8 @@ type Audio struct { Output *Output `json:"output,omitempty"` } type Listen struct { - Model string `json:"model,omitempty"` + Model string `json:"model,omitempty"` + Keyterms *[]string `json:"keyterms,omitempty"` } type Provider struct { Type string `json:"type,omitempty"` From b6236ca7b9dd7704cc09ec62f70e42cadfd6b463 Mon Sep 17 00:00:00 2001 From: Naomi Carrigan Date: Fri, 7 Feb 2025 15:59:53 -0800 Subject: [PATCH 3/4] feat: update the examples --- examples/agent/websocket/simple/main.go | 2 ++ examples/speech-to-text/rest/callback/callback/main.go | 2 ++ examples/speech-to-text/rest/file/main.go | 3 ++- examples/speech-to-text/rest/intent/main.go | 3 ++- examples/speech-to-text/rest/sentiment/main.go | 3 ++- examples/speech-to-text/rest/stream/main.go | 2 ++ examples/speech-to-text/rest/summary/main.go | 3 ++- examples/speech-to-text/rest/topic/main.go | 3 ++- examples/speech-to-text/rest/url/main.go | 2 ++ examples/speech-to-text/websocket/http_callback/main.go | 2 ++ examples/speech-to-text/websocket/http_channel/main.go | 2 ++ examples/speech-to-text/websocket/microphone_callback/main.go | 3 ++- examples/speech-to-text/websocket/microphone_channel/main.go | 3 ++- examples/speech-to-text/websocket/replay/main.go | 2 ++ 14 files changed, 28 insertions(+), 7 deletions(-) diff --git a/examples/agent/websocket/simple/main.go b/examples/agent/websocket/simple/main.go index 5898092a..1762b6d9 100644 --- a/examples/agent/websocket/simple/main.go +++ b/examples/agent/websocket/simple/main.go @@ -393,6 +393,8 @@ func main() { tOptions.Agent.Think.Provider.Type = "open_ai" tOptions.Agent.Think.Model = "gpt-4o-mini" tOptions.Agent.Think.Instructions = "You are a helpful AI assistant." 
+ tOptions.Agent.Listen.Model = "nova-3" + tOptions.Agent.Listen.Keyterms = &[]string{"Bueller"} // implement your own callback callback := msginterfaces.AgentMessageChan(*NewMyHandler()) diff --git a/examples/speech-to-text/rest/callback/callback/main.go b/examples/speech-to-text/rest/callback/callback/main.go index 9d57abea..ea644b3d 100644 --- a/examples/speech-to-text/rest/callback/callback/main.go +++ b/examples/speech-to-text/rest/callback/callback/main.go @@ -38,6 +38,8 @@ func main() { ctx, url, &interfaces.PreRecordedTranscriptionOptions{ + Model: "nova-3", + Keyterms: []string{"deepgram"}, Punctuate: true, Diarize: true, Language: "en-US", diff --git a/examples/speech-to-text/rest/file/main.go b/examples/speech-to-text/rest/file/main.go index 12715a6a..a2da3784 100644 --- a/examples/speech-to-text/rest/file/main.go +++ b/examples/speech-to-text/rest/file/main.go @@ -32,7 +32,8 @@ func main() { // set the Transcription options options := &interfaces.PreRecordedTranscriptionOptions{ - Model: "nova-2", + Model: "nova-3", + Keyterms: []string{"Bueller"}, Punctuate: true, Paragraphs: true, SmartFormat: true, diff --git a/examples/speech-to-text/rest/intent/main.go b/examples/speech-to-text/rest/intent/main.go index 4b043679..0678dabc 100644 --- a/examples/speech-to-text/rest/intent/main.go +++ b/examples/speech-to-text/rest/intent/main.go @@ -32,7 +32,8 @@ func main() { // set the Transcription options options := &interfaces.PreRecordedTranscriptionOptions{ - Model: "nova-2", + Model: "nova-3", + Keyterms: []string{"deepgram"}, Punctuate: true, Language: "en-US", SmartFormat: true, diff --git a/examples/speech-to-text/rest/sentiment/main.go b/examples/speech-to-text/rest/sentiment/main.go index 783ab147..74461adf 100644 --- a/examples/speech-to-text/rest/sentiment/main.go +++ b/examples/speech-to-text/rest/sentiment/main.go @@ -32,7 +32,8 @@ func main() { // set the Transcription options options := &interfaces.PreRecordedTranscriptionOptions{ - Model: 
"nova-2", + Model: "nova-3", + Keyterms: []string{"deepgram"}, Punctuate: true, Language: "en-US", SmartFormat: true, diff --git a/examples/speech-to-text/rest/stream/main.go b/examples/speech-to-text/rest/stream/main.go index 1b98e31e..adfeb171 100644 --- a/examples/speech-to-text/rest/stream/main.go +++ b/examples/speech-to-text/rest/stream/main.go @@ -32,6 +32,8 @@ func main() { // set the Transcription options options := &interfaces.PreRecordedTranscriptionOptions{ + Model: "nova-3", + Keyterms: []string{"Bueller"}, Punctuate: true, Diarize: true, Language: "en-US", diff --git a/examples/speech-to-text/rest/summary/main.go b/examples/speech-to-text/rest/summary/main.go index d555431f..215d903a 100644 --- a/examples/speech-to-text/rest/summary/main.go +++ b/examples/speech-to-text/rest/summary/main.go @@ -32,7 +32,8 @@ func main() { // set the Transcription options options := &interfaces.PreRecordedTranscriptionOptions{ - Model: "nova-2", + Model: "nova-3", + Keyterms: []string{"deepgram"}, Punctuate: true, Language: "en-US", SmartFormat: true, diff --git a/examples/speech-to-text/rest/topic/main.go b/examples/speech-to-text/rest/topic/main.go index 32dded6d..e49aa005 100644 --- a/examples/speech-to-text/rest/topic/main.go +++ b/examples/speech-to-text/rest/topic/main.go @@ -32,7 +32,8 @@ func main() { // set the Transcription options options := &interfaces.PreRecordedTranscriptionOptions{ - Model: "nova-2", + Model: "nova-3", + Keyterms: []string{"deepgram"}, Punctuate: true, Language: "en-US", SmartFormat: true, diff --git a/examples/speech-to-text/rest/url/main.go b/examples/speech-to-text/rest/url/main.go index 502c9737..c9d5ea37 100644 --- a/examples/speech-to-text/rest/url/main.go +++ b/examples/speech-to-text/rest/url/main.go @@ -30,6 +30,8 @@ func main() { // send stream to Deepgram options := &interfaces.PreRecordedTranscriptionOptions{ + Model: "nova-3", + Keyterms: []string{"deepgram"}, Punctuate: true, Diarize: true, Language: "en-US", diff --git 
a/examples/speech-to-text/websocket/http_callback/main.go b/examples/speech-to-text/websocket/http_callback/main.go index 7496027e..ab1731ee 100644 --- a/examples/speech-to-text/websocket/http_callback/main.go +++ b/examples/speech-to-text/websocket/http_callback/main.go @@ -32,6 +32,8 @@ func main() { // set the Transcription options transcriptOptions := &interfaces.LiveTranscriptionOptions{ + Model: "nova-3", + Keyterms: []string{"deepgram"}, Language: "en-US", Punctuate: true, } diff --git a/examples/speech-to-text/websocket/http_channel/main.go b/examples/speech-to-text/websocket/http_channel/main.go index a7a9d89b..875b688a 100644 --- a/examples/speech-to-text/websocket/http_channel/main.go +++ b/examples/speech-to-text/websocket/http_channel/main.go @@ -36,6 +36,8 @@ func main() { // set the Transcription options transcriptOptions := &interfaces.LiveTranscriptionOptions{ + Model: "nova-3", + Keyterms: []string{"deepgram"}, Language: "en-US", Punctuate: true, } diff --git a/examples/speech-to-text/websocket/microphone_callback/main.go b/examples/speech-to-text/websocket/microphone_callback/main.go index d951d8c2..dc33b2b6 100644 --- a/examples/speech-to-text/websocket/microphone_callback/main.go +++ b/examples/speech-to-text/websocket/microphone_callback/main.go @@ -125,7 +125,8 @@ func main() { // set the Transcription options tOptions := &interfaces.LiveTranscriptionOptions{ - Model: "nova-2", + Model: "nova-3", + Keyterms: []string{"deepgram"}, Language: "en-US", Punctuate: true, Encoding: "linear16", diff --git a/examples/speech-to-text/websocket/microphone_channel/main.go b/examples/speech-to-text/websocket/microphone_channel/main.go index 719d2aae..56e65de1 100644 --- a/examples/speech-to-text/websocket/microphone_channel/main.go +++ b/examples/speech-to-text/websocket/microphone_channel/main.go @@ -224,7 +224,8 @@ func main() { // set the Transcription options tOptions := &interfaces.LiveTranscriptionOptions{ - Model: "nova-2", + Model: "nova-3", + 
Keyterms: []string{"deepgram"}, Language: "en-US", Punctuate: true, Encoding: "linear16", diff --git a/examples/speech-to-text/websocket/replay/main.go b/examples/speech-to-text/websocket/replay/main.go index e135e633..eded3fe9 100644 --- a/examples/speech-to-text/websocket/replay/main.go +++ b/examples/speech-to-text/websocket/replay/main.go @@ -29,6 +29,8 @@ func main() { // set the Transcription options options := &interfaces.LiveTranscriptionOptions{ + Model: "nova-3", + Keyterms: []string{"deepgram"}, Language: "en-US", Punctuate: true, Encoding: "mulaw", From a51351d7b47f73cb6f1c387c977ed631c6919d98 Mon Sep 17 00:00:00 2001 From: Naomi Carrigan Date: Fri, 7 Feb 2025 16:00:33 -0800 Subject: [PATCH 4/4] fix: make agent Listen.Keyterms a plain []string instead of *[]string --- examples/agent/websocket/simple/main.go | 2 +- pkg/client/agent/v1/websocket/new_using_chan.go | 2 +- pkg/client/interfaces/v1/types-agent.go | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/agent/websocket/simple/main.go b/examples/agent/websocket/simple/main.go index 1762b6d9..8606489e 100644 --- a/examples/agent/websocket/simple/main.go +++ b/examples/agent/websocket/simple/main.go @@ -394,7 +394,7 @@ func main() { tOptions.Agent.Think.Model = "gpt-4o-mini" tOptions.Agent.Think.Instructions = "You are a helpful AI assistant."
tOptions.Agent.Listen.Model = "nova-3" - tOptions.Agent.Listen.Keyterms = &[]string{"Bueller"} + tOptions.Agent.Listen.Keyterms = []string{"Bueller"} // implement your own callback callback := msginterfaces.AgentMessageChan(*NewMyHandler()) diff --git a/pkg/client/agent/v1/websocket/new_using_chan.go b/pkg/client/agent/v1/websocket/new_using_chan.go index 8ef57dc8..fbdecf00 100644 --- a/pkg/client/agent/v1/websocket/new_using_chan.go +++ b/pkg/client/agent/v1/websocket/new_using_chan.go @@ -70,7 +70,7 @@ func NewUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, a if apiKey != "" { cOptions.APIKey = apiKey } - if len(*tOptions.Agent.Listen.Keyterms) > 0 && !strings.HasPrefix(tOptions.Agent.Listen.Model, "nova-3") { + if len(tOptions.Agent.Listen.Keyterms) > 0 && !strings.HasPrefix(tOptions.Agent.Listen.Model, "nova-3") { klog.V(1).Info("Keyterms are only supported with nova-3 models.") return nil, nil } diff --git a/pkg/client/interfaces/v1/types-agent.go b/pkg/client/interfaces/v1/types-agent.go index 5c4a9ffc..c6932813 100644 --- a/pkg/client/interfaces/v1/types-agent.go +++ b/pkg/client/interfaces/v1/types-agent.go @@ -35,8 +35,8 @@ type Audio struct { Output *Output `json:"output,omitempty"` } type Listen struct { - Model string `json:"model,omitempty"` - Keyterms *[]string `json:"keyterms,omitempty"` + Model string `json:"model,omitempty"` + Keyterms []string `json:"keyterms,omitempty"` } type Provider struct { Type string `json:"type,omitempty"`