Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support nova-3 and keyterms #277

Merged
merged 4 commits into from
Feb 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/agent/websocket/simple/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,8 @@ func main() {
tOptions.Agent.Think.Provider.Type = "open_ai"
tOptions.Agent.Think.Model = "gpt-4o-mini"
tOptions.Agent.Think.Instructions = "You are a helpful AI assistant."
tOptions.Agent.Listen.Model = "nova-3"
tOptions.Agent.Listen.Keyterms = []string{"Bueller"}

// implement your own callback
callback := msginterfaces.AgentMessageChan(*NewMyHandler())
Expand Down
2 changes: 2 additions & 0 deletions examples/speech-to-text/rest/callback/callback/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ func main() {
ctx,
url,
&interfaces.PreRecordedTranscriptionOptions{
Model: "nova-3",
Keyterms: []string{"deepgram"},
Punctuate: true,
Diarize: true,
Language: "en-US",
Expand Down
3 changes: 2 additions & 1 deletion examples/speech-to-text/rest/file/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ func main() {

// set the Transcription options
options := &interfaces.PreRecordedTranscriptionOptions{
Model: "nova-2",
Model: "nova-3",
Keyterms: []string{"Bueller"},
Punctuate: true,
Paragraphs: true,
SmartFormat: true,
Expand Down
3 changes: 2 additions & 1 deletion examples/speech-to-text/rest/intent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ func main() {

// set the Transcription options
options := &interfaces.PreRecordedTranscriptionOptions{
Model: "nova-2",
Model: "nova-3",
Keyterms: []string{"deepgram"},
Punctuate: true,
Language: "en-US",
SmartFormat: true,
Expand Down
3 changes: 2 additions & 1 deletion examples/speech-to-text/rest/sentiment/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ func main() {

// set the Transcription options
options := &interfaces.PreRecordedTranscriptionOptions{
Model: "nova-2",
Model: "nova-3",
Keyterms: []string{"deepgram"},
Punctuate: true,
Language: "en-US",
SmartFormat: true,
Expand Down
2 changes: 2 additions & 0 deletions examples/speech-to-text/rest/stream/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ func main() {

// set the Transcription options
options := &interfaces.PreRecordedTranscriptionOptions{
Model: "nova-3",
Keyterms: []string{"Bueller"},
Punctuate: true,
Diarize: true,
Language: "en-US",
Expand Down
3 changes: 2 additions & 1 deletion examples/speech-to-text/rest/summary/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ func main() {

// set the Transcription options
options := &interfaces.PreRecordedTranscriptionOptions{
Model: "nova-2",
Model: "nova-3",
Keyterms: []string{"deepgram"},
Punctuate: true,
Language: "en-US",
SmartFormat: true,
Expand Down
3 changes: 2 additions & 1 deletion examples/speech-to-text/rest/topic/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ func main() {

// set the Transcription options
options := &interfaces.PreRecordedTranscriptionOptions{
Model: "nova-2",
Model: "nova-3",
Keyterms: []string{"deepgram"},
Punctuate: true,
Language: "en-US",
SmartFormat: true,
Expand Down
2 changes: 2 additions & 0 deletions examples/speech-to-text/rest/url/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ func main() {

// send stream to Deepgram
options := &interfaces.PreRecordedTranscriptionOptions{
Model: "nova-3",
Keyterms: []string{"deepgram"},
Punctuate: true,
Diarize: true,
Language: "en-US",
Expand Down
2 changes: 2 additions & 0 deletions examples/speech-to-text/websocket/http_callback/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ func main() {

// set the Transcription options
transcriptOptions := &interfaces.LiveTranscriptionOptions{
Model: "nova-3",
Keyterms: []string{"deepgram"},
Language: "en-US",
Punctuate: true,
}
Expand Down
2 changes: 2 additions & 0 deletions examples/speech-to-text/websocket/http_channel/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ func main() {

// set the Transcription options
transcriptOptions := &interfaces.LiveTranscriptionOptions{
Model: "nova-3",
Keyterms: []string{"deepgram"},
Language: "en-US",
Punctuate: true,
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ func main() {

// set the Transcription options
tOptions := &interfaces.LiveTranscriptionOptions{
Model: "nova-2",
Model: "nova-3",
Keyterms: []string{"deepgram"},
Language: "en-US",
Punctuate: true,
Encoding: "linear16",
Expand Down
3 changes: 2 additions & 1 deletion examples/speech-to-text/websocket/microphone_channel/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,8 @@ func main() {

// set the Transcription options
tOptions := &interfaces.LiveTranscriptionOptions{
Model: "nova-2",
Model: "nova-3",
Keyterms: []string{"deepgram"},
Language: "en-US",
Punctuate: true,
Encoding: "linear16",
Expand Down
2 changes: 2 additions & 0 deletions examples/speech-to-text/websocket/replay/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ func main() {

// set the Transcription options
options := &interfaces.LiveTranscriptionOptions{
Model: "nova-3",
Keyterms: []string{"deepgram"},
Language: "en-US",
Punctuate: true,
Encoding: "mulaw",
Expand Down
5 changes: 5 additions & 0 deletions pkg/client/agent/v1/websocket/new_using_chan.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package websocketv1

import (
"context"
"strings"

klog "k8s.io/klog/v2"

Expand Down Expand Up @@ -69,6 +70,10 @@ func NewUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, a
if apiKey != "" {
cOptions.APIKey = apiKey
}
if len(tOptions.Agent.Listen.Keyterms) > 0 && !strings.HasPrefix(tOptions.Agent.Listen.Model, "nova-3") {
klog.V(1).Info("Keyterms are only supported with nova-3 models.")
return nil, nil
}
Comment on lines +73 to +76
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Improve error handling for keyterms validation.

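The current implementation returns nil, nil when keyterms validation fails, so the caller receives a nil client together with a nil error and cannot tell that construction was rejected. Consider returning a proper error to indicate the validation failure. Note that the import hunk for this file shows only "context" and "strings" in the standard-library group, so "fmt" must also be added to the imports for the suggestion below to compile.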

 if len(tOptions.Agent.Listen.Keyterms) > 0 && !strings.HasPrefix(tOptions.Agent.Listen.Model, "nova-3") {
     klog.V(1).Info("Keyterms are only supported with nova-3 models.")
-    return nil, nil
+    return nil, fmt.Errorf("keyterms are only supported with nova-3 models, got model: %s", tOptions.Agent.Listen.Model)
 }

err := cOptions.Parse()
if err != nil {
klog.V(1).Infof("ClientOptions.Parse() failed. Err: %v\n", err)
Expand Down
3 changes: 2 additions & 1 deletion pkg/client/interfaces/v1/types-agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ type Audio struct {
Output *Output `json:"output,omitempty"`
}
type Listen struct {
Model string `json:"model,omitempty"`
Model string `json:"model,omitempty"`
Keyterms []string `json:"keyterms,omitempty"`
}
type Provider struct {
Type string `json:"type,omitempty"`
Expand Down
1 change: 1 addition & 0 deletions pkg/client/interfaces/v1/types-prerecorded.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type PreRecordedTranscriptionOptions struct {
FillerWords bool `json:"filler_words,omitempty" schema:"filler_words,omitempty"`
Intents bool `json:"intents,omitempty" schema:"intents,omitempty"`
Keywords []string `json:"keywords,omitempty" schema:"keywords,omitempty"`
Keyterms []string `json:"keyterms,omitempty" schema:"keyterms,omitempty"`
Language string `json:"language,omitempty" schema:"language,omitempty"`
Measurements bool `json:"measurements,omitempty" schema:"measurements,omitempty"`
Model string `json:"model,omitempty" schema:"model,omitempty"`
Expand Down
1 change: 1 addition & 0 deletions pkg/client/interfaces/v1/types-stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ type LiveTranscriptionOptions struct {
FillerWords bool `json:"filler_words,omitempty" schema:"filler_words,omitempty"`
InterimResults bool `json:"interim_results,omitempty" schema:"interim_results,omitempty"`
Keywords []string `json:"keywords,omitempty" schema:"keywords,omitempty"`
Keyterms []string `json:"keyterms,omitempty" schema:"keyterms,omitempty"`
Language string `json:"language,omitempty" schema:"language,omitempty"`
Model string `json:"model,omitempty" schema:"model,omitempty"`
Multichannel bool `json:"multichannel,omitempty" schema:"multichannel,omitempty"`
Expand Down
11 changes: 11 additions & 0 deletions pkg/client/listen/v1/rest/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"net/http"
"net/url"
"os"
"strings"

klog "k8s.io/klog/v2"

Expand Down Expand Up @@ -75,6 +76,11 @@ Output parameters:
func (c *Client) DoFile(ctx context.Context, filePath string, req *interfaces.PreRecordedTranscriptionOptions, resBody interface{}) error {
klog.V(6).Infof("prerecorded.DoFile() ENTER\n")

if len(req.Keyterms) > 0 && !strings.HasPrefix(req.Model, "nova-3") {
klog.V(1).Info("Keyterms are only supported with nova-3 models.")
return nil
}
Comment on lines +79 to +82
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Return a descriptive error in DoFile.

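DoFile currently returns nil when the keyterms validation fails, which callers interpret as success even though the function exits before the file is checked or any request is made. Return a descriptive error so the caller can see why the operation did not run.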

Apply this diff to improve error handling:

 if len(req.Keyterms) > 0 && !strings.HasPrefix(req.Model, "nova-3") {
   klog.V(1).Info("Keyterms are only supported with nova-3 models.")
-  return nil
+  return fmt.Errorf("keyterms are only supported with nova-3 models, got model: %s", req.Model)
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if len(req.Keyterms) > 0 && !strings.HasPrefix(req.Model, "nova-3") {
klog.V(1).Info("Keyterms are only supported with nova-3 models.")
return nil
}
if len(req.Keyterms) > 0 && !strings.HasPrefix(req.Model, "nova-3") {
klog.V(1).Info("Keyterms are only supported with nova-3 models.")
return fmt.Errorf("keyterms are only supported with nova-3 models, got model: %s", req.Model)
}


// file?
fileInfo, err := os.Stat(filePath)
if err != nil || errors.Is(err, os.ErrNotExist) {
Expand Down Expand Up @@ -116,6 +122,11 @@ Output parameters:
func (c *Client) DoStream(ctx context.Context, src io.Reader, options *interfaces.PreRecordedTranscriptionOptions, resBody interface{}) error {
klog.V(6).Infof("prerecorded.DoStream() ENTER\n")

if len(options.Keyterms) > 0 && !strings.HasPrefix(options.Model, "nova-3") {
klog.V(1).Info("Keyterms are only supported with nova-3 models.")
return nil
}
Comment on lines +125 to +128
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Return a descriptive error in DoStream.

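DoStream currently returns nil when the keyterms validation fails, which callers interpret as success even though the function exits before the request URI is built or anything is sent. Return a descriptive error so the caller can see why the operation did not run.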

Apply this diff to improve error handling:

 if len(options.Keyterms) > 0 && !strings.HasPrefix(options.Model, "nova-3") {
   klog.V(1).Info("Keyterms are only supported with nova-3 models.")
-  return nil
+  return fmt.Errorf("keyterms are only supported with nova-3 models, got model: %s", options.Model)
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if len(options.Keyterms) > 0 && !strings.HasPrefix(options.Model, "nova-3") {
klog.V(1).Info("Keyterms are only supported with nova-3 models.")
return nil
}
if len(options.Keyterms) > 0 && !strings.HasPrefix(options.Model, "nova-3") {
klog.V(1).Info("Keyterms are only supported with nova-3 models.")
return fmt.Errorf("keyterms are only supported with nova-3 models, got model: %s", options.Model)
}


uri, err := version.GetPrerecordedAPI(ctx, c.Options.Host, c.Options.APIVersion, c.Options.Path, options)
if err != nil {
klog.V(1).Infof("GetPrerecordedAPI failed. Err: %v\n", err)
Expand Down
5 changes: 5 additions & 0 deletions pkg/client/listen/v1/websocket/new_using_callbacks.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package websocketv1

import (
"context"
"strings"

klog "k8s.io/klog/v2"

Expand Down Expand Up @@ -69,6 +70,10 @@ func NewUsingCallbackWithCancel(ctx context.Context, ctxCancel context.CancelFun
if apiKey != "" {
cOptions.APIKey = apiKey
}
if len(tOptions.Keyterms) > 0 && !strings.HasPrefix(tOptions.Model, "nova-3") {
klog.V(1).Info("Keyterms are only supported with nova-3 models.")
return nil, nil
}
Comment on lines +73 to +76
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Return a descriptive error instead of nil.

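While the validation logic is correct, returning nil, nil gives the caller a nil client together with a nil error, so the rejection is indistinguishable from success until the client is first used. Consider returning a descriptive error to better communicate why the operation failed. Note that the import hunk for this file shows only "context" and "strings" in the standard-library group, so "fmt" must also be added to the imports for the suggestion below to compile.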

Apply this diff to improve error handling:

 if len(tOptions.Keyterms) > 0 && !strings.HasPrefix(tOptions.Model, "nova-3") {
   klog.V(1).Info("Keyterms are only supported with nova-3 models.")
-  return nil, nil
+  return nil, fmt.Errorf("keyterms are only supported with nova-3 models, got model: %s", tOptions.Model)
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if len(tOptions.Keyterms) > 0 && !strings.HasPrefix(tOptions.Model, "nova-3") {
klog.V(1).Info("Keyterms are only supported with nova-3 models.")
return nil, nil
}
if len(tOptions.Keyterms) > 0 && !strings.HasPrefix(tOptions.Model, "nova-3") {
klog.V(1).Info("Keyterms are only supported with nova-3 models.")
return nil, fmt.Errorf("keyterms are only supported with nova-3 models, got model: %s", tOptions.Model)
}

err := cOptions.Parse()
if err != nil {
klog.V(1).Infof("ClientOptions.Parse() failed. Err: %v\n", err)
Expand Down
5 changes: 5 additions & 0 deletions pkg/client/listen/v1/websocket/new_using_chan.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package websocketv1

import (
"context"
"strings"

klog "k8s.io/klog/v2"

Expand Down Expand Up @@ -69,6 +70,10 @@ func NewUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, a
if apiKey != "" {
cOptions.APIKey = apiKey
}
if len(tOptions.Keyterms) > 0 && !strings.HasPrefix(tOptions.Model, "nova-3") {
klog.V(1).Info("Keyterms are only supported with nova-3 models.")
return nil, nil
}
Comment on lines +73 to +76
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Improve error handling for keyterms validation.

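The current implementation returns nil, nil when keyterms validation fails, so the caller receives a nil client together with a nil error and cannot tell that construction was rejected. Consider returning a proper error to indicate the validation failure. Note that the import hunk for this file shows only "context" and "strings" in the standard-library group, so "fmt" must also be added to the imports for the suggestion below to compile.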

 if len(tOptions.Keyterms) > 0 && !strings.HasPrefix(tOptions.Model, "nova-3") {
     klog.V(1).Info("Keyterms are only supported with nova-3 models.")
-    return nil, nil
+    return nil, fmt.Errorf("keyterms are only supported with nova-3 models, got model: %s", tOptions.Model)
 }

err := cOptions.Parse()
if err != nil {
klog.V(1).Infof("ClientOptions.Parse() failed. Err: %v\n", err)
Expand Down
Loading