Skip to content

Commit

Permalink
added nativeFromTextual support for standard json and added avro roundtrip tool
Browse files Browse the repository at this point in the history
  • Loading branch information
xmcqueen committed Sep 29, 2021
1 parent 9f4e906 commit 0758065
Show file tree
Hide file tree
Showing 9 changed files with 711 additions and 120 deletions.
4 changes: 2 additions & 2 deletions array.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ import (
"reflect"
)

func makeArrayCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}) (*Codec, error) {
func makeArrayCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}, cb *codecBuilder) (*Codec, error) {
// array type must have items
itemSchema, ok := schemaMap["items"]
if !ok {
return nil, fmt.Errorf("Array ought to have items key")
}
itemCodec, err := buildCodec(st, enclosingNamespace, itemSchema)
itemCodec, err := buildCodec(st, enclosingNamespace, itemSchema, cb)
if err != nil {
return nil, fmt.Errorf("Array items ought to be valid Avro type: %s", err)
}
Expand Down
54 changes: 40 additions & 14 deletions codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,15 @@ type Codec struct {
Rabin uint64
}

// codecBuilder bundles the three builder functions that buildCodec dispatches
// to, one per shape a decoded schema value can take. Keeping them in a struct
// lets any of them be replaced if needed, and lets the chosen set be passed
// down the call stack during codec building.
type codecBuilder struct {
// mapBuilder builds a codec for a schema described by a JSON object
// (map[string]interface{}).
mapBuilder func(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}, cb *codecBuilder) (*Codec, error)
// stringBuilder builds a codec for a schema described by a type name;
// schemaMap is nil when the schema is a bare string.
stringBuilder func(st map[string]*Codec, enclosingNamespace string, typeName string, schemaMap map[string]interface{}, cb *codecBuilder) (*Codec, error)
// sliceBuilder builds a codec for a schema described by a JSON array
// ([]interface{}).
sliceBuilder func(st map[string]*Codec, enclosingNamespace string, schemaArray []interface{}, cb *codecBuilder) (*Codec, error)
}

// NewCodec returns a Codec used to translate between a byte slice of either
// binary or textual Avro data and native Go data.
//
Expand Down Expand Up @@ -87,6 +96,22 @@ type Codec struct {
// fmt.Println(err)
// }
func NewCodec(schemaSpecification string) (*Codec, error) {
	// Default builder set: each schema shape (JSON object, type-name string,
	// JSON array) is handled by the package's regular builder for that shape.
	defaultBuilders := &codecBuilder{
		mapBuilder:    buildCodecForTypeDescribedByMap,
		stringBuilder: buildCodecForTypeDescribedByString,
		sliceBuilder:  buildCodecForTypeDescribedBySlice,
	}
	return NewCodecFrom(schemaSpecification, defaultBuilders)
}

// NewCodecForStandardJSON returns a Codec for schemaSpecification built the
// same way NewCodec builds one, except that schemas described by a JSON array
// are handled by buildCodecForTypeDescribedBySliceJSON rather than
// buildCodecForTypeDescribedBySlice.
//
// NOTE(review): presumably this is what lets NativeFromTextual accept
// standard JSON for such schemas; confirm against
// buildCodecForTypeDescribedBySliceJSON.
func NewCodecForStandardJSON(schemaSpecification string) (*Codec, error) {
	standardJSONBuilders := &codecBuilder{
		mapBuilder:    buildCodecForTypeDescribedByMap,
		stringBuilder: buildCodecForTypeDescribedByString,
		sliceBuilder:  buildCodecForTypeDescribedBySliceJSON,
	}
	return NewCodecFrom(schemaSpecification, standardJSONBuilders)
}

func NewCodecFrom(schemaSpecification string, cb *codecBuilder) (*Codec, error) {
var schema interface{}

if err := json.Unmarshal([]byte(schemaSpecification), &schema); err != nil {
Expand All @@ -96,7 +121,7 @@ func NewCodec(schemaSpecification string) (*Codec, error) {
// bootstrap a symbol table with primitive type codecs for the new codec
st := newSymbolTable()

c, err := buildCodec(st, nullNamespace, schema)
c, err := buildCodec(st, nullNamespace, schema, cb)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -494,25 +519,26 @@ func (c *Codec) SchemaCRC64Avro() int64 {

// convert a schema data structure to a codec, prefixing with specified
// namespace
func buildCodec(st map[string]*Codec, enclosingNamespace string, schema interface{}) (*Codec, error) {
func buildCodec(st map[string]*Codec, enclosingNamespace string, schema interface{}, cb *codecBuilder) (*Codec, error) {
switch schemaType := schema.(type) {
case map[string]interface{}:
return buildCodecForTypeDescribedByMap(st, enclosingNamespace, schemaType)
return cb.mapBuilder(st, enclosingNamespace, schemaType, cb)
case string:
return buildCodecForTypeDescribedByString(st, enclosingNamespace, schemaType, nil)
return cb.stringBuilder(st, enclosingNamespace, schemaType, nil, cb)
case []interface{}:
return buildCodecForTypeDescribedBySlice(st, enclosingNamespace, schemaType)
return cb.sliceBuilder(st, enclosingNamespace, schemaType, cb)
default:
return nil, fmt.Errorf("unknown schema type: %T", schema)
}
}

// Reach into the map, grabbing its "type". Use that to create the codec.
func buildCodecForTypeDescribedByMap(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}) (*Codec, error) {
func buildCodecForTypeDescribedByMap(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}, cb *codecBuilder) (*Codec, error) {
t, ok := schemaMap["type"]
if !ok {
return nil, fmt.Errorf("missing type: %v", schemaMap)
}

switch v := t.(type) {
case string:
// Already defined types may be abbreviated with its string name.
Expand All @@ -522,17 +548,17 @@ func buildCodecForTypeDescribedByMap(st map[string]*Codec, enclosingNamespace st
// EXAMPLE: "type":"int"
// EXAMPLE: "type":"record"
// EXAMPLE: "type":"somePreviouslyDefinedCustomTypeString"
return buildCodecForTypeDescribedByString(st, enclosingNamespace, v, schemaMap)
return cb.stringBuilder(st, enclosingNamespace, v, schemaMap, cb)
case map[string]interface{}:
return buildCodecForTypeDescribedByMap(st, enclosingNamespace, v)
return cb.mapBuilder(st, enclosingNamespace, v, cb)
case []interface{}:
return buildCodecForTypeDescribedBySlice(st, enclosingNamespace, v)
return cb.sliceBuilder(st, enclosingNamespace, v, cb)
default:
return nil, fmt.Errorf("type ought to be either string, map[string]interface{}, or []interface{}; received: %T", t)
}
}

func buildCodecForTypeDescribedByString(st map[string]*Codec, enclosingNamespace string, typeName string, schemaMap map[string]interface{}) (*Codec, error) {
func buildCodecForTypeDescribedByString(st map[string]*Codec, enclosingNamespace string, typeName string, schemaMap map[string]interface{}, cb *codecBuilder) (*Codec, error) {
isLogicalType := false
searchType := typeName
// logicalType will be non-nil for those fields without a logicalType property set
Expand All @@ -557,23 +583,23 @@ func buildCodecForTypeDescribedByString(st map[string]*Codec, enclosingNamespace
// There are only a small handful of complex Avro data types.
switch searchType {
case "array":
return makeArrayCodec(st, enclosingNamespace, schemaMap)
return makeArrayCodec(st, enclosingNamespace, schemaMap, cb)
case "enum":
return makeEnumCodec(st, enclosingNamespace, schemaMap)
case "fixed":
return makeFixedCodec(st, enclosingNamespace, schemaMap)
case "map":
return makeMapCodec(st, enclosingNamespace, schemaMap)
return makeMapCodec(st, enclosingNamespace, schemaMap, cb)
case "record":
return makeRecordCodec(st, enclosingNamespace, schemaMap)
return makeRecordCodec(st, enclosingNamespace, schemaMap, cb)
case "bytes.decimal":
return makeDecimalBytesCodec(st, enclosingNamespace, schemaMap)
case "fixed.decimal":
return makeDecimalFixedCodec(st, enclosingNamespace, schemaMap)
default:
if isLogicalType {
delete(schemaMap, "logicalType")
return buildCodecForTypeDescribedByString(st, enclosingNamespace, typeName, schemaMap)
return buildCodecForTypeDescribedByString(st, enclosingNamespace, typeName, schemaMap, cb)
}
return nil, fmt.Errorf("unknown type name: %q", searchType)
}
Expand Down
75 changes: 75 additions & 0 deletions enum_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
package goavro

import (
"encoding/json"
"fmt"
"testing"
)

Expand Down Expand Up @@ -59,3 +61,76 @@ func TestEnumTextCodec(t *testing.T) {
testTextEncodeFail(t, `{"type":"enum","name":"e1","symbols":["alpha","bravo"]}`, "charlie", `cannot encode textual enum "e1": value ought to be member of symbols`)
testTextDecodeFail(t, `{"type":"enum","name":"e1","symbols":["alpha","bravo"]}`, []byte(`"charlie"`), `cannot decode textual enum "e1": value ought to be member of symbols`)
}

// TestGH233 records the investigation of issue GH233: textual handling of
// unions whose member is a named type (enum or record), with and without a
// namespace. Cases noted below as failing are kept commented out; the active
// cases address the union member by the name that was found to work.
func TestGH233(t *testing.T) {
// The originally reported failing case (namespace on the record, union
// member addressed by its short name):
// testTextCodecPass(t, `{"type":"record","name":"FooBar","namespace":"com.foo.bar","fields":[{"name":"event","type":["null",{"type":"enum","name":"FooBarEvent","symbols":["CREATED","UPDATED"]}]}]}`, map[string]interface{}{"event": Union("FooBarEvent", "CREATED")}, []byte(`{"event":{"FooBarEvent":"CREATED"}}`))
// Remove the namespace and it passes.
testTextCodecPass(t, `{"type":"record","name":"FooBar","fields":[{"name":"event","type":["null",{"type":"enum","name":"FooBarEvent","symbols":["CREATED","UPDATED"]}]}]}`, map[string]interface{}{"event": Union("FooBarEvent", "CREATED")}, []byte(`{"event":{"FooBarEvent":"CREATED"}}`))
// Experiments narrowing down the cause.
// The basic enum.
testTextCodecPass(t, `{"type":"enum","name":"FooBarEvent","symbols":["CREATED","UPDATED"]}`, "CREATED", []byte(`"CREATED"`))
// The basic enum with a namespace.
testTextCodecPass(t, `{"type":"enum","name":"FooBarEvent","namespace":"com.foo.bar","symbols":["CREATED","UPDATED"]}`, "CREATED", []byte(`"CREATED"`))
// A union with an enum.
testTextCodecPass(t, `["null",{"type":"enum","name":"FooBarEvent","symbols":["CREATED","UPDATED"]}]`, Union("FooBarEvent", "CREATED"), []byte(`{"FooBarEvent":"CREATED"}`))
// FAIL: union with enum with namespace: cannot determine codec: "FooBarEvent"
// testTextCodecPass(t, `["null",{"type":"enum","name":"FooBarEvent","namespace":"com.foo.bar","symbols":["CREATED","UPDATED"]}]`, Union("FooBarEvent", "CREATED"), []byte(`{"FooBarEvent":"CREATED"}`))
// Conclusion: the union is not handling namespaces correctly.
// Try a union with a record instead of an enum (records and enums both
// have namespaces). First get a basic record going.
testTextCodecPass(t, `{"type":"record","name":"LongList","fields":[{"name":"next","type":["null","LongList"],"default":null}]}`, map[string]interface{}{"next": Union("LongList", map[string]interface{}{"next": nil})}, []byte(`{"next":{"LongList":{"next":null}}}`))
// Add a namespace to the record.
// Fails in the same way: cannot determine codec: "LongList" for key: "next"
// testTextCodecPass(t, `{"type":"record","name":"LongList","namespace":"com.foo.bar","fields":[{"name":"next","type":["null","LongList"],"default":null}]}`, map[string]interface{}{"next": Union("LongList", map[string]interface{}{"next": nil})}, []byte(`{"next":{"LongList":{"next":null}}}`))
//
// Experiments on syntax solutions.
// testTextCodecPass(t, `["null",{"type":"enum","name":"com.foo.bar.FooBarEvent","symbols":["CREATED","UPDATED"]}]`, Union("com.foo.bar.FooBarEvent", "CREATED"), []byte(`{"FooBarEvent":"CREATED"}`))
// TestUnionMapRecordFitsInRecord tests binary from native, but not native
// from textual, and that's where the error is happening.
// If the namespace is specified in the incoming name it works.
testTextCodecPass(t, `{"type":"record","name":"ns1.LongList","fields":[{"name":"next","type":["null","LongList"],"default":null}]}`, map[string]interface{}{"next": Union("ns1.LongList", map[string]interface{}{"next": nil})}, []byte(`{"next":{"ns1.LongList":{"next":null}}}`))

// Try the originally failing schema with the namespace specified on the
// input: the union member is keyed by its full name.
testTextCodecPass(t, `{"type":"record","name":"FooBar","namespace":"com.foo.bar","fields":[{"name":"event","type":["null",{"type":"enum","name":"FooBarEvent","symbols":["CREATED","UPDATED"]}]}]}`, map[string]interface{}{"event": Union("com.foo.bar.FooBarEvent", "CREATED")}, []byte(`{"event":{"com.foo.bar.FooBarEvent":"CREATED"}}`))

}

// ExampleCheckSolutionGH233 decodes Avro JSON in which the union member is
// keyed by the enum's full name ("com.foo.bar.FooBarEvent"; the enum declares
// no namespace of its own, the enclosing record declares "com.foo.bar"), then
// prints the decoded native value re-marshaled with encoding/json.
//
// NOTE(review): go vet's "tests" analyzer expects an Example name to refer to
// an existing identifier or to use the Example_suffix form; confirm whether
// this name is flagged and consider renaming.
func ExampleCheckSolutionGH233() {
	const avroSchema = `
{
"type": "record",
"name": "FooBar",
"namespace": "com.foo.bar",
"fields": [
{
"name": "event",
"type": [
"null",
{
"type": "enum",
"name": "FooBarEvent",
"symbols": ["CREATED", "UPDATED"]
}
]
}
]
}
`
	// Do not discard the error: a nil codec would otherwise surface as a
	// nil-pointer panic on the NativeFromTextual call below.
	codec, err := NewCodec(avroSchema)
	if err != nil {
		panic(err)
	}

	const avroJSON = `{"event":{"com.foo.bar.FooBarEvent":"CREATED"}}`

	native, _, err := codec.NativeFromTextual([]byte(avroJSON))
	if err != nil {
		panic(err)
	}

	blob, err := json.Marshal(native)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(blob))
	// Output: {"event":{"com.foo.bar.FooBarEvent":"CREATED"}}
}
Loading

0 comments on commit 0758065

Please sign in to comment.