diff --git a/cbor_json.go b/cbor_json.go
index 975ce78..f52fbda 100644
--- a/cbor_json.go
+++ b/cbor_json.go
@@ -3,8 +3,8 @@
 
 package gson
 
-import "strconv"
 import "math"
+import "strconv"
 import "encoding/binary"
 
 var nullBin = []byte("null")
diff --git a/collate.go b/collate.go
index 564d1a7..8025135 100644
--- a/collate.go
+++ b/collate.go
@@ -4,6 +4,8 @@ import "encoding/json"
 import "math/big"
 import "bytes"
 
+import "golang.org/x/text/collate"
+
 // Collation order for supported types. Applications desiring different
 // ordering between types can initialize these byte values before
 // instantiating a config object.
@@ -34,6 +36,8 @@ type collateConfig struct {
 	enc *json.Encoder
 	buf *bytes.Buffer
 	zf *big.Float
+	tcltbuffer *collate.Buffer // scratch buffer passed to textcollator.Key by collateString
+	textcollator *collate.Collator // optional; when nil collateString encodes the string bytes as-is
 }
 
 // Collate abstraction for value encoded into binary-collation.
diff --git a/collate_test.go b/collate_test.go
index 98604c4..f2b32b6 100644
--- a/collate_test.go
+++ b/collate_test.go
@@ -1,9 +1,14 @@
 package gson
 
+import "sort"
 import "bytes"
+import "strings"
 import "testing"
 import "reflect"
 
+import "golang.org/x/text/collate"
+import "golang.org/x/text/language"
+
 func TestCollateReset(t *testing.T) {
 	config := NewDefaultConfig()
 	clt := config.NewCollate(make([]byte, 0, 1024))
@@ -49,6 +54,117 @@ func TestCollateEmpty(t *testing.T) {
 	}()
 }
 
+func TestAlternateSortTypes(t *testing.T) {
+	testCases := []struct {
+		lang string // comma-separated language tags; every tag is run as its own collator
+		in   testtxtclts
+		want []string // expected order of the `in` strings after sorting by collated bytes
+	}{{
+		lang: "zh,cmn,zh-Hant-u-co-pinyin,zh-HK-u-co-pinyin,zh-pinyin",
+		in: testtxtclts{
+			&testtxtclt{in: "爸爸"}, &testtxtclt{in: "妈妈"},
+			&testtxtclt{in: "儿子"}, &testtxtclt{in: "女儿"},
+		},
+		want: []string{"爸爸", "儿子", "妈妈", "女儿"},
+	}, {
+		lang: "zh-Hant,zh-u-co-stroke,zh-Hant-u-co-stroke",
+		in: testtxtclts{
+			&testtxtclt{in: "爸爸"}, &testtxtclt{in: "妈妈"},
+			&testtxtclt{in: "儿子"}, &testtxtclt{in: "女儿"},
+		},
+		want: []string{"儿子", "女儿", "妈妈", "爸爸"},
+	}}
+
+	for _, tc := range testCases {
+		for _, tag := range strings.Split(tc.lang, ",") {
+			collator := collate.New(language.MustParse(tag))
+			config := NewDefaultConfig().SetTextCollator(collator)
+			for _, item := range tc.in {
+				item.collate(config) // refresh item.clt under this tag's collator
+			}
+			sort.Sort(tc.in) // sorts tc.in in place; the next tag starts from this order
+			got := []string{}
+			for _, item := range tc.in {
+				got = append(got, item.in)
+			}
+			if !reflect.DeepEqual(got, tc.want) {
+				t.Errorf("%v %v expected %v; got %v", tag, tc.in, tc.want, got)
+			}
+		}
+	}
+}
+
+func TestTextNocase(t *testing.T) {
+	testCases := []struct {
+		lang string // comma-separated language tags; every tag is run as its own collator
+		in   testtxtclts
+		want []string // expected order of the `in` strings after sorting by collated bytes
+	}{{
+		lang: "en",
+		in: testtxtclts{
+			&testtxtclt{in: "B"}, &testtxtclt{in: "b"},
+			&testtxtclt{in: "a"}, &testtxtclt{in: "A"},
+		},
+		want: []string{"a", "A", "B", "b"}, // each case pair keeps its input order
+	}}
+
+	for _, tc := range testCases {
+		for _, tag := range strings.Split(tc.lang, ",") {
+			collator := collate.New(language.MustParse(tag))
+			config := NewDefaultConfig().SetTextCollator(collator)
+			for _, item := range tc.in {
+				item.collate(config) // refresh item.clt under this tag's collator
+			}
+			sort.Sort(tc.in) // sorts tc.in in place by collated bytes
+			got := []string{}
+			for _, item := range tc.in {
+				got = append(got, item.in)
+			}
+			if !reflect.DeepEqual(got, tc.want) {
+				t.Errorf("%v %v expected %v; got %v", tag, tc.in, tc.want, got)
+			}
+		}
+	}
+}
+
+func TestTextGermanSwedish(t *testing.T) {
+	testCases := []struct {
+		lang string // comma-separated language tags; every tag is run as its own collator
+		in   testtxtclts
+		want []string // expected order of the `in` strings after sorting by collated bytes
+	}{{
+		lang: "de",
+		in: testtxtclts{
+			&testtxtclt{in: "a"}, &testtxtclt{in: "z"}, &testtxtclt{in: "ä"},
+		},
+		want: []string{"a", "ä", "z"}, // "ä" expected next to "a" under the de tag
+	}, {
+		lang: "sv",
+		in: testtxtclts{
+			&testtxtclt{in: "a"}, &testtxtclt{in: "z"}, &testtxtclt{in: "ä"},
+		},
+		want: []string{"a", "z", "ä"}, // "ä" expected after "z" under the sv tag
+	}}
+
+	for _, tc := range testCases {
+		for _, tag := range strings.Split(tc.lang, ",") {
+			collator := collate.New(language.MustParse(tag))
+			config := NewDefaultConfig().SetTextCollator(collator)
+			for _, item := range tc.in {
+				item.collate(config) // refresh item.clt under this tag's collator
+			}
+			sort.Sort(tc.in) // sorts tc.in in place by collated bytes
+			got := []string{}
+			for _, item := range tc.in {
+				got = append(got, item.in)
+			}
+			if !reflect.DeepEqual(got, tc.want) {
+				t.Errorf("%v %v expected %v; got %v", tag, tc.in, tc.want, got)
+			}
+		}
+	}
+}
+
 // sort type for slice of []byte
 type ByteSlices [][]byte
 
@@ -64,3 +180,27 @@ func (bs ByteSlices) Less(i, j int) bool {
 func (bs ByteSlices) Swap(i, j int) {
 	bs[i], bs[j] = bs[j], bs[i]
 }
+
+type testtxtclt struct {
+	in  string // text to collate
+	clt []byte // collated encoding of `in`, filled by collate()
+}
+
+func (item *testtxtclt) collate(config *Config) {
+	val := config.NewValue(item.in)
+	item.clt = val.Tocollate(config.NewCollate(nil)).Bytes() // store the collated bytes for sorting
+}
+
+type testtxtclts []*testtxtclt // sort.Interface over items, ordered by their clt bytes
+
+func (items testtxtclts) Len() int {
+	return len(items)
+}
+
+func (items testtxtclts) Less(i, j int) bool {
+	return bytes.Compare(items[i].clt, items[j].clt) < 0
+}
+
+func (items testtxtclts) Swap(i, j int) {
+	items[i], items[j] = items[j], items[i]
+}
diff --git a/config.go b/config.go
index fb81e1a..3f2d004 100644
--- a/config.go
+++ b/config.go
@@ -5,6 +5,8 @@ import "fmt"
 import "math/big"
 import "encoding/json"
 
+import "golang.org/x/text/collate"
+
 // NumberKind how to treat numbers.
 type NumberKind byte
 
@@ -72,12 +74,15 @@ func NewDefaultConfig() *Config {
 }
 
 func (config *Config) init() *Config {
+	// fields declared on collateConfig: encoder, its buffer, and collation scratch state
 	config.buf = bytes.NewBuffer(make([]byte, 0, 1024)) // start with 1K
 	config.enc = json.NewEncoder(config.buf)
-	a, b, c, d := config.strlen, config.numkeys, config.itemlen, config.ptrlen
-	config.pools = newMempool(a, b, c, d)
 	config.zf = big.NewFloat(0)
 	config.zf.SetPrec(64)
+	config.tcltbuffer = &collate.Buffer{} // reused by collateString for every text-collated key
+	// mempools, sized from the configured strlen/numkeys/itemlen/ptrlen limits
+	a, b, c, d := config.strlen, config.numkeys, config.itemlen, config.ptrlen
+	config.pools = newMempool(a, b, c, d)
 	return config
 }
 
@@ -135,6 +140,12 @@ func (config Config) SetMaxkeys(n int) *Config {
 	return config.init()
 }
 
+// SetTextCollator returns a copy of config that collates string values with collator.
+func (config Config) SetTextCollator(collator *collate.Collator) *Config {
+	config.textcollator = collator // value receiver: this sets the field on a copy, not on the caller's config
+	return config.init() // re-init like SetMaxkeys so the copy owns its own buffers, collate.Buffer and pools
+}
+
 // ResetPools configure a new set of pools with specified size, instead
 // of using the default size: MaxStringLen, MaxKeys, MaxCollateLen, and,
 // MaxJsonpointerLen.
diff --git a/util.go b/util.go
index 9cb17c7..2eb626f 100644
--- a/util.go
+++ b/util.go
@@ -186,9 +186,16 @@ func collateString(str string, code []byte, config *Config) (n int) {
 		code[0], code[1] = TypeMissing, Terminator
 		return 2
 	}
+	strcode := str2bytes(str) // default: encode the string bytes unchanged
+	if config.textcollator != nil {
+		config.tcltbuffer.Reset() // reuse the scratch buffer; the key below points into it
+		strcode = config.textcollator.Key(config.tcltbuffer, strcode)
+		strcode = strcode[:len(strcode)-1] // return text is null terminated -- NOTE(review): unverified against collate.Key; panics if the key is empty
+	}
+
 	code[n] = TypeString
 	n++
-	n += suffixEncodeString(str2bytes(str), code[n:])
+	n += suffixEncodeString(strcode, code[n:])
 	code[n] = Terminator
 	n++
 	return n