diff --git a/goutils b/goutils
index 272bc534..0fcd6283 160000
--- a/goutils
+++ b/goutils
@@ -1 +1 @@
-Subproject commit 272bc5343946eb60f7283e8e7a7ff65644b11eca
+Subproject commit 0fcd628370319b28b92288d34f8bcff1e7e9d4db
diff --git a/internal/serialization/gin_binding.go b/internal/serialization/gin_binding.go
new file mode 100644
index 00000000..6631792c
--- /dev/null
+++ b/internal/serialization/gin_binding.go
@@ -0,0 +1,45 @@
+package serialization
+
+import (
+	"net/http"
+
+	"github.com/bytedance/sonic"
+	"github.com/goccy/go-yaml"
+)
+
+type (
+	// GinJSONBinding binds JSON request bodies read through a SubstituteEnvReader.
+	GinJSONBinding struct{}
+	// GinYAMLBinding binds YAML request bodies read through a SubstituteEnvReader.
+	GinYAMLBinding struct{}
+)
+
+// Name returns the binding name, "json".
+func (b GinJSONBinding) Name() string {
+	return "json"
+}
+
+// Bind decodes the JSON request body into a map, reading it through a
+// SubstituteEnvReader, and hands the map to MapUnmarshalValidate together with obj.
+func (b GinJSONBinding) Bind(req *http.Request, obj any) error {
+	m := make(map[string]any)
+	if err := sonic.ConfigDefault.NewDecoder(NewSubstituteEnvReader(req.Body)).Decode(&m); err != nil {
+		return err
+	}
+	return MapUnmarshalValidate(m, obj)
+}
+
+// Name returns the binding name, "yaml".
+func (b GinYAMLBinding) Name() string {
+	return "yaml"
+}
+
+// Bind decodes the YAML request body into a map, reading it through a
+// SubstituteEnvReader, and hands the map to MapUnmarshalValidate together with obj.
+func (b GinYAMLBinding) Bind(req *http.Request, obj any) error {
+	m := make(map[string]any)
+	if err := yaml.NewDecoder(NewSubstituteEnvReader(req.Body)).Decode(&m); err != nil {
+		return err
+	}
+	return MapUnmarshalValidate(m, obj)
+}
diff --git a/internal/serialization/gin_binding_test.go b/internal/serialization/gin_binding_test.go
new file mode 100644
index 00000000..d7f8830e
--- /dev/null
+++ b/internal/serialization/gin_binding_test.go
@@ -0,0 +1,53 @@
+package serialization_test
+
+import (
+	"bytes"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/yusing/godoxy/internal/serialization"
+	gperr "github.com/yusing/goutils/errs"
+)
+
+// TestStruct is the bind target used by TestGinBinding.
+type TestStruct struct {
+	Value  string `json:"value"`
+	Value2 int    `json:"value2"`
+}
+
+// Validate requires Value to be non-empty and Value2, when non-zero, to be within [5, 10].
+func (t *TestStruct) Validate() gperr.Error {
+	if t.Value == "" {
+		return gperr.New("value is required")
+	}
+	if t.Value2 != 0 && (t.Value2 < 5 || t.Value2 > 10) {
+		return gperr.New("value2 must be between 5 and 10")
+	}
+	return nil
+}
+
+// TestGinBinding checks that GinJSONBinding.Bind succeeds or fails as expected for each input.
+func TestGinBinding(t *testing.T) {
+
+	tests := []struct {
+		name    string
+		input   string
+		wantErr bool
+	}{
+		{"valid1", `{"value": "test", "value2": 7}`, false},
+		{"valid2", `{"value": "test"}`, false},
+		{"invalid1", `{"value2": 7}`, true},
+		{"invalid2", `{"value": "test", "value2": 3}`, true},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var dst TestStruct
+			body := bytes.NewBufferString(tt.input)
+			req := httptest.NewRequest("POST", "/", body)
+			err := serialization.GinJSONBinding{}.Bind(req, &dst)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("%s: Bind() error = %v, wantErr %v", tt.name, err, tt.wantErr)
+			}
+		})
+	}
+}
diff --git a/internal/serialization/reader.go b/internal/serialization/reader.go
new file mode 100644
index 00000000..b44ee3cd
--- /dev/null
+++ b/internal/serialization/reader.go
@@ -0,0 +1,133 @@
+package serialization
+
+import (
+	"bytes"
+	"io"
+)
+
+// SubstituteEnvReader wraps an io.Reader and runs substituteEnv over the data
+// as it streams through, so ${VAR} references are replaced while reading.
+type SubstituteEnvReader struct {
+	reader  io.Reader
+	buf     []byte // substituted output not yet handed to the caller
+	pending []byte // raw input tail that may be the start of a ${...} pattern
+	err     error  // sticky error, reported once buf has been drained
+}
+
+// NewSubstituteEnvReader returns a SubstituteEnvReader that reads from reader.
+func NewSubstituteEnvReader(reader io.Reader) *SubstituteEnvReader {
+	return &SubstituteEnvReader{reader: reader}
+}
+
+const (
+	// peekSize is the number of bytes requested from the underlying reader per read.
+	peekSize = 4096
+	// maxVarNameLength bounds how far back from the end of the data
+	// findIncompletePatternStart looks for the start of an unfinished ${...} pattern.
+	maxVarNameLength = 256
+	// maxEmptyReads is how many consecutive (0, nil) reads are tolerated before
+	// Read gives up with io.ErrNoProgress.
+	maxEmptyReads = 100
+)
+
+// Read implements io.Reader. It reads raw data from the underlying reader,
+// passes it through substituteEnv and copies the result into p.
+//
+// A ${...} pattern that is cut off at the end of the data read so far is held
+// back and joined with the next read, so a pattern split across reads is still
+// handed to substituteEnv in one piece. Once the underlying reader reports an
+// error (including io.EOF), whatever is held back is processed as-is and the
+// error is returned after all substituted data has been delivered. An error
+// from substituteEnv is returned immediately and on every later call.
+func (r *SubstituteEnvReader) Read(p []byte) (n int, err error) {
+	if len(p) == 0 {
+		return 0, nil
+	}
+
+	emptyReads := 0
+	for {
+		// Hand out already substituted data first.
+		if len(r.buf) > 0 {
+			n = copy(p, r.buf)
+			r.buf = r.buf[n:]
+			return n, nil
+		}
+		if r.err != nil {
+			return 0, r.err
+		}
+
+		// Read into a fresh slice on every pass (r.buf may end up aliasing the
+		// previous one), with the held-back tail placed in front of the new data.
+		data := make([]byte, len(r.pending)+peekSize)
+		held := copy(data, r.pending)
+		nRead, readErr := r.reader.Read(data[held:])
+		if nRead == 0 && readErr == nil {
+			// (0, nil) means "nothing happened", not EOF: retry a bounded
+			// number of times instead of silently truncating the stream.
+			emptyReads++
+			if emptyReads < maxEmptyReads {
+				continue
+			}
+			readErr = io.ErrNoProgress
+		}
+		emptyReads = 0
+		data = data[:held+nRead]
+		r.pending = nil
+
+		if readErr != nil {
+			// No more input is coming: remember the error and process
+			// everything that is left without holding anything back.
+			r.err = readErr
+		} else if start := findIncompletePatternStart(data); start >= 0 {
+			// Keep a private copy of the unfinished pattern for the next pass.
+			r.pending = bytes.Clone(data[start:])
+			data = data[:start]
+		}
+		if len(data) == 0 {
+			continue
+		}
+
+		substituted, subErr := substituteEnv(data)
+		if subErr != nil {
+			r.err = subErr
+			return 0, subErr
+		}
+		r.buf = substituted
+	}
+}
+
+// findIncompletePatternStart returns the index where an incomplete ${...} pattern starts,
+// or -1 if there's no incomplete pattern at the end.
+func findIncompletePatternStart(data []byte) int {
+	// Look for a '$' near the end that might be the start of ${VAR}.
+	// The longest unfinished pattern is "${" followed by a maxVarNameLength-byte name.
+	searchStart := max(0, len(data)-(maxVarNameLength+2))
+
+	for i := len(data) - 1; i >= searchStart; i-- {
+		if data[i] == '$' {
+			// Check if this is a complete pattern or incomplete
+			if i+1 >= len(data) {
+				// Just "$" at end
+				return i
+			}
+			if data[i+1] == '{' {
+				// Check if there's anything after "${"
+				if i+2 >= len(data) {
+					// Just "${" at end
+					return i
+				}
+				// Check if pattern is complete by looking for '}'
+				for j := i + 2; j < len(data); j++ {
+					if data[j] == '}' {
+						// This pattern is complete, continue searching for another
+						break
+					}
+					if j == len(data)-1 {
+						// Reached end without finding '}', incomplete pattern
+						return i
+					}
+				}
+			}
+		}
+	}
+	return -1
+}
diff --git a/internal/serialization/reader_bench_test.go b/internal/serialization/reader_bench_test.go
new file mode 100644
index 00000000..7a415b6d
--- /dev/null
+++ b/internal/serialization/reader_bench_test.go
@@ -0,0 +1,286 @@
+package serialization
+
+import (
+	"bytes"
+	"io"
+	"os"
+	"strings"
+	"testing"
+)
+
+// setupEnv sets up environment variables for benchmarks
+func setupEnv(b *testing.B) {
+	b.Helper()
+	os.Setenv("BENCH_VAR", "benchmark_value")
+	os.Setenv("BENCH_VAR_2", "second_value")
+	os.Setenv("BENCH_VAR_3", "third_value")
+}
+
+// cleanupEnv cleans up environment variables after benchmarks
+func cleanupEnv(b *testing.B) {
+	b.Helper()
+	os.Unsetenv("BENCH_VAR")
+	os.Unsetenv("BENCH_VAR_2")
+	os.Unsetenv("BENCH_VAR_3")
+}
+
+// BenchmarkSubstituteEnvReader_NoSubstitution benchmarks reading without any env substitutions
+func BenchmarkSubstituteEnvReader_NoSubstitution(b *testing.B) {
+	r := strings.NewReader(`key: value
+name: test
+data: some content here
+`)
+
+	for b.Loop() {
+		reader := NewSubstituteEnvReader(r)
+		_, err := io.ReadAll(reader)
+		if err != nil {
+			b.Fatal(err)
+		}
+		r.Seek(0, io.SeekStart)
+	}
+}
+
+// BenchmarkSubstituteEnvReader_SingleSubstitution benchmarks reading with a single env substitution
+func BenchmarkSubstituteEnvReader_SingleSubstitution(b *testing.B) {
+	setupEnv(b)
+	defer cleanupEnv(b)
+
+	r := strings.NewReader(`key: ${BENCH_VAR}
+`)
+
+	for b.Loop() {
+		reader := NewSubstituteEnvReader(r)
+		_, err := io.ReadAll(reader)
+		if err != nil {
+			b.Fatal(err)
+		}
+		r.Seek(0, io.SeekStart)
+	}
+}
+
+// BenchmarkSubstituteEnvReader_MultipleSubstitutions benchmarks reading with multiple env substitutions
+func BenchmarkSubstituteEnvReader_MultipleSubstitutions(b *testing.B) {
+	setupEnv(b)
+	defer cleanupEnv(b)
+
+	r := strings.NewReader(`key1: ${BENCH_VAR}
+key2: ${BENCH_VAR_2}
+key3: ${BENCH_VAR_3}
+`)
+
+	for b.Loop() {
+		reader := NewSubstituteEnvReader(r)
+		_, err := io.ReadAll(reader)
+		if err != nil {
+			b.Fatal(err)
+		}
+		r.Seek(0, io.SeekStart)
+	}
+}
+
+// BenchmarkSubstituteEnvReader_LargeInput_NoSubstitution benchmarks large input without substitutions
+func BenchmarkSubstituteEnvReader_LargeInput_NoSubstitution(b *testing.B) {
+	r := strings.NewReader(strings.Repeat("x", 100000))
+
+	for b.Loop() {
+		reader := NewSubstituteEnvReader(r)
+		_, err := io.ReadAll(reader)
+		if err != nil {
+			b.Fatal(err)
+		}
+		r.Seek(0, io.SeekStart)
+	}
+}
+
+// BenchmarkSubstituteEnvReader_LargeInput_WithSubstitutions benchmarks large input with scattered substitutions
+func BenchmarkSubstituteEnvReader_LargeInput_WithSubstitutions(b *testing.B) {
+	setupEnv(b)
+	defer cleanupEnv(b)
+
+	var builder bytes.Buffer
+	for range 100 {
+		builder.WriteString(strings.Repeat("x", 1000))
+		builder.WriteString("${BENCH_VAR}")
+	}
+	r := bytes.NewReader(builder.Bytes())
+
+	for b.Loop() {
+		reader := NewSubstituteEnvReader(r)
+		_, err := io.ReadAll(reader)
+		if err != nil {
+			b.Fatal(err)
+		}
+		r.Seek(0, io.SeekStart)
+	}
+}
+
+// BenchmarkSubstituteEnvReader_SmallBuffer benchmarks reading with a small buffer size
+func BenchmarkSubstituteEnvReader_SmallBuffer(b *testing.B) {
+	setupEnv(b)
+	defer cleanupEnv(b)
+
+	r := strings.NewReader(`key: ${BENCH_VAR} and some more content here`)
+	buf := make([]byte, 16)
+
+	for b.Loop() {
+		reader := NewSubstituteEnvReader(r)
+		for {
+			_, err := reader.Read(buf)
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				b.Fatal(err)
+			}
+		}
+		r.Seek(0, io.SeekStart)
+	}
+}
+
+// BenchmarkSubstituteEnvReader_YAMLConfig benchmarks a realistic YAML config scenario
+func BenchmarkSubstituteEnvReader_YAMLConfig(b *testing.B) {
+	setupEnv(b)
+	defer cleanupEnv(b)
+
+	r := strings.NewReader(`database:
+  host: ${BENCH_VAR}
+  port: ${BENCH_VAR_2}
+  username: ${BENCH_VAR_3}
+  password: ${BENCH_VAR}
+cache:
+  enabled: true
+  ttl: ${BENCH_VAR_2}
+server:
+  host: ${BENCH_VAR}
+  port: 8080
+`)
+
+	b.ResetTimer()
+	for b.Loop() {
+		reader := NewSubstituteEnvReader(r)
+		_, err := io.ReadAll(reader)
+		if err != nil {
+			b.Fatal(err)
+		}
+		r.Seek(0, io.SeekStart)
+	}
+}
+
+// BenchmarkSubstituteEnvReader_BoundaryPattern benchmarks patterns at buffer boundaries (4096 bytes)
+func BenchmarkSubstituteEnvReader_BoundaryPattern(b *testing.B) {
+	setupEnv(b)
+	defer cleanupEnv(b)
+
+	// Pattern exactly at 4090 bytes, with ${VAR} crossing the 4096 boundary
+	prefix := strings.Repeat("x", 4090)
+	r := strings.NewReader(prefix + "${BENCH_VAR}")
+
+	for b.Loop() {
+		reader := NewSubstituteEnvReader(r)
+		_, err := io.ReadAll(reader)
+		if err != nil {
+			b.Fatal(err)
+		}
+		r.Seek(0, io.SeekStart)
+	}
+}
+
+// BenchmarkSubstituteEnvReader_MultipleBoundaries benchmarks multiple patterns crossing boundaries
+func BenchmarkSubstituteEnvReader_MultipleBoundaries(b *testing.B) {
+	setupEnv(b)
+	defer cleanupEnv(b)
+
+	var builder bytes.Buffer
+	for range 10 {
+		builder.WriteString(strings.Repeat("x", 4000))
+		builder.WriteString("${BENCH_VAR}")
+	}
+	r := bytes.NewReader(builder.Bytes())
+
+	for b.Loop() {
+		reader := NewSubstituteEnvReader(r)
+		_, err := io.ReadAll(reader)
+		if err != nil {
+			b.Fatal(err)
+		}
+		r.Seek(0, io.SeekStart)
+	}
+}
+
+// BenchmarkSubstituteEnvReader_SpecialChars benchmarks substitution with special characters
+func BenchmarkSubstituteEnvReader_SpecialChars(b *testing.B) {
+	os.Setenv("SPECIAL_BENCH_VAR", `value with "quotes" and \backslash\`)
+	defer os.Unsetenv("SPECIAL_BENCH_VAR")
+
+	r := strings.NewReader(`key: ${SPECIAL_BENCH_VAR}
+`)
+
+	for b.Loop() {
+		reader := NewSubstituteEnvReader(r)
+		_, err := io.ReadAll(reader)
+		if err != nil {
+			b.Fatal(err)
+		}
+		r.Seek(0, io.SeekStart)
+	}
+}
+
+// BenchmarkSubstituteEnvReader_EmptyValue benchmarks substitution with empty value
+func BenchmarkSubstituteEnvReader_EmptyValue(b *testing.B) {
+	os.Setenv("EMPTY_BENCH_VAR", "")
+	defer os.Unsetenv("EMPTY_BENCH_VAR")
+
+	r := strings.NewReader(`key: ${EMPTY_BENCH_VAR}
+`)
+
+	for b.Loop() {
+		reader := NewSubstituteEnvReader(r)
+		_, err := io.ReadAll(reader)
+		if err != nil {
+			b.Fatal(err)
+		}
+		r.Seek(0, io.SeekStart)
+	}
+}
+
+// BenchmarkSubstituteEnvReader_DollarWithoutBrace benchmarks $ without following {
+func BenchmarkSubstituteEnvReader_DollarWithoutBrace(b *testing.B) {
+	os.Setenv("BENCH_VAR", "benchmark_value")
+	defer os.Unsetenv("BENCH_VAR")
+
+	r := strings.NewReader(`price: $100 and $200 for ${BENCH_VAR}`)
+
+	for b.Loop() {
+		reader := NewSubstituteEnvReader(r)
+		_, err := io.ReadAll(reader)
+		if err != nil {
+			b.Fatal(err)
+		}
+		r.Seek(0, io.SeekStart)
+	}
+}
+
+// BenchmarkFindIncompletePatternStart benchmarks the findIncompletePatternStart function
+func BenchmarkFindIncompletePatternStart(b *testing.B) {
+	testCases := []struct {
+		name  string
+		input string
+	}{
+		{"no pattern", strings.Repeat("hello world ", 100)},
+		{"complete pattern", strings.Repeat("hello ${VAR} world ", 50)},
+		{"dollar at end", strings.Repeat("hello ", 100) + "$"},
+		{"incomplete at end", strings.Repeat("hello ", 100) + "${VAR"},
+		{"large input no pattern", strings.Repeat("x", 5000)},
+		{"large input with pattern", strings.Repeat("x", 4000) + "${VAR}"},
+	}
+
+	for _, tc := range testCases {
+		b.Run(tc.name, func(b *testing.B) {
+			data := []byte(tc.input)
+			for b.Loop() {
+				findIncompletePatternStart(data)
+			}
+		})
+	}
+}
diff --git a/internal/serialization/reader_test.go b/internal/serialization/reader_test.go
new file mode 100644
index 00000000..2d9f6961
--- /dev/null
+++ b/internal/serialization/reader_test.go
@@ -0,0 +1,231 @@
+package serialization
+
+import (
+	"bytes"
+	"io"
+	"os"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// TestSubstituteEnvReader_Basic checks that a single ${VAR} is replaced by its quoted value.
+func TestSubstituteEnvReader_Basic(t *testing.T) {
+	os.Setenv("TEST_VAR", "hello")
+	defer os.Unsetenv("TEST_VAR")
+
+	input := []byte(`key: ${TEST_VAR}`)
+	reader := NewSubstituteEnvReader(bytes.NewReader(input))
+
+	output, err := io.ReadAll(reader)
+	require.NoError(t, err)
+	require.Equal(t, `key: "hello"`, string(output))
+}
+
+// TestSubstituteEnvReader_Multiple checks that several patterns in one input are all replaced.
+func TestSubstituteEnvReader_Multiple(t *testing.T) {
+	os.Setenv("VAR1", "first")
+	os.Setenv("VAR2", "second")
+	defer os.Unsetenv("VAR1")
+	defer os.Unsetenv("VAR2")
+
+	input := []byte(`a: ${VAR1}, b: ${VAR2}`)
+	reader := NewSubstituteEnvReader(bytes.NewReader(input))
+
+	output, err := io.ReadAll(reader)
+	require.NoError(t, err)
+	require.Equal(t, `a: "first", b: "second"`, string(output))
+}
+
+// TestSubstituteEnvReader_NoSubstitution checks that input without patterns passes through unchanged.
+func TestSubstituteEnvReader_NoSubstitution(t *testing.T) {
+	input := []byte(`key: value`)
+	reader := NewSubstituteEnvReader(bytes.NewReader(input))
+
+	output, err := io.ReadAll(reader)
+	require.NoError(t, err)
+	require.Equal(t, `key: value`, string(output))
+}
+
+// TestSubstituteEnvReader_UnsetEnvError checks that referencing an unset variable yields an error.
+func TestSubstituteEnvReader_UnsetEnvError(t *testing.T) {
+	os.Unsetenv("UNSET_VAR_FOR_TEST")
+
+	input := []byte(`key: ${UNSET_VAR_FOR_TEST}`)
+	reader := NewSubstituteEnvReader(bytes.NewReader(input))
+
+	_, err := io.ReadAll(reader)
+	require.Error(t, err)
+	require.Contains(t, err.Error(), "UNSET_VAR_FOR_TEST is not set")
+}
+
+// TestSubstituteEnvReader_SmallBuffer checks the output when the caller reads 3 bytes at a time.
+func TestSubstituteEnvReader_SmallBuffer(t *testing.T) {
+	os.Setenv("SMALL_BUF_VAR", "value")
+	defer os.Unsetenv("SMALL_BUF_VAR")
+
+	input := []byte(`key: ${SMALL_BUF_VAR}`)
+	reader := NewSubstituteEnvReader(bytes.NewReader(input))
+
+	var result []byte
+	buf := make([]byte, 3)
+	for {
+		n, err := reader.Read(buf)
+		if n > 0 {
+			result = append(result, buf[:n]...)
+		}
+		if err == io.EOF {
+			break
+		}
+		require.NoError(t, err)
+	}
+	require.Equal(t, `key: "value"`, string(result))
+}
+
+// TestSubstituteEnvReader_SpecialChars checks that quotes and backslashes in the value are escaped.
+func TestSubstituteEnvReader_SpecialChars(t *testing.T) {
+	os.Setenv("SPECIAL_VAR", `hello "world" \n`)
+	defer os.Unsetenv("SPECIAL_VAR")
+
+	input := []byte(`key: ${SPECIAL_VAR}`)
+	reader := NewSubstituteEnvReader(bytes.NewReader(input))
+
+	output, err := io.ReadAll(reader)
+	require.NoError(t, err)
+	require.Equal(t, `key: "hello \"world\" \\n"`, string(output))
+}
+
+// TestSubstituteEnvReader_EmptyValue checks that a variable set to "" is replaced by an empty quoted string.
+func TestSubstituteEnvReader_EmptyValue(t *testing.T) {
+	os.Setenv("EMPTY_VAR", "")
+	defer os.Unsetenv("EMPTY_VAR")
+
+	input := []byte(`key: ${EMPTY_VAR}`)
+	reader := NewSubstituteEnvReader(bytes.NewReader(input))
+
+	output, err := io.ReadAll(reader)
+	require.NoError(t, err)
+	require.Equal(t, `key: ""`, string(output))
+}
+
+// TestSubstituteEnvReader_LargeInput checks substitution in an input larger than one read chunk.
+func TestSubstituteEnvReader_LargeInput(t *testing.T) {
+	os.Setenv("LARGE_VAR", "replaced")
+	defer os.Unsetenv("LARGE_VAR")
+
+	prefix := strings.Repeat("x", 5000)
+	suffix := strings.Repeat("y", 5000)
+	input := []byte(prefix + "${LARGE_VAR}" + suffix)
+
+	reader := NewSubstituteEnvReader(bytes.NewReader(input))
+
+	output, err := io.ReadAll(reader)
+	require.NoError(t, err)
+	expected := prefix + `"replaced"` + suffix
+	require.Equal(t, expected, string(output))
+}
+
+// TestSubstituteEnvReader_PatternAtBoundary checks a pattern that starts 4090 bytes into the input.
+func TestSubstituteEnvReader_PatternAtBoundary(t *testing.T) {
+	os.Setenv("BOUNDARY_VAR", "boundary_value")
+	defer os.Unsetenv("BOUNDARY_VAR")
+
+	prefix := strings.Repeat("a", 4090)
+	input := []byte(prefix + "${BOUNDARY_VAR}")
+
+	reader := NewSubstituteEnvReader(bytes.NewReader(input))
+
+	output, err := io.ReadAll(reader)
+	require.NoError(t, err)
+	expected := prefix + `"boundary_value"`
+	require.Equal(t, expected, string(output))
+}
+
+// TestSubstituteEnvReader_MultiplePatternsBoundary checks two patterns placed after a 4090-byte prefix.
+func TestSubstituteEnvReader_MultiplePatternsBoundary(t *testing.T) {
+	os.Setenv("VAR_A", "aaa")
+	os.Setenv("VAR_B", "bbb")
+	defer os.Unsetenv("VAR_A")
+	defer os.Unsetenv("VAR_B")
+
+	prefix := strings.Repeat("x", 4090)
+	input := []byte(prefix + "${VAR_A} middle ${VAR_B}")
+
+	reader := NewSubstituteEnvReader(bytes.NewReader(input))
+
+	output, err := io.ReadAll(reader)
+	require.NoError(t, err)
+	expected := prefix + `"aaa" middle "bbb"`
+	require.Equal(t, expected, string(output))
+}
+
+// TestSubstituteEnvReader_YAMLConfig checks substitution across a multi-line YAML document.
+func TestSubstituteEnvReader_YAMLConfig(t *testing.T) {
+	os.Setenv("DB_HOST", "localhost")
+	os.Setenv("DB_PORT", "5432")
+	os.Setenv("DB_PASSWORD", "secret123")
+	defer os.Unsetenv("DB_HOST")
+	defer os.Unsetenv("DB_PORT")
+	defer os.Unsetenv("DB_PASSWORD")
+
+	input := []byte(`database:
+  host: ${DB_HOST}
+  port: ${DB_PORT}
+  password: ${DB_PASSWORD}
+`)
+	reader := NewSubstituteEnvReader(bytes.NewReader(input))
+
+	output, err := io.ReadAll(reader)
+	require.NoError(t, err)
+	expected := `database:
+  host: "localhost"
+  port: "5432"
+  password: "secret123"
+`
+	require.Equal(t, expected, string(output))
+}
+
+// TestSubstituteEnvReader_DollarWithoutBrace checks that "$NAME" without braces is left untouched.
+func TestSubstituteEnvReader_DollarWithoutBrace(t *testing.T) {
+	input := []byte(`key: $NOT_A_PATTERN`)
+	reader := NewSubstituteEnvReader(bytes.NewReader(input))
+
+	output, err := io.ReadAll(reader)
+	require.NoError(t, err)
+	require.Equal(t, `key: $NOT_A_PATTERN`, string(output))
+}
+
+// TestSubstituteEnvReader_EmptyInput checks that empty input yields empty output and no error.
+func TestSubstituteEnvReader_EmptyInput(t *testing.T) {
+	input := []byte(``)
+	reader := NewSubstituteEnvReader(bytes.NewReader(input))
+
+	output, err := io.ReadAll(reader)
+	require.NoError(t, err)
+	require.Equal(t, ``, string(output))
+}
+
+// TestFindIncompletePatternStart checks the returned index for complete, incomplete and absent patterns.
+func TestFindIncompletePatternStart(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected int
+	}{
+		{"no pattern", "hello world", -1},
+		{"complete pattern", "hello ${VAR} world", -1},
+		{"dollar at end", "hello $", 6},
+		{"dollar brace at end", "hello ${", 6},
+		{"incomplete var at end", "hello ${VAR", 6},
+		{"complete then incomplete", "hello ${VAR} ${INCOMPLETE", 13},
+		{"multiple complete", "${A} ${B} ${C}", -1},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := findIncompletePatternStart([]byte(tt.input))
+			require.Equal(t, tt.expected, result)
+		})
+	}
+}