diff --git a/internal/net/gphttp/middleware/README.md b/internal/net/gphttp/middleware/README.md index fd5171a7..a48ac257 100644 --- a/internal/net/gphttp/middleware/README.md +++ b/internal/net/gphttp/middleware/README.md @@ -14,7 +14,7 @@ This package implements a flexible HTTP middleware system for GoDoxy. Middleware - **Entrypoint Overlay Promotion**: Promote route-local middleware entries with `bypass` into matching entrypoint middleware for HTTP routes - **Dynamic Loading**: Load middleware definitions from files at runtime -Response body rewriting is only applied to unencoded, text-like content types (for example `text/*`, JSON, YAML, XML). Response status and headers can always be modified. +Response body rewriting is only applied to unencoded content. Known-size text-like responses (for example `text/*`, JSON, YAML, XML) are eligible, and HTML/XHTML responses may also be rewritten when their size is unknown or chunked, provided they stay within the rewrite buffer limit. Response status and headers can always be modified. Request-variable substitution reads request fields from the active outbound request. Upstream variables such as `$upstream_host` and `$upstream_url` resolve from the current route context, which is normally attached by the route / reverse-proxy layer before middleware executes. diff --git a/internal/net/gphttp/middleware/middleware.go b/internal/net/gphttp/middleware/middleware.go index 91803f2a..cb2a4e56 100644 --- a/internal/net/gphttp/middleware/middleware.go +++ b/internal/net/gphttp/middleware/middleware.go @@ -21,7 +21,6 @@ import ( const ( mimeEventStream = "text/event-stream" - headerContentType = "Content-Type" maxModifiableBody = 4 * 1024 * 1024 // 4MB ) @@ -294,21 +293,27 @@ func canBufferAndModifyResponseBody(respHeader http.Header) bool { if err != nil { // skip if invalid content type return false } - if hasNonIdentityEncoding(respHeader.Values("Transfer-Encoding")) { - return false - } if hasNonIdentityEncoding(respHeader.Values("Content-Encoding")) { return false } + contentLengthKnown := false if contentLengthRaw := respHeader.Get("Content-Length"); contentLengthRaw != "" { contentLength, err := strconv.ParseInt(contentLengthRaw, 10, 64) if err != nil || contentLength >= maxModifiableBody { return false } + contentLengthKnown = true } if !isTextLikeMediaType(contentType) { return false } + transferEncoding := respHeader.Values("Transfer-Encoding") + if hasNonIdentityEncoding(transferEncoding) { + return isHTMLLikeMediaType(contentType) && isChunkedTransferEncoding(transferEncoding) + } + if !contentLengthKnown { + return isHTMLLikeMediaType(contentType) + } return true } @@ -325,6 +330,24 @@ func hasNonIdentityEncoding(values []string) bool { return false } +func isChunkedTransferEncoding(values []string) bool { + foundChunked := false + for _, value := range values { + for token := range strings.SplitSeq(value, ",") { + token = strings.TrimSpace(token) + switch { + case token == "", strings.EqualFold(token, "identity"): + continue + case strings.EqualFold(token, "chunked"): + foundChunked = true + default: + return false + } + } + } + return foundChunked +} + func isTextLikeMediaType(contentType string) bool { if contentType == "" { return false @@ -351,6 +374,10 @@ func isTextLikeMediaType(contentType string) bool { return contentType == "application/x-www-form-urlencoded" } +func isHTMLLikeMediaType(contentType string) bool { + return contentType == "text/html" || contentType == "application/xhtml+xml" +} + func (m *Middleware) LogWarn(req *http.Request) *zerolog.Event { //nolint:zerologlint return log.Warn().Str("middleware", m.name). diff --git a/internal/net/gphttp/middleware/middleware_test.go b/internal/net/gphttp/middleware/middleware_test.go index 5d961ade..6f66dab9 100644 --- a/internal/net/gphttp/middleware/middleware_test.go +++ b/internal/net/gphttp/middleware/middleware_test.go @@ -126,7 +126,7 @@ func TestMiddlewareResponseRewriteGate(t *testing.T) { expectBody: "binary", }, { - name: "block_body_rewrite_for_transfer_encoded_html", + name: "allow_body_rewrite_for_transfer_encoded_html", respHeaders: http.Header{ "Content-Type": []string{"text/html"}, "Transfer-Encoding": []string{"chunked"}, @@ -134,6 +134,17 @@ func TestMiddlewareResponseRewriteGate(t *testing.T) { respBody: []byte("
original"), expectStatus: http.StatusTeapot, expectHeader: "1", + expectBody: "rewritten-body", + }, + { + name: "block_body_rewrite_for_non_chunked_transfer_encoded_html", + respHeaders: http.Header{ + "Content-Type": []string{"text/html"}, + "Transfer-Encoding": []string{"gzip"}, + }, + respBody: []byte("original"), + expectStatus: http.StatusTeapot, + expectHeader: "1", expectBody: "original", }, { @@ -208,12 +219,23 @@ func TestMiddlewareResponseRewriteGateServeHTTP(t *testing.T) { expectBody: "binary", }, { - name: "block_body_rewrite_for_transfer_encoded_html", + name: "allow_body_rewrite_for_transfer_encoded_html", respHeaders: http.Header{ "Content-Type": []string{"text/html"}, "Transfer-Encoding": []string{"chunked"}, }, respBody: "original", + expectStatusCode: http.StatusTeapot, + expectHeader: "1", + expectBody: "rewritten-body", + }, + { + name: "block_body_rewrite_for_non_chunked_transfer_encoded_html", + respHeaders: http.Header{ + "Content-Type": []string{"text/html"}, + "Transfer-Encoding": []string{"gzip"}, + }, + respBody: "original", expectStatusCode: http.StatusOK, expectHeader: "", expectBody: "original", @@ -290,10 +312,10 @@ func TestMiddlewareHeaderRewriteDoesNotBufferLargeBody(t *testing.T) { expect.Equal(t, string(data), "video") } -func TestThemedSkipsBodyRewriteWhenRewriteBlocked(t *testing.T) { +func TestThemedRewritesChunkedHTML(t *testing.T) { result, err := newMiddlewareTest(Themed, &testArgs{ middlewareOpt: OptionsRaw{ - "theme": DarkTheme, + "css": "https://example.com/theme.css", }, respHeaders: http.Header{ "Content-Type": []string{"text/html; charset=utf-8"}, @@ -302,5 +324,22 @@ func TestThemedSkipsBodyRewriteWhenRewriteBlocked(t *testing.T) { respBody: []byte("original"), }) expect.NoError(t, err) - expect.Equal(t, string(result.Data), "original") + expect.Equal(t, string(result.Data), `original`) +} + +func TestThemedSkipsOversizedChunkedHTML(t *testing.T) { + originalBody := "" + strings.Repeat("a", maxModifiableBody) + "" + + result, err := newMiddlewareTest(Themed, &testArgs{ + middlewareOpt: OptionsRaw{ + "css": "https://example.com/theme.css", + }, + respHeaders: http.Header{ + "Content-Type": []string{"text/html; charset=utf-8"}, + "Transfer-Encoding": []string{"chunked"}, + }, + respBody: []byte(originalBody), + }) + expect.NoError(t, err) + expect.Equal(t, string(result.Data), originalBody) } diff --git a/internal/net/gphttp/middleware/modify_html.go b/internal/net/gphttp/middleware/modify_html.go index ea609daa..fff9757a 100644 --- a/internal/net/gphttp/middleware/modify_html.go +++ b/internal/net/gphttp/middleware/modify_html.go @@ -51,16 +51,38 @@ func (m *modifyHTML) modifyResponse(resp *http.Response) error { return nil } - // Skip modification for streaming/chunked responses to avoid blocking reads - // Unknown content length or any transfer encoding indicates streaming. - // if resp.ContentLength < 0 || len(resp.TransferEncoding) > 0 { - // log.Debug().Str("url", fullURL(resp.Request)).Strs("transfer-encoding", resp.TransferEncoding).Msg("skipping modification for streaming/chunked response") - // return nil - // } - // NOTE: do not put it in the defer, it will be used as resp.Body - content, release, err := httputils.ReadAllBody(resp) - resp.Body.Close() + var ( + content []byte + release func([]byte) + err error + ) + if resp.ContentLength >= int64(maxModifiableBody) { + return nil + } + + originalBody := resp.Body + if resp.ContentLength < 0 { + // tmp swap Body to LimitedReader + limited := io.LimitedReader{R: originalBody, N: int64(maxModifiableBody) + 1} + resp.Body = io.NopCloser(&limited) + content, release, err = httputils.ReadAllBody(resp) + // Successfully read N bytes + if err == nil && limited.N == 0 { + fullReader := io.NopCloser(io.MultiReader(bytes.NewReader(content), originalBody)) + onClose := func() { + release(content) + _ = originalBody.Close() + } + resp.Body = ioutils.NewHookReadCloser(fullReader, onClose) + resp.ContentLength = -1 + resp.Header.Del("Content-Length") + return nil + } + } else { + content, release, err = httputils.ReadAllBody(resp) + } + _ = originalBody.Close() if err != nil { log.Err(err).Str("url", fullURL(resp.Request)).Msg("failed to read response body") // Fail open: do not abort the response. Return an empty body safely.