mirror of
https://github.com/yusing/godoxy.git
synced 2026-04-27 18:57:04 +02:00
fix(middleware): allow HTML rewrite for chunked and unknown-length bodies
Relax response-body gating so HTML and XHTML can be buffered when Transfer-Encoding is chunked-only, or when Content-Length is missing, while still rejecting non-identity encodings that are not chunked HTML and other non-HTML cases. Update modifyHTML to cap reads for unknown length, splice the original stream back when the cap is hit, and document the behavior in the package README. Extend tests for themed middleware and the rewrite gate.
This commit is contained in:
@@ -14,7 +14,7 @@ This package implements a flexible HTTP middleware system for GoDoxy. Middleware
|
||||
- **Entrypoint Overlay Promotion**: Promote route-local middleware entries with `bypass` into matching entrypoint middleware for HTTP routes
|
||||
- **Dynamic Loading**: Load middleware definitions from files at runtime
|
||||
|
||||
Response body rewriting is only applied to unencoded, text-like content types (for example `text/*`, JSON, YAML, XML). Response status and headers can always be modified.
|
||||
Response body rewriting is only applied to unencoded content. Known-size text-like responses (for example `text/*`, JSON, YAML, XML) are eligible, and HTML/XHTML responses may also be rewritten when their size is unknown or chunked, provided they stay within the rewrite buffer limit. Response status and headers can always be modified.
|
||||
|
||||
Request-variable substitution reads request fields from the active outbound request. Upstream variables such as `$upstream_host` and `$upstream_url` resolve from the current route context, which is normally attached by the route / reverse-proxy layer before middleware executes.
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ import (
|
||||
|
||||
const (
|
||||
mimeEventStream = "text/event-stream"
|
||||
headerContentType = "Content-Type"
|
||||
maxModifiableBody = 4 * 1024 * 1024 // 4MB
|
||||
)
|
||||
|
||||
@@ -294,21 +293,27 @@ func canBufferAndModifyResponseBody(respHeader http.Header) bool {
|
||||
if err != nil { // skip if invalid content type
|
||||
return false
|
||||
}
|
||||
if hasNonIdentityEncoding(respHeader.Values("Transfer-Encoding")) {
|
||||
return false
|
||||
}
|
||||
if hasNonIdentityEncoding(respHeader.Values("Content-Encoding")) {
|
||||
return false
|
||||
}
|
||||
contentLengthKnown := false
|
||||
if contentLengthRaw := respHeader.Get("Content-Length"); contentLengthRaw != "" {
|
||||
contentLength, err := strconv.ParseInt(contentLengthRaw, 10, 64)
|
||||
if err != nil || contentLength >= maxModifiableBody {
|
||||
return false
|
||||
}
|
||||
contentLengthKnown = true
|
||||
}
|
||||
if !isTextLikeMediaType(contentType) {
|
||||
return false
|
||||
}
|
||||
transferEncoding := respHeader.Values("Transfer-Encoding")
|
||||
if hasNonIdentityEncoding(transferEncoding) {
|
||||
return isHTMLLikeMediaType(contentType) && isChunkedTransferEncoding(transferEncoding)
|
||||
}
|
||||
if !contentLengthKnown {
|
||||
return isHTMLLikeMediaType(contentType)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -325,6 +330,24 @@ func hasNonIdentityEncoding(values []string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func isChunkedTransferEncoding(values []string) bool {
|
||||
foundChunked := false
|
||||
for _, value := range values {
|
||||
for token := range strings.SplitSeq(value, ",") {
|
||||
token = strings.TrimSpace(token)
|
||||
switch {
|
||||
case token == "", strings.EqualFold(token, "identity"):
|
||||
continue
|
||||
case strings.EqualFold(token, "chunked"):
|
||||
foundChunked = true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return foundChunked
|
||||
}
|
||||
|
||||
func isTextLikeMediaType(contentType string) bool {
|
||||
if contentType == "" {
|
||||
return false
|
||||
@@ -351,6 +374,10 @@ func isTextLikeMediaType(contentType string) bool {
|
||||
return contentType == "application/x-www-form-urlencoded"
|
||||
}
|
||||
|
||||
func isHTMLLikeMediaType(contentType string) bool {
|
||||
return contentType == "text/html" || contentType == "application/xhtml+xml"
|
||||
}
|
||||
|
||||
func (m *Middleware) LogWarn(req *http.Request) *zerolog.Event {
|
||||
//nolint:zerologlint
|
||||
return log.Warn().Str("middleware", m.name).
|
||||
|
||||
@@ -126,7 +126,7 @@ func TestMiddlewareResponseRewriteGate(t *testing.T) {
|
||||
expectBody: "binary",
|
||||
},
|
||||
{
|
||||
name: "block_body_rewrite_for_transfer_encoded_html",
|
||||
name: "allow_body_rewrite_for_transfer_encoded_html",
|
||||
respHeaders: http.Header{
|
||||
"Content-Type": []string{"text/html"},
|
||||
"Transfer-Encoding": []string{"chunked"},
|
||||
@@ -134,6 +134,17 @@ func TestMiddlewareResponseRewriteGate(t *testing.T) {
|
||||
respBody: []byte("<html><body>original</body></html>"),
|
||||
expectStatus: http.StatusTeapot,
|
||||
expectHeader: "1",
|
||||
expectBody: "rewritten-body",
|
||||
},
|
||||
{
|
||||
name: "block_body_rewrite_for_non_chunked_transfer_encoded_html",
|
||||
respHeaders: http.Header{
|
||||
"Content-Type": []string{"text/html"},
|
||||
"Transfer-Encoding": []string{"gzip"},
|
||||
},
|
||||
respBody: []byte("<html><body>original</body></html>"),
|
||||
expectStatus: http.StatusTeapot,
|
||||
expectHeader: "1",
|
||||
expectBody: "<html><body>original</body></html>",
|
||||
},
|
||||
{
|
||||
@@ -208,12 +219,23 @@ func TestMiddlewareResponseRewriteGateServeHTTP(t *testing.T) {
|
||||
expectBody: "binary",
|
||||
},
|
||||
{
|
||||
name: "block_body_rewrite_for_transfer_encoded_html",
|
||||
name: "allow_body_rewrite_for_transfer_encoded_html",
|
||||
respHeaders: http.Header{
|
||||
"Content-Type": []string{"text/html"},
|
||||
"Transfer-Encoding": []string{"chunked"},
|
||||
},
|
||||
respBody: "<html><body>original</body></html>",
|
||||
expectStatusCode: http.StatusTeapot,
|
||||
expectHeader: "1",
|
||||
expectBody: "rewritten-body",
|
||||
},
|
||||
{
|
||||
name: "block_body_rewrite_for_non_chunked_transfer_encoded_html",
|
||||
respHeaders: http.Header{
|
||||
"Content-Type": []string{"text/html"},
|
||||
"Transfer-Encoding": []string{"gzip"},
|
||||
},
|
||||
respBody: "<html><body>original</body></html>",
|
||||
expectStatusCode: http.StatusOK,
|
||||
expectHeader: "",
|
||||
expectBody: "<html><body>original</body></html>",
|
||||
@@ -290,10 +312,10 @@ func TestMiddlewareHeaderRewriteDoesNotBufferLargeBody(t *testing.T) {
|
||||
expect.Equal(t, string(data), "video")
|
||||
}
|
||||
|
||||
func TestThemedSkipsBodyRewriteWhenRewriteBlocked(t *testing.T) {
|
||||
func TestThemedRewritesChunkedHTML(t *testing.T) {
|
||||
result, err := newMiddlewareTest(Themed, &testArgs{
|
||||
middlewareOpt: OptionsRaw{
|
||||
"theme": DarkTheme,
|
||||
"css": "https://example.com/theme.css",
|
||||
},
|
||||
respHeaders: http.Header{
|
||||
"Content-Type": []string{"text/html; charset=utf-8"},
|
||||
@@ -302,5 +324,22 @@ func TestThemedSkipsBodyRewriteWhenRewriteBlocked(t *testing.T) {
|
||||
respBody: []byte("<html><body>original</body></html>"),
|
||||
})
|
||||
expect.NoError(t, err)
|
||||
expect.Equal(t, string(result.Data), "<html><body>original</body></html>")
|
||||
expect.Equal(t, string(result.Data), `<html><head></head><body>original<link rel="stylesheet" href="https://example.com/theme.css"/></body></html>`)
|
||||
}
|
||||
|
||||
func TestThemedSkipsOversizedChunkedHTML(t *testing.T) {
|
||||
originalBody := "<html><body>" + strings.Repeat("a", maxModifiableBody) + "</body></html>"
|
||||
|
||||
result, err := newMiddlewareTest(Themed, &testArgs{
|
||||
middlewareOpt: OptionsRaw{
|
||||
"css": "https://example.com/theme.css",
|
||||
},
|
||||
respHeaders: http.Header{
|
||||
"Content-Type": []string{"text/html; charset=utf-8"},
|
||||
"Transfer-Encoding": []string{"chunked"},
|
||||
},
|
||||
respBody: []byte(originalBody),
|
||||
})
|
||||
expect.NoError(t, err)
|
||||
expect.Equal(t, string(result.Data), originalBody)
|
||||
}
|
||||
|
||||
@@ -51,16 +51,38 @@ func (m *modifyHTML) modifyResponse(resp *http.Response) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Skip modification for streaming/chunked responses to avoid blocking reads
|
||||
// Unknown content length or any transfer encoding indicates streaming.
|
||||
// if resp.ContentLength < 0 || len(resp.TransferEncoding) > 0 {
|
||||
// log.Debug().Str("url", fullURL(resp.Request)).Strs("transfer-encoding", resp.TransferEncoding).Msg("skipping modification for streaming/chunked response")
|
||||
// return nil
|
||||
// }
|
||||
|
||||
// NOTE: do not put it in the defer, it will be used as resp.Body
|
||||
content, release, err := httputils.ReadAllBody(resp)
|
||||
resp.Body.Close()
|
||||
var (
|
||||
content []byte
|
||||
release func([]byte)
|
||||
err error
|
||||
)
|
||||
if resp.ContentLength >= int64(maxModifiableBody) {
|
||||
return nil
|
||||
}
|
||||
|
||||
originalBody := resp.Body
|
||||
if resp.ContentLength < 0 {
|
||||
// tmp swap Body to LimitedReader
|
||||
limited := io.LimitedReader{R: originalBody, N: int64(maxModifiableBody) + 1}
|
||||
resp.Body = io.NopCloser(&limited)
|
||||
content, release, err = httputils.ReadAllBody(resp)
|
||||
// Successfully read N bytes
|
||||
if err == nil && limited.N == 0 {
|
||||
fullReader := io.NopCloser(io.MultiReader(bytes.NewReader(content), originalBody))
|
||||
onClose := func() {
|
||||
release(content)
|
||||
_ = originalBody.Close()
|
||||
}
|
||||
resp.Body = ioutils.NewHookReadCloser(fullReader, onClose)
|
||||
resp.ContentLength = -1
|
||||
resp.Header.Del("Content-Length")
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
content, release, err = httputils.ReadAllBody(resp)
|
||||
}
|
||||
_ = originalBody.Close()
|
||||
if err != nil {
|
||||
log.Err(err).Str("url", fullURL(resp.Request)).Msg("failed to read response body")
|
||||
// Fail open: do not abort the response. Return an empty body safely.
|
||||
|
||||
Reference in New Issue
Block a user