merge: better favicon handling

This commit is contained in:
yusing
2025-04-24 15:34:47 +08:00
parent 31812430f1
commit 5df2553774
11 changed files with 722 additions and 330 deletions

View File

@@ -1,101 +0,0 @@
package favicon
import (
"encoding/json"
"time"
"github.com/yusing/go-proxy/internal/common"
"github.com/yusing/go-proxy/internal/jsonstore"
"github.com/yusing/go-proxy/internal/logging"
"github.com/yusing/go-proxy/internal/route/routes"
"github.com/yusing/go-proxy/internal/task"
)
type cacheEntry struct {
Icon []byte `json:"icon"`
LastAccess time.Time `json:"lastAccess"`
}
// cache key can be absolute url or route name.
var iconCache = jsonstore.Store[*cacheEntry](common.NamespaceIconCache)
const (
iconCacheTTL = 3 * 24 * time.Hour
cleanUpInterval = time.Hour
)
func init() {
go func() {
cleanupTicker := time.NewTicker(cleanUpInterval)
defer cleanupTicker.Stop()
for {
select {
case <-task.RootContextCanceled():
return
case <-cleanupTicker.C:
pruneExpiredIconCache()
}
}
}()
}
func pruneExpiredIconCache() {
nPruned := 0
for key, icon := range iconCache.Range {
if icon.IsExpired() {
iconCache.Delete(key)
nPruned++
}
}
if nPruned > 0 {
logging.Info().Int("pruned", nPruned).Msg("pruned expired icon cache")
}
}
func routeKey(r routes.HTTPRoute) string {
return r.ProviderName() + ":" + r.Name()
}
func PruneRouteIconCache(route routes.HTTPRoute) {
iconCache.Delete(routeKey(route))
}
func loadIconCache(key string) *fetchResult {
icon, ok := iconCache.Load(key)
if ok && icon != nil {
logging.Debug().
Str("key", key).
Msg("icon found in cache")
icon.LastAccess = time.Now()
return &fetchResult{icon: icon.Icon}
}
return nil
}
func storeIconCache(key string, icon []byte) {
iconCache.Store(key, &cacheEntry{Icon: icon, LastAccess: time.Now()})
}
func (e *cacheEntry) IsExpired() bool {
return time.Since(e.LastAccess) > iconCacheTTL
}
func (e *cacheEntry) UnmarshalJSON(data []byte) error {
attempt := struct {
Icon []byte `json:"icon"`
LastAccess time.Time `json:"lastAccess"`
}{}
err := json.Unmarshal(data, &attempt)
if err == nil {
e.Icon = attempt.Icon
e.LastAccess = attempt.LastAccess
return nil
}
// fallback to bytes
err = json.Unmarshal(data, &e.Icon)
if err == nil {
e.LastAccess = time.Now()
return nil
}
return err
}

View File

@@ -1,37 +0,0 @@
package favicon
import (
"bufio"
"errors"
"net"
"net/http"
)
type content struct {
header http.Header
data []byte
status int
}
func newContent() *content {
return &content{
header: make(http.Header),
}
}
func (c *content) Header() http.Header {
return c.header
}
func (c *content) Write(data []byte) (int, error) {
c.data = append(c.data, data...)
return len(data), nil
}
func (c *content) WriteHeader(statusCode int) {
c.status = statusCode
}
func (c *content) Hijack() (net.Conn, *bufio.ReadWriter, error) {
return nil, nil, errors.New("not supported")
}

View File

@@ -1,48 +1,13 @@
package favicon
import (
"bytes"
"context"
"errors"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/vincent-petithory/dataurl"
"github.com/yusing/go-proxy/internal/gperr"
"github.com/yusing/go-proxy/internal/homepage"
"github.com/yusing/go-proxy/internal/logging"
gphttp "github.com/yusing/go-proxy/internal/net/gphttp"
"github.com/yusing/go-proxy/internal/net/gphttp"
"github.com/yusing/go-proxy/internal/route/routes"
"github.com/yusing/go-proxy/internal/utils/strutils"
)
type fetchResult struct {
icon []byte
contentType string
statusCode int
errMsg string
}
func (res *fetchResult) OK() bool {
return res.icon != nil
}
func (res *fetchResult) ContentType() string {
if res.contentType == "" {
if bytes.HasPrefix(res.icon, []byte("<svg")) || bytes.HasPrefix(res.icon, []byte("<?xml")) {
return "image/svg+xml"
}
return "image/x-icon"
}
return res.contentType
}
const (
MaxRedirectDepth = 5
)
// GetFavIcon returns the favicon of the route
@@ -71,213 +36,42 @@ func GetFavIcon(w http.ResponseWriter, req *http.Request) {
gphttp.ClientError(w, err, http.StatusBadRequest)
return
}
fetchResult := getFavIconFromURL(&iconURL)
fetchResult := homepage.FetchFavIconFromURL(req.Context(), &iconURL)
if !fetchResult.OK() {
http.Error(w, fetchResult.errMsg, fetchResult.statusCode)
http.Error(w, fetchResult.ErrMsg, fetchResult.StatusCode)
return
}
w.Header().Set("Content-Type", fetchResult.ContentType())
gphttp.WriteBody(w, fetchResult.icon)
gphttp.WriteBody(w, fetchResult.Icon)
return
}
// try with route.Homepage.Icon
// try with route.Icon
r, ok := routes.HTTP.Get(alias)
if !ok {
gphttp.ClientError(w, errors.New("no such route"), http.StatusNotFound)
return
}
var result *fetchResult
var result *homepage.FetchResult
hp := r.HomepageItem()
if hp.Icon != nil {
if hp.Icon.IconSource == homepage.IconSourceRelative {
result = findIcon(r, req, hp.Icon.Value)
result = homepage.FindIcon(req.Context(), r, hp.Icon.Value)
} else {
result = getFavIconFromURL(hp.Icon)
result = homepage.FetchFavIconFromURL(req.Context(), hp.Icon)
}
} else {
// try extract from "link[rel=icon]"
result = findIcon(r, req, "/")
result = homepage.FindIcon(req.Context(), r, "/")
}
if result.statusCode == 0 {
result.statusCode = http.StatusOK
if result.StatusCode == 0 {
result.StatusCode = http.StatusOK
}
if !result.OK() {
http.Error(w, result.errMsg, result.statusCode)
http.Error(w, result.ErrMsg, result.StatusCode)
return
}
w.Header().Set("Content-Type", result.ContentType())
gphttp.WriteBody(w, result.icon)
}
func getFavIconFromURL(iconURL *homepage.IconURL) *fetchResult {
switch iconURL.IconSource {
case homepage.IconSourceAbsolute:
return fetchIconAbsolute(iconURL.URL())
case homepage.IconSourceRelative:
return &fetchResult{statusCode: http.StatusBadRequest, errMsg: "unexpected relative icon"}
case homepage.IconSourceWalkXCode, homepage.IconSourceSelfhSt:
return fetchKnownIcon(iconURL)
}
return &fetchResult{statusCode: http.StatusBadRequest, errMsg: "invalid icon source"}
}
func fetchIconAbsolute(url string) *fetchResult {
if result := loadIconCache(url); result != nil {
return result
}
resp, err := gphttp.Get(url)
if err != nil || resp.StatusCode != http.StatusOK {
if err == nil {
err = errors.New(resp.Status)
}
logging.Error().Err(err).
Str("url", url).
Msg("failed to get icon")
return &fetchResult{statusCode: http.StatusBadGateway, errMsg: "connection error"}
}
defer resp.Body.Close()
icon, err := io.ReadAll(resp.Body)
if err != nil {
logging.Error().Err(err).
Str("url", url).
Msg("failed to read icon")
return &fetchResult{statusCode: http.StatusInternalServerError, errMsg: "internal error"}
}
storeIconCache(url, icon)
return &fetchResult{icon: icon}
}
var nameSanitizer = strings.NewReplacer(
"_", "-",
" ", "-",
"(", "",
")", "",
)
func sanitizeName(name string) string {
return strings.ToLower(nameSanitizer.Replace(name))
}
func fetchKnownIcon(url *homepage.IconURL) *fetchResult {
// if icon isn't in the list, no need to fetch
if !url.HasIcon() {
logging.Debug().
Str("value", url.String()).
Str("url", url.URL()).
Msg("no such icon")
return &fetchResult{statusCode: http.StatusNotFound, errMsg: "no such icon"}
}
return fetchIconAbsolute(url.URL())
}
func fetchIcon(filetype, filename string) *fetchResult {
result := fetchKnownIcon(homepage.NewSelfhStIconURL(filename, filetype))
if result.icon == nil {
return result
}
return fetchKnownIcon(homepage.NewWalkXCodeIconURL(filename, filetype))
}
func findIcon(r routes.HTTPRoute, req *http.Request, uri string) *fetchResult {
key := routeKey(r)
if result := loadIconCache(key); result != nil {
return result
}
result := fetchIcon("png", sanitizeName(r.Name()))
cont := r.ContainerInfo()
if !result.OK() && cont != nil {
result = fetchIcon("png", sanitizeName(cont.Image.Name))
}
if !result.OK() {
// fallback to parse html
result = findIconSlow(r, req, uri, 0)
}
if result.OK() {
storeIconCache(key, result.icon)
}
return result
}
func findIconSlow(r routes.HTTPRoute, req *http.Request, uri string, depth int) *fetchResult {
ctx, cancel := context.WithTimeoutCause(req.Context(), 3*time.Second, errors.New("favicon request timeout"))
defer cancel()
newReq := req.WithContext(ctx)
newReq.Header.Set("Accept-Encoding", "identity") // disable compression
u, err := url.ParseRequestURI(strutils.SanitizeURI(uri))
if err != nil {
logging.Error().Err(err).
Str("route", r.Name()).
Str("path", uri).
Msg("failed to parse uri")
return &fetchResult{statusCode: http.StatusInternalServerError, errMsg: "cannot parse uri"}
}
newReq.URL.Path = u.Path
newReq.URL.RawPath = u.RawPath
newReq.URL.RawQuery = u.RawQuery
newReq.RequestURI = u.String()
c := newContent()
r.ServeHTTP(c, newReq)
if c.status != http.StatusOK {
switch c.status {
case 0:
return &fetchResult{statusCode: http.StatusBadGateway, errMsg: "connection error"}
default:
if loc := c.Header().Get("Location"); loc != "" {
if depth > MaxRedirectDepth {
return &fetchResult{statusCode: http.StatusBadGateway, errMsg: "too many redirects"}
}
loc = strutils.SanitizeURI(loc)
if loc == "/" || loc == newReq.URL.Path {
return &fetchResult{statusCode: http.StatusBadGateway, errMsg: "circular redirect"}
}
return findIconSlow(r, req, loc, depth+1)
}
}
return &fetchResult{statusCode: c.status, errMsg: "upstream error: " + string(c.data)}
}
// return icon data
if !gphttp.GetContentType(c.header).IsHTML() {
return &fetchResult{icon: c.data, contentType: c.header.Get("Content-Type")}
}
// try extract from "link[rel=icon]" from path "/"
doc, err := goquery.NewDocumentFromReader(bytes.NewBuffer(c.data))
if err != nil {
logging.Error().Err(err).
Str("route", r.Name()).
Msg("failed to parse html")
return &fetchResult{statusCode: http.StatusInternalServerError, errMsg: "internal error"}
}
ele := doc.Find("head > link[rel=icon]").First()
if ele.Length() == 0 {
return &fetchResult{statusCode: http.StatusNotFound, errMsg: "icon element not found"}
}
href := ele.AttrOr("href", "")
if href == "" {
return &fetchResult{statusCode: http.StatusNotFound, errMsg: "icon href not found"}
}
// https://en.wikipedia.org/wiki/Data_URI_scheme
if strings.HasPrefix(href, "data:image/") {
dataURI, err := dataurl.DecodeString(href)
if err != nil {
logging.Error().Err(err).
Str("route", r.Name()).
Msg("failed to decode favicon")
return &fetchResult{statusCode: http.StatusInternalServerError, errMsg: "internal error"}
}
return &fetchResult{icon: dataURI.Data, contentType: dataURI.ContentType()}
}
switch {
case strings.HasPrefix(href, "http://"), strings.HasPrefix(href, "https://"):
return fetchIconAbsolute(href)
default:
return findIconSlow(r, req, href, 0)
}
gphttp.WriteBody(w, result.Icon)
}