mirror of
https://github.com/yusing/godoxy.git
synced 2026-04-25 10:18:59 +02:00
refactor(favicon): improve cache and error handling
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
@@ -12,8 +13,11 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/vincent-petithory/dataurl"
|
||||
apitypes "github.com/yusing/godoxy/internal/api/types"
|
||||
gphttp "github.com/yusing/godoxy/internal/net/gphttp"
|
||||
"github.com/yusing/goutils/cache"
|
||||
httputils "github.com/yusing/goutils/http"
|
||||
strutils "github.com/yusing/goutils/strings"
|
||||
)
|
||||
@@ -21,17 +25,31 @@ import (
|
||||
type FetchResult struct {
|
||||
Icon []byte
|
||||
StatusCode int
|
||||
ErrMsg string
|
||||
|
||||
contentType string
|
||||
}
|
||||
|
||||
const faviconFetchTimeout = 3 * time.Second
|
||||
|
||||
func (res *FetchResult) OK() bool {
|
||||
return len(res.Icon) > 0
|
||||
func FetchResultWithErrorf(statusCode int, msgFmt string, args ...any) (FetchResult, error) {
|
||||
return FetchResult{StatusCode: statusCode}, fmt.Errorf(msgFmt, args...)
|
||||
}
|
||||
|
||||
func FetchResultOK(icon []byte, contentType string) (FetchResult, error) {
|
||||
return FetchResult{Icon: icon, contentType: contentType}, nil
|
||||
}
|
||||
|
||||
func GinFetchError(c *gin.Context, statusCode int, err error) {
|
||||
if statusCode == 0 {
|
||||
statusCode = http.StatusInternalServerError
|
||||
}
|
||||
if statusCode == http.StatusInternalServerError {
|
||||
c.Error(apitypes.InternalServerError(err, "unexpected error"))
|
||||
} else {
|
||||
c.JSON(statusCode, apitypes.Error(err.Error()))
|
||||
}
|
||||
}
|
||||
|
||||
const faviconFetchTimeout = 3 * time.Second
|
||||
|
||||
func (res *FetchResult) ContentType() string {
|
||||
if res.contentType == "" {
|
||||
if bytes.HasPrefix(res.Icon, []byte("<svg")) || bytes.HasPrefix(res.Icon, []byte("<?xml")) {
|
||||
@@ -44,56 +62,54 @@ func (res *FetchResult) ContentType() string {
|
||||
|
||||
const maxRedirectDepth = 5
|
||||
|
||||
func FetchFavIconFromURL(ctx context.Context, iconURL *IconURL) *FetchResult {
|
||||
func FetchFavIconFromURL(ctx context.Context, iconURL *IconURL) (FetchResult, error) {
|
||||
switch iconURL.IconSource {
|
||||
case IconSourceAbsolute:
|
||||
return fetchIconAbsolute(ctx, iconURL.URL())
|
||||
return FetchIconAbsolute(ctx, iconURL.URL())
|
||||
case IconSourceRelative:
|
||||
return &FetchResult{StatusCode: http.StatusBadRequest, ErrMsg: "unexpected relative icon"}
|
||||
return FetchResultWithErrorf(http.StatusBadRequest, "unexpected relative icon")
|
||||
case IconSourceWalkXCode, IconSourceSelfhSt:
|
||||
return fetchKnownIcon(ctx, iconURL)
|
||||
}
|
||||
return &FetchResult{StatusCode: http.StatusBadRequest, ErrMsg: "invalid icon source"}
|
||||
return FetchResultWithErrorf(http.StatusBadRequest, "invalid icon source")
|
||||
}
|
||||
|
||||
func fetchIconAbsolute(ctx context.Context, url string) *FetchResult {
|
||||
if result := loadIconCache(url); result != nil {
|
||||
return result
|
||||
}
|
||||
|
||||
var FetchIconAbsolute = cache.NewKeyFunc(func(ctx context.Context, url string) (FetchResult, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
|
||||
return &FetchResult{StatusCode: http.StatusBadGateway, ErrMsg: "request timeout"}
|
||||
}
|
||||
return &FetchResult{StatusCode: http.StatusInternalServerError, ErrMsg: err.Error()}
|
||||
return FetchResultWithErrorf(http.StatusInternalServerError, "cannot create request: %w", err)
|
||||
}
|
||||
|
||||
resp, err := gphttp.Do(req)
|
||||
if err == nil {
|
||||
defer resp.Body.Close()
|
||||
} else {
|
||||
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
|
||||
return FetchResultWithErrorf(http.StatusBadGateway, "request timeout")
|
||||
}
|
||||
return FetchResultWithErrorf(http.StatusBadGateway, "connection error: %w", err)
|
||||
}
|
||||
if err != nil || resp.StatusCode != http.StatusOK {
|
||||
return &FetchResult{StatusCode: http.StatusBadGateway, ErrMsg: "connection error"}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return FetchResultWithErrorf(resp.StatusCode, "upstream error: http %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
icon, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return &FetchResult{StatusCode: http.StatusInternalServerError, ErrMsg: "internal error"}
|
||||
return FetchResultWithErrorf(http.StatusInternalServerError, "failed to read response body: %w", err)
|
||||
}
|
||||
|
||||
if len(icon) == 0 {
|
||||
return &FetchResult{StatusCode: http.StatusNotFound, ErrMsg: "empty icon"}
|
||||
return FetchResultWithErrorf(http.StatusNotFound, "empty icon")
|
||||
}
|
||||
|
||||
res := &FetchResult{Icon: icon}
|
||||
res := FetchResult{Icon: icon}
|
||||
if contentType := resp.Header.Get("Content-Type"); contentType != "" {
|
||||
res.contentType = contentType
|
||||
}
|
||||
// else leave it empty
|
||||
storeIconCache(url, res)
|
||||
return res
|
||||
}
|
||||
return res, nil
|
||||
}).WithMaxEntries(200).WithRetriesExponentialBackoff(3).WithTTL(4 * time.Hour).Build()
|
||||
|
||||
var nameSanitizer = strings.NewReplacer(
|
||||
"_", "-",
|
||||
@@ -106,57 +122,57 @@ func sanitizeName(name string) string {
|
||||
return strings.ToLower(nameSanitizer.Replace(name))
|
||||
}
|
||||
|
||||
func fetchKnownIcon(ctx context.Context, url *IconURL) *FetchResult {
|
||||
func fetchKnownIcon(ctx context.Context, url *IconURL) (FetchResult, error) {
|
||||
// if icon isn't in the list, no need to fetch
|
||||
if !url.HasIcon() {
|
||||
return &FetchResult{StatusCode: http.StatusNotFound, ErrMsg: "no such icon"}
|
||||
return FetchResult{StatusCode: http.StatusNotFound}, errors.New("no such icon")
|
||||
}
|
||||
|
||||
return fetchIconAbsolute(ctx, url.URL())
|
||||
return FetchIconAbsolute(ctx, url.URL())
|
||||
}
|
||||
|
||||
func fetchIcon(ctx context.Context, filename string) *FetchResult {
|
||||
func fetchIcon(ctx context.Context, filename string) (FetchResult, error) {
|
||||
for _, fileType := range []string{"svg", "webp", "png"} {
|
||||
result := fetchKnownIcon(ctx, NewSelfhStIconURL(filename, fileType))
|
||||
if result.OK() {
|
||||
return result
|
||||
result, err := fetchKnownIcon(ctx, NewSelfhStIconURL(filename, fileType))
|
||||
if err == nil {
|
||||
return result, err
|
||||
}
|
||||
result = fetchKnownIcon(ctx, NewWalkXCodeIconURL(filename, fileType))
|
||||
if result.OK() {
|
||||
return result
|
||||
result, err = fetchKnownIcon(ctx, NewWalkXCodeIconURL(filename, fileType))
|
||||
if err == nil {
|
||||
return result, err
|
||||
}
|
||||
}
|
||||
return &FetchResult{StatusCode: http.StatusNotFound, ErrMsg: "no icon found"}
|
||||
return FetchResultWithErrorf(http.StatusNotFound, "no icon found")
|
||||
}
|
||||
|
||||
func FindIcon(ctx context.Context, r route, uri string) *FetchResult {
|
||||
if result := loadIconCache(r.Key()); result != nil {
|
||||
return result
|
||||
}
|
||||
|
||||
func FindIcon(ctx context.Context, r route, uri string) (FetchResult, error) {
|
||||
for _, ref := range r.References() {
|
||||
result := fetchIcon(ctx, sanitizeName(ref))
|
||||
if result.OK() {
|
||||
storeIconCache(r.Key(), result)
|
||||
return result
|
||||
result, err := fetchIcon(ctx, sanitizeName(ref))
|
||||
if err == nil {
|
||||
return result, err
|
||||
}
|
||||
}
|
||||
if r, ok := r.(httpRoute); ok {
|
||||
// fallback to parse html
|
||||
return findIconSlow(ctx, r, uri, nil)
|
||||
return findIconSlowCached(context.WithValue(ctx, "route", r), uri)
|
||||
}
|
||||
return &FetchResult{StatusCode: http.StatusNotFound, ErrMsg: "no icon found"}
|
||||
return FetchResultWithErrorf(http.StatusNotFound, "no icon found")
|
||||
}
|
||||
|
||||
func findIconSlow(ctx context.Context, r httpRoute, uri string, stack []string) *FetchResult {
|
||||
var findIconSlowCached = cache.NewKeyFunc(func(ctx context.Context, key string) (FetchResult, error) {
|
||||
r := ctx.Value("route").(httpRoute)
|
||||
return findIconSlow(ctx, r, key, nil)
|
||||
}).WithMaxEntries(200).Build() // no retries, no ttl
|
||||
|
||||
func findIconSlow(ctx context.Context, r httpRoute, uri string, stack []string) (FetchResult, error) {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return &FetchResult{StatusCode: http.StatusBadGateway, ErrMsg: "request timeout"}
|
||||
return FetchResultWithErrorf(http.StatusBadGateway, "request timeout")
|
||||
default:
|
||||
}
|
||||
|
||||
if len(stack) > maxRedirectDepth {
|
||||
return &FetchResult{StatusCode: http.StatusBadGateway, ErrMsg: "too many redirects"}
|
||||
return FetchResultWithErrorf(http.StatusBadGateway, "too many redirects")
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeoutCause(ctx, faviconFetchTimeout, errors.New("favicon request timeout"))
|
||||
@@ -164,13 +180,13 @@ func findIconSlow(ctx context.Context, r httpRoute, uri string, stack []string)
|
||||
|
||||
newReq, err := http.NewRequestWithContext(ctx, http.MethodGet, r.TargetURL().String(), nil)
|
||||
if err != nil {
|
||||
return &FetchResult{StatusCode: http.StatusInternalServerError, ErrMsg: "cannot create request"}
|
||||
return FetchResultWithErrorf(http.StatusInternalServerError, "cannot create request: %w", err)
|
||||
}
|
||||
newReq.Header.Set("Accept-Encoding", "identity") // disable compression
|
||||
|
||||
u, err := url.ParseRequestURI(strutils.SanitizeURI(uri))
|
||||
if err != nil {
|
||||
return &FetchResult{StatusCode: http.StatusInternalServerError, ErrMsg: "cannot parse uri"}
|
||||
return FetchResultWithErrorf(http.StatusInternalServerError, "cannot parse uri: %w", err)
|
||||
}
|
||||
newReq.URL.Path = u.Path
|
||||
newReq.URL.RawPath = u.RawPath
|
||||
@@ -182,48 +198,48 @@ func findIconSlow(ctx context.Context, r httpRoute, uri string, stack []string)
|
||||
if c.status != http.StatusOK {
|
||||
switch c.status {
|
||||
case 0:
|
||||
return &FetchResult{StatusCode: http.StatusBadGateway, ErrMsg: "connection error"}
|
||||
return FetchResultWithErrorf(http.StatusBadGateway, "connection error")
|
||||
default:
|
||||
if loc := c.Header().Get("Location"); loc != "" {
|
||||
loc = strutils.SanitizeURI(loc)
|
||||
if loc == "/" || loc == newReq.URL.Path || slices.Contains(stack, loc) {
|
||||
return &FetchResult{StatusCode: http.StatusBadGateway, ErrMsg: "circular redirect"}
|
||||
return FetchResultWithErrorf(http.StatusBadGateway, "circular redirect")
|
||||
}
|
||||
// append current path to stack
|
||||
// handles redirect to the same path with different query
|
||||
return findIconSlow(ctx, r, loc, append(stack, newReq.URL.Path))
|
||||
}
|
||||
}
|
||||
return &FetchResult{StatusCode: c.status, ErrMsg: "upstream error: " + string(c.data)}
|
||||
return FetchResultWithErrorf(c.status, "upstream error: %s", c.data)
|
||||
}
|
||||
// return icon data
|
||||
if !httputils.GetContentType(c.header).IsHTML() {
|
||||
return &FetchResult{Icon: c.data, contentType: c.header.Get("Content-Type")}
|
||||
return FetchResultOK(c.data, c.header.Get("Content-Type"))
|
||||
}
|
||||
// try extract from "link[rel=icon]" from path "/"
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewBuffer(c.data))
|
||||
if err != nil {
|
||||
return &FetchResult{StatusCode: http.StatusInternalServerError, ErrMsg: "failed to parse html"}
|
||||
return FetchResultWithErrorf(http.StatusInternalServerError, "failed to parse html: %w", err)
|
||||
}
|
||||
ele := doc.Find("head > link[rel=icon]").First()
|
||||
if ele.Length() == 0 {
|
||||
return &FetchResult{StatusCode: http.StatusNotFound, ErrMsg: "icon element not found"}
|
||||
return FetchResultWithErrorf(http.StatusNotFound, "icon element not found")
|
||||
}
|
||||
href := ele.AttrOr("href", "")
|
||||
if href == "" {
|
||||
return &FetchResult{StatusCode: http.StatusNotFound, ErrMsg: "icon href not found"}
|
||||
return FetchResultWithErrorf(http.StatusNotFound, "icon href not found")
|
||||
}
|
||||
// https://en.wikipedia.org/wiki/Data_URI_scheme
|
||||
if strings.HasPrefix(href, "data:image/") {
|
||||
dataURI, err := dataurl.DecodeString(href)
|
||||
if err != nil {
|
||||
return &FetchResult{StatusCode: http.StatusInternalServerError, ErrMsg: "failed to decode favicon"}
|
||||
return FetchResultWithErrorf(http.StatusInternalServerError, "failed to decode favicon: %w", err)
|
||||
}
|
||||
return &FetchResult{Icon: dataURI.Data, contentType: dataURI.ContentType()}
|
||||
return FetchResultOK(dataURI.Data, dataURI.ContentType())
|
||||
}
|
||||
switch {
|
||||
case strings.HasPrefix(href, "http://"), strings.HasPrefix(href, "https://"):
|
||||
return fetchIconAbsolute(ctx, href)
|
||||
return FetchIconAbsolute(ctx, href)
|
||||
default:
|
||||
return findIconSlow(ctx, r, href, append(stack, newReq.URL.Path))
|
||||
}
|
||||
|
||||
@@ -1,145 +0,0 @@
|
||||
package homepage
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/bytedance/sonic"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/yusing/godoxy/internal/common"
|
||||
"github.com/yusing/godoxy/internal/jsonstore"
|
||||
"github.com/yusing/godoxy/internal/utils"
|
||||
"github.com/yusing/godoxy/internal/utils/atomic"
|
||||
"github.com/yusing/goutils/task"
|
||||
)
|
||||
|
||||
type cacheEntry struct {
|
||||
Icon []byte `json:"icon"`
|
||||
ContentType string `json:"content_type,omitempty"`
|
||||
LastAccess atomic.Value[time.Time] `json:"last_access"`
|
||||
}
|
||||
|
||||
// cache key can be absolute url or route name.
|
||||
var (
|
||||
iconCache = jsonstore.Store[*cacheEntry](common.NamespaceIconCache)
|
||||
iconMu sync.RWMutex
|
||||
)
|
||||
|
||||
const (
|
||||
iconCacheTTL = 3 * 24 * time.Hour
|
||||
cleanUpInterval = time.Minute
|
||||
maxIconSize = 1024 * 1024 // 1MB
|
||||
maxCacheEntries = 100
|
||||
)
|
||||
|
||||
func init() {
|
||||
go func() {
|
||||
cleanupTicker := time.NewTicker(cleanUpInterval)
|
||||
defer cleanupTicker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-task.RootContextCanceled():
|
||||
return
|
||||
case <-cleanupTicker.C:
|
||||
pruneExpiredIconCache()
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func pruneExpiredIconCache() {
|
||||
nPruned := 0
|
||||
for key, icon := range iconCache.Range {
|
||||
if icon.IsExpired() {
|
||||
iconCache.Delete(key)
|
||||
nPruned++
|
||||
}
|
||||
}
|
||||
if iconCache.Size() > maxCacheEntries {
|
||||
iconCache.Clear()
|
||||
newIconCache := make(map[string]*cacheEntry, maxCacheEntries)
|
||||
i := 0
|
||||
for key, icon := range iconCache.Range {
|
||||
if i == maxCacheEntries {
|
||||
break
|
||||
}
|
||||
if !icon.IsExpired() {
|
||||
newIconCache[key] = icon
|
||||
i++
|
||||
}
|
||||
}
|
||||
for key, icon := range newIconCache {
|
||||
iconCache.Store(key, icon)
|
||||
}
|
||||
}
|
||||
if nPruned > 0 {
|
||||
log.Info().Int("pruned", nPruned).Msg("pruned expired icon cache")
|
||||
}
|
||||
}
|
||||
|
||||
func PruneRouteIconCache(route route) {
|
||||
iconCache.Delete(route.Key())
|
||||
}
|
||||
|
||||
func loadIconCache(key string) *FetchResult {
|
||||
iconMu.RLock()
|
||||
defer iconMu.RUnlock()
|
||||
icon, ok := iconCache.Load(key)
|
||||
if ok && len(icon.Icon) > 0 {
|
||||
log.Debug().
|
||||
Str("key", key).
|
||||
Msg("icon found in cache")
|
||||
icon.LastAccess.Store(utils.TimeNow())
|
||||
return &FetchResult{Icon: icon.Icon, contentType: icon.ContentType}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func storeIconCache(key string, result *FetchResult) {
|
||||
icon := result.Icon
|
||||
if len(icon) > maxIconSize {
|
||||
log.Debug().Int("size", len(icon)).Msg("icon cache size exceeds max cache size")
|
||||
return
|
||||
}
|
||||
|
||||
iconMu.Lock()
|
||||
defer iconMu.Unlock()
|
||||
|
||||
entry := &cacheEntry{Icon: icon, ContentType: result.contentType}
|
||||
entry.LastAccess.Store(time.Now())
|
||||
iconCache.Store(key, entry)
|
||||
log.Debug().Str("key", key).Int("size", len(icon)).Msg("stored icon cache")
|
||||
}
|
||||
|
||||
func (e *cacheEntry) IsExpired() bool {
|
||||
return time.Since(e.LastAccess.Load()) > iconCacheTTL
|
||||
}
|
||||
|
||||
func (e *cacheEntry) UnmarshalJSON(data []byte) error {
|
||||
var tmp struct {
|
||||
Icon []byte `json:"icon"`
|
||||
ContentType string `json:"content_type,omitempty"`
|
||||
LastAccess time.Time `json:"last_access"`
|
||||
}
|
||||
// check if data is json
|
||||
if sonic.Valid(data) {
|
||||
err := sonic.Unmarshal(data, &tmp)
|
||||
// return only if unmarshal is successful
|
||||
// otherwise fallback to base64
|
||||
if err == nil {
|
||||
e.Icon = tmp.Icon
|
||||
e.ContentType = tmp.ContentType
|
||||
e.LastAccess.Store(tmp.LastAccess)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
// fallback to base64
|
||||
icon, err := base64.StdEncoding.DecodeString(string(data))
|
||||
if err == nil {
|
||||
e.Icon = icon
|
||||
e.LastAccess.Store(time.Now())
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
Reference in New Issue
Block a user