修复: 网站预览资源路径+七牛目录层级

2026-05-13 21:16:33 +08:00
parent 2a363fd729
commit 316e517989
6 changed files with 63 additions and 93 deletions
--- a/internal/ossdrv/service.go
+++ b/internal/ossdrv/service.go
@@ -8,7 +8,6 @@ import (
 	"os"
 	"path"
 	"path/filepath"
-	"regexp"
 	"strings"
 	"sync"
 	"time"
@@ -640,9 +639,7 @@ func (s *Service) RenamePath(connID string, oldPath string, newPath string) (*fi
 	return result, nil
 }

-// htmlResourceRegex 提取 HTML 资源引用的正则
-var htmlResourceRegex = regexp.MustCompile(`(?:src|href|data|poster)=["']([^"']+)["']`)
-var htmlCssUrlRegex = regexp.MustCompile(`url\(\s*["']?([^"')]+)["']?\s*\)`)
+

 // DownloadSiteForPreview 下载 HTML 及其引用的资源到临时目录
 // 对绝对路径（/开头）从 HTML 目录逐级向上嗅探网站根目录
@@ -695,7 +692,7 @@ func (s *Service) DownloadSiteForPreview(connID string, rawPath string) (string,
 	if err != nil {
 		return htmlLocalPath, nil // HTML 已下载，资源解析失败不影响
 	}
-	resources := extractHtmlResources(string(htmlContent))
+	resources := filesystem.ExtractHtmlResources(string(htmlContent))

 	// 4. 下载资源
 	htmlOssDir := keyDir
@@ -716,7 +713,7 @@ func (s *Service) DownloadSiteForPreview(connID string, rawPath string) (string,
 	}

 	for _, resPath := range resources {
-		if shouldSkipResource(resPath) {
+		if filesystem.ShouldSkipResource(resPath) {
 			continue
 		}

@@ -817,57 +814,19 @@ func supplementDir(c oss.OSSProvider, ctx context.Context, remoteDir string, tmp
 	prefix := remoteDir + "/"
 	result, err := c.ListFiles(ctx, &oss.ListOptions{Prefix: prefix, MaxKeys: 200})
 	if err != nil {
-		return
+			return
 	}
 	for _, f := range result.Files {
 		if strings.HasSuffix(f.Key, "/") || f.Size == 0 {
 			continue
 		}
-		relPath := strings.TrimPrefix(f.Key, siteRoot)
-		localPath := filepath.Join(tmpDir, filepath.FromSlash(relPath))
+		localPath := filepath.Join(tmpDir, filepath.FromSlash(f.Key))
 		if _, err := os.Stat(localPath); err == nil {
 			continue
 		}
 		downloadResource(c, ctx, f.Key, localPath)
 	}
 }
-func extractHtmlResources(html string) []string {
-	seen := make(map[string]bool)
-	var resources []string
-
-	add := func(v string) {
-		v = strings.TrimSpace(v)
-		if v != "" && !seen[v] {
-			seen[v] = true
-			resources = append(resources, v)
-		}
-	}
-
-	for _, m := range htmlResourceRegex.FindAllStringSubmatch(html, -1) {
-		if len(m) > 1 {
-			add(m[1])
-		}
-	}
-	for _, m := range htmlCssUrlRegex.FindAllStringSubmatch(html, -1) {
-		if len(m) > 1 {
-			add(m[1])
-		}
-	}
-
-	return resources
-}
-
-// shouldSkipResource 判断资源路径是否应跳过
-func shouldSkipResource(p string) bool {
-	return strings.HasPrefix(p, "data:") ||
-		strings.HasPrefix(p, "http://") ||
-		strings.HasPrefix(p, "https://") ||
-		strings.HasPrefix(p, "//") ||
-		strings.HasPrefix(p, "#") ||
-		strings.HasPrefix(p, "javascript:") ||
-		strings.HasPrefix(p, "mailto:") ||
-		strings.HasPrefix(p, "blob:")
-}

 // DownloadToTemp 下载文件到本地临时目录（带 SQLite 缓存）
 func (s *Service) DownloadToTemp(connID string, rawPath string) (string, error) {