修复: 网站预览资源路径+七牛目录层级
This commit is contained in:
@@ -8,7 +8,6 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -640,9 +639,7 @@ func (s *Service) RenamePath(connID string, oldPath string, newPath string) (*fi
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Regular expressions used to pull resource references out of an HTML document.
// Both are compiled once at package scope and expose the referenced path as
// capture group 1.
var (
	// htmlResourceRegex matches src/href/data/poster attributes with a quoted
	// value. Attribute names are matched case-insensitively and whitespace is
	// allowed around "=", because HTML permits both (e.g. <IMG SRC = 'a.png'>).
	htmlResourceRegex = regexp.MustCompile(`(?i)(?:src|href|data|poster)\s*=\s*["']([^"']+)["']`)

	// htmlCssUrlRegex matches CSS url(...) references, with or without quotes
	// around the path. The "url" keyword is matched case-insensitively.
	htmlCssUrlRegex = regexp.MustCompile(`(?i)url\(\s*["']?([^"')]+)["']?\s*\)`)
)
|
||||
|
||||
|
||||
// DownloadSiteForPreview 下载 HTML 及其引用的资源到临时目录
|
||||
// 对绝对路径(/开头)从 HTML 目录逐级向上嗅探网站根目录
|
||||
@@ -695,7 +692,7 @@ func (s *Service) DownloadSiteForPreview(connID string, rawPath string) (string,
|
||||
if err != nil {
|
||||
return htmlLocalPath, nil // HTML 已下载,资源解析失败不影响
|
||||
}
|
||||
resources := extractHtmlResources(string(htmlContent))
|
||||
resources := filesystem.ExtractHtmlResources(string(htmlContent))
|
||||
|
||||
// 4. 下载资源
|
||||
htmlOssDir := keyDir
|
||||
@@ -716,7 +713,7 @@ func (s *Service) DownloadSiteForPreview(connID string, rawPath string) (string,
|
||||
}
|
||||
|
||||
for _, resPath := range resources {
|
||||
if shouldSkipResource(resPath) {
|
||||
if filesystem.ShouldSkipResource(resPath) {
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -817,57 +814,19 @@ func supplementDir(c oss.OSSProvider, ctx context.Context, remoteDir string, tmp
|
||||
prefix := remoteDir + "/"
|
||||
result, err := c.ListFiles(ctx, &oss.ListOptions{Prefix: prefix, MaxKeys: 200})
|
||||
if err != nil {
|
||||
return
|
||||
return
|
||||
}
|
||||
for _, f := range result.Files {
|
||||
if strings.HasSuffix(f.Key, "/") || f.Size == 0 {
|
||||
continue
|
||||
}
|
||||
relPath := strings.TrimPrefix(f.Key, siteRoot)
|
||||
localPath := filepath.Join(tmpDir, filepath.FromSlash(relPath))
|
||||
localPath := filepath.Join(tmpDir, filepath.FromSlash(f.Key))
|
||||
if _, err := os.Stat(localPath); err == nil {
|
||||
continue
|
||||
}
|
||||
downloadResource(c, ctx, f.Key, localPath)
|
||||
}
|
||||
}
|
||||
func extractHtmlResources(html string) []string {
|
||||
seen := make(map[string]bool)
|
||||
var resources []string
|
||||
|
||||
add := func(v string) {
|
||||
v = strings.TrimSpace(v)
|
||||
if v != "" && !seen[v] {
|
||||
seen[v] = true
|
||||
resources = append(resources, v)
|
||||
}
|
||||
}
|
||||
|
||||
for _, m := range htmlResourceRegex.FindAllStringSubmatch(html, -1) {
|
||||
if len(m) > 1 {
|
||||
add(m[1])
|
||||
}
|
||||
}
|
||||
for _, m := range htmlCssUrlRegex.FindAllStringSubmatch(html, -1) {
|
||||
if len(m) > 1 {
|
||||
add(m[1])
|
||||
}
|
||||
}
|
||||
|
||||
return resources
|
||||
}
|
||||
|
||||
// shouldSkipResource reports whether the resource reference p must not be
// downloaded as a site-local file.
//
// A reference is skipped when it is an inline payload (data:, blob:), an
// external or protocol-relative URL (http://, https://, //), a same-page
// fragment (#...), or a non-file scheme (javascript:, mailto:).
//
// Prefixes are compared case-insensitively because URI schemes are
// case-insensitive: "HTTPS://cdn/x.js" is just as external as
// "https://cdn/x.js". An empty p is not skipped.
func shouldSkipResource(p string) bool {
	prefixes := [...]string{
		"data:",
		"http://",
		"https://",
		"//",
		"#",
		"javascript:",
		"mailto:",
		"blob:",
	}
	for _, prefix := range prefixes {
		// Every prefix is pure ASCII, so slicing p by byte length is safe: a
		// slice that cuts through a multi-byte rune simply fails to compare
		// equal.
		if len(p) >= len(prefix) && strings.EqualFold(p[:len(prefix)], prefix) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// DownloadToTemp 下载文件到本地临时目录(带 SQLite 缓存)
|
||||
func (s *Service) DownloadToTemp(connID string, rawPath string) (string, error) {
|
||||
|
||||
Reference in New Issue
Block a user