修复: 网站预览资源路径+七牛目录层级
This commit is contained in:
47
internal/filesystem/site_resource.go
Normal file
47
internal/filesystem/site_resource.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package filesystem
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
	// htmlResRegex matches src/href/data/poster attributes that carry a quoted
	// value. Attribute names are matched case-insensitively, and the value must
	// be closed by the same quote character that opened it, so a double-quoted
	// value may contain an apostrophe (and vice versa). RE2 has no
	// backreferences, hence the alternation: capture group 1 holds a
	// double-quoted value, capture group 2 a single-quoted one.
	htmlResRegex = regexp.MustCompile(`(?i)(?:src|href|data|poster)=(?:"([^"]+)"|'([^']+)')`)
	// htmlCssUrlRe matches CSS url(...) references, with or without quotes.
	// It is deliberately left case-sensitive so that JavaScript such as
	// `new URL("...")` in inline scripts is not picked up.
	htmlCssUrlRe = regexp.MustCompile(`url\(\s*["']?([^"')]+)["']?\s*\)`)
)

// ExtractHtmlResources extracts resource paths referenced by an HTML document.
//
// It collects the values of src, href, data and poster attributes first, then
// the targets of CSS url(...) references. Each value is whitespace-trimmed;
// empty values are dropped and duplicates are reported only once, in order of
// first appearance. The result is nil when nothing is found.
func ExtractHtmlResources(html string) []string {
	seen := make(map[string]struct{})
	var resources []string

	collect := func(re *regexp.Regexp) {
		for _, m := range re.FindAllStringSubmatch(html, -1) {
			// A pattern may expose several alternative capture groups; the
			// first non-empty one is the captured value for this match.
			for _, group := range m[1:] {
				v := strings.TrimSpace(group)
				if v == "" {
					continue
				}
				if _, dup := seen[v]; !dup {
					seen[v] = struct{}{}
					resources = append(resources, v)
				}
				break
			}
		}
	}

	collect(htmlResRegex)
	collect(htmlCssUrlRe)
	return resources
}
|
||||
|
||||
// ShouldSkipResource reports whether the resource path p should be skipped.
//
// It returns true for data:, http://, https://, javascript:, mailto: and
// blob: references, for protocol-relative URLs ("//host/..."), and for
// fragment-only references ("#..."). Any other value, including the empty
// string, returns false.
//
// URI schemes are case-insensitive, so the comparison ignores case, and
// surrounding whitespace is ignored as well.
func ShouldSkipResource(p string) bool {
	ref := strings.ToLower(strings.TrimSpace(p))
	for _, prefix := range [...]string{
		"data:",
		"http://",
		"https://",
		"//",
		"#",
		"javascript:",
		"mailto:",
		"blob:",
	} {
		if strings.HasPrefix(ref, prefix) {
			return true
		}
	}
	return false
}
|
||||
Reference in New Issue
Block a user