Private
Public Access
1
0
Files
u-desk/internal/filesystem/site_resource.go

48 lines
1.1 KiB
Go

package filesystem
import (
"regexp"
"strings"
)
var (
htmlResRegex = regexp.MustCompile(`(?:src|href|data|poster)=["']([^"']+)["']`)
htmlCssUrlRe = regexp.MustCompile(`url\(\s*["']?([^"')]+)["']?\s*\)`)
)
// ExtractHtmlResources 从 HTML 内容提取资源路径
func ExtractHtmlResources(html string) []string {
seen := make(map[string]bool)
var resources []string
add := func(v string) {
v = strings.TrimSpace(v)
if v != "" && !seen[v] {
seen[v] = true
resources = append(resources, v)
}
}
for _, m := range htmlResRegex.FindAllStringSubmatch(html, -1) {
if len(m) > 1 {
add(m[1])
}
}
for _, m := range htmlCssUrlRe.FindAllStringSubmatch(html, -1) {
if len(m) > 1 {
add(m[1])
}
}
return resources
}
// ShouldSkipResource 判断资源路径是否应跳过
func ShouldSkipResource(p string) bool {
return strings.HasPrefix(p, "data:") ||
strings.HasPrefix(p, "http://") ||
strings.HasPrefix(p, "https://") ||
strings.HasPrefix(p, "//") ||
strings.HasPrefix(p, "#") ||
strings.HasPrefix(p, "javascript:") ||
strings.HasPrefix(p, "mailto:") ||
strings.HasPrefix(p, "blob:")
}