48 lines
1.1 KiB
Go
48 lines
1.1 KiB
Go
package filesystem
|
|
|
|
import (
|
|
"regexp"
|
|
"strings"
|
|
)
|
|
|
|
// htmlResRegex matches a src, href, data, or poster name followed directly by
// "=" and a single- or double-quoted value. Submatch 1 is the value between
// the quotes.
var htmlResRegex = regexp.MustCompile(`(?:src|href|data|poster)=["']([^"']+)["']`)

// htmlCssUrlRe matches a CSS url(...) reference whose target may be quoted or
// bare. Submatch 1 is the target.
var htmlCssUrlRe = regexp.MustCompile(`url\(\s*["']?([^"')]+)["']?\s*\)`)
|
|
|
|
// ExtractHtmlResources 从 HTML 内容提取资源路径
|
|
func ExtractHtmlResources(html string) []string {
|
|
seen := make(map[string]bool)
|
|
var resources []string
|
|
add := func(v string) {
|
|
v = strings.TrimSpace(v)
|
|
if v != "" && !seen[v] {
|
|
seen[v] = true
|
|
resources = append(resources, v)
|
|
}
|
|
}
|
|
for _, m := range htmlResRegex.FindAllStringSubmatch(html, -1) {
|
|
if len(m) > 1 {
|
|
add(m[1])
|
|
}
|
|
}
|
|
for _, m := range htmlCssUrlRe.FindAllStringSubmatch(html, -1) {
|
|
if len(m) > 1 {
|
|
add(m[1])
|
|
}
|
|
}
|
|
return resources
|
|
}
|
|
|
|
// ShouldSkipResource reports whether the resource path p should be skipped.
//
// It returns true when p starts with one of: "data:", "http://", "https://",
// "//" (protocol-relative), "#" (fragment), "javascript:", "mailto:" or
// "blob:". The comparison ignores letter case because URI schemes are
// case-insensitive, so "HTTPS://host/x" is skipped just like
// "https://host/x". Any other value, including the empty string, yields false.
func ShouldSkipResource(p string) bool {
	skipPrefixes := [...]string{
		"data:",
		"http://",
		"https://",
		"//",
		"#",
		"javascript:",
		"mailto:",
		"blob:",
	}

	for _, prefix := range skipPrefixes {
		// Compare only the leading len(prefix) bytes; the length guard keeps
		// the slice expression in range for short inputs.
		if len(p) >= len(prefix) && strings.EqualFold(p[:len(prefix)], prefix) {
			return true
		}
	}
	return false
}
|