package filesystem import ( "regexp" "strings" ) var ( htmlResRegex = regexp.MustCompile(`(?:src|href|data|poster)=["']([^"']+)["']`) htmlCssUrlRe = regexp.MustCompile(`url\(\s*["']?([^"')]+)["']?\s*\)`) ) // ExtractHtmlResources 从 HTML 内容提取资源路径 func ExtractHtmlResources(html string) []string { seen := make(map[string]bool) var resources []string add := func(v string) { v = strings.TrimSpace(v) if v != "" && !seen[v] { seen[v] = true resources = append(resources, v) } } for _, m := range htmlResRegex.FindAllStringSubmatch(html, -1) { if len(m) > 1 { add(m[1]) } } for _, m := range htmlCssUrlRe.FindAllStringSubmatch(html, -1) { if len(m) > 1 { add(m[1]) } } return resources } // ShouldSkipResource 判断资源路径是否应跳过 func ShouldSkipResource(p string) bool { return strings.HasPrefix(p, "data:") || strings.HasPrefix(p, "http://") || strings.HasPrefix(p, "https://") || strings.HasPrefix(p, "//") || strings.HasPrefix(p, "#") || strings.HasPrefix(p, "javascript:") || strings.HasPrefix(p, "mailto:") || strings.HasPrefix(p, "blob:") }