package filesystem

import (
	"bytes"
	"fmt"
	"os"
	"unicode"
	"unicode/utf8"
)

// maxDetectSize is the largest file (in bytes) that content detection will
// read. Larger files are reported with category "unknown".
const maxDetectSize = 500 * 1024 // 500KB

// FileTypeInfo describes the file type inferred from a file's content.
type FileTypeInfo struct {
	// Extension is the conventional extension without a leading dot, or ""
	// when no specific type was recognised.
	Extension string `json:"extension"`
	// Category is one of: image, pdf, archive, text, binary, unknown.
	Category string `json:"category"`
	// MIMEType is the matching MIME type; empty for the "unknown" category.
	MIMEType string `json:"mime_type"`
	// Confidence is a heuristic score: 0.95 for a signature match, 0.8 for
	// text, 0.5 for the binary fallback and 0 for "unknown".
	Confidence float64 `json:"confidence"`
}

// magicNumbers lists signatures that appear at offset 0 of a file.
//
// WebP is intentionally not listed here: its "WEBP" marker sits at offset 8
// inside a RIFF container, so it is matched separately in matchMagicNumber.
var magicNumbers = []struct {
	magic    []byte
	ext      string
	category string
	mime     string
}{
	// Images
	{[]byte{0xFF, 0xD8, 0xFF}, "jpg", "image", "image/jpeg"},
	{[]byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}, "png", "image", "image/png"},
	{[]byte{0x47, 0x49, 0x46, 0x38}, "gif", "image", "image/gif"},
	// "BM" is only two bytes, so plain text starting with "BM" also matches.
	{[]byte{0x42, 0x4D}, "bmp", "image", "image/bmp"},
	// Documents
	{[]byte{0x25, 0x50, 0x44, 0x46}, "pdf", "pdf", "application/pdf"},
	// Archives
	{[]byte{0x50, 0x4B, 0x03, 0x04}, "zip", "archive", "application/zip"},
}

// matchMagicNumber returns the type whose signature matches the start of
// data, or nil when no known signature matches.
func matchMagicNumber(data []byte) *FileTypeInfo {
	for _, m := range magicNumbers {
		if bytes.HasPrefix(data, m.magic) {
			return &FileTypeInfo{
				Extension:  m.ext,
				Category:   m.category,
				MIMEType:   m.mime,
				Confidence: 0.95,
			}
		}
	}

	// WebP layout: "RIFF" <4-byte size> "WEBP". Checking for "WEBP" at
	// offset 0 (as the table previously did) never matches a real file.
	if len(data) >= 12 && string(data[:4]) == "RIFF" && string(data[8:12]) == "WEBP" {
		return &FileTypeInfo{
			Extension:  "webp",
			Category:   "image",
			MIMEType:   "image/webp",
			Confidence: 0.95,
		}
	}

	return nil
}

// DetectFileTypeByContent inspects the content of the file at path and
// reports its most likely type.
//
// The path is first checked with s.validatePath (defined elsewhere). Files
// larger than maxDetectSize are not read and yield category "unknown" with
// zero confidence. Otherwise the content is matched against known signatures,
// then tested for text, and finally reported as generic binary.
//
// An error is returned when path validation, os.Stat or the read fails.
func (s *FileSystemService) DetectFileTypeByContent(path string) (*FileTypeInfo, error) {
	if err := s.validatePath(path); err != nil {
		return nil, fmt.Errorf("路径验证失败: %w", err)
	}

	info, err := os.Stat(path)
	if err != nil {
		return nil, fmt.Errorf("无法访问文件: %w", err)
	}

	// Bound the amount of data loaded into memory.
	if info.Size() > maxDetectSize {
		return &FileTypeInfo{Category: "unknown", Confidence: 0}, nil
	}

	data, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("读取文件失败: %w", err)
	}

	// Signatures are the most reliable signal, so try them first.
	if ft := matchMagicNumber(data); ft != nil {
		return ft, nil
	}

	if isTextContent(data) {
		return &FileTypeInfo{
			Extension:  "txt",
			Category:   "text",
			MIMEType:   "text/plain",
			Confidence: 0.8,
		}, nil
	}

	return &FileTypeInfo{
		Extension:  "",
		Category:   "binary",
		MIMEType:   "application/octet-stream",
		Confidence: 0.5,
	}, nil
}

// isTextContent reports whether data looks like text.
//
// Only the first 512 bytes are sampled. Empty input and any sample containing
// a NUL byte are treated as non-text (so UTF-16 text is not recognised).
// Otherwise the sample is text when more than 90% of its bytes are tab, LF,
// CR, printable ASCII, or part of a valid non-control UTF-8 character.
func isTextContent(data []byte) bool {
	const (
		sampleSize   = 512
		minTextRatio = 0.9
	)

	if len(data) == 0 {
		return false
	}

	sample := data[:min(len(data), sampleSize)]
	truncated := len(sample) < len(data)

	textBytes := 0
	for i := 0; i < len(sample); {
		b := sample[i]
		switch {
		case b == 0:
			// NUL is a strong indicator of binary content.
			return false
		case b == '\t' || b == '\n' || b == '\r' || (b >= 32 && b <= 126):
			textBytes++
			i++
		case b < utf8.RuneSelf:
			// Remaining ASCII control characters (and DEL) are not text.
			i++
		default:
			rest := sample[i:]
			if truncated && !utf8.FullRune(rest) {
				// The sample boundary split a multi-byte character; the
				// missing bytes exist in data, so count the prefix as text.
				textBytes += len(rest)
				i = len(sample)
				break
			}
			r, size := utf8.DecodeRune(rest)
			invalid := r == utf8.RuneError && size == 1
			if !invalid && !unicode.IsControl(r) {
				textBytes += size
			}
			i += size
		}
	}

	return float64(textBytes)/float64(len(sample)) > minTextRatio
}

// min returns the smaller of a and b.
func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// DetectFileTypeByContentSimple is a convenience wrapper that runs
// DetectFileTypeByContent on the service returned by GetGlobalService and
// returns the result as a map with the keys "extension", "category",
// "mime_type" and "confidence".
//
// Errors from GetGlobalService or from detection are returned unchanged.
func DetectFileTypeByContentSimple(path string) (map[string]interface{}, error) {
	service, err := GetGlobalService()
	if err != nil {
		return nil, err
	}

	info, err := service.DetectFileTypeByContent(path)
	if err != nil {
		return nil, err
	}

	return map[string]interface{}{
		"extension":  info.Extension,
		"category":   info.Category,
		"mime_type":  info.MIMEType,
		"confidence": info.Confidence,
	}, nil
}