Private
Public Access
1
0
Files
u-desk/internal/filesystem/content_detector.go
绝尘 a5d30684ed 重构:文件系统模块化架构,增强 Markdown 渲染
- 拆分 FileSystem.vue 为模块化组件架构
- 新增 Markdown Mermaid 图表渲染支持
- 新增 180+ 编程语言代码高亮
- 修复编辑/预览模式切换渲染问题
- 优化亮色/暗色模式主题适配
- 新增 TypeScript 类型定义
2026-02-04 03:32:46 +08:00

134 lines
3.0 KiB
Go

package filesystem
import (
"bytes"
"fmt"
"os"
)
// maxDetectSize is the largest file size, in bytes, that
// DetectFileTypeByContent will read. Larger files are not inspected and are
// reported with category "unknown" and zero confidence.
const maxDetectSize = 500 * 1024 // 500 KiB
// FileTypeInfo describes the outcome of content-based file type detection.
type FileTypeInfo struct {
// Extension is the suggested file extension without a leading dot
// (for example "png"); it is empty when no specific type was recognized.
Extension string `json:"extension"`
// Category is a coarse classification. The detector in this file produces
// "image", "pdf", "archive", "text", "binary" and "unknown".
Category string `json:"category"`
// MIMEType is the MIME type associated with the detected format.
MIMEType string `json:"mime_type"`
// Confidence is a heuristic score; the detector in this file uses values
// between 0 (not inspected) and 0.95 (signature match).
Confidence float64 `json:"confidence"`
}
// magicNumbers lists the file signatures ("magic numbers") recognized by
// DetectFileTypeByContent. Entries are tried in order and the first match
// wins.
var magicNumbers = []struct {
	offset   int    // byte position in the file at which magic must appear
	magic    []byte // signature bytes
	ext      string // extension reported on a match, without the dot
	category string // category reported on a match
	mime     string // MIME type reported on a match
}{
	// Images.
	{0, []byte{0xFF, 0xD8, 0xFF}, "jpg", "image", "image/jpeg"},
	{0, []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}, "png", "image", "image/png"},
	{0, []byte{0x47, 0x49, 0x46, 0x38}, "gif", "image", "image/gif"},
	{0, []byte{0x42, 0x4D}, "bmp", "image", "image/bmp"},
	// WebP is a RIFF container: the file starts with "RIFF", followed by a
	// 4-byte length, and only then the "WEBP" tag. The tag therefore has to
	// be matched at offset 8; matching it at offset 0 never recognizes a
	// real WebP file.
	{8, []byte{0x57, 0x45, 0x42, 0x50}, "webp", "image", "image/webp"},
	// Documents.
	{0, []byte{0x25, 0x50, 0x44, 0x46}, "pdf", "pdf", "application/pdf"},
	// Archives.
	{0, []byte{0x50, 0x4B, 0x03, 0x04}, "zip", "archive", "application/zip"},
}

// DetectFileTypeByContent identifies a file's type from its content instead
// of its name.
//
// The path is first checked with s.validatePath. Files larger than
// maxDetectSize are not read and are reported as category "unknown" with
// zero confidence. Otherwise the content is matched against magicNumbers
// (confidence 0.95), then tested with isTextContent (reported as "txt" /
// "text/plain", confidence 0.8), and finally reported as generic binary
// (confidence 0.5).
//
// An error is returned when path validation fails or when the file cannot be
// stat'ed or read.
func (s *FileSystemService) DetectFileTypeByContent(path string) (*FileTypeInfo, error) {
	if err := s.validatePath(path); err != nil {
		return nil, fmt.Errorf("路径验证失败: %w", err)
	}

	info, err := os.Stat(path)
	if err != nil {
		return nil, fmt.Errorf("无法访问文件: %w", err)
	}
	if info.Size() > maxDetectSize {
		return &FileTypeInfo{Category: "unknown", Confidence: 0}, nil
	}

	data, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("读取文件失败: %w", err)
	}

	// Signature check: compare each known magic number at its own offset.
	for _, m := range magicNumbers {
		end := m.offset + len(m.magic)
		if len(data) >= end && bytes.Equal(data[m.offset:end], m.magic) {
			return &FileTypeInfo{
				Extension:  m.ext,
				Category:   m.category,
				MIMEType:   m.mime,
				Confidence: 0.95,
			}, nil
		}
	}

	// No signature matched; fall back to the text heuristic.
	if isTextContent(data) {
		return &FileTypeInfo{
			Extension:  "txt",
			Category:   "text",
			MIMEType:   "text/plain",
			Confidence: 0.8,
		}, nil
	}

	return &FileTypeInfo{
		Extension:  "",
		Category:   "binary",
		MIMEType:   "application/octet-stream",
		Confidence: 0.5,
	}, nil
}
// isTextContent reports whether data looks like human-readable text.
//
// Only the first 512 bytes are inspected. The sample is decoded as UTF-8 and
// every decoded character is classified:
//   - a NUL character makes the function return false immediately;
//   - tab, line feed, carriage return and printable ASCII (0x20-0x7E) count
//     as text;
//   - any validly encoded character at or above U+00A0 counts as text, so
//     UTF-8 content such as CJK or accented Latin is recognized;
//   - everything else (other control characters, bytes that are not valid
//     UTF-8, and U+FFFD itself) counts as non-text.
//
// The data is considered text when more than 90% of the characters are text.
// Empty input is reported as non-text.
func isTextContent(data []byte) bool {
	const sampleSize = 512

	sample := data
	if len(sample) > sampleSize {
		sample = sample[:sampleSize]
	}
	if len(sample) == 0 {
		return false
	}

	total, text := 0, 0
	// Ranging over a string decodes UTF-8; each invalid byte is yielded as a
	// single U+FFFD. A multi-byte character cut in half by the sample
	// boundary adds at most three such characters, which cannot by itself
	// push a full 512-byte sample under the threshold.
	for _, r := range string(sample) {
		total++
		switch {
		case r == 0:
			return false
		case r == '\t', r == '\n', r == '\r':
			text++
		case r >= 0x20 && r <= 0x7E:
			text++
		case r >= 0xA0 && r != 0xFFFD:
			text++
		}
	}
	return float64(text)/float64(total) > 0.9
}
// min returns the smaller of the two integers a and b.
// Being declared at package level, it takes precedence over the built-in
// min (Go 1.21+) inside this package.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
// DetectFileTypeByContentSimple is a convenience wrapper around
// FileSystemService.DetectFileTypeByContent. It obtains the service through
// GetGlobalService, runs the detection on path and returns the result as a
// map with the keys "extension", "category", "mime_type" and "confidence".
//
// Errors from GetGlobalService or from the detection are returned unchanged.
func DetectFileTypeByContentSimple(path string) (map[string]interface{}, error) {
	svc, err := GetGlobalService()
	if err != nil {
		return nil, err
	}

	detected, err := svc.DetectFileTypeByContent(path)
	if err != nil {
		return nil, err
	}

	result := make(map[string]interface{}, 4)
	result["extension"] = detected.Extension
	result["category"] = detected.Category
	result["mime_type"] = detected.MIMEType
	result["confidence"] = detected.Confidence
	return result, nil
}