From f31953c42b0f21625eebce76de55c6887e7fc9de Mon Sep 17 00:00:00 2001 From: Sarv Date: Fri, 28 Mar 2025 16:48:49 +0800 Subject: [PATCH] Support Media Message (#9) --- go.mod | 11 +- go.sum | 22 +- internal/chatlog/database/service.go | 4 + internal/chatlog/http/route.go | 94 ++++- internal/chatlog/mcp/service.go | 4 +- internal/errors/middleware.go | 3 +- internal/model/media.go | 44 +++ internal/model/media_darwinv3.go | 40 ++ internal/model/media_v4.go | 35 ++ internal/model/mediamessage.go | 355 ++++++++++++++++++ internal/model/message.go | 121 +++--- internal/model/message_darwinv3.go | 45 ++- internal/model/message_v4.go | 58 ++- internal/model/wxproto/packedinfo.pb.go | 252 +++++++++++++ internal/model/wxproto/packedinfo.proto | 19 + .../datasource/darwinv3/datasource.go | 105 +++++- internal/wechatdb/datasource/datasource.go | 3 + internal/wechatdb/datasource/v4/datasource.go | 101 +++++ .../datasource/windowsv3/datasource.go | 153 ++++++++ internal/wechatdb/repository/media.go | 11 + internal/wechatdb/wechatdb.go | 4 + pkg/util/dat2img/dat2img.go | 60 +++ pkg/util/lz4/lz4.go | 16 + pkg/util/strings.go | 4 + 24 files changed, 1428 insertions(+), 136 deletions(-) create mode 100644 internal/model/media.go create mode 100644 internal/model/media_darwinv3.go create mode 100644 internal/model/media_v4.go create mode 100644 internal/model/mediamessage.go create mode 100644 internal/model/wxproto/packedinfo.pb.go create mode 100644 internal/model/wxproto/packedinfo.proto create mode 100644 internal/wechatdb/repository/media.go create mode 100644 pkg/util/dat2img/dat2img.go create mode 100644 pkg/util/lz4/lz4.go diff --git a/go.mod b/go.mod index 4400eb6..c18e687 100644 --- a/go.mod +++ b/go.mod @@ -8,14 +8,15 @@ require ( github.com/google/uuid v1.6.0 github.com/klauspost/compress v1.18.0 github.com/mattn/go-sqlite3 v1.14.24 - github.com/rivo/tview v0.0.0-20250322200051-73a5bd7d6839 + github.com/pierrec/lz4/v4 v4.1.22 + github.com/rivo/tview v0.0.0-20250325173046-7b72abf45814 github.com/shirou/gopsutil/v4 v4.25.2 github.com/sirupsen/logrus v1.9.3 github.com/spf13/cobra v1.9.1 - github.com/spf13/viper v1.20.0 + github.com/spf13/viper v1.20.1 golang.org/x/crypto v0.36.0 golang.org/x/sys v0.31.0 - google.golang.org/protobuf v1.36.5 + google.golang.org/protobuf v1.36.6 howett.net/plist v1.0.1 ) @@ -47,7 +48,7 @@ require ( github.com/pelletier/go-toml/v2 v2.2.3 // indirect github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect github.com/rivo/uniseg v0.4.7 // indirect - github.com/sagikazarmark/locafero v0.8.0 // indirect + github.com/sagikazarmark/locafero v0.9.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.14.0 // indirect github.com/spf13/cast v1.7.1 // indirect @@ -60,7 +61,7 @@ require ( github.com/yusufpapurcu/wmi v1.2.4 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/arch v0.15.0 // indirect - golang.org/x/net v0.37.0 // indirect + golang.org/x/net v0.38.0 // indirect golang.org/x/term v0.30.0 // indirect golang.org/x/text v0.23.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index b7f0f51..b0e5eda 100644 --- a/go.sum +++ b/go.sum @@ -81,12 +81,14 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M= github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= +github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= +github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU= github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= -github.com/rivo/tview v0.0.0-20250322200051-73a5bd7d6839 h1:/v0ptNHBQaQCxlvS4QLxLKKGfsSA9hcZcNgqVgmPRro= -github.com/rivo/tview v0.0.0-20250322200051-73a5bd7d6839/go.mod h1:02iFIz7K/A9jGCvrizLPvoqr4cEIx7q54RH5Qudkrss= +github.com/rivo/tview v0.0.0-20250325173046-7b72abf45814 h1:pJIO3sp+rkDbJTeqqpe2Oihq3hegiM5ASvsd6S0pvjg= +github.com/rivo/tview v0.0.0-20250325173046-7b72abf45814/go.mod h1:02iFIz7K/A9jGCvrizLPvoqr4cEIx7q54RH5Qudkrss= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.4.3/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= @@ -94,8 +96,8 @@ github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUc github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/sagikazarmark/locafero v0.8.0 h1:mXaMVw7IqxNBxfv3LdWt9MDmcWDQ1fagDH918lOdVaQ= -github.com/sagikazarmark/locafero v0.8.0/go.mod h1:UBUyz37V+EdMS3hDF3QWIiVr/2dPrx49OMO0Bn0hJqk= +github.com/sagikazarmark/locafero v0.9.0 h1:GbgQGNtTrEmddYDSAH9QLRyfAHY12md+8YFTqyMTC9k= +github.com/sagikazarmark/locafero v0.9.0/go.mod h1:UBUyz37V+EdMS3hDF3QWIiVr/2dPrx49OMO0Bn0hJqk= github.com/shirou/gopsutil/v4 v4.25.2 h1:NMscG3l2CqtWFS86kj3vP7soOczqrQYIEhO/pMvvQkk= github.com/shirou/gopsutil/v4 v4.25.2/go.mod h1:34gBYJzyqCDT11b6bMHP0XCvWeU3J61XRT7a2EmCRTA= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= @@ -110,8 +112,8 @@ github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/viper v1.20.0 h1:zrxIyR3RQIOsarIrgL8+sAvALXul9jeEPa06Y0Ph6vY= -github.com/spf13/viper v1.20.0/go.mod h1:P9Mdzt1zoHIG8m2eZQinpiBjo6kCmZSKBClNNqjJvu4= +github.com/spf13/viper v1.20.1 h1:ZMi+z/lvLyPSCoNtFCpqjy0S4kPbirhpTMwl8BkW9X4= +github.com/spf13/viper v1.20.1/go.mod h1:P9Mdzt1zoHIG8m2eZQinpiBjo6kCmZSKBClNNqjJvu4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -161,8 +163,8 @@ golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= -golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c= -golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= +golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= +golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -217,8 +219,8 @@ golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= -google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/internal/chatlog/database/service.go b/internal/chatlog/database/service.go index 5014119..2d0ac23 100644 --- a/internal/chatlog/database/service.go +++ b/internal/chatlog/database/service.go @@ -57,6 +57,10 @@ func (s *Service) GetSessions(key string, limit, offset int) (*wechatdb.GetSessi return s.db.GetSessions(key, limit, offset) } +func (s *Service) GetMedia(_type string, key string) (*model.Media, error) { + return s.db.GetMedia(_type, key) +} + // Close closes the database connection func (s *Service) Close() { // Add cleanup code if needed diff --git a/internal/chatlog/http/route.go b/internal/chatlog/http/route.go index d25e948..4c9a781 100644 --- a/internal/chatlog/http/route.go +++ b/internal/chatlog/http/route.go @@ -5,10 +5,13 @@ import ( "fmt" "io/fs" "net/http" + "os" + "path/filepath" "strings" "github.com/sjzar/chatlog/internal/errors" "github.com/sjzar/chatlog/pkg/util" + "github.com/sjzar/chatlog/pkg/util/dat2img" "github.com/gin-gonic/gin" ) @@ -27,6 +30,12 @@ func (s *Service) initRouter() { router.StaticFileFS("/favicon.ico", "./favicon.ico", http.FS(staticDir)) router.StaticFileFS("/", "./index.htm", http.FS(staticDir)) + // Media + router.GET("/image/:key", s.GetImage) + router.GET("/video/:key", s.GetVideo) + router.GET("/file/:key", s.GetFile) + router.GET("/data/*path", s.GetMediaData) + // MCP Server { router.GET("/sse", s.mcp.HandleSSE) @@ -108,7 +117,7 @@ func (s *Service) GetChatlog(c *gin.Context) { c.Writer.Flush() for _, m := range messages { - c.Writer.WriteString(m.PlainText(len(q.Talker) == 0)) + c.Writer.WriteString(m.PlainText(len(q.Talker) == 0, c.Request.Host)) c.Writer.WriteString("\n") c.Writer.Flush() } @@ -251,3 +260,86 @@ func (s *Service) GetSessions(c *gin.Context) { c.Writer.Flush() } } + +func (s *Service) GetImage(c *gin.Context) { + s.GetMedia(c, "image") +} + +func (s *Service) GetVideo(c *gin.Context) { + s.GetMedia(c, "video") +} + +func (s *Service) GetFile(c *gin.Context) { + s.GetMedia(c, "file") +} + +func (s *Service) GetMedia(c *gin.Context, _type string) { + key := c.Param("key") + if key == "" { + errors.Err(c, errors.ErrInvalidArg(key)) + return + } + + media, err := s.db.GetMedia(_type, key) + if err != nil { + errors.Err(c, err) + return + } + + if c.Query("info") != "" { + c.JSON(http.StatusOK, media) + return + } + + c.Redirect(http.StatusFound, "/data/"+media.Path) +} + +func (s *Service) GetMediaData(c *gin.Context) { + relativePath := filepath.Clean(c.Param("path")) + + absolutePath := filepath.Join(s.ctx.DataDir, relativePath) + + if _, err := os.Stat(absolutePath); os.IsNotExist(err) { + c.JSON(http.StatusNotFound, gin.H{ + "error": "File not found", + }) + return + } + + ext := strings.ToLower(filepath.Ext(absolutePath)) + switch { + case ext == ".dat": + s.HandleDatFile(c, absolutePath) + default: + // 直接返回文件 + c.File(absolutePath) + } + +} + +func (s *Service) HandleDatFile(c *gin.Context, path string) { + + b, err := os.ReadFile(path) + if err != nil { + errors.Err(c, err) + return + } + out, ext, err := dat2img.Dat2Image(b) + if err != nil { + c.File(path) + return + } + + switch ext { + case "jpg": + c.Data(http.StatusOK, "image/jpeg", out) + case "png": + c.Data(http.StatusOK, "image/png", out) + case "gif": + c.Data(http.StatusOK, "image/gif", out) + case "bmp": + c.Data(http.StatusOK, "image/bmp", out) + default: + c.File(path) + } +} diff --git a/internal/chatlog/mcp/service.go b/internal/chatlog/mcp/service.go index 2f2c95f..82f1126 100644 --- a/internal/chatlog/mcp/service.go +++ b/internal/chatlog/mcp/service.go @@ -200,7 +200,7 @@ func (s *Service) toolsCall(session *mcp.Session, req *mcp.Request) error { return fmt.Errorf("无法获取聊天记录: %v", err) } for _, m := range messages { - buf.WriteString(m.PlainText(len(talker) == 0)) + buf.WriteString(m.PlainText(len(talker) == 0, "")) buf.WriteString("\n") } default: @@ -273,7 +273,7 @@ func (s *Service) resourcesRead(session *mcp.Session, req *mcp.Request) error { return fmt.Errorf("无法获取聊天记录: %v", err) } for _, m := range messages { - buf.WriteString(m.PlainText(len(u.Host) == 0)) + buf.WriteString(m.PlainText(len(u.Host) == 0, "")) buf.WriteString("\n") } default: diff --git a/internal/errors/middleware.go b/internal/errors/middleware.go index 5afb763..82cffa3 100644 --- a/internal/errors/middleware.go +++ b/internal/errors/middleware.go @@ -6,6 +6,7 @@ import ( "github.com/gin-gonic/gin" "github.com/google/uuid" + log "github.com/sirupsen/logrus" ) // ErrorHandlerMiddleware 是一个 Gin 中间件,用于统一处理请求过程中的错误 @@ -53,7 +54,7 @@ func RecoveryMiddleware() gin.HandlerFunc { } // 记录错误日志 - fmt.Printf("PANIC RECOVERED: %v\n", err) + log.Errorf("PANIC RECOVERED: %v\n", err) // 返回 500 错误 c.JSON(http.StatusInternalServerError, err) diff --git a/internal/model/media.go b/internal/model/media.go new file mode 100644 index 0000000..cd03bfb --- /dev/null +++ b/internal/model/media.go @@ -0,0 +1,44 @@ +package model + +import ( + "path/filepath" +) + +type Media struct { + Type string `json:"type"` // 媒体类型:image, video, voice, file + Key string `json:"key"` // MD5 + Path string `json:"path"` + Name string `json:"name"` + Size int64 `json:"size"` + ModifyTime int64 `json:"modifyTime"` +} + +type MediaV3 struct { + Type string `json:"type"` + Key string `json:"key"` + Dir1 string `json:"dir1"` + Dir2 string `json:"dir2"` + Name string `json:"name"` + ModifyTime int64 `json:"modifyTime"` +} + +func (m *MediaV3) Wrap() *Media { + + var path string + switch m.Type { + case "image": + path = filepath.Join("FileStorage", "MsgAttach", m.Dir1, "Image", m.Dir2, m.Name) + case "video": + path = filepath.Join("FileStorage", "Video", m.Dir2, m.Name) + case "file": + path = filepath.Join("FileStorage", "File", m.Dir2, m.Name) + } + + return &Media{ + Type: m.Type, + Key: m.Key, + ModifyTime: m.ModifyTime, + Path: path, + Name: m.Name, + } +} diff --git a/internal/model/media_darwinv3.go b/internal/model/media_darwinv3.go new file mode 100644 index 0000000..583384d --- /dev/null +++ b/internal/model/media_darwinv3.go @@ -0,0 +1,40 @@ +package model + +import "path/filepath" + +// CREATE TABLE HlinkMediaRecord( +// mediaMd5 TEXT, +// mediaSize INTEGER, +// inodeNumber INTEGER, +// modifyTime INTEGER , +// CONSTRAINT _Md5_Size UNIQUE (mediaMd5,mediaSize) +// ) +// CREATE TABLE HlinkMediaDetail( +// localId INTEGER PRIMARY KEY AUTOINCREMENT, +// inodeNumber INTEGER, +// relativePath TEXT, +// fileName TEXT +// ) +type MediaDarwinV3 struct { + MediaMd5 string `json:"mediaMd5"` + MediaSize int64 `json:"mediaSize"` + InodeNumber int64 `json:"inodeNumber"` + ModifyTime int64 `json:"modifyTime"` + RelativePath string `json:"relativePath"` + FileName string `json:"fileName"` +} + +func (m *MediaDarwinV3) Wrap() *Media { + + path := filepath.Join("Message/MessageTemp", m.RelativePath, m.FileName) + name := filepath.Base(path) + + return &Media{ + Type: "", + Key: m.MediaMd5, + Size: m.MediaSize, + ModifyTime: m.ModifyTime, + Path: path, + Name: name, + } +} diff --git a/internal/model/media_v4.go b/internal/model/media_v4.go new file mode 100644 index 0000000..1390da9 --- /dev/null +++ b/internal/model/media_v4.go @@ -0,0 +1,35 @@ +package model + +import "path/filepath" + +type MediaV4 struct { + Type string `json:"type"` + Key string `json:"key"` + Dir1 string `json:"dir1"` + Dir2 string `json:"dir2"` + Name string `json:"name"` + Size int64 `json:"size"` + ModifyTime int64 `json:"modifyTime"` +} + +func (m *MediaV4) Wrap() *Media { + + var path string + switch m.Type { + case "image": + path = filepath.Join("msg", "attach", m.Dir1, m.Dir2, "Img", m.Name) + case "video": + path = filepath.Join("msg", "video", m.Dir1, m.Name) + case "file": + path = filepath.Join("msg", "file", m.Dir1, m.Name) + } + + return &Media{ + Type: m.Type, + Key: m.Key, + Path: path, + Name: m.Name, + Size: m.Size, + ModifyTime: m.ModifyTime, + } +} diff --git a/internal/model/mediamessage.go b/internal/model/mediamessage.go new file mode 100644 index 0000000..34249a9 --- /dev/null +++ b/internal/model/mediamessage.go @@ -0,0 +1,355 @@ +package model + +import ( + "encoding/xml" + "fmt" + "strings" + "time" + + "github.com/sjzar/chatlog/pkg/util" +) + +type MediaMessage struct { + Type int64 + SubType int + MediaMD5 string + MediaPath string + Title string + Desc string + Content string + URL string + + RecordInfo *RecordInfo + + ReferDisplayName string + ReferUserName string + ReferCreateTime time.Time + ReferMessage *MediaMessage + + Host string + + Message XMLMessage +} + +func NewMediaMessage(_type int64, data string) (*MediaMessage, error) { + + __type, subType := util.SplitInt64ToTwoInt32(_type) + + m := &MediaMessage{ + Type: __type, + SubType: int(subType), + } + + if _type == 1 { + m.Content = data + return m, nil + } + + var msg XMLMessage + err := xml.Unmarshal([]byte(data), &msg) + if err != nil { + return nil, err + } + + m.Message = msg + if err := m.parse(); err != nil { + return nil, err + } + + return m, nil +} + +func (m *MediaMessage) parse() error { + + switch m.Type { + case 3: + m.MediaMD5 = m.Message.Image.MD5 + case 43: + m.MediaMD5 = m.Message.Video.RawMd5 + case 49: + m.SubType = m.Message.App.Type + switch m.SubType { + case 5: + m.Title = m.Message.App.Title + m.URL = m.Message.App.URL + case 6: + m.Title = m.Message.App.Title + m.MediaMD5 = m.Message.App.MD5 + case 19: + m.Title = m.Message.App.Title + m.Desc = m.Message.App.Des + if m.Message.App.RecordItem == nil { + break + } + recordInfo := &RecordInfo{} + err := xml.Unmarshal([]byte(m.Message.App.RecordItem.CDATA), recordInfo) + if err != nil { + return err + } + m.RecordInfo = recordInfo + case 57: + m.Content = m.Message.App.Title + if m.Message.App.ReferMsg == nil { + break + } + subMsg, err := NewMediaMessage(m.Message.App.ReferMsg.Type, m.Message.App.ReferMsg.Content) + if err != nil { + break + } + m.ReferDisplayName = m.Message.App.ReferMsg.DisplayName + m.ReferUserName = m.Message.App.ReferMsg.ChatUsr + m.ReferCreateTime = time.Unix(m.Message.App.ReferMsg.CreateTime, 0) + m.ReferMessage = subMsg + } + } + + return nil +} + +func (m *MediaMessage) SetHost(host string) { + m.Host = host +} + +func (m *MediaMessage) String() string { + switch m.Type { + case 1: + return m.Content + case 3: + return fmt.Sprintf("![图片](http://%s/image/%s)", m.Host, m.MediaMD5) + case 34: + return "[语音]" + case 43: + if m.MediaPath != "" { + return fmt.Sprintf("![视频](http://%s/data/%s)", m.Host, m.MediaPath) + } + return fmt.Sprintf("![视频](http://%s/video/%s)", m.Host, m.MediaMD5) + case 47: + return "[动画表情]" + case 49: + switch m.SubType { + case 5: + return fmt.Sprintf("[链接|%s](%s)", m.Title, m.URL) + case 6: + return fmt.Sprintf("[文件|%s](http://%s/file/%s)", m.Title, m.Host, m.MediaMD5) + case 8: + return "[GIF表情]" + case 19: + if m.RecordInfo == nil { + return "[合并转发]" + } + buf := strings.Builder{} + for _, item := range m.RecordInfo.DataList.DataItems { + buf.WriteString(item.SourceName + ": ") + switch item.DataType { + case "jpg": + buf.WriteString(fmt.Sprintf("![图片](http://%s/image/%s)", m.Host, item.FullMD5)) + default: + buf.WriteString(item.DataDesc) + } + buf.WriteString("\n") + } + return m.Content + case 33, 36: + return "[小程序]" + case 57: + if m.ReferMessage == nil { + if m.Content == "" { + return "[引用]" + } + return "> [引用]\n" + m.Content + } + buf := strings.Builder{} + buf.WriteString("> ") + if m.ReferDisplayName != "" { + buf.WriteString(m.ReferDisplayName) + buf.WriteString("(") + buf.WriteString(m.ReferUserName) + buf.WriteString(")") + } else { + buf.WriteString(m.ReferUserName) + } + buf.WriteString(" ") + buf.WriteString(m.ReferCreateTime.Format("2006-01-02 15:04:05")) + buf.WriteString("\n") + buf.WriteString("> ") + m.ReferMessage.SetHost(m.Host) + buf.WriteString(strings.ReplaceAll(m.ReferMessage.String(), "\n", "\n> ")) + buf.WriteString("\n") + buf.WriteString(m.Content) + m.Content = buf.String() + return m.Content + case 63: + return "[视频号]" + case 87: + return "[群公告]" + case 2000: + return "[转账]" + case 2003: + return "[红包封面]" + default: + return "[分享]" + } + case 50: + return "[语音通话]" + case 10000: + return "[系统消息]" + default: + content := m.Content + if len(content) > 120 { + content = content[:120] + "<...>" + } + return fmt.Sprintf("Type: %d Content: %s", m.Type, content) + } +} + +type XMLMessage struct { + XMLName xml.Name `xml:"msg"` + Image Image `xml:"img,omitempty"` + Video Video `xml:"videomsg,omitempty"` + App App `xml:"appmsg,omitempty"` +} + +type XMLImageMessage struct { + XMLName xml.Name `xml:"msg"` + Img Image `xml:"img"` +} + +type Image struct { + MD5 string `xml:"md5,attr"` + // HdLength string `xml:"hdlength,attr"` + // Length string `xml:"length,attr"` + // AesKey string `xml:"aeskey,attr"` + // EncryVer string `xml:"encryver,attr"` + // OriginSourceMd5 string `xml:"originsourcemd5,attr"` + // FileKey string `xml:"filekey,attr"` + // UploadContinueCount string `xml:"uploadcontinuecount,attr"` + // ImgSourceUrl string `xml:"imgsourceurl,attr"` + // HevcMidSize string `xml:"hevc_mid_size,attr"` + // CdnBigImgUrl string `xml:"cdnbigimgurl,attr"` + // CdnMidImgUrl string `xml:"cdnmidimgurl,attr"` + // CdnThumbUrl string `xml:"cdnthumburl,attr"` + // CdnThumbLength string `xml:"cdnthumblength,attr"` + // CdnThumbWidth string `xml:"cdnthumbwidth,attr"` + // CdnThumbHeight string `xml:"cdnthumbheight,attr"` + // CdnThumbAesKey string `xml:"cdnthumbaeskey,attr"` +} + +type XMLVideoMessage struct { + XMLName xml.Name `xml:"msg"` + VideoMsg Video `xml:"videomsg"` +} + +type Video struct { + RawMd5 string `xml:"rawmd5,attr"` + // Length string `xml:"length,attr"` + // PlayLength string `xml:"playlength,attr"` + // Offset string `xml:"offset,attr"` + // FromUserName string `xml:"fromusername,attr"` + // Status string `xml:"status,attr"` + // Compress string `xml:"compress,attr"` + // CameraType string `xml:"cameratype,attr"` + // Source string `xml:"source,attr"` + // AesKey string `xml:"aeskey,attr"` + // CdnVideoUrl string `xml:"cdnvideourl,attr"` + // CdnThumbUrl string `xml:"cdnthumburl,attr"` + // CdnThumbLength string `xml:"cdnthumblength,attr"` + // CdnThumbWidth string `xml:"cdnthumbwidth,attr"` + // CdnThumbHeight string `xml:"cdnthumbheight,attr"` + // CdnThumbAesKey string `xml:"cdnthumbaeskey,attr"` + // EncryVer string `xml:"encryver,attr"` + // RawLength string `xml:"rawlength,attr"` + // CdnRawVideoUrl string `xml:"cdnrawvideourl,attr"` + // CdnRawVideoAesKey string `xml:"cdnrawvideoaeskey,attr"` +} + +type App struct { + Type int `xml:"type"` + Title string `xml:"title"` + Des string `xml:"des"` + URL string `xml:"url"` // type 5 分享 + AppAttach AppAttach `xml:"appattach"` // type 6 文件 + MD5 string `xml:"md5"` // type 6 文件 + RecordItem *RecordItem `xml:"recorditem,omitempty"` // type 19 合并转发 + ReferMsg *ReferMsg `xml:"refermsg,omitempty"` // type 57 引用 +} + +// ReferMsg 表示引用消息 +type ReferMsg struct { + Type int64 `xml:"type"` + SvrID string `xml:"svrid"` + FromUsr string `xml:"fromusr"` + ChatUsr string `xml:"chatusr"` + DisplayName string `xml:"displayname"` + MsgSource string `xml:"msgsource"` + Content string `xml:"content"` + StrID string `xml:"strid"` + CreateTime int64 `xml:"createtime"` +} + +// AppAttach 表示应用附件 +type AppAttach struct { + TotalLen string `xml:"totallen"` + AttachID string `xml:"attachid"` + CDNAttachURL string `xml:"cdnattachurl"` + EmoticonMD5 string `xml:"emoticonmd5"` + AESKey string `xml:"aeskey"` + FileExt string `xml:"fileext"` + IsLargeFileMsg string `xml:"islargefilemsg"` +} + +type RecordItem struct { + CDATA string `xml:",cdata"` + + // 解析后的记录信息 + RecordInfo *RecordInfo +} + +// RecordInfo 表示聊天记录信息 +type RecordInfo struct { + XMLName xml.Name `xml:"recordinfo"` + FromScene string `xml:"fromscene,omitempty"` + FavUsername string `xml:"favusername,omitempty"` + FavCreateTime string `xml:"favcreatetime,omitempty"` + IsChatRoom string `xml:"isChatRoom,omitempty"` + Title string `xml:"title,omitempty"` + Desc string `xml:"desc,omitempty"` + Info string `xml:"info,omitempty"` + DataList DataList `xml:"datalist,omitempty"` +} + +// DataList 表示数据列表 +type DataList struct { + Count string `xml:"count,attr,omitempty"` + DataItems []DataItem `xml:"dataitem,omitempty"` +} + +// DataItem 表示数据项 +type DataItem struct { + DataType string `xml:"datatype,attr,omitempty"` + DataID string `xml:"dataid,attr,omitempty"` + HTMLID string `xml:"htmlid,attr,omitempty"` + DataFmt string `xml:"datafmt,omitempty"` + SourceName string `xml:"sourcename,omitempty"` + SourceTime string `xml:"sourcetime,omitempty"` + SourceHeadURL string `xml:"sourceheadurl,omitempty"` + DataDesc string `xml:"datadesc,omitempty"` + + // 图片特有字段 + ThumbSourcePath string `xml:"thumbsourcepath,omitempty"` + ThumbSize string `xml:"thumbsize,omitempty"` + CDNDataURL string `xml:"cdndataurl,omitempty"` + CDNDataKey string `xml:"cdndatakey,omitempty"` + CDNThumbURL string `xml:"cdnthumburl,omitempty"` + CDNThumbKey string `xml:"cdnthumbkey,omitempty"` + DataSourcePath string `xml:"datasourcepath,omitempty"` + FullMD5 string `xml:"fullmd5,omitempty"` + ThumbFullMD5 string `xml:"thumbfullmd5,omitempty"` + ThumbHead256MD5 string `xml:"thumbhead256md5,omitempty"` + DataSize string `xml:"datasize,omitempty"` + CDNEncryVer string `xml:"cdnencryver,omitempty"` + SrcChatname string `xml:"srcChatname,omitempty"` + SrcMsgLocalID string `xml:"srcMsgLocalid,omitempty"` + SrcMsgCreateTime string `xml:"srcMsgCreateTime,omitempty"` + MessageUUID string `xml:"messageuuid,omitempty"` + FromNewMsgID string `xml:"fromnewmsgid,omitempty"` +} diff --git a/internal/model/message.go b/internal/model/message.go index f4eb802..390530c 100644 --- a/internal/model/message.go +++ b/internal/model/message.go @@ -1,11 +1,12 @@ package model import ( - "fmt" + "path/filepath" "strings" "time" "github.com/sjzar/chatlog/internal/model/wxproto" + "github.com/sjzar/chatlog/pkg/util/lz4" "google.golang.org/protobuf/proto" ) @@ -23,7 +24,7 @@ type Message struct { TalkerID int `json:"talkerID"` // 聊天对象,Name2ID 表序号,索引值 Talker string `json:"talker"` // 聊天对象,微信 ID or 群 ID IsSender int `json:"isSender"` // 是否为发送消息,0 接收消息,1 发送消息 - Type int `json:"type"` // 消息类型 + Type int64 `json:"type"` // 消息类型 SubType int `json:"subType"` // 消息子类型 Content string `json:"content"` // 消息内容,文字聊天内容 或 XML CompressContent []byte `json:"compressContent"` // 非文字聊天内容,如图片、语音、视频等 @@ -32,8 +33,9 @@ type Message struct { // Fill Info // 从联系人等信息中填充 - DisplayName string `json:"-"` // 显示名称 - ChatRoomName string `json:"-"` // 群聊名称 + DisplayName string `json:"-"` // 显示名称 + ChatRoomName string `json:"-"` // 群聊名称 + MediaMessage *MediaMessage `json:"-"` // 多媒体消息 Version string `json:"-"` // 消息版本,内部判断 } @@ -72,7 +74,7 @@ type MessageV3 struct { TalkerID int `json:"TalkerId"` // 聊天对象,Name2ID 表序号,索引值 StrTalker string `json:"StrTalker"` // 聊天对象,微信 ID or 群 ID IsSender int `json:"IsSender"` // 是否为发送消息,0 接收消息,1 发送消息 - Type int `json:"Type"` // 消息类型 + Type int64 `json:"Type"` // 消息类型 SubType int `json:"SubType"` // 消息子类型 StrContent string `json:"StrContent"` // 消息内容,文字聊天内容 或 XML CompressContent []byte `json:"CompressContent"` // 非文字聊天内容,如图片、语音、视频等 @@ -99,14 +101,7 @@ type MessageV3 struct { func (m *MessageV3) Wrap() *Message { - isChatRoom := strings.HasSuffix(m.StrTalker, "@chatroom") - - var chatRoomSender string - if len(m.BytesExtra) != 0 && isChatRoom { - chatRoomSender = ParseBytesExtra(m.BytesExtra) - } - - return &Message{ + _m := &Message{ Sequence: m.Sequence, CreateTime: time.Unix(m.CreateTime, 0), TalkerID: m.TalkerID, @@ -116,33 +111,65 @@ func (m *MessageV3) Wrap() *Message { SubType: m.SubType, Content: m.StrContent, CompressContent: m.CompressContent, - IsChatRoom: isChatRoom, - ChatRoomSender: chatRoomSender, Version: WeChatV3, } + + _m.IsChatRoom = strings.HasSuffix(_m.Talker, "@chatroom") + + if _m.Type == 49 { + b, err := lz4.Decompress(m.CompressContent) + if err == nil { + _m.Content = string(b) + } + } + + if _m.Type != 1 { + mediaMessage, err := NewMediaMessage(_m.Type, _m.Content) + if err == nil { + _m.MediaMessage = mediaMessage + } + } + + if len(m.BytesExtra) != 0 { + if bytesExtra := ParseBytesExtra(m.BytesExtra); bytesExtra != nil { + if _m.IsChatRoom { + _m.ChatRoomSender = bytesExtra[1] + } + // FIXME xml 中的 md5 数据无法匹配到 hardlink 记录,所以直接用 proto 数据 + if _m.Type == 43 { + path := bytesExtra[4] + parts := strings.Split(filepath.ToSlash(path), "/") + if len(parts) > 1 { + path = strings.Join(parts[1:], "/") + } + _m.MediaMessage.MediaPath = path + } + } + } + + return _m } // ParseBytesExtra 解析额外数据 // 按需解析 -func ParseBytesExtra(b []byte) (chatRoomSender string) { +func ParseBytesExtra(b []byte) map[int]string { var pbMsg wxproto.BytesExtra if err := proto.Unmarshal(b, &pbMsg); err != nil { - return + return nil } if pbMsg.Items == nil { - return + return nil } + ret := make(map[int]string, len(pbMsg.Items)) for _, item := range pbMsg.Items { - if item.Type == 1 { - return item.Value - } + ret[int(item.Type)] = item.Value } - return + return ret } -func (m *Message) PlainText(showChatRoom bool) string { +func (m *Message) PlainText(showChatRoom bool, host string) string { buf := strings.Builder{} talker := m.Talker @@ -177,51 +204,13 @@ func (m *Message) PlainText(showChatRoom bool) string { buf.WriteString(m.CreateTime.Format("2006-01-02 15:04:05")) buf.WriteString("\n") - switch m.Type { - case 1: + if m.MediaMessage != nil { + m.MediaMessage.SetHost(host) + buf.WriteString(m.MediaMessage.String()) + } else { buf.WriteString(m.Content) - case 3: - buf.WriteString("[图片]") - case 34: - buf.WriteString("[语音]") - case 43: - buf.WriteString("[视频]") - case 47: - buf.WriteString("[动画表情]") - case 49: - switch m.SubType { - case 6: - buf.WriteString("[文件]") - case 8: - buf.WriteString("[GIF表情]") - case 19: - buf.WriteString("[合并转发]") - case 33, 36: - buf.WriteString("[小程序]") - case 57: - buf.WriteString("[引用]") - case 63: - buf.WriteString("[视频号]") - case 87: - buf.WriteString("[群公告]") - case 2000: - buf.WriteString("[转账]") - case 2003: - buf.WriteString("[红包封面]") - default: - buf.WriteString("[分享]") - } - case 50: - buf.WriteString("[语音通话]") - case 10000: - buf.WriteString("[系统消息]") - default: - content := m.Content - if len(content) > 120 { - content = content[:120] + "<...>" - } - buf.WriteString(fmt.Sprintf("Type: %d Content: %s", m.Type, content)) } + buf.WriteString("\n") return buf.String() diff --git a/internal/model/message_darwinv3.go b/internal/model/message_darwinv3.go index 8b73044..48dfbf7 100644 --- a/internal/model/message_darwinv3.go +++ b/internal/model/message_darwinv3.go @@ -23,16 +23,16 @@ import ( // ConBlob BLOB // ) type MessageDarwinV3 struct { - MesCreateTime int64 `json:"mesCreateTime"` - MesContent string `json:"mesContent"` - MesType int `json:"mesType"` + MsgCreateTime int64 `json:"msgCreateTime"` + MsgContent string `json:"msgContent"` + MessageType int64 `json:"messageType"` MesDes int `json:"mesDes"` // 0: 发送, 1: 接收 - MesSource string `json:"mesSource"` // MesLocalID int64 `json:"mesLocalID"` // MesSvrID int64 `json:"mesSvrID"` // MesStatus int `json:"mesStatus"` // MesImgStatus int `json:"mesImgStatus"` + // MsgSource string `json:"msgSource"` // IntRes1 int `json:"IntRes1"` // IntRes2 int `json:"IntRes2"` // StrRes1 string `json:"StrRes1"` @@ -44,26 +44,31 @@ type MessageDarwinV3 struct { } func (m *MessageDarwinV3) Wrap(talker string) *Message { - isChatRoom := strings.HasSuffix(talker, "@chatroom") - var chatRoomSender string - content := m.MesContent - if isChatRoom { - split := strings.SplitN(m.MesContent, ":\n", 2) + _m := &Message{ + CreateTime: time.Unix(m.MsgCreateTime, 0), + Type: m.MessageType, + IsSender: (m.MesDes + 1) % 2, + Version: WeChatDarwinV3, + } + + _m.IsChatRoom = strings.HasSuffix(talker, "@chatroom") + + _m.Content = m.MsgContent + if _m.IsChatRoom { + split := strings.SplitN(m.MsgContent, ":\n", 2) if len(split) == 2 { - chatRoomSender = split[0] - content = split[1] + _m.ChatRoomSender = split[0] + _m.Content = split[1] } } - return &Message{ - CreateTime: time.Unix(m.MesCreateTime, 0), - Content: content, - Talker: talker, - Type: m.MesType, - IsSender: (m.MesDes + 1) % 2, - IsChatRoom: isChatRoom, - ChatRoomSender: chatRoomSender, - Version: WeChatDarwinV3, + if _m.Type != 1 { + mediaMessage, err := NewMediaMessage(_m.Type, _m.Content) + if err == nil { + _m.MediaMessage = mediaMessage + } } + + return _m } diff --git a/internal/model/message_v4.go b/internal/model/message_v4.go index 4b04537..88987ba 100644 --- a/internal/model/message_v4.go +++ b/internal/model/message_v4.go @@ -5,7 +5,9 @@ import ( "strings" "time" + "github.com/sjzar/chatlog/internal/model/wxproto" "github.com/sjzar/chatlog/pkg/util/zstd" + "google.golang.org/protobuf/proto" ) // CREATE TABLE Msg_md5(talker)( @@ -29,7 +31,7 @@ import ( // ) type MessageV4 struct { SortSeq int64 `json:"sort_seq"` // 消息序号,10位时间戳 + 3位序号 - LocalType int `json:"local_type"` // 消息类型 + LocalType int64 `json:"local_type"` // 消息类型 RealSenderID int `json:"real_sender_id"` // 发送人 ID,对应 Name2Id 表序号 CreateTime int64 `json:"create_time"` // 消息创建时间,10位时间戳 MessageContent []byte `json:"message_content"` // 消息内容,文字聊天内容 或 zstd 压缩内容 @@ -50,12 +52,11 @@ type MessageV4 struct { func (m *MessageV4) Wrap(id2Name map[int]string, isChatRoom bool) *Message { _m := &Message{ - Sequence: m.SortSeq, - CreateTime: time.Unix(m.CreateTime, 0), - TalkerID: m.RealSenderID, // 依赖 Name2Id 表进行转换为 StrTalker - CompressContent: m.PackedInfoData, - Type: m.LocalType, - Version: WeChatV4, + Sequence: m.SortSeq, + CreateTime: time.Unix(m.CreateTime, 0), + TalkerID: m.RealSenderID, // 依赖 Name2Id 表进行转换为 StrTalker + Type: m.LocalType, + Version: WeChatV4, } if name, ok := id2Name[m.RealSenderID]; ok { @@ -66,16 +67,12 @@ func (m *MessageV4) Wrap(id2Name map[int]string, isChatRoom bool) *Message { _m.IsSender = 1 } - if _m.Type == 1 { - _m.Content = string(m.MessageContent) - } else { - if bytes.HasPrefix(m.MessageContent, []byte{0x28, 0xb5, 0x2f, 0xfd}) { - if b, err := zstd.Decompress(m.MessageContent); err == nil { - _m.Content = string(b) - } - } else { - _m.CompressContent = m.MessageContent + if bytes.HasPrefix(m.MessageContent, []byte{0x28, 0xb5, 0x2f, 0xfd}) { + if b, err := zstd.Decompress(m.MessageContent); err == nil { + _m.Content = string(b) } + } else { + _m.Content = string(m.MessageContent) } if isChatRoom { @@ -87,5 +84,34 @@ func (m *MessageV4) Wrap(id2Name map[int]string, isChatRoom bool) *Message { } } + if _m.Type != 1 { + mediaMessage, err := NewMediaMessage(_m.Type, _m.Content) + if err == nil { + _m.MediaMessage = mediaMessage + _m.Type = mediaMessage.Type + _m.SubType = mediaMessage.SubType + } + } + + if len(m.PackedInfoData) != 0 { + if packedInfo := ParsePackedInfo(m.PackedInfoData); packedInfo != nil { + // FIXME 尝试解决 v4 版本 xml 数据无法匹配到 hardlink 记录的问题 + if _m.Type == 3 && packedInfo.Image != nil { + _m.MediaMessage.MediaMD5 = packedInfo.Image.Md5 + } + if _m.Type == 43 && packedInfo.Video != nil { + _m.MediaMessage.MediaMD5 = packedInfo.Video.Md5 + } + } + } + return _m } + +func ParsePackedInfo(b []byte) *wxproto.PackedInfo { + var pbMsg wxproto.PackedInfo + if err := proto.Unmarshal(b, &pbMsg); err != nil { + return nil + } + return &pbMsg +} diff --git a/internal/model/wxproto/packedinfo.pb.go b/internal/model/wxproto/packedinfo.pb.go new file mode 100644 index 0000000..5a10ef3 --- /dev/null +++ b/internal/model/wxproto/packedinfo.pb.go @@ -0,0 +1,252 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.36.5 +// protoc v5.29.3 +// source: packedinfo.proto + +package wxproto + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" + unsafe "unsafe" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type PackedInfo struct { + state protoimpl.MessageState `protogen:"open.v1"` + Type uint32 `protobuf:"varint,1,opt,name=type,proto3" json:"type,omitempty"` // 始终为 106 (0x6a) + Version uint32 `protobuf:"varint,2,opt,name=version,proto3" json:"version,omitempty"` // 始终为 14 (0xe) + Image *ImageHash `protobuf:"bytes,3,opt,name=image,proto3" json:"image,omitempty"` // 图片哈希 + Video *VideoHash `protobuf:"bytes,4,opt,name=video,proto3" json:"video,omitempty"` // 视频哈希 + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PackedInfo) Reset() { + *x = PackedInfo{} + mi := &file_packedinfo_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PackedInfo) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PackedInfo) ProtoMessage() {} + +func (x *PackedInfo) ProtoReflect() protoreflect.Message { + mi := &file_packedinfo_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PackedInfo.ProtoReflect.Descriptor instead. +func (*PackedInfo) Descriptor() ([]byte, []int) { + return file_packedinfo_proto_rawDescGZIP(), []int{0} +} + +func (x *PackedInfo) GetType() uint32 { + if x != nil { + return x.Type + } + return 0 +} + +func (x *PackedInfo) GetVersion() uint32 { + if x != nil { + return x.Version + } + return 0 +} + +func (x *PackedInfo) GetImage() *ImageHash { + if x != nil { + return x.Image + } + return nil +} + +func (x *PackedInfo) GetVideo() *VideoHash { + if x != nil { + return x.Video + } + return nil +} + +type ImageHash struct { + state protoimpl.MessageState `protogen:"open.v1"` + Md5 string `protobuf:"bytes,4,opt,name=md5,proto3" json:"md5,omitempty"` // 32 字符的 MD5 哈希 + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ImageHash) Reset() { + *x = ImageHash{} + mi := &file_packedinfo_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ImageHash) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ImageHash) ProtoMessage() {} + +func (x *ImageHash) ProtoReflect() protoreflect.Message { + mi := &file_packedinfo_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ImageHash.ProtoReflect.Descriptor instead. +func (*ImageHash) Descriptor() ([]byte, []int) { + return file_packedinfo_proto_rawDescGZIP(), []int{1} +} + +func (x *ImageHash) GetMd5() string { + if x != nil { + return x.Md5 + } + return "" +} + +type VideoHash struct { + state protoimpl.MessageState `protogen:"open.v1"` + Md5 string `protobuf:"bytes,8,opt,name=md5,proto3" json:"md5,omitempty"` // 32 字符的 MD5 哈希 + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *VideoHash) Reset() { + *x = VideoHash{} + mi := &file_packedinfo_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *VideoHash) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*VideoHash) ProtoMessage() {} + +func (x *VideoHash) ProtoReflect() protoreflect.Message { + mi := &file_packedinfo_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use VideoHash.ProtoReflect.Descriptor instead. +func (*VideoHash) Descriptor() ([]byte, []int) { + return file_packedinfo_proto_rawDescGZIP(), []int{2} +} + +func (x *VideoHash) GetMd5() string { + if x != nil { + return x.Md5 + } + return "" +} + +var File_packedinfo_proto protoreflect.FileDescriptor + +var file_packedinfo_proto_rawDesc = string([]byte{ + 0x0a, 0x10, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x69, 0x6e, 0x66, 0x6f, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x12, 0x0c, 0x61, 0x70, 0x70, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, + 0x22, 0x98, 0x01, 0x0a, 0x0a, 0x50, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x49, 0x6e, 0x66, 0x6f, 0x12, + 0x12, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x74, + 0x79, 0x70, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x2d, 0x0a, + 0x05, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x61, + 0x70, 0x70, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x49, 0x6d, 0x61, 0x67, + 0x65, 0x48, 0x61, 0x73, 0x68, 0x52, 0x05, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x2d, 0x0a, 0x05, + 0x76, 0x69, 0x64, 0x65, 0x6f, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x61, 0x70, + 0x70, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x56, 0x69, 0x64, 0x65, 0x6f, + 0x48, 0x61, 0x73, 0x68, 0x52, 0x05, 0x76, 0x69, 0x64, 0x65, 0x6f, 0x22, 0x1d, 0x0a, 0x09, 0x49, + 0x6d, 0x61, 0x67, 0x65, 0x48, 0x61, 0x73, 0x68, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x64, 0x35, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6d, 0x64, 0x35, 0x22, 0x1d, 0x0a, 0x09, 0x56, 0x69, + 0x64, 0x65, 0x6f, 0x48, 0x61, 0x73, 0x68, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x64, 0x35, 0x18, 0x08, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6d, 0x64, 0x35, 0x42, 0x0b, 0x5a, 0x09, 0x2e, 0x3b, 0x77, + 0x78, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +}) + +var ( + file_packedinfo_proto_rawDescOnce sync.Once + file_packedinfo_proto_rawDescData []byte +) + +func file_packedinfo_proto_rawDescGZIP() []byte { + file_packedinfo_proto_rawDescOnce.Do(func() { + file_packedinfo_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_packedinfo_proto_rawDesc), len(file_packedinfo_proto_rawDesc))) + }) + return file_packedinfo_proto_rawDescData +} + +var file_packedinfo_proto_msgTypes = make([]protoimpl.MessageInfo, 3) +var file_packedinfo_proto_goTypes = []any{ + (*PackedInfo)(nil), // 0: app.protobuf.PackedInfo + (*ImageHash)(nil), // 1: app.protobuf.ImageHash + (*VideoHash)(nil), // 2: app.protobuf.VideoHash +} +var file_packedinfo_proto_depIdxs = []int32{ + 1, // 0: app.protobuf.PackedInfo.image:type_name -> app.protobuf.ImageHash + 2, // 1: app.protobuf.PackedInfo.video:type_name -> app.protobuf.VideoHash + 2, // [2:2] is the sub-list for method output_type + 2, // [2:2] is the sub-list for method input_type + 2, // [2:2] is the sub-list for extension type_name + 2, // [2:2] is the sub-list for extension extendee + 0, // [0:2] is the sub-list for field type_name +} + +func init() { file_packedinfo_proto_init() } +func file_packedinfo_proto_init() { + if File_packedinfo_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_packedinfo_proto_rawDesc), len(file_packedinfo_proto_rawDesc)), + NumEnums: 0, + NumMessages: 3, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_packedinfo_proto_goTypes, + DependencyIndexes: file_packedinfo_proto_depIdxs, + MessageInfos: file_packedinfo_proto_msgTypes, + }.Build() + File_packedinfo_proto = out.File + file_packedinfo_proto_goTypes = nil + file_packedinfo_proto_depIdxs = nil +} diff --git a/internal/model/wxproto/packedinfo.proto b/internal/model/wxproto/packedinfo.proto new file mode 100644 index 0000000..49a15c6 --- /dev/null +++ b/internal/model/wxproto/packedinfo.proto @@ -0,0 +1,19 @@ +syntax = "proto3"; +package app.protobuf; +option go_package=".;wxproto"; + +message PackedInfo { + uint32 type = 1; // 始终为 106 (0x6a) + uint32 version = 2; // 始终为 14 (0xe) + ImageHash image = 3; // 图片哈希 + VideoHash video = 4; // 视频哈希 +} + + +message ImageHash { + string md5 = 4; // 32 字符的 MD5 哈希 +} + +message VideoHash { + string md5 = 8; // 32 字符的 MD5 哈希 +} \ No newline at end of file diff --git a/internal/wechatdb/datasource/darwinv3/datasource.go b/internal/wechatdb/datasource/darwinv3/datasource.go index 7fda605..e7038a5 100644 --- a/internal/wechatdb/datasource/darwinv3/datasource.go +++ b/internal/wechatdb/datasource/darwinv3/datasource.go @@ -6,7 +6,6 @@ import ( "database/sql" "encoding/hex" "fmt" - "log" "strings" "time" @@ -14,6 +13,7 @@ import ( "github.com/sjzar/chatlog/pkg/util" _ "github.com/mattn/go-sqlite3" + log "github.com/sirupsen/logrus" ) const ( @@ -21,6 +21,7 @@ const ( ContactFilePattern = "^wccontact_new2\\.db$" ChatRoomFilePattern = "^group_new\\.db$" SessionFilePattern = "^session_new\\.db$" + MediaFilePattern = "^hldata\\.db$" ) type DataSource struct { @@ -29,6 +30,7 @@ type DataSource struct { contactDb *sql.DB chatRoomDb *sql.DB sessionDb *sql.DB + mediaDb *sql.DB talkerDBMap map[string]*sql.DB user2DisplayName map[string]string @@ -54,6 +56,9 @@ func New(path string) (*DataSource, error) { if err := ds.initSessionDb(path); err != nil { return nil, fmt.Errorf("初始化会话数据库失败: %w", err) } + if err := ds.initMediaDb(path); err != nil { + return nil, fmt.Errorf("初始化会话数据库失败: %w", err) + } return ds, nil } @@ -138,7 +143,7 @@ func (ds *DataSource) initChatRoomDb(path string) error { return fmt.Errorf("连接群聊数据库失败: %w", err) } - rows, err := ds.chatRoomDb.Query("SELECT m_nsUsrName, nickname FROM GroupMember") + rows, err := ds.chatRoomDb.Query("SELECT m_nsUsrName, IFNULL(nickname,\"\") FROM GroupMember") if err != nil { log.Printf("警告: 获取群聊成员失败: %v", err) return nil @@ -173,6 +178,21 @@ func (ds *DataSource) initSessionDb(path string) error { return nil } +func (ds *DataSource) initMediaDb(path string) error { + files, err := util.FindFilesWithPatterns(path, MediaFilePattern, true) + if err != nil { + return fmt.Errorf("查找媒体数据库文件失败: %w", err) + } + if len(files) == 0 { + return fmt.Errorf("未找到媒体数据库文件: %s", path) + } + ds.mediaDb, err = sql.Open("sqlite3", files[0]) + if err != nil { + return fmt.Errorf("连接媒体数据库失败: %w", err) + } + return nil +} + // GetMessages 实现获取消息的方法 func (ds *DataSource) GetMessages(ctx context.Context, startTime, endTime time.Time, talker string, limit, offset int) ([]*model.Message, error) { // 在 darwinv3 中,每个联系人/群聊的消息存储在单独的表中,表名为 Chat_md5(talker) @@ -191,7 +211,7 @@ func (ds *DataSource) GetMessages(ctx context.Context, startTime, endTime time.T // 构建查询条件 query := fmt.Sprintf(` - SELECT msgCreateTime, msgContent, messageType, mesDes, msgSource, CompressContent, ConBlob + SELECT msgCreateTime, msgContent, messageType, mesDes FROM %s WHERE msgCreateTime >= ? AND msgCreateTime <= ? ORDER BY msgCreateTime ASC @@ -216,15 +236,11 @@ func (ds *DataSource) GetMessages(ctx context.Context, startTime, endTime time.T messages := []*model.Message{} for rows.Next() { var msg model.MessageDarwinV3 - var compressContent, conBlob []byte err := rows.Scan( - &msg.MesCreateTime, - &msg.MesContent, - &msg.MesType, + &msg.MsgCreateTime, + &msg.MsgContent, + &msg.MessageType, &msg.MesDes, - &msg.MesSource, - &compressContent, - &conBlob, ) if err != nil { log.Printf("警告: 扫描消息行失败: %v", err) @@ -260,13 +276,13 @@ func (ds *DataSource) GetContacts(ctx context.Context, key string, limit, offset if key != "" { // 按照关键字查询 - query = `SELECT IFNULL(m_nsUsrName,""), nickname, IFNULL(m_nsRemark,""), m_uiSex, IFNULL(m_nsAliasName,"") + query = `SELECT IFNULL(m_nsUsrName,""), IFNULL(nickname,""), IFNULL(m_nsRemark,""), m_uiSex, IFNULL(m_nsAliasName,"") FROM WCContact WHERE m_nsUsrName = ? OR nickname = ? OR m_nsRemark = ? OR m_nsAliasName = ?` args = []interface{}{key, key, key, key} } else { // 查询所有联系人 - query = `SELECT IFNULL(m_nsUsrName,""), nickname, IFNULL(m_nsRemark,""), m_uiSex, IFNULL(m_nsAliasName,"") + query = `SELECT IFNULL(m_nsUsrName,""), IFNULL(nickname,""), IFNULL(m_nsRemark,""), m_uiSex, IFNULL(m_nsAliasName,"") FROM WCContact` } @@ -314,13 +330,13 @@ func (ds *DataSource) GetChatRooms(ctx context.Context, key string, limit, offse if key != "" { // 按照关键字查询 - query = `SELECT IFNULL(m_nsUsrName,""), nickname, IFNULL(m_nsRemark,""), IFNULL(m_nsChatRoomMemList,""), IFNULL(m_nsChatRoomAdminList,"") + query = `SELECT IFNULL(m_nsUsrName,""), IFNULL(nickname,""), IFNULL(m_nsRemark,""), IFNULL(m_nsChatRoomMemList,""), IFNULL(m_nsChatRoomAdminList,"") FROM GroupContact WHERE m_nsUsrName = ? OR nickname = ? OR m_nsRemark = ?` args = []interface{}{key, key, key} } else { // 查询所有群聊 - query = `SELECT IFNULL(m_nsUsrName,""), nickname, IFNULL(m_nsRemark,""), IFNULL(m_nsChatRoomMemList,""), IFNULL(m_nsChatRoomAdminList,"") + query = `SELECT IFNULL(m_nsUsrName,""), IFNULL(nickname,""), IFNULL(m_nsRemark,""), IFNULL(m_nsChatRoomMemList,""), IFNULL(m_nsChatRoomAdminList,"") FROM GroupContact` } @@ -364,7 +380,7 @@ func (ds *DataSource) GetChatRooms(ctx context.Context, key string, limit, offse if err == nil && len(contacts) > 0 && strings.HasSuffix(contacts[0].UserName, "@chatroom") { // 再次尝试通过用户名查找群聊 rows, err := ds.chatRoomDb.QueryContext(ctx, - `SELECT IFNULL(m_nsUsrName,""), nickname, IFNULL(m_nsRemark,""), IFNULL(m_nsChatRoomMemList,""), IFNULL(m_nsChatRoomAdminList,"") + `SELECT IFNULL(m_nsUsrName,""), IFNULL(nickname,""), IFNULL(m_nsRemark,""), IFNULL(m_nsChatRoomMemList,""), IFNULL(m_nsChatRoomAdminList,"") FROM GroupContact WHERE m_nsUsrName = ?`, contacts[0].UserName) @@ -470,6 +486,58 @@ func (ds *DataSource) GetSessions(ctx context.Context, key string, limit, offset return sessions, nil } +func (ds *DataSource) GetMedia(ctx context.Context, _type string, key string) (*model.Media, error) { + if key == "" { + return nil, fmt.Errorf("key 不能为空") + } + query := `SELECT + r.mediaMd5, + r.mediaSize, + r.inodeNumber, + r.modifyTime, + d.relativePath, + d.fileName +FROM + HlinkMediaRecord r +JOIN + HlinkMediaDetail d ON r.inodeNumber = d.inodeNumber +WHERE + r.mediaMd5 = ?` + args := []interface{}{key} + // 执行查询 + rows, err := ds.mediaDb.QueryContext(ctx, query, args...) + if err != nil { + return nil, fmt.Errorf("查询媒体失败: %w", err) + } + defer rows.Close() + + var media *model.Media + for rows.Next() { + var mediaDarwinV3 model.MediaDarwinV3 + err := rows.Scan( + &mediaDarwinV3.MediaMd5, + &mediaDarwinV3.MediaSize, + &mediaDarwinV3.InodeNumber, + &mediaDarwinV3.ModifyTime, + &mediaDarwinV3.RelativePath, + &mediaDarwinV3.FileName, + ) + + if err != nil { + return nil, fmt.Errorf("扫描会话行失败: %w", err) + } + + // 包装成通用模型 + media = mediaDarwinV3.Wrap() + } + + if media == nil { + return nil, fmt.Errorf("未找到媒体 %s", key) + } + + return media, nil +} + // Close 实现关闭数据库连接的方法 func (ds *DataSource) Close() error { var errs []error @@ -502,6 +570,13 @@ func (ds *DataSource) Close() error { } } + // 关闭媒体数据库连接 + if ds.mediaDb != nil { + if err := ds.mediaDb.Close(); err != nil { + errs = append(errs, fmt.Errorf("关闭媒体数据库失败: %w", err)) + } + } + if len(errs) > 0 { return fmt.Errorf("关闭数据库连接时发生错误: %v", errs) } diff --git a/internal/wechatdb/datasource/datasource.go b/internal/wechatdb/datasource/datasource.go index be7f84b..727969b 100644 --- a/internal/wechatdb/datasource/datasource.go +++ b/internal/wechatdb/datasource/datasource.go @@ -30,6 +30,9 @@ type DataSource interface { // 最近会话 GetSessions(ctx context.Context, key string, limit, offset int) ([]*model.Session, error) + // 媒体 + GetMedia(ctx context.Context, _type string, key string) (*model.Media, error) + Close() error } diff --git a/internal/wechatdb/datasource/v4/datasource.go b/internal/wechatdb/datasource/v4/datasource.go index 11fa4a6..eb205ea 100644 --- a/internal/wechatdb/datasource/v4/datasource.go +++ b/internal/wechatdb/datasource/v4/datasource.go @@ -21,6 +21,7 @@ const ( MessageFilePattern = "^message_([0-9]?[0-9])?\\.db$" ContactFilePattern = "^contact\\.db$" SessionFilePattern = "^session\\.db$" + MediaFilePattern = "^hardlink\\.db$" ) // MessageDBInfo 存储消息数据库的信息 @@ -36,6 +37,7 @@ type DataSource struct { messageDbs map[string]*sql.DB contactDb *sql.DB sessionDb *sql.DB + mediaDb *sql.DB // 消息数据库信息 messageFiles []MessageDBInfo @@ -57,6 +59,9 @@ func New(path string) (*DataSource, error) { if err := ds.initSessionDb(path); err != nil { return nil, fmt.Errorf("初始化会话数据库失败: %w", err) } + if err := ds.initMediaDb(path); err != nil { + return nil, fmt.Errorf("初始化媒体数据库失败: %w", err) + } return ds, nil } @@ -175,6 +180,21 @@ func (ds *DataSource) initSessionDb(path string) error { return nil } +func (ds *DataSource) initMediaDb(path string) error { + files, err := util.FindFilesWithPatterns(path, MediaFilePattern, true) + if err != nil { + return fmt.Errorf("查找媒体数据库文件失败: %w", err) + } + if len(files) == 0 { + return fmt.Errorf("未找到媒体数据库文件: %s", path) + } + ds.mediaDb, err = sql.Open("sqlite3", files[0]) + if err != nil { + return fmt.Errorf("连接媒体数据库失败: %w", err) + } + return nil +} + // getDBInfosForTimeRange 获取时间范围内的数据库信息 func (ds *DataSource) getDBInfosForTimeRange(startTime, endTime time.Time) []MessageDBInfo { var dbs []MessageDBInfo @@ -602,6 +622,81 @@ func (ds *DataSource) GetSessions(ctx context.Context, key string, limit, offset return sessions, nil } +func (ds *DataSource) GetMedia(ctx context.Context, _type string, key string) (*model.Media, error) { + if key == "" { + return nil, fmt.Errorf("key 不能为空") + } + + if len(key) != 32 { + return nil, fmt.Errorf("key 长度必须为 32") + } + + var table string + switch _type { + case "image": + table = "image_hardlink_info_v3" + case "video": + table = "video_hardlink_info_v3" + case "file": + table = "file_hardlink_info_v3" + default: + return nil, fmt.Errorf("不支持的媒体类型: %s", _type) + } + + query := fmt.Sprintf(` + SELECT + f.md5, + f.file_name, + f.file_size, + f.modify_time, + IFNULL(d1.username,""), + IFNULL(d2.username,"") + FROM + %s f + LEFT JOIN + dir2id d1 ON d1.rowid = f.dir1 + LEFT JOIN + dir2id d2 ON d2.rowid = f.dir2 + `, table) + query += " WHERE f.md5 = ? OR f.file_name LIKE ? || '%'" + args := []interface{}{key, key} + + rows, err := ds.mediaDb.QueryContext(ctx, query, args...) + if err != nil { + return nil, fmt.Errorf("查询媒体失败: %w", err) + } + defer rows.Close() + + var media *model.Media + for rows.Next() { + var mediaV4 model.MediaV4 + err := rows.Scan( + &mediaV4.Key, + &mediaV4.Name, + &mediaV4.Size, + &mediaV4.ModifyTime, + &mediaV4.Dir1, + &mediaV4.Dir2, + ) + if err != nil { + return nil, fmt.Errorf("扫描会话行失败: %w", err) + } + mediaV4.Type = _type + media = mediaV4.Wrap() + + // 跳过缩略图 + if _type == "image" && !strings.Contains(media.Name, "_t") { + break + } + } + + if media == nil { + return nil, fmt.Errorf("未找到媒体 %s", key) + } + + return media, nil +} + func (ds *DataSource) Close() error { var errs []error @@ -626,6 +721,12 @@ func (ds *DataSource) Close() error { } } + if ds.mediaDb != nil { + if err := ds.mediaDb.Close(); err != nil { + errs = append(errs, fmt.Errorf("关闭媒体数据库失败: %w", err)) + } + } + if len(errs) > 0 { return fmt.Errorf("关闭数据库连接时发生错误: %v", errs) } diff --git a/internal/wechatdb/datasource/windowsv3/datasource.go b/internal/wechatdb/datasource/windowsv3/datasource.go index eafe119..20d857f 100644 --- a/internal/wechatdb/datasource/windowsv3/datasource.go +++ b/internal/wechatdb/datasource/windowsv3/datasource.go @@ -3,6 +3,7 @@ package windowsv3 import ( "context" "database/sql" + "encoding/hex" "fmt" "log" "sort" @@ -18,6 +19,9 @@ import ( const ( MessageFilePattern = "^MSG([0-9]?[0-9])?\\.db$" ContactFilePattern = "^MicroMsg.db$" + ImageFilePattern = "^HardLinkImage\\.db$" + VideoFilePattern = "^HardLinkVideo\\.db$" + FileFilePattern = "^HardLinkFile\\.db$" ) // MessageDBInfo 保存消息数据库的信息 @@ -37,6 +41,10 @@ type DataSource struct { // 联系人数据库 contactDbFile string contactDb *sql.DB + + imageDb *sql.DB + videoDb *sql.DB + fileDb *sql.DB } // New 创建一个新的 WindowsV3DataSource @@ -56,6 +64,10 @@ func New(path string) (*DataSource, error) { return nil, fmt.Errorf("初始化联系人数据库失败: %w", err) } + if err := ds.initMediaDb(path); err != nil { + return nil, fmt.Errorf("初始化多媒体数据库失败: %w", err) + } + return ds, nil } @@ -178,6 +190,53 @@ func (ds *DataSource) initContactDb(path string) error { return nil } +// initContactDb 初始化联系人数据库 +func (ds *DataSource) initMediaDb(path string) error { + files, err := util.FindFilesWithPatterns(path, ImageFilePattern, true) + if err != nil { + return fmt.Errorf("查找图片数据库文件失败: %w", err) + } + + if len(files) == 0 { + return fmt.Errorf("未找到图片数据库文件: %s", path) + } + + ds.imageDb, err = sql.Open("sqlite3", files[0]) + if err != nil { + return fmt.Errorf("连接图片数据库失败: %w", err) + } + + files, err = util.FindFilesWithPatterns(path, VideoFilePattern, true) + if err != nil { + return fmt.Errorf("查找视频数据库文件失败: %w", err) + } + + if len(files) == 0 { + return fmt.Errorf("未找到视频数据库文件: %s", path) + } + + ds.videoDb, err = sql.Open("sqlite3", files[0]) + if err != nil { + return fmt.Errorf("连接视频数据库失败: %w", err) + } + + files, err = util.FindFilesWithPatterns(path, FileFilePattern, true) + if err != nil { + return fmt.Errorf("查找文件数据库文件失败: %w", err) + } + + if len(files) == 0 { + return fmt.Errorf("未找到文件数据库文件: %s", path) + } + + ds.fileDb, err = sql.Open("sqlite3", files[0]) + if err != nil { + return fmt.Errorf("连接文件数据库失败: %w", err) + } + + return nil +} + // getDBInfosForTimeRange 获取时间范围内的数据库信息 func (ds *DataSource) getDBInfosForTimeRange(startTime, endTime time.Time) []MessageDBInfo { var dbs []MessageDBInfo @@ -589,6 +648,84 @@ func (ds *DataSource) GetSessions(ctx context.Context, key string, limit, offset return sessions, nil } +func (ds *DataSource) GetMedia(ctx context.Context, _type string, key string) (*model.Media, error) { + if key == "" { + return nil, fmt.Errorf("key 不能为空") + } + + md5key, err := hex.DecodeString(key) + if err != nil { + return nil, fmt.Errorf("解析 key 失败: %w", err) + } + + var db *sql.DB + var table1, table2 string + + switch _type { + case "image": + db = ds.imageDb + table1 = "HardLinkImageAttribute" + table2 = "HardLinkImageID" + case "video": + db = ds.videoDb + table1 = "HardLinkVideoAttribute" + table2 = "HardLinkVideoID" + case "file": + db = ds.fileDb + table1 = "HardLinkFileAttribute" + table2 = "HardLinkFileID" + default: + return nil, fmt.Errorf("不支持的媒体类型: %s", _type) + + } + + query := fmt.Sprintf(` + SELECT + a.FileName, + a.ModifyTime, + IFNULL(d1.Dir,"") AS Dir1, + IFNULL(d2.Dir,"") AS Dir2 + FROM + %s a + LEFT JOIN + %s d1 ON a.DirID1 = d1.DirId + LEFT JOIN + %s d2 ON a.DirID2 = d2.DirId + WHERE + a.Md5 = ? + `, table1, table2, table2) + args := []interface{}{md5key} + + rows, err := db.QueryContext(ctx, query, args...) + if err != nil { + return nil, fmt.Errorf("查询媒体失败: %w", err) + } + defer rows.Close() + + var media *model.Media + for rows.Next() { + var mediaV3 model.MediaV3 + err := rows.Scan( + &mediaV3.Name, + &mediaV3.ModifyTime, + &mediaV3.Dir1, + &mediaV3.Dir2, + ) + if err != nil { + return nil, fmt.Errorf("扫描会话行失败: %w", err) + } + mediaV3.Type = _type + mediaV3.Key = key + media = mediaV3.Wrap() + } + + if media == nil { + return nil, fmt.Errorf("未找到媒体 %s", key) + } + + return media, nil +} + // Close 实现 DataSource 接口的 Close 方法 func (ds *DataSource) Close() error { var errs []error @@ -607,6 +744,22 @@ func (ds *DataSource) Close() error { } } + if ds.imageDb != nil { + if err := ds.imageDb.Close(); err != nil { + errs = append(errs, fmt.Errorf("关闭图片数据库失败: %w", err)) + } + } + if ds.videoDb != nil { + if err := ds.videoDb.Close(); err != nil { + errs = append(errs, fmt.Errorf("关闭视频数据库失败: %w", err)) + } + } + if ds.fileDb != nil { + if err := ds.fileDb.Close(); err != nil { + errs = append(errs, fmt.Errorf("关闭文件数据库失败: %w", err)) + } + } + if len(errs) > 0 { return fmt.Errorf("关闭数据库连接时发生错误: %v", errs) } diff --git a/internal/wechatdb/repository/media.go b/internal/wechatdb/repository/media.go new file mode 100644 index 0000000..7e68dcc --- /dev/null +++ b/internal/wechatdb/repository/media.go @@ -0,0 +1,11 @@ +package repository + +import ( + "context" + + "github.com/sjzar/chatlog/internal/model" +) + +func (r *Repository) GetMedia(ctx context.Context, _type string, key string) (*model.Media, error) { + return r.ds.GetMedia(ctx, _type, key) +} diff --git a/internal/wechatdb/wechatdb.go b/internal/wechatdb/wechatdb.go index d909eb0..293a904 100644 --- a/internal/wechatdb/wechatdb.go +++ b/internal/wechatdb/wechatdb.go @@ -121,3 +121,7 @@ func (w *DB) GetSessions(key string, limit, offset int) (*GetSessionsResp, error Items: sessions, }, nil } + +func (w *DB) GetMedia(_type string, key string) (*model.Media, error) { + return w.repo.GetMedia(context.Background(), _type, key) +} diff --git a/pkg/util/dat2img/dat2img.go b/pkg/util/dat2img/dat2img.go new file mode 100644 index 0000000..daf8b29 --- /dev/null +++ b/pkg/util/dat2img/dat2img.go @@ -0,0 +1,60 @@ +package dat2img + +// copy from: https://github.com/tujiaw/wechat_dat_to_image + +import ( + "fmt" +) + +type Format struct { + Header []byte + Ext string +} + +var ( + JPG = Format{Header: []byte{0xFF, 0xD8, 0xFF}, Ext: "jpg"} + PNG = Format{Header: []byte{0x89, 0x50, 0x4E, 0x47}, Ext: "png"} + GIF = Format{Header: []byte{0x47, 0x49, 0x46, 0x38}, Ext: "gif"} + TIFF = Format{Header: []byte{0x49, 0x49, 0x2A, 0x00}, Ext: "tiff"} + BMP = Format{Header: []byte{0x42, 0x4D}, Ext: "bmp"} + Formats = []Format{JPG, PNG, GIF, TIFF, BMP} +) + +func Dat2Image(data []byte) ([]byte, string, error) { + + if len(data) < 4 { + return nil, "", fmt.Errorf("data length is too short: %d", len(data)) + } + + findFormat := func(data []byte, header []byte) bool { + xorBit := data[0] ^ header[0] + for i := 0; i < len(header); i++ { + if data[i]^header[i] != xorBit { + return false + } + } + return true + } + + var xorBit byte + var find bool + var ext string + for _, format := range Formats { + if find = findFormat(data, format.Header); find { + xorBit = data[0] ^ format.Header[0] + ext = format.Ext + break + } + } + + if !find { + return nil, "", fmt.Errorf("unknown image type: %x %x", data[0], data[1]) + } + + out := make([]byte, len(data)) + for i := range data { + out[i] = data[i] ^ xorBit + } + + return out, ext, nil +} diff --git a/pkg/util/lz4/lz4.go b/pkg/util/lz4/lz4.go new file mode 100644 index 0000000..0a10470 --- /dev/null +++ b/pkg/util/lz4/lz4.go @@ -0,0 +1,16 @@ +package lz4 + +import ( + "github.com/pierrec/lz4/v4" +) + +func Decompress(src []byte) ([]byte, error) { + // FIXME: lz4 的压缩率预计不到 3,这里设置了 4 保险一点 + out := make([]byte, len(src)*4) + + n, err := lz4.UncompressBlock(src, out) + if err != nil { + return nil, err + } + return out[:n], nil +} diff --git a/pkg/util/strings.go b/pkg/util/strings.go index 25d403a..1aaa9cb 100644 --- a/pkg/util/strings.go +++ b/pkg/util/strings.go @@ -41,3 +41,7 @@ func IsNumeric(s string) bool { } return len(s) > 0 } + +func SplitInt64ToTwoInt32(input int64) (int64, int64) { + return input & 0xFFFFFFFF, input >> 32 +}