This commit is contained in:
睿 安
2026-01-21 16:48:36 +08:00
commit abba5cb273
246 changed files with 57473 additions and 0 deletions

40
app/DataBase/__init__.py Normal file
View File

@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
"""
@File : __init__.py.py
@Author : Shuaikang Zhou
@Time : 2023/1/5 0:10
@IDE : Pycharm
@Version : Python3.10
@comment : ···
"""
from .hard_link import HardLink
from .micro_msg import MicroMsg
from .media_msg import MediaMsg
from .misc import Misc
from .msg import Msg
from .msg import MsgType
misc_db = Misc()
msg_db = Msg()
micro_msg_db = MicroMsg()
hard_link_db = HardLink()
media_msg_db = MediaMsg()
def close_db():
misc_db.close()
msg_db.close()
micro_msg_db.close()
hard_link_db.close()
media_msg_db.close()
def init_db():
misc_db.init_database()
msg_db.init_database()
micro_msg_db.init_database()
hard_link_db.init_database()
media_msg_db.init_database()
__all__ = ['misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db', 'MsgType', "media_msg_db", "close_db"]

297
app/DataBase/hard_link.py Normal file
View File

@@ -0,0 +1,297 @@
import binascii
import os.path
import sqlite3
import threading
import traceback
import xml.etree.ElementTree as ET
from app.log import log, logger
from app.util.protocbuf.msg_pb2 import MessageBytesExtra
image_db_lock = threading.Lock()
video_db_lock = threading.Lock()
image_db_path = "./app/Database/Msg/HardLinkImage.db"
video_db_path = "./app/Database/Msg/HardLinkVideo.db"
root_path = "FileStorage/MsgAttach/"
video_root_path = "FileStorage/Video/"
@log
def get_md5_from_xml(content, type_="img"):
try:
# 解析XML
root = ET.fromstring(content)
if type_ == "img":
# 提取md5的值
md5_value = root.find(".//img").get("md5")
elif type_ == "video":
md5_value = root.find(".//videomsg").get("md5")
# print(md5_value)
return md5_value
except ET.ParseError:
return None
def decodeExtraBuf(extra_buf_content: bytes):
if not extra_buf_content:
return {
"region": ('', '', ''),
"signature": '',
"telephone": '',
"gender": 0,
}
trunkName = {
b"\x46\xCF\x10\xC4": "个性签名",
b"\xA4\xD9\x02\x4A": "国家",
b"\xE2\xEA\xA8\xD1": "省份",
b"\x1D\x02\x5B\xBF": "",
# b"\x81\xAE\x19\xB4": "朋友圈背景url",
# b"\xF9\x17\xBC\xC0": "公司名称",
# b"\x4E\xB9\x6D\x85": "企业微信属性",
# b"\x0E\x71\x9F\x13": "备注图片",
b"\x75\x93\x78\xAD": "手机号",
b"\x74\x75\x2C\x06": "性别",
}
res = {"手机号": ""}
off = 0
try:
for key in trunkName:
trunk_head = trunkName[key]
try:
off = extra_buf_content.index(key) + 4
except:
pass
char = extra_buf_content[off: off + 1]
off += 1
if char == b"\x04": # 四个字节的int小端序
intContent = extra_buf_content[off: off + 4]
off += 4
intContent = int.from_bytes(intContent, "little")
res[trunk_head] = intContent
elif char == b"\x18": # utf-16字符串
lengthContent = extra_buf_content[off: off + 4]
off += 4
lengthContent = int.from_bytes(lengthContent, "little")
strContent = extra_buf_content[off: off + lengthContent]
off += lengthContent
res[trunk_head] = strContent.decode("utf-16").rstrip("\x00")
return {
"region": (res["国家"], res["省份"], res[""]),
"signature": res["个性签名"],
"telephone": res["手机号"],
"gender": res["性别"],
}
except:
logger.error(f'联系人解析错误:\n{traceback.format_exc()}')
return {
"region": ('', '', ''),
"signature": '',
"telephone": '',
"gender": 0,
}
def singleton(cls):
_instance = {}
def inner():
if cls not in _instance:
_instance[cls] = cls()
return _instance[cls]
return inner
@singleton
class HardLink:
def __init__(self):
self.imageDB = None
self.videoDB = None
self.image_cursor = None
self.video_cursor = None
self.open_flag = False
self.init_database()
def init_database(self):
if not self.open_flag:
if os.path.exists(image_db_path):
self.imageDB = sqlite3.connect(image_db_path, check_same_thread=False)
# '''创建游标'''
self.image_cursor = self.imageDB.cursor()
self.open_flag = True
if image_db_lock.locked():
image_db_lock.release()
if os.path.exists(video_db_path):
self.videoDB = sqlite3.connect(video_db_path, check_same_thread=False)
# '''创建游标'''
self.video_cursor = self.videoDB.cursor()
self.open_flag = True
if video_db_lock.locked():
video_db_lock.release()
def get_image_by_md5(self, md5: bytes):
if not md5:
return None
if not self.open_flag:
return None
sql = """
select Md5Hash,MD5,FileName,HardLinkImageID.Dir as DirName1,HardLinkImageID2.Dir as DirName2
from HardLinkImageAttribute
join HardLinkImageID on HardLinkImageAttribute.DirID1 = HardLinkImageID.DirID
join HardLinkImageID as HardLinkImageID2 on HardLinkImageAttribute.DirID2 = HardLinkImageID2.DirID
where MD5 = ?;
"""
try:
image_db_lock.acquire(True)
try:
self.image_cursor.execute(sql, [md5])
except AttributeError:
self.init_database()
self.image_cursor.execute(sql, [md5])
result = self.image_cursor.fetchone()
return result
finally:
image_db_lock.release()
def get_video_by_md5(self, md5: bytes):
if not md5:
return None
if not self.open_flag:
return None
sql = """
select Md5Hash,MD5,FileName,HardLinkVideoID2.Dir as DirName2
from HardLinkVideoAttribute
join HardLinkVideoID as HardLinkVideoID2 on HardLinkVideoAttribute.DirID2 = HardLinkVideoID2.DirID
where MD5 = ?;
"""
try:
video_db_lock.acquire(True)
try:
self.video_cursor.execute(sql, [md5])
except sqlite3.OperationalError:
return None
except AttributeError:
self.init_database()
self.video_cursor.execute(sql, [md5])
result = self.video_cursor.fetchone()
return result
finally:
video_db_lock.release()
def get_image_original(self, content, bytesExtra) -> str:
msg_bytes = MessageBytesExtra()
msg_bytes.ParseFromString(bytesExtra)
result = ''
for tmp in msg_bytes.message2:
if tmp.field1 != 4:
continue
pathh = tmp.field2 # wxid\FileStorage\...
pathh = "\\".join(pathh.split("\\")[1:])
return pathh
md5 = get_md5_from_xml(content)
if not md5:
pass
else:
result = self.get_image_by_md5(binascii.unhexlify(md5))
if result:
dir1 = result[3]
dir2 = result[4]
data_image = result[2]
dir0 = "Image"
dat_image = os.path.join(root_path, dir1, dir0, dir2, data_image)
result = dat_image
return result
def get_image_thumb(self, content, bytesExtra) -> str:
msg_bytes = MessageBytesExtra()
msg_bytes.ParseFromString(bytesExtra)
result = ''
for tmp in msg_bytes.message2:
if tmp.field1 != 3:
continue
pathh = tmp.field2 # wxid\FileStorage\...
pathh = "\\".join(pathh.split("\\")[1:])
return pathh
md5 = get_md5_from_xml(content)
if not md5:
pass
else:
result = self.get_image_by_md5(binascii.unhexlify(md5))
if result:
dir1 = result[3]
dir2 = result[4]
data_image = result[2]
dir0 = "Thumb"
dat_image = os.path.join(root_path, dir1, dir0, dir2, data_image)
result = dat_image
return result
def get_image(self, content, bytesExtra, up_dir="", thumb=False) -> str:
msg_bytes = MessageBytesExtra()
msg_bytes.ParseFromString(bytesExtra)
if thumb:
result = self.get_image_thumb(content, bytesExtra)
else:
result = self.get_image_original(content, bytesExtra)
if not (result and os.path.exists(os.path.join(up_dir, result))):
result = self.get_image_thumb(content, bytesExtra)
return result
def get_video(self, content, bytesExtra, thumb=False):
msg_bytes = MessageBytesExtra()
msg_bytes.ParseFromString(bytesExtra)
for tmp in msg_bytes.message2:
if tmp.field1 != (3 if thumb else 4):
continue
pathh = tmp.field2 # wxid\FileStorage\...
pathh = "\\".join(pathh.split("\\")[1:])
return pathh
md5 = get_md5_from_xml(content, type_="video")
if not md5:
return ''
result = self.get_video_by_md5(binascii.unhexlify(md5))
if result:
dir2 = result[3]
data_image = result[2].split(".")[0] + ".jpg" if thumb else result[2]
# dir0 = 'Thumb' if thumb else 'Image'
dat_image = os.path.join(video_root_path, dir2, data_image)
return dat_image
else:
return ''
def close(self):
if self.open_flag:
try:
image_db_lock.acquire(True)
video_db_lock.acquire(True)
self.open_flag = False
self.imageDB.close()
self.videoDB.close()
finally:
image_db_lock.release()
video_db_lock.release()
def __del__(self):
self.close()
# 6b02292eecea118f06be3a5b20075afc_t
if __name__ == "__main__":
msg_root_path = "./Msg/"
image_db_path = "./Msg/HardLinkImage.db"
video_db_path = "./Msg/HardLinkVideo.db"
hard_link_db = HardLink()
hard_link_db.init_database()
# content = '''<?xml version="1.0"?><msg>\n\t<img aeskey="bc37a58c32cb203ee9ac587b068e5853" encryver="1" cdnthumbaeskey="bc37a58c32cb203ee9ac587b068e5853" cdnthumburl="3057020100044b30490201000204d181705002032f5405020428a7b4de02046537869d042462313532363539632d663930622d343463302d616636662d333837646434633061626534020401150a020201000405004c4c6d00" cdnthumblength="3097" cdnthumbheight="120" cdnthumbwidth="68" cdnmidheight="0" cdnmidwidth="0" cdnhdheight="0" cdnhdwidth="0" cdnmidimgurl="3057020100044b30490201000204d181705002032f5405020428a7b4de02046537869d042462313532363539632d663930622d343463302d616636662d333837646434633061626534020401150a020201000405004c4c6d00" length="57667" md5="6844b812d5d514eb6878657e0bf4cdbb" originsourcemd5="1dfdfa24922270ea1cb5daba103f45ca" />\n\t<platform_signature></platform_signature>\n\t<imgdatahash></imgdatahash>\n</msg>\n'''
# print(hard_link_db.get_image(content))
# print(hard_link_db.get_image(content, thumb=False))
# result = get_md5_from_xml(content)
# print(result)
content = """<?xml version="1.0"?>
<msg>
<videomsg aeskey="d635d2013d221dbd05a4eab3a8185f5a" cdnvideourl="3057020100044b304902010002040297cead02032f540502042ba7b4de020465673b74042438316562356530652d653764352d343263632d613531642d6464383661313330623965330204052400040201000405004c537500" cdnthumbaeskey="d635d2013d221dbd05a4eab3a8185f5a" cdnthumburl="3057020100044b304902010002040297cead02032f540502042ba7b4de020465673b74042438316562356530652d653764352d343263632d613531642d6464383661313330623965330204052400040201000405004c537500" length="25164270" playlength="60" cdnthumblength="7419" cdnthumbwidth="1920" cdnthumbheight="1080" fromusername="wxid_yt67eeoo4blm22" md5="95558f0e503651375b475636519d2285" newmd5="4ece19bcd92dc5b93b83f397461a1310" isplaceholder="0" rawmd5="d660ba186bb31126d94fa568144face8" rawlength="143850007" cdnrawvideourl="3052020100044b30490201000204d8cd585302032f540502040f6a42b7020465673b85042464666462306634342d653339342d343232302d613534392d3930633030646236306266610204059400040201000400" cdnrawvideoaeskey="5915b14ac8d121e0944d9e444aebb7ed" overwritenewmsgid="0" originsourcemd5="a1a567d8c170bca33d075b787a60dd3f" isad="0" />
</msg>
"""
print(hard_link_db.get_video(content))
print(hard_link_db.get_video(content, thumb=True))

145
app/DataBase/media_msg.py Normal file
View File

@@ -0,0 +1,145 @@
import os.path
import subprocess
import sys
import traceback
from os import system
import sqlite3
import threading
import xml.etree.ElementTree as ET
from pilk import decode
from app.log import logger
lock = threading.Lock()
db_path = "./app/Database/Msg/MediaMSG.db"
def get_ffmpeg_path():
# 获取打包后的资源目录
resource_dir = getattr(sys, '_MEIPASS', os.path.abspath(os.path.dirname(__file__)))
# 构建 FFmpeg 可执行文件的路径
ffmpeg_path = os.path.join(resource_dir, 'app', 'resources','data', 'ffmpeg.exe')
return ffmpeg_path
def singleton(cls):
_instance = {}
def inner():
if cls not in _instance:
_instance[cls] = cls()
return _instance[cls]
return inner
@singleton
class MediaMsg:
def __init__(self):
self.DB = None
self.cursor: sqlite3.Cursor = None
self.open_flag = False
self.init_database()
def init_database(self):
if not self.open_flag:
if os.path.exists(db_path):
self.DB = sqlite3.connect(db_path, check_same_thread=False)
# '''创建游标'''
self.cursor = self.DB.cursor()
self.open_flag = True
if lock.locked():
lock.release()
def get_media_buffer(self, reserved0):
sql = '''
select Buf
from Media
where Reserved0 = ?
'''
try:
lock.acquire(True)
self.cursor.execute(sql, [reserved0])
result = self.cursor.fetchone()
finally:
lock.release()
return result[0] if result else None
def get_audio(self, reserved0, output_path):
buf = self.get_media_buffer(reserved0)
if not buf:
return ''
silk_path = f"{output_path}/{reserved0}.silk"
pcm_path = f"{output_path}/{reserved0}.pcm"
mp3_path = f"{output_path}/{reserved0}.mp3"
if os.path.exists(mp3_path):
return mp3_path
with open(silk_path, "wb") as f:
f.write(buf)
# open(silk_path, "wb").write()
try:
decode(silk_path, pcm_path, 44100)
# 调用系统上的 ffmpeg 可执行文件
# 获取 FFmpeg 可执行文件的路径
ffmpeg_path = get_ffmpeg_path()
# # 调用 FFmpeg
if os.path.exists(ffmpeg_path):
cmd = f'''"{ffmpeg_path}" -loglevel quiet -y -f s16le -i "{pcm_path}" -ar 44100 -ac 1 "{mp3_path}"'''
# system(cmd)
# 使用subprocess.run()执行命令
subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
else:
# 源码运行的时候下面的有效
# 这里不知道怎么捕捉异常
cmd = f'''"{os.path.join(os.getcwd(), 'app', 'resources', 'data','ffmpeg.exe')}" -loglevel quiet -y -f s16le -i "{pcm_path}" -ar 44100 -ac 1 "{mp3_path}"'''
# system(cmd)
# 使用subprocess.run()执行命令
subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
os.remove(silk_path)
os.remove(pcm_path)
except Exception as e:
print(f"Error: {e}")
logger.error(f'语音发送错误\n{traceback.format_exc()}')
cmd = f'''"{os.path.join(os.getcwd(), 'app', 'resources', 'data', 'ffmpeg.exe')}" -loglevel quiet -y -f s16le -i "{pcm_path}" -ar 44100 -ac 1 "{mp3_path}"'''
# system(cmd)
# 使用subprocess.run()执行命令
subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
finally:
print(mp3_path)
return mp3_path
def get_audio_path(self, reserved0, output_path):
mp3_path = f"{output_path}\\{reserved0}.mp3"
mp3_path = mp3_path.replace("/", "\\")
return mp3_path
def get_audio_text(self, content):
try:
root = ET.fromstring(content)
transtext = root.find(".//voicetrans").get("transtext")
return transtext
except:
return ""
def close(self):
if self.open_flag:
try:
lock.acquire(True)
self.open_flag = False
self.DB.close()
finally:
lock.release()
def __del__(self):
self.close()
if __name__ == '__main__':
db_path = './Msg/MediaMSG.db'
media_msg_db = MediaMsg()
reserved = '2865682741418252473'
path = media_msg_db.get_audio(reserved, r"D:\gou\message\WeChatMsg")
print(path)

103
app/DataBase/merge.py Normal file
View File

@@ -0,0 +1,103 @@
import os
import sqlite3
import traceback
from app.log import logger
def merge_MediaMSG_databases(source_paths, target_path):
# 创建目标数据库连接
target_conn = sqlite3.connect(target_path)
target_cursor = target_conn.cursor()
try:
# 开始事务
target_conn.execute("BEGIN;")
for i, source_path in enumerate(source_paths):
if not os.path.exists(source_path):
continue
db = sqlite3.connect(source_path)
db.text_factory = str
cursor = db.cursor()
# 附加源数据库
try:
sql = '''SELECT Key,Reserved0,Buf,Reserved1,Reserved2 FROM Media;'''
cursor.execute(sql)
result = cursor.fetchall()
target_cursor.executemany(
"INSERT INTO Media (Key,Reserved0,Buf,Reserved1,Reserved2)"
"VALUES(?,?,?,?,?)",
result)
except sqlite3.IntegrityError:
print("有重复key", "跳过")
except sqlite3.OperationalError:
print("no such table: Media", "跳过")
cursor.close()
db.close()
# 提交事务
target_conn.execute("COMMIT;")
except Exception as e:
# 发生异常时回滚事务
target_conn.execute("ROLLBACK;")
raise e
finally:
# 关闭目标数据库连接
target_conn.close()
def merge_databases(source_paths, target_path):
# 创建目标数据库连接
target_conn = sqlite3.connect(target_path)
target_cursor = target_conn.cursor()
try:
# 开始事务
target_conn.execute("BEGIN;")
for i, source_path in enumerate(source_paths):
if not os.path.exists(source_path):
continue
db = sqlite3.connect(source_path)
db.text_factory = str
cursor = db.cursor()
try:
sql = '''
SELECT TalkerId,MsgsvrID,Type,SubType,IsSender,CreateTime,Sequence,StrTalker,StrContent,DisplayContent,BytesExtra,CompressContent
FROM MSG;
'''
cursor.execute(sql)
result = cursor.fetchall()
# 附加源数据库
target_cursor.executemany(
"INSERT INTO MSG "
"(TalkerId,MsgsvrID,Type,SubType,IsSender,CreateTime,Sequence,StrTalker,StrContent,DisplayContent,"
"BytesExtra,CompressContent)"
"VALUES(?,?,?,?,?,?,?,?,?,?,?,?)",
result)
except:
logger.error(f'{source_path}数据库合并错误:\n{traceback.format_exc()}')
cursor.close()
db.close()
# 提交事务
target_conn.execute("COMMIT;")
except Exception as e:
# 发生异常时回滚事务
target_conn.execute("ROLLBACK;")
raise e
finally:
# 关闭目标数据库连接
target_conn.close()
if __name__ == "__main__":
# 源数据库文件列表
source_databases = ["Msg/MSG1.db", "Msg/MSG2.db", "Msg/MSG3.db"]
# 目标数据库文件
target_database = "Msg/MSG.db"
import shutil
shutil.copy('Msg/MSG0.db', target_database) # 使用一个数据库文件作为模板
# 合并数据库
merge_databases(source_databases, target_database)

152
app/DataBase/micro_msg.py Normal file
View File

@@ -0,0 +1,152 @@
import os.path
import sqlite3
import threading
lock = threading.Lock()
db_path = "./app/Database/Msg/MicroMsg.db"
def singleton(cls):
_instance = {}
def inner():
if cls not in _instance:
_instance[cls] = cls()
return _instance[cls]
return inner
def is_database_exist():
return os.path.exists(db_path)
class MicroMsg:
def __init__(self):
self.DB = None
self.cursor = None
self.open_flag = False
self.init_database()
def init_database(self):
if not self.open_flag:
if os.path.exists(db_path):
self.DB = sqlite3.connect(db_path, check_same_thread=False)
# '''创建游标'''
self.cursor = self.DB.cursor()
self.open_flag = True
if lock.locked():
lock.release()
def get_contact(self):
if not self.open_flag:
return []
try:
lock.acquire(True)
sql = '''SELECT UserName, Alias, Type, Remark, NickName, PYInitial, RemarkPYInitial, ContactHeadImgUrl.smallHeadImgUrl, ContactHeadImgUrl.bigHeadImgUrl,ExTraBuf,COALESCE(ContactLabel.LabelName, 'None') AS labelName
FROM Contact
INNER JOIN ContactHeadImgUrl ON Contact.UserName = ContactHeadImgUrl.usrName
LEFT JOIN ContactLabel ON Contact.LabelIDList = ContactLabel.LabelId
WHERE (Type!=4 AND VerifyFlag=0)
AND NickName != ''
ORDER BY
CASE
WHEN RemarkPYInitial = '' THEN PYInitial
ELSE RemarkPYInitial
END ASC
'''
self.cursor.execute(sql)
result = self.cursor.fetchall()
except sqlite3.OperationalError:
# lock.acquire(True)
sql = '''
SELECT UserName, Alias, Type, Remark, NickName, PYInitial, RemarkPYInitial, ContactHeadImgUrl.smallHeadImgUrl, ContactHeadImgUrl.bigHeadImgUrl,ExTraBuf,"None"
FROM Contact
INNER JOIN ContactHeadImgUrl ON Contact.UserName = ContactHeadImgUrl.usrName
WHERE (Type!=4 AND VerifyFlag=0)
AND NickName != ''
ORDER BY
CASE
WHEN RemarkPYInitial = '' THEN PYInitial
ELSE RemarkPYInitial
END ASC
'''
self.cursor.execute(sql)
result = self.cursor.fetchall()
finally:
lock.release()
from app.DataBase import msg_db
return msg_db.get_contact(result)
def get_contact_by_username(self, username):
if not self.open_flag:
return None
try:
lock.acquire(True)
sql = '''
SELECT UserName, Alias, Type, Remark, NickName, PYInitial, RemarkPYInitial, ContactHeadImgUrl.smallHeadImgUrl, ContactHeadImgUrl.bigHeadImgUrl,ExTraBuf,ContactLabel.LabelName
FROM Contact
INNER JOIN ContactHeadImgUrl ON Contact.UserName = ContactHeadImgUrl.usrName
LEFT JOIN ContactLabel ON Contact.LabelIDList = ContactLabel.LabelId
WHERE UserName = ?
'''
self.cursor.execute(sql, [username])
result = self.cursor.fetchone()
except sqlite3.OperationalError:
# 解决ContactLabel表不存在的问题
# lock.acquire(True)
sql = '''
SELECT UserName, Alias, Type, Remark, NickName, PYInitial, RemarkPYInitial, ContactHeadImgUrl.smallHeadImgUrl, ContactHeadImgUrl.bigHeadImgUrl,ExTraBuf,"None"
FROM Contact
INNER JOIN ContactHeadImgUrl ON Contact.UserName = ContactHeadImgUrl.usrName
WHERE UserName = ?
'''
self.cursor.execute(sql, [username])
result = self.cursor.fetchone()
finally:
lock.release()
return result
def get_chatroom_info(self, chatroomname):
'''
获取群聊信息
'''
if not self.open_flag:
return None
try:
lock.acquire(True)
sql = '''SELECT ChatRoomName, RoomData FROM ChatRoom WHERE ChatRoomName = ?'''
self.cursor.execute(sql, [chatroomname])
result = self.cursor.fetchone()
finally:
lock.release()
return result
def close(self):
if self.open_flag:
try:
lock.acquire(True)
self.open_flag = False
self.DB.close()
finally:
lock.release()
def __del__(self):
self.close()
if __name__ == '__main__':
db_path = "./app/database/Msg/MicroMsg.db"
msg = MicroMsg()
msg.init_database()
contacts = msg.get_contact()
from app.DataBase.hard_link import decodeExtraBuf
s = {'wxid_vtz9jk9ulzjt22','wxid_zu9l4wxdv1pa22', 'wxid_0o18ef858vnu22','wxid_8piw6sb4hvfm22','wxid_e7ypfycxpnu322','wxid_oxmg02c8kwxu22','wxid_7pp2fblq7hkq22','wxid_h1n9niofgyci22'}
for contact in contacts:
if contact[0] in s:
print(contact[:7])
buf = contact[9]
info = decodeExtraBuf(buf)
print(info)

78
app/DataBase/misc.py Normal file
View File

@@ -0,0 +1,78 @@
import os.path
import sqlite3
import threading
lock = threading.Lock()
DB = None
cursor = None
db_path = "./app/Database/Msg/Misc.db"
# db_path = './Msg/Misc.db'
def singleton(cls):
_instance = {}
def inner():
if cls not in _instance:
_instance[cls] = cls()
return _instance[cls]
return inner
@singleton
class Misc:
def __init__(self):
self.DB = None
self.cursor = None
self.open_flag = False
self.init_database()
def init_database(self):
if not self.open_flag:
if os.path.exists(db_path):
self.DB = sqlite3.connect(db_path, check_same_thread=False)
# '''创建游标'''
self.cursor = self.DB.cursor()
self.open_flag = True
if lock.locked():
lock.release()
def get_avatar_buffer(self, userName):
if not self.open_flag:
return None
sql = '''
select smallHeadBuf
from ContactHeadImg1
where usrName=?;
'''
if not self.open_flag:
self.init_database()
try:
lock.acquire(True)
self.cursor.execute(sql, [userName])
result = self.cursor.fetchall()
if result:
return result[0][0]
finally:
lock.release()
return None
def close(self):
if self.open_flag:
try:
lock.acquire(True)
self.open_flag = False
self.DB.close()
finally:
lock.release()
def __del__(self):
self.close()
if __name__ == '__main__':
Misc()
print(Misc().get_avatar_buffer('wxid_al2oan01b6fn11'))

894
app/DataBase/msg.py Normal file
View File

@@ -0,0 +1,894 @@
import os.path
import random
import sqlite3
import threading
import traceback
from collections import defaultdict
from datetime import datetime, date
from typing import Tuple
from app.log import logger
from app.util.compress_content import parser_reply
from app.util.protocbuf.msg_pb2 import MessageBytesExtra
db_path = "./app/Database/Msg/MSG.db"
lock = threading.Lock()
def is_database_exist():
return os.path.exists(db_path)
def convert_to_timestamp_(time_input) -> int:
if isinstance(time_input, (int, float)):
# 如果输入是时间戳,直接返回
return int(time_input)
elif isinstance(time_input, str):
# 如果输入是格式化的时间字符串,将其转换为时间戳
try:
dt_object = datetime.strptime(time_input, '%Y-%m-%d %H:%M:%S')
return int(dt_object.timestamp())
except ValueError:
# 如果转换失败,可能是其他格式的字符串,可以根据需要添加更多的处理逻辑
print("Error: Unsupported date format")
return -1
elif isinstance(time_input, date):
# 如果输入是datetime.date对象将其转换为时间戳
dt_object = datetime.combine(time_input, datetime.min.time())
return int(dt_object.timestamp())
else:
print("Error: Unsupported input type")
return -1
def convert_to_timestamp(time_range) -> Tuple[int, int]:
"""
将时间转换成时间戳
@param time_range:
@return:
"""
if not time_range:
return 0, 0
else:
return convert_to_timestamp_(time_range[0]), convert_to_timestamp_(time_range[1])
def parser_chatroom_message(messages):
from app.DataBase import micro_msg_db, misc_db
from app.util.protocbuf.msg_pb2 import MessageBytesExtra
from app.person import Contact, Me, ContactDefault
'''
获取一个群聊的聊天记录
return list
a[0]: localId,
a[1]: talkerId, 和strtalker对应的不是群聊信息发送人
a[2]: type,
a[3]: subType,
a[4]: is_sender,
a[5]: timestamp,
a[6]: status, (没啥用)
a[7]: str_content,
a[8]: str_time, (格式化的时间)
a[9]: msgSvrId,
a[10]: BytesExtra,
a[11]: CompressContent,
a[12]: DisplayContent,
a[13]: msg_sender, ContactPC 或 ContactDefault 类型,这个才是群聊里的信息发送人,不是群聊或者自己是发送者没有这个字段)
'''
updated_messages = [] # 用于存储修改后的消息列表
for row in messages:
message = list(row)
if message[4] == 1: # 自己发送的就没必要解析了
message.append(Me())
updated_messages.append(tuple(message))
continue
if message[10] is None: # BytesExtra是空的跳过
message.append(ContactDefault(wxid))
updated_messages.append(tuple(message))
continue
msgbytes = MessageBytesExtra()
msgbytes.ParseFromString(message[10])
wxid = ''
for tmp in msgbytes.message2:
if tmp.field1 != 1:
continue
wxid = tmp.field2
if wxid == "": # 系统消息里面 wxid 不存在
message.append(ContactDefault(wxid))
updated_messages.append(tuple(message))
continue
# todo 解析还是有问题,会出现这种带:的东西
if ':' in wxid: # wxid_ewi8gfgpp0eu22:25319:1
wxid = wxid.split(':')[0]
contact_info_list = micro_msg_db.get_contact_by_username(wxid)
if contact_info_list is None: # 群聊中已退群的联系人不会保存在数据库里
message.append(ContactDefault(wxid))
updated_messages.append(tuple(message))
continue
contact_info = {
'UserName': contact_info_list[0],
'Alias': contact_info_list[1],
'Type': contact_info_list[2],
'Remark': contact_info_list[3],
'NickName': contact_info_list[4],
'smallHeadImgUrl': contact_info_list[7]
}
contact = Contact(contact_info)
contact.smallHeadImgBLOG = misc_db.get_avatar_buffer(contact.wxid)
contact.set_avatar(contact.smallHeadImgBLOG)
message.append(contact)
updated_messages.append(tuple(message))
return updated_messages
def singleton(cls):
_instance = {}
def inner():
if cls not in _instance:
_instance[cls] = cls()
return _instance[cls]
return inner
class MsgType:
TEXT = 1
IMAGE = 3
EMOJI = 47
class Msg:
def __init__(self):
self.DB = None
self.cursor = None
self.open_flag = False
self.init_database()
def init_database(self, path=None):
global db_path
if not self.open_flag:
if path:
db_path = path
if os.path.exists(db_path):
self.DB = sqlite3.connect(db_path, check_same_thread=False)
# '''创建游标'''
self.cursor = self.DB.cursor()
self.open_flag = True
if lock.locked():
lock.release()
def add_sender(self, messages):
"""
@param messages:
@return:
"""
new_messages = []
for message in messages:
is_sender = message[4]
wxid = ''
if is_sender:
pass
else:
msgbytes = MessageBytesExtra()
msgbytes.ParseFromString(message[10])
for tmp in msgbytes.message2:
if tmp.field1 != 1:
continue
wxid = tmp.field2
new_message = (*message, wxid)
new_messages.append(new_message)
return new_messages
def get_messages(
self,
username_,
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
):
"""
return list
a[0]: localId,
a[1]: talkerId, 和strtalker对应的不是群聊信息发送人
a[2]: type,
a[3]: subType,
a[4]: is_sender,
a[5]: timestamp,
a[6]: status, (没啥用)
a[7]: str_content,
a[8]: str_time, (格式化的时间)
a[9]: msgSvrId,
a[10]: BytesExtra,
a[11]: CompressContent,
a[12]: DisplayContent,
a[13]: 联系人的类(如果是群聊就有,不是的话没有这个字段)
"""
if not self.open_flag:
return None
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f'''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent,DisplayContent
from MSG
where StrTalker=?
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
order by CreateTime
'''
try:
lock.acquire(True)
self.cursor.execute(sql, [username_])
result = self.cursor.fetchall()
finally:
lock.release()
return parser_chatroom_message(result) if username_.__contains__('@chatroom') else result
# result.sort(key=lambda x: x[5])
# return self.add_sender(result)
def get_messages_all(self, time_range=None):
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f'''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,StrTalker,Reserved1,CompressContent
from MSG
{'WHERE CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
order by CreateTime
'''
if not self.open_flag:
return None
try:
lock.acquire(True)
self.cursor.execute(sql)
result = self.cursor.fetchall()
finally:
lock.release()
result.sort(key=lambda x: x[5])
return result
def get_messages_group_by_day(
self,
username_: str,
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
) -> dict:
"""
return dict {
date: messages
}
"""
if not self.open_flag:
return {}
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f'''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent,DisplayContent
from MSG
where StrTalker=? AND type=1
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
order by CreateTime;
'''
try:
lock.acquire(True)
self.cursor.execute(sql, [username_])
result = self.cursor.fetchall()
finally:
lock.release()
result = parser_chatroom_message(result) if username_.__contains__('@chatroom') else result
# 按天分组存储聊天记录
grouped_results = defaultdict(list)
for row in result:
'2024-01-01'
date = row[8][:10] # 获取日期部分
grouped_results[date].append(row) # 将消息加入对应的日期列表中
return grouped_results
def get_messages_length(self):
sql = '''
select count(*)
group by MsgSvrID
from MSG
'''
if not self.open_flag:
return None
try:
lock.acquire(True)
self.cursor.execute(sql)
result = self.cursor.fetchone()
except Exception as e:
result = None
finally:
lock.release()
return result[0]
def get_message_by_num(self, username_, local_id):
sql = '''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent,DisplayContent
from MSG
where StrTalker = ? and localId < ? and (Type=1 or Type=3)
order by CreateTime desc
limit 20
'''
result = None
if not self.open_flag:
return None
try:
lock.acquire(True)
self.cursor.execute(sql, [username_, local_id])
result = self.cursor.fetchall()
except sqlite3.DatabaseError:
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
finally:
lock.release()
# result.sort(key=lambda x: x[5])
return parser_chatroom_message(result) if username_.__contains__('@chatroom') else result
def get_messages_by_type(
self,
username_,
type_,
year_='all',
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
):
"""
@param username_:
@param type_:
@param year_:
@param time_range: Tuple(timestamp:开始时间戳,timestamp:结束时间戳)
@return:
"""
if not self.open_flag:
return None
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
if year_ == 'all':
sql = f'''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent,DisplayContent
from MSG
where StrTalker=? and Type=?
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
order by CreateTime
'''
try:
lock.acquire(True)
self.cursor.execute(sql, [username_, type_])
result = self.cursor.fetchall()
finally:
lock.release()
else:
sql = '''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent,DisplayContent
from MSG
where StrTalker=? and Type=? and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ?
order by CreateTime
'''
try:
lock.acquire(True)
self.cursor.execute(sql, [username_, type_, year_])
finally:
lock.release()
result = self.cursor.fetchall()
return result
def get_messages_by_keyword(self, username_, keyword, num=5, max_len=10, time_range=None, year_='all'):
if not self.open_flag:
return None
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f'''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra
from MSG
where StrTalker=? and Type=1 and LENGTH(StrContent)<? and StrContent like ?
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
order by CreateTime desc
'''
temp = []
try:
lock.acquire(True)
self.cursor.execute(sql, [username_, max_len, f'%{keyword}%'] if year_ == "all" else [username_, max_len,
f'%{keyword}%',
year_])
messages = self.cursor.fetchall()
finally:
lock.release()
if len(messages) > 5:
messages = random.sample(messages, num)
try:
lock.acquire(True)
for msg in messages:
local_id = msg[0]
is_send = msg[4]
sql = '''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
from MSG
where localId > ? and StrTalker=? and Type=1 and IsSender=?
limit 1
'''
self.cursor.execute(sql, [local_id, username_, 1 - is_send])
temp.append((msg, self.cursor.fetchone()))
finally:
lock.release()
res = []
for dialog in temp:
msg1 = dialog[0]
msg2 = dialog[1]
try:
res.append((
(msg1[4], msg1[5], msg1[7].split(keyword), msg1[8]),
(msg2[4], msg2[5], msg2[7], msg2[8])
))
except TypeError:
res.append((
('', '', ['', ''], ''),
('', '', '', '')
))
"""
返回值为一个列表,每个列表元素是一个对话
每个对话是一个元组数据
('is_send','时间戳','以关键词为分割符的消息内容','格式化时间')
"""
return res
def get_contact(self, contacts):
if not self.open_flag:
return None
try:
lock.acquire(True)
sql = '''select StrTalker, MAX(CreateTime) from MSG group by StrTalker'''
self.cursor.execute(sql)
res = self.cursor.fetchall()
finally:
lock.release()
res = {StrTalker: CreateTime for StrTalker, CreateTime in res}
contacts = [list(cur_contact) for cur_contact in contacts]
for i, cur_contact in enumerate(contacts):
if cur_contact[0] in res:
contacts[i].append(res[cur_contact[0]])
else:
contacts[i].append(0)
contacts.sort(key=lambda cur_contact: cur_contact[-1], reverse=True)
return contacts
def get_messages_calendar(self, username_):
sql = '''
SELECT strftime('%Y-%m-%d',CreateTime,'unixepoch','localtime') as days
from (
SELECT MsgSvrID, CreateTime
FROM MSG
WHERE StrTalker = ?
ORDER BY CreateTime
)
group by days
'''
if not self.open_flag:
print('数据库未就绪')
return None
try:
lock.acquire(True)
self.cursor.execute(sql, [username_])
result = self.cursor.fetchall()
finally:
lock.release()
return [date[0] for date in result]
def get_messages_by_days(
self,
username_,
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
):
result = None
if not self.open_flag:
return None
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f'''
SELECT strftime('%Y-%m-%d',CreateTime,'unixepoch','localtime') as days,count(MsgSvrID)
from (
SELECT MsgSvrID, CreateTime
FROM MSG
WHERE StrTalker = ?
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
)
group by days
'''
result = None
if not self.open_flag:
return None
try:
lock.acquire(True)
self.cursor.execute(sql, [username_])
result = self.cursor.fetchall()
finally:
lock.release()
return result
def get_messages_by_month(
self,
username_,
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
):
result = None
if not self.open_flag:
return None
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f'''
SELECT strftime('%Y-%m',CreateTime,'unixepoch','localtime') as days,count(MsgSvrID)
from (
SELECT MsgSvrID, CreateTime
FROM MSG
WHERE StrTalker = ?
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
)
group by days
'''
try:
lock.acquire(True)
self.cursor.execute(sql, [username_])
result = self.cursor.fetchall()
except sqlite3.DatabaseError:
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
finally:
lock.release()
return result
def get_messages_by_hour(self, username_, time_range=None, year_='all'):
result = []
if not self.open_flag:
return result
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f'''
SELECT strftime('%H:00',CreateTime,'unixepoch','localtime') as hours,count(MsgSvrID)
from (
SELECT MsgSvrID, CreateTime
FROM MSG
where StrTalker = ?
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
)
group by hours
'''
try:
lock.acquire(True)
self.cursor.execute(sql, [username_])
except sqlite3.DatabaseError:
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
finally:
lock.release()
result = self.cursor.fetchall()
return result
def get_first_time_of_message(self, username_=''):
if not self.open_flag:
return None
sql = f'''
select StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime
from MSG
{'where StrTalker=?' if username_ else ''}
order by CreateTime
limit 1
'''
try:
lock.acquire(True)
self.cursor.execute(sql, [username_] if username_ else [])
result = self.cursor.fetchone()
finally:
lock.release()
return result
def get_latest_time_of_message(self, username_='', time_range=None, year_='all'):
if not self.open_flag:
return None
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f'''
SELECT isSender,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,
strftime('%H:%M:%S', CreateTime,'unixepoch','localtime') as hour
FROM MSG
WHERE Type=1 AND
{'StrTalker = ? AND ' if username_ else f"'{username_}'=? AND "}
hour BETWEEN '00:00:00' AND '05:00:00'
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
ORDER BY hour DESC
LIMIT 20;
'''
try:
lock.acquire(True)
self.cursor.execute(sql, [username_, year_] if year_ != "all" else [username_])
except sqlite3.DatabaseError:
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
finally:
lock.release()
result = self.cursor.fetchall()
if not result:
return []
res = []
is_sender = result[0][0]
res.append(result[0])
for msg in result[1:]:
if msg[0] != is_sender:
res.append(msg)
break
return res
def get_send_messages_type_number(
self,
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
) -> list:
"""
统计自己发的各类型消息条数按条数降序精确到subtype\n
return [(type_1, subtype_1, number_1), (type_2, subtype_2, number_2), ...]\n
be like [(1, 0, 71481), (3, 0, 6686), (49, 57, 3887), ..., (10002, 0, 1)]
"""
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f"""
SELECT type, subtype, Count(MsgSvrID)
from MSG
where isSender = 1
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
group by type, subtype
order by Count(MsgSvrID) desc
"""
result = None
if not self.open_flag:
return None
try:
lock.acquire(True)
self.cursor.execute(sql)
result = self.cursor.fetchall()
except sqlite3.DatabaseError:
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
finally:
lock.release()
return result
def get_messages_number(
self,
username_,
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
) -> int:
"""
统计好友聊天消息的数量
@param username_:
@param time_range:
@return:
"""
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f"""
SELECT Count(MsgSvrID)
from MSG
where StrTalker = ?
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
"""
result = 0
if not self.open_flag:
return 0
try:
lock.acquire(True)
self.cursor.execute(sql, [username_])
result = self.cursor.fetchone()
except sqlite3.DatabaseError:
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
finally:
lock.release()
return result[0] if result else 0
def get_chatted_top_contacts(
self,
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
contain_chatroom=False,
top_n=10
) -> list:
"""
统计聊天最多的 n 个联系人(默认不包含群组),按条数降序\n
return [(wxid_1, number_1), (wxid_2, number_2), ...]
"""
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f"""
SELECT strtalker, Count(MsgSvrID)
from MSG
where strtalker != "filehelper" and strtalker != "notifymessage" and strtalker not like "gh_%"
{"and strtalker not like '%@chatroom'" if not contain_chatroom else ""}
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
group by strtalker
order by Count(MsgSvrID) desc
limit {top_n}
"""
result = None
if not self.open_flag:
return None
try:
lock.acquire(True)
self.cursor.execute(sql)
result = self.cursor.fetchall()
except sqlite3.DatabaseError:
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
finally:
lock.release()
return result
def get_send_messages_length(
self,
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
) -> int:
"""
统计自己总共发消息的字数包含type=1的文本和type=49,subtype=57里面自己发的文本
"""
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql_type_1 = f"""
SELECT sum(length(strContent))
from MSG
where isSender = 1 and type = 1
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
"""
sql_type_49 = f"""
SELECT CompressContent
from MSG
where isSender = 1 and type = 49 and subtype = 57
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
"""
sum_type_1 = None
result_type_49 = None
sum_type_49 = 0
if not self.open_flag:
return None
try:
lock.acquire(True)
self.cursor.execute(sql_type_1)
sum_type_1 = self.cursor.fetchall()[0][0]
self.cursor.execute(sql_type_49)
result_type_49 = self.cursor.fetchall()
for message in result_type_49:
message = message[0]
content = parser_reply(message)
if content["is_error"]:
continue
sum_type_49 += len(content["title"])
except sqlite3.DatabaseError:
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
finally:
lock.release()
return sum_type_1 + sum_type_49
def get_send_messages_number_sum(
self,
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
) -> int:
"""统计自己总共发了多少条消息"""
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f"""
SELECT count(MsgSvrID)
from MSG
where isSender = 1
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
"""
result = None
if not self.open_flag:
return None
try:
lock.acquire(True)
self.cursor.execute(sql)
result = self.cursor.fetchall()[0][0]
except sqlite3.DatabaseError:
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
finally:
lock.release()
return result
def get_send_messages_number_by_hour(
self,
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
) -> list:
"""
统计每个(小时)时段自己总共发了多少消息,从最多到最少排序\n
return be like [('23', 9526), ('00', 7890), ('22', 7600), ..., ('05', 29)]
"""
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f"""
SELECT strftime('%H', CreateTime, 'unixepoch', 'localtime') as hour,count(MsgSvrID)
from (
SELECT MsgSvrID, CreateTime
FROM MSG
where isSender = 1
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
)
group by hour
order by count(MsgSvrID) desc
"""
result = None
if not self.open_flag:
return None
try:
lock.acquire(True)
self.cursor.execute(sql)
result = self.cursor.fetchall()
except sqlite3.DatabaseError:
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
finally:
lock.release()
return result
def get_message_length(
self,
username_='',
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
) -> int:
"""
统计自己总共发消息的字数包含type=1的文本和type=49,subtype=57里面自己发的文本
"""
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql_type_1 = f"""
SELECT sum(length(strContent))
from MSG
where StrTalker = ? and
type = 1
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
"""
sql_type_49 = f"""
SELECT CompressContent
from MSG
where StrTalker = ? and
type = 49 and subtype = 57
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
"""
sum_type_1 = 0
result_type_1 = 0
result_type_49 = 0
sum_type_49 = 0
if not self.open_flag:
return None
try:
lock.acquire(True)
self.cursor.execute(sql_type_1, [username_])
result_type_1 = self.cursor.fetchall()[0][0]
self.cursor.execute(sql_type_49, [username_])
result_type_49 = self.cursor.fetchall()
except sqlite3.DatabaseError:
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
finally:
lock.release()
for message in result_type_49:
message = message[0]
content = parser_reply(message)
if content["is_error"]:
continue
sum_type_49 += len(content["title"])
sum_type_1 = result_type_1 if result_type_1 else 0
return sum_type_1 + sum_type_49
def close(self):
if self.open_flag:
try:
lock.acquire(True)
self.open_flag = False
self.DB.close()
finally:
lock.release()
def __del__(self):
self.close()
if __name__ == '__main__':
db_path = "./Msg/MSG.db"
msg = Msg()
msg.init_database()
wxid = 'wxid_0o18ef858vnu22'
wxid = '24521163022@chatroom'
wxid = 'wxid_vtz9jk9ulzjt22' # si
print()
time_range = ('2023-01-01 00:00:00', '2024-01-01 00:00:00')
print(msg.get_messages_calendar(wxid))
print(msg.get_first_time_of_message())
print(msg.get_latest_time_of_message())
top_n = msg.get_chatted_top_contacts(time_range=time_range, top_n=9999999)
print(top_n)
print(len(top_n))

184
app/DataBase/package_msg.py Normal file
View File

@@ -0,0 +1,184 @@
import threading
from app.DataBase import msg_db, micro_msg_db, misc_db
from app.util.protocbuf.msg_pb2 import MessageBytesExtra
from app.util.protocbuf.roomdata_pb2 import ChatRoomData
from app.person import Contact, Me, ContactDefault
lock = threading.Lock()
def singleton(cls):
_instance = {}
def inner():
if cls not in _instance:
_instance[cls] = cls()
return _instance[cls]
return inner
@singleton
class PackageMsg:
def __init__(self):
self.ChatRoomMap = {}
def get_package_message_all(self):
'''
获取完整的聊天记录
'''
updated_messages = [] # 用于存储修改后的消息列表
messages = msg_db.get_messages_all()
for row in messages:
row_list = list(row)
# 删除不使用的几个字段
del row_list[13]
del row_list[12]
del row_list[11]
del row_list[10]
del row_list[9]
strtalker = row[11]
info = micro_msg_db.get_contact_by_username(strtalker)
if info is not None:
row_list.append(info[3])
row_list.append(info[4])
else:
row_list.append('')
row_list.append('')
# 判断是否是群聊
if strtalker.__contains__('@chatroom'):
# 自己发送
if row[4] == 1:
row_list.append('')
else:
# 存在BytesExtra为空的情况此时消息类型应该为提示性消息。跳过不处理
if row[10] is None:
continue
# 解析BytesExtra
msgbytes = MessageBytesExtra()
msgbytes.ParseFromString(row[10])
wxid = ''
for tmp in msgbytes.message2:
if tmp.field1 != 1:
continue
wxid = tmp.field2
sender = ''
# 获取群聊成员列表
membersMap = self.get_chatroom_member_list(strtalker)
if membersMap is not None:
if wxid in membersMap:
sender = membersMap.get(wxid)
else:
senderinfo = micro_msg_db.get_contact_by_username(wxid)
if senderinfo is not None:
sender = senderinfo[4]
membersMap[wxid] = senderinfo[4]
if len(senderinfo[3]) > 0:
sender = senderinfo[3]
membersMap[wxid] = senderinfo[3]
row_list.append(sender)
else:
if row[4] == 1:
row_list.append('')
else:
if info is not None:
row_list.append(info[4])
else:
row_list.append('')
updated_messages.append(tuple(row_list))
return updated_messages
def get_package_message_by_wxid(self, chatroom_wxid):
'''
获取一个群聊的聊天记录
return list
a[0]: localId,
a[1]: talkerId, 和strtalker对应的不是群聊信息发送人
a[2]: type,
a[3]: subType,
a[4]: is_sender,
a[5]: timestamp,
a[6]: status, (没啥用)
a[7]: str_content,
a[8]: str_time, (格式化的时间)
a[9]: msgSvrId,
a[10]: BytesExtra,
a[11]: CompressContent,
a[12]: DisplayContent,
a[13]: msg_sender, ContactPC 或 ContactDefault 类型,这个才是群聊里的信息发送人,不是群聊或者自己是发送者没有这个字段)
'''
updated_messages = [] # 用于存储修改后的消息列表
messages = msg_db.get_messages(chatroom_wxid)
for row in messages:
message = list(row)
if message[4] == 1: # 自己发送的就没必要解析了
message.append(Me())
updated_messages.append(message)
continue
if message[10] is None: # BytesExtra是空的跳过
message.append(ContactDefault(wxid))
updated_messages.append(message)
continue
msgbytes = MessageBytesExtra()
msgbytes.ParseFromString(message[10])
wxid = ''
for tmp in msgbytes.message2:
if tmp.field1 != 1:
continue
wxid = tmp.field2
if wxid == "": # 系统消息里面 wxid 不存在
message.append(ContactDefault(wxid))
updated_messages.append(message)
continue
contact_info_list = micro_msg_db.get_contact_by_username(wxid)
if contact_info_list is None: # 群聊中已退群的联系人不会保存在数据库里
message.append(ContactDefault(wxid))
updated_messages.append(message)
continue
contact_info = {
'UserName': contact_info_list[0],
'Alias': contact_info_list[1],
'Type': contact_info_list[2],
'Remark': contact_info_list[3],
'NickName': contact_info_list[4],
'smallHeadImgUrl': contact_info_list[7]
}
contact = Contact(contact_info)
contact.smallHeadImgBLOG = misc_db.get_avatar_buffer(contact.wxid)
contact.set_avatar(contact.smallHeadImgBLOG)
message.append(contact)
updated_messages.append(tuple(message))
return updated_messages
def get_chatroom_member_list(self, strtalker):
membermap = {}
'''
获取群聊成员
'''
try:
lock.acquire(True)
if strtalker in self.ChatRoomMap:
membermap = self.ChatRoomMap.get(strtalker)
else:
chatroom = micro_msg_db.get_chatroom_info(strtalker)
if chatroom is None:
return None
# 解析RoomData数据
parsechatroom = ChatRoomData()
parsechatroom.ParseFromString(chatroom[1])
# 群成员数据放入字典存储
for mem in parsechatroom.members:
if mem.displayName is not None and len(mem.displayName) > 0:
membermap[mem.wxID] = mem.displayName
self.ChatRoomMap[strtalker] = membermap
finally:
lock.release()
return membermap
if __name__ == "__main__":
p = PackageMsg()
print(p.get_package_message_by_wxid("48615079469@chatroom"))