init
This commit is contained in:
0
app/util/exporter/__init__.py
Normal file
0
app/util/exporter/__init__.py
Normal file
176
app/util/exporter/exporter.py
Normal file
176
app/util/exporter/exporter.py
Normal file
@@ -0,0 +1,176 @@
|
||||
import csv
|
||||
import html
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
import filecmp
|
||||
|
||||
from PyQt5.QtCore import pyqtSignal, QThread
|
||||
|
||||
from app.config import OUTPUT_DIR
|
||||
from app.person import Me, Contact
|
||||
|
||||
# Import-time side effect: make sure the chat-record root under OUTPUT_DIR
# exists before any exporter is instantiated.
os.makedirs(os.path.join(OUTPUT_DIR, '聊天记录'), exist_ok=True)
|
||||
|
||||
|
||||
def set_global_font(doc, font_name):
    """Use *font_name* for the document's 'Normal' style and every existing run.

    Args:
        doc: Document-like object exposing ``styles`` (indexable by style name)
            and ``paragraphs`` (each with a ``runs`` sequence).
        font_name: Font family name to assign.
    """
    # Change the base style first.
    doc.styles['Normal'].font.name = font_name

    # Then override the font on every run already present in the document.
    existing_runs = (run for paragraph in doc.paragraphs for run in paragraph.runs)
    for run in existing_runs:
        run.font.name = font_name
|
||||
|
||||
|
||||
def makedirs(path):
    """Create the export directory layout under *path* and sync the bundled icons.

    The media sub-folders (image, emoji, video, voice, file, avatar, music,
    icon) are created if missing. Every file found under the application's
    icon resource directory is then copied into ``<path>/icon``, keeping the
    relative sub-folder structure. A file is copied when the target is
    missing or its content differs from the source.

    Args:
        path: Root directory of one contact's export.
    """
    os.makedirs(path, exist_ok=True)
    for sub_dir in ('image', 'emoji', 'video', 'voice', 'file', 'avatar', 'music', 'icon'):
        os.makedirs(os.path.join(path, sub_dir), exist_ok=True)

    # Resource lookup is relative to the current working directory first.
    resource_dir = os.path.join('app', 'resources', 'data', 'icons')
    if not os.path.exists(resource_dir):
        # Fall back to sys._MEIPASS when it is set (packaged build per the
        # original comment), otherwise to the directory containing this file.
        base_dir = getattr(sys, '_MEIPASS', os.path.abspath(os.path.dirname(__file__)))
        resource_dir = os.path.join(base_dir, 'app', 'resources', 'data', 'icons')

    target_folder = os.path.join(path, 'icon')
    # Copy the required icons.
    for root, _dirs, files in os.walk(resource_dir):
        relative_path = os.path.relpath(root, resource_dir)
        target_path = os.path.normpath(os.path.join(target_folder, relative_path))
        # Nested resource folders need a matching target folder, otherwise
        # shutil.copy fails with FileNotFoundError.
        os.makedirs(target_path, exist_ok=True)

        for file_name in files:
            source_file_path = os.path.join(root, file_name)
            target_file_path = os.path.join(target_path, file_name)
            up_to_date = os.path.exists(target_file_path) and filecmp.cmp(
                source_file_path, target_file_path, shallow=False)
            if not up_to_date:
                shutil.copy(source_file_path, target_file_path)
|
||||
|
||||
|
||||
def escape_js_and_html(input_str):
    """Escape *input_str* for use inside a quoted JavaScript string holding HTML text.

    ``&``, ``<`` and ``>`` are HTML-escaped (quotes are left to the JS step),
    then backslash, both quote characters, newline, carriage return and tab
    are backslash-escaped.

    Args:
        input_str: Text to escape; any falsy value yields an empty string.

    Returns:
        The escaped string.
    """
    if not input_str:
        return ''

    # One pass over the HTML-escaped text; each character is mapped
    # independently, so inserted backslashes are never escaped again.
    js_escapes = str.maketrans({
        "\\": "\\\\",
        "'": r"\'",
        '"': r'\"',
        "\n": r'\n',
        "\r": r'\r',
        "\t": r'\t',
    })
    return html.escape(input_str, quote=False).translate(js_escapes)
|
||||
|
||||
|
||||
class ExporterBase(QThread):
    """Base worker thread that exports one contact's chat history.

    Subclasses implement :meth:`export` and override the per-message-type
    hooks (``text``, ``image``, ...) they support; the hooks defined here do
    nothing. Messages are indexable records; in chat rooms ``message[13]`` is
    the sender object (it is read for ``avatar_path``, ``smallHeadImgUrl``
    and ``remark``).
    """

    progressSignal = pyqtSignal(int)  # progress updates emitted by subclasses
    rangeSignal = pyqtSignal(int)  # emitted with the total message count
    okSignal = pyqtSignal(int)  # emitted when a file / the export is finished
    i = 1  # NOTE(review): not used anywhere in this class
    # Output format identifiers.
    CSV = 0
    DOCX = 1
    HTML = 2
    CSV_ALL = 3
    CONTACT_CSV = 4
    TXT = 5

    def __init__(self, contact, type_=DOCX, message_types=None, time_range=None, messages=None, index=0, parent=None):
        """Store the export parameters and prepare the contact's output folder.

        Args:
            contact: Contact (or chat room) whose messages are exported.
            type_: One of the output format identifiers above.
            message_types: Mapping of message type -> truthy when that type
                should be exported. ``None`` means an empty mapping.
            time_range: Passed through to the message queries of subclasses.
            messages: Optional message collection; only stored here.
            index: Stored as ``self.index``; not used by this class.
            parent: Parent object forwarded to ``QThread``.
        """
        super().__init__(parent)
        # A fresh dict per instance: a mutable default would be shared by
        # every exporter created without the argument.
        self.message_types = {} if message_types is None else message_types
        self.contact: Contact = contact
        self.output_type = type_
        self.total_num = 1  # total number of messages
        self.num = 0  # number of messages processed so far
        self.index = index
        self.last_timestamp = 0
        self.time_range = time_range
        self.messages = messages
        self.origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
        makedirs(self.origin_path)

    def run(self):
        """Thread entry point: run the export."""
        self.export()

    def export(self):
        """Perform the export; must be overridden.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError("export method must be implemented in subclasses")

    def cancel(self):
        """Ask the running thread to stop via ``requestInterruption``."""
        self.requestInterruption()

    def is_5_min(self, timestamp) -> bool:
        """Return True when *timestamp* is more than 300 away from the last accepted one.

        On True the timestamp becomes the new reference point. The name
        suggests the unit is seconds (300 s = 5 min).
        """
        if abs(timestamp - self.last_timestamp) > 300:
            self.last_timestamp = timestamp
            return True
        return False

    def get_avatar_path(self, is_send, message, is_absolute_path=False) -> str:
        """Return the avatar of the message's author.

        Args:
            is_send: Truthy when the message was sent by the local user.
            message: Message record; ``message[13]`` is read in chat rooms.
            is_absolute_path: When true return the ``avatar_path`` attribute,
                otherwise ``smallHeadImgUrl``.
        """
        attribute = 'avatar_path' if is_absolute_path else 'smallHeadImgUrl'
        if self.contact.is_chatroom:
            owner = message[13]
        else:
            owner = Me() if is_send else self.contact
        return getattr(owner, attribute)

    def get_display_name(self, is_send, message) -> str:
        """Return the author's display name, escaped with ``escape_js_and_html``.

        The local user is shown by ``Me().name``; others by their ``remark``
        (taken from ``message[13]`` in chat rooms).
        """
        if is_send:
            display_name = Me().name
        elif self.contact.is_chatroom:
            display_name = message[13].remark
        else:
            display_name = self.contact.remark
        return escape_js_and_html(display_name)

    # Per-message-type hooks. ``doc`` is the output target of the concrete
    # exporter; the base implementations intentionally do nothing.

    def text(self, doc, message):
        """Hook for text messages."""
        return

    def image(self, doc, message):
        """Hook for image messages."""
        return

    def audio(self, doc, message):
        """Hook for voice messages."""
        return

    def emoji(self, doc, message):
        """Hook for emoji/sticker messages."""
        return

    def file(self, doc, message):
        """Hook for file messages."""
        return

    def refermsg(self, doc, message):
        """Hook for reply (quoted) messages."""
        return

    def system_msg(self, doc, message):
        """Hook for system messages."""
        return

    def video(self, doc, message):
        """Hook for video messages."""
        return

    def music_share(self, doc, message):
        """Hook for shared-music messages."""
        return

    def share_card(self, doc, message):
        """Hook for share-card messages."""
        return
|
||||
96
app/util/exporter/exporter_ai_txt.py
Normal file
96
app/util/exporter/exporter_ai_txt.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
from app.DataBase import msg_db
|
||||
from app.util.compress_content import parser_reply, share_card
|
||||
from app.util.exporter.exporter import ExporterBase
|
||||
|
||||
|
||||
# Patterns are applied in this order; each match becomes "[<label> xxx]".
# Digit lookarounds are used instead of \b for phone numbers because \b is
# Unicode-aware and finds no boundary between CJK text and digits.
_PRIVACY_PATTERNS = (
    # Mobile number, optionally prefixed with +86 / 86.
    ('phone', re.compile(r'(?<!\d)(?:\+?86[-\s]?)?1[3-9]\d{9}(?!\d)')),
    # E-mail address.
    ('email', re.compile(r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}')),
    # ID-card number: the 18-character form is tried first so a 15-digit
    # prefix match cannot leave the last characters in the output.
    ('id_card', re.compile(r'\d{17}[\dXx]|\d{15}')),
    # "password"-style keyword followed by its value.
    ('password', re.compile(r'\b(?:password|pwd|pass|psw)[\s=:]*\S+\b')),
    # "account"-style keyword followed by its value.
    ('account', re.compile(r'\b(?:account|username|user|acct)[\s=:]*\S+\b')),
)


def remove_privacy_info(text):
    """Replace privacy-sensitive fragments of *text* with placeholders.

    Phone numbers, e-mail addresses, ID-card numbers and keyword-introduced
    passwords/accounts are each replaced by ``[<kind> xxx]``.

    Args:
        text: Message text to scrub.

    Returns:
        The scrubbed text.
    """
    for label, pattern in _PRIVACY_PATTERNS:
        text = pattern.sub(f'[{label} xxx]', text)
    return text
|
||||
|
||||
|
||||
class AiTxtExporter(ExporterBase):
    """Export a chat as a compact plain-text transcript with private data scrubbed.

    Consecutive messages from the same side share one "name:" prefix.
    ``export`` currently writes text messages only; the other hooks emit
    fixed placeholders when called.
    """

    # ``is_send`` value of the previously written message; -1 = none yet.
    last_is_send = -1

    def title(self, message):
        """Return the "\\nname:" prefix when the speaker side changes, else ''.

        NOTE(review): the name comes from ``get_display_name`` and is
        therefore HTML/JS-escaped even in this plain-text export.
        """
        is_send = message[4]
        if is_send == self.last_is_send:
            return ''
        display_name = '\n' + self.get_display_name(is_send, message) + ':'
        self.last_is_send = is_send
        return display_name

    def text(self, doc, message):
        """Write a text message (``message[7]``) after scrubbing private data."""
        str_content = remove_privacy_info(message[7])
        doc.write(
            f'''{self.title(message)}{str_content} '''
        )

    def image(self, doc, message):
        """Write an image placeholder."""
        doc.write(
            f'''{self.title(message)}[图片]'''
        )

    def audio(self, doc, message):
        """Write a voice-message placeholder."""
        doc.write(
            f'''{self.title(message)}[语音]'''
        )

    def emoji(self, doc, message):
        """Write an emoji/sticker placeholder."""
        doc.write(
            f'''{self.title(message)}[表情包]'''
        )

    def file(self, doc, message):
        """Write a file placeholder."""
        doc.write(
            f'''{self.title(message)}[文件]'''
        )

    def system_msg(self, doc, message):
        """Write a system message with its time and the revoke markup removed."""
        str_content = message[7]
        str_time = message[8]
        str_content = str_content.replace('<![CDATA[', "").replace(
            ' <a href="weixin://revoke_edit_click">重新编辑</a>]]>', "")
        doc.write(
            f'''{str_time} {str_content}'''
        )

    def video(self, doc, message):
        """Write a video placeholder."""
        doc.write(
            f'''{self.title(message)}[视频]'''
        )

    def export(self):
        """Write ``<remark>_chat.txt`` with the contact's text messages grouped by day.

        Emits ``progressSignal`` with the percentage of messages visited so
        far (over all days) and ``okSignal(1)`` when the file is complete.
        """
        print(f"【开始导出 TXT {self.contact.remark}】")
        origin_path = self.origin_path
        os.makedirs(origin_path, exist_ok=True)
        filename = os.path.join(origin_path, self.contact.remark + '_chat.txt')
        messages_by_day = msg_db.get_messages_group_by_day(self.contact.wxid, time_range=self.time_range)
        # Progress is measured over all messages, not over the number of
        # days, so it increases steadily and ends at 100.
        total_steps = sum(len(day_messages) for day_messages in messages_by_day.values())
        processed = 0
        with open(filename, mode='w', newline='', encoding='utf-8') as f:
            for date, day_messages in messages_by_day.items():
                f.write(f"\n\n{'*' * 20}{date}{'*' * 20}\n")
                for message in day_messages:
                    processed += 1
                    self.progressSignal.emit(int(processed / total_steps * 100))
                    type_ = message[2]
                    if type_ == 1 and self.message_types.get(type_):
                        self.text(f, message)
        print(f"【完成导出 TXT {self.contact.remark}】")
        self.okSignal.emit(1)
|
||||
40
app/util/exporter/exporter_csv.py
Normal file
40
app/util/exporter/exporter_csv.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import csv
|
||||
import os
|
||||
|
||||
from app.DataBase import msg_db
|
||||
from app.person import Me
|
||||
from app.util.exporter.exporter import ExporterBase
|
||||
from app.config import OUTPUT_DIR
|
||||
|
||||
|
||||
class CSVExporter(ExporterBase):
    """Export a contact's messages to a UTF-8 (with BOM) CSV file."""

    def _sender_columns(self, msg):
        """Return ``[remark, nickname, wxid]`` of the author of *msg*."""
        if self.contact.is_chatroom:
            # In chat rooms the sender object travels with the message.
            return [msg[13].remark, msg[13].nickName, msg[13].wxid]
        if msg[4]:  # is_send
            return [Me().remark, Me().nickName, Me().wxid]
        return [self.contact.remark, self.contact.nickName, self.contact.wxid]

    def to_csv(self):
        """Write ``<remark>_utf8.csv`` and emit ``okSignal(1)`` when done.

        Each row holds the first nine fields of the message followed by the
        sender's remark, nickname and wxid.
        """
        remark = self.contact.remark
        print(f"【开始导出 CSV {remark}】")
        target_dir = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', remark)
        os.makedirs(target_dir, exist_ok=True)
        csv_path = os.path.join(target_dir, f"{remark}_utf8.csv")
        header = ['localId', 'TalkerId', 'Type', 'SubType',
                  'IsSender', 'CreateTime', 'Status', 'StrContent',
                  'StrTime', 'Remark', 'NickName', 'Sender']
        rows = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
        with open(csv_path, mode='w', newline='', encoding='utf-8-sig') as out:
            writer = csv.writer(out)
            writer.writerow(header)
            for msg in rows:
                sender = self._sender_columns(msg)
                writer.writerow([*msg[:9], *sender])
        print(f"【完成导出 CSV {self.contact.remark}】")
        self.okSignal.emit(1)

    def run(self):
        """Thread entry point: run the CSV export."""
        self.to_csv()
|
||||
380
app/util/exporter/exporter_docx.py
Normal file
380
app/util/exporter/exporter_docx.py
Normal file
@@ -0,0 +1,380 @@
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
from re import findall
|
||||
|
||||
import docx
|
||||
from docx import shared
|
||||
from docx.enum.table import WD_ALIGN_VERTICAL
|
||||
from docx.enum.text import WD_COLOR_INDEX, WD_PARAGRAPH_ALIGNMENT
|
||||
from docx.oxml.ns import qn
|
||||
from docxcompose.composer import Composer
|
||||
|
||||
from app.DataBase import msg_db, hard_link_db
|
||||
from app.util.exporter.exporter import ExporterBase, escape_js_and_html
|
||||
from app.config import OUTPUT_DIR
|
||||
from app.log import logger
|
||||
from app.person import Me
|
||||
from app.util.compress_content import parser_reply, share_card, music_share
|
||||
from app.util.image import get_image_abs_path
|
||||
from app.util.music import get_music_path
|
||||
|
||||
# Control characters to delete: 0x00-0x1f except tab (0x09), line feed (0x0a)
# and carriage return (0x0d).
encoded_chars = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'

# Translation table mapping each of those code points to None (= delete).
char_mapping = dict.fromkeys(encoded_chars)


def filter_control_characters(input_string):
    """Return *input_string* with the control characters above removed.

    @param input_string: text to clean
    @return: the text without the characters listed in ``encoded_chars``
    """
    return input_string.translate(char_mapping)
|
||||
|
||||
|
||||
class DocxExporter(ExporterBase):
    """Export a chat to Word documents, 200 messages per file.

    Every message is rendered as a one-row, two-column table holding the
    avatar and the content. Message fields are read by index: [2] type,
    [3] sub type, [4] is_send, [5] timestamp, [7] content string,
    [8] formatted time, [10] BytesExtra, [11] compressed content,
    [13] sender object (chat rooms).
    """

    def _add_bubble(self, doc, message, text):
        """Add one chat row whose content cell contains *text*."""
        is_send = message[4]
        avatar = self.get_avatar_path(is_send, message, True)
        content_cell = self.create_table(doc, is_send, avatar)
        paragraph = content_cell.paragraphs[0]
        paragraph.add_run(text)
        if is_send:
            paragraph.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
        doc.add_paragraph()

    def text(self, doc, message):
        """Add a text message; control characters are stripped if they are rejected."""
        str_content = message[7]
        is_send = message[4]
        avatar = self.get_avatar_path(is_send, message, True)
        content_cell = self.create_table(doc, is_send, avatar)
        paragraph = content_cell.paragraphs[0]
        try:
            paragraph.add_run(str_content)
        except ValueError:
            # Retry once without the control characters; fall back to a
            # fixed marker if the text is still rejected.
            try:
                str_content = filter_control_characters(str_content)
                paragraph.add_run(str_content)
            except ValueError:
                logger.error(f'非法字符:{str_content}')
                paragraph.add_run('非法字符')
        if is_send:
            paragraph.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
        doc.add_paragraph()

    def image(self, doc, message):
        """Add an image message, trying the ``thumb=True`` path first.

        Returns without adding a picture when neither looked-up path exists
        under ``Me().wx_dir``. The avatar row has already been created at
        that point.
        """
        str_content = message[7]
        is_send = message[4]
        BytesExtra = message[10]
        avatar = self.get_avatar_path(is_send, message, True)
        content = self.create_table(doc, is_send, avatar)
        run = content.paragraphs[0].add_run()
        str_content = escape_js_and_html(str_content)
        image_path = hard_link_db.get_image(str_content, BytesExtra, thumb=True)
        base_path = os.path.join(OUTPUT_DIR, '聊天记录', self.contact.remark, 'image')
        if not os.path.exists(os.path.join(Me().wx_dir, image_path)):
            fallback_path = hard_link_db.get_image(str_content, BytesExtra, thumb=False)
            if not os.path.exists(os.path.join(Me().wx_dir, fallback_path)):
                return
            image_path = fallback_path
        image_path = get_image_abs_path(image_path, base_path=base_path)
        try:
            run.add_picture(image_path, height=shared.Inches(2))
            doc.add_paragraph()
        except Exception as exc:
            # Best effort: a picture that cannot be embedded must not abort
            # the whole export.
            logger.error(f'Error!image {image_path}: {exc}')

    def audio(self, doc, message):
        """Add a voice-message placeholder."""
        self._add_bubble(doc, message, '【语音】')

    def emoji(self, doc, message):
        """Add an emoji/sticker placeholder."""
        self._add_bubble(doc, message, '【表情包】')

    def file(self, doc, message):
        """Add a file placeholder."""
        self._add_bubble(doc, message, '【文件】')

    def refermsg(self, doc, message):
        """
        Add a reply message: the reply text plus the quoted message in grey.

        @param doc: target document
        @param message: message record; ``message[11]`` is parsed by ``parser_reply``
        @return: None
        """
        is_send = message[4]
        content = parser_reply(message[11])
        refer_msg = content.get('refer')
        avatar = self.get_avatar_path(is_send, message, True)
        content_cell = self.create_table(doc, is_send, avatar)
        reply_title_p = content_cell.paragraphs[0]
        reply_title_p.add_run(content.get('title'))
        reply_p = content_cell.add_paragraph()
        reply_content = f"{refer_msg.get('displayname')}:{refer_msg.get('content')}" if refer_msg else '未知引用'
        run = reply_p.add_run(reply_content)
        # Format of the quoted text.
        run.font.color.rgb = shared.RGBColor(121, 121, 121)
        run.font.highlight_color = WD_COLOR_INDEX.GRAY_25

        if is_send:
            reply_title_p.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
            reply_p.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
        doc.add_paragraph()

    def system_msg(self, doc, message):
        """Add a centred system message with revoke/markup tags removed."""
        str_content = message[7]
        str_content = str_content.replace('<![CDATA[', "").replace(
            ' <a href="weixin://revoke_edit_click">重新编辑</a>]]>', "")
        # Strip the tags matched on the cleaned string.
        for xmlstr, _tag in findall('(</{0,1}(img|revo|_wc_cus|a).*?>)', str_content):
            str_content = str_content.replace(xmlstr, "")
        doc.add_paragraph(str_content).alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    def video(self, doc, message):
        """Add a video placeholder."""
        self._add_bubble(doc, message, '【视频】')

    def create_table(self, doc, is_send, avatar_path):
        """Create the 1x2 table for one message and return its content cell.

        Sent messages: content in (0, 0), avatar in (0, 1), table and text
        right-aligned. Received messages: avatar in (0, 0), content in (0, 1).

        NOTE(review): the table alignment is set with a paragraph-alignment
        constant, as in the original; confirm against the python-docx
        version in use.
        """
        table = doc.add_table(rows=1, cols=2, style='Normal Table')
        avatar_col, content_col = (1, 0) if is_send else (0, 1)
        if is_send:
            table.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
        avatar_cell = table.cell(0, avatar_col)
        # Insert the avatar and make its cell as wide as the picture.
        avatar_cell.paragraphs[0].add_run().add_picture(avatar_path, width=shared.Inches(0.5))
        avatar_cell.width = shared.Inches(0.5)
        content_cell = table.cell(0, content_col)
        if is_send:
            content_cell.paragraphs[0].paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
        # Centre the chat content vertically.
        content_cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
        return content_cell

    def music_share(self, doc, message):
        """Fetch the shared music file into the export's ``music`` folder.

        Nothing is written to *doc*; only the download side effect of
        ``get_music_path`` remains.
        """
        origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
        content = music_share(message[11])
        if content.get('audio_url') != '':
            get_music_path(content.get('audio_url'), content.get('title'),
                           output_path=origin_path + '/music')

    def share_card(self, doc, message):
        """Copy a share card's thumbnail and app logo into the ``image`` folder.

        Nothing is written to *doc*. Files are copied only when they exist
        under ``Me().wx_dir``.
        """
        # Same export root as every other method (OUTPUT_DIR, not a
        # hard-coded "data" folder).
        origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
        card_data = share_card(message[10], message[11])
        for key in ('thumbnail', 'app_logo'):
            relative = card_data.get(key)
            if not relative:
                continue
            source = os.path.join(Me().wx_dir, relative)
            if os.path.exists(source):
                shutil.copy(source, os.path.join(origin_path, 'image', os.path.basename(source)))

    def merge_docx(self, conRemark, n):
        """Merge ``<conRemark>0.docx`` .. ``<conRemark>{n-1}.docx`` into ``<conRemark>.docx``.

        The part files are deleted after being read.

        NOTE(review): the first part is read and deleted but never appended,
        and the file names/location differ from what ``export`` writes
        (``<remark>/<remark>_<n>.docx``). Behaviour kept as is; confirm
        before relying on this method.
        """
        origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录')
        all_file_path = [origin_path + '/' + f"{conRemark}{i}.docx" for i in range(n)]
        filename = f"{conRemark}.docx"
        # Start from an empty document saved to disk and reopened as master.
        docx.Document().save(origin_path + '/' + filename)
        master = docx.Document(origin_path + '/' + filename)
        middle_new_docx = Composer(master)
        for num, word in enumerate(all_file_path):
            word_document = docx.Document(word)
            word_document.add_page_break()
            if num != 0:
                middle_new_docx.append(word_document)
            os.remove(word)
        middle_new_docx.save(origin_path + '/' + filename)

    def _new_document(self):
        """Return an empty document with the default Latin and East-Asian fonts set."""
        doc = docx.Document()
        doc.styles["Normal"].font.name = "Cambria"
        doc.styles["Normal"]._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体")
        return doc

    def _save_chunk(self, doc, origin_path, n):
        """Save chunk *n*; on PermissionError save under a time-stamped name instead."""
        filename = os.path.join(origin_path, f"{self.contact.remark}_{n}.docx")
        try:
            doc.save(filename)
        except PermissionError:
            filename = filename[:-5] + f'{time.time()}' + '.docx'
            doc.save(filename)

    def _write_message(self, doc, message):
        """Call the hook for *message* if its type is enabled in ``message_types``."""
        type_ = message[2]
        sub_type = message[3]
        enabled = self.message_types.get
        if type_ == 1 and enabled(type_):
            self.text(doc, message)
        elif type_ == 3 and enabled(type_):
            self.image(doc, message)
        elif type_ == 34 and enabled(type_):
            self.audio(doc, message)
        elif type_ == 43 and enabled(type_):
            self.video(doc, message)
        elif type_ == 47 and enabled(type_):
            self.emoji(doc, message)
        elif type_ == 10000 and enabled(type_):
            self.system_msg(doc, message)
        elif type_ == 49 and sub_type == 57 and enabled(1):
            # Replies are governed by the text-message switch.
            self.refermsg(doc, message)
        elif type_ == 49 and sub_type == 6 and enabled(4906):
            self.file(doc, message)

    def export(self):
        """Export all messages of the contact to ``<remark>_<n>.docx`` files.

        Avatars are saved first. A new file is started every 200 messages;
        ``okSignal(n)`` is emitted after each saved file and
        ``okSignal(10086)`` once everything is done.
        """
        print(f"【开始导出 DOCX {self.contact.remark}】")
        origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
        messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
        Me().save_avatar(os.path.join(origin_path, 'avatar', f'{Me().wxid}.png'))
        if self.contact.is_chatroom:
            for message in messages:
                if message[4]:  # is_send
                    continue
                try:
                    chatroom_avatar_path = os.path.join(origin_path, 'avatar', f'{message[13].wxid}.png')
                    message[13].save_avatar(chatroom_avatar_path)
                except Exception as exc:
                    # Best effort: a missing avatar must not stop the export.
                    logger.error(f'failed to save chatroom avatar: {exc}')
        else:
            self.contact.save_avatar(os.path.join(origin_path, 'avatar', f'{self.contact.wxid}.png'))
        self.rangeSignal.emit(len(messages))

        n = 1
        doc = self._new_document()
        index = 0
        for index, message in enumerate(messages):
            if index and index % 200 == 0:
                # Chunk is full: write it out and continue in a new file.
                self._save_chunk(doc, origin_path, n)
                self.okSignal.emit(n)
                doc = self._new_document()
                n += 1

            self.progressSignal.emit(1)
            if self.is_5_min(message[5]):
                doc.add_paragraph(message[8]).alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            self._write_message(doc, message)
            if index % 25 == 0:
                print(f"【导出 DOCX {self.contact.remark}】{index}/{len(messages)}")
        if index % 25:
            print(f"【导出 DOCX {self.contact.remark}】{index + 1}/{len(messages)}")
        self._save_chunk(doc, origin_path, n)
        self.okSignal.emit(n)
        print(f"【完成导出 DOCX {self.contact.remark}】")
        self.okSignal.emit(10086)
|
||||
523
app/util/exporter/exporter_html.py
Normal file
523
app/util/exporter/exporter_html.py
Normal file
@@ -0,0 +1,523 @@
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import traceback
|
||||
from re import findall
|
||||
|
||||
from PyQt5.QtCore import pyqtSignal, QThread
|
||||
|
||||
from app.DataBase import msg_db, hard_link_db, media_msg_db
|
||||
from app.util.exporter.exporter import ExporterBase, escape_js_and_html
|
||||
from app.config import OUTPUT_DIR
|
||||
from app.log import logger
|
||||
from app.person import Me
|
||||
from app.util import path
|
||||
from app.util.compress_content import parser_reply, share_card, music_share, file, transfer_decompress, call_decompress
|
||||
from app.util.emoji import get_emoji_url
|
||||
from app.util.image import get_image_path, get_image
|
||||
from app.util.music import get_music_path
|
||||
|
||||
# Maps an icon path (relative to the export folder) to the file extensions
# (without the dot) it is used for.
icon_files = {
    './icon/word.png': ['doc', 'docx'],
    './icon/excel.png': ['xls', 'xlsx'],
    './icon/csv.png': ['csv'],
    './icon/txt.png': ['txt'],
    './icon/zip.png': ['zip', '7z', 'rar'],
    './icon/ppt.png': ['ppt', 'pptx'],
    './icon/pdf.png': ['pdf'],
}
|
||||
|
||||
|
||||
class HtmlExporter(ExporterBase):
|
||||
def text(self, doc, message):
    """Write a text message as one JavaScript object literal to *doc*.

    Reads ``message[4]`` (is_send), ``message[5]`` (timestamp) and
    ``message[7]`` (content, escaped before writing).
    """
    is_send = message[4]
    timestamp = message[5]
    is_chatroom = int(bool(self.contact.is_chatroom))

    display_name = self.get_display_name(is_send, message)
    avatar = self.get_avatar_path(is_send, message)
    str_content = escape_js_and_html(message[7])
    doc.write(
        f'''{{ type:{1}, text: '{str_content}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}'}},'''
    )
|
||||
|
||||
def image(self, doc, message):
    """Write an image message as one JavaScript object literal to *doc*.

    The image is looked up through ``hard_link_db.get_image`` (``thumb=False``)
    and passed through ``get_image_path`` with the export's ``image`` folder
    as base path.
    """
    type_ = message[2]
    is_send = message[4]
    timestamp = message[5]
    bytes_extra = message[10]
    is_chatroom = int(bool(self.contact.is_chatroom))
    base_path = os.path.join(OUTPUT_DIR, '聊天记录', self.contact.remark, 'image')

    avatar = self.get_avatar_path(is_send, message)
    display_name = self.get_display_name(is_send, message)
    escaped_content = escape_js_and_html(message[7])
    source_image = hard_link_db.get_image(escaped_content, bytes_extra, up_dir=Me().wx_dir, thumb=False)
    image_path = get_image_path(source_image, base_path=base_path)
    doc.write(
        f'''{{ type:{type_}, text: '{image_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}'}},'''
    )
|
||||
|
||||
def audio(self, doc, message):
    """Write a voice message as one JavaScript object literal to *doc*.

    The audio file is exported into the ``voice`` folder through
    ``media_msg_db.get_audio_path``. If that fails the error is logged and
    the message is skipped. The transcription from
    ``media_msg_db.get_audio_text`` is escaped; a missing transcription is
    written as an empty string.
    """
    origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
    str_content = message[7]
    is_send = message[4]
    msgSvrId = message[9]
    timestamp = message[5]
    is_chatroom = 1 if self.contact.is_chatroom else 0
    avatar = self.get_avatar_path(is_send, message)
    display_name = self.get_display_name(is_send, message)
    try:
        audio_path = media_msg_db.get_audio_path(msgSvrId, output_path=origin_path + "/voice")
        audio_path = "./voice/" + os.path.basename(audio_path)
    except Exception:
        # Best effort, but KeyboardInterrupt/SystemExit are no longer swallowed.
        logger.error(traceback.format_exc())
        return
    # escape_js_and_html returns '' for any falsy value, so a missing
    # transcription does not end up as the text "None".
    voice_to_text = escape_js_and_html(media_msg_db.get_audio_text(str_content))
    doc.write(
        f'''{{ type:34, text:'{audio_path}',is_send:{is_send},avatar_path:'{avatar}',voice_to_text:'{voice_to_text}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}'}},'''
    )
|
||||
|
||||
def emoji(self, doc, message):
    """Write an emoji/sticker message as one JavaScript object literal to *doc*.

    The URL comes from ``get_emoji_url(..., thumb=True)``; the entry is
    written with ``type:3``.
    """
    is_send = message[4]
    timestamp = message[5]
    is_chatroom = int(bool(self.contact.is_chatroom))
    avatar = self.get_avatar_path(is_send, message)
    display_name = self.get_display_name(is_send, message)
    emoji_path = get_emoji_url(message[7], thumb=True)
    doc.write(
        f'''{{ type:{3}, text: '{emoji_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}'}},'''
    )
|
||||
|
||||
def file(self, doc, message):
    """Write a file message as one JavaScript object literal to *doc*.

    The attachment is processed by the module-level ``file`` helper with the
    export's ``file`` folder as output path. Nothing is written unless the
    helper reports ``is_error == False``. The icon is chosen from
    ``icon_files`` by file extension, with ``./icon/file.png`` as default.

    The file name and app name come from the message, so they are escaped
    before being placed into the single-quoted JavaScript strings.
    NOTE(review): the link path is built from the escaped name as well;
    confirm the page template expects HTML-escaped text in that field.
    """
    origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
    bytesExtra = message[10]
    compress_content = message[11]
    is_send = message[4]
    timestamp = message[5]
    is_chatroom = 1 if self.contact.is_chatroom else 0
    avatar = self.get_avatar_path(is_send, message)
    display_name = self.get_display_name(is_send, message)
    file_info = file(bytesExtra, compress_content, output_path=origin_path + '/file')
    # Kept as an equality test: a missing 'is_error' key (None) must not
    # count as success.
    if file_info.get('is_error') == False:
        # First icon whose extension list contains the file's extension.
        icon_path = next(
            (icon for icon, extensions in icon_files.items()
             if file_info.get('file_ext') in extensions),
            './icon/file.png',
        )
        file_name = escape_js_and_html(file_info.get('file_name'))
        app_name = escape_js_and_html(file_info.get('app_name'))
        file_path = file_info.get('file_path')
        if file_path != "":
            file_path = './file/' + file_name
        doc.write(
            f'''{{ type:49, text: '{file_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',icon_path: '{icon_path}',sub_type:6,file_name: '{file_name}',file_size: '{file_info.get('file_len')}',app_name: '{app_name}'}},'''
        )
|
||||
|
||||
def refermsg(self, doc, message):
    """
    Handle a reply (quoted) message and write it to ``doc``.

    When the parsed reply carries a ``refer`` entry, a ``refer_text`` field
    of the form ``<display name>:<content>`` is added to the emitted object;
    otherwise that field is omitted.

    @param doc: writable text stream receiving the generated page data
    @param message: message row; index 4 is the sent flag, 5 the timestamp
                    and 11 the compressed content holding the reply
    @return: None
    """
    is_send = message[4]
    content = parser_reply(message[11])
    refer_msg = content.get('refer')
    timestamp = message[5]
    is_chatroom = 1 if self.contact.is_chatroom else 0
    avatar = self.get_avatar_path(is_send, message)
    display_name = self.get_display_name(is_send, message)
    contentText = escape_js_and_html(content.get('title'))
    # Optional fragment: present only when the quoted message is known.
    refer_field = ''
    if refer_msg:
        referText = f"{escape_js_and_html(refer_msg.get('displayname'))}:{escape_js_and_html(refer_msg.get('content'))}"
        refer_field = f"refer_text: '{referText}',"
    doc.write(
        f'''{{ type:49, text: '{contentText}',is_send:{is_send},sub_type:{content.get('type')},{refer_field}avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}'}},'''
    )
|
||||
|
||||
def system_msg(self, doc, message):
    """
    Write one system message to ``doc`` as a JS object literal (``type:0``).

    The CDATA opener and the "re-edit" revoke link are stripped, then every
    ``img`` / ``revo`` / ``_wc_cus`` / ``a`` tag found by the regex is
    removed before the text is escaped.

    @param doc: writable text stream receiving the generated page data
    @param message: message row; index 4 is the sent flag, 5 the timestamp
                    and 7 the raw string content
    @return: None
    """
    is_send, timestamp = message[4], message[5]
    is_chatroom = 1 if self.contact.is_chatroom else 0

    str_content = message[7].replace('<![CDATA[', "").replace(
        ' <a href="weixin://revoke_edit_click">重新编辑</a>]]>', "")
    # Each match is (whole tag, tag name); only the whole tag is needed.
    for xmlstr, _tag_name in findall('(</{0,1}(img|revo|_wc_cus|a).*?>)', str_content):
        str_content = str_content.replace(xmlstr, "")
    str_content = escape_js_and_html(str_content)
    doc.write(
        f'''{{ type:0, text: '{str_content}',is_send:{is_send},avatar_path:'',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:''}},'''
    )
|
||||
|
||||
def video(self, doc, message):
    """
    Write one video message to ``doc`` as a JS object literal.

    Three outcomes are possible:
      * no video but a thumbnail: the thumbnail is written as ``type:3``,
        or a ``type:1`` "video lost" text entry if touching the thumbnail
        file fails;
      * neither video nor thumbnail: nothing is written;
      * a video path: the file is copied into the ``video`` folder when it
        exists on disk, and an entry with the original message type is
        written.

    @param doc: writable text stream receiving the generated page data
    @param message: message row; index 2 is the type, 4 the sent flag,
                    5 the timestamp, 7 the string content, 10 the extra bytes
    @return: None
    """
    origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
    type_ = message[2]
    is_send = message[4]
    timestamp = message[5]
    str_content = message[7]
    BytesExtra = message[10]
    is_chatroom = 1 if self.contact.is_chatroom else 0
    avatar = self.get_avatar_path(is_send, message)
    display_name = self.get_display_name(is_send, message)
    video_path = hard_link_db.get_video(str_content, BytesExtra, thumb=False)
    image_path = hard_link_db.get_video(str_content, BytesExtra, thumb=True)

    if video_path is None:
        if image_path is None:
            return
        image_path = path.get_relative_path(image_path, base_path=f'/data/聊天记录/{self.contact.remark}/image')
        try:
            # TODO: images that only exist remotely are not handled here.
            print(origin_path + image_path[1:])
            os.utime(origin_path + image_path[1:], (timestamp, timestamp))
            doc.write(
                f'''{{ type:3, text: '{image_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}'}},'''
            )
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # still propagate; any ordinary failure degrades to a text entry.
            doc.write(
                f'''{{ type:1, text: '视频丢失',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}'}},'''
            )
        return

    video_path = f'{Me().wx_dir}/{video_path}'
    video_path = video_path.replace('\\', '/')
    if os.path.exists(video_path):
        new_path = origin_path + '/video/' + os.path.basename(video_path)
        if not os.path.exists(new_path):
            shutil.copy(video_path, os.path.join(origin_path, 'video'))
        # Stamp the exported copy with the message time.
        os.utime(new_path, (timestamp, timestamp))
        video_path = f'./video/{os.path.basename(video_path)}'
    doc.write(
        f'''{{ type:{type_}, text: '{video_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}'}},'''
    )
|
||||
|
||||
def music_share(self, doc, message):
    """
    Write one shared-music message to ``doc`` as a JS object literal.

    Nothing is written unless the parsed share reports ``is_error == False``.
    When an audio URL is present, ``get_music_path`` is asked for a local
    file under the ``music`` folder and the page is pointed at it.

    @param doc: writable text stream receiving the generated page data
    @param message: message row; index 4 is the sent flag, 5 the timestamp
                    and 11 the compressed content holding the share
    @return: None
    """
    origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
    is_send = message[4]
    timestamp = message[5]
    content = music_share(message[11])
    # Guard clause: anything other than an explicit ``False`` error flag aborts.
    if not (content.get('is_error') == False):
        return

    music_path = ''
    if content.get('audio_url') != '':
        music_path = get_music_path(content.get('audio_url'), content.get('title'),
                                    output_path=origin_path + '/music')
        if music_path != '':
            relative = f'./music/{os.path.basename(music_path)}'
            music_path = relative.replace('\\', '/')

    is_chatroom = 1 if self.contact.is_chatroom else 0
    avatar = self.get_avatar_path(is_send, message)
    display_name = self.get_display_name(is_send, message)
    music_path = escape_js_and_html(music_path)
    doc.write(
        f'''{{ type:49, text:'{music_path}',is_send:{is_send},avatar_path:'{avatar}',link_url:'{content.get('link_url')}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',sub_type:3,title:'{content.get('title')}',artist:'{content.get('artist')}', website_name:'{content.get('website_name')}'}},'''
    )
|
||||
|
||||
def share_card(self, doc, message):
    """
    Write one shared link card to ``doc`` as a JS object literal.

    The thumbnail and the app logo are copied into the ``image`` folder when
    they exist under the WeChat data directory. A missing thumbnail becomes
    an empty string; a missing logo keeps the value reported by the card.

    @param doc: writable text stream receiving the generated page data
    @param message: message row; index 4 is the sent flag, 5 the timestamp,
                    10 the extra bytes and 11 the compressed content
    @return: None
    """
    origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
    is_send = message[4]
    timestamp = message[5]
    card_data = share_card(message[10], message[11])
    is_chatroom = 1 if self.contact.is_chatroom else 0
    avatar = self.get_avatar_path(is_send, message)
    display_name = self.get_display_name(is_send, message)

    def _localize(relative, missing):
        # Copy <wx_dir>/<relative> next to the page and return its page-relative
        # path; return ``missing`` when the source file is not on disk.
        source = os.path.join(Me().wx_dir, relative)
        if not os.path.exists(source):
            return missing
        shutil.copy(source, os.path.join(origin_path, 'image', os.path.basename(source)))
        return './image/' + os.path.basename(source)

    thumb_source = card_data.get('thumbnail')
    thumbnail = _localize(thumb_source, '') if thumb_source else ''
    logo_source = card_data.get('app_logo')
    app_logo = _localize(logo_source, card_data.get('app_logo')) if logo_source else ''
    doc.write(
        f'''{{ type:49,sub_type:5, text:'',is_send:{is_send},avatar_path:'{avatar}',url:'{card_data.get('url')}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',title:'{card_data.get('title')}',description:'{card_data.get('description')}',thumbnail:'{thumbnail}',app_logo:'{app_logo}',app_name:'{card_data.get('app_name')}'}},\n'''
    )
|
||||
|
||||
def transfer(self, doc, message):
    """
    Write one money-transfer message to ``doc`` as a JS object literal.

    The display text is chosen from ``paysubtype``; for subtype 1 the payer's
    memo is used when it is non-empty. Any failure while building or writing
    the entry is logged and the message is skipped.

    @param doc: writable text stream receiving the generated page data
    @param message: message row; index 4 is the sent flag, 5 the timestamp
                    and 11 the compressed content holding the transfer
    @return: None
    """
    is_send, timestamp = message[4], message[5]
    transfer_detail = transfer_decompress(message[11])
    is_chatroom = 1 if self.contact.is_chatroom else 0
    avatar = self.get_avatar_path(is_send, message)
    display_name = self.get_display_name(is_send, message)
    try:
        # paysubtype -> caption shown in the page.
        text_info_map = {
            1: transfer_detail["pay_memo"] or "发起转账",
            3: "已收款",
            4: "已退还",
            5: "非实时转账收款",
            7: "发起非实时转账",
            8: "未知",
            9: "未知",
        }
        doc.write(
            f"""{{ type:49,sub_type:2000,text:'{text_info_map[transfer_detail["paysubtype"]]}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',paysubtype:{transfer_detail["paysubtype"]},pay_memo:'{transfer_detail["pay_memo"]}',feedesc:'{transfer_detail["feedesc"]}',}},\n""")
    except Exception:
        logger.error(f'转账解析错误:{transfer_detail}\n{traceback.format_exc()}')
|
||||
def call(self, doc, message):
    """
    Write one voice/video call record to ``doc`` as a JS object literal
    (``type:50``).

    @param doc: writable text stream receiving the generated page data
    @param message: message row; index 4 is the sent flag, 5 the timestamp,
                    7 the string content, 10 the extra bytes and 12 the
                    display content
    @return: None
    """
    is_send, timestamp = message[4], message[5]
    call_detail = call_decompress(
        is_send,
        message[10],  # bytes_extra
        message[12],  # display_content
        message[7],   # str_content
    )
    is_chatroom = 1 if self.contact.is_chatroom else 0
    avatar = self.get_avatar_path(is_send, message)
    display_name = self.get_display_name(is_send, message)
    doc.write(
        f"""{{ type:50, text:'{call_detail["display_content"]}',call_type:{call_detail["call_type"]},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',}},\n""")
|
||||
|
||||
def export(self):
    """
    Export the contact's messages into a standalone HTML file.

    The template is split at its marker comment into a head and a tail;
    each enabled message is written between them by the matching handler.
    The output file is managed by ``with`` so it is closed even when a
    handler raises.

    @return: None
    """
    print(f"【开始导出 HTML {self.contact.remark}】")
    messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
    filename = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark,
                            f'{self.contact.remark}.html')
    file_path = './app/resources/data/template.html'
    if not os.path.exists(file_path):
        # Fall back to the bundled resource directory (``sys._MEIPASS`` when set).
        resource_dir = getattr(sys, '_MEIPASS', os.path.abspath(os.path.dirname(__file__)))
        file_path = os.path.join(resource_dir, 'app', 'resources', 'data', 'template.html')

    with open(file_path, "r", encoding="utf-8") as template:
        html_head, html_end = template.read().split('/*注意看这是分割线*/')
    html_head = html_head.replace("<title>出错了</title>", f"<title>{self.contact.remark}</title>")
    html_head = html_head.replace("<p id=\"title\">出错了</p>", f"<p id=\"title\">{self.contact.remark}</p>")

    enabled = self.message_types.get
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_head)
        self.rangeSignal.emit(len(messages))
        for index, message in enumerate(messages):
            type_ = message[2]
            sub_type = message[3]
            # Enabled image/voice/emoji rows do not tick the progress bar here.
            # NOTE(review): presumably the media worker threads report them — confirm.
            if not (type_ in (3, 34, 47) and enabled(type_)):
                self.progressSignal.emit(1)

            if type_ == 1 and enabled(type_):
                self.text(f, message)
            elif type_ == 3 and enabled(type_):
                self.image(f, message)
            elif type_ == 34 and enabled(type_):
                self.audio(f, message)
            elif type_ == 43 and enabled(type_):
                self.video(f, message)
            elif type_ == 47 and enabled(type_):
                self.emoji(f, message)
            elif type_ == 10000 and enabled(type_):
                self.system_msg(f, message)
            elif type_ == 49 and sub_type == 57 and enabled(1):
                # Replies follow the plain-text switch.
                self.refermsg(f, message)
            elif type_ == 49 and sub_type == 6 and enabled(4906):
                self.file(f, message)
            elif type_ == 49 and sub_type == 3 and enabled(4903):
                self.music_share(f, message)
            elif type_ == 49 and sub_type == 5 and enabled(4905):
                self.share_card(f, message)
            elif type_ == 49 and sub_type == 2000 and enabled(492000):
                self.transfer(f, message)
            elif type_ == 50 and enabled(50):
                self.call(f, message)
            if index % 2000 == 0:
                print(f"【导出 HTML {self.contact.remark}】{index}/{len(messages)}")
        f.write(html_end)
    print(f"【完成导出 HTML {self.contact.remark}】{len(messages)}")
    self.count_finish_num(1)
|
||||
|
||||
def count_finish_num(self, num):
    """
    Record that one more worker thread has finished.

    @param num: not read by this method
    @return: None
    """
    self.num = self.num + 1
    finished, expected = self.num, self.total_num
    print("子线程完成", finished, "/", expected)
    if finished != expected:
        return
    # Every worker has reported in, so announce overall completion.
    self.okSignal.emit(1)
|
||||
|
||||
|
||||
class OutputMedia(QThread):
    """
    Worker thread that exports the voice messages (type 34) of one contact.
    """

    # Emitted once with 34 after every voice message has been processed.
    # The spelling "okSingal" is part of the public interface and is kept.
    okSingal = pyqtSignal(int)
    # Emitted with 1 after each message, whether or not its export succeeded.
    progressSignal = pyqtSignal(int)

    def __init__(self, contact):
        """
        @param contact: contact whose voice messages are exported; its
                        ``remark`` and ``wxid`` attributes are read in ``run``
        """
        super().__init__()
        self.contact = contact

    def run(self):
        """
        Export every voice message into the contact's ``voice`` folder.

        A failure on one message is logged and does not stop the loop.

        @return: None
        """
        origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
        messages = msg_db.get_messages_by_type(self.contact.wxid, 34)
        for message in messages:
            msgSvrId = message[9]
            try:
                media_msg_db.get_audio(
                    msgSvrId, output_path=origin_path + "/voice"
                )
            except Exception:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                logger.error(traceback.format_exc())
            finally:
                self.progressSignal.emit(1)
        self.okSingal.emit(34)
|
||||
|
||||
|
||||
class OutputEmoji(QThread):
    """
    Worker thread for the emoji (type 47) messages of one contact.

    No file is exported here: the download call is disabled in this
    version, so the thread only reports progress for each message.
    """

    # Emitted once with 47 after all emoji messages have been visited.
    # The spelling "okSingal" is part of the public interface and is kept.
    okSingal = pyqtSignal(int)
    # Emitted with 1 for each emoji message.
    progressSignal = pyqtSignal(int)

    def __init__(self, contact):
        """
        @param contact: contact whose emoji messages are counted; its
                        ``wxid`` attribute is read in ``run``
        """
        super().__init__()
        self.contact = contact

    def run(self):
        """
        Report one progress tick per emoji message, then signal completion.

        @return: None
        """
        messages = msg_db.get_messages_by_type(self.contact.wxid, 47)
        for _message in messages:
            # The per-message download (get_emoji into the ./emoji folder) is
            # switched off, so there is nothing here that can fail.
            self.progressSignal.emit(1)
        self.okSingal.emit(47)
|
||||
|
||||
|
||||
class OutputImage(QThread):
    """
    Worker thread that exports the image messages (type 3) of one contact.
    """

    # Emitted with 47 when the export is finished.
    # NOTE(review): 47 is also the value the emoji worker emits — confirm that
    # receivers do not need a distinct code for images.
    # The spelling "okSingal" is part of the public interface and is kept.
    okSingal = pyqtSignal(int)
    # Emitted with 1 after each message, whether or not its export succeeded.
    progressSignal = pyqtSignal(int)

    def __init__(self, contact):
        """
        @param contact: contact whose images are exported; its ``remark``
                        and ``wxid`` attributes are read in ``run``
        """
        super().__init__()
        self.contact = contact
        self.child_thread_num = 2
        # One slot per child thread plus one; not read within this class.
        self.child_threads = [0] * (self.child_thread_num + 1)
        self.num = 0

    def count1(self, num):
        """
        Count one finished child thread; signal completion once all are done.

        @param num: not read by this method
        @return: None
        """
        self.num += 1
        print("图片导出完成一个")
        if self.num == self.child_thread_num:
            self.okSingal.emit(47)
            print("图片导出完成")

    def run(self):
        """
        Export every image message of the contact.

        The full-size image is preferred; the thumbnail is used when the
        full-size file is not on disk, and the message is skipped when
        neither exists. A failure on one message is logged and does not
        stop the loop.

        @return: None
        """
        origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
        messages = msg_db.get_messages_by_type(self.contact.wxid, 3)
        base_path = os.path.join(OUTPUT_DIR, '聊天记录', self.contact.remark, 'image')
        for message in messages:
            str_content = message[7]
            BytesExtra = message[10]
            timestamp = message[5]
            try:
                image_path = hard_link_db.get_image(
                    str_content, BytesExtra, thumb=False
                )
                if not os.path.exists(os.path.join(Me().wx_dir, image_path)):
                    image_thumb_path = hard_link_db.get_image(
                        str_content, BytesExtra, thumb=True
                    )
                    if not os.path.exists(os.path.join(Me().wx_dir, image_thumb_path)):
                        # ``finally`` below still reports progress for this row.
                        continue
                    image_path = image_thumb_path
                image_path = get_image(
                    image_path, base_path=base_path
                )
                try:
                    os.utime(origin_path + image_path[1:], (timestamp, timestamp))
                except Exception:
                    # Best effort: restoring the file time is cosmetic, so a
                    # failure here is deliberately ignored.
                    pass
            except Exception:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                logger.error(traceback.format_exc())
            finally:
                self.progressSignal.emit(1)
        self.okSingal.emit(47)
|
||||
|
||||
|
||||
class OutputImageChild(QThread):
    """
    Worker thread that exports a given list of image messages for a contact.
    """

    # Emitted with 47 once every supplied message has been processed.
    # The spelling "okSingal" is part of the public interface and is kept.
    okSingal = pyqtSignal(int)
    # Emitted with 1 after each message, whether or not its export succeeded.
    progressSignal = pyqtSignal(int)

    def __init__(self, contact, messages):
        """
        @param contact: contact the images belong to; its ``remark``
                        attribute is read in ``run``
        @param messages: message rows this thread is responsible for
        """
        super().__init__()
        self.contact = contact
        self.messages = messages

    def run(self):
        """
        Export every message in ``self.messages``.

        The full-size image is preferred; the thumbnail is used when the
        full-size file is not on disk, and the message is skipped when
        neither exists. A failure on one message is logged and does not
        stop the loop.

        @return: None
        """
        origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
        for message in self.messages:
            str_content = message[7]
            BytesExtra = message[10]
            timestamp = message[5]
            try:
                image_path = hard_link_db.get_image(
                    str_content, BytesExtra, thumb=False
                )
                if not os.path.exists(os.path.join(Me().wx_dir, image_path)):
                    image_thumb_path = hard_link_db.get_image(
                        str_content, BytesExtra, thumb=True
                    )
                    if not os.path.exists(os.path.join(Me().wx_dir, image_thumb_path)):
                        # ``finally`` below still reports progress for this row.
                        continue
                    image_path = image_thumb_path
                image_path = get_image(
                    image_path, base_path=f"/data/聊天记录/{self.contact.remark}/image"
                )
                try:
                    os.utime(origin_path + image_path[1:], (timestamp, timestamp))
                except Exception:
                    # Best effort: restoring the file time is cosmetic, so a
                    # failure here is deliberately ignored.
                    pass
            except Exception:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                logger.error(traceback.format_exc())
            finally:
                self.progressSignal.emit(1)
        self.okSingal.emit(47)
        print("图片子线程完成")
|
||||
193
app/util/exporter/exporter_json.py
Normal file
193
app/util/exporter/exporter_json.py
Normal file
@@ -0,0 +1,193 @@
|
||||
import json
|
||||
import random
|
||||
import os
|
||||
|
||||
from app.DataBase import msg_db
|
||||
from app.person import Me
|
||||
from .exporter import ExporterBase
|
||||
|
||||
|
||||
def merge_content(conversions_list) -> list:
    """
    Merge consecutive entries spoken by the same role into one entry.

    Contents of adjacent entries with an equal ``role`` are joined with a
    newline. Only ``role`` and ``content`` are carried into the result; any
    other key (such as ``str_time``) is dropped. The input is not modified.

    @param conversions_list: iterable of dicts with "role" and "content" keys
    @return: new list of ``{"role": ..., "content": ...}`` dicts
    """
    merged_data = []
    for item in conversions_list:
        role, content = item["role"], item["content"]
        if merged_data and merged_data[-1]["role"] == role:
            # Same speaker as the previous entry: extend that entry.
            merged_data[-1]["content"] += "\n" + content
        else:
            merged_data.append({"role": role, "content": content})
    return merged_data
|
||||
|
||||
|
||||
def system_prompt():
    """
    Build the system entry that opens every exported conversation.

    @return: dict with role "system" and a prompt naming the current user
    """
    # A per-contact variant of this prompt existed earlier and is disabled;
    # the generic "your friend" wording below is used for every contact.
    prompt = f"你是{Me().name},一个聪明、热情、善良的人,后面的对话来自你的朋友,你要认真地回答他"
    return {"role": "system", "content": prompt}
|
||||
|
||||
|
||||
def message_to_conversion(group):
    """
    Turn a group of message rows into a merged list of chat turns.

    Trailing rows whose sent flag equals 0 are popped from ``group`` (the
    list is modified in place), and sent rows that come before the first
    received row are skipped. Sent rows become "assistant" turns and the
    rest "user" turns.

    @param group: list of message rows; index 4 is the sent flag, 7 the
                  text and 8 the formatted time
    @return: merged turns beginning with the system prompt, or an empty
             list when no turn remains
    """
    conversions = [system_prompt()]
    while len(group) > 0 and group[-1][4] == 0:
        group.pop()
    for message in group:
        sent = message[4]
        if sent and len(conversions) == 1:
            # Nothing received yet: a leading sent row has no prompt to answer.
            continue
        conversions.append({
            "role": "assistant" if sent else "user",
            "content": message[7],
            'str_time': message[8],
        })
    return merge_content(conversions) if len(conversions) > 1 else []
|
||||
|
||||
|
||||
class JsonExporter(ExporterBase):
    """
    Exporter that writes a contact's text messages as JSON conversation
    files, split into a training file and a dev file.
    """

    @staticmethod
    def _as_turn(message):
        """Map a message row to a chat turn: sent rows are "assistant", others "user"."""
        return {
            "role": "assistant" if message[4] else "user",
            "content": message[7],
        }

    def split_by_time(self, length=300):
        """
        Group text messages into conversations by a fixed time window.

        A group collects messages whose timestamp is less than ``length``
        seconds after the window start, then any directly following sent
        messages. The next window starts at the timestamp of the first
        message that was not consumed.

        The final sent message used to be appended twice when the window
        loop stopped at the end of the list; both loops are now bounded by
        the list length.

        @param length: window size in seconds
        @return: list of ``{'conversations': [...]}`` dicts with merged turns
        """
        messages = msg_db.get_messages_by_type(self.contact.wxid, type_=1, time_range=self.time_range)
        total = len(messages)
        start_time = 0
        groups = []
        i = 0
        while i < total:
            group = [system_prompt()]
            # Messages that fall inside the current window.
            while i < total and messages[i][5] - start_time < length:
                group.append(self._as_turn(messages[i]))
                i += 1
            # Sent messages right after the window stay with this group.
            while i < total and messages[i][4]:
                group.append({"role": "assistant", "content": messages[i][7]})
                i += 1
            if i < total:
                start_time = messages[i][5]
            groups.append(group)
        return [{'conversations': merge_content(group)} for group in groups]

    def split_by_intervals(self, max_diff_seconds=300):
        """
        Group text messages into conversations by the gap between messages.

        Each group starts at the next received message (leading sent
        messages are skipped), grows while the gap to the previous message
        is at most ``max_diff_seconds``, and finally absorbs any directly
        following sent messages. Groups that yield no turns are dropped.

        @param max_diff_seconds: largest gap in seconds allowed inside a group
        @return: list of ``{'conversations': [...]}`` dicts
        """
        messages = msg_db.get_messages_by_type(self.contact.wxid, type_=1, time_range=self.time_range)
        total = len(messages)
        groups = []
        i = 0
        while i < total:
            # Skip sent rows until a received one; stops on the last row regardless.
            while messages[i][4] and i + 1 < total:
                i += 1
            current_group = [messages[i]]
            i += 1
            while i < total and messages[i][5] - current_group[-1][5] <= max_diff_seconds:
                current_group.append(messages[i])
                i += 1
            while i < total and messages[i][4]:
                current_group.append(messages[i])
                i += 1
            groups.append(current_group)

        result = []
        for group in groups:
            conversations = message_to_conversion(group)
            if conversations:
                result.append({'conversations': conversations})
        return result

    def to_json(self):
        """
        Write ``<remark>_train.json`` and ``<remark>_dev.json`` under
        ``self.origin_path``.

        Conversations are split with a 60-second gap, shuffled, and the
        first 20% go to the dev file; the rest go to the train file.

        @return: None
        """
        print(f"【开始导出 json {self.contact.remark}】")
        origin_path = self.origin_path
        os.makedirs(origin_path, exist_ok=True)
        filename = os.path.join(origin_path, f"{self.contact.remark}")

        res = self.split_by_intervals(60)
        # Shuffle so the train/dev split is not ordered by time.
        random.shuffle(res)

        split_ratio = 0.2  # share of conversations that goes to the dev file
        split_point = int(len(res) * split_ratio)

        train_data = res[split_point:]
        dev_data = res[:split_point]
        with open(f'{filename}_train.json', "w", encoding="utf-8") as f:
            json.dump(train_data, f, ensure_ascii=False, indent=4)
        with open(f'{filename}_dev.json', "w", encoding="utf-8") as f:
            json.dump(dev_data, f, ensure_ascii=False, indent=4)
        self.okSignal.emit(1)

    def run(self):
        """Thread entry point: perform the JSON export."""
        self.to_json()
|
||||
146
app/util/exporter/exporter_txt.py
Normal file
146
app/util/exporter/exporter_txt.py
Normal file
@@ -0,0 +1,146 @@
|
||||
import os
|
||||
|
||||
from app.DataBase import msg_db
|
||||
from app.util.exporter.exporter import ExporterBase
|
||||
from app.config import OUTPUT_DIR
|
||||
from app.util.compress_content import parser_reply, share_card
|
||||
|
||||
|
||||
class TxtExporter(ExporterBase):
    """
    Exporter that writes a contact's chat history as plain text.

    Each handler writes a "<time> <sender>" header line followed by the
    message text or a bracketed placeholder, then a blank line.
    """

    def text(self, doc, message):
        """Write a plain text message: header line, then the text itself."""
        str_content, str_time = message[7], message[8]
        display_name = self.get_display_name(message[4], message)
        doc.write(
            f'''{str_time} {display_name}\n{str_content}\n\n'''
        )

    def image(self, doc, message):
        """Write the placeholder entry for an image message."""
        str_time = message[8]
        display_name = self.get_display_name(message[4], message)
        doc.write(
            f'''{str_time} {display_name}\n[图片]\n\n'''
        )

    def audio(self, doc, message):
        """Write the placeholder entry for a voice message."""
        str_time = message[8]
        display_name = self.get_display_name(message[4], message)
        doc.write(
            f'''{str_time} {display_name}\n[语音]\n\n'''
        )

    def emoji(self, doc, message):
        """Write the placeholder entry for an emoji (sticker) message."""
        str_time = message[8]
        display_name = self.get_display_name(message[4], message)
        doc.write(
            f'''{str_time} {display_name}\n[表情包]\n\n'''
        )

    def file(self, doc, message):
        """Write the placeholder entry for a file message."""
        str_time = message[8]
        display_name = self.get_display_name(message[4], message)
        doc.write(
            f'''{str_time} {display_name}\n[文件]\n\n'''
        )

    def refermsg(self, doc, message):
        """
        Handle a reply (quoted) message.

        The reply text is followed by a line naming the quoted sender and
        content, or a fixed "unknown" marker when the quote is unavailable.

        @param doc: writable text stream
        @param message: message row; index 11 holds the compressed reply
        @return: None
        """
        str_time = message[8]
        is_send = message[4]
        content = parser_reply(message[11])
        refer_msg = content.get('refer')
        display_name = self.get_display_name(is_send, message)
        if not refer_msg:
            doc.write(
                f'''{str_time} {display_name}\n{content.get('title')}\n引用:未知\n\n'''
            )
            return
        doc.write(
            f'''{str_time} {display_name}\n{content.get('title')}\n引用:{refer_msg.get('displayname')}:{refer_msg.get('content')}\n\n'''
        )

    def system_msg(self, doc, message):
        """Write a system message after removing the CDATA opener and the re-edit link."""
        str_time = message[8]
        cleaned = message[7].replace('<![CDATA[', "")
        cleaned = cleaned.replace(' <a href="weixin://revoke_edit_click">重新编辑</a>]]>', "")
        doc.write(
            f'''{str_time} {cleaned}\n\n'''
        )

    def video(self, doc, message):
        """Write the placeholder entry for a video message."""
        str_time = message[8]
        display_name = self.get_display_name(message[4], message)
        doc.write(
            f'''{str_time} {display_name}\n[视频]\n\n'''
        )

    def music_share(self, doc, message):
        """Write the placeholder entry for a shared-music message."""
        str_time = message[8]
        display_name = self.get_display_name(message[4], message)
        doc.write(
            f'''{str_time} {display_name}\n[音乐分享]\n\n'''
        )

    def share_card(self, doc, message):
        """
        Write a shared link card: title, description, url and app name,
        one per line, after the header line.

        @param doc: writable text stream
        @param message: message row; index 10 holds the extra bytes and
                        11 the compressed content of the card
        @return: None
        """
        is_send = message[4]
        str_time = message[8]
        card_data = share_card(message[10], message[11])
        display_name = self.get_display_name(is_send, message)
        # Adjacent literals spell out the line breaks of the multi-line string.
        doc.write(
            f"{str_time} {display_name}\n"
            f"[链接]:title:{card_data.get('title')}\n"
            f"description:{card_data.get('description')}\n"
            f"url:{card_data.get('url')}\n"
            f"name:{card_data.get('app_name')}\n"
            "\n\n"
        )

    def export(self):
        """
        Export the contact's messages to ``<remark>.txt``.

        Progress is emitted as a percentage after each message; only
        message types switched on in ``self.message_types`` are written.

        @return: None
        """
        print(f"【开始导出 TXT {self.contact.remark}】")
        origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
        os.makedirs(origin_path, exist_ok=True)
        filename = os.path.join(origin_path, self.contact.remark+'.txt')
        messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
        total_steps = len(messages)
        enabled = self.message_types.get
        with open(filename, mode='w', newline='', encoding='utf-8') as f:
            for position, message in enumerate(messages, start=1):
                type_, sub_type = message[2], message[3]
                self.progressSignal.emit(int(position / total_steps * 100))
                if type_ == 1 and enabled(type_):
                    self.text(f, message)
                elif type_ == 3 and enabled(type_):
                    self.image(f, message)
                elif type_ == 34 and enabled(type_):
                    self.audio(f, message)
                elif type_ == 43 and enabled(type_):
                    self.video(f, message)
                elif type_ == 47 and enabled(type_):
                    self.emoji(f, message)
                elif type_ == 10000 and enabled(type_):
                    self.system_msg(f, message)
                elif type_ == 49 and sub_type == 57 and enabled(1):
                    # Replies follow the plain-text switch.
                    self.refermsg(f, message)
                elif type_ == 49 and sub_type == 6 and enabled(4906):
                    self.file(f, message)
                elif type_ == 49 and sub_type == 3 and enabled(4903):
                    self.music_share(f, message)
                elif type_ == 49 and sub_type == 5 and enabled(4905):
                    self.share_card(f, message)
        print(f"【完成导出 TXT {self.contact.remark}】")
        self.okSignal.emit(1)
|
||||
466
app/util/exporter/output.py
Normal file
466
app/util/exporter/output.py
Normal file
@@ -0,0 +1,466 @@
|
||||
import csv
|
||||
import os
|
||||
import time
|
||||
import traceback
|
||||
from typing import List
|
||||
|
||||
import docx
|
||||
from PyQt5.QtCore import pyqtSignal, QThread
|
||||
from PyQt5.QtWidgets import QFileDialog
|
||||
from docx.oxml.ns import qn
|
||||
from docxcompose.composer import Composer
|
||||
|
||||
from app.util.exporter.exporter_ai_txt import AiTxtExporter
|
||||
from app.util.exporter.exporter_csv import CSVExporter
|
||||
from app.util.exporter.exporter_docx import DocxExporter
|
||||
from app.util.exporter.exporter_html import HtmlExporter
|
||||
from app.util.exporter.exporter_json import JsonExporter
|
||||
from app.util.exporter.exporter_txt import TxtExporter
|
||||
from app.DataBase.hard_link import decodeExtraBuf
|
||||
from app.config import OUTPUT_DIR
|
||||
from app.DataBase.package_msg import PackageMsg
|
||||
from app.DataBase import media_msg_db, hard_link_db, micro_msg_db, msg_db
|
||||
from app.log import logger
|
||||
from app.person import Me
|
||||
from app.util.image import get_image
|
||||
|
||||
os.makedirs(os.path.join(OUTPUT_DIR, '聊天记录'), exist_ok=True)
|
||||
|
||||
|
||||
class Output(QThread):
|
||||
"""
|
||||
发送信息线程
|
||||
"""
|
||||
startSignal = pyqtSignal(int)
|
||||
progressSignal = pyqtSignal(int)
|
||||
rangeSignal = pyqtSignal(int)
|
||||
okSignal = pyqtSignal(int)
|
||||
batchOkSignal = pyqtSignal(int)
|
||||
nowContact = pyqtSignal(str)
|
||||
i = 1
|
||||
CSV = 0
|
||||
DOCX = 1
|
||||
HTML = 2
|
||||
CSV_ALL = 3
|
||||
CONTACT_CSV = 4
|
||||
TXT = 5
|
||||
JSON = 6
|
||||
AI_TXT = 7
|
||||
Batch = 10086
|
||||
|
||||
def __init__(self, contact, type_=DOCX, message_types=None, sub_type=None, time_range=None, parent=None):
    """
    Set up an export job.

    ``message_types`` and ``sub_type`` used to default to a shared ``{}`` /
    ``[]``; they now default to ``None`` and each instance gets its own
    empty container, so instances can no longer leak state into each other.

    @param contact: contact to export (``batch_export`` iterates it as a
                    list of contacts)
    @param type_: export format, one of the class constants
    @param message_types: mapping of message type code to an enabled flag;
                          None means an empty mapping
    @param sub_type: export formats used by batch export; None means an
                     empty list
    @param time_range: passed through to the exporters; None by default
    @param parent: parent object handed to the base-class constructor
    """
    super().__init__(parent)
    # NOTE(review): this attribute hides QObject.children() on the instance — confirm intended.
    self.children = []
    self.last_timestamp = 0
    self.sub_type = [] if sub_type is None else sub_type
    self.time_range = time_range
    self.message_types = {} if message_types is None else message_types
    # NOTE(review): the original comment said "default 1000 seconds" but the value is 2 — confirm.
    self.sec = 2
    self.contact = contact
    self.msg_id = 0
    self.output_type: int | List[int] = type_
    self.total_num = 1
    self.num = 0
|
||||
|
||||
def progress(self, value):
    """
    Forward a progress value through ``progressSignal``.

    @param value: value to emit
    @return: None
    """
    signal = self.progressSignal
    signal.emit(value)
|
||||
|
||||
def output_image(self):
    """
    Export all images.

    This is currently a no-op placeholder.

    @return: None
    """
    return None
|
||||
|
||||
def output_emoji(self):
    """
    Export all emoji (stickers).

    This is currently a no-op placeholder.

    @return: None
    """
    return None
|
||||
|
||||
def to_csv_all(self):
    """
    Export every chat record to a single CSV file chosen by the user.

    Returns without emitting any signal when the save dialog is cancelled.

    @return: None
    """
    os.makedirs(os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录'), exist_ok=True)
    chosen = QFileDialog.getSaveFileName(None, "save file", os.path.join(os.getcwd(), 'messages.csv'),
                                         "csv files (*.csv);;all files(*.*)")
    target = chosen[0]
    if not target:
        return
    self.startSignal.emit(1)
    header = ['localId', 'TalkerId', 'Type', 'SubType',
              'IsSender', 'CreateTime', 'Status', 'StrContent',
              'StrTime', 'Remark', 'NickName', 'Sender']

    rows = PackageMsg().get_package_message_all()
    # utf-8-sig writes a byte-order mark at the start of the file.
    with open(target, mode='w', newline='', encoding='utf-8-sig') as file:
        writer = csv.writer(file)
        writer.writerow(header)
        writer.writerows(rows)
    self.okSignal.emit(1)
|
||||
|
||||
def contact_to_csv(self):
    """
    Export the contact list to a CSV file chosen by the user.

    Each row holds the first nine contact columns, column 10 (label), then
    gender, telephone, signature and the region parts decoded from
    column 9. Returns without emitting any signal when the save dialog is
    cancelled.

    @return: None
    """
    chosen = QFileDialog.getSaveFileName(None, "save file", os.path.join(os.getcwd(), 'contacts.csv'),
                                         "csv files (*.csv);;all files(*.*)")
    target = chosen[0]
    if not target:
        return
    self.startSignal.emit(1)
    header = ['UserName', 'Alias', 'Type', 'Remark', 'NickName', 'PYInitial', 'RemarkPYInitial', 'smallHeadImgUrl',
              'bigHeadImgUrl', 'label', 'gender', 'telephone', 'signature', 'country/region', 'province', 'city']
    contacts = micro_msg_db.get_contact()
    # utf-8-sig writes a byte-order mark at the start of the file.
    with open(target, mode='w', newline='', encoding='utf-8-sig') as file:
        writer = csv.writer(file)
        writer.writerow(header)
        for contact in contacts:
            detail = decodeExtraBuf(contact[9])
            gender_code = detail.get('gender')
            # 0 -> unknown, 1 -> male, any other value -> female.
            gender = '未知' if gender_code == 0 else ('男' if gender_code == 1 else '女')
            row = [*contact[:9], contact[10], gender, detail.get('telephone'), detail.get('signature'),
                   *detail.get('region')]
            writer.writerow(row)

    self.okSignal.emit(1)
|
||||
|
||||
def batch_export(self):
    """
    Launch one export per (contact, output format) combination.

    Resets the batch counters, announces the total number of exports through
    ``rangeSignal`` and then calls the matching ``to_*`` method in batch mode
    for every contact and every entry of ``self.sub_type``. Formats that match
    none of the known constants are ignored.
    """
    print('开始批量导出')
    print(self.sub_type, self.message_types)
    print(len(self.contact))
    print([person.remark for person in self.contact])
    self.batch_num_total = len(self.contact) * len(self.sub_type)
    self.batch_num = 0
    self.rangeSignal.emit(self.batch_num_total)
    # (format constant, exporter method) pairs, checked in this order; the
    # first constant equal to the requested format wins.
    exporters = (
        ('DOCX', 'to_docx'),
        ('TXT', 'to_txt'),
        ('AI_TXT', 'to_ai_txt'),
        ('CSV', 'to_csv'),
        ('HTML', 'to_html'),
        ('JSON', 'to_json'),
    )
    for person in self.contact:
        for wanted in self.sub_type:
            for constant_name, method_name in exporters:
                if wanted == getattr(self, constant_name):
                    getattr(self, method_name)(person, self.message_types, True)
                    break
|
||||
|
||||
def batch_finish_one(self, num):
    """
    Record that one export of a batch run has finished.

    Emits the remark of the contact the finished export is attributed to
    (derived from the running counter), bumps the counter and emits
    ``okSignal(1)`` once the counter reaches ``batch_num_total``.

    @param num: payload of the connected signal; not used here
    """
    formats_per_contact = len(self.sub_type)
    current = self.contact[self.batch_num // formats_per_contact]
    self.nowContact.emit(current.remark)
    self.batch_num += 1
    if self.batch_num != self.batch_num_total:
        return
    self.okSignal.emit(1)
|
||||
|
||||
def merge_docx(self, n):
    """
    Merge one partial .docx file into the document being composed.

    ``n == 10086`` is the sentinel for "finished": the composed document is
    saved as ``<remark>.docx`` and ``okSignal(1)`` is emitted. For any other
    ``n`` the part file ``<remark>_<n>.docx`` is appended to the composer and
    deleted; after every 50th part the composed document is saved as
    ``<remark>-<n // 50>.docx`` and a fresh composer is started.

    @param n: index of the part that was just written, or 10086 when done
    """
    remark = self.contact.remark
    out_dir = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', remark)

    def save_composed(target):
        # On PermissionError, retry under a name with the current timestamp
        # inserted before the extension.
        try:
            self.document.save(target)
        except PermissionError:
            target = target[:-5] + f'{time.time()}' + '.docx'
            self.document.save(target)

    if n == 10086:
        save_composed(os.path.join(out_dir, f'{remark}.docx'))
        self.okSignal.emit(1)
        return

    part_path = f"{out_dir}/{remark}_{n}.docx"
    self.document.append(docx.Document(part_path))
    os.remove(part_path)
    if n % 50 != 0:
        return

    save_composed(os.path.join(out_dir, f'{remark}-{n // 50}.docx'))
    # Start a new, empty document with the same default fonts.
    fresh = docx.Document()
    fresh.styles["Normal"].font.name = "Cambria"
    fresh.styles["Normal"]._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体")
    self.document = Composer(fresh)
|
||||
|
||||
def to_docx(self, contact, message_types, is_batch=False):
    """
    Start a DocxExporter worker for one contact.

    A new composer with the default fonts is stored in ``self.document``
    first. Outside batch mode the worker's range signal is forwarded and its
    ok signal drives ``merge_docx``; in batch mode the ok signal drives
    ``batch_finish_one`` instead.

    @param contact: contact whose chat is exported
    @param message_types: message-type selection handed to the exporter
    @param is_batch: True when called as part of a batch export
    """
    base = docx.Document()
    normal_style = base.styles["Normal"]
    normal_style.font.name = "Cambria"
    normal_style._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体")
    self.document = Composer(base)

    worker = DocxExporter(contact, type_=self.DOCX, message_types=message_types, time_range=self.time_range)
    self.children.append(worker)
    worker.progressSignal.connect(self.progress)
    if is_batch:
        worker.okSignal.connect(self.batch_finish_one)
    else:
        worker.rangeSignal.connect(self.rangeSignal)
        worker.okSignal.connect(self.merge_docx)
    worker.start()
|
||||
|
||||
def to_json(self, contact, message_types, is_batch=False):
    """
    Start a JsonExporter worker for one contact.

    Outside batch mode the worker's range and ok signals are forwarded to this
    object's signals; in batch mode completion is reported to
    ``batch_finish_one`` and the range signal is left unconnected.

    @param contact: contact whose chat is exported
    @param message_types: message-type selection handed to the exporter
    @param is_batch: True when called as part of a batch export
    """
    worker = JsonExporter(contact, type_=self.JSON, message_types=message_types, time_range=self.time_range)
    self.children.append(worker)
    worker.progressSignal.connect(self.progress)
    if is_batch:
        worker.okSignal.connect(self.batch_finish_one)
    else:
        worker.rangeSignal.connect(self.rangeSignal)
        worker.okSignal.connect(self.okSignal)
    worker.start()
|
||||
|
||||
def to_txt(self, contact, message_types, is_batch=False):
    """
    Start a TxtExporter worker for one contact.

    Outside batch mode the worker's range and ok signals are forwarded to this
    object's signals; in batch mode completion is reported to
    ``batch_finish_one`` and the range signal is left unconnected.

    @param contact: contact whose chat is exported
    @param message_types: message-type selection handed to the exporter
    @param is_batch: True when called as part of a batch export
    """
    worker = TxtExporter(contact, type_=self.TXT, message_types=message_types, time_range=self.time_range)
    self.children.append(worker)
    worker.progressSignal.connect(self.progress)
    if is_batch:
        worker.okSignal.connect(self.batch_finish_one)
    else:
        worker.rangeSignal.connect(self.rangeSignal)
        worker.okSignal.connect(self.okSignal)
    worker.start()
|
||||
|
||||
def to_ai_txt(self, contact, message_types, is_batch=False):
    """
    Start an AiTxtExporter worker for one contact.

    Outside batch mode the worker's range and ok signals are forwarded to this
    object's signals; in batch mode completion is reported to
    ``batch_finish_one`` and the range signal is left unconnected.

    @param contact: contact whose chat is exported
    @param message_types: message-type selection handed to the exporter
    @param is_batch: True when called as part of a batch export
    """
    # NOTE(review): the exporter is created with type_=self.TXT, not
    # self.AI_TXT - confirm against AiTxtExporter whether that is intended.
    worker = AiTxtExporter(contact, type_=self.TXT, message_types=message_types, time_range=self.time_range)
    self.children.append(worker)
    worker.progressSignal.connect(self.progress)
    if is_batch:
        worker.okSignal.connect(self.batch_finish_one)
    else:
        worker.rangeSignal.connect(self.rangeSignal)
        worker.okSignal.connect(self.okSignal)
    worker.start()
|
||||
|
||||
def to_html(self, contact, message_types, is_batch=False):
    """
    Start the HTML export for one contact plus its media workers.

    One HtmlExporter is always started. Additional worker threads are started
    for voice (type 34), emoji (type 47) and image (type 3) messages when the
    corresponding entry of ``message_types`` is truthy. Every worker reports
    completion to ``count_finish_num``, which compares the number of finished
    workers with ``self.total_num``.

    All workers are created and connected first, ``self.total_num`` is set to
    their count, and only then are they started. This way a worker that
    finishes quickly can never be counted against a total that is still being
    built up.

    @param contact: contact whose chat is exported
    @param message_types: mapping of message type code -> enabled flag
    @param is_batch: True when called as part of a batch export
    """
    page_worker = HtmlExporter(contact, type_=self.output_type, message_types=message_types,
                               time_range=self.time_range)
    self.children.append(page_worker)
    page_worker.progressSignal.connect(self.progress)
    if not is_batch:
        page_worker.rangeSignal.connect(self.rangeSignal)
    page_worker.okSignal.connect(self.count_finish_num)
    pending = [page_worker]

    # Message type code -> worker class running in its own thread:
    # 34 = voice, 47 = emoji, 3 = image.
    media_workers = ((34, OutputMedia), (47, OutputEmoji), (3, OutputImage))
    for type_code, worker_cls in media_workers:
        if not message_types.get(type_code):
            continue
        media_worker = worker_cls(contact, time_range=self.time_range)
        self.children.append(media_worker)
        media_worker.okSingal.connect(self.count_finish_num)
        media_worker.progressSignal.connect(self.progressSignal)
        pending.append(media_worker)

    # Fix the expected number of completions before any thread can finish.
    self.total_num = len(pending)
    for worker in pending:
        worker.start()
|
||||
|
||||
def to_csv(self, contact, message_types, is_batch=False):
    """
    Start a CSVExporter worker for one contact.

    Outside batch mode the worker's range and ok signals are forwarded to this
    object's signals; in batch mode completion is reported to
    ``batch_finish_one`` and the range signal is left unconnected.

    @param contact: contact whose chat is exported
    @param message_types: message-type selection handed to the exporter
    @param is_batch: True when called as part of a batch export
    """
    worker = CSVExporter(contact, type_=self.CSV, message_types=message_types, time_range=self.time_range)
    self.children.append(worker)
    worker.progressSignal.connect(self.progress)
    if is_batch:
        worker.okSignal.connect(self.batch_finish_one)
    else:
        worker.rangeSignal.connect(self.rangeSignal)
        worker.okSignal.connect(self.okSignal)
    worker.start()
|
||||
|
||||
def run(self):
    """
    Thread entry point: run the export selected by ``self.output_type``.

    The output type is compared against the known constants in a fixed order
    and the first match decides which method runs. An unknown output type
    does nothing.
    """
    # (constant name, method name, takes contact + message_types?)
    routes = (
        ('DOCX', 'to_docx', True),
        ('CSV_ALL', 'to_csv_all', False),
        ('CONTACT_CSV', 'contact_to_csv', False),
        ('TXT', 'to_txt', True),
        ('AI_TXT', 'to_ai_txt', True),
        ('CSV', 'to_csv', True),
        ('HTML', 'to_html', True),
        ('JSON', 'to_json', True),
        ('Batch', 'batch_export', False),
    )
    for constant_name, method_name, per_contact in routes:
        if self.output_type == getattr(self, constant_name):
            handler = getattr(self, method_name)
            if per_contact:
                handler(self.contact, self.message_types)
            else:
                handler()
            return
|
||||
|
||||
def count_finish_num(self, num):
    """
    Count finished child threads and report once all of them are done.

    Each call increments ``self.num``. When it reaches ``self.total_num`` the
    completion is reported - through ``batch_finish_one(1)`` when running a
    batch export, otherwise through ``okSignal(1)`` - and the counter is reset
    to zero.

    @param num: payload of the connected signal; not used here
    @return: None
    """
    self.num += 1
    if self.num != self.total_num:
        # Still waiting for other child threads.
        return
    if self.output_type == self.Batch:
        self.batch_finish_one(1)
    else:
        self.okSignal.emit(1)
    self.num = 0
|
||||
|
||||
def cancel(self):
    """
    Ask this thread to stop by calling ``requestInterruption()``.

    NOTE(review): this only sets the interruption request; whether the export
    workers check for it is not visible from this method.
    """
    self.requestInterruption()
|
||||
|
||||
|
||||
class OutputMedia(QThread):
    """
    Worker thread that exports the voice messages of one contact.

    Emits ``progressSignal(1)`` after every message (whether or not it could
    be exported) and ``okSingal(34)`` once all messages were processed.
    """
    # The signal name keeps its original spelling because callers connect to
    # it by this name.
    okSingal = pyqtSignal(int)
    progressSignal = pyqtSignal(int)

    def __init__(self, contact, time_range=None):
        """
        @param contact: contact whose voice messages are exported
        @param time_range: passed through to the message query
        """
        super().__init__()
        self.contact = contact
        self.time_range = time_range

    def run(self):
        """Fetch all type-34 messages of the contact and export their audio."""
        origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
        messages = msg_db.get_messages_by_type(self.contact.wxid, 34, time_range=self.time_range)
        for message in messages:
            msgSvrId = message[9]
            try:
                media_msg_db.get_audio(msgSvrId, output_path=origin_path + "/voice")
            except Exception:
                # One failing voice message must not abort the whole export;
                # log it and continue with the next one.
                logger.error(traceback.format_exc())
            finally:
                self.progressSignal.emit(1)
        self.okSingal.emit(34)
|
||||
|
||||
|
||||
class OutputEmoji(QThread):
    """
    Worker thread for exporting the emoji (sticker) messages of one contact.

    Emits ``progressSignal(1)`` once per emoji message and ``okSingal(47)``
    when done.
    """
    # The signal name keeps its original spelling because callers connect to
    # it by this name.
    okSingal = pyqtSignal(int)
    progressSignal = pyqtSignal(int)

    def __init__(self, contact, time_range=None):
        """
        @param contact: contact whose emoji messages are processed
        @param time_range: passed through to the message query
        """
        super().__init__()
        self.contact = contact
        self.time_range = time_range

    def run(self):
        """Report progress for every type-47 message of the contact."""
        messages = msg_db.get_messages_by_type(self.contact.wxid, 47, time_range=self.time_range)
        # NOTE: the actual download is disabled in this version. The former
        # call was get_emoji(<message[7]>, thumb=True,
        # output_path=<contact output dir> + '/emoji'). Only progress is
        # reported so the progress total stays consistent.
        for _ in messages:
            self.progressSignal.emit(1)
        self.okSingal.emit(47)
|
||||
|
||||
|
||||
class OutputImage(QThread):
    """
    Worker thread that exports the image messages of one contact.

    Emits ``progressSignal(1)`` after every message and ``okSingal(47)`` once
    all messages were processed.
    """
    # The signal name keeps its original spelling because callers connect to
    # it by this name.
    okSingal = pyqtSignal(int)
    progressSignal = pyqtSignal(int)

    def __init__(self, contact, time_range=None):
        """
        @param contact: contact whose images are exported
        @param time_range: passed through to the message query; defaults to
            None like the other media workers
        """
        super().__init__()
        self.contact = contact
        self.child_thread_num = 2
        self.time_range = time_range
        # Placeholder slots for child threads; run() does not spawn any.
        self.child_threads = [0] * (self.child_thread_num + 1)
        self.num = 0

    def count1(self, num):
        """
        Count one finished child thread and emit ``okSingal`` when
        ``child_thread_num`` of them are done.

        @param num: payload of the connected signal; not used here
        """
        self.num += 1
        print('图片导出完成一个')
        if self.num == self.child_thread_num:
            self.okSingal.emit(47)
            print('图片导出完成')

    def run(self):
        """Copy every type-3 (image) message of the contact to the output folder."""
        origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
        messages = msg_db.get_messages_by_type(self.contact.wxid, 3, time_range=self.time_range)
        base_path = os.path.join(OUTPUT_DIR, '聊天记录', self.contact.remark, 'image')
        for message in messages:
            str_content = message[7]
            BytesExtra = message[10]
            timestamp = message[5]
            try:
                image_path = hard_link_db.get_image(str_content, BytesExtra, up_dir=Me().wx_dir, thumb=False)
                image_path = get_image(image_path, base_path=base_path)
                try:
                    # Best effort: stamp the exported file with the message time.
                    os.utime(origin_path + image_path[1:], (timestamp, timestamp))
                except Exception:
                    pass
            except Exception:
                # One failing image must not abort the whole export.
                logger.error(traceback.format_exc())
            finally:
                self.progressSignal.emit(1)
        # NOTE(review): the payload is 47 although this worker handles type-3
        # messages; kept as is because receivers may depend on it.
        self.okSingal.emit(47)
|
||||
|
||||
|
||||
class OutputImageChild(QThread):
    """
    Worker thread that exports a given list of image messages of one contact.

    Falls back to the thumbnail when the full-size image file does not exist
    and skips the message when neither exists. Emits ``progressSignal(1)``
    after every message and ``okSingal(47)`` at the end.
    """
    # The signal name keeps its original spelling because callers connect to
    # it by this name.
    okSingal = pyqtSignal(int)
    progressSignal = pyqtSignal(int)

    def __init__(self, contact, messages, time_range):
        """
        @param contact: contact the messages belong to
        @param messages: image message rows to export
        @param time_range: stored on the instance; not read by run()
        """
        super().__init__()
        self.contact = contact
        self.messages = messages
        self.time_range = time_range

    def run(self):
        """Export each image in ``self.messages`` and report progress."""
        origin_path = os.path.join(os.getcwd(), OUTPUT_DIR, '聊天记录', self.contact.remark)
        for message in self.messages:
            str_content = message[7]
            BytesExtra = message[10]
            timestamp = message[5]
            try:
                image_path = hard_link_db.get_image(str_content, BytesExtra, thumb=False)
                if not os.path.exists(os.path.join(Me().wx_dir, image_path)):
                    # Full-size image is missing: try the thumbnail instead.
                    image_thumb_path = hard_link_db.get_image(str_content, BytesExtra, thumb=True)
                    if not os.path.exists(os.path.join(Me().wx_dir, image_thumb_path)):
                        continue
                    image_path = image_thumb_path
                # NOTE(review): base_path is hard-coded to '/data/...' here,
                # whereas OutputImage builds it from OUTPUT_DIR - confirm.
                image_path = get_image(image_path, base_path=f'/data/聊天记录/{self.contact.remark}/image')
                try:
                    # Best effort: stamp the exported file with the message time.
                    os.utime(origin_path + image_path[1:], (timestamp, timestamp))
                except Exception:
                    pass
            except Exception:
                # One failing image must not abort the remaining ones.
                logger.error(traceback.format_exc())
            finally:
                self.progressSignal.emit(1)
        self.okSingal.emit(47)
        print('图片子线程完成')
|
||||
|
||||
|
||||
# Running this module directly is a no-op.
if __name__ == "__main__":
    pass
|
||||
Reference in New Issue
Block a user