2025-06-10 12:35:20 +00:00
|
|
|
|
# coding=utf-8
|
|
|
|
|
|
"""
|
|
|
|
|
|
@project: MaxKB
|
|
|
|
|
|
@Author:虎虎
|
|
|
|
|
|
@file: application_chat.py
|
|
|
|
|
|
@date:2025/6/10 11:06
|
|
|
|
|
|
@desc:
|
|
|
|
|
|
"""
|
|
|
|
|
|
import datetime
|
|
|
|
|
|
import os
|
|
|
|
|
|
import re
|
|
|
|
|
|
from io import BytesIO
|
|
|
|
|
|
from typing import Dict
|
|
|
|
|
|
|
|
|
|
|
|
import openpyxl
|
|
|
|
|
|
import pytz
|
|
|
|
|
|
from django.core import validators
|
|
|
|
|
|
from django.db import models
|
|
|
|
|
|
from django.db.models import QuerySet, Q
|
|
|
|
|
|
from django.http import StreamingHttpResponse
|
2025-09-28 07:18:02 +00:00
|
|
|
|
from django.utils import timezone
|
2025-06-10 12:35:20 +00:00
|
|
|
|
from django.utils.translation import gettext_lazy as _, gettext
|
|
|
|
|
|
from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
|
|
|
|
|
|
from rest_framework import serializers
|
|
|
|
|
|
|
2025-07-07 08:08:37 +00:00
|
|
|
|
from application.models import Chat, Application, ChatRecord
|
2025-10-22 06:53:42 +00:00
|
|
|
|
from common.db.search import get_dynamics_model, native_search, native_page_search, native_page_handler
|
2025-06-25 08:56:10 +00:00
|
|
|
|
from common.exception.app_exception import AppApiException
|
2025-06-10 12:35:20 +00:00
|
|
|
|
from common.utils.common import get_file_content
|
|
|
|
|
|
from maxkb.conf import PROJECT_DIR
|
2025-07-07 08:08:37 +00:00
|
|
|
|
from maxkb.settings import TIME_ZONE, edition
|
2025-06-10 12:35:20 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ApplicationChatResponseSerializers(serializers.Serializer):
|
|
|
|
|
|
id = serializers.UUIDField(required=True, label=_("chat id"))
|
|
|
|
|
|
abstract = serializers.CharField(required=True, label=_("summary"))
|
|
|
|
|
|
chat_user_id = serializers.UUIDField(required=True, label=_("Chat User ID"))
|
|
|
|
|
|
chat_user_type = serializers.CharField(required=True, label=_("Chat User Type"))
|
|
|
|
|
|
is_deleted = serializers.BooleanField(required=True, label=_("Is delete"))
|
|
|
|
|
|
application_id = serializers.UUIDField(required=True, label=_("Application ID"))
|
|
|
|
|
|
chat_record_count = serializers.IntegerField(required=True, label=_("Number of conversations"))
|
|
|
|
|
|
star_num = serializers.IntegerField(required=True, label=_("Number of Likes"))
|
|
|
|
|
|
trample_num = serializers.IntegerField(required=True, label=_("Number of thumbs-downs"))
|
|
|
|
|
|
mark_sum = serializers.IntegerField(required=True, label=_("Number of tags"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ApplicationChatRecordExportRequest(serializers.Serializer):
|
|
|
|
|
|
select_ids = serializers.ListField(required=True, label=_("Chat ID List"),
|
|
|
|
|
|
child=serializers.UUIDField(required=True, label=_("Chat ID")))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ApplicationChatQuerySerializers(serializers.Serializer):
|
2025-06-25 08:56:10 +00:00
|
|
|
|
workspace_id = serializers.CharField(required=False, allow_null=True, allow_blank=True, label=_("Workspace ID"))
|
2025-06-10 12:35:20 +00:00
|
|
|
|
abstract = serializers.CharField(required=False, allow_blank=True, allow_null=True, label=_("summary"))
|
2025-08-25 06:41:21 +00:00
|
|
|
|
username = serializers.CharField(required=False, allow_blank=True, allow_null=True, label=_("username"))
|
2025-06-10 12:35:20 +00:00
|
|
|
|
start_time = serializers.DateField(format='%Y-%m-%d', label=_("Start time"))
|
|
|
|
|
|
end_time = serializers.DateField(format='%Y-%m-%d', label=_("End time"))
|
|
|
|
|
|
application_id = serializers.UUIDField(required=True, label=_("Application ID"))
|
|
|
|
|
|
min_star = serializers.IntegerField(required=False, min_value=0,
|
|
|
|
|
|
label=_("Minimum number of likes"))
|
|
|
|
|
|
min_trample = serializers.IntegerField(required=False, min_value=0,
|
|
|
|
|
|
label=_("Minimum number of clicks"))
|
|
|
|
|
|
comparer = serializers.CharField(required=False, label=_("Comparator"), validators=[
|
|
|
|
|
|
validators.RegexValidator(regex=re.compile("^and|or$"),
|
|
|
|
|
|
message=_("Only supports and|or"), code=500)
|
|
|
|
|
|
])
|
|
|
|
|
|
|
2025-06-25 08:56:10 +00:00
|
|
|
|
def is_valid(self, *, raise_exception=False):
|
|
|
|
|
|
super().is_valid(raise_exception=True)
|
|
|
|
|
|
workspace_id = self.data.get('workspace_id')
|
|
|
|
|
|
query_set = QuerySet(Application).filter(id=self.data.get('application_id'))
|
|
|
|
|
|
if workspace_id:
|
|
|
|
|
|
query_set = query_set.filter(workspace_id=workspace_id)
|
|
|
|
|
|
if not query_set.exists():
|
|
|
|
|
|
raise AppApiException(500, _('Application id does not exist'))
|
|
|
|
|
|
|
2025-06-10 12:35:20 +00:00
|
|
|
|
def get_end_time(self):
|
2025-09-28 07:18:02 +00:00
|
|
|
|
d = datetime.datetime.strptime(self.data.get('end_time'), '%Y-%m-%d').date()
|
|
|
|
|
|
naive = datetime.datetime.combine(d, datetime.time.max)
|
|
|
|
|
|
return timezone.make_aware(naive, timezone.get_default_timezone())
|
2025-06-10 12:35:20 +00:00
|
|
|
|
|
|
|
|
|
|
def get_start_time(self):
|
2025-09-28 07:18:02 +00:00
|
|
|
|
d = datetime.datetime.strptime(self.data.get('start_time'), '%Y-%m-%d').date()
|
|
|
|
|
|
naive = datetime.datetime.combine(d, datetime.time.min)
|
|
|
|
|
|
return timezone.make_aware(naive, timezone.get_default_timezone())
|
2025-06-10 12:35:20 +00:00
|
|
|
|
|
|
|
|
|
|
def get_query_set(self, select_ids=None):
|
|
|
|
|
|
end_time = self.get_end_time()
|
|
|
|
|
|
start_time = self.get_start_time()
|
|
|
|
|
|
query_set = QuerySet(model=get_dynamics_model(
|
|
|
|
|
|
{'application_chat.application_id': models.CharField(),
|
|
|
|
|
|
'application_chat.abstract': models.CharField(),
|
2025-08-25 06:41:21 +00:00
|
|
|
|
'application_chat.asker': models.JSONField(),
|
2025-06-10 12:35:20 +00:00
|
|
|
|
"star_num": models.IntegerField(),
|
|
|
|
|
|
'trample_num': models.IntegerField(),
|
|
|
|
|
|
'comparer': models.CharField(),
|
|
|
|
|
|
'application_chat.update_time': models.DateTimeField(),
|
2025-10-22 06:53:42 +00:00
|
|
|
|
'application_chat.id': models.UUIDField(),
|
|
|
|
|
|
'application_chat_record_temp.id': models.UUIDField()}))
|
2025-06-10 12:35:20 +00:00
|
|
|
|
|
|
|
|
|
|
base_query_dict = {'application_chat.application_id': self.data.get("application_id"),
|
|
|
|
|
|
'application_chat.update_time__gte': start_time,
|
|
|
|
|
|
'application_chat.update_time__lte': end_time,
|
|
|
|
|
|
}
|
|
|
|
|
|
if 'abstract' in self.data and self.data.get('abstract') is not None:
|
|
|
|
|
|
base_query_dict['application_chat.abstract__icontains'] = self.data.get('abstract')
|
2025-08-25 06:41:21 +00:00
|
|
|
|
if 'username' in self.data and self.data.get('username') is not None:
|
2025-08-27 03:47:05 +00:00
|
|
|
|
base_query_dict['application_chat.asker__username__icontains'] = self.data.get('username')
|
2025-08-25 06:41:21 +00:00
|
|
|
|
|
2025-06-10 12:35:20 +00:00
|
|
|
|
if select_ids is not None and len(select_ids) > 0:
|
|
|
|
|
|
base_query_dict['application_chat.id__in'] = select_ids
|
|
|
|
|
|
base_condition = Q(**base_query_dict)
|
|
|
|
|
|
min_star_query = None
|
|
|
|
|
|
min_trample_query = None
|
|
|
|
|
|
if 'min_star' in self.data and self.data.get('min_star') is not None:
|
|
|
|
|
|
min_star_query = Q(star_num__gte=self.data.get('min_star'))
|
|
|
|
|
|
if 'min_trample' in self.data and self.data.get('min_trample') is not None:
|
|
|
|
|
|
min_trample_query = Q(trample_num__gte=self.data.get('min_trample'))
|
|
|
|
|
|
if min_star_query is not None and min_trample_query is not None:
|
|
|
|
|
|
if self.data.get(
|
|
|
|
|
|
'comparer') is not None and self.data.get('comparer') == 'or':
|
|
|
|
|
|
condition = base_condition & (min_star_query | min_trample_query)
|
|
|
|
|
|
else:
|
|
|
|
|
|
condition = base_condition & (min_star_query & min_trample_query)
|
|
|
|
|
|
elif min_star_query is not None:
|
|
|
|
|
|
condition = base_condition & min_star_query
|
|
|
|
|
|
elif min_trample_query is not None:
|
|
|
|
|
|
condition = base_condition & min_trample_query
|
|
|
|
|
|
else:
|
|
|
|
|
|
condition = base_condition
|
|
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
|
'default_queryset': query_set.filter(condition).order_by("-application_chat.update_time")
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def list(self, with_valid=True):
|
|
|
|
|
|
if with_valid:
|
|
|
|
|
|
self.is_valid(raise_exception=True)
|
|
|
|
|
|
return native_search(self.get_query_set(), select_string=get_file_content(
|
2025-07-12 02:30:47 +00:00
|
|
|
|
os.path.join(PROJECT_DIR, "apps", "application", 'sql',
|
|
|
|
|
|
('list_application_chat_ee.sql' if ['PE', 'EE'].__contains__(
|
2025-07-07 08:08:37 +00:00
|
|
|
|
edition) else 'list_application_chat.sql'))),
|
2025-06-10 12:35:20 +00:00
|
|
|
|
with_table_name=False)
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
|
def paragraph_list_to_string(paragraph_list):
|
|
|
|
|
|
return "\n**********\n".join(
|
|
|
|
|
|
[f"{paragraph.get('title')}:\n{paragraph.get('content')}" for paragraph in
|
|
|
|
|
|
paragraph_list] if paragraph_list is not None else '')
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
|
def to_row(row: Dict):
|
2025-07-07 08:08:37 +00:00
|
|
|
|
details = row.get('details') or {}
|
2025-10-31 06:00:06 +00:00
|
|
|
|
padding_problem_text = ' '.join((node.get("answer", "") or "") for key, node in details.items() if
|
2025-06-10 12:35:20 +00:00
|
|
|
|
node.get("type") == 'question-node')
|
|
|
|
|
|
search_dataset_node_list = [(key, node) for key, node in details.items() if
|
|
|
|
|
|
node.get("type") == 'search-dataset-node' or node.get(
|
|
|
|
|
|
"step_type") == 'search_step']
|
|
|
|
|
|
reference_paragraph_len = '\n'.join([str(len(node.get('paragraph_list',
|
|
|
|
|
|
[]))) if key == 'search_step' else node.get(
|
|
|
|
|
|
'name') + ':' + str(
|
|
|
|
|
|
len(node.get('paragraph_list', [])) if node.get('paragraph_list', []) is not None else '0') for
|
|
|
|
|
|
key, node in search_dataset_node_list])
|
|
|
|
|
|
reference_paragraph = '\n----------\n'.join(
|
|
|
|
|
|
[ApplicationChatQuerySerializers.paragraph_list_to_string(node.get('paragraph_list',
|
|
|
|
|
|
[])) if key == 'search_step' else node.get(
|
|
|
|
|
|
'name') + ':\n' + ApplicationChatQuerySerializers.paragraph_list_to_string(node.get('paragraph_list',
|
|
|
|
|
|
[])) for
|
|
|
|
|
|
key, node in search_dataset_node_list])
|
2025-07-07 08:08:37 +00:00
|
|
|
|
improve_paragraph_list = row.get('improve_paragraph_list') or []
|
2025-06-10 12:35:20 +00:00
|
|
|
|
vote_status_map = {'-1': '未投票', '0': '赞同', '1': '反对'}
|
|
|
|
|
|
return [str(row.get('chat_id')), row.get('abstract'), row.get('problem_text'), padding_problem_text,
|
|
|
|
|
|
row.get('answer_text'), vote_status_map.get(row.get('vote_status')), reference_paragraph_len,
|
|
|
|
|
|
reference_paragraph,
|
|
|
|
|
|
"\n".join([
|
|
|
|
|
|
f"{improve_paragraph_list[index].get('title')}\n{improve_paragraph_list[index].get('content')}"
|
|
|
|
|
|
for index in range(len(improve_paragraph_list))]),
|
2025-07-16 07:35:33 +00:00
|
|
|
|
row.get('asker').get('username'),
|
2025-07-07 08:08:37 +00:00
|
|
|
|
(row.get('message_tokens') or 0) + (row.get('answer_tokens') or 0), row.get('run_time'),
|
2025-06-10 12:35:20 +00:00
|
|
|
|
str(row.get('create_time').astimezone(pytz.timezone(TIME_ZONE)).strftime('%Y-%m-%d %H:%M:%S')
|
2025-07-07 08:08:37 +00:00
|
|
|
|
if row.get('create_time') is not None else None)]
|
2025-06-10 12:35:20 +00:00
|
|
|
|
|
2025-10-22 06:53:42 +00:00
|
|
|
|
@staticmethod
|
|
|
|
|
|
def reset_value(value):
|
|
|
|
|
|
if isinstance(value, str):
|
|
|
|
|
|
value = re.sub(ILLEGAL_CHARACTERS_RE, '', value)
|
|
|
|
|
|
if isinstance(value, datetime.datetime):
|
|
|
|
|
|
eastern = pytz.timezone(TIME_ZONE)
|
|
|
|
|
|
c = datetime.timezone(eastern._utcoffset)
|
|
|
|
|
|
value = value.astimezone(c)
|
|
|
|
|
|
return value
|
|
|
|
|
|
|
2025-06-10 12:35:20 +00:00
|
|
|
|
def export(self, data, with_valid=True):
|
|
|
|
|
|
if with_valid:
|
|
|
|
|
|
self.is_valid(raise_exception=True)
|
|
|
|
|
|
ApplicationChatRecordExportRequest(data=data).is_valid(raise_exception=True)
|
2025-07-07 08:08:37 +00:00
|
|
|
|
|
2025-06-10 12:35:20 +00:00
|
|
|
|
def stream_response():
|
2025-10-22 06:53:42 +00:00
|
|
|
|
workbook = openpyxl.Workbook(write_only=True)
|
|
|
|
|
|
worksheet = workbook.create_sheet(title='Sheet1')
|
|
|
|
|
|
current_page = 1
|
|
|
|
|
|
page_size = 500
|
2025-06-10 12:35:20 +00:00
|
|
|
|
headers = [gettext('Conversation ID'), gettext('summary'), gettext('User Questions'),
|
2025-07-16 10:44:51 +00:00
|
|
|
|
gettext('Problem after optimization'),
|
2025-06-10 12:35:20 +00:00
|
|
|
|
gettext('answer'), gettext('User feedback'),
|
|
|
|
|
|
gettext('Reference segment number'),
|
|
|
|
|
|
gettext('Section title + content'),
|
2025-07-12 02:30:47 +00:00
|
|
|
|
gettext('Annotation'), gettext('USER'), gettext('Consuming tokens'),
|
2025-06-10 12:35:20 +00:00
|
|
|
|
gettext('Time consumed (s)'),
|
|
|
|
|
|
gettext('Question Time')]
|
2025-10-22 06:53:42 +00:00
|
|
|
|
worksheet.append(headers)
|
|
|
|
|
|
for data_list in native_page_handler(page_size, self.get_query_set(data.get('select_ids')),
|
|
|
|
|
|
primary_key='application_chat_record_temp.id',
|
|
|
|
|
|
primary_queryset='default_queryset',
|
|
|
|
|
|
get_primary_value=lambda item: item.get('id'),
|
|
|
|
|
|
select_string=get_file_content(
|
|
|
|
|
|
os.path.join(PROJECT_DIR, "apps", "application", 'sql',
|
|
|
|
|
|
('export_application_chat_ee.sql' if ['PE',
|
|
|
|
|
|
'EE'].__contains__(
|
|
|
|
|
|
edition) else 'export_application_chat.sql'))),
|
|
|
|
|
|
with_table_name=False):
|
|
|
|
|
|
|
|
|
|
|
|
for item in data_list:
|
|
|
|
|
|
row = [self.reset_value(v) for v in self.to_row(item)]
|
|
|
|
|
|
worksheet.append(row)
|
|
|
|
|
|
current_page = current_page + 1
|
2025-06-10 12:35:20 +00:00
|
|
|
|
output = BytesIO()
|
|
|
|
|
|
workbook.save(output)
|
|
|
|
|
|
output.seek(0)
|
|
|
|
|
|
yield output.getvalue()
|
|
|
|
|
|
output.close()
|
|
|
|
|
|
workbook.close()
|
|
|
|
|
|
|
|
|
|
|
|
response = StreamingHttpResponse(stream_response(),
|
|
|
|
|
|
content_type='application/vnd.open.xmlformats-officedocument.spreadsheetml.sheet')
|
|
|
|
|
|
response['Content-Disposition'] = 'attachment; filename="data.xlsx"'
|
|
|
|
|
|
return response
|
|
|
|
|
|
|
|
|
|
|
|
def page(self, current_page: int, page_size: int, with_valid=True):
|
|
|
|
|
|
if with_valid:
|
|
|
|
|
|
self.is_valid(raise_exception=True)
|
|
|
|
|
|
return native_page_search(current_page, page_size, self.get_query_set(), select_string=get_file_content(
|
2025-07-07 08:08:37 +00:00
|
|
|
|
os.path.join(PROJECT_DIR, "apps", "application", 'sql',
|
|
|
|
|
|
('list_application_chat_ee.sql' if ['PE', 'EE'].__contains__(
|
|
|
|
|
|
edition) else 'list_application_chat.sql'))),
|
2025-06-10 12:35:20 +00:00
|
|
|
|
with_table_name=False)
|
2025-07-07 08:08:37 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ChatCountSerializer(serializers.Serializer):
|
|
|
|
|
|
chat_id = serializers.UUIDField(required=True, label=_("Conversation ID"))
|
|
|
|
|
|
|
|
|
|
|
|
def get_query_set(self):
|
|
|
|
|
|
return QuerySet(ChatRecord).filter(chat_id=self.data.get('chat_id'))
|
|
|
|
|
|
|
|
|
|
|
|
def update_chat(self):
|
|
|
|
|
|
self.is_valid(raise_exception=True)
|
|
|
|
|
|
count_chat_record = native_search(self.get_query_set(), get_file_content(
|
|
|
|
|
|
os.path.join(PROJECT_DIR, "apps", "application", 'sql', 'count_chat_record.sql')), with_search_one=True)
|
|
|
|
|
|
QuerySet(Chat).filter(id=self.data.get('chat_id')).update(star_num=count_chat_record.get('star_num', 0) or 0,
|
|
|
|
|
|
trample_num=count_chat_record.get('trample_num',
|
|
|
|
|
|
0) or 0,
|
|
|
|
|
|
chat_record_count=count_chat_record.get(
|
|
|
|
|
|
'chat_record_count', 0) or 0,
|
|
|
|
|
|
mark_sum=count_chat_record.get('mark_sum', 0) or 0)
|
|
|
|
|
|
return True
|