2024-05-23 10:57:49 +00:00
|
|
|
|
# coding=utf-8
|
|
|
|
|
|
"""
|
|
|
|
|
|
@project: maxkb
|
|
|
|
|
|
@Author:虎
|
|
|
|
|
|
@file: base_parse_qa_handle.py
|
|
|
|
|
|
@date:2024/5/21 14:56
|
|
|
|
|
|
@desc:
|
|
|
|
|
|
"""
|
|
|
|
|
|
from abc import ABC, abstractmethod
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-05-24 09:59:02 +00:00
|
|
|
|
def get_row_value(row, title_row_index_dict, field):
    """
    Fetch the cell of ``row`` that belongs to ``field``.

    :param row: indexable sequence holding the cells of one data row
    :param title_row_index_dict: mapping of field name -> column index
    :param field: name of the field to look up
    :return: the cell at the mapped column, or None when the field has no
             mapped column or the row is too short to contain that column
    """
    column = title_row_index_dict.get(field)
    # Field is not mapped to any column at all.
    if column is None:
        return None
    # Rows can be shorter than the header, so check bounds before indexing.
    return row[column] if column < len(row) else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_title_row_index_dict(title_row_list):
    """
    Build the mapping from field name to column index for a header row.

    Positional defaults are assigned from the number of header cells first:

    * 1 column  -> ``content`` only
    * 2 columns -> ``title``, ``content``
    * otherwise -> ``title``, ``content``, ``problem_list``

    Header cells that start with a known label then override those defaults
    with the index where the label was actually found.

    :param title_row_list: list of header cells; a cell may be None or a
                           non-string value, in which case it is ignored
    :return: dict with some of the keys ``title``, ``content`` and
             ``problem_list`` mapped to integer column indexes
    """
    column_count = len(title_row_list)
    if column_count == 1:
        title_row_index_dict = {'content': 0}
    elif column_count == 2:
        # This branch used to repeat the ``== 1`` test and was unreachable,
        # so two-column sheets were given a phantom ``problem_list`` column.
        title_row_index_dict = {'title': 0, 'content': 1}
    else:
        title_row_index_dict = {'title': 0, 'content': 1, 'problem_list': 2}

    # Header label prefix -> field name. The labels are runtime data and are
    # matched verbatim (segment title / segment content / question).
    prefix_to_field = (
        ('分段标题', 'title'),
        ('分段内容', 'content'),
        ('问题', 'problem_list'),
    )
    for index, title_row in enumerate(title_row_list):
        # Empty (None) or non-text header cells cannot match any label.
        if not isinstance(title_row, str):
            continue
        for prefix, field in prefix_to_field:
            if title_row.startswith(prefix):
                title_row_index_dict[field] = index
    return title_row_index_dict
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-05-23 10:57:49 +00:00
|
|
|
|
class BaseParseQAHandle(ABC):
    """
    Abstract interface for QA-file parse handlers.

    Concrete subclasses must implement both ``support`` and ``handle``;
    the class cannot be instantiated until they do.
    """

    @abstractmethod
    def support(self, file, get_buffer):
        """
        Abstract hook; the base implementation does nothing and returns None.

        NOTE(review): presumably reports whether this handler can parse
        ``file`` - confirm against the concrete implementations.

        :param file: the file object to inspect
        :param get_buffer: callable supplied by the caller; presumably
                           returns the raw content of ``file`` - TODO confirm
        """

    @abstractmethod
    def handle(self, file, get_buffer, save_image):
        """
        Abstract hook; the base implementation does nothing and returns None.

        NOTE(review): presumably parses ``file`` into QA data - confirm
        against the concrete implementations.

        :param file: the file object to parse
        :param get_buffer: callable supplied by the caller; presumably
                           returns the raw content of ``file`` - TODO confirm
        :param save_image: callable supplied by the caller; presumably
                           persists images found while parsing - TODO confirm
        """
|