Files
online_sys/frontend_大健康/reorganize_questions_data_only.py

171 lines
6.2 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
重新组织面试题数据 - 只修改数据不修改代码结构
将各个position的questions合并到对应industry的questions字段
"""
import json
import re
from datetime import datetime
def load_health_resume_data(path='网页未导入数据/大健康产业/大健康岗位简历.json'):
    """Load the health-industry resume dataset from a JSON file.

    Args:
        path: Location of the UTF-8 encoded JSON file. Defaults to the
            dataset path this script has always read, so existing
            zero-argument callers are unaffected.

    Returns:
        The decoded JSON payload, or ``None`` when the file cannot be read
        or parsed. The failure is printed rather than raised.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing or unreadable file.
        # ValueError: covers both UnicodeDecodeError and json.JSONDecodeError.
        print(f"Error loading health resume data: {e}")
        return None
def _split_question_and_answer(block):
    """Split one numbered question block into ``(question, answer)`` strings.

    Lines before the answer marker form the question; lines after it (and
    any text following the marker on the same line) form the answer.
    Multiple lines are joined with single spaces; blank lines are ignored.
    """
    # Accept the ASCII-colon marker used by the original code as well as the
    # full-width-colon spelling of the same marker.
    answer_markers = ('示例答案:', '示例答案:')
    question_lines = []
    answer_lines = []
    in_answer = False
    for raw_line in block.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        marker = next((m for m in answer_markers if line.startswith(m)), None)
        if marker is not None:
            in_answer = True
            # Keep answer text that shares a line with the marker.
            line = line[len(marker):].strip()
            if not line:
                continue
        if in_answer:
            answer_lines.append(line)
        else:
            question_lines.append(line)
    return " ".join(question_lines), " ".join(answer_lines)


def parse_interview_content_to_flat_array(content):
    """Parse interview-question text into a flat list of Q&A dicts.

    The text is expected to consist of sections introduced by headings of
    the form ``# <Chinese numeral>、<title>``. Inside each section, every
    line that starts with ``<digits>.`` begins a new question; the lines up
    to the answer marker are the question and the rest is its answer.

    Args:
        content: The raw interview text. Falsy values yield ``[]``.

    Returns:
        A list of ``{"id", "question", "answer"}`` dicts. Ids are ``q1``,
        ``q2``, ... numbered continuously across all sections. Returns ``[]``
        when no section heading is found.
    """
    if not content:
        return []

    # The heading pattern is anchored on a preceding newline, so a heading on
    # the very first line needs one prepended.
    if content.startswith('# '):
        content = '\n' + content
    sections = re.split(r'\n# ([一二三四五六七八九十]+、[^#\n]+)', content)
    if len(sections) < 2:
        return []

    questions = []
    question_counter = 1
    # re.split with one capture group yields [preamble, title, body, title,
    # body, ...]; only the bodies (even indexes from 2) carry questions.
    for section_content in sections[2::2]:
        # Split only where a line starts with "<digits>." (not a decimal such
        # as "1.5"), so each block holds a whole question with its answer
        # rather than a single line.
        question_parts = re.split(
            r'^[ \t]*\d+\.(?!\d)[ \t]*',
            section_content,
            flags=re.MULTILINE,
        )
        # question_parts[0] is whatever precedes the first numbered line.
        for question_block in question_parts[1:]:
            question_text, answer_text = _split_question_and_answer(question_block)
            if not question_text:
                continue
            questions.append({
                "id": f"q{question_counter}",
                "question": question_text,
                "answer": answer_text
            })
            question_counter += 1
    return questions
def _remove_questions_fields(js_text):
    """Return *js_text* with every ``"questions": [...]`` property removed.

    The array body is matched up to the first ``]``, so this assumes the
    existing arrays contain no ``]`` character — TODO confirm against the
    mock file.
    """
    def _drop(match):
        leading_comma, trailing_separator = match.group(1), match.group(2)
        # A property sitting between two siblings has a comma on both sides;
        # keep one so the neighbours stay separated.
        if leading_comma and trailing_separator:
            return trailing_separator
        return ''

    return re.sub(
        r'(,?)\s*"questions": \[[^\]]*?\](\s*,\s*)?',
        _drop,
        js_text,
        flags=re.DOTALL
    )


def _insert_industry_questions(js_text, industry_name, questions_data):
    """Append a ``"questions"`` property after an industry's positions array.

    Returns ``(new_text, match_count)``. The industry is located by its
    ``"name"`` property followed by a ``"positions"`` array matched up to the
    first ``]`` — presumably safe for the mock file's layout; verify if
    position objects ever contain nested arrays.
    """
    questions_json = json.dumps(questions_data, ensure_ascii=False, indent=6)
    pattern = rf'("name": "{re.escape(industry_name)}"[^}}]*?"positions": \[[^\]]*?\]\s*)'

    def _append(match):
        # A callable replacement inserts the JSON verbatim; a template string
        # would have re.sub reinterpret backslash escapes inside the JSON.
        return match.group(1) + ',\n "questions": ' + questions_json

    return re.subn(pattern, _append, js_text, flags=re.DOTALL)


def reorganize_questions_by_industry():
    """Move interview questions onto the industry entries of the mock file.

    Steps:
        1. Load the resume dataset and parse the interview text of the first
           item seen for each question category.
        2. Read ``src/mocks/resumeInterviewMock.js`` and write a timestamped
           backup copy next to it.
        3. Remove every existing ``"questions"`` array from the text.
        4. For each mapped industry, insert that category's questions after
           the industry's ``"positions"`` array.
        5. Overwrite the mock file with the result.

    Returns:
        ``True`` on completion. ``False`` when the dataset cannot be loaded
        or any exception occurs (the traceback is printed).
    """
    mock_path = 'src/mocks/resumeInterviewMock.js'
    try:
        health_data = load_health_resume_data()
        if not health_data:
            print("Failed to load health resume data")
            return False

        # Group parsed questions by category; the first item that yields
        # questions for a category wins.
        category_questions = {}
        for item in health_data:
            category = item.get('面试题', '')
            interview_content = item.get('面试题内容', '')
            if category and interview_content and category not in category_questions:
                questions = parse_interview_content_to_flat_array(interview_content)
                if questions:
                    category_questions[category] = questions
        print(f"解析了 {len(category_questions)} 个面试题类别")

        with open(mock_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Back up the untouched file before anything is rewritten.
        backup_filename = f'src/mocks/resumeInterviewMock.js.backup_{datetime.now().strftime("%Y%m%d_%H%M%S")}'
        with open(backup_filename, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"已创建备份文件: {backup_filename}")

        updated_content = _remove_questions_fields(content)

        # Industry name in the mock file -> question category in the dataset.
        industry_category_mapping = {
            "健康管理": "健康管理类岗位面试题",
            "健康检查": "健康检查类岗位面试题",
            "康复治疗": "康复治疗类岗位面试题",
            "医疗美容": "医疗美容类岗位面试题",
            "运营管理": "运营类岗位面试题",
            "心理健康": "心理健康类岗位面试题",
            "供应链管理": "供应链类岗位面试题",
            "药品制造": "药品制造类岗位面试题",
            "检测分析": "检测分析类岗位面试题",
            "临床研究": "临床研究类岗位面试题"
        }

        for industry_name, category in industry_category_mapping.items():
            questions_data = category_questions.get(category)
            if not questions_data:
                continue
            updated_content, match_count = _insert_industry_questions(
                updated_content, industry_name, questions_data
            )
            # Report success only when the industry was actually found.
            if match_count:
                print(f"✅ 为 {industry_name} 行业添加了面试题 ({len(questions_data)} 个问题)")
            else:
                print(f"⚠️ 未在文件中找到 {industry_name} 行业,未添加面试题")

        with open(mock_path, 'w', encoding='utf-8') as f:
            f.write(updated_content)
        print("面试题数据重组完成!")
        return True
    except Exception as e:
        # Top-level boundary of the script: report the failure and signal it
        # through the return value instead of propagating.
        print(f"重组失败: {e}")
        import traceback
        traceback.print_exc()
        return False
# Run the reorganization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    reorganize_questions_by_industry()