251 lines
9.1 KiB
Python
251 lines
9.1 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
import re
|
|||
|
|
|
|||
|
|
def extract_all_questions_from_content(content):
    """Extract every interview question from *content*, grouped by category.

    The text is split on headings of the form ``# 一、<title>`` (a ``# ``
    followed by a Chinese numeral, ``、`` and the rest of the line).  Each
    section is handed to ``extract_questions_from_text``.  When no such
    heading exists, the whole text is parsed as one category named
    ``综合面试题``.

    Args:
        content: Raw interview-question text.  ``None`` or an empty string
            yields an empty list.

    Returns:
        A list of ``{"category": str, "questions": list}`` dicts.  Sections
        that produce no questions are omitted.  Text that precedes the first
        category heading is not parsed.
    """
    if not content:
        return []

    # Drop the "判断题" prefix together with an optional ASCII or full-width
    # (U+FF1A) colon and any whitespace that follows it.
    content = re.sub(r'判断题[:\uff1a]?\s*', '', content)

    # With one capture group, re.split returns
    # [preamble, title1, body1, title2, body2, ...].
    parts = re.split(r'# ([一二三四五六七八九十]+、[^\n]+)', content)

    if len(parts) == 1:
        # No category headings: treat the whole text as a single category.
        questions = extract_questions_from_text(content)
        if not questions:
            return []
        return [{"category": "综合面试题", "questions": questions}]

    all_questions = []
    for title, body in zip(parts[1::2], parts[2::2]):
        questions = extract_questions_from_text(body)
        if questions:
            all_questions.append({
                "category": title.strip(),
                "questions": questions,
            })
    return all_questions
|
|||
|
|
|
|||
|
|
# A question line is "<digits>." followed by some text.
_QUESTION_LINE_RE = re.compile(r'^(\d+)\.\s*(.+)$')
# Any line that starts with "<digits>." (with or without text after it).
_NUMBERED_LINE_RE = re.compile(r'^\d+\.')
# A line opens the answer section when it contains "示例答案", or "答案"
# followed by an ASCII or full-width (U+FF1A) colon.
_ANSWER_MARKER_RE = re.compile(r'示例答案|答案[:\uff1a]')
# Everything up to and including the FIRST answer label on a line.  The
# non-greedy ".*?" keeps a later "答案" inside the answer text intact.
_ANSWER_PREFIX_RE = re.compile(r'^.*?(?:示例)?答案[:\uff1a]?\s*')
# An answer label at the very start of the joined answer text.
_LEADING_ANSWER_LABEL_RE = re.compile(r'^(?:示例)?答案[:\uff1a]?\s*')
# Fallback layout: question line, a line holding only the answer label,
# then the answer lines up to the next numbered line or '#' heading.
_FALLBACK_QA_RE = re.compile(
    r'(\d+)\.\s*([^\n]+)\s*\n\s*(?:示例)?答案[:\uff1a]?\s*\n\s*'
    r'([^\n]+(?:\n(?!\d+\.|#)[^\n]*)*)'
)


def _build_question_entry(question_id, question, answer_lines):
    """Return a question dict, or None if the question or answer is empty."""
    if not question or not answer_lines:
        return None
    answer_text = ' '.join(answer_lines).strip()
    # Remove an answer label that is still at the front of the joined text.
    answer_text = _LEADING_ANSWER_LABEL_RE.sub('', answer_text)
    answer_text = re.sub(r'\s+', ' ', answer_text)
    if not answer_text:
        return None
    return {
        "id": f"q{question_id}",
        "question": question,
        "answer": answer_text,
    }


def extract_questions_from_text(text):
    """Extract numbered questions and their answers from *text*.

    The text is scanned line by line:

    * A line of the form ``<digits>. <text>`` starts a new question.
    * A line containing ``示例答案``, or ``答案`` followed by an ASCII or
      full-width colon, opens the answer section; text after the label on
      that line is kept.
    * While in the answer section, non-empty lines are collected unless they
      start with ``<digits>.`` or ``#``.  A blank line closes the section.

    If the scan finds nothing, a regex fallback handles the layout where the
    answer label sits alone on the line after the question.

    Note that any line matching ``<digits>. <text>`` starts a new question,
    including a numbered item inside an answer.

    Args:
        text: Text of one category section.  ``None`` or an empty string
            yields an empty list.

    Returns:
        A list of ``{"id": "q<N>", "question": str, "answer": str}`` dicts.
        ``N`` counts from 1 over the questions actually kept; questions with
        no answer are dropped.  Whitespace runs in answers are collapsed to
        single spaces.
    """
    if not text:
        return []

    questions = []
    current_question = None
    current_answer = []
    in_answer_section = False

    def save_current():
        # IDs follow the number of entries kept so far, so dropped questions
        # leave no gaps in the numbering.
        entry = _build_question_entry(
            len(questions) + 1, current_question, current_answer
        )
        if entry is not None:
            questions.append(entry)

    for raw_line in text.split('\n'):
        line = raw_line.strip()
        question_match = _QUESTION_LINE_RE.match(line)

        if question_match:
            # Save the previous question before starting a new one.
            save_current()
            current_question = question_match.group(2).strip()
            current_answer = []
            in_answer_section = False
        elif not line:
            # A blank line ends the current answer section.
            in_answer_section = False
        elif _ANSWER_MARKER_RE.search(line):
            in_answer_section = True
            # The answer may begin on the same line as the label.
            answer_on_same_line = _ANSWER_PREFIX_RE.sub('', line, count=1).strip()
            if answer_on_same_line:
                current_answer.append(answer_on_same_line)
        elif in_answer_section:
            # Skip bare numbered lines and '#' headings inside an answer.
            if not _NUMBERED_LINE_RE.match(line) and not line.startswith('#'):
                current_answer.append(line)

    # Save the last question.
    save_current()

    if not questions:
        # Nothing found line by line: try the label-on-its-own-line layout.
        for _number, question_part, answer_part in _FALLBACK_QA_RE.findall(text):
            question_text = question_part.strip()
            answer_text = re.sub(r'\s+', ' ', answer_part.strip())
            if question_text and answer_text:
                questions.append({
                    "id": f"q{len(questions) + 1}",
                    "question": question_text,
                    "answer": answer_text,
                })

    return questions
|
|||
|
|
|
|||
|
|
def main():
    """Rebuild the ``"questions"`` arrays in the interview mock file.

    Steps:

    1. Load the position/resume JSON and read the mock JS file as text.
    2. For every record, extract its interview questions and merge them per
       position group (``简历岗位群``), combining categories with the same
       title and skipping question texts already present in that category.
    3. For each position group listed in the name-to-id mapping, renumber the
       questions, serialise them as JSON and splice the result into the mock
       text after the ``"positions"`` array of the entry with that id.
    4. Write the mock file back and print a summary.

    Both file paths are hard-coded absolute paths.
    """
    data_path = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/网页未导入数据/大健康产业/大健康岗位简历.json'
    mock_path = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/src/mocks/resumeInterviewMock.js'

    # Load the position/resume data.
    with open(data_path, 'r', encoding='utf-8') as f:
        health_data = json.load(f)

    # Read the mock file as plain text; it is edited with regexes below.
    with open(mock_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Collect all interview questions per position group.
    industry_all_questions = {}
    for item in health_data:
        industry = item.get('简历岗位群', '')
        interview_content = item.get('面试题内容', '')
        if not (industry and interview_content):
            continue

        merged = industry_all_questions.setdefault(industry, [])
        for cat in extract_all_questions_from_content(interview_content):
            existing_cat = next(
                (c for c in merged if c['category'] == cat['category']),
                None,
            )
            if existing_cat is None:
                # First time this category appears for the group.
                merged.append(cat)
                continue

            # Merge into the existing category, skipping known question texts.
            existing_questions = {q['question'] for q in existing_cat['questions']}
            for q in cat['questions']:
                if q['question'] not in existing_questions:
                    existing_cat['questions'].append(q)

    # Position-group name -> id used in the mock file.
    industry_mapping = {
        '健康管理': 'health_1',
        '健康检查': 'health_2',
        '康复治疗': 'health_3',
        '慢性病管理': 'health_4',
        '轻医美': 'health_5',
        '心理健康': 'health_6',
        '社群运营': 'health_7',
        '药品供应链管理': 'health_8',
        '药品生产': 'health_9',
        '药品质量检测': 'health_10',
        '药物研发': 'health_11',
    }

    updates = 0
    for orig_name, industry_id in industry_mapping.items():
        categories = industry_all_questions.get(orig_name)
        if not categories:
            continue

        # Build the questions array: one group per category, with question
        # ids numbered continuously across all categories of the group.
        questions_array = []
        total_questions = 0
        for cat in categories:
            if not cat['questions']:
                continue
            renumbered_questions = [
                {
                    "id": f"q{total_questions + i}",
                    "question": q['question'],
                    "answer": q['answer'],
                }
                for i, q in enumerate(cat['questions'], 1)
            ]
            total_questions += len(renumbered_questions)
            questions_array.append({
                "id": f"group_q{len(questions_array) + 1}",
                "question": cat['category'],
                "subQuestions": renumbered_questions,
            })

        if not questions_array:
            continue

        print(f"✓ {orig_name} ({industry_id}): {len(questions_array)} 个分类,共 {total_questions} 个面试题")

        questions_json = json.dumps(questions_array, ensure_ascii=False, indent=2)
        id_pattern = re.escape(industry_id)

        # Remove an existing "questions" field that follows "positions".
        # NOTE(review): `\[[^\]]*?\]` stops at the first ']', so an existing
        # array that contains nested arrays (such as the "subQuestions" this
        # script writes) is not matched as a whole -- confirm before
        # re-running against an already-updated mock file.
        pattern1 = rf'("id":\s*"{id_pattern}"[^{{]*?"positions":\s*\[[^\]]*?\]),\s*"questions":\s*\[[^\]]*?\](\s*\}})'
        content = re.sub(pattern1, r'\1\2', content, flags=re.DOTALL)

        # Insert the new "questions" field right after "positions".
        pattern2 = rf'("id":\s*"{id_pattern}"[^{{]*?"positions":\s*\[[^\]]*?\])(\s*\}})'

        def insert_questions(match, payload=questions_json):
            # A function replacement inserts the JSON verbatim.  A string
            # template would re-interpret backslash escapes in the payload
            # (e.g. turn "\\n" into a newline or reject "\u....").
            return f'{match.group(1)},\n "questions": {payload}{match.group(2)}'

        new_content, count = re.subn(pattern2, insert_questions, content, flags=re.DOTALL)
        if count > 0:
            content = new_content
            updates += 1

    # Write the mock file back.
    with open(mock_path, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"\n✅ 完成!更新了 {updates} 个岗位群的完整面试题数据")
|
|||
|
|
|
|||
|
|
# Run the mock update only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|