Files
online_sys/frontend_大健康/extract_complete_questions.py
KQL a7242f0c69 Initial commit: 教务系统在线平台
- 包含4个产业方向的前端项目:智能开发、智能制造、大健康、财经商贸
- 已清理node_modules、.yoyo等大文件,项目大小从2.6GB优化至631MB
- 配置完善的.gitignore文件

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-12 18:16:55 +08:00

251 lines
9.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
def extract_all_questions_from_content(content):
    """Split interview text into categories and extract the questions of each.

    The text is first stripped of every "判断题" label (with an optional ASCII
    or full-width colon and trailing whitespace). It is then split on headings
    of the form ``# 一、标题`` (a Chinese numeral, "、" and the rest of the line).

    Args:
        content: Raw interview-question text.

    Returns:
        A list of ``{"category": str, "questions": list}`` dicts, one per
        heading that yielded at least one question. When the text has no such
        heading, the whole text is parsed as a single category named
        "综合面试题". The list is empty when no question could be extracted.
    """
    # Accept both ":" and the full-width ":" that Chinese text normally uses.
    content = re.sub(r'判断题[::]?\s*', '', content)

    # With one capture group, re.split returns
    # [preamble, title1, body1, title2, body2, ...].
    parts = re.split(r'# ([一二三四五六七八九十]+、[^\n]+)', content)

    if len(parts) == 1:
        # No category heading at all: treat everything as one category.
        questions = extract_questions_from_text(content)
        if not questions:
            return []
        return [{"category": "综合面试题", "questions": questions}]

    all_questions = []
    # parts[0] (text before the first heading) is intentionally not parsed.
    for title, body in zip(parts[1::2], parts[2::2]):
        questions = extract_questions_from_text(body)
        if questions:
            all_questions.append({
                "category": title.strip(),
                "questions": questions,
            })
    return all_questions
def extract_questions_from_text(text):
    """Extract numbered question/answer pairs from a block of text.

    A question is a line of the form ``N. question text``. Its answer starts
    at a line containing an answer label ("示例答案", or "答案" followed by an
    ASCII or full-width colon) and continues on the following non-blank lines
    until a blank line or the next question. Questions without an answer are
    dropped.

    If that line-based pass finds nothing, a single regex pass is tried that
    also tolerates blank lines between the answer label and the answer text.

    Args:
        text: The text of one category (or of the whole document).

    Returns:
        A list of ``{"id": "qN", "question": str, "answer": str}`` dicts,
        numbered from ``q1`` in order of appearance. Whitespace runs inside
        an answer are collapsed to single spaces.
    """
    # Answer label: "示例答案" (colon optional) or "答案" plus a colon.
    answer_marker = r'(?:示例答案[::]?|答案[::])'
    question_line = re.compile(r'^(\d+)\.\s*(.+)$')
    # Non-greedy so that only the text up to the FIRST label is removed; an
    # answer that itself contains the word "答案" must not be truncated.
    marker_prefix = re.compile(rf'^.*?{answer_marker}\s*')
    leading_marker = re.compile(rf'^{answer_marker}\s*')

    questions = []

    def flush(question, answer_lines):
        """Store the pending question if it has a non-empty answer."""
        if not (question and answer_lines):
            return
        answer = ' '.join(answer_lines).strip()
        answer = leading_marker.sub('', answer)
        answer = re.sub(r'\s+', ' ', answer)
        if answer:
            questions.append({
                "id": f"q{len(questions) + 1}",
                "question": question,
                "answer": answer,
            })

    current_question = None
    current_answer = []
    in_answer_section = False

    for raw_line in text.split('\n'):
        line = raw_line.strip()

        if not line:
            # A blank line ends the current answer.
            in_answer_section = False
            continue

        question_match = question_line.match(line)
        if question_match:
            flush(current_question, current_answer)
            current_question = question_match.group(2).strip()
            current_answer = []
            in_answer_section = False
        elif re.search(answer_marker, line):
            in_answer_section = True
            # The answer may start on the same line as its label.
            same_line_answer = marker_prefix.sub('', line).strip()
            if same_line_answer:
                current_answer.append(same_line_answer)
        elif in_answer_section:
            # Skip bare "N." lines and markdown headings inside an answer.
            if not re.match(r'^(\d+)\.', line) and not line.startswith('#'):
                current_answer.append(line)

    flush(current_question, current_answer)

    if not questions:
        # Fallback: label on its own line, answer after optional blank lines.
        pattern = (
            r'(\d+)\.\s*([^\n]+)\s*\n\s*(?:示例)?答案[::]?\s*\n\s*'
            r'([^\n]+(?:\n(?!\d+\.|#)[^\n]*)*)'
        )
        for _number, question_text, answer_text in re.findall(pattern, text):
            question_text = question_text.strip()
            answer_text = re.sub(r'\s+', ' ', answer_text.strip())
            if question_text and answer_text:
                questions.append({
                    "id": f"q{len(questions) + 1}",
                    "question": question_text,
                    "answer": answer_text,
                })

    return questions
def _remove_existing_questions(content, industry_id):
    """Return *content* without the "questions" array of one industry entry.

    The array is located right after the entry's "positions" array and must be
    followed by the closing brace of the entry. It is parsed as JSON so that
    nested arrays (e.g. "subQuestions") are skipped as a whole; if it is not
    valid JSON, a flat ``[...]`` without nested brackets is matched instead.
    """
    head = re.compile(
        rf'"id":\s*"{re.escape(industry_id)}"[^{{]*?"positions":\s*\[[^\]]*?\]',
        re.DOTALL,
    )
    field = re.compile(r',\s*"questions":\s*(?=\[)')
    flat_array = re.compile(r'\[[^\]]*?\]')
    closing_brace = re.compile(r'\s*\}')
    decoder = json.JSONDecoder()

    pieces = []
    cursor = 0
    for head_match in head.finditer(content):
        if head_match.start() < cursor:
            # Lies inside a span that has already been removed.
            continue
        field_match = field.match(content, head_match.end())
        if not field_match:
            continue
        try:
            _value, value_end = decoder.raw_decode(content, field_match.end())
        except ValueError:
            flat_match = flat_array.match(content, field_match.end())
            if not flat_match:
                continue
            value_end = flat_match.end()
        if not closing_brace.match(content, value_end):
            continue
        # Keep everything up to the end of "positions", drop the field.
        pieces.append(content[cursor:head_match.end()])
        cursor = value_end
    pieces.append(content[cursor:])
    return ''.join(pieces)


def _insert_questions(content, industry_id, questions_json):
    """Insert a "questions" field after "positions" of one industry entry.

    Returns ``(new_content, number_of_entries_updated)``.
    """
    pattern = re.compile(
        rf'("id":\s*"{re.escape(industry_id)}"[^{{]*?"positions":\s*\[[^\]]*?\])'
        r'(\s*\})',
        re.DOTALL,
    )

    # A function replacement is used on purpose: a string template would
    # re-interpret the backslash escapes contained in the JSON text.
    def splice(match):
        return f'{match.group(1)},\n "questions": {questions_json}{match.group(2)}'

    return pattern.subn(splice, content)


def main(
    data_path='/Users/apple/Documents/cursor/教务系统/frontend_大健康/网页未导入数据/大健康产业/大健康岗位简历.json',
    mock_path='/Users/apple/Documents/cursor/教务系统/frontend_大健康/src/mocks/resumeInterviewMock.js',
):
    """Rebuild the per-industry interview questions inside the mock file.

    Reads the resume data (a JSON list of records), extracts the interview
    questions of every record, merges them per industry group and category,
    and rewrites the "questions" field of the matching entries in the mock
    file. The mock file is always written back, even when nothing changed.

    Args:
        data_path: Path of the JSON file with the resume records. Each record
            is read through its '简历岗位群' and '面试题内容' keys.
        mock_path: Path of the mock file that is updated in place.
    """
    with open(data_path, 'r', encoding='utf-8') as f:
        health_data = json.load(f)

    with open(mock_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Collect all questions of every industry group, merged by category.
    industry_all_questions = {}
    for item in health_data:
        industry = item.get('简历岗位群', '')
        interview_content = item.get('面试题内容', '')
        if not (industry and interview_content):
            continue

        merged = industry_all_questions.setdefault(industry, [])
        for cat in extract_all_questions_from_content(interview_content):
            existing_cat = next(
                (c for c in merged if c['category'] == cat['category']),
                None,
            )
            if existing_cat is None:
                merged.append(cat)
                continue
            # Same category seen before: add only questions not yet present.
            existing_questions = {q['question'] for q in existing_cat['questions']}
            for q in cat['questions']:
                if q['question'] not in existing_questions:
                    existing_cat['questions'].append(q)

    # Industry group name in the data -> entry id in the mock file.
    industry_mapping = {
        '健康管理': 'health_1',
        '健康检查': 'health_2',
        '康复治疗': 'health_3',
        '慢性病管理': 'health_4',
        '轻医美': 'health_5',
        '心理健康': 'health_6',
        '社群运营': 'health_7',
        '药品供应链管理': 'health_8',
        '药品生产': 'health_9',
        '药品质量检测': 'health_10',
        '药物研发': 'health_11'
    }

    updates = 0
    for orig_name, industry_id in industry_mapping.items():
        categories = industry_all_questions.get(orig_name)
        if not categories:
            continue

        # Convert to the nested structure written to the mock file; question
        # ids are renumbered consecutively across all categories.
        questions_array = []
        total_questions = 0
        for cat in categories:
            if not cat['questions']:
                continue
            renumbered_questions = [
                {
                    "id": f"q{total_questions + i}",
                    "question": q['question'],
                    "answer": q['answer'],
                }
                for i, q in enumerate(cat['questions'], 1)
            ]
            total_questions += len(renumbered_questions)
            questions_array.append({
                "id": f"group_q{len(questions_array) + 1}",
                "question": cat['category'],
                "subQuestions": renumbered_questions,
            })

        if not questions_array:
            continue

        print(f"{orig_name} ({industry_id}): {len(questions_array)} 个分类,共 {total_questions} 个面试题")

        questions_json = json.dumps(questions_array, ensure_ascii=False, indent=2)

        # Drop the old field first, then add the freshly generated one.
        content = _remove_existing_questions(content, industry_id)
        new_content, count = _insert_questions(content, industry_id, questions_json)
        if count > 0:
            content = new_content
            updates += 1

    with open(mock_path, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"\n✅ 完成!更新了 {updates} 个岗位群的完整面试题数据")
# Run the update only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()