Initial commit: 教务系统在线平台
- 包含4个产业方向的前端项目:智能开发、智能制造、大健康、财经商贸
- 已清理node_modules、.yoyo等大文件,项目大小从2.6GB优化至631MB
- 配置完善的.gitignore文件

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
231
frontend_大健康/extract_all_interview_questions.py
Normal file
231
frontend_大健康/extract_all_interview_questions.py
Normal file
@@ -0,0 +1,231 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
def parse_all_interview_questions(content):
    """Parse raw interview-question text into a list of question categories.

    The text is split on category headings of the form ``# 一、<title>``
    (a ``#`` at the start of a line followed by a Chinese numeral and ``、``).
    Each section is handed to ``extract_questions_from_section``; question
    numbering continues across sections, starting at 1.

    Any text that precedes the first heading is ignored when at least one
    heading is present. When no heading is found at all, the whole text is
    treated as a single category named "综合面试题".

    Args:
        content: Raw interview-question text.

    Returns:
        A list of ``{"category": <title>, "questions": [...]}`` dicts.
        Categories that yield no questions are omitted.
    """
    # Drop the "判断题:" prefix (ASCII or full-width U+FF1A colon).
    content = re.sub(r'判断题[:\uff1a]\s*', '', content)

    # Split on category headings. ``(?:\A|\n)`` lets a heading on the very
    # first line match too; requiring a preceding newline would silently
    # discard that whole first section as preamble.
    sections = re.split(r'(?:\A|\n)# ([一二三四五六七八九十]+、[^#\n]+)', content)

    all_questions = []
    question_id = 1

    if len(sections) > 1:
        # With one capture group, re.split returns
        # [preamble, title1, body1, title2, body2, ...].
        titles = sections[1::2]
        bodies = sections[2::2]
        for section_title, section_content in zip(titles, bodies):
            category_questions = extract_questions_from_section(section_content, question_id)
            question_id += len(category_questions)

            if category_questions:
                all_questions.append({
                    "category": section_title.strip(),
                    "questions": category_questions
                })
    else:
        # No category headings: extract everything as one generic category.
        category_questions = extract_questions_from_section(content, question_id)
        if category_questions:
            all_questions.append({
                "category": "综合面试题",
                "questions": category_questions
            })

    return all_questions
|
||||
|
||||
def extract_questions_from_section(content, start_id):
    """Extract numbered questions and their answers from a block of text.

    Three strategies are tried in order; the first one that produces any
    match is used exclusively (results are never merged):

    1. ``N. question`` followed by an answer that starts with a
       ``答案:`` / ``示例答案:`` label.
    2. ``N. question``, blank line(s), an optional answer label, then the
       answer paragraph.
    3. A line-by-line scan that treats ``N. text`` lines as questions and
       the lines after them as the answer.

    Args:
        content: Text of one section.
        start_id: Number used in the ``id`` of the first extracted question.

    Returns:
        A list of ``{"id": "q<N>", "question": str, "answer": str}`` dicts,
        with N increasing by one from ``start_id``. Entries whose question
        or answer is empty are skipped.
    """
    # Answer label followed by an ASCII or full-width (U+FF1A) colon.
    answer_label = r'(?:示例)?答案[:\uff1a]'

    # Question numbers are anchored with ``^`` under re.MULTILINE rather than
    # a literal ``\n`` so that a question on the first line is matched too.
    labelled_answer = (
        r'^(\d+)\.\s*([^\n]+?)[\n\s]+('
        + answer_label
        + r'\s*[^\n]+(?:\n(?!\d+\.).*)*)'
    )
    paragraph_answer = (
        r'^(\d+)\.\s*([^\n]+)\n\s*\n\s*('
        + answer_label
        + r')?\s*\n\s*([^\n]+(?:\n(?!\d+\.|示例答案).*)*)'
    )

    matches = re.findall(labelled_answer, content, re.MULTILINE)
    if not matches:
        # Drop the optional-label group so both patterns yield
        # (number, question, answer) triples.
        matches = [
            (m[0], m[1], m[3])
            for m in re.findall(paragraph_answer, content, re.MULTILINE)
        ]

    if not matches:
        return _scan_lines_for_questions(content, start_id)

    questions = []
    question_id = start_id
    for _number, raw_question, raw_answer in matches:
        question_text = raw_question.strip()
        answer_text = _strip_answer_label(raw_answer.strip())
        # Collapse newlines and repeated whitespace into single spaces.
        answer_text = re.sub(r'\s+', ' ', answer_text)

        if question_text and answer_text:
            questions.append({
                "id": f"q{question_id}",
                "question": question_text,
                "answer": answer_text
            })
            question_id += 1

    return questions


def _strip_answer_label(text):
    """Remove a leading ``答案`` / ``示例答案`` label (colon optional) from text."""
    return re.sub(r'^(示例)?答案[:\uff1a]?\s*', '', text).strip()


def _scan_lines_for_questions(content, start_id):
    """Fallback extraction: walk the text line by line.

    A line of the form ``N. text`` starts a new question. A line containing
    ``答案`` starts the answer (any text after the label is kept). Other
    non-empty lines following a question are collected as answer text, and
    the collected lines are joined with single spaces.
    """
    questions = []
    question_id = start_id
    current_question = None
    current_answer = []
    in_answer = False

    def finish_current():
        """Append the pending question if it has collected answer text."""
        nonlocal question_id
        if not (current_question and current_answer):
            return
        answer_text = ' '.join(current_answer).strip()
        if answer_text:
            questions.append({
                "id": f"q{question_id}",
                "question": current_question,
                "answer": answer_text
            })
            question_id += 1

    for raw_line in content.split('\n'):
        line = raw_line.strip()

        question_match = re.match(r'^(\d+)\.\s*(.+)$', line)
        if question_match:
            finish_current()
            current_question = question_match.group(2).strip()
            current_answer = []
            in_answer = False
        elif '答案' in line:
            in_answer = True
            # The answer may begin on the same line as its label.
            answer_part = _strip_answer_label(line)
            if answer_part:
                current_answer.append(answer_part)
        elif line:
            # Only a bare number such as "3." reaches here with a numeric
            # prefix; it is never collected and ends an explicit answer.
            numbered = re.match(r'^\d+\.', line)
            if in_answer:
                if numbered:
                    in_answer = False
                else:
                    current_answer.append(line)
            elif current_question and not numbered:
                # No explicit answer label: text after a question still
                # counts as its answer.
                current_answer.append(line)

    finish_current()
    return questions
|
||||
|
||||
def main(
    data_path='/Users/apple/Documents/cursor/教务系统/frontend_大健康/网页未导入数据/大健康产业/大健康岗位简历.json',
    mock_path='/Users/apple/Documents/cursor/教务系统/frontend_大健康/src/mocks/resumeInterviewMock.js',
):
    """Inject parsed interview questions into the resume/interview mock file.

    Reads the resume JSON at ``data_path``, parses the interview text of the
    first usable record of each job group with
    ``parse_all_interview_questions``, and rewrites the ``"questions"`` field
    of the matching ``health_N`` entry in the mock file at ``mock_path``.
    The mock file is rewritten in place.

    Args:
        data_path: Path of the JSON file holding a list of records with the
            keys ``简历岗位群`` (job group) and ``面试题内容`` (interview text).
        mock_path: Path of the mock source file to update.
    """
    with open(data_path, 'r', encoding='utf-8') as f:
        health_data = json.load(f)

    with open(mock_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Job group name -> list of question groups in the front-end format.
    industry_questions_map = {}

    for item in health_data:
        industry = item.get('简历岗位群', '')
        interview_content = item.get('面试题内容', '')

        # Only the first record per job group that yields questions is used.
        if not industry or not interview_content or industry in industry_questions_map:
            continue

        all_categories = parse_all_interview_questions(interview_content)

        # Convert to the shape the front end expects, numbering only the
        # categories that actually contain questions.
        non_empty = [category for category in all_categories if category['questions']]
        questions_array = [
            {
                "id": f"group_q{cat_id}",
                "question": category['category'],
                "subQuestions": category['questions']
            }
            for cat_id, category in enumerate(non_empty, start=1)
        ]

        if questions_array:
            industry_questions_map[industry] = questions_array
            total_questions = sum(len(q['subQuestions']) for q in questions_array)
            print(f"✓ {industry}: 提取了 {len(questions_array)} 个分类,共 {total_questions} 个面试题")

    # Job group name -> entry id used in the mock file.
    industry_mapping = {
        '健康管理': 'health_1',
        '健康检查': 'health_2',
        '康复治疗': 'health_3',
        '慢性病管理': 'health_4',
        '轻医美': 'health_5',
        '心理健康': 'health_6',
        '社群运营': 'health_7',
        '药品供应链管理': 'health_8',
        '药品生产': 'health_9',
        '药品质量检测': 'health_10',
        '药物研发': 'health_11'
    }

    updates = 0
    for orig_name, industry_id in industry_mapping.items():
        if orig_name not in industry_questions_map:
            continue

        questions_json = json.dumps(
            industry_questions_map[orig_name], ensure_ascii=False, indent=2
        )

        # Everything from the entry's "id" through the end of its
        # "positions" array, captured as group 1.
        entry_head = (
            rf'("id":\s*"{re.escape(industry_id)}"[^{{]*?'
            r'"positions":\s*\[[^\]]*?\])'
        )

        # Step 1: drop an existing "questions" field.
        # NOTE(review): ``[^\]]*?`` cannot span a nested ``]``, so an existing
        # "questions" value that itself contains arrays (such as the
        # "subQuestions" lists written below) is not matched as a whole.
        # Re-running over already-generated output looks unsafe - confirm
        # against the actual mock file before relying on it.
        old_field = entry_head + r',\s*"questions":\s*\[[^\]]*?\](\s*\})'
        content = re.sub(old_field, r'\1\2', content, flags=re.DOTALL)

        # Step 2: insert the new "questions" field before the closing brace.
        # A function replacement is used so backslashes inside the JSON are
        # inserted verbatim; a template string would have its escapes
        # interpreted by ``re``.
        insertion_point = entry_head + r'(\s*\})'

        def insert_questions(match, payload=questions_json):
            return f'{match.group(1)},\n "questions": {payload}{match.group(2)}'

        new_content, count = re.subn(
            insertion_point, insert_questions, content, flags=re.DOTALL
        )
        if count > 0:
            content = new_content
            updates += 1

    with open(mock_path, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"\n✅ 完成!更新了 {updates} 个岗位群的完整面试题数据")
|
||||
|
||||
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user