Files
online_sys/frontend_大健康/fix_duplicates_and_add_questions.py
KQL a7242f0c69 Initial commit: 教务系统在线平台
- 包含4个产业方向的前端项目:智能开发、智能制造、大健康、财经商贸
- 已清理node_modules、.yoyo等大文件,项目大小从2.6GB优化至631MB
- 配置完善的.gitignore文件

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-12 18:16:55 +08:00

222 lines
9.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
def parse_interview_questions(content):
    """Parse raw interview-question text into structured question records.

    The text is treated as a sequence of sections whose heading line looks
    like ``# 一、<title>`` (Chinese numerals followed by ``、``). Inside a
    section, every question starts on its own line with ``<number>. ``; the
    first line of that block is the question text. The answer consists of
    the non-empty lines after the first line containing ``答案`` (which also
    covers ``示例答案``), joined with single spaces. Text on the marker line
    itself is not part of the answer.

    Args:
        content: Full interview-question text for one industry.

    Returns:
        A list of ``{"id": "q<N>", "question": ..., "answer": ...}`` dicts,
        numbered consecutively from ``q1`` across all sections. Questions
        for which no answer text is found are skipped. An empty list is
        returned for empty content.
    """
    if not content:
        return []

    questions = []
    # With one capturing group, re.split yields
    # [preamble, title1, body1, title2, body2, ...]. ``\A`` lets a heading at
    # the very start of the text match too; requiring a preceding newline
    # would silently drop that first section.
    sections = re.split(r'(?:\A|\n)# ([一二三四五六七八九十]+、[^#\n]+)', content)

    # Only the section bodies (every second item from index 2) are needed.
    for section_content in sections[2::2]:
        question_blocks = re.split(r'\n\d+\.\s+', section_content)
        # The first chunk is whatever precedes question 1, so skip it.
        for block in question_blocks[1:]:
            lines = block.strip().split('\n')
            question_text = lines[0].strip()

            # '答案' is a substring of '示例答案', so one test covers both.
            marker_index = next(
                (k for k, line in enumerate(lines) if '答案' in line),
                None,
            )
            if marker_index is None:
                continue

            answer_lines = []
            for answer_line in lines[marker_index + 1:]:
                answer_line = answer_line.strip()
                # Skip blank lines and repeated answer markers.
                if not answer_line or answer_line.startswith('示例答案'):
                    continue
                # A line that looks like a new numbered item ends the answer.
                if re.match(r'^\d+\.', answer_line):
                    break
                answer_lines.append(answer_line)
            answer_text = ' '.join(answer_lines)

            if question_text and answer_text:
                questions.append({
                    "id": f"q{len(questions) + 1}",
                    "question": question_text,
                    "answer": answer_text
                })
    return questions
# Ordered keyword rules used to bucket questions; the first rule with a
# keyword found in the question text wins.
_CATEGORY_RULES = (
    (('岗位理解',), "岗位理解类问题"),
    (('实践经验', '案例'), "实践经验类问题"),
    (('客户服务', '客户'), "客户服务类问题"),
    (('市场', '趋势'), "市场与未来趋势类问题"),
    (('技术', '专业'), "专业技术类问题"),
    (('团队', '协作'), "团队协作类问题"),
)

# Maps the industry name found in the resume data to the id used in the
# mock file.
_INDUSTRY_IDS = {
    '健康管理': 'health_1',
    '健康检查': 'health_2',
    '康复治疗': 'health_3',
    '慢性病管理': 'health_4',
    '轻医美': 'health_5',
    '心理健康': 'health_6',
    '社群运营': 'health_7',
    '药品供应链管理': 'health_8',
    '药品生产': 'health_9',
    '药品质量检测': 'health_10',
    '药物研发': 'health_11'
}


def _group_by_category(questions):
    """Bucket parsed questions into category groups, in first-seen order."""
    grouped = {}
    # The category is "sticky": a question matching no keyword stays in the
    # category of the most recent question that did match.
    current_category = "综合面试题"
    for q in questions:
        text = q['question']
        for keywords, category in _CATEGORY_RULES:
            if any(keyword in text for keyword in keywords):
                current_category = category
                break
        grouped.setdefault(current_category, []).append(q)
    return [
        {
            "id": f"group_q{cat_id}",
            "question": category,
            "subQuestions": cat_questions
        }
        for cat_id, (category, cat_questions) in enumerate(grouped.items(), 1)
    ]


def _collect_industry_questions(health_data):
    """Build {industry name: question groups} from the resume data records."""
    industry_questions = {}
    for item in health_data:
        industry = item.get('简历岗位群', '')
        interview_content = item.get('面试题内容', '')
        # Only the first record per industry that yields questions is used.
        if not industry or not interview_content or industry in industry_questions:
            continue
        questions = parse_interview_questions(interview_content)
        if questions:
            industry_questions[industry] = _group_by_category(questions)
    return industry_questions


def _dedupe_blocks(industry_blocks):
    """Keep only the first block for each ``health_<n>`` id."""
    seen_ids = set()
    unique_blocks = []
    for block in industry_blocks:
        id_match = re.search(r'"id":\s*"(health_\d+)"', block)
        if id_match and id_match.group(1) not in seen_ids:
            seen_ids.add(id_match.group(1))
            unique_blocks.append(block)
    return unique_blocks


def _default_questions(industry_name):
    """Return the generic fallback question group for an industry."""
    return [{
        "id": "group_q1",
        "question": f"{industry_name}专业认知",
        "subQuestions": [
            {
                "id": "q1",
                "question": f"你如何理解{industry_name}的核心价值?",
                "answer": f"{industry_name}的核心价值在于通过专业技能和知识,为企业和客户创造价值,推动行业发展。"
            },
            {
                "id": "q2",
                "question": f"{industry_name}中最重要的能力是什么?",
                "answer": "专业技能、沟通能力、团队协作和持续学习能力都是非常重要的。"
            },
            {
                "id": "q3",
                "question": f"你为什么选择{industry_name}这个方向?",
                "answer": "我对这个领域充满热情,相信能够在这里发挥我的专业优势,为行业发展做出贡献。"
            }
        ]
    }]


def _with_questions(block, industry_questions):
    """Return ``block`` with a ``"questions"`` field appended, if applicable.

    The block is returned unchanged when its name or id cannot be read, when
    it already has a ``"questions"`` key, or when it contains no ``]``.
    """
    name_match = re.search(r'"name":\s*"([^"]+)"', block)
    id_match = re.search(r'"id":\s*"(health_\d+)"', block)
    if not (name_match and id_match) or '"questions"' in block:
        return block

    # Insert right after the last array in the block.
    pos_end = block.rfind(']')
    if pos_end <= 0:
        return block

    industry_name = name_match.group(1)
    industry_id = id_match.group(1)

    # Ids are unique in the mapping, so it can be inverted for a direct
    # lookup.
    name_by_id = {mapped_id: name for name, mapped_id in _INDUSTRY_IDS.items()}
    questions = industry_questions.get(name_by_id.get(industry_id))
    if not questions:
        questions = _default_questions(industry_name)

    questions_json = json.dumps(questions, ensure_ascii=False, indent=4)
    # Shift continuation lines so the JSON nests under the block's fields.
    questions_json = questions_json.replace('\n', '\n ')
    new_block = block[:pos_end+1] + ',\n "questions": ' + questions_json + block[pos_end+1:]
    print(f" ✓ 为 {industry_name} ({industry_id}) 添加了面试题")
    return new_block


def main(
    data_path='/Users/apple/Documents/cursor/教务系统/frontend_大健康/网页未导入数据/大健康产业/大健康岗位简历.json',
    mock_path='/Users/apple/Documents/cursor/教务系统/frontend_大健康/src/mocks/resumeInterviewMock.js',
):
    """De-duplicate industry blocks in the mock file and attach questions.

    Reads the resume data JSON, parses each industry's interview questions,
    then rewrites the ``const industries = [...]`` array in the mock JS file
    so that each ``health_<n>`` block appears once and carries a
    ``"questions"`` field. Industries with no parsed questions get a generic
    fallback group.

    Args:
        data_path: Path of the resume data JSON file (a list of records).
        mock_path: Path of the mock JS file, which is rewritten in place.

    Returns:
        None. The mock file is left untouched when the ``industries`` array
        is not found or when no industry block is recognised inside it.
    """
    with open(data_path, 'r', encoding='utf-8') as f:
        health_data = json.load(f)

    with open(mock_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Locate the industries array.
    match = re.search(r'const industries = \[(.*?)\];', content, re.DOTALL)
    if not match:
        print("未找到industries数组")
        return

    industries_content = match.group(1)

    # Extract each industry object that has a health id and a positions list.
    industry_blocks = re.findall(r'\{[^}]*?"id":\s*"health_\d+"[^}]*?"positions":\s*\[[^\]]*?\][^}]*?\}', industries_content, re.DOTALL)
    unique_blocks = _dedupe_blocks(industry_blocks)

    print(f"找到 {len(industry_blocks)} 个岗位群块,去重后剩余 {len(unique_blocks)}")

    # The array is rebuilt solely from recognised blocks, so writing with
    # none recognised would wipe the existing array. Abort instead.
    if not unique_blocks:
        print("未匹配到任何岗位群块,已放弃写入以免清空industries数组")
        return

    industry_questions = _collect_industry_questions(health_data)
    updated_blocks = [
        _with_questions(block, industry_questions) for block in unique_blocks
    ]

    # Rebuild the industries array.
    new_industries = 'const industries = [\n ' + ',\n '.join(updated_blocks) + '\n];'

    # Replace exactly the span the regex matched. Re-searching for the text
    # 'const industries' could hit an earlier, longer identifier.
    new_content = content[:match.start()] + new_industries + content[match.end():]

    with open(mock_path, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print(f"\n完成!处理了 {len(updated_blocks)} 个岗位群")
# Run the fix-up only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()