Files
ALL-teach_sys/frontend_化工/extract_complete_questions.py

175 lines
6.2 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
import json
# Location of the raw resume/interview data for the chemical-industry positions.
DATA_PATH = '网页未导入数据/化工产业/化工岗位简历.json'

# A line containing any of these substrings introduces the answer of the
# current question; the marker line itself is not part of the answer text.
ANSWER_MARKERS = ('示例答案', '正确答案', '答案:', '正确选项')

# Answer lines with these prefixes are list items and keep their own line.
# NOTE(review): the second prefix in the previous revision was an empty
# string, which matches every line and disabled the filter below; '•' is
# assumed to be what was meant -- confirm against the data file.
BULLET_PREFIXES = ('-', '•')

# Lines with these prefixes (markdown headings, multiple-choice options) are
# never copied into an answer.
SKIPPED_ANSWER_PREFIXES = ('#', 'A.', 'B.', 'C.', 'D.')


def is_question_line(line):
    """Return True when *line* opens a new question.

    A question line starts with a digit (numbered question) or with the
    literal prefix '问题'.
    """
    return bool(line) and (line[0].isdigit() or line.startswith('问题'))


def parse_interview_content(interview_content):
    """Split one position's interview text into question/answer pairs.

    Args:
        interview_content: the raw multi-line interview text of a position.

    Returns:
        A list of ``(question, answer)`` tuples in document order. A question
        is only returned when an answer marker followed by at least one
        answer line was found for it.
    """
    pairs = []
    question = None
    answer = None  # None means no answer marker has been seen for `question`.

    for raw_line in interview_content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if any(marker in line for marker in ANSWER_MARKERS):
            # Answer marker: start collecting, unless text was already gathered.
            if question and not answer:
                answer = ""
        elif is_question_line(line):
            # This test must come before answer collection; otherwise every
            # question after the first one is swallowed by the running answer.
            if question and answer:
                pairs.append((question, answer.strip()))
            question, answer = line, None
        elif answer is not None:
            if line.startswith(BULLET_PREFIXES):
                answer += line + '\n'
            elif not line.startswith(SKIPPED_ANSWER_PREFIXES):
                answer += line + ' '

    # Flush the last pair of the text.
    if question and answer:
        pairs.append((question, answer.strip()))
    return pairs


def collect_job_group_questions(positions):
    """Group the interview questions of all positions by job group.

    Args:
        positions: iterable of dicts carrying the keys '简历岗位群' (job group)
            and '面试题内容' (interview text). Entries missing either value
            are skipped.

    Returns:
        A ``(job_group_questions, total_questions)`` tuple.
        ``job_group_questions`` maps each job group to a list of
        ``{'question': ..., 'answer': ...}`` dicts without duplicate question
        texts; ``total_questions`` is the number of stored questions.
    """
    job_group_questions = {}
    seen_by_group = {}  # job group -> set of question texts already stored
    total_questions = 0

    for position in positions:
        job_group = position.get('简历岗位群', '')
        interview_content = position.get('面试题内容', '')
        if not job_group or not interview_content:
            continue

        stored = job_group_questions.setdefault(job_group, [])
        seen = seen_by_group.setdefault(job_group, set())
        for question, answer in parse_interview_content(interview_content):
            if question in seen:
                continue
            seen.add(question)
            stored.append({'question': question, 'answer': answer})
            total_questions += 1

    return job_group_questions, total_questions


if __name__ == "__main__":
    # Read the chemical-industry resume data and collect every job group's questions.
    with open(DATA_PATH, 'r', encoding='utf-8') as f:
        chemical_data = json.load(f)
    job_group_questions, total_questions = collect_job_group_questions(chemical_data)
# Report how many job groups and questions were collected, then one line
# per job group in sorted order.
report_lines = [
    "\n===== 化工岗位完整面试题统计 =====",
    f"总岗位群数: {len(job_group_questions)}",
    f"总面试题数: {total_questions}",
    "\n各岗位群面试题数量:",
]
for group_name in sorted(job_group_questions):
    report_lines.append(f" {group_name}: {len(job_group_questions[group_name])}")
print("\n".join(report_lines))

# Persist the extracted questions as human-readable UTF-8 JSON.
serialized = json.dumps(job_group_questions, ensure_ascii=False, indent=2)
with open('complete_interview_questions.json', 'w', encoding='utf-8') as out_file:
    out_file.write(serialized)
print("\n✅ 已保存所有面试题到 complete_interview_questions.json")
# Source of the follow-up script that patches src/mocks/resumeInterviewMock.js
# with the extracted questions. It is kept as a raw string so the text below
# is exactly what gets written to disk.
#
# The generated script hands re.sub a callable instead of a replacement
# template: a template string has its backslash escapes interpreted, which
# turned the "\n" / "\\" escapes of the JSON-encoded answers into raw newlines
# and single backslashes and so broke the JavaScript string literals.
#
# NOTE(review): the pattern still ends the existing subQuestions array at the
# first ']' it meets, so an existing entry whose text contains ']' is only
# partially replaced.
UPDATE_SCRIPT_SOURCE = r'''#!/usr/bin/env python3
import json
import re
import subprocess
from datetime import datetime
# 读取提取的面试题数据
with open('complete_interview_questions.json', 'r', encoding='utf-8') as f:
    job_group_questions = json.load(f)
# 读取现有mock文件
with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
    content = f.read()
# 备份文件
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
backup_file = f'src/mocks/resumeInterviewMock.js.backup_complete_{timestamp}'
with open(backup_file, 'w', encoding='utf-8') as f:
    f.write(content)
print(f"已创建备份: {backup_file}")
# 更新每个岗位群的面试题
updated_count = 0
for job_group, questions in job_group_questions.items():
    if not questions:
        continue
    # 构建subQuestions数组内容
    sub_questions_items = []
    for i, q in enumerate(questions):
        question_obj = {
            "id": f"q{i+1}",
            "question": q['question'],
            "answer": q['answer']
        }
        sub_questions_items.append(json.dumps(question_obj, ensure_ascii=False, indent=20))
    # 创建完整的subQuestions内容
    sub_questions_content = ',\n'.join(sub_questions_items)
    # 查找并替换对应岗位群的subQuestions
    pattern = rf'("question"\s*:\s*"{re.escape(job_group)}"[^}}]*?"subQuestions"\s*:\s*)\[[^\]]*\]'

    # A callable replacement inserts the JSON text verbatim; a template string
    # would have its backslash escapes processed by re.sub.
    def build_replacement(match, body=sub_questions_content):
        return f'{match.group(1)}[\n{body}\n ]'

    new_content = re.sub(pattern, build_replacement, content, flags=re.DOTALL)
    if new_content != content:
        content = new_content
        updated_count += 1
        print(f"✓ 已更新 {job_group}{len(questions)} 道面试题")
# 保存更新后的文件
with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
    f.write(content)
# 验证语法
try:
    result = subprocess.run(['node', '-c', 'src/mocks/resumeInterviewMock.js'],
                            capture_output=True, text=True, encoding='utf-8')
    if result.returncode == 0:
        print(f"\n✓ 语法检查通过")
        print(f"✓ 成功更新了 {updated_count} 个岗位群的面试题")
        print(f"\n✅ 所有面试题更新成功完成!")
    else:
        print(f"\n✗ 语法检查失败: {result.stderr}")
        # 恢复备份
        with open(backup_file, 'r', encoding='utf-8') as f:
            content = f.read()
        with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"已从备份恢复")
except Exception as e:
    print(f"错误: {e}")
'''


if __name__ == "__main__":
    # Write the update script next to the extracted data and tell the user
    # how to run it.
    print("\n正在生成更新脚本...")
    with open('update_complete_interview_questions.py', 'w', encoding='utf-8') as f:
        f.write(UPDATE_SCRIPT_SOURCE)
    print("✅ 已生成 update_complete_interview_questions.py")
    print("\n请运行以下命令来更新所有面试题:")
    print("python3 update_complete_interview_questions.py")