205 lines
7.3 KiB
Python
205 lines
7.3 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
import json
|
|||
|
|
import re
|
|||
|
|
from datetime import datetime
|
|||
|
|
|
|||
|
|
# Tell the operator that the extraction run has started.
print("正在提取化工岗位简历中的所有面试题...")

# Load the chemical-industry resume records; the parsed JSON is iterated
# record by record further down the script.
with open(
    '网页未导入数据/化工产业/化工岗位简历.json',
    mode='r',
    encoding='utf-8',
) as source_file:
    chemical_data = json.loads(source_file.read())
|
|||
|
|
|
|||
|
|
# Collect every interview question of each job group.
# Maps job-group name -> list of {'question': str, 'answer': str} dicts.
job_group_all_questions = {}
total_questions = 0

# Loop-invariant patterns and markers, built once instead of per line.
_QUESTION_START_RE = re.compile(r'^[0-9]+[\.、]')   # "1." / "12、" starts a question
_OPTION_START_RE = re.compile(r'^[A-D][\.、]')      # "A." / "B、" is a choice option
# Both the ASCII and the full-width colon are accepted after "答案".
_ANSWER_MARKERS = ('示例答案', '答案:', '答案：', '正确答案', '正确选项', '答案是')
_ANSWER_SEPARATOR_RE = re.compile(r'[:：]')
# Answers containing any of these substrings are discarded.
_SKIP_TOKENS = ('![', 'image')


def _store_question(questions, question, answer_lines):
    """Append a question/answer pair to *questions* when it is usable.

    Nothing is stored when the question or the answer is missing, when the
    joined answer is blank, or when the answer contains a skip token.
    """
    if not (question and answer_lines):
        return
    answer_text = '\n'.join(answer_lines).strip()
    if answer_text and not any(token in answer_text for token in _SKIP_TOKENS):
        questions.append({
            'question': question,
            'answer': answer_text,
        })


# Walk every position record and extract its complete interview Q&A content.
for position in chemical_data:
    job_group = position.get('简历岗位群', '')
    interview_content = position.get('面试题内容', '')

    if not job_group or not interview_content:
        continue

    # The group entry is created even if no question is extracted for it.
    group_questions = job_group_all_questions.setdefault(job_group, [])

    # Parse the interview content line by line.
    questions = []
    lines = interview_content.split('\n')

    current_question = None
    current_answer = []
    in_answer = False

    for i, raw_line in enumerate(lines):
        line = raw_line.strip()

        # Skip blank lines and heading lines.
        if not line or line.startswith('#'):
            continue

        if _QUESTION_START_RE.match(line):
            # A numbered line starts a new question: flush the previous pair.
            _store_question(questions, current_question, current_answer)
            current_question = line
            current_answer = []
            in_answer = False

        elif any(marker in line for marker in _ANSWER_MARKERS):
            in_answer = True
            # The answer may share the marker's line. Split only once, at the
            # first colon, so colons inside the answer text are preserved.
            parts = _ANSWER_SEPARATOR_RE.split(line, maxsplit=1)
            if len(parts) == 2:
                answer_part = parts[1].strip()
                if answer_part:
                    current_answer.append(answer_part)

        elif current_question and _OPTION_START_RE.match(line):
            # Multiple-choice options belong to the question text.
            current_question += '\n' + line

        elif in_answer:
            # Continuation of an answer that has already started.
            current_answer.append(line)

        elif current_question:
            # No explicit answer marker yet: a line directly following the
            # question line is treated as the start of the answer.
            # A question is only ever set on an earlier line, so i >= 1 here.
            prev_line = lines[i - 1].strip()
            if _QUESTION_START_RE.match(prev_line) or prev_line == current_question:
                in_answer = True
                current_answer.append(line)

    # Flush the last question/answer pair of this record.
    _store_question(questions, current_question, current_answer)

    # Add the parsed questions to the job group, skipping duplicates.
    # Two questions count as duplicates when their first 50 characters match.
    seen_prefixes = {existing['question'][:50] for existing in group_questions}
    for q in questions:
        q['question'] = q['question'].strip()
        q['answer'] = q['answer'].strip()

        prefix = q['question'][:50]
        if prefix in seen_prefixes:
            continue

        # Very short questions or answers are treated as parsing noise.
        if len(q['question']) > 5 and len(q['answer']) > 5:
            group_questions.append(q)
            seen_prefixes.add(prefix)
            total_questions += 1
|
|||
|
|
|
|||
|
|
# Print summary statistics for the extraction.
print("\n===== 提取完成 =====")
print(f"总岗位群数: {len(job_group_all_questions)}")
print(f"总面试题数: {total_questions}")
print("\n各岗位群面试题数量:")

# Per-group counts, ordered by job-group name.
for group_name in sorted(job_group_all_questions):
    group_items = job_group_all_questions[group_name]
    print(f" {group_name}: {len(group_items)} 道题")

# Show the first two questions of a few selected job groups as a sample.
print("\n===== 面试题示例 =====")
for group_name in ('化工安全', '化工检验检测', '化工生产'):
    group_items = job_group_all_questions.get(group_name)
    if group_items is None:
        continue
    print(f"\n【{group_name}】共 {len(group_items)} 道题,前2题:")
    for number, item in enumerate(group_items[:2], start=1):
        # Only the first 60 characters of each text are shown.
        print(f" {number}. {item['question'][:60]}...")
        print(f" 答: {item['answer'][:60]}...")
|
|||
|
|
|
|||
|
|
# Persist all extracted questions, grouped by job group, as pretty-printed
# UTF-8 JSON (non-ASCII characters are written as-is, not escaped).
with open('all_interview_questions_complete.json', mode='w', encoding='utf-8') as out_file:
    out_file.write(
        json.dumps(job_group_all_questions, ensure_ascii=False, indent=2)
    )

print("\n✅ 已保存到 all_interview_questions_complete.json")
|
|||
|
|
|
|||
|
|
# Read the existing mock file so it can be patched in memory.
print("\n正在更新 resumeInterviewMock.js...")

with open('src/mocks/resumeInterviewMock.js', mode='r', encoding='utf-8') as mock_file:
    content = mock_file.read()

# Write a timestamped backup copy next to the mock file before changing it.
timestamp = f'{datetime.now():%Y%m%d_%H%M%S}'
backup_file = 'src/mocks/resumeInterviewMock.js.backup_complete_' + timestamp
with open(backup_file, mode='w', encoding='utf-8') as backup_handle:
    backup_handle.write(content)
print(f"已创建备份: {backup_file}")
|
|||
|
|
|
|||
|
|
# Replace the "subQuestions" array of every job group inside the mock source.
# `content` is patched in memory; `updated_count` counts the groups whose
# array actually changed.
updated_count = 0
for job_group, questions in job_group_all_questions.items():
    if not questions:
        continue

    # Give every question a sequential id in the shape the mock expects.
    formatted_questions = [
        {
            "id": f"q{i}",
            "question": q['question'],
            "answer": q['answer'],
        }
        for i, q in enumerate(questions, 1)
    ]

    # Serialize to a JSON string.
    sub_questions_str = json.dumps(formatted_questions, ensure_ascii=False, indent=8)

    # Match the group's entry header (captured) followed by its current array.
    # NOTE(review): `[^\]]*` stops at the first `]`, so an existing array whose
    # strings contain `]` is only partially matched — confirm the mock data
    # never contains one, or switch to a real parser.
    pattern = rf'("question"\s*:\s*"{re.escape(job_group)}岗位群面试题"\s*,\s*"subQuestions"\s*:\s*)\[[^\]]*\]'

    # Use a function replacement: re.sub processes backslash escapes in string
    # templates, which would turn JSON escapes such as `\n` into raw newlines
    # (invalid inside a JS string literal) and raise on sequences like `\u`.
    new_content = re.sub(
        pattern,
        lambda match, payload=sub_questions_str: match.group(1) + payload,
        content,
        flags=re.DOTALL,
    )

    if new_content != content:
        content = new_content
        updated_count += 1
        print(f"✓ 已更新 {job_group}: {len(questions)} 道题")
|
|||
|
|
|
|||
|
|
# Write the patched mock file, then syntax-check it with Node; if the check
# fails, the file is restored from the backup created earlier.
if updated_count > 0:
    with open('src/mocks/resumeInterviewMock.js', mode='w', encoding='utf-8') as mock_out:
        mock_out.write(content)
    print(f"\n✅ 成功更新了 {updated_count} 个岗位群,共 {total_questions} 道面试题")

    # Validate the syntax of the rewritten file.
    import subprocess
    try:
        check = subprocess.run(
            ['node', '-c', 'src/mocks/resumeInterviewMock.js'],
            capture_output=True,
            text=True,
            encoding='utf-8',
        )
        if check.returncode != 0:
            print(f"✗ 语法检查失败: {check.stderr}")
            # Restore the previous contents from the backup.
            with open(backup_file, mode='r', encoding='utf-8') as backup_in:
                backup_content = backup_in.read()
            with open('src/mocks/resumeInterviewMock.js', mode='w', encoding='utf-8') as mock_out:
                mock_out.write(backup_content)
            print("已从备份恢复")
        else:
            print("✓ 语法检查通过")
    except Exception as e:
        # Best effort: any failure (e.g. `node` not being installed) is only
        # reported, never raised.
        print(f"错误: {e}")
|