#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Reorganize interview-question data: only the data is changed, not the code structure.

The questions attached to the individual positions are removed and the parsed
questions of each category are written into the ``questions`` field of the
matching industry.
"""

import json
import re
import traceback
from datetime import datetime

# Input data set, and the mock file that is rewritten in place.
HEALTH_RESUME_PATH = '网页未导入数据/大健康产业/大健康岗位简历.json'
MOCK_FILE_PATH = 'src/mocks/resumeInterviewMock.js'

# Industry name (as it appears in the mock file) -> interview-question category
# (as it appears in the resume data).
INDUSTRY_CATEGORY_MAPPING = {
    "健康管理": "健康管理类岗位面试题",
    "健康检查": "健康检查类岗位面试题",
    "康复治疗": "康复治疗类岗位面试题",
    "医疗美容": "医疗美容类岗位面试题",
    "运营管理": "运营类岗位面试题",
    "心理健康": "心理健康类岗位面试题",
    "供应链管理": "供应链类岗位面试题",
    "药品制造": "药品制造类岗位面试题",
    "检测分析": "检测分析类岗位面试题",
    "临床研究": "临床研究类岗位面试题",
}

# "# 一、Title" style top-level headings; the capture group keeps the title.
_SECTION_HEADING_RE = re.compile(r'\n# ([一二三四五六七八九十]+、[^#\n]+)')
# "1. " style question numbers at the start of a line ("3.5" is not a number marker).
_QUESTION_NUMBER_RE = re.compile(r'^\s*\d+\.(?!\d)\s*', re.MULTILINE)
# Marker that introduces the sample answer (full-width or ASCII colon).
_ANSWER_MARKER_RE = re.compile(r'示例答案\s*[::]\s*')
# Start of a `"questions": [` field in the mock file.
_QUESTIONS_FIELD_RE = re.compile(r'"questions": \[')


def load_health_resume_data():
    """Load the health-industry resume data set.

    Returns:
        The parsed JSON document, or ``None`` when the file cannot be read or
        is not valid JSON (the error is printed).
    """
    try:
        with open(HEALTH_RESUME_PATH, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading health resume data: {e}")
        return None


def _parse_question_block(block):
    """Split the text of one numbered question into ``(question, answer)``.

    Lines before the answer marker form the question, lines after it form the
    answer; each group is joined with single spaces. Text that follows the
    marker on the same line is kept as the start of the answer.
    """
    question_lines = []
    answer_lines = []
    in_answer = False

    for raw_line in block.split('\n'):
        line = raw_line.strip()
        marker = _ANSWER_MARKER_RE.match(line)
        if marker:
            in_answer = True
            line = line[marker.end():]
        if not line:
            continue
        if in_answer:
            answer_lines.append(line)
        else:
            question_lines.append(line)

    return " ".join(question_lines), " ".join(answer_lines)


def parse_interview_content_to_flat_array(content):
    """Parse interview-question markdown into a flat list of Q&A dicts.

    The text is split into sections on ``# 一、...`` headings and each section
    into questions on ``N.`` markers at the start of a line. Text that precedes
    the first numbered question of a section is ignored.

    Note: any line that starts with ``N.`` begins a new question, so a numbered
    list inside an answer is split into separate entries.

    Args:
        content: The raw interview-question text; may be empty or ``None``.

    Returns:
        A list of ``{"id": "qN", "question": ..., "answer": ...}`` dicts,
        numbered consecutively across all sections. Empty when ``content`` is
        empty or contains no section heading.
    """
    if not content:
        return []

    # The heading pattern needs a preceding newline, so add one when the text
    # starts directly with a heading.
    if content.startswith('# '):
        content = '\n' + content

    sections = _SECTION_HEADING_RE.split(content)
    if len(sections) < 2:
        return []

    questions = []
    # split() yields [preamble, title1, body1, title2, body2, ...].
    for section_body in sections[2::2]:
        blocks = _QUESTION_NUMBER_RE.split(section_body)
        # blocks[0] is whatever precedes the first question number.
        for block in blocks[1:]:
            question_text, answer_text = _parse_question_block(block)
            if question_text:
                questions.append({
                    "id": f"q{len(questions) + 1}",
                    "question": question_text,
                    "answer": answer_text
                })

    return questions


def _find_closing_bracket(text, open_index):
    """Return the index of the ``]`` matching the ``[`` at ``open_index``.

    Brackets inside double-quoted string literals are ignored and backslash
    escapes inside those literals are honoured. Returns ``-1`` when the
    bracket is never closed.
    """
    depth = 0
    in_string = False
    i = open_index
    length = len(text)

    while i < length:
        ch = text[i]
        if in_string:
            if ch == '\\':
                i += 2  # skip the escaped character
                continue
            if ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == '[':
            depth += 1
        elif ch == ']':
            depth -= 1
            if depth == 0:
                return i
        i += 1

    return -1


def _strip_questions_fields(text):
    """Return ``text`` with every ``"questions": [...]`` field removed.

    Exactly one adjacent comma is removed together with the field, so the
    remaining fields stay correctly separated whether the field was first,
    last or in the middle of its object. Fields whose array is never closed
    are left untouched.
    """
    pieces = []
    cursor = 0
    search_from = 0
    length = len(text)

    while True:
        match = _QUESTIONS_FIELD_RE.search(text, search_from)
        if match is None:
            break
        close = _find_closing_bracket(text, match.end() - 1)
        if close == -1:
            search_from = match.end()
            continue

        start, end = match.start(), close + 1

        before = start
        while before > cursor and text[before - 1].isspace():
            before -= 1
        after = end
        while after < length and text[after].isspace():
            after += 1

        if before > cursor and text[before - 1] == ',':
            # Drop the leading comma; a trailing comma (if any) keeps
            # separating the neighbouring fields.
            start = before - 1
        elif after < length and text[after] == ',':
            # First field of the object: drop the trailing comma instead.
            end = after + 1
            while end < length and text[end].isspace():
                end += 1

        pieces.append(text[cursor:start])
        cursor = search_from = end

    pieces.append(text[cursor:])
    return ''.join(pieces)


def _insert_industry_questions(text, industry_name, questions_json):
    """Append a ``questions`` field after the ``positions`` array of an industry.

    Args:
        text: Content of the mock file.
        industry_name: Value of the industry's ``"name"`` field.
        questions_json: Already serialized JSON array to insert verbatim.

    Returns:
        ``(new_text, inserted)`` where ``inserted`` is the number of industry
        entries that received the field.
    """
    header_re = re.compile(
        rf'"name": "{re.escape(industry_name)}"[^}}]*?"positions": \['
    )
    pieces = []
    cursor = 0
    inserted = 0

    for match in header_re.finditer(text):
        if match.start() < cursor:
            # Lies inside a positions array that was already handled.
            continue
        close = _find_closing_bracket(text, match.end() - 1)
        if close == -1:
            continue
        pieces.append(text[cursor:close + 1])
        # Plain concatenation: the JSON must not pass through re.sub's
        # replacement-template processing, which would rewrite its backslashes.
        pieces.append(',\n "questions": ' + questions_json)
        cursor = close + 1
        inserted += 1

    pieces.append(text[cursor:])
    return ''.join(pieces), inserted


def _collect_category_questions(health_data):
    """Parse the questions of each category; the first item of a category wins."""
    category_questions = {}
    for item in health_data:
        category = item.get('面试题', '')
        interview_content = item.get('面试题内容', '')
        if not category or not interview_content or category in category_questions:
            continue
        questions = parse_interview_content_to_flat_array(interview_content)
        if questions:
            category_questions[category] = questions
    return category_questions


def reorganize_questions_by_industry():
    """Move interview questions from position level to industry level.

    Loads the resume data, parses the questions of every category, writes a
    timestamped backup of the mock file next to it, removes all existing
    ``questions`` fields from the mock file and inserts the parsed questions
    after the ``positions`` array of each mapped industry.

    Returns:
        ``True`` on success, ``False`` when the data cannot be loaded or any
        step raises (the error and traceback are printed).
    """
    try:
        health_data = load_health_resume_data()
        if not health_data:
            print("Failed to load health resume data")
            return False

        category_questions = _collect_category_questions(health_data)
        print(f"解析了 {len(category_questions)} 个面试题类别")

        with open(MOCK_FILE_PATH, 'r', encoding='utf-8') as f:
            content = f.read()

        # Keep a backup of the untouched file before rewriting it.
        backup_filename = f'{MOCK_FILE_PATH}.backup_{datetime.now().strftime("%Y%m%d_%H%M%S")}'
        with open(backup_filename, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"已创建备份文件: {backup_filename}")

        # First drop every existing questions field (position level).
        updated_content = _strip_questions_fields(content)

        for industry_name, category in INDUSTRY_CATEGORY_MAPPING.items():
            questions_data = category_questions.get(category)
            if not questions_data:
                continue

            questions_json = json.dumps(questions_data, ensure_ascii=False, indent=6)
            updated_content, inserted = _insert_industry_questions(
                updated_content, industry_name, questions_json
            )
            if inserted:
                print(f"✅ 为 {industry_name} 行业添加了面试题 ({len(questions_data)} 个问题)")
            else:
                print(f"⚠️ 未在文件中找到 {industry_name} 行业,未添加面试题")

        with open(MOCK_FILE_PATH, 'w', encoding='utf-8') as f:
            f.write(updated_content)

        print("面试题数据重组完成!")
        return True

    except Exception as e:
        # Top-level boundary of the script: report and signal failure.
        print(f"重组失败: {e}")
        traceback.print_exc()
        return False


if __name__ == "__main__":
    reorganize_questions_by_industry()