#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Completely clean and rewrite all interview-question data into a flat structure.

Ensures every position uses the ``questions: [{ id, question, answer }]``
format in the mock file.
"""

import json
import re
import sys
from datetime import datetime

# Input JSON holding the per-position resume/interview data.
_HEALTH_RESUME_PATH = '网页未导入数据/大健康产业/大健康岗位简历.json'
# JavaScript mock file that is rewritten in place.
_MOCK_FILE_PATH = 'src/mocks/resumeInterviewMock.js'
# Line prefix that separates a question from its sample answer.
_ANSWER_MARKER = '示例答案:'


def load_health_resume_data():
    """Load the health-industry position resume data.

    Returns:
        The parsed JSON content of the data file, or ``None`` when the file
        cannot be read or decoded (the error is printed, not raised).
    """
    try:
        with open(_HEALTH_RESUME_PATH, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading health resume data: {e}")
        return None


def _split_question_and_answer(question_block):
    """Split one numbered block into ``(question_text, answer_text)``.

    Lines before the answer marker form the question, lines after it form the
    answer; non-empty lines are stripped and joined with single spaces.
    """
    question_lines = []
    answer_lines = []
    in_answer = False
    for raw_line in question_block.split('\n'):
        line = raw_line.strip()
        if line.startswith(_ANSWER_MARKER):
            in_answer = True
            # Keep any answer text that shares the line with the marker
            # instead of discarding the whole line.
            line = line[len(_ANSWER_MARKER):].strip()
        if not line:
            continue
        if in_answer:
            answer_lines.append(line)
        else:
            question_lines.append(line)
    return " ".join(question_lines), " ".join(answer_lines)


def parse_interview_content_to_flat_array(content):
    """Parse interview-question text into a flat list of Q&A dicts.

    The text is split into sections on headings of the form ``# 一、标题``
    (a Chinese numeral followed by ``、``), then each section is split into
    questions on line-leading numbers such as ``1.``. Inside a question, the
    ``示例答案:`` marker separates the question text from the answer text.

    Args:
        content: The raw interview-question text; may be empty or ``None``.

    Returns:
        A list of ``{"id": "qN", "question": str, "answer": str}`` dicts,
        numbered consecutively across all sections. Returns ``[]`` for empty
        input or when no section heading is found.
    """
    if not content:
        return []

    # The heading pattern is anchored on a preceding newline, so give a
    # heading on the very first line one as well.
    if content.startswith('# '):
        content = '\n' + content

    sections = re.split(r'\n# ([一二三四五六七八九十]+、[^#\n]+)', content)
    if len(sections) < 2:
        return []

    questions = []
    # re.split with one capture group yields
    # [preamble, title1, body1, title2, body2, ...]; only bodies are needed.
    for section_body in sections[2::2]:
        # The question pattern is anchored on a preceding newline too; without
        # this prefix the first question of a stripped section is never split
        # off and gets silently dropped.
        question_parts = re.split(
            r'\n\s*(\d+\.)\s+', '\n' + section_body.strip()
        )
        # Layout: [leading text, "1.", block1, "2.", block2, ...].
        for question_block in question_parts[2::2]:
            question_text, answer_text = _split_question_and_answer(
                question_block.strip()
            )
            if question_text:
                questions.append({
                    "id": f"q{len(questions) + 1}",
                    "question": question_text,
                    "answer": answer_text
                })

    return questions


def _render_questions_js(questions):
    """Render the question dicts as a JavaScript array literal string."""
    # json.dumps yields a valid JS string literal and escapes backslashes and
    # control characters as well as quotes; ensure_ascii=False keeps the
    # Chinese text readable in the generated file.
    rendered = [
        ' { "id": %s, "question": %s, "answer": %s }' % (
            json.dumps(q['id'], ensure_ascii=False),
            json.dumps(q['question'], ensure_ascii=False),
            json.dumps(q['answer'], ensure_ascii=False),
        )
        for q in questions
    ]
    return '[ %s ]' % ',\n'.join(rendered)


def _rewrite_position_questions(matched_text, questions_js):
    """Return one position definition with its ``questions`` field replaced.

    Callable replacements are used for the insertion so that backslashes in
    the question text are not interpreted as ``re.sub`` template escapes.
    """
    # Remove any existing questions field, whatever its format: first the
    # simple case without nested brackets, then the general case.
    cleaned_text = re.sub(
        r',?\s*"questions": \[[^\]]*?\](?:\s*,\s*)?\s*(?=\]|\})',
        '',
        matched_text,
        flags=re.DOTALL
    )
    cleaned_text = re.sub(
        r',?\s*"questions": \[[\s\S]*?\](?:\s*,\s*)?\s*(?=\]|\})',
        '',
        cleaned_text,
        flags=re.DOTALL
    )

    if '"requirements":' in cleaned_text:
        # Insert the new questions field right after the requirements array.
        cleaned_text = re.sub(
            r'("requirements": \[[^\]]*?\])',
            lambda m: m.group(1) + ',\n "questions": ' + questions_js,
            cleaned_text,
            flags=re.DOTALL
        )
    else:
        # No requirements field: append before the closing bracket/brace.
        cleaned_text = re.sub(
            r'(\s+)(\]|\})\s*$',
            lambda m: (
                ',\n "questions": ' + questions_js + m.group(1) + m.group(2)
            ),
            cleaned_text,
            flags=re.DOTALL
        )

    return cleaned_text


def clean_and_update_all_questions():
    """Rewrite the ``questions`` field of every known position in the mock file.

    Loads the resume data, parses each item's ``面试题内容`` into flat Q&A
    entries keyed by ``岗位名称``, rewrites the matching position definitions
    in the mock JavaScript file, strips leftover ``subQuestions`` structures,
    and writes the file back in place.

    Returns:
        ``True`` on success; ``False`` when the data cannot be loaded or any
        exception occurs (the traceback is printed).
    """
    try:
        health_data = load_health_resume_data()
        if not health_data:
            print("Failed to load health resume data")
            return False

        # Map each position name to its parsed questions.
        position_to_questions = {}
        for item in health_data:
            position_name = item.get('岗位名称', '')
            interview_content = item.get('面试题内容', '')
            if position_name and interview_content:
                position_to_questions[position_name] = (
                    parse_interview_content_to_flat_array(interview_content)
                )

        print(f"解析了 {len(position_to_questions)} 个岗位的面试题")

        with open(_MOCK_FILE_PATH, 'r', encoding='utf-8') as f:
            content = f.read()

        updated_content = content
        update_count = 0

        for position_name, questions in position_to_questions.items():
            if not questions:
                continue

            questions_js = _render_questions_js(questions)

            # Match from this position's "title" up to the next position, the
            # closing "];" of the array, or the next "const" declaration.
            position_pattern = rf'"title": "{re.escape(position_name)}"[\s\S]*?(?="title":|^\]\s*;\s*$|^const\s+)'

            new_content = re.sub(
                position_pattern,
                lambda m, js=questions_js: _rewrite_position_questions(
                    m.group(0), js
                ),
                updated_content,
                flags=re.MULTILINE
            )

            if new_content != updated_content:
                updated_content = new_content
                update_count += 1
                print(f"✅ 重写 {position_name} 的面试题 ({len(questions)} 个问题)")

        # Final sweep for leftovers in the old format: empty any questions
        # array that still contains subQuestions ...
        updated_content = re.sub(
            r'"questions": \[\s*\{[^}]*"subQuestions"[\s\S]*?\}\s*\]',
            '"questions": []',
            updated_content,
            flags=re.DOTALL
        )

        # ... and drop any stand-alone subQuestions structure.
        updated_content = re.sub(
            r'"subQuestions": \[[\s\S]*?\][\s,]*',
            '',
            updated_content,
            flags=re.DOTALL
        )

        with open(_MOCK_FILE_PATH, 'w', encoding='utf-8') as f:
            f.write(updated_content)

        print(f"\n🎉 成功清理并重写 {update_count} 个岗位的面试题数据!")
        return True

    except Exception as e:
        # Top-level boundary of the script: report the failure and signal it
        # through the return value instead of crashing.
        print(f"Error cleaning interview questions: {e}")
        import traceback
        traceback.print_exc()
        return False


def main():
    """Run the clean-and-rewrite job and report the outcome.

    Returns:
        ``True`` if the rewrite succeeded, ``False`` otherwise.
    """
    print("开始完全清理并重写面试题数据...")

    success = clean_and_update_all_questions()

    if success:
        print("面试题数据清理和重写完成!")
    else:
        print("面试题数据清理失败!")

    return success


if __name__ == "__main__":
    main()