Files
teach_sys_Demo/analyze_data_mismatch.cjs

83 lines
3.1 KiB
JavaScript
Raw Permalink Normal View History

const fs = require('fs');
const path = require('path');
// Load the JSON export and parse it. The path is resolved relative to this
// script's directory, so the script can be launched from any working directory.
const jsonPath = path.join(__dirname, '网页未导入数据/文旅产业/文旅_作业海报.json');
const jsonText = fs.readFileSync(jsonPath, { encoding: 'utf-8' });
const jsonData = JSON.parse(jsonText);

// Load mockData.js as plain text. It is never executed or imported; the
// steps below inspect it with regular expressions only.
const mockDataPath = path.join(__dirname, 'src/data/mockData.js');
const mockContent = fs.readFileSync(mockDataPath, { encoding: 'utf-8' });
// Collect the course names from the JSON export by reading the '课程名称'
// field of every record, then de-duplicate them (first-seen order is kept).
const jsonCourseNames = jsonData.map((record) => record['课程名称']);
const uniqueJsonNames = [...new Set(jsonCourseNames)];

// Locate the body of the `homework: [ ... ]` array inside the mockData.js text.
// The closing bracket is anchored on the trailing comment
// `// 1v1定制求职策略数据`, so the lazy match stops at that exact `],`.
const unitsSection = mockContent.match(/homework:\s*\[([\s\S]*?)\]\s*,\s*\/\/ 1v1定制求职策略数据/);
if (!unitsSection) {
  console.log('无法找到homework数据');
  process.exit(1);
}
const homeworkContent = unitsSection[1];

// Find every `units: [ ... ],` array that is followed by the comment
// `// 保留原始list用于兼容`. Because the pattern is global, `match` returns the
// full matched substrings (or null when nothing matches), not capture groups.
const unitsMatch = homeworkContent.match(/units:\s*\[([\s\S]*?)\]\s*,\s*\/\/ 保留原始list用于兼容/g);

// Within each units array, a course is recognised by a `name: "..."` property
// immediately followed by a `level:` property; only the name is captured.
// A null `unitsMatch` yields an empty list.
const mockCourseNames = (unitsMatch || []).flatMap((unitText) =>
  Array.from(unitText.matchAll(/name:\s*"([^"]+)",\s*level:/g), (hit) => hit[1])
);
// Print overall counts for both data sources.
console.log('===== 数据统计 =====');
console.log('JSON文件总课程数:', jsonData.length);
console.log('JSON唯一课程名数:', uniqueJsonNames.length);
console.log('mockData units中课程数:', mockCourseNames.length);

// Build lookup sets once so each membership test below is O(1) instead of a
// linear scan of the other list. Set.has and Array.includes both compare with
// SameValueZero, so the results are the same as with includes().
const jsonNameSet = new Set(uniqueJsonNames);
const mockNameSet = new Set(mockCourseNames);

// Split the mockData course names by whether they also appear in the JSON.
// Duplicates in mockCourseNames are counted once per occurrence.
console.log('\n===== 匹配情况 =====');
const matched = mockCourseNames.filter((name) => jsonNameSet.has(name));
const notMatched = mockCourseNames.filter((name) => !jsonNameSet.has(name));
console.log('成功匹配的课程数:', matched.length);
console.log('未匹配的课程数:', notMatched.length);
if (notMatched.length > 0) {
  console.log('\n未在JSON中找到的课程:');
  notMatched.forEach((name) => console.log(` - ${name}`));
}

// Report JSON course names that mockData never references.
console.log('\n===== JSON中未使用的课程 =====');
const unusedInMock = uniqueJsonNames.filter((name) => !mockNameSet.has(name));
console.log(`JSON中有但mockData没有使用的课程数: ${unusedInMock.length}`);
// A sample (at most 20 names) is listed only when fewer than 50 names are
// unused; with 50 or more, only the count printed above is shown.
if (unusedInMock.length > 0 && unusedInMock.length < 50) {
  console.log('\n部分未使用的课程:');
  unusedInMock.slice(0, 20).forEach((name) => console.log(` - ${name}`));
  if (unusedInMock.length > 20) {
    console.log(` ... 还有 ${unusedInMock.length - 20}`);
  }
}
// Per-section breakdown: how many courses each named section holds and how
// many `imageUrl:` properties appear inside its units array.
console.log('\n===== 各section课程数 =====');

// Splitting on a pattern with a capture group interleaves the captured section
// names with the text around them: odd indexes hold a section name, and the
// element right after it holds the text that follows that name.
const sections = homeworkContent.split(/name:\s*"(复合能力课|垂直能力课)"/);
const sectionUnitsPattern = /units:\s*\[([\s\S]*?)\]\s*,\s*\/\/ 保留原始list/;
const sectionCoursePattern = /name:\s*"([^"]+)",\s*level:/g;

sections.forEach((label, position) => {
  // Even positions are the text between section names, not names themselves.
  if (position % 2 === 0) {
    return;
  }
  const followingText = sections[position + 1];
  const unitsHit = followingText.match(sectionUnitsPattern);
  // Sections without a recognisable units array are skipped silently.
  if (!unitsHit) {
    return;
  }
  const unitsBody = unitsHit[1];
  const courseHits = Array.from(unitsBody.matchAll(sectionCoursePattern));
  console.log(`${label}: ${courseHits.length}个课程`);
  // Count occurrences of the `imageUrl:` property inside the units array.
  const imageUrlHits = unitsBody.match(/imageUrl:/g);
  const imageUrlCount = imageUrlHits ? imageUrlHits.length : 0;
  console.log(` - 有imageUrl的: ${imageUrlCount}`);
});