初始化多多畅职企业内推平台项目
功能特性:
- 3D地球动画与中国地图可视化
- 省份/城市/企业搜索功能
- 308家企业数据展示
- 响应式设计(PC端和移动端)
- 企业详情页面与业务板块展示
- 官网新闻轮播图

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
208
scripts/fixCSV_v2.js
Normal file
208
scripts/fixCSV_v2.js
Normal file
@@ -0,0 +1,208 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
/**
 * Parse CSV text, separating well-formed records from malformed rows.
 *
 * The first non-blank row becomes the header. Every later row is compared
 * against the header width: rows with the same number of fields go to
 * `goodRecords`, all others go to `skippedRows`. Double quotes delimit
 * fields that may contain commas or line breaks, and `""` inside a quoted
 * field is an escaped quote. Every field is trimmed.
 *
 * @param {string} content - Raw CSV text.
 * @returns {{headers: (string[]|null),
 *            goodRecords: Array<{rowNum: number, fields: string[]}>,
 *            skippedRows: Array<{rowNum: number, fields: string[]}>}}
 *   `headers` is null only when the input has no non-blank row. `rowNum`
 *   counts non-blank rows, with the header as row 0.
 */
function parseCSVWithSkipped(content) {
  const goodRecords = [];
  const skippedRows = [];
  let currentRow = [];
  let currentField = '';
  let inQuotes = false;
  let headers = null;
  let rowNum = 0;

  // Close the row being accumulated and file it as header, good or skipped.
  // Shared by the line-break path and the end-of-input path so that a file
  // consisting of a single unterminated line still yields its header.
  const finishRow = () => {
    // Blank lines carry no data and do not consume a row number.
    if (!currentField && currentRow.length === 0) {
      return;
    }
    currentRow.push(currentField.trim());

    if (!headers) {
      headers = currentRow;
    } else {
      const bucket = currentRow.length === headers.length ? goodRecords : skippedRows;
      bucket.push({ rowNum, fields: currentRow });
    }

    currentRow = [];
    currentField = '';
    rowNum++;
  };

  for (let i = 0; i < content.length; i++) {
    const char = content[i];
    const nextChar = content[i + 1];

    if (char === '"') {
      if (inQuotes && nextChar === '"') {
        // Escaped quote inside a quoted field: emit one quote, consume both.
        currentField += '"';
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (char === ',' && !inQuotes) {
      currentRow.push(currentField.trim());
      currentField = '';
    } else if ((char === '\n' || char === '\r') && !inQuotes) {
      finishRow();
      // Treat CRLF as a single line break.
      if (char === '\r' && nextChar === '\n') {
        i++;
      }
    } else {
      currentField += char;
    }
  }

  // Flush the last row when the input does not end with a line break.
  finishRow();

  return { headers, goodRecords, skippedRows };
}
|
||||
|
||||
/**
 * Try to repair malformed rows and merge them back with the good records.
 *
 * Records are emitted in ascending `rowNum` order. For each malformed row:
 * - too few fields: fields of the directly following malformed rows are
 *   appended until the header width is reached; any surplus is cut off.
 *   If the width cannot be reached a warning is logged and the row dropped.
 * - too many fields: the first `headers.length` fields become one record
 *   and the remainder is queued as a new malformed row that may in turn be
 *   completed by the next physical row.
 * - exactly the header width: emitted as is.
 *
 * @param {Array<{rowNum: number, fields: string[]}>} goodRecords - Rows
 *   whose field count already matches the header.
 * @param {Array<{rowNum: number, fields: string[]}>} skippedRows - Rows
 *   whose field count does not match. Not modified by this function.
 * @param {string[]} headers - Header row; its length is the target width.
 * @returns {string[][]} Field arrays of all usable records, in row order.
 */
function fixSkippedRows(goodRecords, skippedRows, headers) {
  const width = headers.length;
  const allRecords = [];
  // Work on a copy: remainder rows are spliced into the queue below, and the
  // caller still needs the original list (e.g. to report its length).
  const pending = [...skippedRows];
  let goodIndex = 0;
  let skipIndex = 0;

  while (goodIndex < goodRecords.length || skipIndex < pending.length) {
    const goodIsNext =
      goodIndex < goodRecords.length &&
      (skipIndex >= pending.length ||
        goodRecords[goodIndex].rowNum < pending[skipIndex].rowNum);

    if (goodIsNext) {
      allRecords.push(goodRecords[goodIndex].fields);
      goodIndex++;
      continue;
    }

    const skipped = pending[skipIndex];

    if (skipped.fields.length < width) {
      // Too few fields: borrow from the malformed rows that follow directly.
      const combined = [...skipped.fields];
      let nextSkipIndex = skipIndex + 1;
      // Remainder rows carry a fractional rowNum (source row + 0.5); floor it
      // so the physical row after the source still counts as adjacent.
      let lastSourceRow = Math.floor(skipped.rowNum);

      while (combined.length < width && nextSkipIndex < pending.length) {
        const nextSkipped = pending[nextSkipIndex];
        if (nextSkipped.rowNum !== lastSourceRow + 1) {
          break;
        }
        combined.push(...nextSkipped.fields);
        lastSourceRow = nextSkipped.rowNum;
        nextSkipIndex++;
      }

      if (combined.length >= width) {
        // Keep exactly `width` fields; anything beyond that is discarded.
        allRecords.push(combined.length === width ? combined : combined.slice(0, width));
        skipIndex = nextSkipIndex;
      } else {
        console.log(`⚠️  无法修复第${skipped.rowNum}行: 字段数${combined.length}/${headers.length}`);
        skipIndex++;
      }
    } else if (skipped.fields.length > width) {
      // Too many fields: the row probably swallowed the start of the next
      // record. Emit the first `width` fields and requeue the rest.
      allRecords.push(skipped.fields.slice(0, width));
      pending.splice(skipIndex + 1, 0, {
        rowNum: Math.floor(skipped.rowNum) + 0.5,
        fields: skipped.fields.slice(width),
      });
      skipIndex++;
    } else {
      // Field count already matches the header.
      allRecords.push(skipped.fields);
      skipIndex++;
    }
  }

  return allRecords;
}
|
||||
|
||||
/**
 * Render one field value for CSV output.
 *
 * A field containing a comma, a double quote, or a line-break character is
 * wrapped in double quotes with embedded quotes doubled; any other field is
 * returned unchanged. Falsy input yields an empty string.
 *
 * @param {string} field - Field value to write.
 * @returns {string} The field, quoted and escaped where required.
 */
function escapeCSVField(field) {
  if (!field) {
    return '';
  }

  const needsQuoting = [',', '"', '\n', '\r'].some((ch) => field.includes(ch));
  if (!needsQuoting) {
    return field;
  }

  const doubledQuotes = field.split('"').join('""');
  return '"' + doubledQuotes + '"';
}
|
||||
|
||||
// Driver: read the source CSV, repair malformed rows, write the result to a
// separate file, then re-parse that file to confirm every row is well-formed.
console.log('正在修复CSV文件...\n');

const csvPath = path.join(__dirname, '..', '公司介绍.csv');
const content = fs.readFileSync(csvPath, 'utf-8');

console.log('步骤1: 解析CSV,区分完整记录和跳过的行...');
const { headers, goodRecords, skippedRows } = parseCSVWithSkipped(content);
if (!headers) {
  // The parser found no header row, so there is nothing to repair; stop
  // here instead of failing on `headers.join` below.
  console.error('❌ CSV文件为空或缺少表头,无法修复。');
  process.exit(1);
}
// Snapshot the count now: the repair step may add synthetic remainder rows
// to this array, which would inflate the "original" figure in the summary.
const originalSkippedCount = skippedRows.length;
console.log(` 表头: ${headers.join(', ')}`);
console.log(` 完整记录: ${goodRecords.length}`);
console.log(` 跳过的行: ${originalSkippedCount}\n`);

console.log('步骤2: 尝试修复跳过的行...');
const allRecords = fixSkippedRows(goodRecords, skippedRows, headers);
console.log(` 修复后总记录数: ${allRecords.length}\n`);

console.log('步骤3: 生成修复后的CSV内容...');
const lines = [headers, ...allRecords].map((row) => row.map(escapeCSVField).join(','));
const fixedContent = lines.join('\n');

// Write to a new file so the original stays untouched until it is reviewed.
const fixedPath = path.join(__dirname, '..', '公司介绍_fixed_v2.csv');
fs.writeFileSync(fixedPath, fixedContent, 'utf-8');

console.log(`✅ 修复完成!`);
console.log(` 原始完整记录: ${goodRecords.length}`);
console.log(` 原始跳过记录: ${originalSkippedCount}`);
console.log(` 修复后总记录: ${allRecords.length}`);
console.log(` 输出文件: 公司介绍_fixed_v2.csv\n`);

// Verification pass: the repaired file should contain no malformed rows.
console.log('正在验证修复后的文件...');
const verifyContent = fs.readFileSync(fixedPath, 'utf-8');
const verifyResult = parseCSVWithSkipped(verifyContent);
console.log(` 验证结果:`);
console.log(` - 完整记录: ${verifyResult.goodRecords.length}`);
console.log(` - 跳过的行: ${verifyResult.skippedRows.length}`);

if (verifyResult.skippedRows.length === 0) {
  console.log('\n✅ 修复成功!所有记录都符合格式要求。');
  console.log('请检查修复后的文件内容,如果正确,可以替换原文件。');
} else {
  console.log(`\n⚠️ 还有 ${verifyResult.skippedRows.length} 行需要手动处理。`);
}
|
||||
Reference in New Issue
Block a user