118 lines
3.4 KiB
JavaScript
118 lines
3.4 KiB
JavaScript
|
|
const fs = require('fs');
|
|||
|
|
const path = require('path');
|
|||
|
|
|
|||
|
|
// Path to the CSV file
|
|||
|
|
// Built from this script's directory (__dirname): one level up, then the file '公司介绍.csv'.
const COMPANY_CSV = path.join(__dirname, '..', '公司介绍.csv');
|
|||
|
|
|
|||
|
|
/**
 * Parse CSV text into an array of records keyed by the header row.
 *
 * Per the original note on this function, the logic is the same as in
 * convertCSV.js; the parsing rules are therefore kept exactly as they were:
 *   - The first non-empty line supplies the headers.
 *   - Double quotes toggle quoted mode; inside quotes, commas and line breaks
 *     are kept as field content and `""` yields one literal `"`.
 *   - Rows end at `\n`, `\r` or `\r\n` (outside quotes) or at end of input.
 *   - Every field is trimmed, including content that was quoted.
 *   - Empty lines are skipped.
 *   - A row whose field count differs from the header count is dropped
 *     silently.
 *
 * @param {string} content - Raw CSV text.
 * @returns {Object[]} One plain object per accepted data row, mapping each
 *   header to that row's (trimmed) string value.
 */
function parseCSV(content) {
  const data = [];
  let headers = null;
  let currentRow = [];
  let currentField = '';
  let inQuotes = false;

  // Close the row being accumulated: store it as the header row, emit it as a
  // record, or discard it when its width does not match the headers. Shared by
  // the line-break branch and the end-of-input handling so both stay in sync.
  const flushRow = () => {
    // Nothing accumulated means an empty line (or nothing after the last break).
    if (!currentField && currentRow.length === 0) {
      return;
    }

    currentRow.push(currentField.trim());
    const cells = currentRow;

    if (!headers) {
      headers = cells;
    } else if (cells.length === headers.length) {
      const row = {};
      headers.forEach((header, index) => {
        row[header] = cells[index];
      });
      data.push(row);
    }

    currentRow = [];
    currentField = '';
  };

  for (let i = 0; i < content.length; i++) {
    const char = content[i];
    const nextChar = content[i + 1];

    if (char === '"') {
      if (inQuotes && nextChar === '"') {
        // Escaped quote inside a quoted field: keep one quote, skip its twin.
        currentField += '"';
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (char === ',' && !inQuotes) {
      currentRow.push(currentField.trim());
      currentField = '';
    } else if ((char === '\n' || char === '\r') && !inQuotes) {
      flushRow();
      // Treat CRLF as a single line break.
      if (char === '\r' && nextChar === '\n') {
        i++;
      }
    } else {
      currentField += char;
    }
  }

  // The last row may not be terminated by a line break.
  flushRow();

  return data;
}
|
|||
|
|
|
|||
|
|
// Read the CSV file and parse it into records.
console.log('读取CSV文件...\n');
const content = fs.readFileSync(COMPANY_CSV, 'utf-8');
const data = parseCSV(content);

console.log(`✅ 解析到 ${data.length} 条数据记录\n`);

// Group record positions by company name (insertion order = first appearance).
const companyNames = new Map();
for (const [position, record] of data.entries()) {
  const name = record['企业名称'] || record['✅企业名称'];
  if (!name) {
    continue;
  }

  // +2 because row 1 is the header and positions start at 0.
  // NOTE: this is the record's ordinal position, so it matches the physical
  // CSV line only when parseCSV skipped no lines and no field spans lines.
  const rowNumber = position + 2;
  const seenAt = companyNames.get(name);
  if (seenAt === undefined) {
    companyNames.set(name, [rowNumber]);
  } else {
    seenAt.push(rowNumber);
  }
}

console.log(`📊 唯一企业数量: ${companyNames.size}\n`);

// Names that occur on more than one record.
const duplicates = Array.from(companyNames, ([name, indices]) => ({ name, indices }))
  .filter((entry) => entry.indices.length > 1);

if (duplicates.length === 0) {
  console.log('✅ 没有发现重复的企业名称\n');
} else {
  console.log(`⚠️ 发现 ${duplicates.length} 个重复的企业名称:\n`);
  for (const { name, indices } of duplicates) {
    console.log(` "${name}" 出现 ${indices.length} 次,在数据行: ${indices.join(', ')}`);
  }
}

// List the company names (first 50 only).
console.log('\n📝 前50个企业名称:');
const names = [...companyNames.keys()];
const previewed = names.slice(0, 50);
for (let i = 0; i < previewed.length; i++) {
  console.log(` ${i + 1}. ${previewed[i]}`);
}

const remaining = names.length - 50;
if (remaining > 0) {
  console.log(` ... 还有 ${remaining} 个企业`);
}

console.log(`\n总计: ${names.length} 家企业`);
|