171 lines
4.9 KiB
JavaScript
171 lines
4.9 KiB
JavaScript
|
|
const fs = require('fs');
|
|||
|
|
const path = require('path');
|
|||
|
|
|
|||
|
|
// Locations of the three CSV inputs, all resolved against the parent of this
// script's directory.
const BASE_DIR = path.join(__dirname, '..');

const [COMPANY_CSV, IMAGE_CSV, BUSINESS_CSV] = [
  '公司介绍.csv',
  '企业图片.csv',
  '企业业务板块和内推岗位.csv',
].map((fileName) => path.join(BASE_DIR, fileName));
|
|||
|
|
|
|||
|
|
// CSV解析函数
|
|||
|
|
/**
 * Parse CSV text into an array of row objects keyed by the header row.
 *
 * Rules implemented by the scanner:
 * - The first non-blank record supplies the column names.
 * - Commas and line breaks inside double quotes belong to the field; a doubled
 *   quote (`""`) inside a quoted field yields one literal quote.
 * - Records end at `\n`, `\r` or `\r\n`; blank lines are skipped.
 * - Every field (header names included) is trimmed.
 * - A data record whose field count differs from the header count is dropped
 *   without any notice.
 *
 * @param {string} content - Full CSV text.
 * @returns {Object[]} One object per accepted data record, mapping header name
 *   to the trimmed field value. Empty when there is no data record.
 */
function parseCSV(content) {
  const data = [];
  let headers = null;
  let currentRow = [];
  let currentField = '';
  let inQuotes = false;

  // Close the record accumulated so far. Shared by the line-break branch and
  // the end-of-input flush so both paths apply exactly the same rules.
  const finishRecord = () => {
    // Nothing accumulated: a blank line, or the tail after a final newline.
    if (!currentField && currentRow.length === 0) return;

    currentRow.push(currentField.trim());

    if (!headers) {
      headers = currentRow;
    } else if (currentRow.length === headers.length) {
      const fields = currentRow;
      const row = {};
      headers.forEach((header, index) => {
        row[header] = fields[index];
      });
      data.push(row);
    }

    currentRow = [];
    currentField = '';
  };

  for (let i = 0; i < content.length; i++) {
    const char = content[i];
    const nextChar = content[i + 1];

    if (char === '"') {
      if (inQuotes && nextChar === '"') {
        // Escaped quote inside a quoted field: keep one, skip its twin.
        currentField += '"';
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (char === ',' && !inQuotes) {
      currentRow.push(currentField.trim());
      currentField = '';
    } else if ((char === '\n' || char === '\r') && !inQuotes) {
      finishRecord();
      // Treat CRLF as a single line break.
      if (char === '\r' && nextChar === '\n') {
        i++;
      }
    } else {
      currentField += char;
    }
  }

  // Input that does not end with a line break still has a pending record.
  finishRecord();

  return data;
}
|
|||
|
|
|
|||
|
|
/**
 * Pull the headquarters city out of a free-text region description.
 *
 * Looks for "总部" followed by a colon (ASCII or full-width) and takes the run
 * of characters up to the next space or line break. A leading province name
 * from a fixed list is removed, and "市" is appended unless the value already
 * ends in "市" or names an autonomous / special administrative region.
 *
 * @param {string} region - Region text, e.g. "总部:江苏苏州".
 * @returns {string} Normalised city name, or '' when no headquarters city can
 *   be determined.
 */
function extractCity(region) {
  if (!region) return '';

  // Accept both ':' and the full-width ':' after "总部".
  const match = region.match(/总部[::]\s*([^ \n]+)/);
  if (!match) return '';

  const city = match[1]
    .trim()
    .replace(/^(江苏|浙江|广东|山东|河北|河南|四川|湖北|湖南|安徽|福建|陕西|辽宁)\s*/, '');

  // Only a province (or nothing usable) was captured: report "no city" so the
  // caller can flag the record, instead of fabricating the bare suffix "市".
  if (!city) return '';

  const needsSuffix =
    !city.endsWith('市') && !city.includes('自治') && !city.includes('特别行政区');

  return needsSuffix ? `${city}市` : city;
}
|
|||
|
|
|
|||
|
|
// Read and parse the three CSV inputs synchronously, in the order
// company -> image -> business.
console.log('读取CSV文件...\n');

const [companyData, imageData, businessData] = [COMPANY_CSV, IMAGE_CSV, BUSINESS_CSV].map(
  (csvPath) => parseCSV(fs.readFileSync(csvPath, 'utf-8'))
);

console.log(`✅ 读取 ${companyData.length} 条公司介绍数据`);
console.log(`✅ 读取 ${imageData.length} 条企业图片数据`);
console.log(`✅ 读取 ${businessData.length} 条业务板块数据\n`);
|
|||
|
|
|
|||
|
|
// Index companies by name. The first row seen for a given name wins; rows
// without a name are ignored.
const companiesMap = new Map();

for (const row of companyData) {
  const name = row['企业名称'] || row['✅企业名称'];
  if (!name || companiesMap.has(name)) continue;

  const region = row['地区'] || row['✅地区'] || '';

  companiesMap.set(name, {
    name,
    city: extractCity(region),
    hasImage: false,
    hasSegment: false,
  });
}
|
|||
|
|
|
|||
|
|
// Flag the companies that appear in the image CSV and in the business-segment
// CSV. Both passes resolve the company-name column the same way the company
// CSV is read: the plain header first, then its "✅"-prefixed variant. Rows
// whose name is missing or not present in companiesMap are ignored.
const markCompanies = (rows, flag) => {
  for (const row of rows) {
    const name = row['企业名称'] || row['✅企业名称'];
    const company = name ? companiesMap.get(name) : undefined;
    if (company) {
      company[flag] = true;
    }
  }
};

markCompanies(imageData, 'hasImage');
markCompanies(businessData, 'hasSegment');
|
|||
|
|
|
|||
|
|
// Collect, in Map iteration order, the names of companies lacking an image,
// a business segment, or a city.
const noImage = [];
const noSegment = [];
const noCity = [];

for (const [name, { hasImage, hasSegment, city }] of companiesMap) {
  if (!hasImage) {
    noImage.push(name);
  }
  if (!hasSegment) {
    noSegment.push(name);
  }
  if (!city) {
    noCity.push(name);
  }
}
|
|||
|
|
|
|||
|
|
// Print the data-quality report: one numbered section per kind of missing
// data (skipped when that list is empty), or a single success line when
// nothing is missing.
console.log('========== 数据质量检查结果 ==========\n');

// Print one report section: heading with the count, numbered names, then a
// blank line. Does nothing for an empty list.
const printMissing = (label, names) => {
  if (names.length === 0) return;

  console.log(`❌ ${label} (${names.length}家):`);
  names.forEach((name, index) => {
    console.log(` ${index + 1}. ${name}`);
  });
  console.log('');
};

printMissing('缺少图片的企业', noImage);
printMissing('缺少业务板块的企业', noSegment);
printMissing('缺少城市信息的企业', noCity);

if (noImage.length === 0 && noSegment.length === 0 && noCity.length === 0) {
  console.log('✅ 所有企业数据完整!');
}
|