Daniel пре 2 месеци
комит
048ae8d71d
1 измењених фајлова са 185 додато и 0 уклоњено
  1. +185
    -0
      gewinnspiel/cleanUpCsv.php

+ 185
- 0
gewinnspiel/cleanUpCsv.php Прегледај датотеку

@@ -0,0 +1,185 @@
<?php

// Require the CSV file name as the first command-line argument.
if ($argc < 2) {
    echo "Usage: php " . basename(__FILE__) . " <filename.csv>\n";
    exit(1);
}

// The input file is looked up in the input-csv/ directory next to this script.
$filename = $argv[1];
$filepath = __DIR__ . '/input-csv/' . $filename;

if (!file_exists($filepath)) {
    echo "Fehler: Datei '$filepath' nicht gefunden.\n";
    exit(1);
}

$handle = fopen($filepath, 'r');
if (!$handle) {
    echo "Fehler: Datei konnte nicht geöffnet werden.\n";
    exit(1);
}

// Zero-based column indexes of the fields that are checked for duplicates.
const COL_STREET = 5;
const COL_EMAIL = 7;

// Auto-detect the delimiter: whichever of ',' and ';' occurs more often in
// the first line wins (',' on a tie).
$firstLine = fgets($handle);
if ($firstLine === false) {
    // Empty or unreadable file: without a first line there is no header to
    // parse, and continuing would fail later with a TypeError.
    fclose($handle);
    echo "Fehler: Datei '$filepath' ist leer oder nicht lesbar.\n";
    exit(1);
}
$firstLine = str_replace("\r", '', $firstLine);
$delimiter = substr_count($firstLine, ',') >= substr_count($firstLine, ';') ? ',' : ';';
rewind($handle);

echo "Erkannter Delimiter: '$delimiter'\n";

// Cells are converted from ISO-8859-1 to UTF-8. fgetcsv() yields [null] for a
// blank line, so the cell is cast to string first to avoid passing null to
// mb_convert_encoding() (deprecated since PHP 8.1).
$toUtf8 = static fn($cell) => mb_convert_encoding((string) $cell, 'UTF-8', 'ISO-8859-1');

// Read the header record.
$header = fgetcsv($handle, 0, $delimiter);
if ($header === false) {
    fclose($handle);
    echo "Fehler: Kopfzeile konnte nicht gelesen werden.\n";
    exit(1);
}
$header = array_map($toUtf8, $header);

// $emailMap / $streetMap: lower-cased, trimmed value => ['original' => value
// as last seen in the file, 'rows' => list of row numbers holding that value].
// $rows: row number => converted cells. The header counts as row 1, so the
// first data record is row 2.
$emailMap = [];
$streetMap = [];
$rows = [];
$rowNumber = 1;

while (($row = fgetcsv($handle, 0, $delimiter)) !== false) {
    $rowNumber++;
    $row = array_map($toUtf8, $row);
    $rows[$rowNumber] = $row;

    // Short rows simply have no e-mail / street value.
    $emailOriginal = trim($row[COL_EMAIL] ?? '');
    $streetOriginal = trim($row[COL_STREET] ?? '');
    $email = mb_strtolower($emailOriginal);
    $street = mb_strtolower($streetOriginal);

    if ($email !== '') {
        $emailMap[$email]['original'] = $emailOriginal;
        $emailMap[$email]['rows'][] = $rowNumber;
    }

    if ($street !== '') {
        $streetMap[$street]['original'] = $streetOriginal;
        $streetMap[$street]['rows'][] = $rowNumber;
    }
}

fclose($handle);

// -------------------------------------------------------
// Hilfsfunktionen
// -------------------------------------------------------

/**
 * Prints one duplicate group as a fixed-width text table to standard output.
 *
 * Every cell is cut to 22 characters and padded with spaces to exactly that
 * width. Lengths are measured in characters, not bytes, so multibyte UTF-8
 * text (umlauts, ß) keeps the columns aligned. The row number is right-aligned
 * to the width of the "Zeile" heading so data cells line up with the header.
 *
 * @param array  $data   Group to print: 'original' is the value shown in the
 *                       title line, 'rows' the list of row numbers to print.
 * @param string $label  Caption printed before the group's value.
 * @param array  $header Column titles.
 * @param array  $rows   All data rows, keyed by row number; each row is a
 *                       list of cell strings.
 */
function printBlock(array $data, string $label, array $header, array $rows): void
{
    $colWidth = 22;
    $rowNumWidth = 5; // character width of the "Zeile" column heading

    // Truncate and pad by character count; str_pad() would count bytes and
    // under-pad any cell containing multibyte characters.
    $fit = static function ($cell) use ($colWidth): string {
        $text = mb_substr((string) $cell, 0, $colWidth, 'UTF-8');

        return $text . str_repeat(' ', $colWidth - mb_strlen($text, 'UTF-8'));
    };

    $separator = str_repeat('-', count($header) * ($colWidth + 3)) . "\n";
    $headerLine = implode(' | ', array_map($fit, $header));

    echo "\n" . $separator;
    echo "$label: {$data['original']}\n";
    echo $separator;
    echo "Zeile | $headerLine\n";
    echo $separator;

    foreach ($data['rows'] as $rowNum) {
        $cells = array_map($fit, $rows[$rowNum]);
        echo str_pad((string) $rowNum, $rowNumWidth, ' ', STR_PAD_LEFT) . ' | ' . implode(' | ', $cells) . "\n";
    }

    echo $separator;
}

/**
 * Shows one duplicate group and asks on STDIN whether its surplus rows should
 * be marked for deletion.
 *
 * The first row number of the group is always kept. Answering "1" marks every
 * other row of the group in $rowsToDelete; any other answer leaves
 * $rowsToDelete untouched.
 *
 * @param array  $data         Group with 'original' (displayed value) and
 *                             'rows' (row numbers sharing that value).
 * @param string $label        Caption passed through to printBlock().
 * @param array  $header       Column titles passed through to printBlock().
 * @param array  $rows         All data rows keyed by row number.
 * @param array  $rowsToDelete Set of row numbers to drop (row number => true),
 *                             extended in place.
 */
function askDelete(array $data, string $label, array $header, array $rows, array &$rowsToDelete): void
{
    printBlock($data, $label, $header, $rows);

    $survivor = $data['rows'][0];
    $surplus = array_slice($data['rows'], 1);
    $surplusList = implode(', ', $surplus);

    echo "Behalten: Zeile $survivor – Löschen: Zeilen " . $surplusList . "\n";
    echo "[1] Löschen [Enter] Überspringen: ";

    $answer = trim(fgets(STDIN));

    // Anything other than an explicit "1" counts as "skip".
    if ($answer !== '1') {
        echo "Übersprungen.\n";

        return;
    }

    foreach ($surplus as $lineNo) {
        $rowsToDelete[$lineNo] = true;
    }

    echo "Zeilen " . $surplusList . " zum Löschen vorgemerkt.\n";
}

// -------------------------------------------------------
// STEP 1: duplicate e-mail addresses
// -------------------------------------------------------
$emailDuplicates = array_filter($emailMap, fn($entry) => count($entry['rows']) > 1);

// Set of row numbers the user chose to drop (row number => true).
$rowsToDelete = [];

echo "\n========================================\n";
echo " SCHRITT 1: Doppelte E-Mail-Adressen\n";
echo "========================================\n";

if (empty($emailDuplicates)) {
    echo "Keine gefunden.\n";
} else {
    echo count($emailDuplicates) . " doppelte E-Mail-Adresse(n) gefunden.\n";

    foreach ($emailDuplicates as $data) {
        askDelete($data, 'E-Mail', $header, $rows, $rowsToDelete);
    }
}

// -------------------------------------------------------
// STEP 2: duplicate streets (rows already marked for deletion are hidden)
// -------------------------------------------------------
echo "\n========================================\n";
echo " SCHRITT 2: Doppelte Straßen\n";
echo "========================================\n";

// Drop rows already marked in step 1 from every street group and keep only
// the groups that still contain more than one row.
$streetDuplicates = [];
foreach ($streetMap as $key => $data) {
    $remaining = array_values(array_filter($data['rows'], fn($r) => !isset($rowsToDelete[$r])));
    if (count($remaining) > 1) {
        $data['rows'] = $remaining;
        $streetDuplicates[$key] = $data;
    }
}

if (empty($streetDuplicates)) {
    echo "Keine gefunden.\n";
} else {
    echo count($streetDuplicates) . " doppelte Straße(n) gefunden.\n";

    foreach ($streetDuplicates as $data) {
        askDelete($data, 'Straße', $header, $rows, $rowsToDelete);
    }
}

// -------------------------------------------------------
// Write the CSV only if rows were marked for deletion
// -------------------------------------------------------
if (empty($rowsToDelete)) {
    echo "\nKeine Zeilen zum Löschen vorgemerkt. Keine Ausgabedatei erstellt.\n";
    exit(0);
}

$outputDir = __DIR__ . '/output-csv';
$outputPath = $outputDir . '/' . $filename;

// Create the output directory on demand. The second is_dir() check covers the
// case where the directory appeared between the first check and mkdir().
if (!is_dir($outputDir) && !mkdir($outputDir, 0755, true) && !is_dir($outputDir)) {
    echo "Fehler: Ausgabeverzeichnis '$outputDir' konnte nicht erstellt werden.\n";
    exit(1);
}

// Without this check a failed fopen() would hand false to fputcsv() and abort
// with a TypeError instead of a readable message.
$out = fopen($outputPath, 'w');
if (!$out) {
    echo "Fehler: Ausgabedatei '$outputPath' konnte nicht geöffnet werden.\n";
    exit(1);
}

fputcsv($out, $header, $delimiter);

// Copy every row that was not marked for deletion, in original order.
foreach ($rows as $rowNum => $row) {
    if (isset($rowsToDelete[$rowNum])) {
        continue;
    }
    fputcsv($out, $row, $delimiter);
}

fclose($out);

echo "\nBereinigte CSV gespeichert: output-csv/$filename\n";
echo "Gelöschte Zeilen gesamt: " . count($rowsToDelete) . "\n";

Loading…
Откажи
Сачувај