| @@ -0,0 +1,185 @@ | |||
| <?php | |||
// Command-line handling: exactly one argument is expected, the name of a CSV
// file located in the "input-csv" folder next to this script.
if ($argc < 2) {
    echo "Usage: php " . basename(__FILE__) . " <filename.csv>\n";
    exit(1);
}

$filename = $argv[1];
$filepath = __DIR__ . '/input-csv/' . $filename;

// is_file() rather than file_exists(): file_exists() is also true for
// directories, so e.g. an empty argument (which resolves to the input-csv
// folder itself) would slip through and fail later while reading.
if (!is_file($filepath)) {
    echo "Fehler: Datei '$filepath' nicht gefunden.\n";
    exit(1);
}

// $handle stays open for the parsing code that follows.
$handle = fopen($filepath, 'r');
if ($handle === false) {
    echo "Fehler: Datei konnte nicht geöffnet werden.\n";
    exit(1);
}
// Zero-based column indexes of the fields that are checked for duplicates.
const COL_STREET = 5;
const COL_EMAIL = 7;

// Auto-detect the delimiter: whichever of ',' and ';' occurs more often in
// the first line wins; a tie goes to ','.
$firstLine = fgets($handle);
if ($firstLine === false) {
    // Nothing to read (empty file). Without this guard the header parsing
    // below would hand `false` to array_map() and throw a TypeError.
    fclose($handle);
    echo "Fehler: Datei ist leer oder nicht lesbar.\n";
    exit(1);
}
$firstLine = str_replace("\r", '', $firstLine);
$delimiter = substr_count($firstLine, ',') >= substr_count($firstLine, ';') ? ',' : ';';
rewind($handle);
echo "Erkannter Delimiter: '$delimiter'\n";

// Every cell is converted from ISO-8859-1 to UTF-8. The (string) cast covers
// the single null cell fgetcsv() returns for a blank line; passing null to
// mb_convert_encoding() is deprecated.
$toUtf8 = fn($cell) => mb_convert_encoding((string) $cell, 'UTF-8', 'ISO-8859-1');

// Read the header. Enclosure and escape are passed explicitly with their
// historical default values, because relying on the default escape argument
// is deprecated as of PHP 8.4.
$header = fgetcsv($handle, 0, $delimiter, '"', '\\');
if ($header === false) {
    fclose($handle);
    echo "Fehler: Datei ist leer oder nicht lesbar.\n";
    exit(1);
}
$header = array_map($toUtf8, $header);

// $emailMap / $streetMap: lower-cased value => ['original' => string, 'rows' => int[]]
// $rows: row number => converted cells. The header counts as row 1, so the
// first data record gets row number 2.
$emailMap = [];
$streetMap = [];
$rows = [];
$rowNumber = 1;

while (($row = fgetcsv($handle, 0, $delimiter, '"', '\\')) !== false) {
    $rowNumber++;
    $row = array_map($toUtf8, $row);
    $rows[$rowNumber] = $row;

    $emailOriginal = trim($row[COL_EMAIL] ?? '');
    $streetOriginal = trim($row[COL_STREET] ?? '');

    // Comparison keys are case-insensitive.
    $email = mb_strtolower($emailOriginal);
    $street = mb_strtolower($streetOriginal);

    if ($email !== '') {
        // Keep the spelling of the first occurrence: that is the row
        // askDelete() proposes to keep, so the label matches it.
        $emailMap[$email]['original'] ??= $emailOriginal;
        $emailMap[$email]['rows'][] = $rowNumber;
    }

    if ($street !== '') {
        $streetMap[$street]['original'] ??= $streetOriginal;
        $streetMap[$street]['rows'][] = $rowNumber;
    }
}

fclose($handle);
// -------------------------------------------------------
// Helper functions
// -------------------------------------------------------
/**
 * Prints a fixed-width table with every row that shares one duplicated value.
 *
 * Output goes to stdout: a separator, "<label>: <original value>", the header
 * line and one line per affected row, each prefixed with its row number.
 *
 * @param array  $data   Duplicate group; reads $data['original'] (value shown
 *                       in the title) and $data['rows'] (list of row numbers).
 * @param string $label  Caption printed in front of the duplicated value.
 * @param array  $header Column titles of the CSV file.
 * @param array  $rows   All CSV rows, indexed by row number.
 */
function printBlock(array $data, string $label, array $header, array $rows): void
{
    $colWidth = 22;
    // Width of the "Zeile" heading; row numbers are right-aligned to it so
    // the first column lines up with the heading.
    $rowNumWidth = 5;

    // Truncate/pad by character count. str_pad() works on bytes, so cells
    // containing multibyte characters (umlauts, ß) would come out too short
    // and shift every column to their right.
    $fit = static function ($cell) use ($colWidth): string {
        $text = mb_substr((string) $cell, 0, $colWidth, 'UTF-8');
        $missing = $colWidth - mb_strlen($text, 'UTF-8');

        return $text . str_repeat(' ', max(0, $missing));
    };

    $separator = str_repeat('-', count($header) * ($colWidth + 3)) . "\n";
    $headerLine = implode(' | ', array_map($fit, $header));

    echo "\n" . $separator;
    echo "$label: {$data['original']}\n";
    echo $separator;
    echo "Zeile | $headerLine\n";
    echo $separator;

    foreach ($data['rows'] as $rowNum) {
        $cells = array_map($fit, $rows[$rowNum] ?? []);
        $rowLabel = str_pad((string) $rowNum, $rowNumWidth, ' ', STR_PAD_LEFT);
        echo $rowLabel . ' | ' . implode(' | ', $cells) . "\n";
    }

    echo $separator;
}
/**
 * Shows one duplicate group and asks on STDIN whether the surplus rows
 * should be marked for deletion.
 *
 * The first row number of the group is kept; all following ones are the
 * deletion candidates. Only the answer "1" marks them, anything else skips.
 *
 * @param array  $data         Duplicate group with 'original' and 'rows'.
 * @param string $label        Caption passed through to printBlock().
 * @param array  $header       Column titles of the CSV file.
 * @param array  $rows         All CSV rows, indexed by row number.
 * @param array  $rowsToDelete Set of marked rows (row number => true),
 *                             extended in place.
 */
function askDelete(array $data, string $label, array $header, array $rows, array &$rowsToDelete): void
{
    printBlock($data, $label, $header, $rows);

    $groupRows = $data['rows'];
    $survivor = $groupRows[0];
    $surplus = array_slice($groupRows, 1);
    $surplusList = implode(', ', $surplus);

    echo "Behalten: Zeile $survivor – Löschen: Zeilen $surplusList\n";
    echo "[1] Löschen [Enter] Überspringen: ";

    $answer = trim(fgets(STDIN));
    if ($answer !== '1') {
        echo "Übersprungen.\n";
        return;
    }

    foreach ($surplus as $number) {
        $rowsToDelete[$number] = true;
    }
    echo "Zeilen $surplusList zum Löschen vorgemerkt.\n";
}
// -------------------------------------------------------
// STEP 1: duplicate e-mail addresses
// -------------------------------------------------------
// Row numbers the user has confirmed for deletion (row number => true).
$rowsToDelete = [];

// An e-mail address is a duplicate when more than one row carries it.
$emailDuplicates = [];
foreach ($emailMap as $emailKey => $emailGroup) {
    if (count($emailGroup['rows']) > 1) {
        $emailDuplicates[$emailKey] = $emailGroup;
    }
}

echo "\n========================================\n";
echo " SCHRITT 1: Doppelte E-Mail-Adressen\n";
echo "========================================\n";

if (count($emailDuplicates) > 0) {
    echo count($emailDuplicates) . " doppelte E-Mail-Adresse(n) gefunden.\n";
    // Ask about each group in turn; confirmed rows end up in $rowsToDelete.
    foreach ($emailDuplicates as $emailGroup) {
        askDelete($emailGroup, 'E-Mail', $header, $rows, $rowsToDelete);
    }
} else {
    echo "Keine gefunden.\n";
}
// -------------------------------------------------------
// STEP 2: duplicate streets (rows already marked for deletion are hidden)
// -------------------------------------------------------
echo "\n========================================\n";
echo " SCHRITT 2: Doppelte Straßen\n";
echo "========================================\n";

// Drop rows marked in step 1 from every street group, then discard groups
// that no longer contain at least two rows.
$notYetMarked = fn($r) => !isset($rowsToDelete[$r]);
foreach (array_keys($streetMap) as $streetKey) {
    $remainingRows = array_values(array_filter($streetMap[$streetKey]['rows'], $notYetMarked));

    if (count($remainingRows) < 2) {
        unset($streetMap[$streetKey]);
        continue;
    }

    $streetMap[$streetKey]['rows'] = $remainingRows;
}

// After pruning, every remaining group has two or more rows, i.e. it is a
// duplicate group.
$streetDuplicates = $streetMap;

if (count($streetDuplicates) > 0) {
    echo count($streetDuplicates) . " doppelte Straße(n) gefunden.\n";
    foreach ($streetDuplicates as $streetGroup) {
        askDelete($streetGroup, 'Straße', $header, $rows, $rowsToDelete);
    }
} else {
    echo "Keine gefunden.\n";
}
// -------------------------------------------------------
// Write the cleaned CSV, but only if rows were marked for deletion
// -------------------------------------------------------
if (empty($rowsToDelete)) {
    echo "\nKeine Zeilen zum Löschen vorgemerkt. Keine Ausgabedatei erstellt.\n";
    exit(0);
}

$outputDir = __DIR__ . '/output-csv';
$outputPath = $outputDir . '/' . $filename;

// Create the directory that will actually hold the file: when $filename
// contains a sub-folder, that is a directory below $outputDir. The second
// is_dir() check tolerates the directory appearing concurrently.
$targetDir = dirname($outputPath);
if (!is_dir($targetDir) && !mkdir($targetDir, 0755, true) && !is_dir($targetDir)) {
    echo "Fehler: Ausgabeverzeichnis '$targetDir' konnte nicht erstellt werden.\n";
    exit(1);
}

// fopen() returns false on failure; passing that on to fputcsv() would
// throw a TypeError, so stop with a readable message instead.
$out = fopen($outputPath, 'w');
if ($out === false) {
    echo "Fehler: Ausgabedatei '$outputPath' konnte nicht geöffnet werden.\n";
    exit(1);
}

// Enclosure and escape are passed explicitly with their historical default
// values, because relying on the default escape argument is deprecated as of
// PHP 8.4.
fputcsv($out, $header, $delimiter, '"', '\\');

foreach ($rows as $rowNum => $row) {
    // Skip every row the user confirmed for deletion.
    if (isset($rowsToDelete[$rowNum])) {
        continue;
    }
    fputcsv($out, $row, $delimiter, '"', '\\');
}

fclose($out);

echo "\nBereinigte CSV gespeichert: output-csv/$filename\n";
echo "Gelöschte Zeilen gesamt: " . count($rowsToDelete) . "\n";