(no title)
balnaphone | 1 year ago
#!/usr/bin/perl
# chunk.pl -- split a CSV file into numbered chunk files
#
# Usage:   chunk.pl input_csv [chunk_size] [output_filename_format] [separator]
# Example: chunk.pl input.csv 500 'input-%08d.csv' ','
#
# Reads input_csv as UTF-8 and writes its rows, in order, to files named
# sprintf(output_filename_format, N) for N = 1, 2, ...  Each output file
# receives at most chunk_size rows.  Every row is counted alike: a header row
# is neither treated specially nor repeated in later chunks.
#
# Defaults: chunk_size 500, output_filename_format "data-%08d.csv",
# separator ",".
#
# Pragmas and modules are loaded with `use` instead of -M/-C switches on the
# #! line: perldiag lists -M/-m there as a fatal "Too late for ... option"
# error, and -C there is accepted only when repeated on the command line.
# -C is dropped rather than replaced because every file opened below names its
# :encoding(UTF-8) layer explicitly.
use strict;
use warnings;
use Text::CSV;

# Print the usage text to the given handle, then exit with the given status.
sub usage {
    my ($fh, $status) = @_;
    print {$fh} "Usage: $0 input_csv [chunk_size] [output_filename_format] [separator]\n";
    print {$fh} "Example: $0 input.csv 500 'input-%08d.csv' ','\n";
    exit $status;
}

# Close a finished chunk file.  Buffered write errors (e.g. a full disk) are
# only reported when the handle is closed, so a failure here is fatal.
sub close_chunk {
    my ($fh, $name) = @_;
    close $fh or die "Cannot close output file $name: $!\n";
    return;
}

# An explicit request for help succeeds; a missing input file is a usage
# error and is reported on STDERR with a non-zero exit status.
usage(\*STDERR, 2) if !@ARGV;
usage(\*STDOUT, 0) if $ARGV[0] eq '-h';

# Set command-line arguments
my ($INFILE, $CHUNKSIZE, $FMT, $SEP) = @ARGV;
$CHUNKSIZE //= 500;
$FMT       //= "data-%08d.csv";
$SEP       //= ",";

# Reject anything but a positive integer up front: 0 (or a non-numeric string,
# which numifies to 0) would otherwise die with "Illegal modulus zero" on the
# first row.
$CHUNKSIZE =~ /\A[0-9]+\z/ && $CHUNKSIZE > 0
    or die "chunk_size must be a positive integer, got '$CHUNKSIZE'\n";

# A format that ignores its argument gives every chunk the same file name, so
# each chunk would overwrite the previous one.
my $names_differ = do {
    no warnings;    # a bad format can also trigger sprintf warnings; the die below reports it
    sprintf($FMT, 1) ne sprintf($FMT, 2);
};
$names_differ
    or die "output_filename_format '$FMT' must contain a number conversion such as %08d\n";

# Initialize CSV objects, file handles, and counters.
# The reader is created without `eol`: the Text::CSV documentation notes that
# a parser with eol unset accepts "\n", "\r" and "\r\n".  Only the writer
# needs eol, so that print() terminates each row with "\n".
my %csv_opts = (binary => 1, auto_diag => 1, sep_char => $SEP);
my $reader = Text::CSV->new({ %csv_opts })
    or die "Cannot set up CSV parser (separator '$SEP'): " . Text::CSV->error_diag() . "\n";
my $writer = Text::CSV->new({ %csv_opts, eol => "\n" })
    or die "Cannot set up CSV writer (separator '$SEP'): " . Text::CSV->error_diag() . "\n";

my $rows     = 0;    # rows written so far, across all chunks
my $chunk_no = 0;    # number of the chunk file currently open (1-based)
my ($out, $out_name);

open my $in, "<:encoding(UTF-8)", $INFILE or die "Cannot open $INFILE: $!";

# Main loop: start a new chunk file every $CHUNKSIZE rows.
while (my $row = $reader->getline($in)) {
    if ($rows % $CHUNKSIZE == 0) {
        close_chunk($out, $out_name) if defined $out;
        $out_name = sprintf $FMT, ++$chunk_no;
        open $out, ">:encoding(UTF-8)", $out_name
            or die "Cannot open output file $out_name: $!";
    }
    $writer->print($out, $row) or die "Failed to write row: $!";
    $rows++;
}

# getline() returns false on a parse error as well as at end of input, and
# auto_diag => 1 only warns.  Without this check a malformed record would
# silently truncate the output while the script still exited successfully.
$reader->eof
    or die "Stopped at a malformed CSV record after $rows rows: " . $reader->error_diag() . "\n";

# Clean up: close file handles
close_chunk($out, $out_name) if defined $out;
close $in;
balnaphone|1 year ago