我尝试合并 3 个文件。文件 1:4 列、以制表符分隔的文件
ID Column_1 Column_2 Column_3 A 100 100001 X. B 100 99999 Y. C 100 ……
您使用哈希的思路是正确的,只需要根据每个表中的相应列计算出键(key)即可。解决方案的要点:
$key
@order
%table
STDOUT
#!/usr/bin/perl
#
# Merge three tab-separated tables into one, writing the result to STDOUT.
#
# Usage: perl dummy.pl table1 table2 table3
#
#   table1: ID, column_1, column_2, column_3      key = (column_1, column_2)
#   table2: column_4, column_5, column_6          key = (column_4, column_5)
#   table3: column_7, column_8, column_9, column_10
#                                                 key = (column_7, column_10)
#
# Every row of table1 is printed, in its original order, followed by
# column_6 of each matching table2 row and column_7, column_8 of each
# matching table3 row.
use warnings;
use strict;
use autodie;

use Text::CSV;

# Separator placed between the key columns.  Plain concatenation would make
# ("10", "0100") and ("100", "100") collide; the ASCII unit separator is not
# expected to appear inside the data.
my $KEY_SEP = "\x1F";

@ARGV == 3
    or die "Usage: $0 table1 table2 table3\n";

my $csv = Text::CSV->new({
    binary   => 1,
    eol      => "\n",
    sep_char => "\t",
}) or die "CSV creation\n";

# read_file($file, $code)
#
# Parse $file with the shared $csv object and call $code->($row) for every
# row, where $row is the array reference returned by getline().  A failure
# to open or close the file is fatal (autodie); if parsing stops before end
# of file, the parser diagnostic is reported through error_diag().
sub read_file {
    my ($file, $code) = @_;

    open(my $fh, '<', $file);
    while (my $row = $csv->getline($fh)) {
        $code->($row);
    }
    $csv->eof or $csv->error_diag();
    close($fh);

    return;
}

# make_key(@columns)
#
# Build the lookup key for a combination of column values.
sub make_key {
    my (@columns) = @_;
    return join($KEY_SEP, @columns);
}

# %table maps a key to every table1 row carrying that key; @order holds the
# same row references in input order.  Keeping a list per key means rows
# that share a key are all preserved instead of overwriting one another.
my %table;
my @order;

# Table 1: defines the output rows and their order.
read_file($ARGV[0], sub {
    my ($row) = @_;

    # Skip rows (e.g. blank lines) too short to contain the key columns.
    return if @{$row} < 3;

    # column_1, column_2 define the key
    my $key = make_key(@{$row}[1, 2]);

    push(@order, $row);
    push(@{ $table{$key} }, $row);
    return;
});

# Table 2: contributes column_6.
read_file($ARGV[1], sub {
    my ($row) = @_;
    return if @{$row} < 2;

    # column_4, column_5 define the key
    my ($col4, $col5, $col6) = @{$row};

    # Rows without a partner in table 1 are never printed, so do not create
    # hash entries for them.
    my $matches = $table{ make_key($col4, $col5) } or return;

    foreach my $target (@{$matches}) {
        push(@{$target}, $col6);
    }
    return;
});

# Table 3: contributes column_7 and column_8.
read_file($ARGV[2], sub {
    my ($row) = @_;
    return if @{$row} < 4;

    # column_7, column_10 define the key
    my ($col7, $col8, $col9, $col10) = @{$row};

    my $matches = $table{ make_key($col7, $col10) } or return;

    # column_7, column_8 from table 3
    foreach my $target (@{$matches}) {
        push(@{$target}, $col7, $col8);
    }
    return;
});

foreach my $row (@order) {
    $csv->print(\*STDOUT, $row);
}

exit 0;
测试运行:
$ perl dummy.pl dummy1.txt dummy2.txt dummy3.txt A 100 100001 X X 100 120000 B 100 99999 Y Y 100 66666 C 100 88888 Z Z 100 77777 D 99 100001 Y Y 99 100000 E 99 88888 Z Z 99 44444