#!/usr/bin/perl
#
# Remove lines that match any pattern listed in a patterns file.
#
# Patterns are read one per line from the patterns file (./patterns unless
# -p/--patterns-file is given).  Every remaining command-line argument is an
# input file; with no input files, STDIN is read.  Lines that do not match
# any pattern are printed to stdout or, with -i/--in-place-edit, written
# back to the input file (the original is kept with a .bak extension).

# The double-diamond operator <<>> used below needs Perl 5.22 or later.
# Checking inside BEGIN yields a clear version error on older Perls instead
# of a syntax error.
BEGIN { require 5.022; }

use strict;
use warnings;
use Getopt::Long;

my %opts;
$opts{'patterns-file'} = './patterns';

# GetOptions has already printed a diagnostic when it returns false.  Stop
# here rather than processing (and possibly rewriting) files with options
# the user did not intend.
GetOptions(\%opts,
    'in-place-edit|i',
    'fixed-strings|F',
    'line-regexp|x',
    'strip-spaces|s',
    'patterns-file|p=s',
    'help|h',
) or die "Try '$0 --help' for usage.\n";

if ($opts{'help'}) {
    print <<__EOF__;
$0 [options] [file(s) to process...]
--patterns-file, -p Name of patterns file, defaults to ./patterns
--in-place-edit, -i In-place edit mode. Use with caution!
--fixed-strings, -F Treat patterns as fixed-text strings, not regexes
--line-regexp, -x Patterns only match entire input lines.
--strip-spaces, -s Strip leading and trailing white space from patterns
--help, -h This help message
__EOF__
    exit 0;
}

# First, read the patterns file.  It is opened explicitly so that a missing
# or unreadable file is a fatal error instead of a warning followed by an
# empty pattern list.
my $patterns_file = $opts{'patterns-file'};
open(my $patterns_fh, '<', $patterns_file)
    or die "$0: cannot open patterns file '$patterns_file': $!\n";

my @patterns;    # regex fragments, one per usable line of the patterns file
while (my $pattern = <$patterns_fh>) {
    chomp $pattern;    # remove the trailing input record separator ("\n")

    if ($opts{'strip-spaces'}) {
        $pattern =~ s/^\s*|\s*$//g;
    }

    next if $pattern =~ m/^\s*$/;    # ignore empty / whitespace-only lines

    if ($opts{'fixed-strings'}) {
        # Treat the pattern as fixed text, escaping regex "special"
        # characters like . or *
        push @patterns, quotemeta($pattern);
    }
    else {
        # Treat the pattern as a regular expression.  The non-capturing
        # wrapper keeps inline modifiers such as (?i) from leaking into
        # the alternatives that follow it.
        push @patterns, "(?:$pattern)";
    }
}
close($patterns_fh);

# An empty alternation compiles to a regex that matches every line, which
# would delete all input -- and, with -i, empty every file.  Refuse to run.
die "$0: no patterns found in '$patterns_file'\n" unless @patterns;

# Convert the pattern list into a single alternation.
my $alternation = join('|', @patterns);
if ($opts{'line-regexp'}) {
    # Non-capturing group: a capturing one would shift the numbering of
    # back-references used inside the patterns.
    $alternation = '^(?:' . $alternation . ')$';
}
my $re = qr/$alternation/;    # pre-compile the regex

if ($opts{'in-place-edit'}) {
    # Script was run with -i: turn on in-place editing, keeping backup
    # copies with a .bak extension.
    $^I = '.bak';
}

# Read and process the input file(s) left in @ARGV by GetOptions (STDIN if
# there are none).  <<>> treats every argument as a literal file name.
while (my $line = <<>>) {
    next if $line =~ $re;    # skip lines matching the regex
    print $line;
}
This script can process multiple input files and can optionally edit the files in-place if -i or --in-place-edit is used on the command line. The other options are documented by the -h or --help option.
The original version I wrote didn't support any options, but had a LOT of comments saying things like "uncomment the next line if you want to do X". I decided it was easier and better to just use the Getopt::Long module. Both long and short options are supported.
The patterns file defaults to ./patterns (a file named patterns in the current directory), but this can be overridden by using the -p or --patterns-file option, which requires a filename argument.
All other args are the filename(s) to process. If the -i option is NOT used, all output goes to stdout.
Save the script as, e.g. remove-patterns.pl, make it executable with chmod +x remove-patterns.pl and run it like so:
$ ./remove-patterns.pl file1
LINE 1 ABCD
LINE 2 EFGA
LINE 3 HCJK
LINE 4 ABCDH
LINE 5 EFGAG
LINE 6 HCJKD
LINE 7 ABCDH
LINE 8 EFGAG
LINE 9 HCJKD
Or, with in-place editing of the input file(s) and using a different patterns file:
$ ./remove-patterns.pl -i -p different_patterns.txt file*
There will be no visible output if -i is used; the files are edited in place instead.
REMOVE should be removed, to simplify the problem.