#!/usr/local/bin/perl -w eval 'exec /usr/local/bin/perl -w -S $0 ${1+"$@"}' if 0; # not running under some shell # The standard fold(1), implemented in Perl # copyright 1999, Clinton A. Pierce # Freely redistributable under the Perl Artistic License # Severely hacked at by Tom Christiansen: # code reformatting & rearranging, simplification, beautification # added pragmata # "screaming" code # argument parsing # pod documentation use strict; use locale; # for what a space is. my( $Byte_Only, # if they said -b, ignore tabs etc $Space_Break, # they said -s, so look for white space breaks $Width, # screen size $Tabstop, # tab stops are every Tabstop ); END { close STDOUT || die "$0: can't close stdout: $!\n"; $? = 1 if $? == 255; # from die } $Tabstop = 8; # sane tab stops $Width = 80; # default screen size sub usage { warn "$0: @_\n" if @_; die < | -] args -s split lines on whitespace where possible -b count bytes, not characters -w WIDTH maximum length of lines on output -WIDTH maximum length of lines on output (archaic form) USAGE } # do this by hand, because we don't like $opt_80, $opt_132, etc. # and we want to check for dups. --tchrist OPTION: while (@ARGV && $ARGV[0] =~ /^-(.+)/ && (shift, ($_ = $1), 1)) { next OPTION unless length; if (s/^b//) { warn "-b flag already set" if $Byte_Only++; redo OPTION; } if (s/^s//) { warn "-s flag already set" if $Space_Break++; redo OPTION; } # historical practice makes -72 and -w 72 the same if (s/^(\d.*)// || s/^w(.*)//) { $Width = $1 || shift; next OPTION; } usage("unexpected option: -$_"); } unless ($Width && $Width =~ /^\d+$/) { usage("illegal width value `$Width'"); } if ($Space_Break && $Width < $Tabstop) { usage("width must be greater than $Tabstop with the -s option"); } unshift(@ARGV, '-') unless @ARGV; for (@ARGV) { fold_file($_); } exit; ######## # If we are not in byte-only mode, we have to calculate # the new column based on the spec. This is superslow. sub adj_col { my($col, $char) = @_; die "XXX: called while byte count set" if $Byte_Only; # algorithm from BSD fold --tchrist if ($char eq "\b") { $col-- if $col } elsif ($char eq "\r") { $col = 0; } elsif ($char eq "\t") { $col += $Tabstop - ($col % $Tabstop) } else { $col++ } return $col; } # run fold on a given file sub fold_file { my($filename) = @_; my($column, $char, $output); $column = 0; open(INPUT, $filename) || die "Cannot open $filename: $!\n"; # the following hack allows us to dispense with the # slow getc() and the hairy adj_col() code because we # don't care about \t and \b anymore. This small adjustment # provides a screaming 3,000% speedup, so seems worth it! # --tchrist if ($Byte_Only) { my $soft = "(.{0,$Width})(?=\b.)"; # XXX: \b != \s my $hard = "(.{$Width})(?=.)"; if ($Space_Break) { while () { (s/$soft//o || s/$hard//o), print "$1\n" while length > $Width; print; } } else { s/$hard/$1\n/go, print while ; # SCREAM } close(INPUT) || die "can't close $filename: $!"; return; } CHAR: # bytewise processing. The horror! The horror! while (defined($char = getc(INPUT))) { if ($char eq "\n") { print $output, "\n"; $output = ""; $column = 0; next CHAR; } ADJUST: { $column = adj_col($column, $char); if ($column > $Width) { if ($Space_Break) { for (my $i = length($output); $i >= 0; $i--) { if (substr($output, $i, 1) =~ /\s/) { print substr($output, 0, $i+1), "\n"; $output = substr($output, $i+1); for ($column = $i = 0; $i < length($output); $i++) { $column = adj_col($column, substr($output, $i, 1)); } redo ADJUST; } } print $output, "\n"; $output = ""; $column = 0; redo ADJUST; } else { print "$output\n"; $output = $char; $column = adj_col(0, $char); } } else { $output .= $char; } } # ADJUST goto } close(INPUT) || die "can't close $filename: $!"; } __END__ =head1 NAME fold - wrap each input line to fit specified width =head1 SYNOPSIS B [B<-bs>] [B<-w> I] [I ...] =head1 DESCRIPTION The I command reads lines from the specified files (or standard input if none are specified) and writes them to the standard output with newlines inserted into lines longer than the specified column width. The default column width is 80, but this may be overridden using the B<-w> flag. For historical reasons, the width may be specified directly, as in C, omitting the B<-w>. The B<-s> flag causes breaks to occur after whitespace rather than in the middle of a word. This produces a ragged right edge, but is much nicer to look at. The B<-b> flag makes the program ignore embedded backspaces, tabs, and carriage returns when deciding where to split. This makes it run about thirty times faster. You might want to get used to using B<-b>. Current locale settings will be honored in determining what is meant by "whitespace" and "word characters". =head1 BUGS POSIX 1003.2 states that a newline will never be inserted immediately before or after a backspace or a carriage return, but this is not checked for. =head1 SEE ALSO expand(1), fmt(1) =head1 AUTHORS Clinton Pierce and Tom Christiansen. This code is freely modifiable and freely redistributable under Perl's Artistic License.