% Yawn. This is getting repetitive now. \title{Tangle for {\it WEB}Perl} \author{Simon Cozens} \def\WP{\hbox{{\sl WEB\/}Perl}} \def\pt{{\tt tangle.pl}} \def\pw{{\tt weave.pl}} \def\at#1{{\tt @#1}} \def\vis{{\tt\char` }}%` @* Introduction. This is \pt, a tangler for the \WP{} Literate Programming environment. To learn more about that, see the introduction to \pw{} or The \WP{} Book. Since I'm now used to writing \WP{} programs, I'm not going to go into a long introduction about what it's all about. I'm just going to get on and do it. Tangling takes place, as you probably would guess, in two parts; Firstly, we read and compress the data, then we expand it and spit it out again. Let's look at these two seperately. @d banner == "This is tangle, version 2.0.0\n" @ Firstly, the input phase. The text to be read in is input, merged and tokenised in the usual way, and then we walk through it. Documentation states are skipped outright, as are \at{w} and \at{f} sections and comments. This time we're going to be evil and compress newlines, which should cause interesting text-wrapping games% \footnote{Incidentally, Perl is case sensitive, which pisses me off.}. We need to look at \at{p} sections, \at{<} sections and \at{d} definitions. \at{p} sections are gathered together into a string which forms the main program code. \at{<} sections are placed into a hash, ready to replaced in the output phase, as are \at{f} definitions. (But they're obviously treated somewhat differently.) This phase is relatively easy. @ Then the output phase, which is a considerable amount of fun. When we get to the start of this phase, we're presented with a perl string, containing the whole program. The first thing I want to be sure of doing is getting macros right, since they're crucial. I'd like to see some modules which scan for a macro, and parse it correctly, returning a string which contains the replacement. Then we have a little recursive procedure that replaces each code-section reference by its definition from the hash. Right. Let's go. @p @ @ @ @ @ This will look rather similar to the new version of \pw, since they're based on the same code. Have a look at {\tt weave3.web} for more information on this. @= print banner; @ local @@tokenlist, $tokencount; $|=1; $changing=$finished=0; $maxdepth=25; @ print $input." ("; @* Input Phase. Now we need to start the main input phase, as described above. For the majority of the time, we're going to be skipping until we find control sequences, and then acting on them. We'll take the main input engine from \pw. @= @ local $newsourceline=0; @@merged=(); $merging=1; $phase=1; $state="skipping"; $program=""; open LOG,">t.log"; while ($finished==0) { @ @@merged[$newsourceline++]=$_; @ while (moretokens()) { print LOG thistoken(); if ($state eq "skipping") { @ } elsif ($state eq "defining" or $state eq "creating") { @ } else { print "! This can't happen.\n\n"; die "I'm in state $state, which is impossible.\n"; } } @ } @ Here's the tokeniser and token crushing stuff as before. @= @@tokenlist=split /(?<=[^a-zA-Z0-9_])|(?=[^a-zA-Z0-9_])/o; $tokencount=0; @@tokenlist=crushtokens(@@tokenlist); @ We don't do as much crushing this time---just command sequences and some other bits and bobs. @p sub crushtokens { my @@res; my $el1, $el2; while (@@_) { ($first, $second) = (shift, shift); if ($first eq "@@" or ($first eq "=" and $second eq "=")) { push(@@res, $first.$second) } else { push(@@res, $first); if ($second eq "@@" or $second eq "=") { unshift @@_, $second; } else { push (@@res, $second); } } } return @@res; } @ This doesn't actually happen. @= @ Our next move depends on what state we're in\footnote{And right at the moment, you don't want to know what state I'm in.}. If we're skipping, we want to reject anything that won't move us into a more useful state. @= $thistoken=thistoken(); @{ Save function call overhead. @} if ($thistoken eq "@@"."p") { skiptoken(); $state="creating"; } elsif ($thistoken eq "@@"."<") { @ skiptoken(); } elsif ($thistoken eq "@@"."d") { @ $state="skipping"; } elsif ($type eq "1") { $debug=1; skiptoken(); } elsif ($type eq "2") { $debug=2; skiptoken(); } else { skiptoken(); } @ We grab a module name in exactly the same way we did in \pw. Reusable code is really cool. @= $current_cs=""; skiptoken(); $current_cs = substr(careful_gimme("@@".">"),0,-2); $current_cs =tidyname($current_cs); if ($current_cs=~ /\.\.\.\s*$/) { $current_cs=cname($current_cs); } @ And here's how we define the code: @= @ else { push(@@cs_names, $current_cs) unless (scalar grep(/\Q$current_cs\E/, @@cs_names) > 0); } careful_skip() if (thistoken() eq "+"); if(thistoken() eq "=") { $state="defining"; } else { @ print "! This shouldn't be here.\n\n"; print "You tried to reference a code section in "; print "documentation state.\nYou can't do that, so "; print "I'm ignoring it.\n"; } @ Now, we have section name and a text stream, so we can add stuff to the relevant hash entry. @= if (thistoken() =~/@@/) { @ } else { ($state eq "defining"?$code{$current_cs}:$program).=thistoken(); skiptoken(); } @ This is fun, isn't it? @= $type=substr(thistoken(),1,1); if ($type eq "@@" or $type eq "<" or $type eq ">") { ($state eq "defining"?$code{$current_cs}:$program).=thistoken(); @{ We'll deal with it later. @} skiptoken(); } elsif ($type eq "{" or $type eq ":" or $type eq "." or $type eq "!" or $type eq "?" or $type eq "t") { careful_gimme($type eq "{"?"@@}":"@@>"); skiptoken(); } elsif ($type eq "f" or $type eq " " or $type eq "*") { $state="skipping"; } elsif ($type eq "d") { @ } elsif ($type eq "p") { if ($state eq "creating") { @ } else { $state="creating"; skiptoken(); } } elsif ($type eq "'" or $type eq "\"") { @ } elsif ($type eq "&") { skiptoken(); } elsif ($type eq "1") { $debug=1; } elsif ($type eq "2") { $debug=2; } else { @ print "! I don't understand this command sequence: "; print thistoken()."\n"; skiptoken(); } @ The hex and octal stuff are a waste of space, really. @= skiptoken(); ($state eq "defining"?$code{$current_cs}:$program).= ($type eq "'"?oct(gettoken()):hex(gettoken())); @ This is just a glorified error message. @= print "! Already creating!\n"; print "You tried to start an unnamed code section, while "; print "I was in the\nmiddle of one. This means your code may"; print "not be correct.\nI'm going to carry on anyway.\n"; skiptoken(); @* Output Phase. This is remarkably similar to the output phase for \pw{} 1.1; we start with an initial program $T_0$ and recursively replace module references with the expanded text from the hash. First, we set up the output: @= print ")\nOutput ("; @ @ @ Set up the output file name. @= $output=$input; $output=~ s/(.*)\..*/\1/; $stack=$output; $output.=".pl"; @ To kick off the action, this is what we do: @= print OUTPUT "#! ".`which perl`."\n"; print OUTPUT "# Created by Tangle on ".scalar localtime; print OUTPUT "\n# Do Not Modify!\n"; print OUTPUT expand($program,"root"); `chmod +x $output`; @ This is the actual expansion routine; the recursion is done when we investigate control sequences in the text. (Of which there shouldn't be that many, I think, as we've eliminated them all in the first pass. Maybe.) @p sub expand { local($returnee) = ""; local($text,$tree)=(shift, shift); local(@@tokenlist); local($tokencount)=0; $_=$text; @ while (moretokens()) { $this=thistoken(); if (is_macro($this)) { { local $_=$this; @ $returnee.=$expansion; } skiptoken(); } elsif (thistoken() =~/^@@/) { @ } else { $returnee.=gettoken(); } } return $returnee; } @ This bit shouldn't be a problem, apart from the part which causes the recursive call. @= $type=substr(thistoken(),1,1); if ($type eq "@@") { $returnee.="@@"; skiptoken(); } elsif ($type eq "<") { @ } else { print "\n! Couldn't grok $type command in output phase.\n"; skiptoken(); } @ Here it is\dots @= @ if (exists $code{$current_cs}) { if ($tree=~/:\Q$current_cs\E/) { print "\n! Recursive calling not allowed!\n"; print "Calling $current_cs\nnow will put me"; print " into an infinite loop, so I'm skipping it\n"; } else { print "."; $returnee.=expand($code{$current_cs},$tree.":".$current_cs); } } else { print "\n! Unknown Module\nI collected a reference to\n"; print $current_cs.", which does not exist.\n"; } @* The end. All we need do by means of tidying up is to close all the files we've opened. @= print ")\n\n"; close OUTPUT; `chmod +x $output` unless $^O =~/Win/i; @* Macros. Macro definitions I've been trying to avoid for a while, primarily because I can't remember how they work. I've looked at the \WP{} Book, and it's wrong. This isn't useful. I've also just realised that macros can contain other macros, which stresses me out. I'm going to have to go away and think about this one. Right. The way I'd like to do this is to have one function that recursively expands macros to as great a depth as it can. Simple macros can contain no other macros; the two other types can contain whatever types of macro they like. We store all the macros in a hash keyed to the macro name, a forward slash and the type. Here's something really basic to recognise the type of a macro, and whether or not a given token is a macro or not. @p sub macro_type { $this = shift; if ( exists $macros{$this."/num"} ) { return "num"; } elsif ( exists $macros{$this."/sim"} ) { return "sim"; } elsif ( exists $macros{$this."/par"} ) { return "par"; } else { return -1 } } sub is_macro { $this = shift; return macro_type($this) != -1 } @ Next, we start expanding a numeric macro. @p sub expand_num { local $result ="\$returnvalue = "; $_=$macros{shift()."/num"}; @ while ($_=gettoken()) { if ( is_macro($_) ) { undef $expansion; @ $result.="(".$expansion.")"; } elsif ( /[0-9\(\)\+\-\/\*\^\s]/) { $result .= $_ } else { print "\n! Illegal Character in Numeric Macro!\n"; print "You can't say ($_) in a numeric macro; I'm "; print "skipping it.\n"; } } eval $result ; @{ Ouch. @} if ($@@) { print "! Macro definition fried.\n"; print "Something went wrong evaluating your macro.\n"; print "I'm afraid I don't know why. I'm skipping it.\n"; return; } return $returnvalue; } @ Generic macro expansion routine. This is fairly important. We assume that it is provided with a token list in |@@tokenlist| and |$tokencount|, and with the macro in |$_|. @= $depth=0 unless defined $depth; if ($depth++>$maxdepth) { print "! Macro recursion count exceeded!\n"; print "More than $maxdepth iterations.\n"; print "Last macro expanded was $_\n"; die "Probably a source code error.\n"; } if ( ($mt=macro_type($_)) eq "num" ) { $expansion= expand_num($_); @{ Scared yet? @} } elsif ( $mt eq "par" ) { @ } elsif ( $mt eq "sim" ) { $expansion=expand_sim($_); } else { die "! Can't happen. (macros)\n"; } $depth--; @ Simple macros don't get expanded, so that's easy. @p sub expand_sim { return $macros{shift()."/sim"} } @ Here we cater for parametric macros; this doesn't do the expansion, but it does set up the string to pass to the expansion handler. @= $macro=$macname=$_; $fail=0; @@parlist=(); {local $result=''; skiptoken() if $macro eq thistoken(); if (thistoken() ne '(') { print "\n! No ( found\n"; print "Parametric macros should start with a open bracket\n"; print "I found ".thistoken()."\n"; $result .= $_.thistoken(); } else { @ if ($fail) { $result .= $macro; } else { $parexp = expand_par($macname,@@parlist); $result.=$parexp; } } $expansion=$result; } @ The checking of whether there are the correct number of parameters will take place in the expansion routine. @= $finished=0; $curr_arg=""; $_=gettoken(); until ($finished) { $_=gettoken(); if (/\(/) { print "Bracket opened.\n"; $bracecount=1; $curr_arg.=$_; until ($bracecount==0) { $_=gettoken(); $curr_arg.=$_; $bracecount-- if /\)/; $bracecount++ if /\(/; @ } print "current argument is $curr_arg.\n"; print "Bracket closed.\n"; } elsif (/\)/) { $finished=1; push @@parlist, $curr_arg; last; } elsif (/,/) { push @@parlist, $curr_arg; $curr_arg=""; } else { $curr_arg.=$_; } @ } @ Boo. @= if (!moretokens()) { print "\n! Macro use never ended!\n"; print "$tokencount, $#tokenlist\n"; print "You started using $macname, but didn't "; print "close the brackets.\n"; die "What happened?"; $fail=1; last; } @ Right. Time to expand a parametric macro. Brrrrr. @p sub expand_par { ($template,$replacement)=@@{$macros{($macname=shift)."/par"}}; local $result=""; @@arguments=@@_; @ $counter=0; foreach(@@arguments) { $counter++; $replacement=~s/#$counter/$_/g; } @ return $result; } @ If there ain't the same number of arguments as there are in the template, we're in a certain amount of trouble. We return a blank string, which isn't exactly undefined behaviour, but will confuse your program anyway. @= if ($#arguments != scalar grep /,/,$template) { print "\nUse of $macname doesn't match its definition\n"; print "You need ".(1+(scalar grep /,/,$template)); print " arguments, not ".(1+$#arguments)."\n"; print "Who knows what happens at this point?\n"; return ""; } @ This is the last of the macro expansion modules, and therefore the one I've been putting off for longest. In this, we've got to check to see if the replacement text from a parametric macro contains any further macros to be expanded. This is really, really scrappy. @= @@toksave=@@tokenlist; $tcsave=$tokencount; # local is broken. { local $output=''; local $_=$replacement; local @@tokenlist, $tokencount; @ while (moretokens()) { $_=gettoken(); if (is_macro($_)) { undef $expansion; @ $result.=$expansion; } else { $result .= $_ } } } @@tokenlist=@@toksave; $tokencount=$tcsave; # local is broken. skiptoken(-1); @ Phew. Recognising macros, we just steal from \pw{} @= skiptoken(); 1 while (($lhs = gettoken())=~/\s/); while (($type=gettoken())=~/\s/) {} # print "*D* LHS $lhs type $type : token ".thistoken()." lasttoken ".lasttoken()."\n"; if ($type eq "(") { @ } elsif ($type eq "=") { @ } elsif ($type eq "==") { @ } else { @ print "! That's no macro definition!\n"; print "You said $lhs $type, which makes no sense. Not to me, anyway.\n"; } @ Of course, it's not going to be as simple as that, since we have to do some pretty severe processing of both the LHS and RHS of parametric macros.%' @= @ 1 while (gettoken()=~/\s/); skiptoken(-1); unless (thistoken() eq "==") { print "\n! No equals sign in parametric macro!\n"; print "You said $template,\n"; print "but didn't provide two equals signs.\n"; } else { careful_skip(); careful_skip() while (thistoken()=~/\s/); @ @ } @ @ Firstly, the only things we allow on the LHS are hashes, numbers and commas. @= $template=$lhs."("; $params=careful_gimme(")"); if ($params =~ /[^0-9#,)]/) { print "! Parametric macro template broken.\n"; print "The template for $lsh should only contain"; print "hashs, numbers and commas.\n"; } $template.=$params; @ Now what happens? Uhm. Well, the right hand side of a macro is ended by something that is not-a-macro, that is, a control sequence. This requires a special form of |gimme()| and |carefulgimme()|, just like in \pw. These merely check for control sequences. @p sub specialgimme { $return=''; $whywefinished="EOF"; while (moretokens()) { $thisone=gettoken(); if ($thisone =~ /@@/ and $thisone ne "@@@@") { $whywefinished="FOUND"; last } else { $return.=$thisone; } } return ($whywefinished,$return); } sub specialcareful_gimme { ($status, $rv) = specialgimme(); if ($status eq "EOF") { while ($status eq "EOF" and $finished==0) { careful_skip(0); @{ End-of-line hack @} ($status, $newrv)=specialgimme(); $rv.=$newrv; } } return($rv); } @ Now we can use a simple call to our special functions to get the definition of the macro. Isn't that cool? @= $rhs=specialcareful_gimme(); $rhs=~s/\s*$//s; skiptoken(-1); @ Next we need to check it for integrity. This involves checking that we don't have any parameters in the right hand side that don't exist in the left hand side. Actually, there may well be no need to do this if someone thinks of a way of hacking it and chaining macros together, so I'll implement this another day.%' @= @ @ Stashing the macro is easy; you just put the template and the definition in an array, and place a reference to it in the hash. @= $macros{$lhs."/par"}=[$template, $rhs]; @ Phew. Compared with that lot, simple and numeric macros should be easy. Note: {\bf should}. @= 1 while (gettoken()=~/\s/); skiptoken(-1); @ $macros{$lhs."/num"}=$rhs; @ And the last one. @= 1 while (gettoken()=~/\s/); skiptoken(-1); @ $macros{$lhs."/sim"}=$rhs; @ Good gosh. I've actually finished. @* Common functionality. Here is the set of code and functions which is common to both {\tt tangle.pl} and {\tt weave.pl}, to save me typing it out twice, and to keep the code both easy to read and easy to maintain. This includes all the clever tokenisation stuff, error reporting, opening and closing files, and so on. It will be explained in more detail as we go along. Here, however, we include everything that isn't called directly: @= @ @ @ @* Tokenisation functions. First, dealing with tokenisation, my favourite. A series of very short and probably self-explanatory functions, which should really turn into macro definitions one day. We don't include the function which deals with turning the incoming token string into an array, because the tokenisation function is different between {\tt tangle.pl} and {\tt weave.pl}. This is because the tangler knows only code, where an underscore is regarded as part of the alphanumerics, and the weaver knows documentation, where the underscore is a special character and may need be escaped. Not only that, but the token initialisation must be done at parent level, and not in a subroutine, to ensure that the token list is |local| and therefore available to all the subs that want it. @= sub thistoken { return $tokenlist[$tokencount] } sub gettoken { skiptoken(); return lasttoken() } sub lasttoken { return $tokenlist[$tokencount-1] } sub nexttoken { return $tokenlist[$tokencount+1] } sub skiptoken { $howmany=1 unless defined ($howmany=shift) ; $tokencount+=$howmany ; } sub moretokens { return ($tokencount<=$#tokenlist);} @ Now something for which I should give a little explanation. This one returns the next $x$ characters in the token stream {\bf keeping the pointer in the same place}. @= sub lookahead { $howmany = shift; $ret=""; for ($i=$tokencount ; $i < $tokencount+$howmany; $i++) { $ret.=$tokenlist[$i] ;} return $ret; } @ This is a new idea I've had. It's a vaguely useful function to pull a string out of the token list given that the string ends with some expected parameters. (These will usually be \at{$>$}, for obvious reasons.) It will return a status indicator, and the string so far. (Which may or may not be all of the string we're looking for, since the input stream may end before the required characters.) Please note that this isn't clever enough to take any action on command strings found inside the string we're pulling out of the token list---% It'll just hand it back to you as it sees it. You'll also notice that there's an |if| statement there which deliberately avoids the use of |$temp_name=~/$expectation$/| @= sub gimme { $expectation = shift; $whywefinished="EOF"; $temp_name=""; while (moretokens()) { $temp_name.=gettoken(); if (substr($temp_name,-length($expectation)) eq $expectation) { $whywefinished="FOUND"; last } } return ($whywefinished, $temp_name); } @ Here's another pretty one. I don't know if it works, though. The idea is that this skips a number of tokens (by default 1) and is careful about the end of the line---if it hits the end of the line, it'll merge another line into the buffer and tokenise it, then continue trying to skip.%' @= sub careful_skip { $howmuch = 1 unless defined($howmuch = shift); if (moretokens()) { return if $howmuch<1; skiptoken(); } else { @ if ($phase == 1 ) { @ @@merged[$newsourceline++]=$_ if $merging; @{ \pw{} hack. @} } else { $_=$merged[$lineno++]; } @ } careful_skip(--$howmuch) if $howmuch>0; } @ And, similiarly, a careful version of the |gimme()| function. @= sub careful_gimme { $delimiter = "@@>" unless defined($delimiter = shift); ($status, $rv) = gimme($delimiter); if ($status eq "EOF") { while ($status eq "EOF" and $finished==0) { careful_skip(0); @{ End-of-line hack @} ($status, $newrv)=gimme($delimiter); $rv.=$newrv; } } return($rv); } @* Input and Output Routines. These are nice and easy, and identical between tangler and weaver. Let's get straight down to it. @ This bit opens up and sucks into an array the input file. (Likewise the change file.) @= chomp($input=shift); if (!open(INPUT, $input) and !open(INPUT, $input.".web" )) { do { print "! I can't find file `$input'\n\nPlease type another input file name: "; chomp($input=<>); } until (open(INPUT,$input)); } @@source=; chomp($changefile=shift); if ($changefile eq "") { $inputbase=$input; $inputbase=~s/\.web$//; if (-e $inputbase.".ch") { print "! Change file exists, but you've not told me to use it.\nI'll trust you know what you're doing.\n"; } $changeable=0; } else { if (!open(CHANGE, $changefile) and !open(CHANGE, $changefile.".ch" )) { do { print "! I can't find file `$input'\n\nPlease type another input file name: "; chomp($changefile=<>); } until (open(CHANGE,$changefile)); } @@change=; $changeable=1; } $changelineno=$sourcelineno=0; @{ Line numbers start at zero, 'cos I say so. @} @ The output routine is very similar, but we have to set the output name in our parent program (since {\tt tangle.pl} outputs |.pl| files, and {\tt weave.pl} outputs, by default at least, |.tex| files. @= @ @{ Warning - this module provided by parent! @} until (open(OUTPUT, ">$output")) { print "! I can't write on $output\n\n Please type another file name: "; chomp($output=<>); } @* Error Reporting. A basic function to make error reporting a bit nicer for the coder. @= print STDERR 1+(($errorfile eq "change")?$changelineno:$sourcelineno),". "; $errortoken = $tokencount unless defined $errortoken; @{ I keep forgetting to set it. @} if ($errortoken == -1) { @{ Show the whole line. @} if ($errorfile eq "change") { $errorline=$change[$changelineno]; } else { $errorline=$source[$sourcelineno]; } } else { $errortoken=$tokencount if undef $errortoken; $errorline=""; for (0..$errortoken) { $errorline.=$tokenlist[$_]; } $errorline.= "||"; for ($errortoken+1 .. $#tokenlist) { $errorline.=$tokenlist[$_];} } undef $errortoken; print STDERR $errorline; @* Change file implementation functions. {\it WEB\/}Perl programs come as two files; a program, or WEB file, and a change file for system- or site-specific operation. {\tt tangle.pl} and {\tt weave.pl} take these changes and incorporate them into the WEB. Note that the changing that {\it WEB\/}Perl does is done in quite a dissimilar manner to that of the original WEB system. Each line is compared against the next possible change, and when a match is found, {\bf every} line of the change must match before the change is applied. This part moves us to the next \at{x} in the changefile. The hook to deal with \at{w} formatting specifications is deliberately empty if you're reading this in {\tt TIE} and wondering why it doesn't tangle properly. This is because it will be filled in by {\tt WEAVE}, allowing us to use {\bf exactly} i the same code between all of the programs. @= $priming=1; while ($priming==1) { $changeline=$change[$changelineno]; if ($changeline=~ /\@@w/) { $_=$changeline; @ } elsif ($changeline=~ /\@@y/ or $changeline=~/\@@z/) { $errorfile="change"; $errortoken=-1; print STDERR "! Where is the matching \@@x?\n"; @ print STDERR "\n\nI found either an \@@y or a \@@z that didn't appear to\nfollow an \@@x, so I'm going to ignore everything I see\n until the next \@@x.\n\n"; } if ($changeline=~ /\@@x/) { $priming=0; @{ We've got what we came for. @} } $changelineno++; @{ Even when we're at an \at{x}, we'll be comparing the {\bf next line} after it. @} if ($changelineno>$#change) { @{ Finished reading change file @} $priming=0; $changeable=0; } } @ OK, at this stage, we've got to get the next line in the program. There are three things that could be happening here\footnote{This isn't as difficult as I thought!}---either we're not changing and the lines don't match, or we're not changing, the lines match and we need a closer look, or we are changing. Two of these cases are trivial. @= $changeline=$change[$changelineno]; $sourceline=$source[$sourcelineno]; if ($changeable==0) { $_=$sourceline; } else { if ($changing==0) { if ($changeline eq $sourceline) { @ if ($changing==1) { @ } } else {$_=$sourceline;} } else { @ } } @ When we've processed the line in question, we increment the line counter, {\bf and not before}. @= (($changing==0)?$sourcelineno:$changelineno)++; $finished=1 if ($sourcelineno > $#source); $changeable=0 if ($changelineno > $#change); print STDERR "." if ($sourcelineno%100)==0; print STDERR $sourcelineno if ($sourcelineno%500)==0; @ This is the bugger. We've got two lines that match. We hope there'll be more. We first need to save our position in source and change files, then step down them until we get to an \at{y} (or an \at{z}, at which point we become sarcastic.) We check whether each line matches, being careful not to hit {\tt EOF} in either file, breaking out if a match fails. @= $savesourcelineno=$sourcelineno; $savechangelineno=$changelineno; $checking=1; while ($checking==1) { $sourceline=$source[$sourcelineno]; $changeline=$change[$changelineno]; if ($changeline =~ /\@@y/) { @{ We made it! @} $changing=1; $checking=0; } elsif ($changeline ne $sourceline) { print STDERR "! Change failed to match!\n The change begun at line ".$savechangelineno." of the change file\n failed to match at line $sourcelineno of the source file.\nI'll ignore this change altogether.\n"; $checking=$changing=0; @ $savechangelineno=$changelineno; @{ Don't overwrite new position. @} } elsif ($sourcelineno>$#source) { print STDERR "! Ran out of source code.\n Your change begun at line ".$savechangelineno." matched too well,\n and I've got nothing left to process.\nIgnoring that change.\n"; $checking=$changing=0; $changelineno=$savechangelineno+1; @ $savechangelineno=$changelineno; } elsif ($changelineno>$#change or $changeline =~ /\@@z/ ) { print STDERR "! Where is the matching \@@y?\n Change begun at line ".$savechangelineno." didn't appear to have a \@@y, so I'm ignoring it.\n"; $checking=$changing=0; $changelineno=$savechangelineno+1; @ $savechangelineno=$changelineno; } $changelineno++; $sourcelineno++; } @ We're on the home stretch now; we just need to get the rest of a change file. We do this by spitting out lines from the change file until we hit an \at{z}, at which point we reprime the change buffer. @= if ($change[$changelineno]=~/\@@z/) { $changing=0; $changelineno++; $sourcelineno--; @ $_=$source[$sourcelineno]; } else { $_=$change[$changelineno]; } @* Dot notation expansion. Here's the function to canonicalise names, given a name ending with dots. The functionality was taken from weave version 1.1, with a few modifications. @= sub cname { $tryname=shift; $tryname=~s/(.*)\.\.\./\1/; $tryname=tidyname($tryname); $lastone=""; foreach (sort @@cs_names) { $thisone=substr(tidyname($_),0,length($tryname)); if (lc $tryname eq lc $lastone ) { if ($thisone eq $lastone) { print "! Module name confusion.\n"; print "I came across a reference to a module called $tryname... "; print " and this could refer to either ~".$_."~ or ~".$fulllast."~\n"; die "This is a rather serious problem, but I\'ll try and work around it.\n"; return ""; } return $fulllast; } if (lc $tryname gt lc $lastone and lc $tryname lt lc $thisone) { print "! Module name not found.\n\n"; print "I came across a reference to a module called $tryname...\n"; print "The closest I got was that it's somewhere between\n$fulllast and\n$_. \n"; print "I can't decide which to go for, so I'm giving up.\n\n"; print "I'd suggest that in future you gave me some more characters \nto identify th at one uniquely.\n"; die; return ""; } $fulllast=$_; $lastone=$thisone; } return $fulllast } @ To ensure comparisons are fair, we tidy up the names according to Knuth's rules: @= sub tidyname{ $tidyname=shift; $tidyname=~s/^\s*(.*)\s*$/\1/; $tidyname=~s/\s+/ /g; return $tidyname; }