# Walk through $html from the current match position, one token at a time,
# printing a line that describes each token found.
#
# Every match is anchored with \G, so it must begin exactly where the
# previous successful match ended.  The /c modifier keeps that position
# intact when a match fails, which lets the next alternative in the chain
# try again at the same spot; /g makes a successful match advance it.
#
# Expects $html to have been set before this point.
while (not $html =~ m/\G\z/gc)   # While we haven't worked to the end . . .
{
    if ($html =~ m/\G( <[^>]+> )/xgc) {
        # Anything from '<' through the next '>'.
        print "TAG: $1\n";
    }
    elsif ($html =~ m/\G( &\w+; )/xgc) {
        # Named entity such as &amp;
        print "NAMED ENTITY: $1\n";
    }
    elsif ($html =~ m/\G( &\#\d+; )/xgc) {
        # Decimal numeric entity such as &#169; ('#' is escaped because
        # under /x a bare '#' would start a regex comment).
        print "NUMERIC ENTITY: $1\n";
    }
    elsif ($html =~ m/\G( [^<>&\n]+ )/xgc) {
        # A run of ordinary characters, stopping at markup or a newline.
        print "TEXT: $1\n";
    }
    elsif ($html =~ m/\G \n /xgc) {
        print "NEWLINE\n";
    }
    elsif ($html =~ m/\G( . )/xgc) {
        # A stray '<', '>' or '&' that did not form a tag or an entity.
        print "ILLEGAL CHAR: $1\n";
    }
    else {
        # Not expected to be reachable: short of end-of-string, the next
        # character is either a newline (handled above) or something that
        # '.' matches.
        die "$0: oops, this shouldn't happen!";
    }
}

# -----------------------------------------------------------------------------
# Copyright 1997-2024 Jeffrey Friedl