# Walk through $html one token at a time, accepting only a small whitelist of
# constructs: words/numbers, other plain text, <img ...> tags, <A ...>...</A>
# link anchors (not nested), and character entities. Anything else is fatal.
#
# Every match is anchored with \G (start where the previous match ended) and
# uses /gc: /g advances pos($html) on success, while /c keeps pos($html)
# unchanged on failure so the next elsif can try at the very same position.
#
# Expects $html to be set by the surrounding code before this point.
# Dies with "Unexpected HTML at position ..." on unrecognized input, or with
# "Missing final </A>" if an <A ...> is still open at end of input.
my $need_close_anchor = 0; # True if we've seen <A>, but not its closing </A>.
while (not $html =~ m/\G\z/gc) # While we haven't worked our way to the end . . .
{
  if ($html =~ m/\G(\w+)/gc) {
    # . . . have a word or number in $1 -- can now check for profanity, for example . . .
  } elsif ($html =~ m/\G[^<>&\w]+/gc) {
    # Other non-HTML stuff -- simply allow it.
  } elsif ($html =~ m/\G<img\s+([^>]+)>/gci) {
    # . . . have an image tag -- can check that it's appropriate . . .
    # ($1 holds everything between "<img" plus whitespace and the closing ">".)
    # ...
  } elsif (not $need_close_anchor and $html =~ m/\G<A\s+([^>]+)>/gci){
    # . . . have a link anchor - can validate it . . .
    # ($1 holds everything between "<A" plus whitespace and the closing ">".)
    # ...
    $need_close_anchor = 1; # Note that we now need </A>
  } elsif ($need_close_anchor and $html =~ m{\G</A>}gci){
    $need_close_anchor = 0; # Got what we needed; don't allow again
  } elsif ($html =~ m/\G&(#\d+|\w+);/gc){
    # Allow entities like &gt; and &#123;
  } else {
    # Nothing matched at this point, so it must be an error. Note the location, and grab a dozen or so
    # characters from the HTML so that we can issue an informative error message.
    my $location = pos($html); # Note where the unexpected HTML starts.
    # No /g here on purpose: we only want to peek at the text, not advance pos($html).
    my ($badstuff) = $html =~ m/\G(.{1,12})/s;
    die "Unexpected HTML at position $location: $badstuff\n";
  }
}
# Make sure there's no dangling <A>
if ($need_close_anchor) {
   die "Missing final </A>";
}
-----------------------------------------------------------------------------
Copyright 1997-2025 Jeffrey Friedl