# Walk through the HTML held in $html one token at a time and die on anything
# that is not explicitly allowed.  Every match is anchored with \G at the
# current pos($html) and uses /gc, so a failed attempt leaves pos($html)
# untouched and the next alternative is tried at the same spot.
#
# Expects: $html (set by the surrounding code) holds the text to check.
# Dies:    on the first unrecognised construct, or when an anchor is left open.
#
# NOTE(review): the literal tags in the three tag patterns, in the comments, and
# in the final error message had been stripped from this file.  They are
# reconstructed here from the surrounding comments ("image tag", "link anchor",
# "closing") -- confirm against the original listing.

my $need_close_anchor = 0;   # True if we've seen <A>, but not its closing </A>.

while (not $html =~ m/\G\z/gc)   # While we haven't worked our way to the end ...
{
    if ($html =~ m/\G(\w+)/gc) {
        # ... have a word or number in $1 -- can now check for profanity, for example ...
    }
    elsif ($html =~ m/\G[^<>&\w]+/gc) {
        # Other non-HTML stuff -- simply allow it.
    }
    elsif ($html =~ m/\G<IMG\s+([^>]+)>/gci) {
        # ... have an image tag -- can check that it's appropriate ...
    }
    elsif (not $need_close_anchor and $html =~ m/\G<A\s+([^>]+)>/gci) {
        # ... have a link anchor - can validate it ...
        $need_close_anchor = 1;   # Note that we now need </A>
    }
    elsif ($need_close_anchor and $html =~ m{\G</A>}gci) {
        $need_close_anchor = 0;   # Got what we needed; don't allow again
    }
    elsif ($html =~ m/\G&(#\d+|\w+);/gc) {
        # Allow entities like &gt; and &#123;
    }
    else {
        # Nothing matched at this point, so it must be an error. Note the
        # location, and grab a dozen or so characters from the HTML so that
        # we can issue an informative error message.
        my $location = pos($html);   # Note where the unexpected HTML starts.
        my ($badstuff) = $html =~ m/\G(.{1,12})/s;
        die "Unexpected HTML at position $location: $badstuff\n";
    }
}

# Make sure there's no dangling <A>
if ($need_close_anchor) {
    die "Missing final </A>";
}

# -----------------------------------------------------------------------------
# Copyright 1997-2024 Jeffrey Friedl