// Validates a restricted subset of HTML held in `html` (declared outside this fragment).
// A single Matcher walks the input from left to right; every pattern starts with \G, so
// each one can match only at the position where the previous successful match ended.
// The loop tries each permitted construct in turn and aborts on the first position at
// which none of them applies.
//
// NOTE(review): the tag patterns below were damaged in the listing this fragment was taken
// from (everything between '<' and the first '>' was lost). The tag names and the \s+
// separator are reconstructed from the pattern names and the surviving "]+)>" tail --
// confirm against the original source.
Pattern pAtEnd = Pattern.compile("\\G\\z");                      // end of input
Pattern pWord = Pattern.compile("\\G\\w+");                      // run of word characters
Pattern pNonHtml = Pattern.compile("\\G[^\\w<>&]+");             // non-word text that is not HTML-special
Pattern pImgTag = Pattern.compile("\\G(?i)<img\\s+([^>]+)>");    // image tag; group 1 = its attributes
Pattern pLink = Pattern.compile("\\G(?i)<A\\s+([^>]+)>");        // opening anchor; group 1 = its attributes
Pattern pLinkX = Pattern.compile("\\G(?i)</A>");                 // closing anchor
Pattern pEntity = Pattern.compile("\\G&(#\\d+|\\w+);");          // numeric or named entity

// True while an opening anchor has been seen without its closing tag.
boolean needClose = false;
Matcher m = pAtEnd.matcher(html); // Any Pattern object can create our Matcher object

// Keep consuming tokens until the end-of-input pattern matches at the current position.
while (! m.usePattern(pAtEnd).find())
{
    if (m.usePattern(pWord).find()) {
        // . . . have a word or number in m.group() -- can now check for profanity, etc . . .
    } else if (m.usePattern(pImgTag).find()) {
        // . . . have an image tag -- can check that it's appropriate . . .
    } else if (! needClose && m.usePattern(pLink).find()) {
        // . . . have a link anchor -- can validate it . . .
        // Opening anchors are accepted only when no anchor is already open (no nesting).
        needClose = true;
    } else if (needClose && m.usePattern(pLinkX).find()) {
        // A closing anchor is accepted only while an anchor is open.
        System.out.println("/LINK [" + m.group() + "]");
        needClose = false;
    } else if (m.usePattern(pEntity).find()) {
        // Allow entities like &gt; and &#123;
    } else if (m.usePattern(pNonHtml).find()) {
        // Other (non-word) non-HTML stuff -- simply allow it
    } else {
        // Nothing matched at this point, so it must be an error. Grab a dozen or so characters
        // at our current location so that we can issue an informative error message.
        // (?s) lets '.' match line terminators too, so the grab cannot fail while input remains.
        m.usePattern(Pattern.compile("\\G(?s).{1,12}")).find();
        System.out.println("Bad char before '" + m.group() + "'");
        System.exit(1);
    }
}

// Reaching the end of the input with an anchor still open is an error.
if (needClose) {
    System.out.println("Missing Final </A>");
    System.exit(1);
}
-----------------------------------------------------------------------------
Copyright 1997-2024 Jeffrey Friedl