// Listing 1: scan `html` token by token with a single Matcher, switching the
// active pattern with usePattern(). Every pattern starts with \G, so a find()
// can only succeed exactly where the previous successful match ended.
// `html` is not declared in this excerpt; it is the text being scanned.
Pattern pAtEnd   = Pattern.compile("\\G\\z");                   // end of input
Pattern pWord    = Pattern.compile("\\G\\w+");                  // word or number
Pattern pNonHtml = Pattern.compile("\\G[^\\w<>&]+");            // non-word text with no <, > or &
Pattern pImgTag  = Pattern.compile("\\G(?i)<img\\s+([^>]+)>");  // image tag, attributes in group 1
Pattern pLink    = Pattern.compile("\\G(?i)<A\\s+([^>]+)>");    // opening anchor, attributes in group 1
Pattern pLinkX   = Pattern.compile("\\G(?i)</A>");              // closing anchor
Pattern pEntity  = Pattern.compile("\\G&(#\\d+|\\w+);");        // numeric or named entity

boolean needClose = false;  // true while an opening anchor is waiting for its closing tag
Matcher m = pAtEnd.matcher(html); // Any Pattern object can create our Matcher object

while (!m.usePattern(pAtEnd).find()) {
    if (m.usePattern(pWord).find()) {
        // ... have a word or number in m.group() -- can now check for profanity, etc ...
    } else if (m.usePattern(pImgTag).find()) {
        // ... have an image tag -- can check that it's appropriate ...
    } else if (!needClose && m.usePattern(pLink).find()) {
        // ... have a link anchor -- can validate it ...
        needClose = true;
    } else if (needClose && m.usePattern(pLinkX).find()) {
        System.out.println("/LINK [" + m.group() + "]");
        needClose = false;
    } else if (m.usePattern(pEntity).find()) {
        // Allow entities like &gt; and &#123;
    } else if (m.usePattern(pNonHtml).find()) {
        // Other (non-word) non-HTML stuff -- simply allow it
    } else {
        // Nothing matched at this point, so it must be an error. Grab a dozen or so characters
        // at our current location so that we can issue an informative error message
        m.usePattern(Pattern.compile("\\G(?s).{1,12}")).find();
        System.out.println("Bad char before '" + m.group() + "'");
        System.exit(1);
    }
}

// An opening anchor was seen but the input ended before its closing tag.
if (needClose) {
    System.out.println("Missing Final </A>");
    System.exit(1);
}


// Listing 2: the same scan, but the current position is tracked explicitly in
// currentLoc and passed to find(int) on every attempt. This is an independent
// alternative to Listing 1 and redeclares the same names.
// `html` is not declared in this excerpt; it is the text being scanned.
Pattern pWord    = Pattern.compile("\\G\\w+");                  // word or number
Pattern pNonHtml = Pattern.compile("\\G[^\\w<>&]+");            // non-word text with no <, > or &
Pattern pImgTag  = Pattern.compile("\\G(?i)<img\\s+([^>]+)>");  // image tag, attributes in group 1
Pattern pLink    = Pattern.compile("\\G(?i)<A\\s+([^>]+)>");    // opening anchor, attributes in group 1
Pattern pLinkX   = Pattern.compile("\\G(?i)</A>");              // closing anchor
Pattern pEntity  = Pattern.compile("\\G&(#\\d+|\\w+);");        // numeric or named entity

boolean needClose = false;  // true while an opening anchor is waiting for its closing tag
Matcher m = pWord.matcher(html); // Any Pattern object can create our Matcher object
int currentLoc = 0;              // Begin at the start of the string

while (currentLoc < html.length()) {
    if (m.usePattern(pWord).find(currentLoc)) {
        // ... have a word or number in m.group() -- can now check for profanity, etc ...
    } else if (m.usePattern(pNonHtml).find(currentLoc)) {
        // Other (non-word) non-HTML stuff -- simply allow it
    } else if (m.usePattern(pImgTag).find(currentLoc)) {
        // ... have an image tag -- can check that it's appropriate ...
    } else if (!needClose && m.usePattern(pLink).find(currentLoc)) {
        // ... have a link anchor -- can validate it ...
        needClose = true;
    } else if (needClose && m.usePattern(pLinkX).find(currentLoc)) {
        System.out.println("/LINK [" + m.group() + "]");
        needClose = false;
    } else if (m.usePattern(pEntity).find(currentLoc)) {
        // Allow entities like &gt; and &#123;
    } else {
        // Nothing matched at this point, so it must be an error. Grab a dozen or so characters
        // at our current location so that we can issue an informative error message
        m.usePattern(Pattern.compile("\\G(?s).{1,12}")).find(currentLoc);
        System.out.println("Bad char at '" + m.group() + "'");
        System.exit(1);
    }
    currentLoc = m.end(); // The `current location' is now where the previous match ended
}

// An opening anchor was seen but the input ended before its closing tag.
if (needClose) {
    System.out.println("Missing Final </A>");
    System.exit(1);
}


// Fragment: the find(currentLoc) call from Listing 2 with an explicit
// region(start, end) call inserted; `start` and `end` are not defined in this excerpt.
m.usePattern(pWord).region(start,end).find(currentLoc)
-----------------------------------------------------------------------------
Copyright 1997-2024 Jeffrey Friedl