return to the main page Mastering Regular Expressions
Third Edition

Listings from page 399
Download all listings shown below.

Chapter 8; page 399 (download)

// Validates `html` by consuming it strictly left to right. Every pattern starts with \G,
// so a match is accepted only when it begins exactly where the previous match ended;
// Matcher.usePattern() swaps the regex while keeping the matcher's position in the input.
Pattern pAtEnd   = Pattern.compile("\\G\\z");
Pattern pWord    = Pattern.compile("\\G\\w+");
Pattern pNonHtml = Pattern.compile("\\G[^\\w<>&]+");
Pattern pImgTag  = Pattern.compile("\\G(?i)<img\\s+([^>]+)>");
Pattern pLink    = Pattern.compile("\\G(?i)<A\\s+([^>]+)>");
Pattern pLinkX   = Pattern.compile("\\G(?i)</A>");
Pattern pEntity  = Pattern.compile("\\G&(#\\d+|\\w+);");
// Up to a dozen characters (newlines included, via (?s)) at the current location;
// used only to build the error message when nothing else matches.
Pattern pErrorContext = Pattern.compile("\\G(?s).{1,12}");

boolean needClose = false;        // true while an <A ...> is open and its </A> is still expected
Matcher m = pAtEnd.matcher(html); // Any Pattern object can create our Matcher object

// Loop until the current location is the very end of the input.
while (! m.usePattern(pAtEnd).find())
{
   if (m.usePattern(pWord).find()) {
      // . . . have a word or number in m.group() -- can now check for profanity, etc . . .
   } else if (m.usePattern(pImgTag).find()) {
      // . . . have an image tag -- can check that it's appropriate . . .
   } else if (! needClose && m.usePattern(pLink).find()) {
      // . . . have a link anchor -- can validate it . . .
      needClose = true;
   } else if (needClose && m.usePattern(pLinkX).find()) {
      System.out.println("/LINK [" + m.group() + "]");
      needClose = false;
   } else if (m.usePattern(pEntity).find()) {
      // Allow entities like &gt; and &#123;
   } else if (m.usePattern(pNonHtml).find()) {
      // Other (non-word) non-HTML stuff -- simply allow it
   } else {
      // Nothing matched at this point, so it must be an error. Grab a dozen or so characters
      // at our current location so that we can issue an informative error message.
      // We are not at the end of the input here (pAtEnd just failed), so at least one
      // character is available for this match.
      m.usePattern(pErrorContext).find();
      System.out.println("Bad char before '" + m.group() + "'");
      System.exit(1);
   }
}

// An <A ...> that was opened but never closed is also an error.
if (needClose) {
   System.out.println("Missing Final </A>");
   System.exit(1);
}

Chapter 8; page 399 (download)

// Same validation as a \G-driven scan, but the current location is tracked explicitly:
// each attempt calls find(currentLoc), which resets the matcher and searches from that
// index, and each pattern's leading \G keeps the match from starting anywhere later.
Pattern pWord    = Pattern.compile("\\G\\w+");
Pattern pNonHtml = Pattern.compile("\\G[^\\w<>&]+");
Pattern pImgTag  = Pattern.compile("\\G(?i)<img\\s+([^>]+)>");
Pattern pLink    = Pattern.compile("\\G(?i)<A\\s+([^>]+)>");
Pattern pLinkX   = Pattern.compile("\\G(?i)</A>");
Pattern pEntity  = Pattern.compile("\\G&(#\\d+|\\w+);");
// Up to a dozen characters (newlines included, via (?s)) at the current location;
// used only to build the error message when nothing else matches.
Pattern pErrorContext = Pattern.compile("\\G(?s).{1,12}");

boolean needClose = false;        // true while an <A ...> is open and its </A> is still expected
Matcher m = pWord.matcher(html);  // Any Pattern object can create our Matcher object
int currentLoc = 0;               // Begin at the start of the string (primitive: avoids boxing each pass)

while (currentLoc < html.length())
{
   if (m.usePattern(pWord).find(currentLoc)) {
      // . . . have a word or number in m.group() -- can now check for profanity, etc . . .
   } else if (m.usePattern(pNonHtml).find(currentLoc)) {
      // Other (non-word) non-HTML stuff -- simply allow it
   } else if (m.usePattern(pImgTag).find(currentLoc)) {
      // . . . have an image tag -- can check that it's appropriate . . .
   } else if (! needClose && m.usePattern(pLink).find(currentLoc)) {
      // . . . have a link anchor -- can validate it . . .
      needClose = true;
   } else if (needClose && m.usePattern(pLinkX).find(currentLoc)) {
      System.out.println("/LINK [" + m.group() + "]");
      needClose = false;
   } else if (m.usePattern(pEntity).find(currentLoc)) {
      // Allow entities like &gt; and &#123;
   } else {
      // Nothing matched at this point, so it must be an error. Grab a dozen or so characters
      // at our current location so that we can issue an informative error message.
      // currentLoc < html.length() here, so at least one character is available.
      m.usePattern(pErrorContext).find(currentLoc);
      System.out.println("Bad char at '" + m.group() + "'");
      System.exit(1);
   }
   currentLoc = m.end(); // The `current location' is now where the previous match ended
}

// An <A ...> that was opened but never closed is also an error.
if (needClose) {
   System.out.println("Missing Final </A>");
   System.exit(1);
}

Chapter 8; page 399 (download)

// Matcher.find(int) resets the matcher, and a reset restores the default (whole-input)
// region -- so region(start,end).find(currentLoc) would silently drop the region.
// Start the region at the current location instead and use the no-argument find(),
// which does not reset; \G then anchors at the region start and `end` stays in force.
// NOTE(review): assumes start <= currentLoc <= end -- confirm against the caller.
m.usePattern(pWord).region(currentLoc,end).find()

Copyright © 2014 Jeffrey Friedl

Fetch additional Third-Edition listings and data:

Fetch listings from page(s)