Email Extraction
Email extraction is very similar to link extraction: you extract the links from a page and keep only those that are email addresses. LinkTag provides a method, isMailLink(), that checks whether the HREF starts with "mailto:". The following example implements a NodeFilter as an anonymous inner class:
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
/**
 * Demonstrates extracting email ("mailto:") links from a web page.
 *
 * <p>The page is parsed, every node accepted by a mail-link filter is
 * gathered into a list, and each link's text and target are printed to
 * standard output.
 */
public class EmailLinkDemo
{
    /** Placeholder URL parsed when no URL is given on the command line. */
    private static final String DEFAULT_URL = "http://urlIWantToParse.com";

    /**
     * Parses a page and prints every mail link found on it, one per line,
     * in the form <code>"link text" =&gt; link</code>.
     *
     * @param args Optional; if present, the first element is the URL of the
     * page to parse. Otherwise the placeholder URL is used.
     * @throws ParserException Propagated from the parser calls made here.
     */
    public static void main (String[] args) throws ParserException
    {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        Parser parser = new Parser (url);

        NodeFilter filter = new NodeFilter ()
        {
            /**
             * Accept nodes that are mail links.
             * @param node The node to check.
             * @return <code>true</code> if the node is a link tag whose
             * <code>isMailLink()</code> check succeeds.
             */
            public boolean accept (Node node)
            {
                // instanceof is the direct (and null-safe) form of the
                // type test; no reflection is needed here.
                return (node instanceof LinkTag)
                    && ((LinkTag)node).isMailLink ();
            }
        };

        // Ask each node produced by the parser to add the nodes accepted
        // by the filter to the list.
        NodeList links = new NodeList ();
        for (NodeIterator e = parser.elements (); e.hasMoreNodes (); )
        {
            e.nextNode ().collectInto (links, filter);
        }

        // The filter only accepts LinkTag instances, so the cast is safe.
        for (int i = 0; i < links.size (); i++)
        {
            LinkTag linkTag = (LinkTag)links.elementAt (i);
            System.out.print ("\"" + linkTag.getLinkText () + "\" => ");
            System.out.println (linkTag.getLink ());
        }
    }
}