Using a Custom Visitor to Extract Links
A custom visitor can do much more than the simple link processing demonstrated here:
import org.htmlparser.Parser;
import org.htmlparser.RemarkNode;
import org.htmlparser.StringNode;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.Tag;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.NodeVisitor;
/**
 * Node visitor that reports every link tag it is handed on standard output.
 *
 * <p>Each {@link LinkTag} passed to {@link #visitTag(Tag)} is printed on its
 * own line as {@code "link text" => link}. The remaining callbacks are empty
 * placeholders showing where text, end-tag and remark processing would go.
 */
class MyCustomizedVisitor extends NodeVisitor
{
    /**
     * Creates the visitor, passing {@code true} to the superclass so that
     * child nodes are visited as well.
     */
    public MyCustomizedVisitor ()
    {
        super (true); // true => recurse into children
    }

    /**
     * Handles a tag: link tags are printed, every other tag is ignored.
     *
     * @param tag the tag being visited
     */
    public void visitTag (Tag tag)
    {
        if (!(tag instanceof LinkTag))
        {
            return; // only links are of interest in this example
        }

        LinkTag link = (LinkTag) tag;
        // The quoted link text is emitted first, then the link itself ends the line.
        String quotedText = "\"" + link.getLinkText () + "\" => ";
        System.out.print (quotedText);
        System.out.println (link.getLink ());
    }

    /**
     * Hook for processing the text found in the page; does nothing here.
     *
     * @param stringNode the text node being visited
     */
    public void visitStringNode (StringNode stringNode)
    {
        // intentionally empty: page text is not used by this example
    }

    /**
     * Hook for processing end tags; does nothing here. Watching for end tags
     * can be useful when performing more involved page processing.
     *
     * @param endTag the end tag being visited
     */
    public void visitEndTag (Tag endTag)
    {
        // intentionally empty: end tags are not used by this example
    }

    /**
     * Hook for processing remark nodes; does nothing here.
     *
     * @param remarkNode the remark node being visited
     */
    public void visitRemarkNode (RemarkNode remarkNode)
    {
        // intentionally empty: remarks are not used by this example
    }
}
/**
 * Command-line demo that runs a {@link MyCustomizedVisitor} over a page so
 * that the links it contains are printed.
 *
 * <p>Usage: {@code java LinkDemo [location]}. When no location is supplied
 * the placeholder {@value #DEFAULT_LOCATION} is parsed.
 */
public class LinkDemo
{
    /** Placeholder location parsed when no command-line argument is given. */
    private static final String DEFAULT_LOCATION = "http://urlIWantToParse.com";

    /**
     * Parses the requested location and visits all of its nodes with a
     * {@link MyCustomizedVisitor}.
     *
     * @param args optional; when {@code args[0]} is present and non-blank it
     *             is handed to the {@link Parser} constructor instead of the
     *             built-in placeholder
     * @throws ParserException if the parser reports a failure while being
     *                         created or while visiting the nodes
     */
    public static void main (String[] args) throws ParserException
    {
        // Fall back to the placeholder so running without arguments behaves as before.
        String location = DEFAULT_LOCATION;
        if (args != null && args.length > 0
            && args[0] != null && args[0].trim ().length () > 0)
        {
            location = args[0];
        }

        Parser parser = new Parser (location);
        parser.visitAllNodesWith (new MyCustomizedVisitor ());
    }
}