Using a Custom Visitor to Extract Links

A custom visitor can do far more than the simple link processing demonstrated here:

import org.htmlparser.Parser;
import org.htmlparser.RemarkNode;
import org.htmlparser.StringNode;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.Tag;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.NodeVisitor;

/**
 * Example visitor that prints every link it is handed and leaves the
 * remaining callbacks empty as places to add further page processing.
 */
class MyCustomizedVisitor extends NodeVisitor
{
    /**
     * Creates the visitor, passing <code>true</code> to the
     * <code>NodeVisitor</code> constructor.
     */
    public MyCustomizedVisitor ()
    {
        super (true); // recurse into children
    }

    /**
     * Tag callback. Tags that are <code>LinkTag</code> instances are
     * printed; every other tag is ignored.
     *
     * @param tag the tag being visited
     */
    public void visitTag (Tag tag)
    {
        if (!(tag instanceof LinkTag))
        {
            return; // this example only reports links
        }
        reportLink ((LinkTag)tag);
    }

    /**
     * Writes one line to standard output of the form
     * <code>"link text" => link</code>.
     *
     * @param link the link to print
     */
    private void reportLink (LinkTag link)
    {
        System.out.print ("\"" + link.getLinkText () + "\" => ");
        System.out.println (link.getLink ());
    }

    /**
     * Text callback; intentionally empty.
     * Add processing of the page's text here.
     *
     * @param stringNode the text node being visited
     */
    public void visitStringNode (StringNode stringNode)
    {
    }

    /**
     * End-tag callback; intentionally empty.
     * Checking for end tags can be useful when performing more involved
     * page processing.
     *
     * @param endTag the end tag being visited
     */
    public void visitEndTag (Tag endTag)
    {
    }

    /**
     * Remark callback; intentionally empty.
     * Add processing of remark nodes here.
     *
     * @param remarkNode the remark node being visited
     */
    public void visitRemarkNode (RemarkNode remarkNode)
    {
    }
}

/**
 * Demo entry point: parses a page and lets a
 * <code>MyCustomizedVisitor</code> print the links it finds.
 */
public class LinkDemo
{
    /** Placeholder address used when no URL is supplied on the command line. */
    private static final String DEFAULT_URL = "http://urlIWantToParse.com";

    /**
     * Parses the requested page and visits all of its nodes with a
     * <code>MyCustomizedVisitor</code>.
     *
     * @param args optional; when present, <code>args[0]</code> is the URL
     *             to parse, otherwise the placeholder URL is used
     * @throws ParserException propagated from the parser
     */
    public static void main (String[] args) throws ParserException
    {
        // Fall back to the placeholder so running with no arguments
        // behaves exactly as before.
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;

        Parser parser = new Parser (url);
        MyCustomizedVisitor visitor = new MyCustomizedVisitor ();
        parser.visitAllNodesWith (visitor);
    }
}




Last edited on Wednesday, January 7, 2004 5:24:34 pm.