Using Custom Tags to Extract Links
Registering a custom tag with the parser's node factory lets you change what happens as each tag is parsed:
import org.htmlparser.Parser;
import org.htmlparser.PrototypicalNodeFactory;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.ParserException;
/**
 * A {@link LinkTag} subclass that writes each link it represents to standard
 * output from its semantic-action hook.
 */
class MyLinkTag extends LinkTag
{
    /**
     * Prints the link text wrapped in double quotes, followed by {@code " => "},
     * then the link itself and a line terminator.
     *
     * @throws ParserException declared by the signature; this body does not
     *         throw it explicitly
     */
    public void doSemanticAction () throws ParserException
    {
        // Emit the quoted text first, then resolve and emit the target.
        String label = "\"" + getLinkText () + "\" => ";
        System.out.print (label);
        String target = getLink ();
        System.out.println (target);
    }
}
/**
 * Demo program: parses a resource with {@link MyLinkTag} registered on the
 * parser's node factory.
 */
public class LinkDemo
{
    /** Resource parsed when no command-line argument is supplied. */
    private static final String DEFAULT_RESOURCE = "http://urlIWantToParse.com";

    /**
     * Parses the requested resource, walking every top-level node.
     *
     * @param args optional; when at least one argument is given, {@code args[0]}
     *        is handed to the {@link Parser} constructor in place of the
     *        built-in default resource
     * @throws ParserException if constructing the parser or iterating over
     *         the nodes fails
     */
    public static void main (String[] args) throws ParserException
    {
        String resource = (args != null && args.length > 0) ? args[0] : DEFAULT_RESOURCE;
        Parser parser = new Parser (resource);

        // Register the custom tag so the factory uses it as the prototype
        // for the tag it replaces.
        PrototypicalNodeFactory factory = new PrototypicalNodeFactory ();
        factory.registerTag (new MyLinkTag ());
        parser.setNodeFactory (factory);

        // The returned nodes are intentionally discarded: just parsing the
        // nodes executes doSemanticAction.
        NodeIterator nodes = parser.elements ();
        while (nodes.hasMoreNodes ())
        {
            nodes.nextNode ();
        }
    }
}