1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<!-- $Id: head.tmpl,v 1.5 2002/12/15 01:30:47 carstenklapp Exp $ -->
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<meta name="robots" content="index,follow" />
<meta name="keywords" content="Email Extraction, PhpWiki" />
<meta name="description" content="This is very similar to [link extraction | LinkExtraction]. You have to extract links from a page and verify that they are email addresses. Link tags have a method - <i>isMailLink()</i> to check if the HREF starts with "mailto:". Using an inner class in the ~NodeFilter example:" />
<meta name="language" content="" />
<meta name="document-type" content="Public" />
<meta name="document-rating" content="General" />
<meta name="generator" content="phpWiki" />
<meta name="PHPWIKI_VERSION" content="1.3.4" />
<link rel="shortcut icon" href="/wiki/themes/default/images/favicon.ico" />
<link rel="home" title="HomePage" href="HomePage" />
<link rel="help" title="HowToUseWiki" href="HowToUseWiki" />
<link rel="copyright" title="GNU General Public License" href="http://www.gnu.org/copyleft/gpl.html#SEC1" />
<link rel="author" title="The PhpWiki Programming Team" href="http://phpwiki.sourceforge.net/phpwiki/ThePhpWikiProgrammingTeam" />
<link rel="search" title="FindPage" href="FindPage" />
<link rel="alternate" title="View Source: EmailExtraction" href="EmailExtraction?action=viewsource&version=7" />
<link rel="alternate" type="application/rss+xml" title="RSS" href="RecentChanges?format=rss" />
<link rel="bookmark" title="SandBox" href="SandBox" />
<link rel="bookmark" title="WikiWikiWeb" href="WikiWikiWeb" />
<link rel="stylesheet" title="MacOSX" type="text/css" charset="iso-8859-1" href="/wiki/themes/MacOSX/MacOSX.css" /><link rel="alternate stylesheet" title="Printer" type="text/css" charset="iso-8859-1" href="/wiki/themes/default/phpwiki-printer.css" media="print, screen" /><link rel="alternate stylesheet" title="Modern" type="text/css" charset="iso-8859-1" href="/wiki/themes/default/phpwiki-modern.css" /><style type="text/css">
<!--
body {background-image: url(/wiki/themes/MacOSX/images/bgpaper8.png);}
-->
</style>
<title>PhpWiki - Email Extraction</title>
</head>
<!-- End head -->
<!-- Begin body -->
<!-- $Id: body.tmpl,v 1.30 2002/09/02 14:36:58 rurban Exp $ -->
<body>
<!-- Begin top -->
<!-- $Id: top.tmpl,v 1.20 2002/12/15 01:30:47 carstenklapp Exp $ -->
<!-- End top -->
<!-- Begin browse -->
<!-- $Id: browse.tmpl,v 1.22 2002/02/19 23:00:26 carstenklapp Exp $ -->
<div class="wikitext"><p><b>Email Extraction</b></p>
<p>This is very similar to <a href="../index.php/LinkExtraction" class="named-wiki" title="LinkExtraction">link extraction</a>. You have to extract links from a page and verify that they are email addresses. Link tags have a method - <i>isMailLink()</i> to check if the HREF starts with "mailto:". Using an inner class in the NodeFilter example:</p>
<pre>
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
public class EmailLinkDemo
{
public static void main (String[] args) throws ParserException
{
Parser parser = new Parser ("http://urlIWantToParse.com");
NodeFilter filter = new NodeFilter ()
{
/**
* Accept nodes that are mail links.
* @param node The node to check.
*/
public boolean accept (Node node)
{
return (LinkTag.class.isAssignableFrom (node.getClass ())
&& ((LinkTag)node).isMailLink ());
}
};
NodeList links = new NodeList ();
for (NodeIterator e = parser.elements (); e.hasMoreNodes (); )
e.nextNode ().collectInto (links, filter);
for (int i = 0; i < links.size (); i++)
{
LinkTag linkTag = (LinkTag)links.elementAt (i);
System.out.print ("\"" + linkTag.getLinkText () + "\" => ");
System.out.println (linkTag.getLink ());
}
}
}</pre>
</div>
<!-- End browse -->
<!-- Begin bottom -->
<!-- $Id: bottom.tmpl,v 1.3 2002/09/15 20:21:16 rurban Exp $ -->
<!-- Add your Disclaimer here -->
<!-- Begin debug -->
<!-- $Id: debug.tmpl,v 1.9 2002/09/17 02:10:33 dairiki Exp $ -->
<table width="%100" border="0" cellpadding="0" cellspacing="0">
<tr><td>
</td><td>
<span class="debug">Page Execution took 0.256 seconds</span>
</td></tr></table>
<!-- This keeps the valid XHTML! icons from "hanging off the bottom of the scree" -->
<br style="clear: both;" />
<!-- End debug -->
<!-- End bottom -->
</body>
<!-- End body -->
<!-- phpwiki source:
$Id: prepend.php,v 1.13 2002/09/18 19:23:25 dairiki Exp $
$Id: ErrorManager.php,v 1.16 2002/09/14 22:23:36 dairiki Exp $
$Id: HtmlElement.php,v 1.27 2002/10/31 03:28:30 carstenklapp Exp $
$Id: XmlElement.php,v 1.17 2002/08/17 15:52:51 rurban Exp $
$Id: WikiCallback.php,v 1.2 2001/11/21 20:01:52 dairiki Exp $
$Id: index.php,v 1.99 2002/12/31 01:13:14 wainstead Exp $
$Id: main.php,v 1.90 2002/11/19 07:07:37 carstenklapp Exp $
$Id: config.php,v 1.68 2002/11/14 22:28:03 carstenklapp Exp $
$Id: FileFinder.php,v 1.11 2002/09/18 18:34:13 dairiki Exp $
$Id: Request.php,v 1.24 2002/12/14 16:21:46 dairiki Exp $
$Id: WikiUser.php,v 1.29 2002/11/19 07:07:38 carstenklapp Exp $
$Id: WikiDB.php,v 1.17 2002/09/15 03:56:22 dairiki Exp $
$Id: SQL.php,v 1.2 2001/09/19 03:24:36 wainstead Exp $
$Id: mysql.php,v 1.3 2001/12/08 16:02:35 dairiki Exp $
$Id: PearDB.php,v 1.28 2002/09/12 11:45:33 rurban Exp $
$Id: backend.php,v 1.3 2002/01/10 23:32:04 carstenklapp Exp $
$Id: DB.php,v 1.2 2002/09/12 11:45:33 rurban Exp $
From Pear CVS: Id: DB.php,v 1.13 2002/07/02 15:19:49 cox Exp
$Id: PEAR.php,v 1.1 2002/01/28 04:01:56 dairiki Exp $
From Pear CVS: Id: PEAR.php,v 1.29 2001/12/15 15:01:35 mj Exp
$Id: mysql.php,v 1.2 2002/09/12 11:45:33 rurban Exp $
From Pear CVS: Id: mysql.php,v 1.5 2002/06/19 00:41:06 cox Exp
$Id: common.php,v 1.2 2002/09/12 11:45:33 rurban Exp $
From Pear CVS: Id: common.php,v 1.8 2002/06/12 15:03:16 fab Exp
$Id: themeinfo.php,v 1.46 2002/03/08 20:31:14 carstenklapp Exp $
$Id: Theme.php,v 1.58 2002/10/12 08:55:03 carstenklapp Exp $
$Id: display.php,v 1.38 2002/09/15 20:17:58 rurban Exp $
$Id: Template.php,v 1.46 2002/09/15 15:05:47 rurban Exp $
$Id: WikiPlugin.php,v 1.27 2002/11/04 03:15:59 carstenklapp Exp $
$Id: BlockParser.php,v 1.29 2002/11/25 22:25:49 dairiki Exp $
$Id: InlineParser.php,v 1.19 2002/11/25 22:51:37 dairiki Exp $
$Id: interwiki.php,v 1.23 2002/10/06 16:45:10 dairiki Exp $
$Id: PageType.php,v 1.13 2002/09/04 20:39:47 dairiki Exp $
-->
</html>
|