4 * File: SLVbase.class.php
5 * Spam Link Verification (SLV) Base Class
7 * Copyright (C) 2006 by the following authors:
8 * Author Dirk Haun dirk AT haun-online DOT de
10 * Licensed under the GNU General Public License
// Direct-access guard: abort when this class file itself is the script
// that was requested (the comparison on the script path is case-insensitive).
if (false !== strpos (strtolower ($_SERVER['PHP_SELF']), 'slvbase.class.php')) {
    die ('This file can not be used on its own!');
}
21 * Sends posts to SLV (linksleeve.org) for examination
23 * @author Dirk Haun dirk AT haun-online DOT de
24 * based on the works of Tom Willet (Spam-X) and Russ Jones (SLV)
32 var $_verbose = false;
39 $this->_debug = false;
40 $this->_verbose = false;
/**
 * Check for spam links
 *
 * Extracts the URLs from the post (see prepareLinks()) and submits them to
 * SLV through an XML-RPC call of the 'slv' method.
 *
 * @param    string  $post   post to check for spam
 * @return   boolean         true = spam found, false = no spam
 *
 * Note: Also returns 'false' in case of problems communicating with SLV.
 *       Error messages are logged in Geeklog's error.log
 */
function CheckForSpam ($post)
{
    global $_SPX_CONF;

    require_once ('XML/RPC.php');

    $retval = false;

    // nothing to examine
    if (empty ($post)) {
        return $retval;
    }

    // only the URLs are sent to SLV; without any there is nothing to ask
    $links = $this->prepareLinks ($post);
    if (empty ($links)) {
        return $retval;
    }

    if (!isset ($_SPX_CONF['timeout'])) {
        $_SPX_CONF['timeout'] = 5; // seconds
    }

    if ($this->_verbose) {
        SPAMX_log ("Sending to SLV: $links");
    }

    $params = array (new XML_RPC_Value ($links, 'string'));
    $msg = new XML_RPC_Message ('slv', $params);
    $cli = new XML_RPC_Client ('/slv.php', 'http://www.linksleeve.org');

    if ($this->_debug) {
        // must be called on $cli, the client object created above
        $cli->setDebug (1);
    }

    $resp = $cli->send ($msg, $_SPX_CONF['timeout']);
    if (!$resp) {
        // no response object at all
        COM_errorLog ('Error communicating with SLV: ' . $cli->errstr
                      . '; Message was ' . $msg->serialize());
    } else if ($resp->faultCode ()) {
        // SLV answered with an XML-RPC fault
        COM_errorLog ('Error communicating with SLV. Fault code: '
                      . $resp->faultCode() . ', Fault reason: '
                      . $resp->faultString() . '; Message was '
                      . $msg->serialize());
    } else {
        $val = $resp->value();
        // note that SLV returns '1' for acceptable posts and '0' for spam
        if ($val->scalarval() != '1') {
            $retval = true;
            SPAMX_log ("SLV: spam detected");
        } else if ($this->_verbose) {
            SPAMX_log ("SLV: no spam detected");
        }
    }

    return $retval;
}
/**
 * Check against our whitelist of sites not to report to SLV. Note that
 * URLs starting with $_CONF['site_url'] have already been removed earlier.
 *
 * Every 'SLVwhitelist' value stored in the spamx table is used as a
 * case-insensitive regular expression and tried against each link.
 *
 * @param    array   &$links     array of URLs from a post
 * @return   void    ($links is passed by reference and modified in place)
 */
function checkWhitelist (&$links)
{
    global $_TABLES;

    $result = DB_query ("SELECT value FROM {$_TABLES['spamx']} WHERE name='SLVwhitelist'", 1);
    $remaining = DB_numRows ($result);

    while ($remaining-- > 0) {
        $row = DB_fetchArray ($result);

        // '#' serves as the pattern delimiter, so escape it inside the entry
        $pattern = '#' . str_replace ('#', '\\#', $row['value']) . '#i';

        foreach ($links as $key => $link) {
            if (empty ($link) || !preg_match ($pattern, $link)) {
                continue;
            }

            // NOTE(review): whitelisted entries are assumed to be blanked
            // (not unset), as suggested by the empty() guard above - confirm.
            $links[$key] = '';
        }
    }
}
/**
 * Extracts all the links from a post; expects HTML links, i.e. <a> tags
 *
 * URLs that start with $_CONF['site_url'] (links to our own site) are
 * left out of the result.
 *
 * @param    string  $comment    The post to check
 * @return   array               All the URLs in the post
 */
function getLinks ($comment)
{
    global $_CONF;

    $links = array ();

    // The "s" modifier lets "." match newlines as well, so an anchor whose
    // link text is spread over several lines is still recognized.
    preg_match_all ("/<a[^>]*href=[\"']([^\"']*)[\"'][^>]*>(.*?)<\/a>/is",
                    $comment, $matches);

    $haveSiteUrl = !empty ($_CONF['site_url']);

    // $matches[1] holds the captured href value of every matched anchor
    foreach ($matches[1] as $url) {
        if ($haveSiteUrl && (strpos ($url, $_CONF['site_url']) === 0)) {
            // skip links to our own site
            continue;
        }

        $links[] = $url;
    }

    return $links;
}
/**
 * Extract only the links from the post
 *
 * SLV has a problem with non-ASCII character sets, so we feed it the URLs
 * only. We also remove all URLs containing our site's URL.
 *
 * Since we don't know if the post is in HTML or plain ASCII, we run it
 * through getLinks() twice.
 *
 * @param    string  $comment    The post to check
 * @return   string              All the URLs in the post, separated by
 *                               linefeeds (empty string if there are none)
 */
function prepareLinks ($comment)
{
    $linklist = '';

    // some spam posts have extra backslashes
    $comment = stripslashes ($comment);

    // some spammers have yet to realize that we're not supporting BBcode
    // but since we want the URLs, convert it here ...
    $comment = preg_replace ('/\[url=([^\]]*)\]/i', '<a href="\1">',
                             $comment);
    // ... and match the closing tag case-insensitively, too, just like the
    // opening tag above
    $comment = preg_replace ('/\[\/url\]/i', '</a>', $comment);

    // get all links from <a href="..."> tags
    $links = $this->getLinks ($comment);

    // strip all HTML, then get all the plain text links
    $comment = COM_makeClickableLinks (strip_tags ($comment));

    // array_merge() appends the second list; the "+" array operator would
    // discard every entry whose key already exists in the first list
    $links = array_merge ($links, $this->getLinks ($comment));

    if (count ($links) > 0) {
        $this->checkWhitelist ($links);
        $linklist = implode ("\n", $links);
    }

    return trim ($linklist);
}