Сабж. Нужен любой (php/perl/питон и т. д.) Google_Grabber... Самому писать нет возможности: работа не позволяет (времени хавает много)...
PHP: Google Parser by lamarez IcQ:123424 Site:k0x.ru Use: php gp.php -q "your query" [-title] [-page ...] -help This help:). -q Your query. -page Page number. -num count of results. -title Show page title. Search pages with word "lamarez" Example: php gp.php -q "lamarez" Show 100 links of sites with word "lamarez" and their titles. Example: php gp.php -q "lamarez" -num 100 -title PHP:
<?php
/*
 * Console Google result parser.
 *
 * Reads the query and options from the command line (or prompts on STDIN when
 * -q is absent), downloads one Google result page and prints every result URL,
 * optionally preceded by its title.
 *
 * Options: -q <query>, -page <n>, -num <n>, -title, -beautiful, -help.
 */
//error_reporting(0);

/**
 * Downloads $path from $domain over a plain socket on port 80.
 *
 * @param string $domain Host name; resolved with gethostbyname() and sent as the Host header.
 * @param string $path   Request target placed on the GET line.
 * @return string|int    Raw response (status line, headers and body), or 0 if the socket could not be opened.
 */
function GetSome($domain, $path)
{
    $hostname = gethostbyname($domain);
    $errnum = 0;
    $errstr = "";
    $fsock = fsockopen($hostname, 80, $errnum, $errstr, 5);
    if (!$fsock) {
        return 0;
    }

    // HTTP header lines must end with CRLF. HTTP/1.0 plus "Connection: close"
    // makes the server close the socket after the response, so the feof() loop
    // below terminates promptly and the body is never chunk-encoded.
    $headers  = "GET $path HTTP/1.0\r\n";
    $headers .= "Host: $domain\r\n";
    $headers .= "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.8.0.2) Gecko/20060308 Firefox/1.5.0.2\r\n";
    $headers .= "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5\r\n";
    $headers .= "Accept-Language: ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3\r\n";
    $headers .= "Accept-Charset: windows-1251,utf-8;q=0.7,*;q=0.7\r\n";
    $headers .= "Connection: close\r\n\r\n";
    fwrite($fsock, $headers);

    $responce = "";
    while (!feof($fsock)) {
        $chunk = fread($fsock, 1024);
        if ($chunk === false) {
            break; // read error: stop instead of spinning on a dead socket
        }
        $responce .= $chunk;
    }
    fclose($fsock);

    return $responce;
}

/**
 * Prints a prompt (converted from WINDOWS-1251 to CP866) and reads one line from STDIN.
 *
 * @param string $str Prompt text.
 * @return string     The entered line without surrounding whitespace; empty string on EOF.
 */
function GetCLI($str)
{
    echo iconv("WINDOWS-1251", "CP866", $str);
    $line = fgets(STDIN);

    return ($line === false) ? '' : trim($line);
}

/**
 * Returns the argument that follows $flag in $args.
 *
 * @param array  $args    Command-line arguments.
 * @param string $flag    Option name, e.g. '-num'.
 * @param mixed  $default Returned when the flag is absent or is the last argument.
 * @return mixed
 */
function GetArgValue($args, $flag, $default)
{
    $pos = array_search($flag, $args, true);
    if ($pos === false || !isset($args[$pos + 1])) {
        return $default;
    }

    return $args[$pos + 1];
}

if (!isset($argv)) {
    die("Work only in console. \nUse: php gp.php");
}

if (in_array('-help', $argv, true)) {
    $helptext = "Google Parser by lamarez\n"
        ."IcQ:123424\n"
        ."Site:k0x.ru\n\n"
        ."Use: php gp.php -q \"your query\" [-title] [-page ...]\n\n"
        ."-help This help:).\n"
        ."-q Your query.\n"
        ."-page Page number.\n"
        ."-num count of results.\n"
        ."-title Show page title.\n\n"
        ."Search pages with word \"lamarez\"\n"
        ."\tExample: php gp.php -q \"lamarez\"\n"
        ."Show 100 links of sites with word \"lamarez\" and their titles.\n"
        ."\tExample: php gp.php -q \"lamarez\" -num 100 -title\n";
    die($helptext);
}

// Numeric options are cast to int so they are safe in the URL and in $page * $num.
$page = (int) GetArgValue($argv, '-page', 0);
if ($page < 0) {
    $page = 0;
}

$num = (int) GetArgValue($argv, '-num', 10);
if ($num < 1) {
    $num = 10;
}

// Fall back to an interactive prompt when -q is missing or has no value.
$query = GetArgValue($argv, '-q', null);
if ($query === null) {
    $query = GetCLI('Введите строку для поиска:');
}

$title = in_array("-title", $argv, true);
$cool  = in_array("-beautiful", $argv, true);

$query = urlencode($query);
//$googletext=GetSome('www.google.com',"http://www.google.com/search?q=$query&num=$num&hl=ru&client=opera&rls=ru&start=".($page*$num)."&sa=N");
$googletext = file_get_contents("http://www.google.com/search?q=$query&num=$num&hl=ru&client=opera&rls=ru&start=".($page*$num)."&sa=N");
if ($googletext === false) {
    die("Request to Google failed.\n");
}

// Drop the bold markup Google puts around matched words so titles match as plain text.
$googletext = str_replace(array('<b>', '</b>'), '', $googletext);

// Group 1 is the result URL, group 2 the link text (title).
preg_match_all('#<a href="(\S+)" class=l>([^<]*)</a>#i', $googletext, $zret);

$separator = "--------------------------------------------------------------------------------\n";
foreach ($zret[1] as $i => $link) {
    if ($cool) {
        echo $separator;
    }
    if ($title) {
        $titletext = htmlspecialchars_decode($zret[2][$i]);
        $titletext = iconv("WINDOWS-1251", "CP866", $titletext);
        echo "\t".$titletext."\n";
    }
    echo $link."\n";
    if ($cool) {
        echo $separator;
    }
}
?>
можна просто php script.php а затем через stdin ввести что нужно искать... а можна php script.php -num 100 -title -page 2 и опятьже ввести через stdin... © lamarez gay
А если что-то простое нужно, то вот PHP:
<?php
/*
 * Web-form batch parser.
 *
 * Takes a list of search queries (one per line) and a per-query result count,
 * saves the Google result pages to ./result.txt, extracts the outbound links,
 * downloads every link and writes the cleaned page text to ./data.txt and the
 * list of visited URLs to ./urls.txt.
 */
Header("Content-Type: text/html; charset=windows-1251");
echo '<style>input {width:220px;}</style>';
echo '<form action="" method="post">';
echo '<b>Запросы:</b><textarea rows=15 cols=72 name=str></textarea><br>';
echo '<b>кол-во URL на запрос:</b><input type=text name=count value="10"><br><br>';
echo ' <input type=submit value="Парсить"> <br> </form>';

if (!isset($_POST['str'])) {
    die();
}

set_time_limit(0);
ob_implicit_flush();

// The count goes straight into the request URL, so force it to a positive integer.
$result = isset($_POST['count']) ? (int) trim($_POST['count']) : 10;
if ($result < 1) {
    $result = 10;
}

$querys = explode("\n", trim($_POST['str']));

$links = fopen('./result.txt', 'w');
if ($links === false) {
    die('Cannot open ./result.txt for writing');
}
foreach ($querys as $query) {
    // Textarea lines arrive as "\r\n"; without trim() the "\r" would be URL-encoded into the query.
    $query = trim($query);
    if ($query === '') {
        continue;
    }
    //print $query."<br>";
    $url = 'http://www.google.com/ie?q='.urlencode($query).'&num='.$result.'&hl=en&lr=&c2coff=1&start=0&sa=N';
    $str = get_page($url);
    if ($str !== false) {
        fwrite($links, $str);
    }
}
fclose($links);

$data = fopen('./data.txt', 'w');
$urls = fopen('./urls.txt', 'w');
if ($data === false || $urls === false) {
    die('Cannot open ./data.txt or ./urls.txt for writing');
}

// Links containing any of these substrings (case-insensitive) are skipped.
$exclude = array("google", "doc", "pdf", "rtf", "xls", "jsp", "swf");
$seen = array();

print "<ol>";
foreach (ParseUrls() as $link) {
    $link = trim($link);

    $skip = false;
    foreach ($exclude as $marker) {
        if (stristr($link, $marker)) {
            $skip = true;
            break;
        }
    }
    if ($skip || isset($seen[$link])) {
        continue; // excluded, or already processed
    }
    $seen[$link] = true;

    // The URL comes from a downloaded page, so escape it before echoing into HTML.
    print "<li>".htmlspecialchars($link, ENT_QUOTES, 'cp1251')."\r\n<br>";
    fwrite($urls, $link."\r\n");

    $content = get_page($link);
    if ($content === false) {
        continue; // download failed: nothing to clean
    }
    fwrite($data, CleanText($content));
}
print "</ol>";
fclose($data);
fclose($urls);

print "Done!!! \n<a href=./data.txt target=_blank>Parsed text here</a>, <a href=./urls.txt target=_blank>parsed URLs here</a>...";

//------------------------------------------------
/**
 * Reduces an HTML page to plain sentences, one per line.
 *
 * Removes the title, comments, scripts, styles and all remaining tags, then
 * normalises punctuation, splits the text into sentences at ". ! ? |" and
 * keeps only sentences of 55..210 bytes that contain none of the markers of
 * links, entities or leftover markup. Also prints the number of kept lines.
 *
 * @param string $content Raw page HTML.
 * @return string         Kept sentences joined with "\r\n".
 */
function CleanText($content)
{
    $text = $content;
    $text = preg_replace("/<title>\s*(.*?)\s*<\/title>/is", " ", $text);
    $text = preg_replace("/<!--.*?-->/s", " ", $text);
    $text = preg_replace("/<script.*?<\/script>/is", " ", $text);
    $text = preg_replace("/<style.*?<\/style>/is", " ", $text);
    $text = preg_replace("/<[^>]*>/s", " ", $text);
    $text = preg_replace('/\<!doctype[\w\W]*?\>/i', ' ', $text);
    $text = preg_replace('/\<style[\w\W]*?\<\/style\>/i', ' ', $text);
    // eregi_replace() was removed in PHP 7; this is the equivalent case-insensitive PCRE call.
    $text = preg_replace('/ style="[^">]*"/i', ' ', $text);
    $text = strip_tags($text);
    $text = preg_replace('/\<script[\w\W]*?\<\/script\>/i', ' ', $text);
    // NOTE(review): as transcribed this replaces a space with a space (no-op); the
    // search string was probably "&nbsp;" or a non-breaking space before the post
    // was reformatted — confirm against the original script.
    $text = str_replace(" ", ' ', $text);
    // Collapse every run of whitespace and commas into a single space.
    $text = preg_replace("/[\s,]+/", ' ', $text);

    // Ordered (search, replacement) pairs; each is applied to the whole text in
    // turn, so the order is significant and must not be changed.
    $steps = array(
        array("...", "."),
        array("..", "."),
        array("!!!", "!"),
        array("!!", "!"),
        array("???", "?"),
        array("??", "?"),
        array('»', '"'),
        array('«', '"'),
        // Every sentence terminator becomes ".\r\n" so the text can be split on "\r\n".
        array(".", ".\r\n"),
        array("!", ".\r\n"),
        array("?", ".\r\n"),
        array("|", ".\r\n"),
        array(".\r\n.\r\n", ".\r\n"),
        array(".\r\n.\r\n", ".\r\n"),
        array(". \r\n", ".\r\n"),
        array("\r\n\r\n", "\r\n"),
        array("\r\n\r\n", "\r\n"),
        array("\t\t", " "),
        array("\t", " "),
        // NOTE(review): the next two pairs read space -> space as transcribed
        // (presumably double space -> single space originally) — confirm.
        array(" ", " "),
        array(" ", " "),
        array(" .", "."),
        array(" ,", ","),
        array("- - - ", "- "),
        array("- - ", "- "),
        array("---", "-"),
        array("--", "-"),
        array("--", "-"),
        // NOTE(review): same transcription doubt as the space pairs above.
        array(" ", " "),
        array(" ", " "),
        array("--", "-"),
        array("--", "-"),
        array("***", "*"),
        array("**", "*"),
        // Strip a stray punctuation character at the start of a line.
        array("\r\n?", "\r\n"),
        array("\r\n(", "\r\n"),
        array("\r\n)", "\r\n"),
        array("\r\n'", "\r\n"),
        array("\r\n-", "\r\n"),
        array("\r\n*", "\r\n"),
        array("\r\n?", "\r\n"),
        array("\r\n-", "\r\n"),
        array("\r\n ", "\r\n"),
        array("<", ""),
        array(">", ""),
    );
    foreach ($steps as $step) {
        $text = str_replace($step[0], $step[1], $text);
    }

    // A sentence containing any of these substrings is discarded.
    $forbidden = array(
        "[", ">", "http:", "www", "@", "©", "htm", "#8250", "#8249", "#8482",
        "®", "_", "<", "&",
    );

    $newstring = array();
    // Split into sentences. Every element is examined, including the last one,
    // which the previous size-minus-one loop bound silently dropped.
    foreach (explode("\r\n", $text) as $sentence) {
        $TempString = trim($sentence);
        $length = strlen($TempString);
        if ($length > 210 || $length < 55) {
            continue;
        }
        $clean = true;
        foreach ($forbidden as $marker) {
            if (strpos($TempString, $marker) !== false) {
                $clean = false;
                break;
            }
        }
        if ($clean) {
            $newstring[] = $TempString;
        }
    }

    // Report the real number of kept lines (previously one less than the count).
    print 'Всего строк в тексте: '.count($newstring).'<br>';

    return implode("\r\n", $newstring);
}

//------------------------------------------------
/**
 * Downloads a URL with cURL using a fixed MSIE user agent and the URL itself as referer.
 *
 * @param string $host Full URL to fetch.
 * @return string|false Response body, or false when the transfer fails.
 */
function get_page($host)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $host);
    curl_setopt($ch, CURLOPT_POST, 0);
    // CURLOPT_COOKIE is deliberately not set: passing 0 sent a literal "Cookie: 0" header.
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_REFERER, $host);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)");
    curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_0);
    $r = curl_exec($ch);
    curl_close($ch);

    return $r;
}

//------------------------------------------------
/**
 * Extracts all absolute http:// link targets from the saved result pages in ./result.txt.
 *
 * @return array List of trimmed URLs in document order; empty when the file
 *               cannot be read or contains no links.
 */
function ParseUrls()
{
    $query = array(); // always return an array, even when nothing matches

    $content = file_get_contents('./result.txt');
    if ($content === false) {
        return $query;
    }

    preg_match_all("|<a\s[^>]*?href\s*=\s*[\'\"]?(http://[^\s\'\">]+)[\s\'\"]?[^>]*?>(.+?)</a>|si", $content, $matches);
    foreach ($matches[1] as $href) {
        $query[] = trim($href);
    }

    return $query;
}
//------------------------------------------------
?>
Указываешь запросы, колличество урлов нужное для грабинга и все, результат в файл
Вот мой парсер ICQ.COM; принцип работы поисковика тот же, что и у гугла: https://forum.antichat.ru/thread56927.html