当前位置: 移动技术网 > IT编程>开发语言>PHP > php实现专业获取网站SEO信息类实例

php实现专业获取网站SEO信息类实例

2018年06月25日  | 移动技术网IT编程  | 我要评论

精品购物指南广告部,农业致富点子,2012元旦晚会

本文实例讲述了如何用php实现一个专业的网站seo信息获取类。分享给大家供大家参考。具体如下:

这个seo类的功能包括:
- 检查指定网站是否能够正常响应
- 获取网站主页的语言以及其他meta标签数据
- 从alexa获取网站的流量排名和导入链接数
- 获取谷歌收录的网页数量以及谷歌统计的导入链接数
- 从wot获取网站的信任评级
- 获取网站域名年龄(从域名首次注册之日算起)
- 获取twitter上提及该网站页面的推文数量
- 获取该网站页面在facebook上的分享次数
- 获取网站的谷歌pagespeed评分
- 获取网站的谷歌pagerank值

<?php
/**
 *
 * SEO report for different metrics.
 *
 * Every public method is static and queries one remote service per call.
 * NOTE(review): several of the third-party endpoints used below are old and
 * may have been retired; each method degrades to null/0/false when the
 * expected response cannot be obtained.
 *
 * @category seo
 * @author chema <chema@garridodiaz.com>
 * @copyright (c) 2009-2012 open classifieds team
 * @license gpl v3
 * based on seo report script http://www.phpeasycode.com && php class seostats
 *
 */
class seoreport{
  /**
   * Check if a url is online/alive.
   *
   * The site counts as alive when the final HTTP status is 200, 302 or 304.
   *
   * @param string $url
   * @return bool
   */
  public static function is_alive($url)
  {
    $ch = curl_init();
    if ($ch === false)
    {
      return false;
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    // Capture the body instead of echoing it to the output buffer.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    curl_exec($ch);
    $http_code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);
    return in_array($http_code, array(200, 302, 304), true);
  }
  /**
   * HTTP GET request with curl.
   *
   * Follows at most two redirects and verifies the certificate for https urls.
   *
   * @param string $url url to fetch.
   * @return string|false response body, or false when the request failed
   *                      or the body is empty.
   */
  protected static function get_html($url)
  {
    $ch = curl_init($url);
    if ($ch === false)
    {
      return false;
    }
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 2);
    if (strtolower((string) parse_url($url, PHP_URL_SCHEME)) === 'https')
    {
      curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
      // 2 is the value that checks the certificate name against the host.
      curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    }
    $str = curl_exec($ch);
    curl_close($ch);
    return ($str) ? $str : false;
  }
  /**
   * Fetch a url and decode its body as JSON.
   *
   * @param string $url
   * @return mixed decoded value (objects as stdClass), or null when the
   *               request failed or the body is not valid JSON.
   */
  protected static function get_json($url)
  {
    $str = self::get_html($url);
    return is_string($str) ? json_decode($str) : null;
  }
  /**
   * Return one capture group of the first regex match.
   *
   * @param string $pattern PCRE pattern
   * @param mixed $subject text to search; non-strings never match
   * @param int $group index of the capture group wanted
   * @return string|null the captured text, or null when there is no match
   *                     or the group is empty / "0"
   */
  protected static function match_group($pattern, $subject, $group)
  {
    if (!is_string($subject) || !preg_match($pattern, $subject, $found))
    {
      return null;
    }
    return (!empty($found[$group])) ? $found[$group] : null;
  }
  /**
   * Get the domain from any url.
   *
   * Uses the host component when the url has one, otherwise the path
   * (which is where parse_url puts a scheme-less "example.com"), and drops
   * a leading "www." from the result.
   *
   * @param string $url
   * @return string
   */
  public static function domain_name($url)
  {
    $url = (string) $url;
    $parts = parse_url($url);
    if (is_array($parts) && !empty($parts['host']))
    {
      $domain = $parts['host'];
    }
    elseif (is_array($parts) && isset($parts['path']))
    {
      $domain = $parts['path'];
    }
    else
    {
      // parse_url could not make sense of the input; fall back to it as is.
      $domain = $url;
    }
    // Anchored so that only a leading "www." label is removed.
    return preg_replace('/^www\./i', '', $domain);
  }
  /**
   * Get the metas from a url and the language of the site.
   *
   * @param string $url
   * @return array|false map of meta name => content plus a 'language' key
   *                     taken from the html lang attribute, or false when
   *                     the page could not be fetched or parsed.
   */
  public static function meta_info($url)
  {
    $html_content = self::get_html($url);
    if (!$html_content)
    {
      return false;
    }
    // Real-world markup is rarely valid; keep libxml warnings out of the output.
    $previous = libxml_use_internal_errors(true);
    $html = new DOMDocument();
    $loaded = $html->loadHTML($html_content);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    if (!$loaded)
    {
      return false;
    }

    $xpath = new DOMXPath($html);
    $url_info = array();
    foreach ($xpath->query('//html') as $lang)
    {
      $url_info['language'] = $lang->getAttribute('lang');
    }
    foreach ($xpath->query('//meta') as $meta)
    {
      $name = $meta->getAttribute('name');
      if ($name)
      {
        $url_info[$name] = $meta->getAttribute('content');
      }
    }
    return $url_info;
  }
  /**
   * Fetch the alexa data document for the domain of a url.
   *
   * @param string $url
   * @return string|false raw response body, false on failure
   */
  protected static function alexa_data($url)
  {
    $domain = self::domain_name($url);
    return self::get_html("http://data.alexa.com/data?cli=10&dat=s&url=" . urlencode($domain));
  }
  /**
   * Alexa rank.
   *
   * @param string $url
   * @return string|null rank as a numeric string, null when unavailable
   */
  public static function alexa_rank($url)
  {
    return self::match_group('/<popularity url="(.*?)" text="([\d]+)"\/>/si', self::alexa_data($url), 2);
  }
  /**
   * Alexa inbound links.
   *
   * @param string $url
   * @return string|null link count as a numeric string, null when unavailable
   */
  public static function alexa_links($url)
  {
    return self::match_group('/<linksin num="([\d]+)"\/>/si', self::alexa_data($url), 1);
  }
  /**
   * Returns total amount of results for any google search,
   * requesting the deprecated websearch api.
   *
   * @param string $url search query (a url in the usual case)
   * @return int estimated result count, 0 when unavailable
   */
  public static function google_pages($url)
  {
    $request = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&rsz=1&q=' . urlencode($url);
    $data = self::get_json($request);
    // JSON property names are case-sensitive.
    return (isset($data->responseData->cursor->estimatedResultCount))
        ? intval($data->responseData->cursor->estimatedResultCount)
        : 0;
  }
  /**
   * Gets the inbound links count for a site from a google "link:" search.
   *
   * @param string $url
   * @return string|null count as shown by google (may contain thousands
   *                     separators), null when unavailable
   */
  public static function google_links($url)
  {
    $request = "http://www.google.com/search?q=" . urlencode("link:" . $url) . "&hl=en";
    return self::match_group('/<div id=resultstats>(about )?([\d,]+) result/si', self::get_html($request), 2);
  }
  /**
   * Web of trust rating.
   *
   * @param string $url
   * @return string|null the r attribute of the first application element
   *                     in the response, null when unavailable
   */
  public static function wot_rating($url)
  {
    $domain = self::domain_name($url);
    $request = "http://api.mywot.com/0.4/public_query2?target=" . urlencode($domain);
    $trustworthiness = self::match_group('/<application name="(\d+)" r="(\d+)" c="(\d+)"\/>/si', self::get_html($request), 2);
    return (is_numeric($trustworthiness)) ? $trustworthiness : null;
  }

  /**
   * How old is the domain?
   *
   * @param string $domain domain name; a full url is accepted as well and
   *                       reduced to its domain first
   * @return int|false unix time of the creation date, false when unknown
   */
  public static function domain_age($domain)
  {
    $domain = self::domain_name($domain);
    $request = "http://reports.internic.net/cgi/whois?whois_nic=" . urlencode($domain) . "&type=domain";
    $created = self::match_group('/creation date: ([a-z0-9-]+)/si', self::get_html($request), 1);
    return ($created === null) ? false : strtotime($created);
  }
  /**
   * Counts how many tweets about the url.
   *
   * @param string $url
   * @return int|string|null the numeric count reported by the endpoint,
   *                         null when unavailable
   */
  public static function tweet_count($url)
  {
    $twitterendpoint = "http://urls.api.twitter.com/1/urls/count.json?url=%s";
    $data = self::get_json(sprintf($twitterendpoint, urlencode($url)));
    return (isset($data->count) && is_numeric($data->count)) ? $data->count : null;
  }
  /**
   * Returns the total amount of facebook shares for a single page.
   *
   * @link     https://graph.facebook.com/
   * @param string $q the url to check.
   * @return int total amount of facebook shares, 0 when unavailable
   */
  public static function facebook_shares($q)
  {
    $data = self::get_json('http://graph.facebook.com/?id=' . urlencode($q));
    return (isset($data->shares)) ? $data->shares : 0;
  }
  /**
   * Get the pagespeed rank over 100.
   *
   * @param string $url
   * @return int score, 0 when unavailable
   */
  public static function page_speed($url)
  {
    $request = 'https://developers.google.com/_apps/pagespeed/run_pagespeed?url=' . urlencode($url) . '&format=json';
    $data = self::get_json($request);
    return (isset($data->results->score)) ? intval($data->results->score) : 0;
  }
  /**
   * Get google page rank.
   *
   * @param string $url
   * @return string|null the rank value, null when the response carries none
   */
  public static function page_rank($url)
  {
    $query = "http://toolbarqueries.google.com/tbr?client=navclient-auto&ch=" . self::checkhash(self::hashurl($url))
        . "&features=rank&q=info:" . urlencode($url) . "&num=100&filter=0";
    $data = self::get_html($query);
    if (!is_string($data))
    {
      return null;
    }
    // Case-insensitive search, so the marker is found whatever its casing.
    $pos = stripos($data, "rank_");
    if ($pos === false)
    {
      return null;
    }
    // Skip 9 characters from the start of the marker; the remainder is the value.
    return trim(substr($data, $pos + 9));
  }
  // functions for google pagerank
  /**
   * Fold a string into a number: for every byte, multiply the running
   * value by $magic (reduced modulo 2^32 on overflow) and add the byte.
   *
   * @param string $str
   * @param int $check start value
   * @param int $magic multiplier
   * @return int|float
   */
  public static function strtonum($str, $check, $magic)
  {
    $int32unit = 4294967296; // 2^32
    $length = strlen($str);
    for ($i = 0; $i < $length; $i++) {
      $check *= $magic;
      //if the float is beyond the boundaries of integer (usually +/- 2.15e+9 = 2^31),
      // the result of converting to integer is undefined
      // refer to http://www.php.net/manual/en/language.types.integer.php
      if ($check >= $int32unit) {
        $check = ($check - $int32unit * (int) ($check / $int32unit));
        //if the check less than -2^31
        $check = ($check < -2147483648) ? ($check + $int32unit) : $check;
      }
      $check += ord($str[$i]);
    }
    return $check;
  }
  /**
   * Generate a hash for a url by mixing two strtonum() folds of it.
   *
   * @param string $string
   * @return int
   */
  public static function hashurl($string)
  {
    $check1 = self::strtonum($string, 0x1505, 0x21);
    $check2 = self::strtonum($string, 0, 0x1003f);
    $check1 >>= 2;
    $check1 = (($check1 >> 4) & 0x3ffffc0 ) | ($check1 & 0x3f);
    $check1 = (($check1 >> 4) & 0x3ffc00 ) | ($check1 & 0x3ff);
    $check1 = (($check1 >> 4) & 0x3c000 ) | ($check1 & 0x3fff);
    $t1 = (((($check1 & 0x3c0) << 4) | ($check1 & 0x3c)) <<2 ) | ($check2 & 0xf0f );
    $t2 = (((($check1 & 0xffffc000) << 4) | ($check1 & 0x3c00)) << 0xa) | ($check2 & 0xf0f0000 );
    return ($t1 | $t2);
  }
  /**
   * Generate a checksum for the hash string.
   *
   * Walks the decimal digits from right to left, doubling every second
   * digit and summing the digits of the doubled value, then derives a
   * check digit from the total.
   *
   * @param int $hashnum
   * @return string '7' followed by the check digit and the unsigned hash
   */
  public static function checkhash($hashnum)
  {
    $checkbyte = 0;
    $flag = 0;
    $hashstr = sprintf('%u', $hashnum);
    $length = strlen($hashstr);
    for ($i = $length - 1; $i >= 0; $i --) {
      $re = (int) $hashstr[$i];
      if (1 === ($flag % 2)) {
        $re += $re;
        $re = (int)($re / 10) + ($re % 10);
      }
      $checkbyte += $re;
      $flag ++;
    }
    $checkbyte %= 10;
    if (0 !== $checkbyte) {
      $checkbyte = 10 - $checkbyte;
      if (1 === ($flag % 2) ) {
        if (1 === ($checkbyte % 2)) {
          $checkbyte += 9;
        }
        $checkbyte >>= 1;
      }
    }
    return '7'.$checkbyte.$hashstr;
  }
}

使用范例

<?php
// Usage example: build an seo report for the url given in the "url" query
// parameter (default http://phpclasses.org) and dump it.
include 'seoreport.php';
// The report performs many remote requests one after another.
ini_set('max_execution_time', 180);
  // NOTE(review): the url comes straight from the request and is fetched
  // server-side; restrict the accepted hosts before exposing this publicly.
  $url = (isset($_GET['url']))?$_GET['url']:'http://phpclasses.org';
  $meta_tags = seoreport::meta_info($url);
  //die(var_dump($meta_tags));
  //first check if site online
  if ($meta_tags!==false)
  {
    $stats = array();
    $stats['meta'] = $meta_tags;
    $stats['alexa']['rank'] = seoreport::alexa_rank($url);
    $stats['alexa']['links'] = seoreport::alexa_links($url);
    $stats['domain']['wot_rating'] = seoreport::wot_rating($url);
    $stats['domain']['domain_age'] = seoreport::domain_age($url);
    $stats['social']['twitter'] = seoreport::tweet_count($url);
    $stats['social']['facebook'] = seoreport::facebook_shares($url);
    $stats['google']['page_rank'] = seoreport::page_rank($url);
    $stats['google']['page_speed'] = seoreport::page_speed($url);
    $stats['google']['pages'] = seoreport::google_pages($url);
    $stats['google']['links'] = seoreport::google_links($url);
    var_dump($stats);
  }
  else
  {
    // Escape the user-supplied url before echoing it back.
    echo 'site not online. '.htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
  }

希望本文所述对大家的php程序设计有所帮助。

如对本文有疑问,请在下面进行留言讨论,广大热心网友会与你互动!! 点击进行留言回复

相关文章:

验证码:
移动技术网