当前位置: 移动技术网 > IT编程 > 开发语言 > C# > C#远程HTML数据抓取实例分享

C#远程HTML数据抓取实例分享

2019年07月18日  | 移动技术网IT编程  | 我要评论

代码如下:

/// <summary>
        /// Fetches a remote document with a synchronous request through the
        /// msxml2 xmlhttp COM object.
        /// </summary>
        /// <param name="url">Target address. For "get" requests a non-empty
        /// <paramref name="param"/> is appended to it as the query string.</param>
        /// <param name="methed">HTTP method; lower-cased before use ("get", "post", ...).</param>
        /// <param name="param">URL-encoded parameters: query string for "get",
        /// request body for every other method. May be null or empty.</param>
        /// <param name="html">On success the response text; on failure a message
        /// that starts with "远程连接失败:".</param>
        /// <returns>true when the request reached readystate 4; false on any failure.</returns>
        public static bool gethttp(string url, string methed, string param, out string html)
        {
            try
            {
                // Normalised inside the try so a null method is reported through
                // the bool/out contract instead of escaping as an exception.
                methed = methed.tolower();

                bool hasparam = param != null && param.length > 0;
                bool isget = methed == "get";

                if (hasparam && isget)
                {
                    // Keep an existing query string intact (same rule as gethttp1).
                    url += (url.indexof("?") >= 0 ? "&" : "?") + param;
                }

                msxml2.xmlhttp mx = new msxml2.xmlhttpclass();

                // Third argument false = synchronous: send() blocks until completion.
                mx.open(methed, url, false, null, null);

                if (hasparam && methed == "post")
                {
                    // content-length is intentionally not set by hand: param.length
                    // counts characters, not encoded bytes, so it was wrong for
                    // non-ASCII payloads. The length is left to the xmlhttp object.
                    mx.setrequestheader("content-type", "application/x-www-form-urlencoded");
                }

                // For "get" the parameters already travel in the URL; do not
                // repeat them as a request body.
                mx.send(isget ? null : param);

                if (mx.readystate != 4)
                {
                    html = "远程连接失败:-4";
                    return false;
                }

                html = mx.responsetext;
                return true;
            }
            catch (exception ex)
            {
                html = "远程连接失败:" + ex.message;
                return false;
            }
        }

        public static bool gethttp1(string url, string methed, string param, string referer, string encode, out string html)
        {
            //return gethttp(url,methed,param,out html);

            //string encode = "utf-8";
            //string methed = sendtype.tostring();

            if (param != null && methed == "get" && param.length > 0)
            {
                if (url.indexof("?") >= 0)
                {
                    url += "&" + param;
                }
                else
                {
                    url += "?" + param;
                }
            }

            try
            {
                httpwebrequest webreq = (httpwebrequest)webrequest.create(url);

                webreq.proxy=null;
                webreq.timeout = 1000 * 6;
                webreq.contenttype = "application/x-www-form-urlencoded";
                webreq.useragent = "user-agent:mozilla/5.0 (windows nt 6.1; wow64; rv:24.0) gecko/20100101 firefox/24.0";

                //webreq.useragent = "mozilla/4.0 (compatible; msie 7.0; windows nt 6.1; wow64; trident/6.0; slcc2; .net clr 2.0.50727; .net clr 3.5.30729; .net clr 3.0.30729; media center pc 6.0; .net4.0c; .net4.0e)";

                //谷歌的:user-agent:mozilla/5.0 (windows nt 6.1; wow64) applewebkit/537.36 (khtml, like gecko) chrome/28.0.1500.95 safari/537.36
                //火狐的:user-agent:mozilla/5.0 (windows nt 6.1; wow64; rv:24.0) gecko/20100101 firefox/24.0
                //标准格式为: 浏览器标识 (操作系统标识; 加密等级标识; 浏览器语言) 渲染引擎标识 版本信息

                //webreq.allowautoredirect = false;

                //频繁请求一个网址时,过段时间就会出现“基础连接已经关闭”
                //webreq.keepalive = false;
                //webreq.protocolversion = httpversion.version10;

                if (referer.length > 0)
                {
                    webreq.referer = referer;
                }

                cookiecontainer mycookies = new cookiecontainer();
                webreq.cookiecontainer = mycookies;

                //if (this.cookielist != null)
                //{
                //    webreq.cookiecontainer.add(this.getcookies(webreq.requesturi, this.cookielist));
                //}

                webreq.method = methed;

                //post 开始
                if (param != null && methed == "post")
                {
                    byte[] arrbyte = encoding.getencoding(encode).getbytes(param);
                    webreq.contentlength = arrbyte.length;

                    stream newstream = webreq.getrequeststream();
                    newstream.write(arrbyte, 0, arrbyte.length);
                    newstream.close();
                }
                //post 结束

 
                webresponse w = webreq.getresponse();

                //返回html
                using (httpwebresponse webres = (httpwebresponse)webreq.getresponse())
                {
                    using (stream datastream = webres.getresponsestream())
                    {
                        using (streamreader reader = new streamreader(datastream, encoding.getencoding(encode)))
                        {
                            html = reader.readtoend();
                            //this.cookielist = webreq.cookiecontainer.getcookies(webreq.requesturi);
                            webreq.abort();//可能会解决卡住或阻塞问题
                        }
                    }
                }
            }
            catch (exception ex)
            {

                html = "出现异常(httphelper.gethtml),远程连接失败:" + ex.message + " url:" + url;
                //system.windows.forms.messagebox.show(html);
                return false;
            }

            return true;
        }

如对本文有疑问, 点击进行留言回复!!

相关文章:

验证码:
移动技术网