德阳网站建设

曾平承接德阳企业网站、个人博客的建设。QQ:414252371 TEL:15883400129  德阳建站

上一篇: 建立一个企业网站需要做的事情 下一篇:企业网站的关键词分布

        德阳网站建设制作了一款软件,用来获取百度的搜索结果,并提取其中各个网站的标题和链接。现将代码贴出,后续有开发者做了改进,记得发上来共享哦。

        页面很简单,一个文本框,一个按钮,一个WebBrowser用来显示网页,一个DataGridView用来展示获取的信息。

        先上效果图吧:

 

        Form1后置代码:

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Security.Policy;

namespace QBL
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        private void baidu_Click(object sender, EventArgs e)
        {
           
            int num = 100;//搜索条数
              string url = "http://www.baidu.com/s?wd=" + txtSearch.Text.Trim() + "&rn=" + num + "";
              Uri urls = new Uri(url);
              string html = search(url, "gb2312");
              this.webBrowser1.Url = urls;
              BaiduSearch baidu = new BaiduSearch();
              if (!string.IsNullOrEmpty(html))
              {
                  int count = baidu.GetSearchCount(html);//搜索条数
                 if (count > 0)
                 {
                     List<Keyword> keywords = baidu.GetKeywords(html, txtSearch.Text.Trim());
                     dataGridView1.DataSource = keywords;
                 }
 
             }

        }
         /// <summary>
         /// 搜索处理
         /// </summary>
         /// <param name="url">搜索网址</param>
         /// <param name="Chareset">编码</param>
         public string search(string url, string Chareset)
         {
             HttpState result = new HttpState();
             Uri uri = new Uri(url);
             HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(url);
             myHttpWebRequest.UseDefaultCredentials = true;
             myHttpWebRequest.ContentType = "text/html";
             myHttpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50215;)";
             myHttpWebRequest.Method = "GET";
             myHttpWebRequest.CookieContainer = new CookieContainer();
 
             try
             {
                 HttpWebResponse response = (HttpWebResponse)myHttpWebRequest.GetResponse();
                 // 从 ResponseStream 中读取HTML源码并格式化 add by cqp
                 result.Html = readResponseStream(response, Chareset);
                 result.CookieContainer = myHttpWebRequest.CookieContainer;
                 return result.Html;
             }
             catch (Exception ex)
             {
                 return ex.ToString();
             }
 
         }
         public string readResponseStream(HttpWebResponse response, string Chareset)
         {
             string result = "";
             using (StreamReader responseReader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding(Chareset)))
             {
                result = formatHTML(responseReader.ReadToEnd());
             }
 
             return result;
         }
        /// <summary>
         /// 描述:格式化网页源码
         ///
         /// </summary>
         /// <param name="htmlContent"></param>
         /// <returns></returns>
         public string formatHTML(string htmlContent)
         {
             string result = "";
 
             result = htmlContent.Replace("&raquo;", "").Replace("&nbsp;", "")
                     .Replace("&copy;", "").Replace("/r", "").Replace("/t", "")
                     .Replace("/n", "").Replace("&amp;", "&");
             return result;
         }


    }
}

        BaiduSearch.cs内容:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace QBL
{
    class BaiduSearch
    {
        protected string uri = "http://www.baidu.com/s?wd=";
         protected Encoding queryEncoding = Encoding.GetEncoding("gb2312");
         protected Encoding pageEncoding = Encoding.GetEncoding("gb2312");
         protected string resultPattern = @"(?<=找到相关结果[约]?)[0-9,]*?(?=个)";
         public int GetSearchCount(string html)
         {
             int result = 0;
             string searchcount = string.Empty;
 
             Regex regex = new Regex(resultPattern);
             Match match = regex.Match(html);
 
             if (match.Success)
             {
                 searchcount = match.Value;
             }
             else
             {
                 searchcount = "0";
            }
 
             if (searchcount.IndexOf(",") > 0)
             {
                 searchcount = searchcount.Replace(",", string.Empty);
             }
 
             int.TryParse(searchcount, out result);
 
            return result;
         }
 
         public List<Keyword> GetKeywords(string html, string word)
         {
             int i = 1;
             List<Keyword> keywords = new List<Keyword>();
             string ss="<h3 class=\"t\"><a.*?href=\"(?<url>.*?)\".*?>(?<content>.*?)</a>";
             MatchCollection mcTable = Regex.Matches(html,ss);
             foreach (Match mTable in mcTable)
             {
                 if (mTable.Success)
                 {
                     Keyword keyword = new Keyword();
                     keyword.ID = i++;
                     keyword.Title = Regex.Replace(mTable.Groups["content"].Value, "<[^>]*>", string.Empty);
                     keyword.Link = mTable.Groups["url"].Value;
                     keywords.Add(keyword);
 
                 }
             }
 
             return keywords;
         }

    }
}
        实体类HttpState.cs:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.Collections;

namespace QBL
{
    class HttpState
    {
        private string _statusDescription; 

  

        public string StatusDescription 

        { 

            get { return _statusDescription; } 

            set { _statusDescription = value; } 

        } 

 

       /// <summary> 

        /// 回调 址址, 登陆测试中使用 

        /// </summary> 

        private string _callBackUrl; 

  

       public string CallBackUrl 

        { 

            get { return _callBackUrl; } 

            set { _callBackUrl = value; } 

       } 

  

       /// <summary> 

        /// 网页网址 绝对路径格式 

       /// </summary> 

        private string _url; 

 

       public string Url 

        { 

            get { return _url; } 

           set { _url = value; } 

        } 
 

        /// <summary> 

       /// 字符串的形式的Cookie信息 

        /// </summary> 

        private string _cookies; 

  

       public string Cookies 

        { 

            get { return _cookies; } 

            set { _cookies = value; } 

        } 

  

        /// <summary> 

        /// Cookie信息 

        /// </summary> 

        private CookieContainer _cookieContainer = new CookieContainer(); 

  

       public CookieContainer CookieContainer 

       { 

            get { return _cookieContainer; } 

           set { _cookieContainer = value; } 

        } 

  

        /// <summary> 

       /// 网页源码 

       /// </summary> 

       private string _html; 

 

        public string Html 

       { 

            get { return _html; } 

            set { _html = value; } 

        } 

 

        /// <summary> 

       /// 验证码临时文件(绝对路径) 

        /// </summary> 

       private string _tmpValCodePic; 

  

        public string TmpValCodePic 

       { 

            get { return _tmpValCodePic; } 

            set { _tmpValCodePic = value; } 

        } 

 

        /// <summary> 

       /// 验证码临时文件名(相对路径) 

       /// </summary> 

        private string _tmpValCodeFileName = "emptyPic.gif"; 

  

        public string TmpValCodeFileName 

        { 

            get { return _tmpValCodeFileName; } 
            set { _tmpValCodeFileName = value; } 
        } 

  

        /// <summary> 

        /// 有验证码 

        /// </summary> 

        private bool _isValCode; 

  

        public bool IsValCode 

        { 

            get { return _isValCode; } 

            set { _isValCode = value; } 

        } 

  

        /// <summary> 

        /// 验证码URL 

        /// </summary> 

        private string _valCodeURL; 

  

       public string ValCodeURL 

        { 

            get { return _valCodeURL; } 

            set { _valCodeURL = value; } 

        } 

  

        /// <summary> 

        /// 验证码识别后的值 

        /// </summary> 

        private string _valCodeValue; 

  

        public string ValCodeValue 

        { 

            get { return _valCodeValue; } 

            set { _valCodeValue = value; } 

        } 

  

        /// <summary> 

        /// 其它参数 

        /// </summary> 
       private Hashtable _otherParams = new Hashtable(); 

  
        public Hashtable OtherParams 
     { 

            get { return _otherParams; } 

            set { _otherParams = value; } 

        } 

  

        // 重复添加处理 add by fengcj  09/11/19 PM 

        public void addOtherParam(object key, object value) 

        { 
            if (!this.OtherParams.ContainsKey(key)) 

                this.OtherParams.Add(key, value); 

            else

            { 

                this.OtherParams[key] = value; 

            } 

        } 

  

        public void removeOtherParam(object key) 

        { 
            this.OtherParams.Remove(key); 

        } 

  

        public object getOtherParam(object key) 

        { 
            return this.OtherParams[key]; 

        } 


    }
}
        最后是Keyword.cs:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace QBL
{
    class Keyword
    {
        public int ID { get; set; }

        public string Title { get; set; }

        public string Link { get; set; } 


    }
}
 

本文由【德阳网站建设】收集整理

点击这里获取该日志的TrackBack引用地址

发表评论:

◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。

关于本文

您正在阅读的是:Winform C#版提取百度搜索结果中的链接的网站标题
德阳网站建设 发表于:2011-11-26 10:37:31
分类:建站技术
关键词:Winform C#  获取百度结果  提取搜索引擎链接  
订阅德阳网站建设

什么是RSS订阅?查看解释
订阅到您的在线阅读器

曾平

德阳网站建设

德阳网站建设

最近发表

最新评论及回复

最近留言

Search

网站分类

文章归档

图标汇集

曾平:本人承接德阳网站建设、个人博客建设。 QQ:414252371

DESIGN BY 曾平