利用WebClient和WebRequest类获得网页源代码
2006-08-10 14:28:00
			
			【评论】
			【打印】
			【字体:大 中 小】
			本文链接:http://blog.pfan.cn/Csharpsky/17454.html
			
			
			
			
			利用 .NET 框架提供的 WebClient 类和 WebRequest 类,可以很容易地获得给定 URL 地址的网页源代码。以下是一个完整的 C# 例子。
查看例子
GetPageHtml.aspx
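<%@ Page language="c#" validateRequest="false" Codebehind="GetPageHtml.aspx.cs"
 AutoEventWireup="false" Inherits="eMeng.Exam.GetPageHtml" %>
<%-- 注意:原文的 HTML 标记在转载时丢失,以下内容是根据代码隐藏文件中声明的控件重建的,按钮文字和布局仅供参考。 --%>
<html>
  <head>
    <title>得到网页源代码</title>
  </head>
  <body>
    <form id="Form1" method="post" runat="server">
      <asp:TextBox id="UrlText" runat="server" Width="400px" />
      <asp:Button id="WebClientButton" runat="server" Text="WebClient" />
      <asp:Button id="WebRequestButton" runat="server" Text="WebRequest" />
      <asp:Button id="GetText" runat="server" Text="GetText" />
      <br />
      <asp:TextBox id="ContentHtml" runat="server" TextMode="MultiLine" Width="100%" Height="400px" />
    </form>
  </body>
</html>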
GetPageHtml.aspx.cs
using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace eMeng.Exam
{
/// <summary>
/// Code-behind for GetPageHtml.aspx. Downloads the page at the URL typed into
/// <c>UrlText</c> and shows either its raw source or its tag-stripped text in
/// <c>ContentHtml</c>.
/// </summary>
/// <remarks>
/// The URL comes from the browser and is therefore untrusted: only absolute
/// http/https addresses are fetched, so schemes such as <c>file://</c> cannot be
/// used to read files from the server. Network failures are not caught here;
/// a <see cref="WebException"/> from the download propagates to the page.
/// </remarks>
public class GetPageHtml : System.Web.UI.Page
{
    protected System.Web.UI.WebControls.Button WebClientButton;
    protected System.Web.UI.WebControls.Button WebRequestButton;
    protected System.Web.UI.WebControls.TextBox ContentHtml;
    protected System.Web.UI.WebControls.TextBox UrlText;
    protected System.Web.UI.WebControls.Button GetText;

    /// <summary>The (trimmed) URL text of the request currently being handled.</summary>
    private string PageUrl = "";

    /// <summary>Shown in <c>ContentHtml</c> when the entered URL is rejected.</summary>
    private const string InvalidUrlMessage = "请输入有效的 http:// 或 https:// 地址。";

    /// <summary>Extracts the charset value from a Content-Type header.</summary>
    private static readonly Regex CharsetPattern =
        new Regex("charset\\s*=\\s*[\"']?([^\\s;\"']+)", RegexOptions.IgnoreCase);

    /// <summary>Matches anything that looks like a markup tag.</summary>
    private static readonly Regex TagPattern = new Regex("<[^>]*>");

    /// <summary>Matches a run of whitespace.</summary>
    private static readonly Regex WhitespacePattern = new Regex("\\s+");

    private void Page_Load(object sender, System.EventArgs e)
    {
    }

    #region Web Form Designer generated code
    override protected void OnInit(EventArgs e)
    {
        InitializeComponent();
        base.OnInit(e);
    }

    /// <summary>
    /// Required method for Designer support - do not modify
    /// the contents of this method with the code editor.
    /// </summary>
    private void InitializeComponent()
    {
        this.WebClientButton.Click += new System.EventHandler(this.WebClientButton_Click);
        this.WebRequestButton.Click += new System.EventHandler(this.WebRequestButton_Click);
        this.GetText.Click += new System.EventHandler(this.GetText_Click);
        this.Load += new System.EventHandler(this.Page_Load);
    }
    #endregion

    /// <summary>
    /// Fetches the page with <see cref="WebClient"/> and displays its source.
    /// </summary>
    /// <param name="sender">The button that raised the event.</param>
    /// <param name="e">Unused event data.</param>
    /// <exception cref="WebException">The download failed.</exception>
    private void WebClientButton_Click(object sender, System.EventArgs e)
    {
        Uri target = ReadTargetUri();
        if (target == null)
        {
            return;
        }

        // "using" releases the client even when DownloadData throws.
        using (WebClient wc = new WebClient())
        {
            // NOTE(review): this sends the server process's own credentials to a
            // user-chosen host. Kept from the original so that intranet pages
            // still work; consider removing it for internet-facing deployments.
            wc.Credentials = CredentialCache.DefaultCredentials;

            // Equivalent alternative: wc.OpenRead(...) wrapped in a StreamReader.
            byte[] pageData = wc.DownloadData(target.AbsoluteUri);

            WebHeaderCollection headers = wc.ResponseHeaders;
            string contentType = headers == null ? null : headers["Content-Type"];
            ContentHtml.Text = ResolveEncoding(contentType).GetString(pageData);
        }
    }

    /// <summary>
    /// Fetches the page with <see cref="WebRequest"/> and displays its source.
    /// </summary>
    /// <param name="sender">The button that raised the event.</param>
    /// <param name="e">Unused event data.</param>
    /// <exception cref="WebException">The download failed.</exception>
    private void WebRequestButton_Click(object sender, System.EventArgs e)
    {
        Uri target = ReadTargetUri();
        if (target == null)
        {
            return;
        }

        ContentHtml.Text = DownloadPage(target);
    }

    /// <summary>
    /// Fetches the page with <see cref="WebRequest"/>, removes everything that
    /// looks like a tag, collapses whitespace runs to single spaces and displays
    /// the result.
    /// </summary>
    /// <param name="sender">The button that raised the event.</param>
    /// <param name="e">Unused event data.</param>
    /// <exception cref="WebException">The download failed.</exception>
    private void GetText_Click(object sender, System.EventArgs e)
    {
        Uri target = ReadTargetUri();
        if (target == null)
        {
            return;
        }

        string html = DownloadPage(target);

        // Only the tags themselves are removed; the text content of <script>
        // and <style> elements stays in the output.
        string withoutTags = TagPattern.Replace(html, "");

        // Collapse whitespace
        ContentHtml.Text = WhitespacePattern.Replace(withoutTags, " ");
    }

    /// <summary>
    /// Reads the URL from <c>UrlText</c> into <c>PageUrl</c> and validates it.
    /// On rejection, writes <c>InvalidUrlMessage</c> to <c>ContentHtml</c>.
    /// </summary>
    /// <returns>The parsed URI, or <c>null</c> when the input was rejected.</returns>
    private Uri ReadTargetUri()
    {
        PageUrl = UrlText.Text.Trim();

        Uri target = ParseHttpUri(PageUrl);
        if (target == null)
        {
            ContentHtml.Text = InvalidUrlMessage;
        }
        return target;
    }

    /// <summary>
    /// Parses <paramref name="url"/> and accepts it only if it is an absolute
    /// http or https URI.
    /// </summary>
    /// <param name="url">The text to parse; may be null or empty.</param>
    /// <returns>The parsed URI, or <c>null</c> if the text is not acceptable.</returns>
    private static Uri ParseHttpUri(string url)
    {
        if (url == null || url.Length == 0)
        {
            return null;
        }

        try
        {
            Uri candidate = new Uri(url);
            bool isWeb = candidate.Scheme == Uri.UriSchemeHttp
                || candidate.Scheme == Uri.UriSchemeHttps;
            return isWeb ? candidate : null;
        }
        catch (UriFormatException)
        {
            return null;
        }
    }

    /// <summary>
    /// Downloads <paramref name="target"/> and returns the response body as text.
    /// The response, its stream and the reader are released even if reading fails,
    /// so the underlying connection is always handed back.
    /// </summary>
    /// <param name="target">An absolute http/https URI.</param>
    /// <returns>The decoded response body.</returns>
    /// <exception cref="WebException">The request failed.</exception>
    private static string DownloadPage(Uri target)
    {
        WebRequest request = WebRequest.Create(target);

        using (WebResponse response = request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, ResolveEncoding(response.ContentType)))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Picks the text encoding for a response from its Content-Type header.
    /// </summary>
    /// <param name="contentType">
    /// The Content-Type header value, e.g. <c>text/html; charset=utf-8</c>; may be null.
    /// </param>
    /// <returns>
    /// The encoding named by the header's charset, or <see cref="Encoding.Default"/>
    /// (the original behaviour) when no charset is declared or the name is unknown.
    /// </returns>
    private static Encoding ResolveEncoding(string contentType)
    {
        if (contentType != null)
        {
            Match charset = CharsetPattern.Match(contentType);
            if (charset.Success)
            {
                try
                {
                    return Encoding.GetEncoding(charset.Groups[1].Value);
                }
                catch (ArgumentException)
                {
                    // Unknown charset name: fall through to the default below.
                }
                catch (NotSupportedException)
                {
                    // Charset not available on this platform: fall through.
                }
            }
        }

        return Encoding.Default;
    }
}
}
阅读(2208) | 评论(0)
			
版权声明:编程爱好者网站为此博客服务提供商,如本文牵涉到版权问题,编程爱好者网站不承担相关责任,如有版权问题请直接与本文作者联系解决。谢谢!
			
			                
					 
	
评论