隔壁宿舍菜鸟学JAVA遇到了一个问题,在网上下载了一个JAVA帮助文档(也就和MSDN差不多的东西),十几万条信息,愣是按照首字母的顺序,排列成了27个静态网页,没有查询功能,该菜鸟每次写程序时都为查询烧透了脑筋。于是找到了我,求我给做个搜索功能。要回家了,没多少时间,所以只建了个索引表。

using System;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using System.Text;
using System.Xml;
using System.Xml.Schema;
using System.Xml.XPath;
using System.Collections;
using System.Data.SqlClient;

/// <summary>
/// Builds an XML index (key + detail) from the static "index-N.html" pages of a
/// downloaded Java API reference so that the entries can be searched later.
/// </summary>
public class search
{
    // Public mutable state kept for backward compatibility with existing callers.
    public static string ClientPage;
    public static string infor;
    public static string keys;
    public static HttpWebRequest HttpWReq;
    public static HttpWebResponse HttpWRes;
    public static MatchCollection m;
    public static MatchCollection m2;

    // Directory holding both the HTML index pages and the generated XML index.
    private const string IndexFilesDirectory = "G://webroot/dotnet/api/index-files/";
    private const string IndexXmlPath = IndexFilesDirectory + "java.xml";

    // Number of index pages processed by Main (index-1.html .. index-27.html).
    private const int PageCount = 27;

    private const RegexOptions ScanOptions =
        RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture;

    // One index entry: a DT tag, the rest of that line up to a "DD", then the
    // following text through the first ';' or '。'.
    private static readonly Regex EntryPattern = new Regex(@"\<DT\>.*DD[^。;]*(;|。)", ScanOptions);

    // The bold element inside an entry; its text is used as the search key.
    private static readonly Regex KeyPattern = new Regex(@"\<B\>[^\<]*\</B\>", ScanOptions);

    // Any markup tag; used to strip the <B> wrapper from the key.
    private static readonly Regex TagPattern = new Regex("<[^>]*>");

    /// <summary>
    /// Chooses the text encoding for a response.
    /// </summary>
    /// <param name="response">The HTTP response to inspect.</param>
    /// <returns>
    /// The encoding named by <c>response.ContentEncoding</c> if that is a valid encoding
    /// name, otherwise the one named by the <c>charset=</c> parameter of
    /// <c>response.ContentType</c>, otherwise <see cref="Encoding.Default"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="response"/> is null.</exception>
    public static Encoding GetEncoding(HttpWebResponse response)
    {
        if (response == null)
        {
            throw new ArgumentNullException("response");
        }

        Encoding code;

        // Tried first to match the original lookup order. This header frequently carries
        // a compression name rather than a charset, in which case the lookup fails and
        // we fall through to the Content-Type charset instead of giving up.
        if (TryResolveEncoding(response.ContentEncoding, out code))
        {
            return code;
        }

        if (TryResolveEncoding(ExtractCharset(response.ContentType), out code))
        {
            return code;
        }

        return Encoding.Default;
    }

    /// <summary>
    /// Reads the whole response body as text, normalising every line ending to CRLF.
    /// </summary>
    /// <param name="response">The HTTP response whose body is read.</param>
    /// <returns>The body text; each line read is followed by "\r\n".</returns>
    public static string TextContent(HttpWebResponse response)
    {
        StringBuilder buffer = new StringBuilder();

        // using blocks guarantee the stream and reader are released even when a read fails.
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, GetEncoding(response)))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                buffer.Append(line).Append("\r\n");
            }
        }

        return buffer.ToString();
    }

    /// <summary>
    /// Appends a single element (key + detail) to the XML index file and saves it.
    /// </summary>
    /// <param name="k">Text stored in the "key" child element.</param>
    /// <param name="t">Text stored in the "detail" child element.</param>
    /// <exception cref="InvalidOperationException">
    /// The index file has no "elements" root node.
    /// </exception>
    public static void insert_xml(string k, string t)
    {
        XmlDocument xmlDoc = LoadIndex();
        AppendEntry(xmlDoc, k, t);
        xmlDoc.Save(IndexXmlPath);
    }

    /// <summary>
    /// Scans one local HTML index page and adds every entry found to the XML index.
    /// </summary>
    /// <param name="url">
    /// Path of the HTML file to scan. Despite the name it is opened as a local file
    /// and decoded as gb2312.
    /// </param>
    /// <remarks>
    /// Updates the static fields <see cref="ClientPage"/>, <see cref="m"/>,
    /// <see cref="m2"/> and <see cref="keys"/> as it goes. The XML index is loaded
    /// and saved once per page rather than once per entry.
    /// </remarks>
    public static void seek(string url)
    {
        // ReadAllText opens, reads and closes the file, so no handle is leaked.
        ClientPage = File.ReadAllText(url, System.Text.Encoding.GetEncoding("gb2312"));

        m = EntryPattern.Matches(ClientPage);
        if (m.Count == 0)
        {
            // Nothing to add: leave the index file untouched.
            return;
        }

        XmlDocument xmlDoc = LoadIndex();

        for (int i = 0; i < m.Count; i++)
        {
            string entry = m[i].Value;

            m2 = KeyPattern.Matches(entry);
            if (m2.Count > 0)
            {
                keys = TagPattern.Replace(m2[0].Value, "");
            }
            else
            {
                keys = "No Keys!";
            }

            // The match stops before the closing tags, so they are re-appended here.
            AppendEntry(xmlDoc, keys, entry + "</DD></DT>");
        }

        xmlDoc.Save(IndexXmlPath);
    }

    /// <summary>
    /// Entry point: indexes pages index-1.html through index-27.html in order.
    /// </summary>
    public static void Main()
    {
        for (int i = 1; i <= PageCount; i++)
        {
            Console.WriteLine("seeking page: " + i.ToString() + ".......");
            seek(IndexFilesDirectory + "index-" + i.ToString() + ".html");
        }
    }

    // Looks up an encoding by name; returns false for an empty or unrecognised name.
    private static bool TryResolveEncoding(string name, out Encoding encoding)
    {
        encoding = null;
        if (string.IsNullOrEmpty(name))
        {
            return false;
        }

        try
        {
            encoding = Encoding.GetEncoding(name);
            return true;
        }
        catch (ArgumentException)
        {
            // Unknown or unsupported name: the caller falls back to the next candidate.
            return false;
        }
    }

    // Returns the value of the "charset=" parameter of a Content-Type header, or "" if absent.
    private static string ExtractCharset(string contentType)
    {
        const string marker = "charset=";

        if (string.IsNullOrEmpty(contentType))
        {
            return "";
        }

        int start = contentType.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
        if (start < 0)
        {
            return "";
        }

        string value = contentType.Substring(start + marker.Length);

        // Drop any further parameters that follow the charset value.
        int end = value.IndexOf(';');
        if (end >= 0)
        {
            value = value.Substring(0, end);
        }

        return value.Trim(' ', '"', '\'');
    }

    // Loads the XML index document from disk.
    private static XmlDocument LoadIndex()
    {
        XmlDocument xmlDoc = new XmlDocument();
        xmlDoc.Load(IndexXmlPath);
        return xmlDoc;
    }

    // Appends one element (key + detail) under the "elements" root of an in-memory index.
    private static void AppendEntry(XmlDocument xmlDoc, string k, string t)
    {
        XmlNode root = xmlDoc.SelectSingleNode("elements");
        if (root == null)
        {
            throw new InvalidOperationException(
                "Index file '" + IndexXmlPath + "' has no 'elements' root node.");
        }

        XmlElement element = xmlDoc.CreateElement("element");

        XmlElement key = xmlDoc.CreateElement("key");
        key.InnerText = k;
        element.AppendChild(key);

        XmlElement detail = xmlDoc.CreateElement("detail");
        detail.InnerText = t;
        element.AppendChild(detail);

        root.AppendChild(element);
    }
}

索引表:

评论