正文

D版MP3搜索引擎及其代码 2006-09-22 22:37:00

【评论】 【打印】 【字体: 】 本文链接:http://blog.pfan.cn/bakers/18686.html

分享到:

  using System;using System.Text.RegularExpressions;using System.Net;using System.IO;using System.Text;//using System.ArgumentOutOfRangeException; using System.Data;using System.Data.SqlClient; public static class RegexTest{    public static  string req;    public static string infor;    public static string keys;    public static  string reg2;    public static  string reg3;    public static  string reg4;    public static  string reg1;    public static  string reg5;    public static string nums;    public static  string mainurl;    public static  string mainstyle;    public static  string mainsinger;    public static  string mainsong;    public static string mainspecial;    public static  string mainsize;    public static  int id = 0;    public static int k;    //public static static  MatchCollection m;       public static MatchCollection m1;    public static  MatchCollection m2;    public static MatchCollection m3;    public static MatchCollection m4;    public static MatchCollection m5;    public static MatchCollection mnum;    public static HttpWebResponse HttpWResp;    public static HttpWebRequest HttpWReq;    public static  SqlConnection Conn;    //   public static SqlCommand comm;     // public static string sql;    /*   void Page_Load(Object sender, EventArgs e)    {         Conn = new SqlConnection("server=localhost;database=index;uid=baker;pwd=baker");        Conn.Open();        keys = Request["url"];        if (keys != "") sick();        // Conn.Open();        // conne.Text = Conn.State.ToString();     }     void On_click(Object sender, EventArgs e)    {        if (url.Text != "")        {            keys = url.Text;            sick();        }    }*/     public static  void sick(string keys)    {         try        {            k = 1;            for (int j = 0; j < k; j++)            {                 req = "http://search.tom.com/searchmp3.php?singer=" + HttpUtility.UrlEncode(keys, Encoding.Default) + "&items=" + (j + 1).ToString();                
HttpWReq = (HttpWebRequest)WebRequest.Create(req);                 HttpWResp = (HttpWebResponse)HttpWReq.GetResponse();                //   Response.Write(HttpWResP.StatusCode);                infor = TextContent(HttpWResp);                // reg = @"('(http|https|ftp|rtsp|mms|\d{1}):(\/\/|\\|\\\\){1}(([A-Za-z0-9_-]|['])+[.]){1,}([a-z0-9]{1,3})([^   \f\n\r\t\v\""\'\>]*\/)(([^   \f\n\r\t\v\""\'\>~])+[.]{1}(((m|M)(p|P)3)|((w|W)(M|m)(v|V))|((w|W)(M|m)(A|a))|((M|m)(p|P)(G|g))|((A|a)(S|s)(F|f))|((W|w)(A|a)(V|v))|((r|R)(M|m))|((m|M)(I|i)(D|d))|((A|a)(V|v)(I|i))|(M|m)(O|o)(V|v)|(S|s)(W|w)(F|f)|((R|r)(A|a)(M|m))|((G|g)(S|s))|(mpga)|((f|F)lash)))')";                // reg = @"('(http|https|ftp|rtsp|mms|\d{1}):(\/\/|\\|\\\\){1}(([A-Za-z0-9_-])+[.]){1,}(gov|mobi|name|travel|hk|sh|ac|free|ca|io|biz|info|fm|com|ws|edu|tw|no|net|jp|cz|com|cn|org|cc|tv|[0-9]{1,3})([^   \f\n\r\t\v\""\'\>]*\/)(([^   \f\n\r\t\v\""\'\>])+[.]{1}(((m|M)p3)|((w|W)mv)|((w|W)ma)|((M|m)pg)|((A|a)sf)|((W|w)av)|((r|R)m)|(M|m)ov|(S|s)wf|((f|F)lash)))')";                //Regex re=new Regex(reg);                reg1 = @"(.a\shref='(http|https|ftp|rtsp|mms|\d{1}):(\/\/|\\|\\\\){1}(([A-Za-z0-9_-])+[.]){1,}([a-z0-9]{1,3})([^   \f\n\r\t\v\""\'\>]*\/)(([^   \f\n\r\t\v\""\>~])+[\.]{1}(((m|M)(p|P)3)|((w|W)(M|m)(v|V))|((w|W)(M|m)(A|a))|((M|m)(p|P)(G|g))|((A|a)(S|s)(F|f))|((W|w)(A|a)(V|v))|((r|R)(M|m))|((m|M)(I|i)(D|d))|((A|a)(V|v)(I|i))|(M|m)(O|o)(V|v)|(S|s)(W|w)(F|f)|((R|r)(A|a)(M|m))|(gs)|(asx)|(flv)|(mpga)|((f|F)lash)))')\s+target=_blank.+[^(\n)/]";                reg2 = @"(?<=nbsp;)[0-9\.]{1,5}(M|K)";                reg3 = @"(?<=searchmp3.php.singer=)\w*([^\""]*)*";                reg4 = @"(?<=searchmp3.php.special=)\w*([^\""]*)*";                nums = @"(?<=找到)[^首]*";                 // reg5 = 
@"(?<=.br.{2})(((m|M)(p|P)3)|((w|W)(M|m)(v|V))|((w|W)(M|m)(A|a))|((M|m)(p|P)(G|g))|((A|a)(S|s)(F|f))|((W|w)(A|a)(V|v))|((r|R)(M|m))|((m|M)(I|i)(D|d))|((A|a)(V|v)(I|i))|(M|m)(O|o)(V|v)|(S|s)(W|w)(F|f)|((R|r)(A|a)(M|m))|((G|g)(S|s))|((f|F)lash)))[^\s]";                //ObjReg.Pattern = "<[^>]+>|</[^>]+>";                // m = Regex.Matches(infor, reg,  RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture);                m2 = Regex.Matches(infor, reg2, RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture);                m3 = Regex.Matches(infor, reg3, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture);                m4 = Regex.Matches(infor, reg4, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture);                m1 = Regex.Matches(infor, reg1, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture);                mnum = Regex.Matches(infor, nums, RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture);                //num.Text = mnum[0].Value + "---" + m1.Count.ToString() + "(" + m2.Count.ToString() + ")" + "(" + m3.Count.ToString() + ")" + "(" + m4.Count.ToString() + ")";//+ "(" + m5.Count.ToString() + ")"                string sum = Regex.Replace(mnum[0].Value, "<[^>]*>", "");                k=(int.Parse(sum))/20;                if (k > 20) k = 20;                int sumber = m1.Count;                for (int ii = 0; ii < sumber; ii = ii + 2)                {                    string mainurl1 = Regex.Replace(m1[ii].Value, @".a\shref=\'", "");                    mainurl = Regex.Replace(mainurl1, @"\'\s+target=_blank.+[^(\n)/]", "");                    mainstyle = Regex.Replace(mainurl, @"[^\.]*\.", "");                    mainsong = Regex.Replace(m1[ii].Value, "<[^>]*>", "");                    mainsinger = m3[ii / 2].Value;                    mainspecial = m4[ii / 2].Value;                  
  mainsize = m2[ii / 2].Value;                     /*                         html.Text += "<tr align=left><td>" + ((i + 2) / 2).ToString() + "</td>";                         html.Text += "<td ><a href=" + mainurl+ ">" +  mainsong+ "</a></td>";                        // html.Text += "<td Width=40%>"+((i + 2) / 2).ToString() + ":<a color=green href=" + m[i].Value + "> " + m[i].Value + "</a></td>";                         html.Text += "<td align=left>" +mainsize  + "</td>";                         html.Text += "<td align=left ><a href=search.aspx?singer=" +mainsinger+ ">"+mainsinger+"</a></td>";                         html.Text += "<td align=left ><a href=search.aspx?special=" + mainspecial +">"+ mainspecial + "</a></td>";                         html.Text += "<td align=left>" + mainstyle + "</td>";                          html.Text += "</tr>";                   */                    datainsert(mainurl, mainsong, mainsinger, mainspecial, mainsize, mainstyle);                    //  Console.WriteLine((i/2).ToString());                }                 // html.Text += "</table>";                id++;                Console.WriteLine(id.ToString() + ":  " + keys + " is added successfully!  
counts= " + (sumber / 2).ToString());                HttpWResp.Close();            }        }         catch (Exception ee)        {            //Console.Response.WriteLine( "no connect!");             }         }        public static  Encoding GetEncoding(HttpWebResponse response)    {        string name = response.ContentEncoding;        Encoding code = Encoding.Default;        if (name == "")        {            string contentType = response.ContentType;            if (contentType.ToLower().IndexOf("charset") != -1)            {                name = contentType.Substring(contentType.ToLower().IndexOf("charset=") + "charset=".Length);            }        }         if (name != "")        {            try            {                code = Encoding.GetEncoding(name);            }            catch { }        }        return code;    }     public static  string TextContent(HttpWebResponse response)    {        string buffer = "", line;         Stream stream = response.GetResponseStream();        StreamReader reader = new StreamReader(stream, GetEncoding(response));         //buffer = "<base href=http://localhost:1080 />";        while ((line = reader.ReadLine()) != null)        {             buffer += line + "\r\n";        }        stream.Close();         return buffer;    }     public static void datainsert(string url1, string song1, string singer1, string special1, string sizes, string style)    {         string url = Regex.Replace(url1,@"[\""\'|]","");        string song=Regex.Replace(song1,@"[\""\'|]","");        string singer=Regex.Replace(singer1,@"[\""\'|]","");        string special=Regex.Replace(special1,@"[\""\'|]","");        String sql = "insert  into  infor(url,song,singer,special,sizes,type) values('"+url +"','"+ song+"','"+ singer+" ','"+ special+"','"+ sizes +"',' "+ style+" ')";        SqlCommand comm = new SqlCommand(sql, Conn);        comm.ExecuteNonQuery();     }    public static void  Main()    {        Conn = new 
SqlConnection("server=localhost;database=index;uid=baker;pwd=baker");        Conn.Open();         StreamReader objReader = new StreamReader("C:\\s.txt");        string sLine ;        sLine = objReader.ReadLine();       // ArrayList arrText = new ArrayList();         while (sLine != null)        {                             sick(sLine);            Console.WriteLine(sLine);            sLine = objReader.ReadLine();        }        objReader.Close();           }} 关于原理,其实最简单,不用多说,域名,已经注册,还不错吧。 测试过速度还行。 程序中包括 将搜索结果插入数据库,曾经有一天晚上  足足抓了 26万条信息。  

阅读(6269) | 评论(23)


版权声明:编程爱好者网站为此博客服务提供商,如本文牵涉到版权问题,编程爱好者网站不承担相关责任,如有版权问题请直接与本文作者联系解决。谢谢!

评论

loading...
您需要登录后才能评论,请 登录 或者 注册