当前位置: 移动技术网 > IT编程>开发语言>c# > c#实现网页图片提取工具代码分享

c#实现网页图片提取工具代码分享

2019年07月18日  | 移动技术网IT编程  | 我要评论
复制代码 代码如下:public array matchhtml(string html,string com)     

从网页 HTML 中提取图片地址的方法,代码如下:

/// <summary>
/// Collects image URLs (.jpg, .png, .gif, .bmp, .ico) found in the src="..." and
/// href="..." attributes of an HTML page.
/// </summary>
/// <param name="html">
/// Page markup. It is lower-cased before matching, so every returned URL is lower-case.
/// Null or empty input yields an empty array.
/// </param>
/// <param name="com">
/// Site root such as "http://www.example.com"; it is used to turn relative URLs into
/// absolute ones. When it is null or empty, relative URLs are returned unchanged.
/// </param>
/// <returns>
/// A string[] of distinct URLs: all src matches first, then all href matches, each group
/// in document order.
/// </returns>
public Array matchhtml(string html, string com)
       {
           List<string> urls = new List<string>();
           if (string.IsNullOrEmpty(html))
               return urls.ToArray();

           // Lower-casing makes attribute names and extensions match regardless of case.
           // NOTE(review): this also lower-cases the URL path, which can break downloads
           // from case-sensitive servers; callers currently rely on lower-case extensions.
           html = html.ToLower();

           // Collect URLs from src attributes first, then from href attributes.
           CollectImageUrls(html, "src", com, urls);
           CollectImageUrls(html, "href", com, urls);
           return urls.ToArray();
       }

       /// <summary>
       /// Appends to <paramref name="urls"/> every not-yet-seen image URL found in the
       /// given attribute (for example "src") of the already lower-cased markup.
       /// </summary>
       private static void CollectImageUrls(string html, string attribute, string com, List<string> urls)
       {
           // The extension is an alternation; the previous "[(.jpg)(.png)...]" form was a
           // character class that accepted any value ending in one of those characters.
           Regex pattern = new Regex(attribute + "=\"([^\"]*\\.(?:jpg|png|gif|bmp|ico))\"");
           foreach (Match m in pattern.Matches(html))
           {
               string url = ResolveUrl(m.Groups[1].Value, com);
               if (!urls.Contains(url))
                   urls.Add(url);
           }
       }

       /// <summary>
       /// Turns an attribute value into an absolute URL using the site root.
       /// Relative paths are resolved against the site root, not the page's directory.
       /// </summary>
       private static string ResolveUrl(string url, string com)
       {
           if (url.StartsWith("http://", StringComparison.Ordinal) ||
               url.StartsWith("https://", StringComparison.Ordinal))
               return url;

           string root = (com ?? "").TrimEnd('/');

           // Protocol-relative URL ("//cdn.example.com/a.png"): borrow the root's scheme.
           if (url.StartsWith("//", StringComparison.Ordinal))
               return (root.StartsWith("https:", StringComparison.Ordinal) ? "https:" : "http:") + url;

           if (root.Length == 0)
               return url;

           return url.StartsWith("/", StringComparison.Ordinal) ? root + url : root + "/" + url;
       }

控制台主程序(下载网页并保存其中的图片),代码如下:

// Win32 console functions from kernel32.dll. EntryPoint carries the exact export name
// because native export lookup is case-sensitive and the C# method names are lower-case.
[DllImport("kernel32.dll", EntryPoint = "SetConsoleMode")]
       static extern bool setconsolemode(IntPtr hconsolehandle, int mode);
       [DllImport("kernel32.dll", EntryPoint = "GetConsoleMode")]
       static extern bool getconsolemode(IntPtr hconsolehandle, out int mode);
       [DllImport("kernel32.dll", EntryPoint = "GetStdHandle")]
       static extern IntPtr getstdhandle(int handle);

       // Win32 STD_INPUT_HANDLE: selects the console input buffer.
       const int std_input_handle = -10;
       // 0x40 is ENABLE_QUICK_EDIT_MODE and 0x80 is ENABLE_EXTENDED_FLAGS; the Win32
       // documentation requires the extended flag to be set for quick-edit to take effect.
       const int enable_quick_edit_mode = 0x40 | 0x80;

       /// <summary>
       /// Turns on quick-edit mode for the console input buffer while keeping every other
       /// mode flag that is already set. Does nothing if the current mode cannot be read
       /// (for example when standard input is not a console).
       /// </summary>
       public static void enablequickeditmode()
       {
           IntPtr handle = getstdhandle(std_input_handle);
           int mode;
           // Without the current mode we would overwrite all existing flags, so bail out.
           if (!getconsolemode(handle, out mode))
               return;
           setconsolemode(handle, mode | enable_quick_edit_mode);
       }
       static void main(string[] args)
       {
           enablequickeditmode();
           int oldcount = 0;
           console.title = "takeimagefrominternet";
           string path = "e:\\download\\loading\\";
           while (true)
           {
               console.clear();
               string countfile = "e:\\countfile.txt";//用来计数的文本,以至于文件名不重复
               int cursor = 0;
               if (file.exists(countfile))
               {
                   string text = file.readalltext(countfile);
                   try
                   {
                       cursor =oldcount = convert.toint32(text);//次数多了建议使用long
                   }
                   catch { }
               }
               console.write("please input a url:");
               string url = "http://www.baidu.com/";
               string temp = console.readline();
               if (!string.isnullorempty(temp))
                   url = temp;
               match mcom = new regex(@"^(?i)http://(\w+\.){2,3}(com(\.cn)?|cn|net)\b").match(url);//获取域名
               string com = mcom.value;
               //console.writeline(mcom.value);
               console.write("please input a save path:");
               temp = console.readline();
               if (directory.exists(temp))
                   path = temp;
               console.writeline();
               webclient client = new webclient();
               byte[] htmldata = null;
               htmldata = client.downloaddata(url);
               memorystream mstream = new memorystream(htmldata);
               string html = "";
               using (streamreader sr = new streamreader(mstream))
               {
                   html = sr.readtoend();
               }
               array urls = new matchhtmlimageurl().matchhtml(html,com);

               foreach (string imageurl in urls)
               {
                  console.writeline(imageurl);
                   byte[] imagedata = null;
                   try
                   {
                       imagedata = client.downloaddata(imageurl);
                   }
                   catch { }
                   if (imagedata != null && imagedata.length>0)
                       using (memorystream ms = new memorystream(imagedata))
                       {
                           try
                           {

                               string ext = aping.utility.file.fileopration.extendname(imageurl);
                               imageformat format = imageformat.jpeg;
                               switch (ext)
                               {
                                   case ".jpg":
                                       format = imageformat.jpeg;
                                       break;
                                   case ".bmp":
                                       format = imageformat.bmp;
                                       break;
                                   case ".png":
                                       format = imageformat.png;
                                       break;
                                   case ".gif":
                                       format = imageformat.gif;
                                       break;
                                   case ".ico":
                                       format = imageformat.icon;
                                       break;
                                   default:
                                       continue;
                               }
                               image image = new bitmap(ms);
                               if (directory.exists(path))
                                   image.save(path + "\\" + cursor + ext, format);
                           }
                           catch(exception ex) { console.writeline(ex.message); }
                       }
                   cursor++;
               }
               mstream.close();
               file.writealltext(countfile, cursor.tostring(), encoding.utf8);
               console.writeline("take done...image count:"+(cursor-oldcount).tostring());
           }           
       }

如您对本文有疑问或者有任何想说的,请点击进行留言回复,万千网友为您解惑!

相关文章:

验证码:
移动技术网