[ 登录 ] - [ 注册 ] | 免费代理IP 数据无忧_无忧代理IP_DATA5U最新上线 | 代码示例DEMO | IP测试视频 | 用户协议
案例一·Https代理IP(API)示例   下载DEMO项目  

package com.data5u.test;

import java.io.BufferedInputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.ProxyConfig;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebResponse;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.util.NameValuePair;

/**
 * Demo that exercises crawler (dynamic) proxy IPs to check how stable they are.
 *
 * <p>A {@link GetIP} thread periodically downloads a list of proxy addresses
 * from the DATA5U API and starts one {@link Crawler} thread per address; each
 * crawler fetches the target URL through its proxy and prints the elapsed time
 * plus a short summary of the response.
 *
 * <p>It can also serve as a starting point for a real crawler: adapt
 * {@link Crawler#webParseHtml(String)} to your needs.
 */
public class TestDynamicIpContinue {

	/** Shared list of proxy addresses; it is not referenced elsewhere in this class. */
	public static List<String> ipList = new ArrayList<>();

	/**
	 * Set to {@code true} by the {@link GetIP} thread once the test is finished.
	 * Declared volatile because it is written by that thread and polled by main.
	 */
	public static volatile boolean gameOver = false;

	/**
	 * Configures the test, starts the IP fetching thread and waits until it
	 * reports completion through {@link #gameOver}.
	 *
	 * @param args unused
	 * @throws Exception declared for backward compatibility
	 */
	public static void main(String[] args) throws Exception {
		// Interval between two IP extractions, in seconds
		long fetchIpSeconds = 5;
		// Number of extraction rounds to run before the test stops
		int testTime = 3;

		// DATA5U order number; IPs can only be extracted once this is filled in
		String order = "88888888888888888888888888888";

		// The target URL to crawl. Examples:
		// Company info (Tianyancha) http://www.tianyancha.com/company/1184508115
		// Company registry (gsxt) http://www.gsxt.gov.cn/%7BLtkX_Us_Uuw_QRrZ9mfv2cbf8ANpkJNT8_EzigHHLIvfwbsXfxY0o15JwumCNmvtm_nv9Wtm2Iy_ptgrdpD7p-dP6C8an4IYel_Bx4EnhQhxk8Q4jptLj9IMw9N0lCP-4i0Q4MN55e0wtKOgDy4GEw-1493711400352%7D
		// Ebay http://www.ebay.com/sch/tenco-tech/m.html?_ipg=200&_sop=12&_rdc=1
		// Tmall https://list.tmall.com/search_product.htm?cat=56594003&brand=97814105&sort=s&style=g&search_condition=23&from=sn_1_cat&industryCatId=50025174#J_crumbs
		// JD https://search.jd.com/Search?keyword=%E8%8B%8F%E6%89%93%E7%B2%89&enc=utf-8&suggest=1.def.0.T15&wq=s%27d%27f&pvid=1d962d789b81461aa6cce40b26a90429
		// IP check http://ip.chinaz.com/getip.aspx
		// Anonymity check http://www.xxorg.com/tools/checkproxy/
		// Sina Weibo https://m.weibo.cn/api/container/getIndex?containerid=100103type%3D3%26q%3D%E6%B1%BD%E8%BD%A6&queryVal=%E6%B1%BD%E8%BD%A6&type=user&page=2
		// Court documents https://m.itslaw.com/mobile
		// Classifieds (Baixing) http://china.baixing.com/cheliang/
		String targetUrl = "http://pv.sohu.com/cityjson?ie=utf-8";

		// Referer header; needs to be set when crawling Taobao / Tmall
		String referer = "";
		// Enable HTTPS support (skips SSL verification in the crawler)
		boolean https = true;
		// Whether to print the response headers
		boolean outputHeaderInfo = false;
		// Whether to execute JavaScript; enabling it slows requests down
		boolean useJS = false;
		// Request timeout in milliseconds (10 seconds here)
		int timeOut = 10000;

		if (order == null || "".equals(order)) {
			System.err.println("请输入爬虫(动态)代理订单号");
			return;
		}
		System.out.println(">>>>>>>>>>>>>>动态IP测试开始<<<<<<<<<<<<<<");
		System.out.println("***************");
		System.out.println("提取IP间隔 " + fetchIpSeconds + " 秒 ");
		System.out.println("爬虫目标网址  " + targetUrl);
		System.out.println("***************\n");

		// Trust every certificate. Needed for HTTPS targets, otherwise requests fail with:
		//   sun.security.validator.ValidatorException: PKIX path building failed:
		//   sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
		// For skipping certificate checks with HttpClient see http://www.data5u.com/help/article-67.html
		TrustCertsUtil.trustAllHttpsCertificates();

		TestDynamicIpContinue tester = new TestDynamicIpContinue();
		new Thread(tester.new GetIP(fetchIpSeconds * 1000, testTime, order, targetUrl, useJS, timeOut, referer, https, outputHeaderInfo)).start();

		while (!gameOver) {
			try {
				Thread.sleep(100);
			} catch (InterruptedException e) {
				// Restore the flag and stop waiting; looping on would spin on a failing sleep.
				Thread.currentThread().interrupt();
				break;
			}
		}
		System.out.println(">>>>>>>>>>>>>>动态IP测试结束<<<<<<<<<<<<<<");
		System.exit(0);
	}

	/**
	 * Installs a trust-everything SSL configuration for {@code HttpsURLConnection}.
	 *
	 * <p>Static nested class: it only has static members and is called without
	 * an enclosing instance.
	 *
	 * <p>WARNING: this disables certificate validation for the whole JVM and is
	 * only acceptable for demos and tests.
	 */
	public static class TrustCertsUtil {

		/**
		 * Re-enables proxy authentication schemes for HTTPS tunnelling and makes
		 * a trust-all socket factory the default for {@code HttpsURLConnection}.
		 * Failures are reported on stderr and otherwise ignored.
		 */
		public static void trustAllHttpsCertificates() {

			// Both properties are required when the crawler requests HTTPS URLs through a proxy
			System.setProperty("jdk.http.auth.proxying.disabledSchemes", "");
			System.setProperty("jdk.http.auth.tunneling.disabledSchemes", "");

			try {
				javax.net.ssl.TrustManager[] trustAllCerts = { new MITM() };
				javax.net.ssl.SSLContext sc = javax.net.ssl.SSLContext.getInstance("TLS");
				sc.init(null, trustAllCerts, null);
				javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
			} catch (java.security.GeneralSecurityException e) {
				System.err.println("【DATA5U】设置证书出错,原因:" + e.getMessage());
			}
		}

		/** Trust manager that accepts every client and server certificate. */
		static class MITM implements javax.net.ssl.TrustManager, javax.net.ssl.X509TrustManager {

			/** @return an empty array; the interface contract requires a non-null result */
			@Override
			public java.security.cert.X509Certificate[] getAcceptedIssuers() {
				return new java.security.cert.X509Certificate[0];
			}

			/** @return always {@code true} */
			public boolean isServerTrusted(java.security.cert.X509Certificate[] certs) {
				return true;
			}

			/** @return always {@code true} */
			public boolean isClientTrusted(java.security.cert.X509Certificate[] certs) {
				return true;
			}

			/** Accepts any server certificate chain. */
			@Override
			public void checkServerTrusted(java.security.cert.X509Certificate[] certs, String authType)
					throws java.security.cert.CertificateException {
				// intentionally empty: everything is trusted
			}

			/** Accepts any client certificate chain. */
			@Override
			public void checkClientTrusted(java.security.cert.X509Certificate[] certs, String authType)
					throws java.security.cert.CertificateException {
				// intentionally empty: everything is trusted
			}
		}

	}

	/**
	 * Thread that fetches the target URL through one proxy and prints a summary.
	 */
	public class Crawler extends Thread {

		long sleepMs = 200;
		boolean useJs = false;
		String targetUrl = "";
		int timeOut = 5000;
		/** Proxy in the form {@code ip:port}, optionally followed by {@code ,ttl}. */
		String ipport = "";

		String referer;
		boolean https;
		boolean outputHeaderInfo;

		/**
		 * @param sleepMs      stored only; not used by this class
		 * @param targetUrl    URL fetched by {@link #run()}
		 * @param useJs        whether HtmlUnit executes JavaScript
		 * @param timeOut      HtmlUnit timeout in milliseconds
		 * @param ipport       proxy as {@code ip:port[,ttl]}; {@code null} skips the request
		 * @param referer      Referer header, ignored when null or empty
		 * @param https        whether to skip SSL verification
		 * @param outputHeader whether to print the response headers
		 */
		public Crawler(long sleepMs, String targetUrl, boolean useJs, int timeOut, String ipport, String referer, boolean https, boolean outputHeader) {
			this.sleepMs = sleepMs;
			this.targetUrl = targetUrl;
			this.useJs = useJs;
			this.timeOut = timeOut;
			this.ipport = ipport;

			this.referer = referer;
			this.https = https;
			this.outputHeaderInfo = outputHeader;
		}

		@Override
		public void run() {
			webParseHtml(targetUrl);
		}

		/**
		 * Fetches {@code url} through the configured proxy with a randomly chosen
		 * browser profile and prints a summary of the result.
		 *
		 * @param url the URL to request
		 * @return the JSON body or the page XML; an empty string when there is no
		 *         proxy, the request failed, or the response is neither JSON nor HTML
		 */
		public String webParseHtml(String url) {
			if (ipport == null) {
				System.out.print(".");
				return "";
			}

			String html = "";
			BrowserVersion[] versions = { BrowserVersion.CHROME, BrowserVersion.FIREFOX_38, BrowserVersion.INTERNET_EXPLORER_11, BrowserVersion.INTERNET_EXPLORER_8};
			WebClient client = new WebClient(versions[(int) (versions.length * Math.random())]);
			try {
				client.getOptions().setThrowExceptionOnFailingStatusCode(false);
				client.getOptions().setJavaScriptEnabled(useJs);
				client.getOptions().setCssEnabled(false);
				client.getOptions().setThrowExceptionOnScriptError(false);
				client.getOptions().setTimeout(timeOut);
				client.getOptions().setAppletEnabled(true);
				client.getOptions().setGeolocationEnabled(true);
				client.getOptions().setRedirectEnabled(true);

				// For HTTPS sites this skips SSL verification
				client.getOptions().setUseInsecureSSL(https);

				if (referer != null && !"".equals(referer)) {
					client.addRequestHeader("Referer", referer);
				}

				client.getOptions().setProxyConfig(parseProxy(ipport));

				long startMs = System.currentTimeMillis();

				Page page = client.getPage(url);
				WebResponse response = page.getWebResponse();

				if (outputHeaderInfo) {
					List<NameValuePair> headers = response.getResponseHeaders();
					for (NameValuePair header : headers) {
						System.out.println(header.getName() + "-->" + header.getValue());
					}
				}

				boolean isJson = false;
				if ("application/json".equals(response.getContentType())) {
					html = response.getContentAsString();
					isJson = true;
				} else if (page.isHtmlPage()) {
					html = ((HtmlPage) page).asXml();
				}

				long endMs = System.currentTimeMillis();

				printResult(url, html, isJson, endMs - startMs);
			} catch (Exception e) {
				System.err.println(ipport + ":" + e.getMessage());
			} finally {
				client.close();
			}
			return html;
		}

		/**
		 * Builds the proxy configuration from an {@code ip:port[,ttl]} line.
		 * Surrounding whitespace (for example a trailing {@code \r}) is ignored.
		 */
		private ProxyConfig parseProxy(String line) {
			String[] hostAndPort = line.split(",", 2)[0].trim().split(":");
			if (hostAndPort.length != 2) {
				throw new IllegalArgumentException("代理IP格式错误,应为 ip:port");
			}
			return new ProxyConfig(hostAndPort[0].trim(), Integer.parseInt(hostAndPort[1].trim()));
		}

		/** Prints the per-site summary line(s) for a fetched page. */
		private void printResult(String url, String html, boolean isJson, long elapsedMs) {
			String prefix = getName() + " " + ipport + " 用时 " + elapsedMs + "毫秒 :";
			if (url.contains("2017.ip138.com")) {
				System.out.println(prefix + Jsoup.parse(html).select("center").text());
			} else if (url.equals("http://www.xxorg.com/tools/checkproxy/")) {
				System.out.println(prefix + Jsoup.parse(html).select("#result .jiacu").text());
			} else if (isJson) {
				System.out.println(prefix + html);
			} else if (url.contains("tianyancha.com")) {
				Document doc = Jsoup.parse(html);
				printRows(doc, ".c8", "企业基本信息:");
				printRows(doc, ".companyInfo-table tr", "企业股东信息:");
				printRows(doc, "#_container_check tr", "企业抽查息:");
			} else {
				System.out.println(prefix + Jsoup.parse(html).select("title").text());
			}
		}

		/** Prints a heading followed by the text of every element matching the selector. */
		private void printRows(Document doc, String selector, String heading) {
			Elements rows = doc.select(selector);
			System.out.println(getName() + heading);
			for (Element row : rows) {
				System.out.println("\t*" + row.text());
			}
		}

	}

	/**
	 * Periodically downloads dynamic proxy IPs and starts a {@link Crawler} for
	 * each of them. After {@code maxTime} rounds it sets {@link #gameOver}.
	 */
	public class GetIP implements Runnable {
		long sleepMs = 1000;
		int maxTime = 3;
		String order = "";
		String targetUrl;
		boolean useJs;
		int timeOut;
		String referer;
		boolean https;
		boolean outputHeaderInfo;

		/**
		 * @param sleepMs          pause between two extraction rounds, in milliseconds
		 * @param maxTime          number of extraction rounds to run
		 * @param order            DATA5U order number sent to the API
		 * @param targetUrl        URL handed to every crawler
		 * @param useJs            handed to every crawler
		 * @param timeOut          crawler timeout in milliseconds
		 * @param referer          handed to every crawler
		 * @param https            handed to every crawler
		 * @param outputHeaderInfo handed to every crawler
		 */
		public GetIP(long sleepMs, int maxTime, String order, String targetUrl, boolean useJs, int timeOut, String referer, boolean https, boolean outputHeaderInfo) {
			this.sleepMs = sleepMs;
			this.maxTime = maxTime;
			this.order = order;
			this.targetUrl = targetUrl;
			this.useJs = useJs;
			this.timeOut = timeOut;
			this.referer = referer;
			this.https = https;
			this.outputHeaderInfo = outputHeaderInfo;
		}

		/**
		 * Runs up to {@code maxTime} extraction rounds (stopping early if
		 * {@link #gameOver} is set or the thread is interrupted), gives the
		 * started crawlers a bounded time to finish, then sets {@link #gameOver}.
		 */
		@Override
		public void run() {
			List<Thread> crawlers = new ArrayList<>();
			try {
				for (int time = 1; time <= maxTime && !gameOver; time++) {
					try {
						List<String> ips = fetchProxyList();
						System.out.println(">>>>>>>>>>>>>>当前返回IP量 " + ips.size());
						for (String ip : ips) {
							Thread crawler = new Crawler(100, targetUrl, useJs, timeOut, ip, referer, https, outputHeaderInfo);
							crawlers.add(crawler);
							crawler.start();
						}
					} catch (Exception e) {
						System.err.println(">>>>>>>>>>>>>>获取IP出错, " + e.getMessage());
					}
					Thread.sleep(sleepMs);
				}
				// main() calls System.exit once gameOver is set, so let running
				// crawlers finish first, bounded by the request timeout.
				if (timeOut > 0) {
					for (Thread crawler : crawlers) {
						crawler.join(timeOut);
					}
				}
			} catch (InterruptedException e) {
				Thread.currentThread().interrupt();
			} finally {
				gameOver = true;
			}
		}

		/**
		 * Downloads the proxy list from the DATA5U API.
		 *
		 * @return the non-blank, trimmed lines of the response
		 * @throws java.io.IOException if the request fails
		 */
		private List<String> fetchProxyList() throws java.io.IOException {
			java.net.URL url = new java.net.URL("http://api.ip.data5u.com/dynamic/get.html?order=" + order + "&ttl&random=true");

			HttpURLConnection connection = (HttpURLConnection) url.openConnection();
			connection.setConnectTimeout(3000);
			// Without a read timeout a stalled API would block this thread forever.
			connection.setReadTimeout(10000);

			String body;
			try (InputStream in = new BufferedInputStream(connection.getInputStream())) {
				// Read until EOF; available() only reports what can be read without
				// blocking and must not be used to size the whole response.
				java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
				byte[] chunk = new byte[4096];
				int bytesRead;
				while ((bytesRead = in.read(chunk)) != -1) {
					buffer.write(chunk, 0, bytesRead);
				}
				body = buffer.toString("UTF-8");
			} finally {
				connection.disconnect();
			}

			List<String> ips = new ArrayList<>();
			for (String line : body.split("\n")) {
				String ip = line.trim();
				if (!ip.isEmpty()) {
					ips.add(ip);
				}
			}
			return ips;
		}
	}


	/**
	 * Concatenates all entries, appending a comma after each one (including the last).
	 *
	 * @param list the strings to join; {@code null} yields an empty string
	 * @return the joined string, e.g. {@code "a,b,"}
	 */
	public String joinList(List<String> list) {
		StringBuilder re = new StringBuilder();
		if (list != null) {
			for (String string : list) {
				re.append(string).append(",");
			}
		}
		return re.toString();
	}


	/**
	 * Removes every space and line feed from the given text.
	 *
	 * @param html the text to clean, may be {@code null}
	 * @return the cleaned text, or {@code null} when the input is {@code null}
	 */
	public String trim(String html) {
		if (html == null) {
			return null;
		}
		return html.replace(" ", "").replace("\n", "");
	}

}

案例二·隧道动态转发代理IP

package com.data5u.demo;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.Authenticator;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.PasswordAuthentication;
import java.net.Proxy;
import java.net.URL;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLSession;

/**
 * Demo for the DATA5U tunnel (dynamic forwarding) proxy. It works for both
 * HTTP and HTTPS target URLs, e.g. http://www.example.com and
 * https://www.example.com.
 *
 * <p>WARNING: certificate and host name verification are disabled for the
 * whole JVM; this is only acceptable for demos and tests.
 *
 * @author www.data5u.com
 */
public class TestHttps {

    /**
     * Makes a trust-all socket factory the default for {@code HttpsURLConnection}.
     * Must be called before any connection is obtained.
     *
     * @throws Exception if the TLS context cannot be created or initialised
     */
    private static void trustAllHttpsCertificates() throws Exception {
        javax.net.ssl.TrustManager[] trustAllCerts = { new miTM() };
        javax.net.ssl.SSLContext sc = javax.net.ssl.SSLContext.getInstance("TLS");
        sc.init(null, trustAllCerts, null);
        javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    }

    /** Trust manager that accepts every client and server certificate. */
    static class miTM implements javax.net.ssl.TrustManager, javax.net.ssl.X509TrustManager {

        /** @return an empty array; the interface contract requires a non-null result */
        @Override
        public java.security.cert.X509Certificate[] getAcceptedIssuers() {
            return new java.security.cert.X509Certificate[0];
        }

        /** @return always {@code true} */
        public boolean isServerTrusted(java.security.cert.X509Certificate[] certs) {
            return true;
        }

        /** @return always {@code true} */
        public boolean isClientTrusted(java.security.cert.X509Certificate[] certs) {
            return true;
        }

        /** Accepts any server certificate chain. */
        @Override
        public void checkServerTrusted(java.security.cert.X509Certificate[] certs, String authType)
                throws java.security.cert.CertificateException {
            // intentionally empty: everything is trusted
        }

        /** Accepts any client certificate chain. */
        @Override
        public void checkClientTrusted(java.security.cert.X509Certificate[] certs, String authType)
                throws java.security.cert.CertificateException {
            // intentionally empty: everything is trusted
        }
    }

    /**
     * Applies the JVM-wide proxy/SSL settings once, then sends the same GET
     * request through the tunnel proxy from several threads in parallel.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        // Both properties are required when the crawler requests HTTPS URLs
        // through an authenticating proxy; set them before any HTTP connection is used.
        System.setProperty("jdk.http.auth.proxying.disabledSchemes", "");
        System.setProperty("jdk.http.auth.tunneling.disabledSchemes", "");

        // Always tunnel.data5u.com:56789
        final String httpsIpport = "tunnel.data5u.com:56789";
        final String order = "【把这里换成你的IP提取码】"; // user name
        final String pwd = "【把这里换成你的动态转发密码】"; // password
        final String targetUrl = "http://myip.ipip.net/"; // the target URL to fetch

        // The settings below are JVM-global, so apply them once here instead of
        // concurrently from every worker thread.
        try {
            // Trust all certificates (needed for HTTPS targets)
            trustAllHttpsCertificates();
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println("[ERR]" + "→→→→→" + e.getMessage());
            return;
        }
        HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() {
            @Override
            public boolean verify(String urlHostName, SSLSession session) {
                return true;
            }
        });
        // JDK built-in proxy authentication, required for HTTPS targets
        Authenticator.setDefault(new Authenticator() {
            @Override
            public PasswordAuthentication getPasswordAuthentication() {
                return new PasswordAuthentication(order, pwd.toCharArray());
            }
        });

        String[] hostAndPort = httpsIpport.split(":");
        final Proxy proxy = new Proxy(Proxy.Type.HTTP,
                new InetSocketAddress(hostAndPort[0], Integer.parseInt(hostAndPort[1])));

        int requestTime = 5;
        for (int i = 0; i < requestTime; i++) {
            final int x = i;
            new Thread(new Runnable() {
                @Override
                public void run() {
                    request(x, targetUrl, proxy);
                }
            }).start();
        }

    }

    /**
     * Sends one GET request through the proxy and prints status, timing and body.
     *
     * @param x         index of the request, used as output prefix
     * @param targetUrl URL to fetch
     * @param proxy     proxy to send the request through
     */
    private static void request(int x, String targetUrl, Proxy proxy) {
        try {
            long startTime = System.currentTimeMillis();

            HttpURLConnection connection = (HttpURLConnection) new URL(targetUrl).openConnection(proxy);
            try {
                connection.setRequestMethod("GET");
                connection.setDoInput(true);
                connection.setRequestProperty("User-Agent","Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36");

                connection.setUseCaches(false);
                connection.setConnectTimeout(60000);
                // Without a read timeout a stalled proxy would block this thread forever.
                connection.setReadTimeout(60000);

                connection.connect();

                // Read the status while the connection is still open; it must not
                // be queried after disconnect().
                int responseCode = connection.getResponseCode();

                StringBuilder html = new StringBuilder();
                try (BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), "utf-8"))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        html.append(line);
                    }
                }

                long endTime = System.currentTimeMillis();

                System.out.println(x + " [OK]" + "→→→→→" + targetUrl + "  " + (endTime - startTime) + "ms  " + responseCode + "   " + html.toString());
            } finally {
                connection.disconnect();
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println(x + " [ERR]" + "→→→→→" + e.getMessage());
        }
    }

}

案例三·HttpUrlConnection设置Socks5代理IP

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;

import java.net.Authenticator;
import java.net.PasswordAuthentication;

/**
 * Demo that sends an HTTP request through a SOCKS5 proxy using
 * {@link HttpURLConnection}. SOCKS5 proxies require user name / password
 * authentication, so the demo installs an {@link Authenticator}.
 */
public class Socks5IpDemo {

	/**
	 * Authenticator that always answers with a fixed user name and password.
	 * Static nested class so it can be instantiated from the static {@code main}.
	 */
	static class BasicAuthenticator extends Authenticator {
		String userName;
		String password;

		/**
		 * @param userName proxy user name
		 * @param password proxy password
		 */
		public BasicAuthenticator(String userName, String password) {
			this.userName = userName;
			this.password = password;
		}

		/** @return the configured credentials */
		@Override
		protected PasswordAuthentication getPasswordAuthentication() {
			return new PasswordAuthentication(userName, password.toCharArray());
		}
	}

	/**
	 * Fetches the target URL through the SOCKS5 proxy and prints the response
	 * body, or the status code when it is not 200.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String targetUrl = "http://pv.sohu.com/cityjson?ie=utf-8";
		// Replace with a working proxy; this address is only a placeholder
		String ipport = "218.26.204.66:8080";
		String charset = "UTF-8";

		HttpURLConnection connection = null;
		try {
			URL link = new URL(targetUrl);

			// Credentials for the proxy; replace with the real user name and password.
			// Installed before the connection is opened so they are available
			// as soon as the proxy asks for them.
			Authenticator.setDefault(new BasicAuthenticator("data5u", "123321"));

			// Configure the proxy
			String[] hostAndPort = ipport.split(":");
			Proxy proxy = new Proxy(Proxy.Type.SOCKS, new InetSocketAddress(hostAndPort[0], Integer.parseInt(hostAndPort[1])));
			connection = (HttpURLConnection) link.openConnection(proxy);

			connection.setRequestProperty("User-agent", "");
			connection.setRequestProperty("Accept", "*/*");
			connection.setRequestProperty("Accept-Charset", charset);
			connection.setRequestProperty("Referer", "");
			connection.setRequestProperty("Upgrade-Insecure-Requests", "1");
			connection.setRequestProperty("Cookie", "");
			connection.setUseCaches(false);
			// Bound both phases so a dead proxy cannot hang the demo.
			connection.setConnectTimeout(10000);
			connection.setReadTimeout(10000);

			int rcode = connection.getResponseCode();
			if (rcode != 200) {
				System.out.println("使用代理IP连接网络失败,状态码:" + rcode);
			} else {
				StringBuilder html = new StringBuilder();
				try (BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), charset))) {
					String line;
					while ((line = reader.readLine()) != null) {
						html.append(line);
					}
				}

				System.out.println("请求" + targetUrl + ", 得到如下信息:");
				System.out.println(html.toString());
			}
		} catch (Exception e) {
			System.err.println("发生异常:" + e.getMessage());
		} finally {
			if (connection != null) {
				connection.disconnect();
			}
		}
	}
}

微信公众号
关注微信公众号