在写 Java 爬虫的时候,经常会遇到各家厂商使用各式各样“标准”的 HTTPS,导致爬虫出现各种各样的“意外”。真蛋疼……
httpclient 版本(huawei测试通过)
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.3</version>
</dependency>
1 | private static final String HTTP = "http"; |
java 原生版本(ali测试通过)
package pw.gouzai;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import javax.net.ssl.*;
/**
 * Demo crawler: issues one HTTPS GET against a Taobao/AliHealth JSONP endpoint using
 * {@link HttpURLConnection}, unwraps the JSONP envelope, and prints selected fields of every
 * entry in {@code data.results} to standard error.
 *
 * <p><b>Security warning:</b> loading this class replaces the JVM-wide default
 * {@code SSLSocketFactory} and {@code HostnameVerifier} of {@link HttpsURLConnection} with
 * implementations that accept every certificate and every host name. That disables TLS
 * authentication for all {@code HttpsURLConnection}s created afterwards in the same JVM and
 * must not be used outside throw-away experiments.
 *
 * <p>Created 2019-06-09 23:35.
 *
 * @author Gouzai
 */
public class appList {

    // NOTE(review): the query string carries "t" and "sign" values and the cookie carries
    // "_m_h5_tk" tokens that look session-specific; presumably they must be refreshed before
    // the request succeeds again - confirm against the target service.
    private static final String REQUEST_URL = "https://h5api.m.taobao.com/h5/mtop.alihealth.search.expert.consulting.doctor/4.1/?jsv=2.4.11&appKey=12574478&t=1560673981585&sign=1f58a4169540ec29cc0d8467b3fe1758&api=mtop.alihealth.search.expert.consulting.doctor&v=4.1&ecode=1&type=jsonp&timeout=10000&H5Request=true&AntiCreep=true&dataType=jsonp&callback=mtopjsonp5&data=%7B%22r%22%3A0.65922612251997%2C%22noTags%22%3A%22-1%3B300%22%2C%22categoryCode%22%3A%22register_famous_doctors%2Cadvisory_paid%2Cadvisory_telephone%22%2C%22posx%22%3A113.45774%2C%22posy%22%3A23.105103%2C%22keyword%22%3A%22%22%2C%22isQuery%22%3Afalse%2C%22isDepart%22%3Afalse%2C%22startCount%22%3A-1%2C%22changeFilter%22%3A0%2C%22pageNum%22%3A1%2C%22pageSize%22%3A10%2C%22topNDoctorIds%22%3A%22%22%7D";

    private static final String COOKIE = "thw=cn; cookie2=16c7164089a9719ac590d71406d6bd42; t=24bad7068751f7eb324961705d842916; _tb_token_=eadb50bb76313; cna=BO6LFYPBbCcCAdoTzq9dmVJ6; _m_h5_tk=86c9583685a2afd94f361baf5f734c7f_1560682980625; _m_h5_tk_enc=b4551086e1cf3917562b62bc9a203b04; isg=BG5usNi5v1yKHss4yLTn5Bs5tMYwbzJpNjeL3Jg32nEsew7VAP-CeRR9NqEwgiqB";

    private static final String REFERER = "https://alihealth.m.taobao.com/view/pay_consult_user/doctorList";

    private static final String USER_AGENT = "Mozilla/5.0 (Linux; U; Android 6.0.1; zh-CN; MI NOTE LTE Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.108 UCBrowser/11.9.4.974 UWS/2.14.0.42 Mobile Safari/537.36 AliApp(AK/4.7.13) UCBS/2.11.1.1 TTID/700159@alijk_android_4.7.13 WindVane/8.5.0 (AliHealthClient(alihealthclient)) T-UA=android_4.7.13_1080x1920_700159 ANDROID/700159@alijk_android_4.7.13";

    static {
        try {
            trustAllHttpsCertificates();
            // Accept any host name, regardless of what the server certificate says.
            HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() {
                @Override
                public boolean verify(String urlHostName, SSLSession session) {
                    return true;
                }
            });
        } catch (Exception e) {
            // Best effort, as in the original: if the permissive setup fails, the JVM defaults
            // stay in place and the failure is only reported.
            e.printStackTrace();
        }
    }

    /**
     * Installs, as the default for every {@link HttpsURLConnection}, a socket factory whose
     * trust manager performs no certificate checks.
     *
     * @throws NoSuchAlgorithmException if no provider supports the requested protocol name
     * @throws KeyManagementException if initialising the SSL context fails
     */
    private static void trustAllHttpsCertificates()
            throws NoSuchAlgorithmException, KeyManagementException {
        TrustManager[] trustAllCerts = new TrustManager[] {new TrustAllManager()};
        SSLContext sc = SSLContext.getInstance("SSL");
        sc.init(null, trustAllCerts, null);
        HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    }

    /** Trust manager that accepts every client and server certificate chain unchecked. */
    private static final class TrustAllManager implements X509TrustManager {

        /**
         * Returns an empty issuer list; the interface contract requires a non-null array.
         */
        @Override
        public X509Certificate[] getAcceptedIssuers() {
            return new X509Certificate[0];
        }

        @Override
        public void checkServerTrusted(X509Certificate[] certs, String authType)
                throws CertificateException {
            // Intentionally empty: every server chain is accepted.
        }

        @Override
        public void checkClientTrusted(X509Certificate[] certs, String authType)
                throws CertificateException {
            // Intentionally empty: every client chain is accepted.
        }
    }

    /**
     * Sends the request, prints the unwrapped JSON payload and the parsed object, then prints
     * the raw entry and selected fields for each element of {@code data.results}.
     *
     * @param args unused
     * @throws IOException if the request fails or the response is not a JSONP call
     */
    public static void main(String[] args) throws IOException {
        URL url = new URL(REQUEST_URL);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        String body;
        try {
            conn.setRequestMethod("GET");
            conn.addRequestProperty("Cookie", COOKIE);
            conn.addRequestProperty("Referer", REFERER);
            conn.addRequestProperty("User-Agent", USER_AGENT);
            conn.addRequestProperty("Accept", "*/*");
            conn.addRequestProperty("f-refer", "wv_h5");
            // Accept-Encoding is deliberately not sent: the body is read as plain text below.
            conn.addRequestProperty("Accept-Language", "zh-CN,en-US;q=0.8");
            conn.addRequestProperty("Host", "h5api.m.taobao.com");
            conn.addRequestProperty("Connection", "keep-alive");
            conn.connect();
            body = readBody(conn);
        } finally {
            conn.disconnect();
        }

        String result = extractJsonpPayload(body);
        System.err.println(result);
        JSONObject json = JSONObject.parseObject(result);
        System.err.println(json);

        JSONObject data = json == null ? null : json.getJSONObject("data");
        JSONArray results = data == null ? null : data.getJSONArray("results");
        if (results == null) {
            // Avoids a NullPointerException when the payload lacks data.results.
            System.err.println("Response contains no data.results array");
            return;
        }
        for (int i = 0; i < results.size(); i++) {
            JSONObject jsonObject = results.getJSONObject(i);
            System.err.println(jsonObject);
            System.err.println(jsonObject.getString("provName"));
            System.err.println(jsonObject.getString("hospitalName"));
            System.err.println(jsonObject.getString("doctorTitle"));
            System.err.println(jsonObject.getString("doctorName"));
            System.err.println(jsonObject.getJSONArray("diseaseNames"));
            System.err.println(jsonObject.getJSONArray("symList"));
            System.err.println(jsonObject.getString("doctorPic"));
        }
    }

    /**
     * Reads the whole response body as text, concatenating lines without line separators.
     *
     * @param conn a connected HTTP connection
     * @return the response body with line breaks removed
     * @throws IOException if the response stream cannot be opened or read
     */
    private static String readBody(HttpURLConnection conn) throws IOException {
        StringBuilder sb = new StringBuilder();
        // Decode explicitly instead of with the platform default charset.
        // NOTE(review): assumes the endpoint answers in UTF-8 - confirm against its Content-Type.
        try (InputStream is = conn.getInputStream();
                BufferedReader br =
                        new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
        }
        return sb.toString();
    }

    /**
     * Extracts the argument text of a JSONP call such as {@code callback({...})}.
     *
     * <p>The payload is everything between the first {@code '('} and the last {@code ')'}, so
     * the result does not depend on the callback name or on surrounding whitespace.
     *
     * @param response the raw JSONP response text
     * @return the text between the outermost parentheses
     * @throws IOException if {@code response} contains no parenthesised payload
     */
    private static String extractJsonpPayload(String response) throws IOException {
        int open = response.indexOf('(');
        int close = response.lastIndexOf(')');
        if (open < 0 || close <= open) {
            throw new IOException("Response is not a JSONP call: " + response);
        }
        return response.substring(open + 1, close);
    }
}
还有个小米版本的,得另外开篇文章写了。。。
真他妈的坑,还得修改 JDK 的配置才能运行……