JS逆向——中国人民银行爬虫

广州金十信息科技有限公司面试题。。。
中国人民银行爬虫Cookie生成代码

太简单了,这个没必要分析了,直接AST处理一下即可。
但是面试官要求不能用JS引擎……花点时间也不是事,最后还嫌弃我是用Java写爬虫的。。。
幸好嫌弃,后面的流程也没必要了。。。这么简单的题目,质量还真的不如哆啦的好。。。
建议换国内验证码作为面试题。

下面实现了两种方式:一种是直接用JS引擎执行得到结果,另一种是手动把算法抠出来(有个模板)。还有Python版本,就不贴了,就酱紫。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288

import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;

/**
 * Crawler for pages on www.pbc.gov.cn that are served behind the "WZWS"
 * JavaScript challenge.
 *
 * <p>The first request for a protected page returns a small obfuscated script
 * instead of the content. That script derives a token from two values embedded
 * in the page ({@code wzwsquestion} and {@code wzwsfactor}) and navigates to
 * {@code dynamicurl + "?wzwschallenge=" + base64(token)}. Cookies set along the
 * way are kept by a process-wide {@link CookieManager}.
 *
 * <p>Two ways of solving the challenge are implemented:
 * <ul>
 *   <li>{@link #forword(String)} evaluates the page's script plus a
 *       de-obfuscated template in a {@code javax.script} JavaScript engine;</li>
 *   <li>{@link #NotjsCodeForword(String)} re-implements the arithmetic in Java
 *       and needs no script engine at all.</li>
 * </ul>
 */
public class PBC {

    /** Scheme and host that the relative challenge path is appended to. */
    private static final String INDEX = "http://www.pbc.gov.cn";

    private static final String HOME_REFERER = "http://www.pbc.gov.cn/";
    private static final String FINAL_PAGE_REFERER = "http://www.pbc.gov.cn/kejisi/146812/index.html";

    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36";
    private static final String ACCEPT =
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9";

    /** Connect and read timeout applied to every request. */
    private static final int TIMEOUT_MILLIS = 60 * 1000;

    /** Literal prefix the challenge script puts in front of the computed number. */
    private static final String CHALLENGE_PREFIX = "WZWS_CONFIRM_PREFIX_LABEL";

    /** Constant the challenge script adds after multiplying by the factor. */
    private static final int CHALLENGE_OFFSET = 0x1b207;

    /** Created on first use so the engine-free path works on JVMs without a JavaScript engine. */
    private ScriptEngine engine;

    private final CookieManager cookieManager = new CookieManager();

    /**
     * Installs a cookie manager as the JVM-wide default {@link CookieHandler}.
     * Every offered cookie is accepted and logged to stderr.
     */
    public PBC() {
        cookieManager.setCookiePolicy(new CookiePolicy() {
            @Override
            public boolean shouldAccept(URI uri, HttpCookie cookie) {
                System.err.println("更新Cookie池");
                System.err.println(uri);
                System.err.println(cookie.getName() + ":\t\t" + cookie.getValue());
                return true;
            }
        });
        CookieHandler.setDefault(cookieManager);
    }

    /**
     * Returns the JavaScript engine, creating it and installing minimal
     * {@code console}, {@code window} and {@code document} stubs on first use.
     *
     * @throws IllegalStateException if the JVM provides no engine named
     *         "javascript" (Nashorn was removed in JDK 15) or the stubs cannot
     *         be evaluated
     */
    private ScriptEngine engine() {
        if (engine == null) {
            ScriptEngine created = new ScriptEngineManager().getEngineByName("javascript");
            if (created == null) {
                throw new IllegalStateException(
                        "No JavaScript engine is available on this JVM; use the engine-free path instead");
            }
            try {
                created.eval("var console = {log:function(){}}");
                created.eval("var window = {}");
                created.eval("var document = {}");
            } catch (ScriptException e) {
                throw new IllegalStateException("Could not install the browser stubs", e);
            }
            engine = created;
        }
        return engine;
    }

    /**
     * Evaluates a piece of JavaScript in the shared engine.
     *
     * @param jscode script source to evaluate
     * @return the value of the script, or {@code null} if evaluation failed
     *         (the failure is printed to stderr; callers must check results)
     * @throws IllegalStateException if no JavaScript engine is available
     */
    public Object runJsCode(String jscode) {
        try {
            return engine().eval(jscode);
        } catch (ScriptException e) {
            // Best-effort by design: report and let the caller detect the missing result.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Fetches one protected page using the engine-free challenge solver and
     * prints the result to stderr.
     */
    public static void main(String[] args) throws IOException {
        // Alternative sample page: http://www.pbc.gov.cn/fanxiqianju/135153/index.html
        String url = "http://www.pbc.gov.cn/fanxiqianju/135153/135155/135157/3807306/index.html";
        new PBC().NotjsCodeForword(url);
    }

    /**
     * Solves the challenge by running the page's own script in a JavaScript
     * engine and then opens the resulting URL.
     *
     * @param url protected page to request
     * @throws IOException if the request fails, the page does not look like a
     *         challenge page, or the script did not produce a target
     * @throws IllegalStateException if no JavaScript engine is available
     */
    private void forword(String url) throws IOException {
        String html = fetch(url, HOME_REFERER);
        int start = html.indexOf("eval");
        int end = html.indexOf("var encode_version = 'sojson.v5'");
        if (start < 0 || end < start) {
            throw new IOException("Response from " + url + " does not contain the expected challenge script");
        }
        String eval = html.substring(start, end);
        System.err.println("动态执行代码:\n" + eval);
        // SECURITY NOTE(review): this evaluates script received from the network inside this JVM.
        // Only use it against hosts you trust, or prefer NotjsCodeForword.
        runJsCode(eval);
        runJsCode(SOJSON_V5);
        System.err.println("核心跳转器:\n" + SOJSON_V5);
        Object x35ace3 = engine().get("_0x35ace3");
        Object wzwsmethod = engine().get("wzwsmethod");
        System.err.println("控制跳转的类型为:\n" + wzwsmethod);
        System.err.println("跳转的目标为:\n" + x35ace3);
        if (x35ace3 == null) {
            // runJsCode swallows script errors, so a missing variable is the only failure signal.
            throw new IOException("Challenge script did not define the redirect target");
        }

        toPage(INDEX + x35ace3);
    }

    /**
     * Solves the challenge without a JavaScript engine and opens the resulting URL.
     *
     * @param url protected page to request
     * @throws IOException if the request fails or the page does not contain the
     *         expected challenge values
     */
    private void NotjsCodeForword(String url) throws IOException {
        String html = fetch(url, HOME_REFERER);
        System.err.println(html);
        toPage(INDEX + buildChallengePath(html));
    }

    /**
     * Extracts the challenge values from a challenge page and builds the
     * relative URL (path plus {@code wzwschallenge} query) to request next.
     *
     * <p>The values are read from the text between the first {@code var} and
     * the following {@code '.split}, where they appear as
     * {@code |dynamicurl|<path>|wzwsquestion|<question>|wzwsfactor|<int>|wzwsmethod|}.
     *
     * @param html body of the challenge page
     * @return {@code <path>?wzwschallenge=<base64 token>}
     * @throws IOException if any expected marker is missing or the factor is not an integer
     */
    static String buildChallengePath(String html) throws IOException {
        int start = html.indexOf("var");
        int end = start < 0 ? -1 : html.indexOf("'.split", start);
        if (end < 0) {
            throw new IOException("Page does not contain the packed challenge script");
        }
        String jscode = html.substring(start, end);

        String dynamicurl = between(jscode, "|dynamicurl|", "|wzwsquestion|");
        String wzwsquestion = between(jscode, "|wzwsquestion|", "|wzwsfactor|");
        String factorText = between(jscode, "|wzwsfactor|", "|wzwsmethod|");
        int wzwsfactor;
        try {
            wzwsfactor = Integer.parseInt(factorText);
        } catch (NumberFormatException e) {
            throw new IOException("wzwsfactor is not an integer: " + factorText, e);
        }

        System.err.println(jscode);
        System.err.println(dynamicurl);
        System.err.println(wzwsquestion);
        System.err.println(wzwsfactor);

        String challenge = computeChallenge(wzwsquestion, wzwsfactor);
        // The token is ASCII (prefix plus decimal digits); the charset is pinned so the
        // result never depends on the platform default.
        String encoded = Base64.getEncoder().encodeToString(challenge.getBytes(StandardCharsets.UTF_8));
        return dynamicurl + "?wzwschallenge=" + encoded;
    }

    /** Returns the text between the first {@code open} marker and the next {@code close} marker. */
    private static String between(String text, String open, String close) throws IOException {
        int openAt = text.indexOf(open);
        if (openAt < 0) {
            throw new IOException("Challenge script is missing the marker " + open);
        }
        int from = openAt + open.length();
        int to = text.indexOf(close, from);
        if (to < 0) {
            throw new IOException("Challenge script is missing the marker " + close);
        }
        return text.substring(from, to);
    }

    /**
     * Java port of the challenge script's token function: sums the UTF-16 code
     * units of the question, multiplies by the factor, adds a fixed offset and
     * prepends a fixed label.
     *
     * <p>The arithmetic is done in {@code long} because the JavaScript original
     * works on doubles and therefore does not wrap at 32 bits.
     *
     * @param wzwsquestion question string taken from the challenge page
     * @param wzwsfactor multiplier taken from the challenge page
     * @return the unencoded challenge token
     */
    static String computeChallenge(String wzwsquestion, int wzwsfactor) {
        long sum = 0;
        for (int i = 0; i < wzwsquestion.length(); i++) {
            sum += wzwsquestion.charAt(i);
        }
        return CHALLENGE_PREFIX + (sum * wzwsfactor + CHALLENGE_OFFSET);
    }

    /**
     * Requests the final page (with whatever cookies have been collected) and
     * prints its body to stderr.
     */
    private void toPage(String url) throws IOException {
        System.err.println("打开最终的页面:::");
        String html = fetch(url, FINAL_PAGE_REFERER);
        System.err.println(html);
    }

    /** Performs a GET with browser-like headers and returns the response body. */
    private String fetch(String url, String referer) throws IOException {
        HttpURLConnection conn = createConnection(url, HttpMethodType.GET, null, browserHeaders(referer));
        return getStringResult(getInputStream(conn));
    }

    /**
     * Builds the browser-like request headers shared by all requests.
     * Accept-Encoding is deliberately not sent: {@link #getStringResult} reads
     * the body as plain text and does not decompress it.
     */
    private static Map<String, String> browserHeaders(String referer) {
        Map<String, String> headers = new HashMap<>();
        headers.put("Host", "www.pbc.gov.cn");
        headers.put("DNT", "1");
        headers.put("Upgrade-Insecure-Requests", "1");
        headers.put("User-Agent", USER_AGENT);
        headers.put("Accept", ACCEPT);
        headers.put("Referer", referer);
        headers.put("Accept-Language", "zh-CN,zh;q=0.9,zh-TW;q=0.8,en;q=0.7");
        headers.put("Connection", "keep-alive");
        return headers;
    }

    /**
     * Reads a stream to the end as UTF-8 text and closes it.
     *
     * <p>Lines are concatenated without their line terminators; the parsing in
     * this class relies on that single-line form.
     *
     * @param inputStream stream to drain; {@code null} is treated as an empty body
     * @return the decoded text with line breaks removed, never {@code null}
     * @throws IOException if reading fails
     */
    public String getStringResult(InputStream inputStream) throws IOException {
        if (inputStream == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line);
            }
        }
        return sb.toString();
    }

    /**
     * Returns the response body stream for a connection, whatever the status.
     *
     * <p>Statuses of 400 and above are read from the error stream; everything
     * else from the regular input stream. If the server sent an error status
     * without a body, an empty stream is returned instead of {@code null}.
     *
     * @param conn connection whose response should be read
     * @return a non-null stream over the response body
     * @throws IOException if the response cannot be obtained
     */
    public static InputStream getInputStream(HttpURLConnection conn) throws IOException {
        if (conn.getResponseCode() < HttpURLConnection.HTTP_BAD_REQUEST) {
            return conn.getInputStream();
        }
        InputStream errorStream = conn.getErrorStream();
        return errorStream != null ? errorStream : new ByteArrayInputStream(new byte[0]);
    }

    /**
     * Opens and configures a connection. Nothing is sent for GET; for POST with
     * a non-empty body the body is written immediately as UTF-8.
     *
     * @param url absolute URL to request
     * @param method HTTP method to use
     * @param body request body, only used for POST; may be {@code null}
     * @param headers request headers to add; may be {@code null}
     * @return the configured connection
     * @throws IOException if the URL is malformed or the body cannot be written
     */
    public HttpURLConnection createConnection(String url, HttpMethodType method, String body,
            Map<String, String> headers) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
        // All settings must be applied before the body is written, because writing connects.
        conn.setConnectTimeout(TIMEOUT_MILLIS);
        conn.setReadTimeout(TIMEOUT_MILLIS);
        conn.setInstanceFollowRedirects(true);
        conn.setRequestMethod(method.name());
        if (headers != null) {
            for (Map.Entry<String, String> header : headers.entrySet()) {
                conn.addRequestProperty(header.getKey(), header.getValue());
            }
        }
        if (method == HttpMethodType.POST && body != null && !body.isEmpty()) {
            // getOutputStream() is rejected unless output has been enabled first.
            conn.setDoOutput(true);
            try (Writer out = new OutputStreamWriter(conn.getOutputStream(), StandardCharsets.UTF_8)) {
                out.write(body);
            }
        }
        return conn;
    }

    /** HTTP methods supported by {@link #createConnection}. */
    enum HttpMethodType {
        GET("GET"),
        POST("POST");

        final String type;

        HttpMethodType(String type) {
            this.type = type;
        }

        /**
         * Looks up a method by its wire name.
         *
         * @param name method name such as {@code "GET"}; matched case-sensitively
         * @return the matching constant, or {@code null} if {@code name} is
         *         null, empty or unknown
         */
        public HttpMethodType getType(String name) {
            if (name == null || name.isEmpty()) {
                return null;
            }
            for (HttpMethodType candidate : HttpMethodType.values()) {
                if (candidate.type.equals(name)) {
                    return candidate;
                }
            }
            return null;
        }
    }

    /**
     * De-obfuscated copy of the challenge page's redirect script. The final
     * navigation is commented out, so evaluating it only defines
     * {@code _0x35ace3} (the redirect target). It expects {@code dynamicurl},
     * {@code wzwsquestion} and {@code wzwsfactor} to be defined already.
     */
    private static final String SOJSON_V5 = "\n" +
            "function _0x412a72(_0x2a28c0) {\n" +
            " var _0x2097d8 = \"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\";\n" +
            " var _0x27d1f5, _0x4262d0, _0xc876d4;\n" +
            " var _0x5526a7, _0x138cf5, _0x4093e6;\n" +
            " _0xc876d4 = _0x2a28c0.length;\n" +
            " _0x4262d0 = 0x0;\n" +
            " _0x27d1f5 = '';\n" +
            " while ((_0x4262d0 < _0xc876d4)) {\n" +
            " _0x5526a7 = (_0x2a28c0.charCodeAt(_0x4262d0++) & 0xff);\n" +
            " if ((_0x4262d0 == _0xc876d4)) {\n" +
            " _0x27d1f5 += _0x2097d8.charAt((_0x5526a7 >> 0x2));\n" +
            " _0x27d1f5 += _0x2097d8.charAt(((_0x5526a7 & 0x3) << 0x4));\n" +
            " _0x27d1f5 += '==';\n" +
            " break;\n" +
            " }\n" +
            " _0x138cf5 = _0x2a28c0.charCodeAt(_0x4262d0++);\n" +
            " if ((_0x4262d0 == _0xc876d4)) {\n" +
            " _0x27d1f5 += _0x2097d8.charAt((_0x5526a7 >> 0x2));\n" +
            " _0x27d1f5 += _0x2097d8.charAt((((_0x5526a7 & 0x3) << 0x4) | ((_0x138cf5 & 0xf0) >> 0x4)));\n" +
            " _0x27d1f5 += _0x2097d8.charAt(((_0x138cf5 & 0xf) << 0x2));\n" +
            " _0x27d1f5 += '=';\n" +
            " break;\n" +
            " }\n" +
            " _0x4093e6 = _0x2a28c0.charCodeAt(_0x4262d0++);\n" +
            " _0x27d1f5 += _0x2097d8.charAt((_0x5526a7 >> 0x2));\n" +
            " _0x27d1f5 += _0x2097d8.charAt(((_0x5526a7 & 0x3) << 0x4 | (_0x138cf5 & 0xf0) >> 0x4));\n" +
            " _0x27d1f5 += _0x2097d8.charAt((((_0x138cf5 & 0xf) << 0x2) | (_0x4093e6 & 0xc0) >> 0x6));\n" +
            " _0x27d1f5 += _0x2097d8.charAt((_0x4093e6 & 0x3f));\n" +
            " }\n" +
            " return _0x27d1f5;\n" +
            "}\n" +
            "\n" +
            "function _0x344cd4() {\n" +
            " var _0x3c9135 = 0x0;\n" +
            " var _0x43beea = 0x0;\n" +
            " for (_0x43beea = 0x0; (_0x43beea < wzwsquestion.length); _0x43beea++) {\n" +
            " _0x3c9135 += wzwsquestion.charCodeAt(_0x43beea);\n" +
            " }\n" +
            " _0x3c9135 *= wzwsfactor;\n" +
            " _0x3c9135 += 0x1b207;\n" +
            " return (\"WZWS_CONFIRM_PREFIX_LABEL\" + _0x3c9135);\n" +
            "}\n" +
            "function _0x2ff265(_0x26b826, _0xea8bd1) {\n" +
            " var _0x15a9ed = document.createElement(\"form\");\n" +
            " _0x15a9ed.action = _0x26b826;\n" +
            " _0x15a9ed.method = 'post';\n" +
            " _0x15a9ed.style.display = \"none\";\n" +
            " if ((_0xea8bd1.search('=') != -0x1)) {\n" +
            " var _0x573df6 = _0xea8bd1.split('&');\n" +
            " for (var _0x426cb4 = 0x0; (_0x426cb4 < _0x573df6.length); _0x426cb4++) {\n" +
            " var _0x2a293f = document.createElement(\"textarea\");\n" +
            " var _0x8ad1c0 = _0x573df6[_0x426cb4];\n" +
            " var _0x422f0a = _0x8ad1c0.split('=');\n" +
            " _0x2a293f.name = _0x422f0a[0x0];\n" +
            " _0x2a293f.value = _0x422f0a[0x1];\n" +
            " _0x15a9ed.appendChild(_0x2a293f);\n" +
            " }\n" +
            " }\n" +
            " document.body.appendChild(_0x15a9ed);\n" +
            " _0x15a9ed.submit();\n" +
            " return _0x15a9ed;\n" +
            "}\n" +
            "var _0xb14971 = _0x344cd4();\n" +
            "var _0x10ace8 = _0x412a72(_0xb14971.toString());\n" +
            "var _0x35ace3 = (dynamicurl + \"?wzwschallenge=\") + _0x10ace8;" +
            "// if (wzwsmethod == 'post') {\n" +
            "// _0x2ff265(_0x35ace3, wzwsparams);\n" +
            "// } else {\n" +
            "// window.location = _0x35ace3;\n" +
            "// }\n" +
            "\n";
}
谢谢,爱你么么哒