package com.walker.wxtools.spider;
|
|
import com.fasterxml.jackson.databind.JsonNode;
|
import com.fasterxml.jackson.databind.node.ArrayNode;
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
import com.walker.infrastructure.utils.JsonUtils;
|
import com.walker.infrastructure.utils.StringUtils;
|
import com.walker.wxtools.AccountArticle;
|
import org.slf4j.Logger;
|
import org.slf4j.LoggerFactory;
|
import org.springframework.http.HttpEntity;
|
import org.springframework.http.HttpHeaders;
|
import org.springframework.http.HttpMethod;
|
import org.springframework.http.ResponseEntity;
|
import org.springframework.web.client.RestTemplate;
|
|
import java.util.ArrayList;
|
import java.util.List;
|
|
/**
|
* 公众号文章列表获取。
|
* <p>该方法需要获得登录信息,需要外部传入cookie等参数,从浏览器network中复制,header,parameter即可。</p>
|
* <pre>
|
* 1) 需要登录一个公众号,谁的都可以;
|
* 2) 在图文列表中,找到一个,点击编辑
|
* 3) 进入编辑界面,点击编辑器上面的'超链接'按钮,会弹出选择链接界面
|
* 4) 此时可以选择'其他公众号',搜索"河南邮政微邮局",这时就能查询出来公众号的文章列表
|
* 5) 浏览器调试模式,能看到请求的参数,此时复制:header/ parameter部分到文本编辑器中。(前缀带:号的不要)
|
* 6) 把复制的内容设置到对象中,调用方法:setCookieAndParameter('内容', '\n'),分隔符可以根据实际情况定。
|
* </pre>
|
* @author 时克英
|
* @date 2023-04-26
|
*/
|
public class AccountArticleSpider {
|
|
protected final transient Logger logger = LoggerFactory.getLogger(getClass());
|
|
public List<AccountArticle> request() throws RequestException {
|
HttpHeaders headers = new HttpHeaders();
|
headers.add("accept", "*/*");
|
headers.add("accept-language", "zh-CN,zh;q=0.9,en;q=0.8");
|
headers.add("user-agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36");
|
headers.add("x-requested-with", "XMLHttpRequest");
|
headers.add("cookie", this.cookie);
|
headers.add("referer", this.referer);
|
|
HttpEntity<String> request = new HttpEntity<>(null, headers);
|
|
// URL
|
StringBuilder urlObj = new StringBuilder(URL);
|
urlObj.append(StringUtils.CHAR_AND).append("fakeid=").append(fakeId);
|
urlObj.append(StringUtils.CHAR_AND).append("token=").append(token);
|
urlObj.append(StringUtils.CHAR_AND).append("begin=").append(begin);
|
urlObj.append(StringUtils.CHAR_AND).append("count=").append(count);
|
|
try{
|
// this.restTemplate.g
|
ResponseEntity<String> response = this.restTemplate.exchange(urlObj.toString(), HttpMethod.GET, request, String.class);
|
if(response.getStatusCodeValue() == 200){
|
logger.info("成功抓取一次: " + response.getBody());
|
ObjectNode data = JsonUtils.jsonStringToObjectNode(response.getBody());
|
|
// 判断返回是否错误响应
|
JsonNode baseResp = data.get("base_resp");
|
int ret = baseResp.get("ret").intValue();
|
logger.info("-----------> ret = " + ret);
|
if(ret != 0){
|
logger.warn("没有返回数据,说明返回的异常响应:{}", baseResp.textValue());
|
throw new RequestException(true, response.getBody(), null);
|
}
|
|
//
|
if(data.get("app_msg_cnt").intValue() <= 0){
|
logger.info("app_msg_cnt <= 0,没有返回任何数据");
|
return null;
|
}
|
|
String arrays = data.get("app_msg_list").toString();
|
logger.debug(arrays);
|
ArrayNode arrayNode = JsonUtils.toJsonArray(arrays);
|
|
List<AccountArticle> resulList = new ArrayList<>(8);
|
|
if(arrayNode != null && arrayNode.size() > 0){
|
JsonNode jsonNode = null;
|
AccountArticle article = null;
|
for(int i=0; i<arrayNode.size(); i++){
|
jsonNode = arrayNode.get(i);
|
article = new AccountArticle();
|
article.setAid(jsonNode.get("aid").asText());
|
article.setTitle(jsonNode.get("title").asText());
|
article.setLink(jsonNode.get("link").textValue());
|
article.setCover(jsonNode.get("cover").textValue());
|
article.setUpdateTime(jsonNode.get("update_time").longValue());
|
article.setAppmsgid(jsonNode.get("appmsgid").textValue());
|
resulList.add(article);
|
}
|
}
|
return resulList;
|
|
} else {
|
logger.error("调用接口返回结果失败,code = " + response.getStatusCodeValue());
|
throw new RequestException(true, response.getStatusCodeValue() + ":" + response.getBody(), null);
|
}
|
|
} catch (Exception ex){
|
logger.error("请求调用接口错误:" + ex.getMessage(), ex);
|
throw new RequestException(false, ex.getMessage(), ex);
|
}
|
}
|
|
// public void setRequestParameter(String content){
|
// }
|
|
public void setRestTemplate(RestTemplate restTemplate) {
|
this.restTemplate = restTemplate;
|
}
|
|
public void setFakeId(String fakeId) {
|
this.fakeId = fakeId;
|
}
|
|
public void setToken(String token) {
|
this.token = token;
|
}
|
|
public void setBegin(int begin) {
|
this.begin = begin;
|
}
|
|
public void setCount(int count) {
|
this.count = count;
|
}
|
|
public void setCookie(String cookie) {
|
this.cookie = cookie;
|
}
|
|
public void setReferer(String referer) {
|
this.referer = referer;
|
}
|
|
private String cookie;
|
private String referer;
|
private String fakeId;
|
private String token;
|
private int begin = 0;
|
private int count = 5;
|
|
private RestTemplate restTemplate;
|
|
private static final String URL = "https://mp.weixin.qq.com/cgi-bin/appmsg?action=list_ex&lang=zh_CN&f=json&ajax=1&type=9&query=";
|
}
|