package com.walker.wxtools.spider;
import com.walker.infrastructure.utils.KeyValue;
import com.walker.infrastructure.utils.StringUtils;
import com.walker.spider.KeyValueParse;
import com.walker.wxtools.AccountArticle;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.client.RestTemplate;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Random;
import java.util.concurrent.TimeUnit;
/**
 * Mock scheduled collection task; it keeps running until {@link #terminate()} is called.
 *
 * <p>Login parameters are supplied through {@link #setCookieAndParameter(String, String)}.
 * The first call starts a background thread; later calls only replace the spider that the
 * running thread uses, which allows collection to continue with refreshed login data.
 *
 * <p>This object is a mock program, but it can also be used directly: from the given
 * parameters it fetches the article list of an official account.
 * For usage details see {@linkplain AccountArticleSpider}.
 *
 * <p>Thread-safety: {@link #terminate()} may be called from any thread. The fields shared
 * between the caller and the collection thread ({@code stop}, {@code spider},
 * {@code thread}) are {@code volatile}; the remaining counters are only written by the
 * collection thread.
 *
 * @author 时克英
 * @date 2023-04-27
 */
public class MockSpiderScheduler {

    public static final String KEY_COOKIE = "cookie";
    public static final String KEY_REFERER = "referer";
    public static final String KEY_FAKEID = "fakeid";
    public static final String KEY_TOKEN = "token";

    /** Base pause between two requests, in seconds; multiplied by a random factor of 1..9. */
    private static final long SLEEP_SECONDS = 60 * 2;

    /** Base pause, in seconds, used once the cookie is considered expired. */
    private static final long STOP_SECONDS = 600 * 2;

    /** When the response-error counter has reached this value it restarts from zero. */
    private static final int ERROR_COUNT_WRAP = 10;

    /** More than this many counted response errors triggers {@link #cookieExpired()}. */
    private static final int COOKIE_EXPIRED_THRESHOLD = 2;

    protected final transient Logger logger = LoggerFactory.getLogger(getClass());

    /** Response errors counted since the last successful request (or since the last wrap). */
    private int responseErrorCount = 0;

    /** Current base pause of the collection thread, in seconds. */
    private long sleepTime = SLEEP_SECONDS;

    /** Page currently being collected. */
    private int currentPage = 0;

    /** After this many pages the page counter goes back to 0 to re-collect the newest data. */
    private int resetPageSize = 5;

    // Shared between the caller thread and the collection thread, hence volatile.
    private volatile Thread thread = null;
    private volatile boolean stop = false;
    private volatile AccountArticleSpider spider = null;

    private final RestTemplate restTemplate;
    private KeyValueParse keyValueParse;
    private final List<String> wantedKeys = new ArrayList<>(8);

    /**
     * Creates a scheduler that looks for the cookie, referer, fakeid and token keys in the
     * pasted login content and uses its own {@link RestTemplate}.
     */
    public MockSpiderScheduler() {
        wantedKeys.add(KEY_COOKIE);
        wantedKeys.add(KEY_REFERER);
        wantedKeys.add(KEY_FAKEID);
        wantedKeys.add(KEY_TOKEN);
        this.restTemplate = new RestTemplate();
    }

    /**
     * Performs a single request; for internal testing only.
     */
    @Deprecated
    private void scheduleRequest() {
        try {
            this.spider.request();
        } catch (RequestException e) {
            handleRequestFailure(e);
        }
    }

    /**
     * Logs a failed request and keeps the response-error counter up to date.
     *
     * <p>For a response error the counter is incremented (after wrapping to zero once it
     * has reached {@link #ERROR_COUNT_WRAP}); as soon as it exceeds
     * {@link #COOKIE_EXPIRED_THRESHOLD}, {@link #cookieExpired()} is invoked. Any other
     * request failure is only logged.
     *
     * @param e the failure raised by the spider
     * @return {@code true} if {@link #cookieExpired()} was invoked for this failure
     */
    private boolean handleRequestFailure(RequestException e) {
        if (!e.isResponseError()) {
            logger.error("远程调用其它错误,很可能是网络问题,需要尝试重试,msg=" + e.getMessage(), e);
            return false;
        }
        if (this.responseErrorCount >= ERROR_COUNT_WRAP) {
            this.responseErrorCount = 0;
        }
        logger.error("请求响应错误,很可能是cookie过期,提醒用户重新获取登录信息。msg=" + e.getMessage(), e);
        this.responseErrorCount++;
        if (this.responseErrorCount > COOKIE_EXPIRED_THRESHOLD) {
            this.cookieExpired();
            return true;
        }
        return false;
    }

    /**
     * Notifies the user that the login has expired and the cookie must be set again.
     * This implementation only logs; subclasses may override it.
     */
    protected void cookieExpired() {
        logger.info("通知用户重新登录,并提交cookie信息");
    }

    /**
     * Business hook that stores the collected data. This implementation only logs each
     * article; subclasses may override it.
     *
     * @param list the collected articles; may be {@code null} or empty
     */
    protected void saveData(List<AccountArticle> list) {
        if (StringUtils.isEmptyList(list)) {
            logger.warn("采集成功一次,但空数据");
            return;
        }
        logger.info("采集到数量:" + list.size());
        logger.info("数据库保存开始...");
        for (AccountArticle article : list) {
            logger.info(article.toString());
        }
    }

    /**
     * Sets the login parameters and, on the first call, starts the collection thread.
     *
     * <p>A scheduler that has been terminated is not restarted by this method.
     *
     * @param content   text pasted from the browser containing the cookie and parameters
     * @param delimiter separator between lines; defaults to {@code "\n"} when empty
     * @throws IllegalArgumentException if nothing could be parsed, or if any of cookie,
     *                                  referer, fakeid or token is missing
     */
    public void setCookieAndParameter(String content, String delimiter) {
        if (StringUtils.isEmpty(delimiter)) {
            delimiter = "\n";
        }
        this.keyValueParse = new KeyValueParse();
        this.keyValueParse.setDelimiter(delimiter);
        List<KeyValue<String, String>> list = this.keyValueParse.parse(content, this.wantedKeys);
        if (list == null || list.isEmpty()) {
            // NOTE(review): content may contain the cookie; consider not echoing it here.
            throw new IllegalArgumentException("未解析出来任何请求参数! content=" + content);
        }

        String fakeId = null;
        String token = null;
        int count = 5;
        String cookie = null;
        String referer = null;
        for (KeyValue<String, String> kv : list) {
            String key = kv.getKey();
            if (KEY_COOKIE.equals(key)) {
                cookie = kv.getValue();
            } else if (KEY_FAKEID.equals(key)) {
                fakeId = kv.getValue();
            } else if (KEY_REFERER.equals(key)) {
                referer = kv.getValue();
            } else if (KEY_TOKEN.equals(key)) {
                token = kv.getValue();
            }
        }
        if (StringUtils.isEmpty(fakeId) || StringUtils.isEmpty(token)
                || StringUtils.isEmpty(cookie) || StringUtils.isEmpty(referer)) {
            throw new IllegalArgumentException("解析返回参数不足,无法完成调用。");
        }

        // Configure the spider completely before publishing it through the volatile field,
        // so that an already running collection thread never sees a half-initialised one.
        AccountArticleSpider configured = new AccountArticleSpider();
        configured.setRestTemplate(this.restTemplate);
        configured.setFakeId(fakeId);
        configured.setToken(token);
        configured.setCount(count);
        configured.setBegin(this.currentPage);
        configured.setCookie(cookie);
        configured.setReferer(referer);
        this.spider = configured;

        if (this.thread == null) {
            logger.info("创建线程...");
            Thread worker = new Thread(new GatherTask());
            this.thread = worker;
            worker.start();
        }
    }

    /**
     * Collection loop: request one page, hand it to {@link #saveData(List)}, then sleep
     * for a randomised period. Runs until {@link #terminate()} is called or the thread is
     * interrupted.
     */
    private class GatherTask implements Runnable {

        // Unseeded on purpose: a fixed seed would repeat the same delay sequence every run.
        private final Random random = new Random();

        @Override
        public void run() {
            logger.info("............线程启动:" + new Date());
            while (!stop && !Thread.currentThread().isInterrupted()) {
                try {
                    List<AccountArticle> list = spider.request();
                    saveData(list);
                    // A successful round means the login works: back to the normal state.
                    responseErrorCount = 0;
                    sleepTime = SLEEP_SECONDS;
                    logger.info("采集第 {} 页完成", currentPage);
                    currentPage++;
                    if (currentPage >= resetPageSize) {
                        logger.debug("达到触发页数,复位重新开始采集(最新的)");
                        currentPage = 0;
                    }
                    // Tell the spider which page to collect next.
                    spider.setBegin(currentPage);
                } catch (RequestException e) {
                    if (handleRequestFailure(e)) {
                        // New login data is required; force a much longer sleep.
                        sleepTime = STOP_SECONDS;
                    }
                } catch (Exception ex) {
                    logger.error("其他异常,可能数据保存错误:" + ex.getMessage(), ex);
                } finally {
                    pause();
                }
            }
            logger.warn("采集线程终止!");
        }

        /**
         * Sleeps for {@code sleepTime} seconds multiplied by a random factor in 1..9
         * (a drawn 0 is replaced by 2). An interrupt ends the sleep early and is
         * re-asserted so that the loop condition in {@link #run()} stops the thread.
         */
        private void pause() {
            int factor = this.random.nextInt(10);
            if (factor == 0) {
                factor = 2;
            }
            long seconds = sleepTime * factor;
            try {
                logger.debug("线程休眠 {} 秒", seconds);
                TimeUnit.SECONDS.sleep(seconds);
            } catch (InterruptedException e) {
                // Restore the flag instead of swallowing it; run() checks it and exits.
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Stops the collection thread. The thread is also interrupted so that it does not
     * have to finish a possibly very long sleep before noticing the stop request.
     */
    public void terminate() {
        this.stop = true;
        Thread worker = this.thread;
        if (worker != null) {
            worker.interrupt();
        }
    }
}