这篇文章说明了一个人吃饱了有多么无聊
之前我坛有两位dalao爬了两次,但是耗时都很长.
于是我就想再爬一次.
而且要更快,更多,更方便.
在做MCBBS水怪鉴定时我了解到有一个API,数据详细得一批.
而且还是JSON!
这可比正则匹配HTML快多了.
走着.
目标
- 爬取更详细的数据
- 统计更多的全站数据
- 方便的增减进程
基本构思
- 用MySQL存储数据
- 使用MySQL分发UID,实现方便的增减进程而不影响整体
- 当然要使用API
开始
API地址:http://www.mcbbs.net/api/mobile/index.php?module=profile&uid=
比如我:
http://www.mcbbs.net/api/mobile/index.php?module=profile&uid=2048697
然后我们能够拿到一大串JSON.
格式化以后就像这样:
{
"Version": "1",
"Charset": "UTF-8",
"Variables": {
"cookiepre": "ZxYQ_8cea_",
"auth": null,
"saltkey": "iq3qKD4k",
"member_uid": "0",
"member_username": "",
"member_avatar": "http:\/\/www.mcbbs.net\/uc_server\/avatar.php?uid=0&size=small",
"groupid": "7",
"formhash": "6c426f67",
"ismoderator": null,
"readaccess": "1",
"notice": {
"newpush": "0",
"newpm": "0",
"newprompt": "0",
"newmypost": "0"
},
"space": {
"uid": "2048697",
"username": "Zapic",
"status": "0",
"emailstatus": "1",
"avatarstatus": "1",
"videophotostatus": "0",
"adminid": "0",
"groupid": "22",
"groupexpiry": "0",
"extgroupids": "",
"regdate": "2017-1-7 09:56",
"credits": "2104",
"notifysound": "0",
"timeoffset": "9999",
"newpm": "0",
"newprompt": "0",
"accessmasks": "0",
"allowadmincp": "0",
"onlyacceptfriendpm": "0",
"conisbind": "0",
"freeze": "0",
"extcredits1": "271",
"extcredits2": "51",
"extcredits3": "5",
"extcredits4": "3",
"extcredits5": "1",
"extcredits6": "0",
"extcredits7": "38",
"extcredits8": "327",
"friends": "12",
"posts": "932",
"threads": "87",
"digestposts": "0",
"doings": "0",
"blogs": "0",
"albums": "0",
"sharings": "0",
"attachsize": " 0 B ",
"views": "90",
"oltime": "232",
"todayattachs": "0",
"todayattachsize": "0",
"feeds": "0",
"follower": "1",
"following": "0",
"newfollower": "1",
"blacklist": "0",
"videophoto": "",
"spacename": "",
"spacedescription": "",
"domain": "",
"addsize": "0",
"addfriend": "0",
"menunum": "0",
"theme": "",
"spacecss": "",
"blockposition": "",
"recentnote": "[\u6765\u81eaZapic]\u53c8\u53cc\u53d2\u53d5\u662f\u8bba\u575b\u722c\u53d6\u8ba1\u5212!",
"spacenote": "",
"privacy": {
"feed": {
"poll": "1",
"joinpoll": "1",
"invite": "1",
"task": "1",
"profile": "1",
"newthread": "1"
},
"view": {
"index": "0",
"friend": "3",
"wall": "3",
"home": "1",
"doing": "1",
"blog": "1",
"album": "1",
"share": "1"
},
"profile": {
"field1": "0",
"field2": "0",
"site": "0",
"qq": "0",
"gender": "3",
"birthday": "3"
},
"filter_note": [],
"filter_icon": [],
"filter_gid": []
},
"feedfriend": "2624518,136103,42863,1694714,2505673,1749236,458143,1741243,2578620,2212225,1496294,1523844",
"acceptemail": [],
"magicgift": "",
"stickblogs": "",
"publishfeed": "0",
"customshow": "26",
"customstatus": "",
"medals": [{
"name": "Java\u6b63\u7248\u52cb\u7ae0",
"image": "m_p_pc.png",
"description": "\u7ecf\u8bba\u575b\u9a8c\u8bc1\u7684Java\u6b63\u7248\u73a9\u5bb6\uff0c\u652f\u6301\u6b63\u7248\uff01",
"medalid": "78"
}, {
"name": "\u77f3\u9550\u77ff\u5de5\u52cb\u7ae0",
"image": "m_a2.png",
"description": "\u52a0\u5165\u8bba\u575b\u6709\u6bb5\u65f6\u95f4\u7684MC\u73a9\u5bb6\uff0850\u94bb\u77f3\uff09",
"medalid": "101"
}, {
"name": "\u5c0f\u9ea6\u79cd\u52cb\u7ae0",
"image": "m_rc1.png",
"description": "\u5c0f\u6709\u4eba\u6c14\u7684MC\u73a9\u5bb6\uff0880\u4eba\u6c14\uff09",
"medalid": "107"
}, {
"name": "\u94c1\u9550\u77ff\u5de5\u52cb\u7ae0",
"image": "m_a3.png",
"description": "\u5df2\u7ecf\u5728\u8bba\u575b\u6d3b\u8dc3\u4e86\u5f88\u957f\u65f6\u95f4\u7684\u73a9\u5bb6\uff08200\u94bb\u77f3\uff09",
"medalid": "102"
}, {
"name": "\u9aa8\u7070\u52cb\u7ae0",
"image": "m_c4.png",
"description": "\u6ce8\u518c\u8d85\u8fc7\u4e24\u5e74\u7684\u8001\u7528\u6237\uff0c\u9aa8\u7070\u7ea7\u73a9\u5bb6\uff01",
"medalid": "112"
}],
"sightml": "<a href=\"http:\/\/www.mcbbs.net\/plugin.php?id=link_redirect&target=https%3A%2F%2Fi.zapic.ml\" target=\"_blank\"><img id=\"aimg_uh7ST\" onclick=\"zoom(this, this.src, 0, 0, 0)\" class=\"zoom\" width=\"700\" height=\"165\" src=\"https:\/\/i.zapic.ml\/ip?key=2e0c7\" border=\"0\" alt=\"\" \/><\/a><br \/>\r\n<img id=\"aimg_t5gmZ\" onclick=\"zoom(this, this.src, 0, 0, 0)\" class=\"zoom\" width=\"1\" height=\"1\" src=\"https:\/\/i.zapic.ml\/ip\/check.php?key=2e0c7\" border=\"0\" alt=\"\" \/><br \/>\r\n1w+\u8bbf\u95ee\u7eaa\u5ff5! \u2193\u4f60\u4ee5\u4e3a\u6211\u5929\u5929\u6362\u7b7e\u540d\u6863\u7684\u56fe?\u5176\u5b9e\u6211\u53ea\u662f\u5077\u5077\u6478\u6478\u5199\u4e86\u4e2a\u811a\u672c\u2193<br \/>\r\n<img id=\"aimg_j6lb4\" onclick=\"zoom(this, this.src, 0, 0, 0)\" class=\"zoom\" src=\"https:\/\/i.zapic.ml\/ip\/randpic.php\" onmouseover=\"img_onmouseoverfunc(this)\" onload=\"thumbImg(this)\" border=\"0\" alt=\"\" \/>",
"groupterms": "a:0:{}",
"authstr": "1544970425\t1\tb3ufc0",
"groups": "",
"attentiongroup": "",
"gender": "0",
"birthyear": "2002",
"birthmonth": "9",
"birthday": "29",
"constellation": "\u5929\u79e4\u5ea7",
"zodiac": "\u9a6c",
"nationality": "",
"birthprovince": "",
"birthcity": "",
"resideprovince": "",
"residecity": "",
"residedist": "",
"residecommunity": "",
"residesuite": "",
"graduateschool": "",
"company": "",
"education": "",
"occupation": "",
"position": "",
"revenue": "",
"affectivestatus": "",
"lookingfor": "",
"bloodtype": "",
"height": "",
"weight": "",
"site": "https:\/\/i.zapic.ml",
"bio": "",
"interest": "",
"field1": "Zapic233",
"field2": "Zapic233",
"field3": "",
"field4": "",
"field5": "",
"field6": "",
"field7": "",
"field8": "",
"birthdist": "",
"birthcommunity": "",
"regipport": "0",
"lastipport": "0",
"lastvisit": "2019-7-27 18:53",
"lastactivity": "2019-7-27 18:53",
"lastpost": "2019-7-27 09:30",
"lastsendmail": "0",
"notifications": "0",
"myinvitations": "0",
"pokes": "0",
"pendingfriends": "0",
"invisible": "1",
"buyercredit": "0",
"sellercredit": "0",
"favtimes": "0",
"sharetimes": "0",
"profileprogress": "100",
"port": "3278",
"admingroup": {
"icon": ""
},
"group": {
"type": "member",
"grouptitle": "Lv.8 \u8003\u53e4\u5bb6",
"creditshigher": "2000",
"creditslower": "5000",
"stars": "8",
"color": "",
"icon": "",
"readaccess": "120",
"allowgetattach": "1",
"allowgetimage": "1",
"allowmediacode": "1",
"maxsigsize": "1000",
"allowbegincode": "0",
"userstatusby": "1"
},
"lastactivitydb": "1564224791",
"buyerrank": "0",
"sellerrank": "0"
},
"extcredits": {
"1": {
"img": "",
"title": "\u4eba\u6c14",
"unit": "\u70b9",
"ratio": "1",
"showinthread": null,
"allowexchangein": null,
"allowexchangeout": null
},
"2": {
"img": "",
"title": "\u91d1\u7c92",
"unit": "\u7c92",
"ratio": "1",
"showinthread": null,
"allowexchangein": "1",
"allowexchangeout": "1"
},
"3": {
"img": "",
"title": "\u91d1\u952d",
"unit": "\u5757",
"ratio": "100",
"showinthread": null,
"allowexchangein": "1",
"allowexchangeout": "1"
},
"4": {
"img": "",
"title": "\u7eff\u5b9d\u77f3",
"unit": "\u9897",
"ratio": "0",
"showinthread": null,
"allowexchangein": null,
"allowexchangeout": null
},
"5": {
"img": "",
"title": "\u4e0b\u754c\u4e4b\u661f",
"unit": "\u679a",
"ratio": "0",
"showinthread": null,
"allowexchangein": null,
"allowexchangeout": null
},
"6": {
"img": "",
"title": "\u8d21\u732e",
"unit": "\u70b9",
"ratio": "0",
"showinthread": null,
"allowexchangein": null,
"allowexchangeout": null
},
"7": {
"img": "",
"title": "\u7231\u5fc3",
"unit": "\u5fc3",
"ratio": "0",
"showinthread": null,
"allowexchangein": null,
"allowexchangeout": null
},
"8": {
"img": "",
"title": "\u94bb\u77f3",
"unit": "\u9897",
"ratio": "5000",
"showinthread": null,
"allowexchangein": null,
"allowexchangeout": null
}
}
}
}
真的是…又长废物又多啊…
不管了,先Get他:
<?php
// UID of the profile to fetch; 2048697 is the account shown in the sample response.
$uid = 2048697;
// The query string must not contain a space before "&uid=", or the HTTP request line is malformed.
$raw = file_get_contents("http://www.mcbbs.net/api/mobile/index.php?module=profile&uid=" . $uid);
// file_get_contents is used here only for brevity; cURL is the better choice.
?>
然后解析再拆开:
// Decode the response. The profile fields are nested under Variables -> space,
// so $data is pointed at that sub-array rather than at the top-level document.
$json = json_decode($raw, true);
$data = $json['Variables']['space'];

// Per-user credit counters (extcredits1..8).
$ex1 = $data['extcredits1'];
$ex2 = $data['extcredits2'];
$ex3 = $data['extcredits3'];
$ex4 = $data['extcredits4'];
$ex5 = $data['extcredits5'];
$ex6 = $data['extcredits6'];
$ex7 = $data['extcredits7'];
$ex8 = $data['extcredits8'];

// Activity counters and total credits.
$posts = $data['posts'];
$threads = $data['threads'];
$friends = $data['friends'];
$credits = $data['credits'];

// User group information.
$groupid = $data['groupid'];
$groupfriendlyname = $data['group']['grouptitle'];
$grouptype = $data['group']['type'];
$exgroupids = $data['extgroupids'];

// Registration date, once as a full date and once as a YYYYMM bucket key.
$regdate = date("Y-m-d", strtotime($data['regdate']));
$username = $data['username'];
$friendlyregdate = date("Ym", strtotime($data['regdate']));

// Only count medals when the field is actually an array; count() on a
// non-array value is an error on current PHP versions.
$medals = is_array($data['medals']) ? count($data['medals']) : 0;
这样我们就啥都有了.
然后我们用echo输出一波:
271
51
5
3
1
0
38
327
932
87
12
2104
22
Lv.8 考古家
member
2017-01-07
Zapic
201701
5
这样,基本的数据抓取应该没问题了.
那么开始连接数据库:
// Connection settings.
$myhost = "localhost";
$myuser = "localuser";
$mypass = "butidonthavepassword";
$mydb = "mcbbs";
$myport = 3306;
// Prefix prepended to every table name used by the crawler.
$t_prefix = "mb_";

$con = mysqli_connect($myhost, $myuser, $mypass, $mydb, $myport);
// mysqli_connect() returns false on failure. mysqli_connect_errno() takes no
// arguments, so the result of mysqli_connect() is tested directly instead.
if (!$con) {
    echo "Failed to connect to MySQL: " . mysqli_connect_error();
    // Nothing below can work without a connection.
    exit(1);
}
然后,开始建立数据表.
preload($con);
/**
 * Create the statistics tables if they do not exist yet and seed the single
 * global-statistics row.
 *
 * @param mysqli $con Open database connection.
 * @return void
 */
function preload($con)
{
    // $t_prefix is defined at file scope; without this declaration it would be
    // undefined here and the tables would be created without their prefix.
    global $t_prefix;

    // Registration-date statistics (one row per YYYYMM bucket).
    mysqli_query($con, "CREATE table if not exists {$t_prefix}regdatestats(date int primary key,count int)");
    // Medal statistics.
    mysqli_query($con, "CREATE table if not exists {$t_prefix}medalstats(mid int primary key,friendlyname text,count int)");
    // Per-user data.
    mysqli_query($con, "CREATE table if not exists {$t_prefix}userstats(uid int primary key,username text,credits int,ex1 int,ex2 int,ex3 int,ex4 int,ex5 int,ex6 int,ex7 int,ex8 int,posts int,threads int,friends int,medalscount int,ugroup text,exgroupids text,regdate text)");
    // Site-wide totals. The running sums use bigint: in MySQL the type name
    // "long" is a synonym for MEDIUMTEXT, not a 64-bit integer.
    mysqli_query($con, "CREATE table if not exists {$t_prefix}globalstat(id int primary key,credits bigint,ex1 bigint,ex2 bigint,ex3 bigint,ex4 bigint,ex5 bigint,ex6 bigint,ex7 bigint,ex8 bigint,posts bigint,threads bigint,friends bigint,medalscount bigint)");
    // Per-group totals.
    mysqli_query($con, "CREATE table if not exists {$t_prefix}groupstats(groupid int primary key,friendlyname text,grouptype text,groupcredits bigint,count int,ex1 bigint,ex2 bigint,ex3 bigint,ex4 bigint,ex5 bigint,ex6 bigint,ex7 bigint,ex8 bigint,posts bigint,threads bigint,friends bigint,medalscount bigint)");
    // Seed the single global row; if it already exists the statement is a no-op.
    mysqli_query($con, "insert into {$t_prefix}globalstat (id,credits,ex1,ex2,ex3,ex4,ex5,ex6,ex7,ex8,posts,threads,friends,medalscount) values (1,0,0,0,0,0,0,0,0,0,0,0,0,0) on duplicate key update credits=credits");
}
表建完了,接下来把数据写进去:
// String values come straight from the remote API, so escape them before
// embedding them in SQL. Numeric fields are interpolated as-is.
$usernameSql = mysqli_real_escape_string($con, (string) $username);
$groupNameSql = mysqli_real_escape_string($con, (string) $groupfriendlyname);
$groupTypeSql = mysqli_real_escape_string($con, (string) $grouptype);
$exGroupIdsSql = mysqli_real_escape_string($con, (string) $exgroupids);
$regdateSql = mysqli_real_escape_string($con, (string) $regdate);

// Registration-date bucket.
mysqli_query($con, "insert into {$t_prefix}regdatestats (date,count) values ({$friendlyregdate},1) on duplicate key update count=count+1");

// Medal statistics: only iterate when the user actually has a medal list.
if (!is_array($data['medals'])) {
    $medals = 0;
} else {
    foreach ($data['medals'] as $mds) {
        $medalId = (int) $mds['medalid'];
        $medalNameSql = mysqli_real_escape_string($con, (string) $mds['name']);
        mysqli_query($con, "insert into {$t_prefix}medalstats (mid,friendlyname,count) values ({$medalId},'{$medalNameSql}',1) on duplicate key update count=count+1");
    }
}

// Group statistics. groupcredits is accumulated on duplicate key like every
// other running total (it was previously only set by the first member).
mysqli_query($con, "insert into {$t_prefix}groupstats (groupid,friendlyname,grouptype,groupcredits,count,ex1,ex2,ex3,ex4,ex5,ex6,ex7,ex8,posts,threads,friends,medalscount) values ({$groupid},'{$groupNameSql}','{$groupTypeSql}',{$credits},1,{$ex1},{$ex2},{$ex3},{$ex4},{$ex5},{$ex6},{$ex7},{$ex8},{$posts},{$threads},{$friends},{$medals}) on duplicate key update count=count+1, groupcredits=groupcredits+{$credits}, ex1=ex1+{$ex1}, ex2=ex2+{$ex2}, ex3=ex3+{$ex3}, ex4=ex4+{$ex4}, ex5=ex5+{$ex5}, ex6=ex6+{$ex6}, ex7=ex7+{$ex7}, ex8=ex8+{$ex8}, posts=posts+{$posts}, threads=threads+{$threads}, friends=friends+{$friends}, medalscount=medalscount+{$medals}");

// Global statistics. credits is added exactly once; listing the assignment
// twice would make MySQL apply it twice.
mysqli_query($con, "update {$t_prefix}globalstat set credits=credits+{$credits}, ex1=ex1+{$ex1}, ex2=ex2+{$ex2}, ex3=ex3+{$ex3}, ex4=ex4+{$ex4}, ex5=ex5+{$ex5}, ex6=ex6+{$ex6}, ex7=ex7+{$ex7}, ex8=ex8+{$ex8}, posts=posts+{$posts}, threads=threads+{$threads}, friends=friends+{$friends}, medalscount=medalscount+{$medals}");

// Per-user row; an existing row for the same UID is left untouched.
$udata = mysqli_query($con, "insert into {$t_prefix}userstats (uid,username,credits,ex1,ex2,ex3,ex4,ex5,ex6,ex7,ex8,posts,threads,friends,medalscount,ugroup,exgroupids,regdate) values ({$uid},'{$usernameSql}',{$credits},{$ex1},{$ex2},{$ex3},{$ex4},{$ex5},{$ex6},{$ex7},{$ex8},{$posts},{$threads},{$friends},{$medals},'{$groupNameSql}','{$exGroupIdsSql}','{$regdateSql}') on duplicate key update uid=uid");

// Report whether the user row was written.
if (!$udata) {
    echo "failed to insert UID:" . $uid;
    echo "\n" . mysqli_error($con);
} else {
    echo "UID:" . $uid . ". OK!";
}
然后整个代码看起来就像这样:
<?php
// Connection settings.
$myhost = "localhost";
$myuser = "localuser";
$mypass = "butidonthavepassword";
$mydb = "mcbbs";
$myport = 3306;
// Prefix prepended to every table name used by the crawler.
$t_prefix = "mb_";

$con = mysqli_connect($myhost, $myuser, $mypass, $mydb, $myport);
// mysqli_connect() returns false on failure; mysqli_connect_errno() takes no
// arguments, so the connection result is tested directly.
if (!$con) {
    echo "Failed to connect to MySQL: " . mysqli_connect_error();
    exit(1);
}
preload($con);

// Fetch one profile. The URL must not contain a space before "&uid=".
$uid = 2048697;
$raw = file_get_contents("http://www.mcbbs.net/api/mobile/index.php?module=profile&uid=" . $uid);

// The profile fields are nested under Variables -> space.
$json = json_decode($raw, true);
$data = $json['Variables']['space'];

$ex1 = $data['extcredits1'];
$ex2 = $data['extcredits2'];
$ex3 = $data['extcredits3'];
$ex4 = $data['extcredits4'];
$ex5 = $data['extcredits5'];
$ex6 = $data['extcredits6'];
$ex7 = $data['extcredits7'];
$ex8 = $data['extcredits8'];
$posts = $data['posts'];
$threads = $data['threads'];
$friends = $data['friends'];
$credits = $data['credits'];
$groupid = $data['groupid'];
$groupfriendlyname = $data['group']['grouptitle'];
$grouptype = $data['group']['type'];
$exgroupids = $data['extgroupids'];
$regdate = date("Y-m-d", strtotime($data['regdate']));
$username = $data['username'];
$friendlyregdate = date("Ym", strtotime($data['regdate']));
// Only count medals when the field is an array.
$medals = is_array($data['medals']) ? count($data['medals']) : 0;

// String values come straight from the remote API, so escape them before
// embedding them in SQL. Numeric fields are interpolated as-is.
$usernameSql = mysqli_real_escape_string($con, (string) $username);
$groupNameSql = mysqli_real_escape_string($con, (string) $groupfriendlyname);
$groupTypeSql = mysqli_real_escape_string($con, (string) $grouptype);
$exGroupIdsSql = mysqli_real_escape_string($con, (string) $exgroupids);
$regdateSql = mysqli_real_escape_string($con, (string) $regdate);

// Registration-date bucket.
mysqli_query($con, "insert into {$t_prefix}regdatestats (date,count) values ({$friendlyregdate},1) on duplicate key update count=count+1");

// Medal statistics.
if (is_array($data['medals'])) {
    foreach ($data['medals'] as $mds) {
        $medalId = (int) $mds['medalid'];
        $medalNameSql = mysqli_real_escape_string($con, (string) $mds['name']);
        mysqli_query($con, "insert into {$t_prefix}medalstats (mid,friendlyname,count) values ({$medalId},'{$medalNameSql}',1) on duplicate key update count=count+1");
    }
}

// Group statistics; groupcredits is accumulated like the other running totals.
mysqli_query($con, "insert into {$t_prefix}groupstats (groupid,friendlyname,grouptype,groupcredits,count,ex1,ex2,ex3,ex4,ex5,ex6,ex7,ex8,posts,threads,friends,medalscount) values ({$groupid},'{$groupNameSql}','{$groupTypeSql}',{$credits},1,{$ex1},{$ex2},{$ex3},{$ex4},{$ex5},{$ex6},{$ex7},{$ex8},{$posts},{$threads},{$friends},{$medals}) on duplicate key update count=count+1, groupcredits=groupcredits+{$credits}, ex1=ex1+{$ex1}, ex2=ex2+{$ex2}, ex3=ex3+{$ex3}, ex4=ex4+{$ex4}, ex5=ex5+{$ex5}, ex6=ex6+{$ex6}, ex7=ex7+{$ex7}, ex8=ex8+{$ex8}, posts=posts+{$posts}, threads=threads+{$threads}, friends=friends+{$friends}, medalscount=medalscount+{$medals}");

// Global statistics: uses the configured prefix and adds credits exactly once.
mysqli_query($con, "update {$t_prefix}globalstat set credits=credits+{$credits}, ex1=ex1+{$ex1}, ex2=ex2+{$ex2}, ex3=ex3+{$ex3}, ex4=ex4+{$ex4}, ex5=ex5+{$ex5}, ex6=ex6+{$ex6}, ex7=ex7+{$ex7}, ex8=ex8+{$ex8}, posts=posts+{$posts}, threads=threads+{$threads}, friends=friends+{$friends}, medalscount=medalscount+{$medals}");

// Per-user row; an existing row for the same UID is left untouched.
$udata = mysqli_query($con, "insert into {$t_prefix}userstats (uid,username,credits,ex1,ex2,ex3,ex4,ex5,ex6,ex7,ex8,posts,threads,friends,medalscount,ugroup,exgroupids,regdate) values ({$uid},'{$usernameSql}',{$credits},{$ex1},{$ex2},{$ex3},{$ex4},{$ex5},{$ex6},{$ex7},{$ex8},{$posts},{$threads},{$friends},{$medals},'{$groupNameSql}','{$exGroupIdsSql}','{$regdateSql}') on duplicate key update uid=uid");
if (!$udata) {
    echo "failed to insert UID:" . $uid;
    echo "\n" . mysqli_error($con);
} else {
    echo "UID:" . $uid . ". OK!";
}

/**
 * Create the statistics tables if they do not exist yet and seed the single
 * global-statistics row.
 *
 * @param mysqli $con Open database connection.
 * @return void
 */
function preload($con)
{
    // $t_prefix lives at file scope and must be imported explicitly.
    global $t_prefix;

    mysqli_query($con, "CREATE table if not exists {$t_prefix}regdatestats(date int primary key,count int)");
    mysqli_query($con, "CREATE table if not exists {$t_prefix}medalstats(mid int primary key,friendlyname text,count int)");
    mysqli_query($con, "CREATE table if not exists {$t_prefix}userstats(uid int primary key,username text,credits int,ex1 int,ex2 int,ex3 int,ex4 int,ex5 int,ex6 int,ex7 int,ex8 int,posts int,threads int,friends int,medalscount int,ugroup text,exgroupids text,regdate text)");
    // Running sums use bigint: in MySQL "long" is a synonym for MEDIUMTEXT.
    mysqli_query($con, "CREATE table if not exists {$t_prefix}globalstat(id int primary key,credits bigint,ex1 bigint,ex2 bigint,ex3 bigint,ex4 bigint,ex5 bigint,ex6 bigint,ex7 bigint,ex8 bigint,posts bigint,threads bigint,friends bigint,medalscount bigint)");
    mysqli_query($con, "CREATE table if not exists {$t_prefix}groupstats(groupid int primary key,friendlyname text,grouptype text,groupcredits bigint,count int,ex1 bigint,ex2 bigint,ex3 bigint,ex4 bigint,ex5 bigint,ex6 bigint,ex7 bigint,ex8 bigint,posts bigint,threads bigint,friends bigint,medalscount bigint)");
    // Seed the single global row; a no-op when it already exists.
    mysqli_query($con, "insert into {$t_prefix}globalstat (id,credits,ex1,ex2,ex3,ex4,ex5,ex6,ex7,ex8,posts,threads,friends,medalscount) values (1,0,0,0,0,0,0,0,0,0,0,0,0,0) on duplicate key update credits=credits");
}
这样就能够爬取一位用户的数据了.
但是我们不可能改完一次UID跑一次脚本,所以我们要让程序自动获取UID.同时为了让多个进程之间同步,这里就使用MySQL数据库存取UID.
在preload方法里加一个创建UID存储表的操作:
function preload($con){
    // $t_prefix is defined at file scope and must be imported into the function.
    global $t_prefix;
    // Create the UID counter table.
    mysqli_query($con, "CREATE table if not exists {$t_prefix}uid(id int primary key,uid int)");
    // Insert the first counter row; a no-op when it already exists.
    mysqli_query($con, "INSERT into {$t_prefix}uid (id,uid) values (1,1) on duplicate key update uid=uid");
    //......
然后再整一个读取UID的方法:
/**
 * Read the next UID to crawl from the counter table and advance the counter.
 *
 * NOTE: the read and the increment are two separate statements, so two
 * workers calling this at the same time can read the same value.
 *
 * @param mysqli $con Open database connection.
 * @return mixed The UID value read from the counter row.
 */
function getuid($con){
    // $t_prefix is defined at file scope and must be imported into the function.
    global $t_prefix;
    // Procedural mysqli_query() needs the connection as its first argument.
    $result = mysqli_query($con, "select * from {$t_prefix}uid");
    $uid = mysqli_fetch_assoc($result)['uid'];
    // Advance the counter (the table name is {$t_prefix}uid, not the bare prefix).
    mysqli_query($con, "update {$t_prefix}uid set uid=uid+1");
    return $uid;
}
再把这个丢进主体里,反手给他套一个循环.
//......
$con = mysqli_connect($myhost, $myuser, $mypass, $mydb, $myport);
// mysqli_connect() returns false on failure; mysqli_connect_errno() takes no
// arguments, so the connection result is tested directly.
if (!$con) {
    echo "Failed to connect to MySQL: " . mysqli_connect_error();
    exit(1);
}
preload($con);
while (true) {
    // Take the next UID from the shared counter.
    $uid = getuid($con);
    // The URL must not contain a space before "&uid=".
    $raw = file_get_contents("http://www.mcbbs.net/api/mobile/index.php?module=profile&uid=" . $uid);
    // The profile fields are nested under Variables -> space.
    $json = json_decode($raw, true);
    $data = $json['Variables']['space'];
    $ex1 = $data['extcredits1'];
    $ex2 = $data['extcredits2'];
    //...
    //
}
function preload($con){
//......
然后就可以运行了.
UID:1 OK!
UID:2 OK!
......
然后多开几个并行,你就会发现:
你写的程序跟屎一样
root@zapic-1:~# mysql
Welcome to the MariaDB monitor. Commands end with ; or \g.
Your MariaDB connection id is 3057
Copyright (c) 2000, 2018, Oracle, MariaDB Corporation Ab and others.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
MariaDB [(none)]> use mcbbs
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A
Database changed
MariaDB [mcbbs]> select * from mb_userstats where uid=2034;
Empty set (0.00 sec)
没错,漏了.
问题出在哪?
脏读了(更准确地说,是"先读取、再自增"这两步不是原子操作而造成的竞态条件).
什么?
例子:
一个进程读取到了UID为2098,于是马上开始update.
不巧,另一个进程也开始读取,但是update还没有完成,于是也读到了2098.
然后update两次.
这时update完成了,另一个进程读取,2100.
2099不见了.
而且进程越多,这种现象越明显.
怎么办?
凉拌?
不,MySQL有事务锁.
开始事务之后里面执行一次update,这样这张表就会被锁住,仅允许当前进程读写,直到进程释放事务锁.
那么问题就简单了.
我们可以先在update的同时锁住这张表,这个功能MySQL提供给我们了.
-- Open a transaction so the statements below are committed together.
start transaction;
-- Increment the counter first, before reading it.
update {$t_prefix}uid set uid=uid+1;
-- Read back the value that was just incremented.
select uid from {$t_prefix}uid;
-- End the transaction.
commit;
是不是很好奇为什么要先update再select?
原因是:在InnoDB里,普通的select是不加锁的快照读,而update会给它修改的行加上排他锁,并且一直持有到commit;所以必须先update,才能保证接下来读到的值不会被其他进程同时拿走.
对了,请务必记得commit,不然其他进程都会被堵死,因为锁住之后只有这个进程能够读写这张表.
这样就可以避免脏读了.
改好之后的getuid方法就像这样:
/**
 * Take the next UID to crawl from the counter table.
 *
 * The counter is incremented and read back inside one transaction, and the
 * value returned is the one the counter held before the increment.
 *
 * @param mysqli $con Open database connection.
 * @return int The UID assigned to the caller.
 */
function getuid($con)
{
    // $t_prefix is defined at file scope; without this declaration the queries
    // below would target a table named "uid" instead of "{$t_prefix}uid".
    global $t_prefix;

    mysqli_query($con, "start transaction");
    // Increment first, then read the new value back.
    mysqli_query($con, "update {$t_prefix}uid set uid=uid+1");
    $result = mysqli_query($con, "select uid from {$t_prefix}uid");
    // The counter was already incremented, so subtract one to get this caller's UID.
    $uid = mysqli_fetch_assoc($result)['uid'] - 1;
    mysqli_query($con, "commit;");
    return $uid;
}
到这里,我们已经能够随意的增减爬虫进程了.
只要数据库顶得住,来多少爬虫都同步.
大概就这样,应该没有什么问题了,开始爬吧.
完整代码请走传送门
虽然不知道大佬你在写什么,但是我知道这代码高亮不好看 :tieba9:
我也觉得巨tm丑,但是我找不到替代的东西.
事实上确实高亮了嘛.
:tieba23: 你这回复没邮件提醒啊,wp万岁.
不是,我这不是没有靠谱的邮件发送方案嘛…
:orz1: sendcloud用SMTP,马化腾邮箱不拦,免费额度够用.还有你这证书是不是有问题啊,我火狐访问525错误.
目前穷得很,没钱搞事情,国内机子被封,用的内网穿透,日常出问题.
感谢帮助,我研究一下这两个东西.
顺便,能留个友链么?
友链给你挂了,我比你更穷.我是60块钱一年,美国回程cn2虚拟空间.
蹭的免费内网穿透,家里放一台手机,速度貌似还行.
顺便看看邮件OK了没有,我觉得我可以再写一个模板 :tieba9:
:tieba23: 这就是大佬吧,邮件可以了,手机NetHunter?
然后想起来每日免费10次测试用掉4次加上这次来回又4次今天药丸了(x
手机用的Linux deploy 然后Apache+PHP7.2+MariaDB+内网穿透+CDN得真实IP
基本还行,除了时不时炸两下然后就525 SSL Hand......
也给你挂上了友链了. :orz12:
:orz9: 我一天40封,不知道是我用的久还是干嘛.http://www.sendcloud.net/doc/faq/#1-sendcloud
这里面有额度申请的方法.
:tieba22: 刚刚刷新了下,发现了Highlight,但是从首页点进来并不会生效. :tieba7: 你可以看看我的博客.