php 爬取抖音评论数据
<p>1.安装抓包工具(Fiddler)和模拟器(夜神模拟器),可参考博文:https://blog.csdn.net/weixin_42223833/article/details/110009432。安装包和详细配置都有。</p><p>注意:抖音有SSL证书校验,夜神模拟器需要先安装Xposed和JustTrustMe,才能正常抓包,否则打开抖音后会没有网络。</p>
<p>2.破解X-Gorgon签名算法。因为抖音有签名验证机制,要想直接调用抖音接口,必须破解签名算法!有能力的大佬可以通过反编译APP去破解,当然我没有这个能力,只能通过抓包工具把接口返回的数据下载下来。</p>
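<p>配置Fiddler下载数据包(在Fiddler的 Rules → Customize Rules 中修改 OnBeforeResponse 函数)。注意:下面的示例脚本沿用了美团接口的域名、接口地址和保存目录(data/mt/),抓取抖音评论时需要替换为实际抓包看到的抖音评论接口,并把保存目录改成后面PHP脚本读取的目录(commend/):</p>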
<p><img src="https://img2020.cnblogs.com/blog/1776077/202101/1776077-20210116172101688-465702406.png" alt="" loading="lazy"></p>
<p> </p>
<div class="cnblogs_code">
<pre> <span style="color: rgba(0, 0, 255, 1)">static</span><span style="color: rgba(0, 0, 0, 1)"> function OnBeforeResponse(oSession: Session) {
</span><span style="color: rgba(0, 0, 255, 1)">if</span> (m_Hide304s && oSession.responseCode == <span style="color: rgba(128, 0, 128, 1)">304</span><span style="color: rgba(0, 0, 0, 1)">) {
oSession[</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">ui-hide</span><span style="color: rgba(128, 0, 0, 1)">"</span>] = <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">true</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">;
}
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">将关键接口标青色,只能是域名 </span>
<span style="color: rgba(0, 0, 255, 1)">if</span> (oSession.HostnameIs(<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">ihotel.meituan.com</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">)) {
oSession[</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">ui-color</span><span style="color: rgba(128, 0, 0, 1)">"</span>] = <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">#00FFFF</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">;
}
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">抓取接口的数据</span>
<span style="color: rgba(0, 0, 255, 1)">if</span> (oSession.uriContains(<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">https://ihotel.meituan.com/hbsearch/HotelSearch</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">)){
</span><span style="color: rgba(0, 0, 255, 1)">var</span> strBody=<span style="color: rgba(0, 0, 0, 1)">oSession.GetResponseBodyAsString();
</span><span style="color: rgba(0, 0, 255, 1)">var</span> host=<span style="color: rgba(0, 0, 0, 1)">oSession.PathAndQuery;
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">FiddlerObject.alert(host);</span>
<span style="color: rgba(0, 0, 255, 1)">var</span> begin=host.indexOf(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">cateId=</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">);
</span><span style="color: rgba(0, 0, 255, 1)">var</span> end=host.indexOf(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">distance=</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">);
</span><span style="color: rgba(0, 0, 255, 1)">var</span> name=host.Substring(begin,<span style="color: rgba(128, 0, 128, 1)">50</span><span style="color: rgba(0, 0, 0, 1)">);
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">FiddlerObject.alert(name);</span>
<span style="color: rgba(0, 0, 255, 1)">var</span> date = <span style="color: rgba(0, 0, 255, 1)">new</span><span style="color: rgba(0, 0, 0, 1)"> Date();
</span><span style="color: rgba(0, 0, 255, 1)">var</span> seperator1 = <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">-</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">;
</span><span style="color: rgba(0, 0, 255, 1)">var</span> year =<span style="color: rgba(0, 0, 0, 1)"> date.getFullYear();
</span><span style="color: rgba(0, 0, 255, 1)">var</span> month = date.getMonth() + <span style="color: rgba(128, 0, 128, 1)">1</span><span style="color: rgba(0, 0, 0, 1)">;
</span><span style="color: rgba(0, 0, 255, 1)">var</span> strDate =<span style="color: rgba(0, 0, 0, 1)"> date.getDate();
</span><span style="color: rgba(0, 0, 255, 1)">if</span> (month >= <span style="color: rgba(128, 0, 128, 1)">1</span> && month <= <span style="color: rgba(128, 0, 128, 1)">9</span><span style="color: rgba(0, 0, 0, 1)">) {
month </span>= <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">0</span><span style="color: rgba(128, 0, 0, 1)">"</span> +<span style="color: rgba(0, 0, 0, 1)"> month;
}
</span><span style="color: rgba(0, 0, 255, 1)">if</span> (strDate >= <span style="color: rgba(128, 0, 128, 1)">0</span> && strDate <= <span style="color: rgba(128, 0, 128, 1)">9</span><span style="color: rgba(0, 0, 0, 1)">) {
strDate </span>= <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">0</span><span style="color: rgba(128, 0, 0, 1)">"</span> +<span style="color: rgba(0, 0, 0, 1)"> strDate;
}
</span><span style="color: rgba(0, 0, 255, 1)">var</span> currentdate = year + seperator1 + month + seperator1 +<span style="color: rgba(0, 0, 0, 1)"> strDate;
</span><span style="color: rgba(0, 0, 255, 1)">var</span> filetitle=currentdate + <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">_</span><span style="color: rgba(128, 0, 0, 1)">'</span> +<span style="color: rgba(0, 0, 0, 1)"> name;
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">FiddlerObject.alert(currentdate);</span>
<span style="color: rgba(0, 0, 255, 1)">var</span> dir=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">D:/phpstudy_pro/WWW/data/mt/</span><span style="color: rgba(128, 0, 0, 1)">"</span> +<span style="color: rgba(0, 0, 0, 1)"> currentdate;
</span><span style="color: rgba(0, 0, 255, 1)">var</span> filename = dir + <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">/</span><span style="color: rgba(128, 0, 0, 1)">"</span> + filetitle + <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">.json</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">;
</span><span style="color: rgba(0, 0, 255, 1)">var</span> curDate = <span style="color: rgba(0, 0, 255, 1)">new</span><span style="color: rgba(0, 0, 0, 1)"> Date();
</span><span style="color: rgba(0, 0, 255, 1)">var</span><span style="color: rgba(0, 0, 0, 1)"> sw : System.IO.StreamWriter;
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">文件夹不存在,则自动创建</span>
<span style="color: rgba(0, 0, 255, 1)">if</span> (!<span style="color: rgba(0, 0, 0, 1)">System.IO.Directory.Exists(dir))
{
System.IO.Directory.CreateDirectory(dir);
}
</span><span style="color: rgba(0, 0, 255, 1)">if</span><span style="color: rgba(0, 0, 0, 1)"> (System.IO.File.Exists(filename)){
sw </span>=<span style="color: rgba(0, 0, 0, 1)"> System.IO.File.AppendText(filename);
sw.Write(strBody);
}
</span><span style="color: rgba(0, 0, 255, 1)">else</span><span style="color: rgba(0, 0, 0, 1)">{
sw </span>=<span style="color: rgba(0, 0, 0, 1)"> System.IO.File.CreateText(filename);
sw.Write(strBody);
}
sw.Close();
sw.Dispose();
}</span></pre>
</div>
<p>注意:因为抖音每次只显示20条评论,所以需要通过模拟器的宏操作不断下拉获取下一页数据:</p>
<p><img src="https://img2020.cnblogs.com/blog/1776077/202101/1776077-20210116172349248-2120960899.png" alt="" loading="lazy"></p>
<p> </p>
<p>录制一段下拉的操作,然后循环执行即可。</p>
<p>3.处理数据</p>
<p>数据已经下载好了,接下来只要处理,提取出自己需要的数据就行了。</p>
<p><img src="https://img2020.cnblogs.com/blog/1776077/202101/1776077-20210116172504623-1508663044.png" alt="" loading="lazy"></p>
<p> </p>
<p>这里我使用PHP来处理数据。</p>
<div class="cnblogs_code">
<pre><?<span style="color: rgba(0, 0, 0, 1)">php
$dir</span>=<span style="color: rgba(0, 0, 0, 1)">dirname(__FILE__);
$dir</span>=str_replace(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">\\</span><span style="color: rgba(128, 0, 0, 1)">'</span>,<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">/</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,$dir);
$listArr </span>= glob($dir.<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">/commend/*.json</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">);
$arr</span>=<span style="color: rgba(0, 0, 0, 1)">[];
</span><span style="color: rgba(0, 0, 255, 1)">foreach</span> ($listArr <span style="color: rgba(0, 0, 255, 1)">as</span> $k=><span style="color: rgba(0, 0, 0, 1)">$file){
$json</span>=<span style="color: rgba(0, 0, 0, 1)">file_get_contents($file);
$data</span>=json_decode($json,<span style="color: rgba(0, 0, 255, 1)">true</span><span style="color: rgba(0, 0, 0, 1)">);
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">评论</span>
<span style="color: rgba(0, 0, 255, 1)">if</span> (!empty($data[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">comments</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">])){
$comments</span>=$data[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">comments</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">];
</span><span style="color: rgba(0, 0, 255, 1)">foreach</span> ($comments <span style="color: rgba(0, 0, 255, 1)">as</span> $id=><span style="color: rgba(0, 0, 0, 1)">$comment){
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">评论</span>
$text=$comment[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">text</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">];
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">评论的用户</span>
$user=$comment[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">user</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">];
$nickname</span>=$user[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">nickname</span><span style="color: rgba(128, 0, 0, 1)">'</span>];<span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">昵称</span>
$signature=$user[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">signature</span><span style="color: rgba(128, 0, 0, 1)">'</span>];<span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">签名</span>
$avatar=$user[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">avatar_168x168</span><span style="color: rgba(128, 0, 0, 1)">'</span>][<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">url_list</span><span style="color: rgba(128, 0, 0, 1)">'</span>][<span style="color: rgba(128, 0, 128, 1)">1</span>];<span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">头像</span>
$lable=$comment[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">label_text</span><span style="color: rgba(128, 0, 0, 1)">'</span>];<span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">身份
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">评论id</span>
$cid=$comment[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">cid</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">];
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">评论时间</span>
$createTme=$comment[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">create_time</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">];
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">视频id</span>
$awemeId=$comment[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">aweme_id</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">];
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">数据整合</span>
$arr[$cid]=<span style="color: rgba(0, 0, 0, 1)">[
</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">awemeId</span><span style="color: rgba(128, 0, 0, 1)">'</span>=><span style="color: rgba(0, 0, 0, 1)">$awemeId,
</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">text</span><span style="color: rgba(128, 0, 0, 1)">'</span>=><span style="color: rgba(0, 0, 0, 1)">$text,
</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">nickname</span><span style="color: rgba(128, 0, 0, 1)">'</span>=><span style="color: rgba(0, 0, 0, 1)">$nickname,
</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">signature</span><span style="color: rgba(128, 0, 0, 1)">'</span>=><span style="color: rgba(0, 0, 0, 1)">$signature,
</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">avatar</span><span style="color: rgba(128, 0, 0, 1)">'</span>=><span style="color: rgba(0, 0, 0, 1)">$avatar,
</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">lable</span><span style="color: rgba(128, 0, 0, 1)">'</span>=><span style="color: rgba(0, 0, 0, 1)">$lable,
</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">createTme</span><span style="color: rgba(128, 0, 0, 1)">'</span>=><span style="color: rgba(0, 0, 0, 1)">$createTme,
];
}
}
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">评论为空则跳出循环</span>
<span style="color: rgba(0, 0, 255, 1)">continue</span><span style="color: rgba(0, 0, 0, 1)">;
}
echo </span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)"><pre></span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">;
print_r($arr);
echo </span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)"></pre></span><span style="color: rgba(128, 0, 0, 1)">"</span>;</pre>
</div>
<p> </p>
</div>
<div id="MySignature" role="contentinfo">
<div>
<p style="padding-top: 5px; padding-right: 10px; padding-bottom: 10px; font-family: 微软雅黑; font-size: 14px; height: 150px" id="PSignature">
♥ 作者:<span style="font-weight: bold; font-size: large">离岸少年</span>
<br>
♠ 出处:https://www.cnblogs.com/jackzhuo/
<br>
♣ 本博客大多为学习笔记或读书笔记,本文如对您有帮助,还请多推荐下此文,如有错误欢迎指正,相互学习,共同进步。
</p>
</div><br><br>
来源:https://www.cnblogs.com/jackzhuo/p/14286681.html
頁:
[1]