魏露露 發表於 2021-1-16 17:27:00

php 爬取抖音评论数据

<p>1.安装抓包工具(Fiddler)和模拟器(夜神模拟器),可参考博文:https://blog.csdn.net/weixin_42223833/article/details/110009432 。该博文中提供了安装包和详细的配置步骤。</p>
<p>注意:抖音有SSL证书校验,需要在夜神模拟器中安装Xposed和JustTrustMe之后才能正常抓包,否则打开抖音时会没有网络。</p>
<p>2.破解X-Gorgon签名算法。因为抖音有签名验证机制,要想直接调用抖音接口,必须破解签名算法!有能力的大佬可以通过反编译APP去破解;我没有这个能力,所以只能通过抓包工具把接口返回的数据下载下来。</p>
<p>配置Fiddler下载数据包(注意:下面的示例脚本是以美团接口 ihotel.meituan.com 为例编写的,抓取抖音评论时需要把其中的域名、接口地址和保存目录替换为抖音评论接口对应的值):</p>
<p><img src="https://img2020.cnblogs.com/blog/1776077/202101/1776077-20210116172101688-465702406.png" alt="" loading="lazy"></p>
<p>&nbsp;</p>
<div class="cnblogs_code">
<pre>    <span style="color: rgba(0, 0, 255, 1)">static</span><span style="color: rgba(0, 0, 0, 1)"> function OnBeforeResponse(oSession: Session) {
      </span><span style="color: rgba(0, 0, 255, 1)">if</span> (m_Hide304s &amp;&amp; oSession.responseCode == <span style="color: rgba(128, 0, 128, 1)">304</span><span style="color: rgba(0, 0, 0, 1)">) {
            oSession[</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">ui-hide</span><span style="color: rgba(128, 0, 0, 1)">"</span>] = <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">true</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">;
      }
      </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">将关键接口标青色,只能是域名   </span>
      <span style="color: rgba(0, 0, 255, 1)">if</span> (oSession.HostnameIs(<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">ihotel.meituan.com</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">)) {
            oSession[</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">ui-color</span><span style="color: rgba(128, 0, 0, 1)">"</span>] = <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">#00FFFF</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">;
      }
      
      </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">抓取接口的数据</span>
      <span style="color: rgba(0, 0, 255, 1)">if</span> (oSession.uriContains(<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">https://ihotel.meituan.com/hbsearch/HotelSearch</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">)){
            </span><span style="color: rgba(0, 0, 255, 1)">var</span> strBody=<span style="color: rgba(0, 0, 0, 1)">oSession.GetResponseBodyAsString();
            </span><span style="color: rgba(0, 0, 255, 1)">var</span> host=<span style="color: rgba(0, 0, 0, 1)">oSession.PathAndQuery;
            </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">FiddlerObject.alert(host);</span>
            <span style="color: rgba(0, 0, 255, 1)">var</span> begin=host.indexOf(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">cateId=</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">);
            </span><span style="color: rgba(0, 0, 255, 1)">var</span> end=host.indexOf(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">distance=</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">);
            </span><span style="color: rgba(0, 0, 255, 1)">var</span> name=host.Substring(begin,<span style="color: rgba(128, 0, 128, 1)">50</span><span style="color: rgba(0, 0, 0, 1)">);
            
            </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">FiddlerObject.alert(name);</span>
            
            <span style="color: rgba(0, 0, 255, 1)">var</span> date = <span style="color: rgba(0, 0, 255, 1)">new</span><span style="color: rgba(0, 0, 0, 1)"> Date();
            </span><span style="color: rgba(0, 0, 255, 1)">var</span> seperator1 = <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">-</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">;
            </span><span style="color: rgba(0, 0, 255, 1)">var</span> year =<span style="color: rgba(0, 0, 0, 1)"> date.getFullYear();
            </span><span style="color: rgba(0, 0, 255, 1)">var</span> month = date.getMonth() + <span style="color: rgba(128, 0, 128, 1)">1</span><span style="color: rgba(0, 0, 0, 1)">;
            </span><span style="color: rgba(0, 0, 255, 1)">var</span> strDate =<span style="color: rgba(0, 0, 0, 1)"> date.getDate();
            </span><span style="color: rgba(0, 0, 255, 1)">if</span> (month &gt;= <span style="color: rgba(128, 0, 128, 1)">1</span> &amp;&amp; month &lt;= <span style="color: rgba(128, 0, 128, 1)">9</span><span style="color: rgba(0, 0, 0, 1)">) {
                month </span>= <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">0</span><span style="color: rgba(128, 0, 0, 1)">"</span> +<span style="color: rgba(0, 0, 0, 1)"> month;
            }
            </span><span style="color: rgba(0, 0, 255, 1)">if</span> (strDate &gt;= <span style="color: rgba(128, 0, 128, 1)">0</span> &amp;&amp; strDate &lt;= <span style="color: rgba(128, 0, 128, 1)">9</span><span style="color: rgba(0, 0, 0, 1)">) {
                strDate </span>= <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">0</span><span style="color: rgba(128, 0, 0, 1)">"</span> +<span style="color: rgba(0, 0, 0, 1)"> strDate;
            }
            </span><span style="color: rgba(0, 0, 255, 1)">var</span> currentdate = year + seperator1 + month + seperator1 +<span style="color: rgba(0, 0, 0, 1)"> strDate;
            </span><span style="color: rgba(0, 0, 255, 1)">var</span> filetitle=currentdate + <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">_</span><span style="color: rgba(128, 0, 0, 1)">'</span> +<span style="color: rgba(0, 0, 0, 1)"> name;
            
            </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">FiddlerObject.alert(currentdate);</span>
            <span style="color: rgba(0, 0, 255, 1)">var</span> dir=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">D:/phpstudy_pro/WWW/data/mt/</span><span style="color: rgba(128, 0, 0, 1)">"</span> +<span style="color: rgba(0, 0, 0, 1)"> currentdate;
            
            </span><span style="color: rgba(0, 0, 255, 1)">var</span> filename = dir + <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">/</span><span style="color: rgba(128, 0, 0, 1)">"</span> + filetitle + <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">.json</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">;
            </span><span style="color: rgba(0, 0, 255, 1)">var</span> curDate = <span style="color: rgba(0, 0, 255, 1)">new</span><span style="color: rgba(0, 0, 0, 1)"> Date();
            </span><span style="color: rgba(0, 0, 255, 1)">var</span><span style="color: rgba(0, 0, 0, 1)"> sw : System.IO.StreamWriter;
            
            </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">文件夹不存在,则自动创建</span>
            <span style="color: rgba(0, 0, 255, 1)">if</span> (!<span style="color: rgba(0, 0, 0, 1)">System.IO.Directory.Exists(dir))
            {
                System.IO.Directory.CreateDirectory(dir);
            }
            
            </span><span style="color: rgba(0, 0, 255, 1)">if</span><span style="color: rgba(0, 0, 0, 1)"> (System.IO.File.Exists(filename)){
                sw </span>=<span style="color: rgba(0, 0, 0, 1)"> System.IO.File.AppendText(filename);
                sw.Write(strBody);
            }
            </span><span style="color: rgba(0, 0, 255, 1)">else</span><span style="color: rgba(0, 0, 0, 1)">{
                sw </span>=<span style="color: rgba(0, 0, 0, 1)"> System.IO.File.CreateText(filename);
                sw.Write(strBody);
            }

            sw.Close();
            sw.Dispose();
      }</span></pre>
</div>
<p>注意:因为抖音每次只显示20条评论,所以需要通过模拟器的宏操作不断下拉获取下一页数据:</p>
<p><img src="https://img2020.cnblogs.com/blog/1776077/202101/1776077-20210116172349248-2120960899.png" alt="" loading="lazy"></p>
<p>&nbsp;</p>
<p>&nbsp;录制一段下拉的操作,然后不断循环执行就好。</p>
<p>3.处理数据</p>
<p>数据已经下载好了,接下来只要处理,提取出自己需要的数据就行了。</p>
<p><img src="https://img2020.cnblogs.com/blog/1776077/202101/1776077-20210116172504623-1508663044.png" alt="" loading="lazy"></p>
<p>&nbsp;</p>
<p>&nbsp;这里我使用的是php语言处理数据。</p>
<div class="cnblogs_code">
<pre>&lt;?<span style="color: rgba(0, 0, 0, 1)">php
$dir</span>=<span style="color: rgba(0, 0, 0, 1)">dirname(__FILE__);
$dir</span>=str_replace(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">\\</span><span style="color: rgba(128, 0, 0, 1)">'</span>,<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">/</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,$dir);
$listArr </span>= glob($dir.<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">/commend/*.json</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">);

$arr</span>=<span style="color: rgba(0, 0, 0, 1)">[];
</span><span style="color: rgba(0, 0, 255, 1)">foreach</span> ($listArr <span style="color: rgba(0, 0, 255, 1)">as</span> $k=&gt;<span style="color: rgba(0, 0, 0, 1)">$file){
    $json</span>=<span style="color: rgba(0, 0, 0, 1)">file_get_contents($file);
    $data</span>=json_decode($json,<span style="color: rgba(0, 0, 255, 1)">true</span><span style="color: rgba(0, 0, 0, 1)">);
    </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">评论</span>
    <span style="color: rgba(0, 0, 255, 1)">if</span> (!empty($data[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">comments</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">])){
      $comments</span>=$data[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">comments</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">];
      </span><span style="color: rgba(0, 0, 255, 1)">foreach</span> ($comments <span style="color: rgba(0, 0, 255, 1)">as</span> $id=&gt;<span style="color: rgba(0, 0, 0, 1)">$comment){
            </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">评论</span>
            $text=$comment[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">text</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">];
            </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">评论的用户</span>
            $user=$comment[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">user</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">];
            $nickname</span>=$user[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">nickname</span><span style="color: rgba(128, 0, 0, 1)">'</span>];<span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">昵称</span>
            $signature=$user[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">signature</span><span style="color: rgba(128, 0, 0, 1)">'</span>];<span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">签名</span>
            $avatar=$user[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">avatar_168x168</span><span style="color: rgba(128, 0, 0, 1)">'</span>][<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">url_list</span><span style="color: rgba(128, 0, 0, 1)">'</span>][<span style="color: rgba(128, 0, 128, 1)">1</span>];<span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">头像</span>
            $lable=$comment[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">label_text</span><span style="color: rgba(128, 0, 0, 1)">'</span>];<span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">身份
            </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">评论id</span>
            $cid=$comment[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">cid</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">];
            </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">评论时间</span>
            $createTme=$comment[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">create_time</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">];
            </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">视频id</span>
            $awemeId=$comment[<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">aweme_id</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">];
            </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">数据整合</span>
            $arr[$cid]=<span style="color: rgba(0, 0, 0, 1)">[
                </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">awemeId</span><span style="color: rgba(128, 0, 0, 1)">'</span>=&gt;<span style="color: rgba(0, 0, 0, 1)">$awemeId,
                </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">text</span><span style="color: rgba(128, 0, 0, 1)">'</span>=&gt;<span style="color: rgba(0, 0, 0, 1)">$text,
                </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">nickname</span><span style="color: rgba(128, 0, 0, 1)">'</span>=&gt;<span style="color: rgba(0, 0, 0, 1)">$nickname,
                </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">signature</span><span style="color: rgba(128, 0, 0, 1)">'</span>=&gt;<span style="color: rgba(0, 0, 0, 1)">$signature,
                </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">avatar</span><span style="color: rgba(128, 0, 0, 1)">'</span>=&gt;<span style="color: rgba(0, 0, 0, 1)">$avatar,
                </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">lable</span><span style="color: rgba(128, 0, 0, 1)">'</span>=&gt;<span style="color: rgba(0, 0, 0, 1)">$lable,
                </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">createTme</span><span style="color: rgba(128, 0, 0, 1)">'</span>=&gt;<span style="color: rgba(0, 0, 0, 1)">$createTme,
            ];
      }
    }
    </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">评论为空则跳出循环</span>
    <span style="color: rgba(0, 0, 255, 1)">continue</span><span style="color: rgba(0, 0, 0, 1)">;
}

echo </span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">&lt;pre&gt;</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">;
print_r($arr);
echo </span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">&lt;/pre&gt;</span><span style="color: rgba(128, 0, 0, 1)">"</span>;</pre>
</div>
<p>&nbsp;</p>

</div>
<div id="MySignature" role="contentinfo">
    <div>
<p style="padding-top: 5px; padding-right: 10px; padding-bottom: 10px; font-family: 微软雅黑; font-size: 14px; height: 150px" id="PSignature">
            ♥ 作者:<span style="font-weight: bold; font-size: large">离岸少年</span>
            <br>
            ♠ 出处:https://www.cnblogs.com/jackzhuo/
            <br>
            ♣ 本博客大多为学习笔记或读书笔记,本文如对您有帮助,还请多推荐下此文,如有错误欢迎指正,相互学习,共同进步。
      </p>
    </div><br><br>
来源:https://www.cnblogs.com/jackzhuo/p/14286681.html
頁: [1]
查看完整版本: php 爬取抖音评论数据