update

daacheng · daacheng · commit ab8f4d48f294 · 2021-04-21T23:56:27.000+08:00
update
diff --git a/spiders/main.md b/spiders/main.md
@@ -1,11 +1,11 @@
 ## Python&#29228;&#34411;
-1. [Python&#29228;&#34411;&#20043;requests&#24211;](Python&#29228;&#34411;&#20043;requests&#24211;.md)
-
-2. [Python&#29228;&#34411;&#20043;BeautifulSoup](Python&#29228;&#34411;&#20043;BeautifulSoup.md)
-3. [Python&#29228;&#34411;&#20043;xpath&#35299;&#26512;](Python&#29228;&#34411;xpath&#35299;&#26512;.md)
-4. [Python&#29228;&#34411;&#20043;selenium&#33258;&#21160;&#21270;](Python&#29228;&#34411;&#20043;selenium&#33258;&#21160;&#21270;.md)
-5. [Python&#29228;&#34411;&#20043;&#26500;&#24314;&#33258;&#24049;&#30340;&#20195;&#29702;IP&#27744;](Python&#29228;&#34411;&#20043;&#26500;&#24314;&#33258;&#24049;&#30340;&#20195;&#29702;IP&#27744;.md)
-6. [Python&#29228;&#34411;&#20043;&#30693;&#20046;&#38035;&#40060;&#36148;&#22270;&#29255;&#29228;&#21462;](Python&#29228;&#34411;&#20043;&#30693;&#20046;&#38035;&#40060;&#36148;&#22270;&#29255;&#29228;&#21462;.md)
-7. [Python&#29228;&#34411;&#20043;uiautomator2&#24120;&#29992;&#25805;&#20316;](Python&#29228;&#34411;&#20043;uiautomator2&#24120;&#29992;&#25805;&#20316;.md)
-8. [python&#29228;&#34411;&#20043;&#19968;&#23576;&#35770;&#22363;&#21457;&#24086;&#25968;&#25454;&#29228;&#21462;](python&#29228;&#34411;&#20043;&#19968;&#23576;&#35770;&#22363;&#21457;&#24086;&#25968;&#25454;&#29228;&#21462;.md)
-9. [python&#29228;&#34411;&#20043;&#32593;&#26131;&#20113;&#38899;&#20048;&#27468;&#21333;&#27468;&#26354;&#21015;&#34920;&#21450;&#28909;&#38376;&#35780;&#35770;&#25968;&#25454;&#25235;&#21462;](python&#29228;&#34411;&#20043;&#32593;&#26131;&#20113;&#38899;&#20048;.md)
+* [Python&#29228;&#34411;&#20043;requests&#24211;](Python&#29228;&#34411;&#20043;requests&#24211;.md)
+* [Python&#29228;&#34411;&#20043;BeautifulSoup](Python&#29228;&#34411;&#20043;BeautifulSoup.md)
+* [Python&#29228;&#34411;&#20043;xpath&#35299;&#26512;](Python&#29228;&#34411;xpath&#35299;&#26512;.md)
+* [Python&#29228;&#34411;&#20043;selenium&#33258;&#21160;&#21270;](Python&#29228;&#34411;&#20043;selenium&#33258;&#21160;&#21270;.md)
+* [Python&#29228;&#34411;&#20043;&#26500;&#24314;&#33258;&#24049;&#30340;&#20195;&#29702;IP&#27744;](Python&#29228;&#34411;&#20043;&#26500;&#24314;&#33258;&#24049;&#30340;&#20195;&#29702;IP&#27744;.md)
+* [Python&#29228;&#34411;&#20043;&#30693;&#20046;&#38035;&#40060;&#36148;&#22270;&#29255;&#29228;&#21462;](Python&#29228;&#34411;&#20043;&#30693;&#20046;&#38035;&#40060;&#36148;&#22270;&#29255;&#29228;&#21462;.md)
+* [Python&#29228;&#34411;&#20043;uiautomator2&#24120;&#29992;&#25805;&#20316;](Python&#29228;&#34411;&#20043;uiautomator2&#24120;&#29992;&#25805;&#20316;.md)
+* [python&#29228;&#34411;&#20043;&#19968;&#23576;&#35770;&#22363;&#21457;&#24086;&#25968;&#25454;&#29228;&#21462;](python&#29228;&#34411;&#20043;&#19968;&#23576;&#35770;&#22363;&#21457;&#24086;&#25968;&#25454;&#29228;&#21462;.md)
+* [python&#29228;&#34411;&#20043;&#32593;&#26131;&#20113;&#38899;&#20048;&#27468;&#21333;&#27468;&#26354;&#21015;&#34920;&#21450;&#28909;&#38376;&#35780;&#35770;&#25968;&#25454;&#25235;&#21462;](python&#29228;&#34411;&#20043;&#32593;&#26131;&#20113;&#38899;&#20048;.md)
+* [&#19968;&#20123;&#27010;&#24565;](&#27010;&#24565;.md)
diff --git a/spiders/&#27010;&#24565;.md b/spiders/&#27010;&#24565;.md
@@ -0,0 +1,58 @@
+## 1.robots&#21327;&#35758;
+&#20063;&#21483;robots.txt&#65292;&#26159;&#23384;&#25918;&#22312;&#32593;&#31449;&#26681;&#30446;&#24405;&#19979;&#30340;&#25991;&#26412;&#25991;&#20214;&#65292;&#29992;&#26469;&#21578;&#35785;&#25628;&#32034;&#24341;&#25806;&#35813;&#32593;&#31449;&#21738;&#20123;&#20869;&#23481;&#26159;&#19981;&#24212;&#35813;&#34987;&#25235;&#21462;&#30340;&#65292;&#21738;&#20123;&#26159;&#21487;&#20197;&#25235;&#21462;&#30340;&#12290;
+
+&#22914;https://www.csdn.net/robots.txt
+```text
+User-agent: *
+Disallow: /scripts
+Disallow: /public
+Disallow: /css/
+Disallow: /images/
+Disallow: /content/
+Disallow: /ui/
+Disallow: /js/
+Disallow: /scripts/
+Disallow: /article_preview.html*
+Disallow: /tag/
+Disallow: /*?*
+Disallow: /link/
+
+Sitemap: https://www.csdn.net/sitemap-aggpage-index.xml
+Sitemap: https://www.csdn.net/article/sitemap.txt
+```
+## 2.&#24120;&#35265;&#30340;&#21453;&#29228;&#34411;&#25514;&#26045;
+#### 1.&#35831;&#27714;&#22836;&#26657;&#39564;
+&#19968;&#33324;&#32593;&#31449;&#20250;&#23545;&#35831;&#27714;&#22836;&#36827;&#34892;&#26657;&#39564;&#65292;&#27604;&#22914;Host&#65292;UA&#65292;Content-Type&#23383;&#27573;&#31561;&#65292;&#27169;&#25311;&#35831;&#27714;&#30340;&#26102;&#20505;&#65292;&#36825;&#20123;&#24120;&#35265;&#30340;&#35831;&#27714;&#22836;&#26368;&#22909;&#26159;&#24102;&#19978;&#12290;
+#### 2.IP&#35775;&#38382;&#27425;&#25968;&#25511;&#21046;
+&#21516;&#19968;&#20010;IP&#22320;&#22336;&#30701;&#26102;&#38388;&#20869;&#22823;&#37327;&#21457;&#36215;&#35831;&#27714;&#65292;&#20250;&#24341;&#36215;IP&#38480;&#21046;&#65292;&#35299;&#20915;&#26041;&#27861;&#26159;&#29992;&#20195;&#29702;IP&#65292;&#25110;&#32773;&#26500;&#24314;&#33258;&#24049;&#30340;&#20195;&#29702;IP&#27744;&#12290;
+#### 3.&#25509;&#21475;&#35831;&#27714;&#39057;&#29575;&#38480;&#21046;
+&#26377;&#30340;&#32593;&#31449;&#20250;&#25511;&#21046;&#25509;&#21475;&#35775;&#38382;&#30340;&#39057;&#29575;&#65292;&#27604;&#22914;&#26377;&#20123;&#26597;&#35810;&#25509;&#21475;&#65292;&#25511;&#21046;&#20004;&#19977;&#31186;&#35775;&#38382;&#19968;&#27425;&#12290;
+#### 4.&#25509;&#21475;&#35775;&#38382;&#27425;&#25968;&#38480;&#21046;
+&#27599;&#22825;&#38480;&#21046;&#26576;&#20010;IP&#25110;&#36134;&#21495;&#35775;&#38382;&#25509;&#21475;&#30340;&#27425;&#25968;&#65292;&#36798;&#21040;&#19978;&#38480;&#21518;&#20986;&#29616;&#20108;&#27425;&#39564;&#35777;&#25110;&#32773;&#30452;&#25509;&#23553;&#36134;&#21495;/IP&#65292;&#27604;&#22914;&#30331;&#24405;&#25509;&#21475;&#12290;
+#### 5.&#34892;&#20026;&#35748;&#35777;
+&#35831;&#27714;&#27425;&#25968;&#36807;&#22810;&#20250;&#20986;&#29616;&#20154;&#24037;&#35748;&#35777;&#65292;&#22914;&#22270;&#29255;&#39564;&#35777;&#30721;&#65292;&#28369;&#21160;&#35748;&#35777;&#65292;&#28857;&#20987;&#35748;&#35777;&#31561;&#65292;&#21487;&#20197;&#23545;&#25509;&#25171;&#30721;&#24179;&#21488;&#12290;
+#### 6.&#33258;&#21160;&#21270;&#29615;&#22659;&#26816;&#27979;
+selenium&#33258;&#21160;&#21270;&#24037;&#20855;&#26377;&#30340;&#32593;&#31449;&#20250;&#26816;&#27979;&#20986;&#26469;&#65292;&#22823;&#37096;&#20998;&#21487;&#20197;&#36890;&#36807;&#19979;&#38754;&#20004;&#31181;&#26041;&#24335;&#36339;&#36807;&#26816;&#27979;&#12290;&#22914;&#26524;&#19979;&#38754;&#20004;&#31181;&#26041;&#24335;&#26080;&#27861;&#22788;&#29702;&#65292;&#36824;&#21487;&#20197;&#23581;&#35797;&#25226;&#39029;&#38754;&#25913;&#20026;&#31227;&#21160;&#31471;&#39029;&#38754;(&#25163;&#26426;&#27169;&#24335;)&#65292;&#26368;&#21518;&#36824;&#26377;&#19968;&#31181;&#26041;&#27861;&#23601;&#26159;&#20195;&#29702;&#26381;&#21153;&#22120;&#25318;&#25130;&#20462;&#25913;js&#20195;&#30721;&#65292;&#25226;&#26816;&#27979;selenium&#30340;js&#20462;&#25913;&#25481;&#12290;
+```python
+options = webdriver.ChromeOptions()
+# &#36530;&#36991;&#37096;&#20998;&#32593;&#31449;selenium&#26816;&#27979;
+options.add_experimental_option('excludeSwitches', ['enable-automation'])
+options.add_experimental_option("useAutomationExtension", False)
+
+driver = webdriver.Chrome(executable_path=chromedriver_path, options=options)
+
+# &#36530;&#36991;&#37096;&#20998;&#32593;&#31449;selenium&#26816;&#27979;
+script = "Object.defineProperty(navigator, 'webdriver', {get: () => undefined});"
+driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": script})
+```
+
+&#23545;&#20110;&#31227;&#21160;&#31471;appium&#30340;&#26816;&#27979;&#65292;&#21487;&#20197;&#23581;&#35797;&#26367;&#25442;&#20026;uiautomator2&#23454;&#29616;&#33258;&#21160;&#21270;&#12290;
+
+#### 7.&#25968;&#25454;&#21160;&#24577;&#21152;&#36733;
+&#26377;&#30340;&#25968;&#25454;&#19981;&#26159;&#36890;&#36807;html&#39029;&#38754;&#30340;&#25509;&#21475;&#35831;&#27714;&#36820;&#22238;&#30340;&#65292;&#25235;&#21253;&#20998;&#26512;&#35831;&#27714;&#65292;&#25214;&#21040;&#27491;&#30830;&#30340;&#25968;&#25454;&#25509;&#21475;&#12290;
+#### 8.&#35831;&#27714;&#21442;&#25968;&#21152;&#23494;
+&#32593;&#26131;&#20113;&#38899;&#20048;&#30340;post&#35831;&#27714;&#30340;&#35831;&#27714;&#20307;&#23601;&#26159;&#21069;&#31471;&#32463;&#36807;js&#21152;&#23494;&#21518;&#35745;&#31639;&#24471;&#21040;&#30340;&#65292;&#38656;&#35201;&#36870;&#21521;js&#20195;&#30721;&#12290;
+#### 9.&#36820;&#22238;&#25968;&#25454;&#21152;&#23494;
+&#38656;&#35201;&#36870;&#21521;js&#20195;&#30721;&#65292;&#20998;&#26512;&#22914;&#20309;&#35299;&#23494;&#12290;&#36824;&#26377;&#19968;&#31181;&#20687;&#22823;&#20247;&#28857;&#35780;&#30340;&#35780;&#35770;&#65292;&#38656;&#35201;&#36890;&#36807;&#23450;&#20301;&#21435;&#25214;&#21040;&#25991;&#26412;&#12290;
+#### 10.&#21160;&#24577;&#26356;&#26032;cookies
+&#21326;&#20026;&#25163;&#26426;&#20113;&#26381;&#21153;&#65292;&#27599;&#27425;&#35831;&#27714;&#25509;&#21475;&#37117;&#20250;&#37325;&#26032;&#35774;&#32622;cookies&#65292;&#24182;&#19988;&#35831;&#27714;&#22836;&#21442;&#25968;&#20063;&#38656;&#35201;&#36319;&#30528;cookies&#19968;&#36215;&#21464;&#21270;&#12290;

-Original file line number
+Diff line change
@@ -1,11 +1,11 @@
 ## Python爬虫
 -1. [Python爬虫之requests库](Python爬虫之requests库.md)
 -
 -2. [Python爬虫之BeautifulSoup](Python爬虫之BeautifulSoup.md)
 -3. [Python爬虫之xpath解析](Python爬虫xpath解析.md)
 -4. [Python爬虫之selenium自动化](Python爬虫之selenium自动化.md)
 -5. [Python爬虫之构建自己的代理IP池](Python爬虫之构建自己的代理IP池.md)
 -6. [Python爬虫之知乎钓鱼贴图片爬取](Python爬虫之知乎钓鱼贴图片爬取.md)
 -7. [Python爬虫之uiautomator2常用操作](Python爬虫之uiautomator2常用操作.md)
 -8. [python爬虫之一尘论坛发帖数据爬取](python爬虫之一尘论坛发帖数据爬取.md)
 -9. [python爬虫之网易云音乐歌单歌曲列表及热门评论数据抓取](python爬虫之网易云音乐.md)
 +* [Python爬虫之requests库](Python爬虫之requests库.md)
 +* [Python爬虫之BeautifulSoup](Python爬虫之BeautifulSoup.md)
 +* [Python爬虫之xpath解析](Python爬虫xpath解析.md)
 +* [Python爬虫之selenium自动化](Python爬虫之selenium自动化.md)
 +* [Python爬虫之构建自己的代理IP池](Python爬虫之构建自己的代理IP池.md)
 +* [Python爬虫之知乎钓鱼贴图片爬取](Python爬虫之知乎钓鱼贴图片爬取.md)
 +* [Python爬虫之uiautomator2常用操作](Python爬虫之uiautomator2常用操作.md)
 +* [python爬虫之一尘论坛发帖数据爬取](python爬虫之一尘论坛发帖数据爬取.md)
 +* [python爬虫之网易云音乐歌单歌曲列表及热门评论数据抓取](python爬虫之网易云音乐.md)
 +* [一些概念](概念.md)