import os
import random
import time
from urllib.parse import urlencode

import pandas as pd
import requests
from fake_useragent import UserAgent
from lxml import etree
from requests.adapters import HTTPAdapter


class SteamCsgo:
    """Scrape Steam Community Market search results (appid 730) into a CSV file.

    Each page is requested from the market ``search/render`` endpoint, the
    returned HTML fragment is parsed into rows (item name, price, number of
    active listings) and the rows are appended to ``save_file_path``.
    """

    # Number of listings requested per page.  The ``start`` offset advances by
    # the same amount, so consecutive windows neither overlap nor leave gaps.
    PAGE_SIZE = 100
    # Connection-level retry budget handed to the transport adapter.
    MAX_RETRIES = 5
    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30
    # CSV column order: item name, item price, current number of listings.
    CSV_COLUMNS = ['饰品名称', '饰品价格', '当前在售数量']

    def __init__(self, start, save_file_path, page_num):
        """Store the crawl configuration.

        Args:
            start: Offset of the first listing to fetch.
            save_file_path: Path of the CSV file rows are appended to.
            page_num: Number of pages to request.
        """
        # Offset of the first listing to scrape.
        self.start = start
        # How many pages to scrape.
        self.page_num = page_num
        # Where the CSV output is stored.
        self.save_file_path = save_file_path
        # Rows parsed but not yet flushed to disk.
        self.item_datas = []
        # URL prefix; the encoded query parameters are appended to it.
        self.base_url = 'https://steamcommunity.com/market/search/render/?query=&'
        # Lazily created UserAgent generator (see init_headers).
        self._user_agent = None

    def get_page(self):
        """Fetch ``page_num`` pages, parse each one and append it to the CSV.

        A page that fails (transport error, non-200 status, unusable payload)
        is reported on stdout and skipped; the crawl then continues with the
        next page.

        Returns:
            None.
        """
        fetched = 0
        with requests.Session() as session:
            # Retries are configured on the transport adapter; assigning
            # ``requests.DEFAULT_RETRIES`` is not read by the library.
            adapter = HTTPAdapter(max_retries=self.MAX_RETRIES)
            session.mount('https://', adapter)
            session.mount('http://', adapter)
            # Ask the server not to keep connections alive between requests
            # (the intent of the former ``s.keep_alive = False``).
            session.headers.update({'Connection': 'close'})

            for page in range(self.page_num):
                page_no = page + 1  # 1-based page number used in messages
                params = {
                    'start': self.start + self.PAGE_SIZE * page,
                    'count': self.PAGE_SIZE,
                    'search_descriptions': 0,
                    # 'sort_column': 'price',  # optional: sort by price
                    'sort_dir': 'asc',
                    'appid': 730,
                }
                current_url = self.base_url + urlencode(params)

                try:
                    res = session.get(
                        current_url,
                        headers=self.init_headers(),
                        timeout=self.REQUEST_TIMEOUT,
                    )
                except requests.RequestException as e:
                    # Covers connection errors as well as timeouts.
                    print(e)
                    print('{}页获取失败'.format(page_no))
                    continue

                if res.status_code != 200:
                    print('失败')
                    continue

                fetched += 1
                if fetched % 10 == 0:
                    # Long pause after every tenth successful request.
                    time.sleep(60)
                print('已成功获取第{}页'.format(page_no))

                try:
                    page_info = res.json()
                except ValueError as e:
                    # Body was not valid JSON.
                    print(e)
                    print('{}页获取失败'.format(page_no))
                    continue
                if not isinstance(page_info, dict) or 'results_html' not in page_info:
                    # Payload lacks the HTML fragment parse_page needs.
                    print('{}页获取失败'.format(page_no))
                    continue

                self.parse_page(page_info)
                self.save_to_csv()
                # Random delay of up to 15 seconds between pages.
                time.sleep(random.random() * 15)
        return None

    @staticmethod
    def _first(nodes):
        """Return the first XPath result, or None when there is none."""
        return nodes[0] if nodes else None

    def parse_page(self, page_info):
        """Extract listing rows from one response and queue them for saving.

        Args:
            page_info: Decoded JSON response; its ``results_html`` entry holds
                the HTML fragment with the listing rows.

        Raises:
            KeyError: If ``page_info`` has no ``results_html`` entry.
        """
        page_html = (
            page_info['results_html']
            .replace('\r', '')
            .replace('\n', '')
            .replace('\t', '')
        )
        if not page_html.strip():
            # Nothing to parse; avoid handing an empty document to lxml.
            return
        tree = etree.HTML(page_html)
        if tree is None:
            return

        for row in tree.xpath('//a[@class="market_listing_row_link"]'):
            # A field missing from a row becomes None (an empty CSV cell)
            # instead of raising IndexError and aborting the crawl.
            info = {
                # Item name.
                '饰品名称': self._first(
                    row.xpath('.//span[@class="market_listing_item_name"]/text()')
                ),
                # Starting price.
                '饰品价格': self._first(
                    row.xpath('.//span[@class="normal_price"]/text()')
                ),
                # Number of listings currently on sale.
                '当前在售数量': self._first(
                    row.xpath(
                        './/span[@class="market_listing_num_listings_qty"]/@data-qty'
                    )
                ),
            }
            self.item_datas.append(info)

    def save_to_csv(self):
        """Append the queued rows to the CSV file and clear the queue.

        The header row is written only when the file does not exist yet or is
        empty, so repeated appends produce a single header.
        """
        df = pd.DataFrame(self.item_datas).reindex(columns=self.CSV_COLUMNS)
        has_content = (
            os.path.exists(self.save_file_path)
            and os.path.getsize(self.save_file_path) > 0
        )
        df.to_csv(
            self.save_file_path,
            mode='a',
            encoding='utf-8',
            header=not has_content,
            index=False,
        )
        if not has_content:
            print('已创建' + self.save_file_path)
        self.item_datas = []

    def init_headers(self):
        """Build request headers with a randomly chosen User-Agent.

        Returns:
            dict: Headers containing ``User-Agent`` and ``Accept-Language``.
        """
        # Create the UserAgent helper once and reuse it for every request.
        user_agent = getattr(self, '_user_agent', None)
        if user_agent is None:
            user_agent = self._user_agent = UserAgent()
        headers = {
            'User-Agent': user_agent.random,
            'Accept-Language': 'zh-CN',
            # 'Referer': 'https://steamcommunity.com/market/search?appid=730',
        }
        return headers


if __name__ == '__main__':
    S = SteamCsgo(6500, './CsgoSteam.csv', 1000)
    S.get_page()