分类目录

链接

2022 年 9 月
          1  2  3  4
 5  6  7  8  9 10 11
12 13 14 15 16 17 18
19 20 21 22 23 24 25
26 27 28 29 30

近期文章

热门标签

新人福利,免费薅羊毛

现在位置:    首页 > LINUX, Python, 大数据 > 正文
centos安装chrome+chromedriver
LINUX, Python, 大数据 暂无评论 阅读(93)

一、下载对应的版本:

driver:

https://chromedriver.storage.googleapis.com/index.html?path=103.0.5060.24/

chrome:

http://dist.control.lth.se/public/CentOS-7/x86_64/google.x86_64/

 

二、安装

unzip chromedriver.zip

yum install -y ./google-chrome-stable-<版本号>.x86_64.rpm   (安装上一步下载的 chrome rpm 包,版本需与 chromedriver 对应)

 

三、使用python做中转代理

Main.py

  1. import os
  2. from fastapi importFastAPI,Request
  3. import uvicorn
  4. from fastapi importFastAPI
  5. fromGoogleUtilimportGoogleUtil
  6. from fastapi.responses importHTMLResponse
  7. app =FastAPI()
  8. @app.get("/")
  9. def home():
  10. return'api is ok'
  11. @app.get("/api", response_class=HTMLResponse)
  12. def request(request :Request):
  13.     html =''
  14.     http =None
  15. try:
  16.         http =GoogleUtil()
  17.         url = request.query_params.get('url',None)
  18.         html = http.getString(url, params={})
  19.         http.close()
  20. exceptExceptionas e:
  21. print(e)
  22.         http.close()
  23. return html
  24. if __name__ =='__main__':
  25. print('chromedriver:',os.getcwd()+'/chromedriver')
  26. print('api is running...')
  27.     uvicorn.run(app, host="0.0.0.0", port=8080, log_level='debug')

 

GoogleUtil.py

import json
import os
import random
import time
from urllib.parse import urlencode

from lxml import etree
from selenium import webdriver

from Config import Config


  12. class GoogleUtil:
  13.  
  14.     # driver.get("http://sl.baidu.com/#/home")
  15.  
  16.     def __init__(self):
  17.         self.chrome_options = webdriver.ChromeOptions()
  18.         self.chrome_options.add_argument("no-sandbox")
  19.         self.chrome_options.add_argument("--user-agent=" + random.choice(Config.user_agents))
  20.         self.chrome_options.add_argument('blink-settings=imagesEnabled=false')  # 不加载图片, 提升速度
  21.         self.chrome_options.add_argument('--headless')  # 无界面化浏览器
  22.         self.chrome_options.add_argument('window-size=1080x1920')  # 指定浏览器分辨率
  23.         # chrome_options.add_argument('--no-sandbox')
  24.         self.chrome_options.add_argument('--disable-gpu')
  25.         # chrome_options.add_argument('--disable-dev-shm-usage')
  26.  
  27.         # 文件下载地址
  28.         self.download_dir = os.getcwd() + '/downloads/'
  29.         prefs = {
  30.             'profile.default_content_settings.popups': 0,
  31.             'download.default_directory': self.download_dir
  32.         }
  33.         if not os.path.exists(self.download_dir):
  34.             os.mkdir(self.download_dir)
  35.         self.chrome_options.add_experimental_option("prefs", prefs)
  36.  
  37.         self.chromedriver = os.getcwd() + '/chromedriver'
  38.         os.environ["webdriver.chrome.driver"] = self.chromedriver
  39.         self.driver = webdriver.Chrome(chrome_options=self.chrome_options, executable_path=self.chromedriver)
  40.         pass
  41.  
  42.     def close(self):
  43.         self.driver.close()
  44.  
  45.     def get(self, url, params=None, headers=Config.headers):
  46.         print('google driver...')
  47.  
  48.         if params:
  49.             if url.find('?'):
  50.                 url = url + '?'
  51.             for p in params:
  52.                 url += '>' + p + '=' + str(params[p])
  53.  
  54.         try:
  55.             print(url)
  56.             self.driver.get(url)
  57.             self.driver.implicitly_wait(1)
  58.             time.sleep(2)
  59.             html = self.driver.page_source
  60.  
  61.             if html.find("Our systems have detected") > 0:
  62.                 print("Your IP have been blocked by " + url)
  63.  
  64.                 print("change user-agent, retry")
  65.                 self.chrome_options.add_argument("--user-agent=" + random.choice(Config.user_agents))
  66.                 self.driver = webdriver.Chrome(chrome_options=self.chrome_options, executable_path=self.chromedriver)
  67.                 self.driver.get(url)
  68.                 time.sleep(2)
  69.                 html = self.driver.page_source
  70.  
  71.             res = etree.HTML(html)
  72.  
  73.             return res
  74.         except Exception as e:
  75.             print(e)
  76.             return etree.HTML("<html></html>")
  77.  
  78.     def getString(self, url, params):
  79.         Config.headers["User-Agent"] = random.choice(Config.user_agents)
  80.  
  81.         try:
  82.             if params:
  83.                 if url.find('?'):
  84.                     url = url + '?'
  85.                 for p in params:
  86.                     url += '>' + p + '=' + str(params[p])
  87.  
  88.             self.driver.get(url)
  89.             res = self.driver.page_source
  90.             return res
  91.         except Exception as e:
  92.             print(e)
  93.             return "<html></html>"
  94.     def getJson(self, url, params):
  95.         Config.headers["User-Agent"] = random.choice(Config.user_agents)
  96.  
  97.         try:
  98.             if params:
  99.                 if url.find('?'):
  100.                     url = url + '?'
  101.                 for p in params:
  102.                     url += '>' + p + '=' + str(params[p])
  103.  
  104.             self.driver.get(url)
  105.             res = json.loads(self.driver.page_source)
  106.             return res
  107.         except Exception as e:
  108.             print(e)
  109.             return {}

============ 欢迎各位老板打赏~ ===========

【上篇】
【下篇】

本文版权归Bruce's Blog所有,转载引用请完整注明以下信息:
本文作者:Bruce
本文地址:centos安装chrome+chromedriver | Bruce's Blog

发表评论

留言无头像?