您的位置: 网站首页> selenium爬虫> 当前文章

强制等待、显式等待、隐式等待代码示例

老董-我爱我家房产SEO 2020-07-28 190围观,135赞

  场景

  1.打开百度,等待输入框元素和搜索按钮出现(输入框和搜索按钮出现才说明网页加载好了)

  2.输入www.python66.com,点击搜索

  3.在搜索结果页等待下一页这个元素出现(出现下一页的话,说明网页加载到了底部,页面的数据比较全了)

  一:强制等待

  1. # -*- coding: utf-8 -*-
  2.  
  3. import time
  4. from selenium import webdriver
  5. from selenium.webdriver.common.by import By
  6. from selenium.webdriver.support.ui import WebDriverWait
  7. from selenium.webdriver.support import expected_conditions as EC
  8. from selenium.webdriver.chrome.options import Options
  9.  
  10.  
  def get_html(kwd, retry=2):
      """Search Baidu for ``kwd`` using fixed sleeps and return the page title.

      This is the "forced wait" variant: it pauses for a fixed time after
      loading the home page and after submitting the search instead of
      waiting on specific elements. It uses the module-level ``driver``
      created by the ``__main__`` block.

      Args:
          kwd: Search text; it is typed into the search box one character
              at a time.
          retry: Number of additional attempts allowed after a failure.

      Returns:
          ``driver.title`` after a successful attempt, or ``None`` when the
          initial attempt and every retry failed.
      """
      try:
          driver.get('https://www.baidu.com/')
          # Forced wait: give the home page 5 seconds to finish loading.
          time.sleep(5)
          # Search input box. ``find_element(By.ID, ...)`` is used because the
          # ``find_element_by_id`` helper no longer exists in recent Selenium.
          search_box = driver.find_element(By.ID, 'kw')
          # Click first, then clear: sending keys straight away tends to
          # drop characters.
          search_box.click()
          search_box.clear()
          # Type the query one character at a time.
          for char in kwd:
              search_box.send_keys(char)
          # Search button.
          search_button = driver.find_element(By.ID, 'su')
          search_button.click()
          # Forced wait: give the result page 5 seconds to finish loading.
          time.sleep(5)
      except Exception as e:
          # Best-effort boundary: report the failure and retry if allowed.
          print(e)
          if retry > 0:
              # Back off before the next attempt.
              time.sleep(10)
              # Propagate the retry's result; without ``return`` a successful
              # retry would still yield ``None`` to the caller.
              return get_html(kwd, retry - 1)
          return None
      else:
          return driver.title
  37.  
  38.  
  if __name__ == "__main__":
      # Desktop Chrome user-agent string.
      # NOTE(review): it is defined but never applied to ``option``.
      pc_ua = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
      option = Options()
      prefs = {
          'profile.default_content_setting_values': {
              'images': 2,  # block image loading
              'notifications': 2,  # block notification pop-ups
          }
      }
      option.add_experimental_option('prefs', prefs)
      driver = webdriver.Chrome(options=option)
      try:
          title = get_html('www.python66.com')
          print(title)
      finally:
          # Always close the browser, even if the search raises or is
          # interrupted, so no Chrome process is left behind.
          driver.quit()

  二:显式等待

  1. # -*- coding: utf-8 -*-
  2.  
  3. import time
  4. from selenium import webdriver
  5. from selenium.webdriver.common.by import By
  6. from selenium.webdriver.support.ui import WebDriverWait
  7. from selenium.webdriver.support import expected_conditions as EC
  8. from selenium.webdriver.chrome.options import Options
  9.  
  10.  
  def get_html(kwd, retry=2):
      """Search Baidu for ``kwd`` using explicit waits and return the page title.

      This is the "explicit wait" variant: each step blocks until a specific
      element is visible or its timeout expires. It uses the module-level
      ``driver`` created by the ``__main__`` block.

      Args:
          kwd: Search text; it is typed into the search box one character
              at a time.
          retry: Number of additional attempts allowed after a failure.

      Returns:
          ``driver.title`` after a successful attempt, or ``None`` when the
          initial attempt and every retry failed.
      """
      try:
          driver.get('https://www.baidu.com/')
          # Search input box: explicit wait of at most 5 seconds.
          search_box = WebDriverWait(driver, 5).until(
              EC.visibility_of_element_located((By.ID, "kw"))
          )
          # Click first, then clear: sending keys straight away tends to
          # drop characters.
          search_box.click()
          search_box.clear()
          # Type the query one character at a time.
          for char in kwd:
              search_box.send_keys(char)
          # Search button: explicit wait of at most 5 seconds.
          search_button = WebDriverWait(driver, 5).until(
              EC.visibility_of_element_located((By.ID, "su"))
          )
          search_button.click()
          # Result page marker: explicit wait of at most 15 seconds for an
          # element with class "tt".
          # NOTE(review): the original comment calls this the "next page"
          # element but also mentions "related searches"; confirm which
          # element "tt" matches. Queries without it time out and are retried.
          WebDriverWait(driver, 15).until(
              EC.visibility_of_element_located((By.CLASS_NAME, "tt"))
          )
      except Exception as e:
          # Best-effort boundary: report the failure and retry if allowed.
          print(e)
          if retry > 0:
              # Back off before the next attempt.
              time.sleep(10)
              # Propagate the retry's result; without ``return`` a successful
              # retry would still yield ``None`` to the caller.
              return get_html(kwd, retry - 1)
          return None
      else:
          return driver.title
  41.  
  42.  
  if __name__ == "__main__":
      # Desktop Chrome user-agent string.
      # NOTE(review): it is defined but never applied to ``option``.
      pc_ua = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
      option = Options()
      prefs = {
          'profile.default_content_setting_values': {
              'images': 2,  # block image loading
              'notifications': 2,  # block notification pop-ups
          }
      }
      option.add_experimental_option('prefs', prefs)
      driver = webdriver.Chrome(options=option)
      try:
          title = get_html('www.python66.com')
          print(title)
      finally:
          # Always close the browser, even if the search raises or is
          # interrupted, so no Chrome process is left behind.
          driver.quit()

  三:隐式等待

  1. # -*- coding: utf-8 -*-
  2.  
  3.  
  4. import time
  5. from selenium import webdriver
  6. from selenium.webdriver.chrome.options import Options
  7.  
  8.  
  def get_html(kwd, retry=2):
      """Search Baidu for ``kwd`` relying on implicit waits and return the title.

      This is the "implicit wait" variant: no explicit pauses are made here;
      element lookups rely on the implicit wait configured on the
      module-level ``driver`` by the ``__main__`` block.

      Args:
          kwd: Search text; it is typed into the search box one character
              at a time.
          retry: Number of additional attempts allowed after a failure.

      Returns:
          ``driver.title`` after a successful attempt, or ``None`` when the
          initial attempt and every retry failed.
      """
      # Imported locally because this listing does not import ``By`` at the
      # top; ``find_element(By.ID, ...)`` replaces the ``find_element_by_id``
      # helper, which no longer exists in recent Selenium.
      from selenium.webdriver.common.by import By

      try:
          driver.get('https://www.baidu.com/')
          # Search input box.
          search_box = driver.find_element(By.ID, 'kw')
          # Click first, then clear: sending keys straight away tends to
          # drop characters.
          search_box.click()
          search_box.clear()
          # Type the query one character at a time.
          for char in kwd:
              search_box.send_keys(char)
          # Debug output: window handles and URL before submitting.
          print(driver.window_handles, driver.current_url)
          # Search button.
          search_button = driver.find_element(By.ID, 'su')
          search_button.click()
          # Footer element; this lookup is also subject to the implicit wait.
          # NOTE(review): confirm that id "help" exists only on the result
          # page, otherwise this lookup does not wait for the results.
          driver.find_element(By.ID, 'help')
      except Exception as e:
          # Best-effort boundary: report the failure and retry if allowed.
          print(e)
          if retry > 0:
              # Back off before the next attempt.
              time.sleep(10)
              # Propagate the retry's result; without ``return`` a successful
              # retry would still yield ``None`` to the caller.
              return get_html(kwd, retry - 1)
          return None
      else:
          # Debug output: window handles and URL after the search.
          print(driver.window_handles, driver.current_url)
          return driver.title
  35.  
  36.  
  if __name__ == "__main__":
      # NOTE(review): this listing contains another
      # ``if __name__ == "__main__":`` block right after this one; both run
      # when the script is executed.

      # Desktop Chrome user-agent string (defined but not applied to the options).
      pc_ua ='Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'

      # Chrome content settings: 2 blocks image loading and notification pop-ups.
      prefs = {
          'profile.default_content_setting_values': dict(images=2, notifications=2),
      }
      option = Options()
      option.add_experimental_option('prefs', prefs)

      driver = webdriver.Chrome(options=option)
      # Implicit wait of 30 seconds (the original comment said 5 seconds).
      driver.implicitly_wait(30)

      title = get_html('python')
      print(title)
  54.  
  if __name__ == "__main__":
      # Desktop Chrome user-agent string (defined but not applied to the options).
      pc_ua = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'

      # Chrome content settings: 2 blocks image loading and notification pop-ups.
      prefs = {
          'profile.default_content_setting_values': dict(images=2, notifications=2),
      }
      option = Options()
      option.add_experimental_option('prefs', prefs)

      driver = webdriver.Chrome(options=option)
      # Implicit wait of 5 seconds.
      driver.implicitly_wait(5)

      title = get_html('www.python66.com')
      print(title)
      # driver.quit() is intentionally not called here. Author's note: with
      # driver.quit() the script exits very quickly and the title is still
      # the Baidu home page title.
  72.  
['CDwindow-0C09496C4890920231EFAB157730936A'] https://www.baidu.com/
['CDwindow-0C09496C4890920231EFAB157730936A'] https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd=python&rsv_t=8a68lZY5YfYYb3CvaeMfYM9TJANA52kQOO484WvXZOLSo7BSB5x0t1xvfgE&rsv_enter=0&rsv_dl=tb&rsv_sug3=7&rsv_sug1=1&rsv_sug7=100&inputT=270&rsv_sug4=325
python_百度搜索

很赞哦!

python编程网提示:转载请注明来源www.python66.com。
有宝贵意见可添加站长微信(底部),获取技术资料请到公众号(底部)。同行交流请加群 python学习会

文章评论

    强制等待、显式等待、隐式等待代码示例文章写得不错,值得赞赏

站点信息

  • 网站程序:Laravel
  • 客服微信:a772483200