我的项目是为一家代理机构制作一个酒店价格竞争表。这是一个痛苦的操作,我想将其自动化,代码正确地提取了我想要提取的酒店名称和价格,但它只对第一家酒店正常工作,我不知道问题出在哪里。我为您提供了代码和输出,如果您能帮助我,我提前谢谢您。
注意:代码 2 工作正常,但当我添加更多操作时,问题就出现了
代码 1
#!/usr/bin/env python # coding: utf-8 import time from time import sleep import ast import pandas as pd from selenium import webdriver from selenium.webdriver.support.ui import WebDriverWait, Select from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.by import By driver = webdriver.Chrome("C:\\Users\\marketing2\\Documents\\chromedriver.exe") driver.get('https://tn.tunisiebooking.com/') # params to select params = { 'destination': 'Tozeur', 'date_from': '11/09/2021', 'date_to': '12/09/2021', 'bedroom': '1' } # select destination destination_select = Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'ville_des')))) destination_select.select_by_value(params['destination']) # select bedroom bedroom_select = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'select_ch')))) bedroom_select.select_by_value(params['bedroom']) # select dates script = f"document.getElementById('checkin').value ='{params['date_from']}';" script += f"document.getElementById('checkout').value ='{params['date_to']}';" script += f"document.getElementById('depart').value ='{params['date_from']}';" script += f"document.getElementById('arrivee').value ='{params['date_to']}';" driver.execute_script(script) # submit form btn_rechercher = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="boutonr"]'))) btn_rechercher.click() urls = [] hotels = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[starts-with(@id,'produit_affair')]"))) for hotel in hotels: link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href") urls.append(link) for url in urls: driver.get(url) def existsElement(xpath): try: driver.find_element_by_id(xpath); except NoSuchElementException: return "false" else: return "true" if (existsElement('result_par_arrangement')=="false"): btn_t = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="moteur_rech"]/form/div/div[3]/div'))) btn_t.click() sleep(10) else : pass try: name = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//div[@class='bloc_titre_hotels']/h2"))).text arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]") opt = arropt.find_element_by_tag_name("b").text num = len(arropt.find_elements_by_tag_name("option")) optiondata = {} achats = {} marges= {} selection = Select(driver.find_element_by_id("arrangement")) for i in range(num): try: selection = Select(driver.find_element_by_id("arrangement")) selection.select_by_index(i) time.sleep(2) arr = driver.find_element_by_xpath("//select[@id='arrangement']/option[@selected='selected']").text prize = driver.find_element_by_id("prix_total").text optiondata[arr] = (int(prize)) btn_passe = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="resultat"]/div/form/div/div[2]/div[1]/div[2]/div[2]/div'))) btn_passe.click() # params to select params = { 'civilite_acheteur': 'Mlle', 'prenom_acheteur': 'test', 'nom_acheteur': 'test', 'e_mail_acheteur': 'test@gmail.com', 'portable_acheteur': '22222222', 'ville_acheteur': 'Test', } # select civilite civilite_acheteur = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.NAME, 'civilite_acheteur')))) civilite_acheteur.select_by_value(params['civilite_acheteur']) # saisir prenom script = f"document.getElementsByName('prenom_acheteur')[0].value ='{params['prenom_acheteur']}';" script += f"document.getElementsByName('nom_acheteur')[0].value ='{params['nom_acheteur']}';" script += f"document.getElementsByName('e_mail_acheteur')[0].value ='{params['e_mail_acheteur']}';" script += f"document.getElementsByName('portable_acheteur')[0].value ='{params['portable_acheteur']}';" script += f"document.getElementsByName('ville_acheteur')[0].value ='{params['ville_acheteur']}';" driver.execute_script(script) # submit form btn_agence = driver.find_element_by_id('titre_Nabeul') btn_agence.click() btn_continuez = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'boutonr'))) btn_continuez.click() achat = int(driver.find_element_by_xpath('/html/body/header/div[2]/div[1]/div[1]/div[4]/div[2]/div[2]').text.replace(' TND', '')) achats[arr]=achat marge =int(((float(prize) - float(achat)) / float(achat)) * 100); marges[arr]=marge optiondata[arr]=prize,achat,marge driver.get(url) btn_display = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="moteur_rech"]/form/div/div[3]/div'))) btn_display.click() sleep(10) except StaleElementReferenceException: pass except NoSuchElementException: pass s="- {} | {} : {}".format(name, opt, optiondata) print(s) ds = [] for l in s.splitlines(): d = l.split('-') if len(d) > 1: df = pd.DataFrame(ast.literal_eval(d[1].strip())) ds.append(df) for df in ds: df.reset_index(drop=True, inplace=True) df = pd.concat(ds, axis= 1) cols = df.columns cols = [((col.split('.')[0], col)) for col in df.columns] df.columns=pd.MultiIndex.from_tuples(cols) print(df.T) #print("{} : {} - {}".format(name, opt, optiondata))
代码 2
from selenium.webdriver.support.ui import Select from selenium.common.exceptions import StaleElementReferenceException,NoSuchElementException urls = [] hotels = driver.find_elements_by_xpath("//div[starts-with(@id,'produit_affair')]") for hotel in hotels: link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href") urls.append(link) for url in urls: driver.get(url) try: name = driver.find_element_by_xpath("//div[@class='bloc_titre_hotels']/h2").text arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]") opt = arropt.find_element_by_tag_name("b").text num = len(arropt.find_elements_by_tag_name("option")) optiondata = {} selection = Select(driver.find_element_by_id("arrangement")) for i in range(num): try: selection = Select(driver.find_element_by_id("arrangement")) selection.select_by_index(i) time.sleep(2) arr = driver.find_element_by_xpath("//select[@id='arrangement']/option[@selected='selected']").text prize = driver.find_element_by_id("prix_total").text optiondata[arr]=prize except StaleElementReferenceException: pass except NoSuchElementException: pass print("{} : {} - {} - {}".format(name,opt,num,optiondata))
boutonr
time.sleep()
WebDriverWait(...)
我不会说法语,所以我不明白你的代码中想要表达什么,但下面这个简化的例子应该可以帮助你理解这个原理。
#!/usr/bin/env python # coding: utf-8 import time from selenium import webdriver from selenium.webdriver.support.ui import WebDriverWait, Select from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.by import By driver = webdriver.Chrome("C:\chromedriver.exe") driver.get('https://tn.tunisiebooking.com/') # params to select params = { 'destination': 'Nabeul', 'date_from': '25/08/2021', 'date_to': '26/08/2021', 'bedroom': '1' } # select destination destination_select = Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'ville_des')))) destination_select.select_by_value(params['destination']) # select bedroom bedroom_select = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'select_ch')))) bedroom_select.select_by_value(params['bedroom']) # select dates script = f"document.getElementById('checkin').value ='{params['date_from']}';" script += f"document.getElementById('checkout').value ='{params['date_to']}';" script += f"document.getElementById('depart').value ='{params['date_from']}';" script += f"document.getElementById('arrivee').value ='{params['date_to']}';" driver.execute_script(script) # submit form btn_rechercher = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//div[@onclick="return submit_hotel_recherche()"]'))) btn_rechercher.click() urls = [] hotels = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[starts-with(@id,'produit_affair')]"))) for hotel in hotels: link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href") urls.append(link) for url in urls: driver.get(url) try: name = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//div[@class='bloc_titre_hotels']/h2"))).text arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]") opt = arropt.find_element_by_tag_name("b").text num = len(arropt.find_elements_by_tag_name("option")) optiondata = {} achats = {} marges= {} for i in range(num): try: selection = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'arrangement')))).select_by_index(i) time.sleep(0.5) arr = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//select[@id='arrangement']/option[@selected='selected']"))).text prize = driver.find_element_by_id("prix_total").text optiondata[arr] = int(prize) except StaleElementReferenceException: pass print("{} : {} - {}".format(name, opt, optiondata)) except NoSuchElementException: pass driver.quit()
结果:
Byzance Nabeul : Chambre Double - {'All Inclusive soft': 93, 'Demi Pension': 38, 'Petit Dejeuner': 28, 'Pension Complete': 78} Palmyra Club Nabeul Nabeul : Double Standard - {'All Inclusive soft': 92}
以下代码转到付款页面并提取那里的所有信息:
#!/usr/bin/env python # coding: utf-8 import time from selenium import webdriver from selenium.webdriver.support.ui import WebDriverWait, Select from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.by import By driver = webdriver.Chrome("/usr/local/bin/chromedriver") driver.get('https://tn.tunisiebooking.com/') # params to select params = { 'destination': 'Nabeul', 'date_from': '29/08/2021', 'date_to': '30/08/2021', 'bedroom': '1' } # select destination destination_select = Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'ville_des')))) destination_select.select_by_value(params['destination']) # select bedroom bedroom_select = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'select_ch')))) bedroom_select.select_by_value(params['bedroom']) # select dates script = f"document.getElementById('checkin').value ='{params['date_from']}';" script += f"document.getElementById('checkout').value ='{params['date_to']}';" script += f"document.getElementById('depart').value ='{params['date_from']}';" script += f"document.getElementById('arrivee').value ='{params['date_to']}';" driver.execute_script(script) # submit form btn_rechercher = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//div[@onclick="return submit_hotel_recherche()"]'))) btn_rechercher.click() urls = [] hotels = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[starts-with(@id,'produit_affair')]"))) for hotel in hotels: link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href") urls.append(link) for url in urls: driver.get(url) try: name = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//div[@class='bloc_titre_hotels']/h2"))).text arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]") opt = arropt.find_element_by_tag_name("b").text num = len(arropt.find_elements_by_tag_name("option")) optiondata = {} achats = {} marges= {} try: selection = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'arrangement')))) time.sleep(0.5) arr = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//select[@id='arrangement']/option[@selected='selected']"))).text prize = driver.find_element_by_id("prix_total").text optiondata[arr] = (int(prize)) btn_passe = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'resa'))) btn_passe.click() tot = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'montant_total_apres_code'))) total = int(tot.text.replace(' €', '')) # params to select params = { 'civilite_acheteur': 'Mlle', 'prenom_acheteur': 'test', 'nom_acheteur': 'test', 'e_mail_acheteur': 'test@gmail.com', 'portable_acheteur': '22222222', 'ville_acheteur': 'Test', } # select civilite civilite_acheteur = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.NAME, 'civilite_acheteur')))) civilite_acheteur.select_by_value(params['civilite_acheteur']) # saisir prenom script = f"document.getElementsByName('prenom_acheteur')[0].value ='{params['prenom_acheteur']}';" script += f"document.getElementsByName('nom_acheteur')[0].value ='{params['nom_acheteur']}';" script += f"document.getElementsByName('e_mail_acheteur')[0].value ='{params['e_mail_acheteur']}';" script += f"document.getElementsByName('portable_acheteur')[0].value ='{params['portable_acheteur']}';" script += f"document.getElementsByName('ville_acheteur')[0].value ='{params['ville_acheteur']}';" driver.execute_script(script) # submit form btn_agence = driver.find_element_by_class_name('continuez_resa') btn_agence.click() achat1 = int(driver.find_element_by_id('montant_a_payer').text.replace(' €', '')) achat = int(driver.find_element_by_id('montant_restant').text.replace(' €', '')) achat3 = float(driver.find_element_by_xpath('//div[@class="ligne_interne_total"]/div[3]/div[@class="prix_total1 text_shadow"]').text.replace(' TND', '')) achats[arr]=achat marge =int(((float(prize) - float(achat)) / float(achat)) * 100); marges[arr]=marge optiondata[arr]=prize,total,achat1,achat,achat3,marge except StaleElementReferenceException: pass print("{} : {} - {}".format(name, opt, optiondata)) except NoSuchElementException: pass driver.quit()
输出:
Byzance Nabeul : Chambre Double - {'Petit Dejeuner': (36, 41, 12, 29, 4.0, 24)}
在哪里:
36 = Prix Total 41 = Montant Total 12 = Montant de l'acompte 29 = Vous payerez le reste à votre arrivée à l'hôtel 4.0 = Total taxe de séjour à payer sur place à l'hôtel est 24 = Marges
酒店页面: