我正在读取一个 Docx 文件 [此处是链接],从中解析出一些文本,然后使用 Python 的 Selenium 绑定和 chromedriver,尝试点击页面源码 (driver.page_source) 中一个隐藏的 option 值。我知道该选项目前无法被选中。以下是我到目前为止的代码:
import re
import time

from docx import getdocumenttext, opendocx
from requests import Session
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


def read_return(word_file):
    """Extract the paragraph text of a .docx file as one flat string.

    Every paragraph is UTF-8 encoded and stripped of surrounding newline,
    tab and carriage-return characters. The list of paragraphs is then
    rendered with ``str()`` and its square brackets are removed, so the
    paragraphs stay quoted and separated by ``', '`` (the field patterns
    used further down rely on that layout). The same text is also written
    to ``sample.txt``.

    Args:
        word_file: Path of the .docx document to read.

    Returns:
        The flattened paragraph text.
    """
    document = opendocx(word_file)
    paragraphs = [
        paratext.encode("utf-8").strip('\n').strip('\t').strip('\r')
        for paratext in getdocumenttext(document)
    ]
    flattened = str(paragraphs).replace("]", "").replace("[", "")
    with open('sample.txt', 'wb') as writer:
        writer.write(flattened)
    return flattened


def checked_int(text, lower, upper):
    """Convert *text* to int and require ``lower <= result < upper``.

    Raises:
        ValueError: If *text* is not an integer or lies outside the range.
    """
    number = int(text)
    if not lower <= number < upper:
        raise ValueError(
            "%r is outside the supported range [%d, %d)" % (text, lower, upper)
        )
    return number


def click_option(driver, select_name, option_value):
    """Click the <option> with the given value inside the named <select>."""
    xpath = '//select[@name="%s"]/option[@value="%s"]' % (select_name, option_value)
    driver.find_element(By.XPATH, value=xpath).click()


word_file = read_return('Taxatierapport SEK - Auto Centrum Bollenstreek - Peugeot 308 - 5603.docx')


def extract_field(pattern):
    """Return the first match of *pattern* in the report text, cleaned up.

    Raises:
        IndexError: If the pattern does not occur in the report text.
    """
    found = re.findall(pattern, word_file, re.DOTALL)[0].strip()
    # NOTE(review): the two entity literals are a reconstruction; the source
    # had them mangled into bare quote characters. Confirm against the text
    # that read_return() actually produces.
    return found.replace("&#39;", "'").replace("&quot;", "'")


Voertuig = extract_field("::OBJECT::', '(.+?)'")
Merk = extract_field("::MERK::', '(.+?)'")
Model = extract_field("::TYPE::', '(.+?)'")
TOELATING = extract_field("::BOUWJAAR 1STE TOELATING::', '(.+?)'")
# The registration date is split on "-" into three parts, used below as
# day (d1), month (d2) and year (d3).
d1, d2, d3 = TOELATING.split("-")[:3]
TRANSMISSIE = extract_field("::TRANSMISSIE::', '(.+?)'")
BRANDSTOF = extract_field("::BRANDSTOF::', '(.+?)'")
print("%r\n%r\n%r\n%r\n%r\n%r\n%r\n%r\n" % (Voertuig, Merk, Model, d1, d2, d3, TRANSMISSIE, BRANDSTOF))

# Option values of the vehicle-type dropdown, keyed by the report's wording.
VOERTUIG_OPTION_VALUES = {
    "Personenauto": 1,
    "Personenbussen": 7,
    "Bedrijfsauto's tot 3.5 ton": 3,
    "Bedrijfsauto's 4x4": 2,
    "Motoren": 5,
}
if Voertuig not in VOERTUIG_OPTION_VALUES:
    # Fail before the browser is started instead of hitting an undefined
    # name once the form is being filled in.
    raise ValueError("Unknown vehicle type: %r" % (Voertuig,))
value = VOERTUIG_OPTION_VALUES[Voertuig]

# Same accepted ranges as before: day 0-31, month 0-12, year 1972-2017.
dvalue1 = checked_int(d1, 0, 32)
dvalue2 = checked_int(d2, 0, 13)
dvalue3 = checked_int(d3, 1972, 2018)

driver = webdriver.Chrome('chromedriver.exe')
try:
    driver.get('https://autotelexpro.nl/LoginPage.aspx')
    # NOTE(review): the login credentials are hard-coded in this script.
    driver.find_element(By.XPATH, value='//*[@id="ctl00_cp_LogOnView_LogOn_txtVestigingsnummer"]').send_keys('3783')
    driver.find_element(By.XPATH, value='//*[@id="ctl00_cp_LogOnView_LogOn_txtGebruikersnaam"]').send_keys('Frank')
    driver.find_element(By.XPATH, value='//*[@id="ctl00_cp_LogOnView_LogOn_Password"]').send_keys('msnauto2016')
    driver.find_element(By.XPATH, value='//*[@id="ctl00_cp_LogOnView_LogOn_btnLogin"]').click()
    time.sleep(10)  # fixed pause after submitting the login form

    click_option(driver, "ctl00$cp$ucSearch_Manual$ddlVoertuigType", value)
    click_option(driver, "ctl00$cp$ucSearch_Manual$ddlBouwdag", dvalue1)
    click_option(driver, "ctl00$cp$ucSearch_Manual$ddlBouwmaand", dvalue2)
    click_option(driver, "ctl00$cp$ucSearch_Manual$ddlBouwjaar", dvalue3)
    # The brand option is still the fixed value "130"; the parsed Merk is
    # not used for this lookup.
    click_option(driver, "ctl00$cp$ucSearch_Manual$ddlMerk", "130")
    time.sleep(5)
finally:
    # Always close the browser, also when one of the steps above fails.
    driver.quit()
因此,我改用 requests 模块向该链接发送 POST 请求,并成功拿到了包含所需 option 数据的响应,如下所示:
<select name="ctl00$cp$ucSearch_Manual$ddlMerk" onchange="updateInputForServerNoPB();InvalidateVehicleSearchResult();setTimeout('__doPostBack(\'ctl00$cp$ucSearch_Manual$ddlMerk\',\'\')', 0)" id="ctl00_cp_ucSearch_Manual_ddlMerk" class="NormalDropdownlist" style="width:174px;"> <option selected="selected" value="-1">- Kies merk -</option> <option value="95">Alfa Romeo</option> <option value="154">Aston Martin</option> <option value="96">Audi</option> <option value="97">Bentley</option> <option value="98">BMW</option> <option value="352">Bugatti</option> <option value="100">Cadillac</option> <option value="342">Chevrolet</option> <option value="101">Chevrolet USA</option> <option value="102">Chrysler</option> <option value="103">Citroen</option> <option value="337">Corvette</option> <option value="104">Dacia</option> <option value="105">Daihatsu</option> <option value="166">Daimler</option> <option value="162">Dodge</option> <option value="106">Donkervoort</option> <option value="107">Ferrari</option> <option value="108">Fiat</option> <option value="94">Ford</option> <option value="111">Honda</option> <option value="340">Hummer</option> <option value="112">Hyundai</option> <option value="365">Infiniti</option> <option value="113">Jaguar</option> <option value="114">Jeep</option> <option value="150">Kia</option> <option value="115">Lada</option> <option value="116">Lamborghini</option> <option value="117">Lancia</option> <option value="168">Land Rover</option> <option value="432">Landwind</option> <option value="118">Lexus</option> <option value="119">Lotus</option> <option value="120">Maserati</option> <option value="330">Maybach</option> <option value="121">Mazda</option> <option value="122">Mercedes-Benz</option> <option value="304">Mini</option> <option value="124">Mitsubishi</option> <option value="126">Morgan</option> <option value="127">Nissan</option> <option value="128">Opel</option> <option value="130">Peugeot</option> <option value="132">Porsche</option> 
<option value="134">Renault</option> <option value="135">Rolls-Royce</option> <option value="138">Saab</option> <option value="139">Seat</option> <option value="140">Skoda</option> <option value="226">smart</option> <option value="343">Spyker</option> <option value="210">SsangYong</option> <option value="141">Subaru</option> <option value="142">Suzuki</option> <option value="417">Think</option> <option value="144">Toyota</option> <option value="147">Volkswagen</option> <option value="145">Volvo</option> </select>
我想知道是否有办法把上面这段字符串文本添加到 driver.page_source 中,以便我可以通过 driver 的方法遍历这些 option 值?
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select

# Log in to autotelexpro.nl, then fill in the manual search form through
# Selenium's Select helper.

driver = webdriver.Chrome()
driver.maximize_window()
driver.get('https://autotelexpro.nl/LoginPage.aspx')
driver.find_element(By.XPATH, value='//*[@id="ctl00_cp_LogOnView_LogOn_txtVestigingsnummer"]').send_keys('3783')
driver.find_element(By.XPATH, value='//*[@id="ctl00_cp_LogOnView_LogOn_txtGebruikersnaam"]').send_keys('Frank')
driver.find_element(By.XPATH, value='//*[@id="ctl00_cp_LogOnView_LogOn_Password"]').send_keys('msnauto2016')
driver.find_element(By.XPATH, value='//*[@id="ctl00_cp_LogOnView_LogOn_btnLogin"]').click()
time.sleep(10)  # fixed pause after submitting the login form

# Choose the vehicle type first, then pause before touching the date fields.
vehicle_type = driver.find_element(By.XPATH, ".//*[@id='ctl00_cp_ucSearch_Manual_ddlVoertuigType']")
Select(vehicle_type).select_by_visible_text("Motoren")
time.sleep(5)

try:
    build_day = driver.find_element(By.XPATH, ".//*[@id='ctl00_cp_ucSearch_Manual_ddlBouwdag']")
    Select(build_day).select_by_visible_text("1")
    build_month = driver.find_element(By.XPATH, ".//*[@id='ctl00_cp_ucSearch_Manual_ddlBouwmaand']")
    Select(build_month).select_by_visible_text("1")
    build_year = driver.find_element(By.XPATH, ".//*[@id='ctl00_cp_ucSearch_Manual_ddlBouwjaar']")
    Select(build_year).select_by_visible_text("2017")
    time.sleep(5)  # fixed pause before the brand dropdown is used
    brand = driver.find_element(By.CSS_SELECTOR, "#ctl00_cp_ucSearch_Manual_ddlMerk")
    Select(brand).select_by_visible_text("BTC")
except Exception:
    # Best effort: report the failure and carry on. Unlike a bare except,
    # this lets KeyboardInterrupt and SystemExit propagate.
    print("Not able to select")
上面的代码应该可以解决这个问题。更好的做法是使用显式等待(explicit wait),但作为临时解决方案,这里使用了 time.sleep()。
更新:如果您想获取品牌(Merk)下拉菜单中的所有选项,可以使用以下方法:
def getallcarlist():
    """Return the option labels of the brand dropdown for vehicle type "Motoren".

    Uses the module-level ``driver``. Selects vehicle type "Motoren" and
    build day "1", month "1", year "2017" on the manual search form, with a
    fixed 5-second pause after the type selection and another after the
    date selections, then reads every <option> of the brand dropdown
    (``ctl00_cp_ucSearch_Manual_ddlMerk``).

    Returns:
        A list holding the UTF-8 encoded text of each option.
    """
    def choose(xpath, label):
        # Wrap the located <select> in Selenium's Select helper and pick
        # the entry by its visible text.
        Select(driver.find_element(By.XPATH, xpath)).select_by_visible_text(label)

    choose(".//*[@id='ctl00_cp_ucSearch_Manual_ddlVoertuigType']", "Motoren")
    time.sleep(5)
    choose(".//*[@id='ctl00_cp_ucSearch_Manual_ddlBouwdag']", "1")
    choose(".//*[@id='ctl00_cp_ucSearch_Manual_ddlBouwmaand']", "1")
    choose(".//*[@id='ctl00_cp_ucSearch_Manual_ddlBouwjaar']", "2017")
    time.sleep(5)

    car = driver.find_element(By.CSS_SELECTOR, "#ctl00_cp_ucSearch_Manual_ddlMerk")
    # NOTE(review): on Python 3, encode() yields bytes objects; drop it
    # there if plain text is wanted.
    return [
        option.text.encode('utf8')
        for option in car.find_elements(By.TAG_NAME, 'option')
    ]
调用方式如下:
# Fetch the labels of the brand dropdown and print them one per line.
listcar = getallcarlist()
for c in listcar:
    # Parenthesised form prints the same on Python 2 and is valid on Python 3.
    print(c)
输出将是:
- Kies merk - AGM AJP Aprilia Benelli Beta BMW BTC Bullit Derbi Ducati Energica Gilera Harley Davidson Hesketh Honda Husqvarna Hyosung Indian Kawasaki KTM Kymco Longjia Mash Morgan Mors Moto Guzzi MV Agusta Nimoto Ossa Peugeot Piaggio Quadro Razzo Renault Royal Enfield Sachs Scomadi Suzuki SWM SYM Triumph Turbho Vespa Victory Volta Motorbikes Yamaha Yiben Zero Motorcycles