|
|
@ -1,11 +1,11 @@
|
|
|
|
# Init Chrome web driver with extensions (if applicable)
|
|
|
|
# Init Chrome web driver with extensions (if applicable)
|
|
|
|
# Import section
|
|
|
|
# Import section
|
|
|
|
from selenium import webdriver
|
|
|
|
from selenium import webdriver
|
|
|
|
|
|
|
|
import time
|
|
|
|
##########################
|
|
|
|
##########################
|
|
|
|
# Init the Chrome web driver
|
|
|
|
# Init the Chrome web driver
|
|
|
|
###########################
|
|
|
|
###########################
|
|
|
|
gChromeExeFullPath = r'..\Resources\GoogleChromePortable\GoogleChromePortable.exe'
|
|
|
|
gChromeExeFullPath = r'..\Resources\GoogleChromePortable\App\Chrome-bin\chrome.exe'
|
|
|
|
gExtensionFullPathList = []
|
|
|
|
gExtensionFullPathList = []
|
|
|
|
gWebDriverFullPath = r'..\Resources\SeleniumWebDrivers\Chrome\chromedriver_win32 v84.0.4147.30\chromedriver.exe'
|
|
|
|
gWebDriverFullPath = r'..\Resources\SeleniumWebDrivers\Chrome\chromedriver_win32 v84.0.4147.30\chromedriver.exe'
|
|
|
|
def WebDriverInit(inWebDriverFullPath, inChromeExeFullPath, inExtensionFullPathList):
|
|
|
|
def WebDriverInit(inWebDriverFullPath, inChromeExeFullPath, inExtensionFullPathList):
|
|
|
@ -19,11 +19,41 @@ def WebDriverInit(inWebDriverFullPath, inChromeExeFullPath, inExtensionFullPathL
|
|
|
|
lWebDriverInstance = None
|
|
|
|
lWebDriverInstance = None
|
|
|
|
if inWebDriverFullPath:
|
|
|
|
if inWebDriverFullPath:
|
|
|
|
# Run with specified web driver path
|
|
|
|
# Run with specified web driver path
|
|
|
|
lWebDriverInstance = webdriver.Chrome(executable_path = inWebDriverFullPath, chrome_options=lWebDriverChromeOptionsInstance)
|
|
|
|
lWebDriverInstance = webdriver.Chrome(executable_path = inWebDriverFullPath, options=lWebDriverChromeOptionsInstance)
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
lWebDriverInstance = webdriver.Chrome(chrome_options = lWebDriverChromeOptionsInstance)
|
|
|
|
lWebDriverInstance = webdriver.Chrome(options = lWebDriverChromeOptionsInstance)
|
|
|
|
# Return the result
|
|
|
|
# Return the result
|
|
|
|
return lWebDriverInstance
|
|
|
|
return lWebDriverInstance
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from selenium.webdriver.common.by import By
|
|
|
|
|
|
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
|
|
|
|
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
|
|
|
|
|
|
|
|
|
|
# Initialize Google Chrome with the Selenium web driver
|
|
|
|
# Initialize Google Chrome with the Selenium web driver
|
|
|
|
lWebDriver = WebDriverInit(inWebDriverFullPath = gWebDriverFullPath, inChromeExeFullPath = gChromeExeFullPath, inExtensionFullPathList = gExtensionFullPathList)
|
|
|
|
lWebDriver = WebDriverInit(inWebDriverFullPath = gWebDriverFullPath, inChromeExeFullPath = gChromeExeFullPath, inExtensionFullPathList = gExtensionFullPathList)
|
|
|
|
|
|
|
|
# Scrape every result page of a cian.ru search and print the title and price of each offer.
# The filter URL asks for flats on sale (deal_type=sale, offer_type=flat, room1/room2 flags set)
# located inside a map polygon (in_polygon parameter).
lFilterURLStr = "https://spb.cian.ru/cat.php?deal_type=sale&engine_version=2&in_polygon%5B1%5D=30.2815_59.9821%2C30.2844_59.9821%2C30.2874_59.9821%2C30.29_59.9821%2C30.293_59.9822%2C30.2957_59.9824%2C30.2984_59.9824%2C30.3019_59.9824%2C30.3048_59.9824%2C30.3074_59.9824%2C30.3088_59.9835%2C30.3085_59.9848%2C30.3065_59.9859%2C30.3049_59.987%2C30.3035_59.9885%2C30.302_59.9897%2C30.2991_59.9902%2C30.2961_59.9904%2C30.2934_59.9903%2C30.2904_59.9898%2C30.2879_59.9893%2C30.2855_59.9888%2C30.2825_59.9882%2C30.2799_59.9879%2C30.2768_59.9874%2C30.2741_59.987%2C30.2716_59.9867%2C30.2688_59.9867%2C30.2657_59.9867%2C30.2626_59.9867%2C30.26_59.9867%2C30.2577_59.986%2C30.2576_59.9846%2C30.2588_59.9834%2C30.2611_59.9827%2C30.2641_59.9822%2C30.2667_59.9819%2C30.2697_59.9819%2C30.2726_59.9816%2C30.2753_59.9815%2C30.2781_59.9818%2C30.2807_59.9823%2C30.2833_59.9823&offer_type=flat&polygon_name%5B1%5D=%D0%9E%D0%B1%D0%BB%D0%B0%D1%81%D1%82%D1%8C+%D0%BF%D0%BE%D0%B8%D1%81%D0%BA%D0%B0&room1=1&room2=1"
lWebDriver.get(lFilterURLStr)

# Direct children of the "Offers" container, excluding the three banner blocks
lOfferListCSSStr = 'div[data-name="Offers"] > div:not([data-name="BannerServicePlaceInternal"]):not([data-name="getBannerMarkup"]):not([data-name="AdFoxBannerTracker"])'
# Link inside the pagination item that immediately follows the active (current) page item
lNextPageItemCSS = 'div[data-name="Pagination"] li[class*="active"] + li a'
# Title / price selectors inside one offer card (each accepts the regular and the "Top" variant)
lTitleCSSStr = 'div[data-name="TopTitle"],div[data-name="Title"]'
lPriceCSSStr = 'div[data-name="Price"] > div[class*="header"],div[data-name="TopPrice"] > div[class*="header"]'
# Fixed pause after the click that gives the next page time to render
lPageLoadWaitSecFloat = 3

while True:
    # Print every offer of the page that is currently displayed
    lOfferList = lWebDriver.find_elements(By.CSS_SELECTOR, lOfferListCSSStr)
    for lOfferItem in lOfferList:
        lTitleStr = lOfferItem.find_element(By.CSS_SELECTOR, lTitleCSSStr).text
        lPriceStr = lOfferItem.find_element(By.CSS_SELECTOR, lPriceCSSStr).text
        print(f"Title: {lTitleStr}, Price: {lPriceStr}")
    # find_elements returns an empty list (instead of raising) when there is no next page,
    # so a single-page result and the last page both end the loop cleanly
    lNextPageList = lWebDriver.find_elements(By.CSS_SELECTOR, lNextPageItemCSS)
    lNextPageItem = lNextPageList[0] if lNextPageList else None
    if lNextPageItem is None:
        break
    try:
        # NOTE(review): the click is issued through JavaScript rather than WebElement.click();
        # presumably the native click was unreliable on this page - confirm
        lWebDriver.execute_script("arguments[0].click()", lNextPageItem)
    except Exception as e:
        # Stop here: without a successful click the same page would be scraped again forever
        print(e)
        break
    time.sleep(lPageLoadWaitSecFloat)
print("Over!")
|