|
|
@ -67,11 +67,11 @@ lResult = {
|
|
|
|
"SearchItems": {} # prepare the result
|
|
|
|
"SearchItems": {} # prepare the result
|
|
|
|
}
|
|
|
|
}
|
|
|
|
# Get List of the page
|
|
|
|
# Get List of the page
|
|
|
|
lOfferListCSSStr = 'div[data-name="Offers"] > div:not([data-name="BannerServicePlaceInternal"]):not([data-name="getBannerMarkup"]):not([data-name="AdFoxBannerTracker"])'
|
|
|
|
|
|
|
|
lOfferList = lWebDriver.find_elements_by_css_selector(css_selector=lOfferListCSSStr)
|
|
|
|
|
|
|
|
lNextPageItemCSS = 'div[data-name="Pagination"] li[class*="active"] + li a'
|
|
|
|
lNextPageItemCSS = 'div[data-name="Pagination"] li[class*="active"] + li a'
|
|
|
|
lNextPageItem = lWebDriver.find_element_by_css_selector(lNextPageItemCSS)
|
|
|
|
lNextPageItem = lWebDriver.find_element_by_css_selector(lNextPageItemCSS)
|
|
|
|
while lNextPageItem:
|
|
|
|
while lNextPageItem:
|
|
|
|
|
|
|
|
lOfferListCSSStr = 'div[data-name="Offers"] > div:not([data-name="BannerServicePlaceInternal"]):not([data-name="getBannerMarkup"]):not([data-name="AdFoxBannerTracker"])'
|
|
|
|
|
|
|
|
lOfferList = lWebDriver.find_elements_by_css_selector(css_selector=lOfferListCSSStr)
|
|
|
|
for lOfferItem in lOfferList: # Processing the item, extract info
|
|
|
|
for lOfferItem in lOfferList: # Processing the item, extract info
|
|
|
|
lOfferItemInfo = { # Item URL with https
|
|
|
|
lOfferItemInfo = { # Item URL with https
|
|
|
|
"TitleStr": "3-комн. кв., 31,4 м², 5/8 этаж", # Offer title [str]
|
|
|
|
"TitleStr": "3-комн. кв., 31,4 м², 5/8 этаж", # Offer title [str]
|
|
|
@ -88,7 +88,7 @@ while lNextPageItem:
|
|
|
|
lOfferItemInfo["TitleStr"] = lTitleStr # set the title
|
|
|
|
lOfferItemInfo["TitleStr"] = lTitleStr # set the title
|
|
|
|
lPriceStr = lPriceStr.replace(" ","").replace("₽","")
|
|
|
|
lPriceStr = lPriceStr.replace(" ","").replace("₽","")
|
|
|
|
lOfferItemInfo["PriceFloat"] = round(float(lPriceStr),2) # Set the price
|
|
|
|
lOfferItemInfo["PriceFloat"] = round(float(lPriceStr),2) # Set the price
|
|
|
|
lREResult = re.match(r"(\d)-комн. .*, (\d*,?\d*) м², (\d*)/(\d*) эта.", lTitleStr) # run the re
|
|
|
|
lREResult = re.search(r".*(\d)-комн. .*, (\d*,?\d*) м², (\d*)/(\d*) эта.", lTitleStr) # run the re
|
|
|
|
lOfferItemInfo["RoomCountInt"] = lREResult.group(1) # Room count
|
|
|
|
lOfferItemInfo["RoomCountInt"] = lREResult.group(1) # Room count
|
|
|
|
lSqmStr = lREResult.group(2)
|
|
|
|
lSqmStr = lREResult.group(2)
|
|
|
|
lSqmStr= lSqmStr.replace(",",".")
|
|
|
|
lSqmStr= lSqmStr.replace(",",".")
|
|
|
@ -108,8 +108,13 @@ while lNextPageItem:
|
|
|
|
lWebDriver.execute_script("""document.querySelector('div[data-name="Pagination"] li[class*="active"] + li a').click()""")
|
|
|
|
lWebDriver.execute_script("""document.querySelector('div[data-name="Pagination"] li[class*="active"] + li a').click()""")
|
|
|
|
except Exception as e:
|
|
|
|
except Exception as e:
|
|
|
|
print(e)
|
|
|
|
print(e)
|
|
|
|
time.sleep(3)
|
|
|
|
time.sleep(0.5) # some init operations
|
|
|
|
lOfferList = lWebDriver.find_elements_by_css_selector(css_selector=lOfferListCSSStr)
|
|
|
|
# wait while preloader is active
|
|
|
|
|
|
|
|
lDoWaitBool = True
|
|
|
|
|
|
|
|
while lDoWaitBool:
|
|
|
|
|
|
|
|
lPreloaderCSS = inWebDriver.find_elements_by_css_selector(css_selector='div[class*="--preloadOverlay--"]')
|
|
|
|
|
|
|
|
if len(lPreloaderCSS)>0: time.sleep(0.5) # preloader is here - wait
|
|
|
|
|
|
|
|
else: lDoWaitBool = False # Stop waiting once the preloader has disappeared
|
|
|
|
# Save result in file
|
|
|
|
# Save result in file
|
|
|
|
lFile = open(f"{lResult['SearchKeyStr']}_{lDatetimeNowStr.replace(' ','_').replace('-','_').replace(':','_')}","w",encoding="utf-8")
|
|
|
|
lFile = open(f"{lResult['SearchKeyStr']}_{lDatetimeNowStr.replace(' ','_').replace('-','_').replace(':','_')}","w",encoding="utf-8")
|
|
|
|
lFile.write(json.dumps(lResult))
|
|
|
|
lFile.write(json.dumps(lResult))
|
|
|
|