# New tutorial in progress

dev-linux
Ivan Maslov 4 years ago
parent 2e351f9d86
commit eb99e81c55

3
.gitignore vendored

@ -1,4 +1,4 @@
# Игнорируем новые файлы из Google Portable # Игнорируем новые файлы из Google Portable
/**/Resources/GoogleChromePortable /**/Resources/GoogleChromePortable
#Игнорируем все папки с названием __pycache__ #Игнорируем все папки с названием __pycache__
/**/__pycache__/** /**/__pycache__/**
@ -22,3 +22,4 @@
**/.idea/** **/.idea/**
/**/screenshot.png /**/screenshot.png
/**/*.log /**/*.log
3. AppWEB_Habr.md

@ -67,11 +67,11 @@ lResult = {
"SearchItems": {} # prepare the result "SearchItems": {} # prepare the result
} }
# Get List of the page # Get List of the page
lOfferListCSSStr = 'div[data-name="Offers"] > div:not([data-name="BannerServicePlaceInternal"]):not([data-name="getBannerMarkup"]):not([data-name="AdFoxBannerTracker"])'
lOfferList = lWebDriver.find_elements_by_css_selector(css_selector=lOfferListCSSStr)
lNextPageItemCSS = 'div[data-name="Pagination"] li[class*="active"] + li a' lNextPageItemCSS = 'div[data-name="Pagination"] li[class*="active"] + li a'
lNextPageItem = lWebDriver.find_element_by_css_selector(lNextPageItemCSS) lNextPageItem = lWebDriver.find_element_by_css_selector(lNextPageItemCSS)
while lNextPageItem: while lNextPageItem:
lOfferListCSSStr = 'div[data-name="Offers"] > div:not([data-name="BannerServicePlaceInternal"]):not([data-name="getBannerMarkup"]):not([data-name="AdFoxBannerTracker"])'
lOfferList = lWebDriver.find_elements_by_css_selector(css_selector=lOfferListCSSStr)
for lOfferItem in lOfferList: # Processing the item, extract info for lOfferItem in lOfferList: # Processing the item, extract info
lOfferItemInfo = { # Item URL with https lOfferItemInfo = { # Item URL with https
"TitleStr": "3-комн. кв., 31,4 м², 5/8 этаж", # Offer title [str] "TitleStr": "3-комн. кв., 31,4 м², 5/8 этаж", # Offer title [str]
@ -88,7 +88,7 @@ while lNextPageItem:
lOfferItemInfo["TitleStr"] = lTitleStr # set the title lOfferItemInfo["TitleStr"] = lTitleStr # set the title
lPriceStr = lPriceStr.replace(" ","").replace("","") lPriceStr = lPriceStr.replace(" ","").replace("","")
lOfferItemInfo["PriceFloat"] = round(float(lPriceStr),2) # Set the price lOfferItemInfo["PriceFloat"] = round(float(lPriceStr),2) # Set the price
lREResult = re.match(r"(\d)-комн. .*, (\d*,?\d*) м², (\d*)/(\d*) эта.", lTitleStr) # run the re lREResult = re.search(r".*(\d)-комн. .*, (\d*,?\d*) м², (\d*)/(\d*) эта.", lTitleStr) # run the re
lOfferItemInfo["RoomCountInt"] = lREResult.group(1) # Room count lOfferItemInfo["RoomCountInt"] = lREResult.group(1) # Room count
lSqmStr = lREResult.group(2) lSqmStr = lREResult.group(2)
lSqmStr= lSqmStr.replace(",",".") lSqmStr= lSqmStr.replace(",",".")
@ -108,8 +108,13 @@ while lNextPageItem:
lWebDriver.execute_script("""document.querySelector('div[data-name="Pagination"] li[class*="active"] + li a').click()""") lWebDriver.execute_script("""document.querySelector('div[data-name="Pagination"] li[class*="active"] + li a').click()""")
except Exception as e: except Exception as e:
print(e) print(e)
time.sleep(3) time.sleep(0.5) # some init operations
lOfferList = lWebDriver.find_elements_by_css_selector(css_selector=lOfferListCSSStr) # wait while preloader is active
lDoWaitBool = True
while lDoWaitBool:
lPreloaderCSS = inWebDriver.find_elements_by_css_selector(css_selector='div[class*="--preloadOverlay--"]')
if len(lPreloaderCSS)>0: time.sleep(0.5) # preloader is here - wait
else: lDoWaitBool = False # Stop waiting once the preloader has disappeared
# Save result in file # Save result in file
lFile = open(f"{lResult['SearchKeyStr']}_{lDatetimeNowStr.replace(' ','_').replace('-','_').replace(':','_')}","w",encoding="utf-8") lFile = open(f"{lResult['SearchKeyStr']}_{lDatetimeNowStr.replace(' ','_').replace('-','_').replace(':','_')}","w",encoding="utf-8")
lFile.write(json.dumps(lResult)) lFile.write(json.dumps(lResult))

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Loading…
Cancel
Save