# CIAN robot in progress

dev-linux
Ivan Maslov 4 years ago
parent c7cfa66a62
commit 0ce9de4d40

@ -2,6 +2,26 @@
# Import section # Import section
from selenium import webdriver from selenium import webdriver
import time import time
# Store structure (.json)
"""
{
"SearchTitleStr": "Search in SPB, Russia", # Title of the search [str]
"SearchURLStr": "https://spb.cian.ru/cat.php?deal_type=sale&engine_version=2&in_polygon%5B1%5D=30.2815_59.9821%2C30.2844_59.9821%2C30.2874_59.9821%2C30.29_59.9821%2C30.293_59.9822%2C30.2957_59.9824%2C30.2984_59.9824%2C30.3019_59.9824%2C30.3048_59.9824%2C30.3074_59.9824%2C30.3088_59.9835%2C30.3085_59.9848%2C30.3065_59.9859%2C30.3049_59.987%2C30.3035_59.9885%2C30.302_59.9897%2C30.2991_59.9902%2C30.2961_59.9904%2C30.2934_59.9903%2C30.2904_59.9898%2C30.2879_59.9893%2C30.2855_59.9888%2C30.2825_59.9882%2C30.2799_59.9879%2C30.2768_59.9874%2C30.2741_59.987%2C30.2716_59.9867%2C30.2688_59.9867%2C30.2657_59.9867%2C30.2626_59.9867%2C30.26_59.9867%2C30.2577_59.986%2C30.2576_59.9846%2C30.2588_59.9834%2C30.2611_59.9827%2C30.2641_59.9822%2C30.2667_59.9819%2C30.2697_59.9819%2C30.2726_59.9816%2C30.2753_59.9815%2C30.2781_59.9818%2C30.2807_59.9823%2C30.2833_59.9823%2C30.2815_59.9821&offer_type=flat&polygon_name%5B1%5D=%D0%9E%D0%B1%D0%BB%D0%B0%D1%81%D1%82%D1%8C+%D0%BF%D0%BE%D0%B8%D1%81%D0%BA%D0%B0&room1=1&room2=1", # URL of the CIAN search [str]
"SearchDatetimeStr": "2020-08-01 09:33:00.838081", # Date of data extraction, [str]
"SearchItems": {
"https://spb.cian.ru/sale/flat/777928777/": { # Item URL with https
"TitleStr": "3-комн. кв., 31,4 м², 5/8 этаж", # Offer title [str]
"PriceFloat": 10000000.0, # Price [float]
"PriceSqmFloat": 133333.0, # CALCULATED price per square meter [float]
"SqMFloat": 31.4, # Square meters in flat [float]
"FloorCurrentInt": 5, # Current floor [int]
"FloorTotalInt": 8, # Total number of floors in the building [int]
}
}
}
"""
########################## ##########################
# Init the Chrome web driver # Init the Chrome web driver
########################### ###########################
@ -42,7 +62,9 @@ while lNextPageItem:
for lOfferItem in lOfferList: for lOfferItem in lOfferList:
lTitleStr = lOfferItem.find_element_by_css_selector(css_selector='div[data-name="TopTitle"],div[data-name="Title"]').text lTitleStr = lOfferItem.find_element_by_css_selector(css_selector='div[data-name="TopTitle"],div[data-name="Title"]').text
lPriceStr = lOfferItem.find_element_by_css_selector(css_selector='div[data-name="Price"] > div[class*="header"],div[data-name="TopPrice"] > div[class*="header"]').text lPriceStr = lOfferItem.find_element_by_css_selector(css_selector='div[data-name="Price"] > div[class*="header"],div[data-name="TopPrice"] > div[class*="header"]').text
lURLStr = lOfferItem.find_element_by_css_selector(css_selector='a[class*="--header--"]').get_attribute("href")
print(f"Title: {lTitleStr}, Price: {lPriceStr}") print(f"Title: {lTitleStr}, Price: {lPriceStr}")
print(f"URL: {lURLStr}")
# Click next page item # Click next page item
lNextPageItem = None lNextPageItem = None
lNextPageList = lWebDriver.find_elements_by_css_selector(lNextPageItemCSS) lNextPageList = lWebDriver.find_elements_by_css_selector(lNextPageItemCSS)

Loading…
Cancel
Save