Parsel usage

# Build a Selector from an in-memory HTML string and query it with CSS.
text = u"<html><body><h1>Hello, Parsel!</h1></body></html>"
sl = Selector(text=text)
# css() returns a list of Selector objects; the recorded repr shows the CSS
# expression translated to the XPath 'descendant-or-self::h1'.
sl.css('h1')
[<Selector xpath='descendant-or-self::h1' data='<h1>Hello, Parsel!</h1>'>]
# The same <h1> element, selected with an XPath expression instead of CSS.
sl.xpath('//h1')
[<Selector xpath='//h1' data='<h1>Hello, Parsel!</h1>'>]
# '::text' selects the element's text node; get() yields it as a single string.
sl.css('h1::text').get()
'Hello, Parsel!'
# getall() yields every matching text node as a list of strings.
sl.xpath('//h1/text()').getall()
['Hello, Parsel!']

A look at CafeF

# Fetch the first page of FPT's price history from CafeF and print, for each
# data row, the date cell and the first two 'Item_Price10' cells.
url = 'https://s.cafef.vn/Lich-su-giao-dich-FPT-1.chn'
# Bound the request so an unresponsive server cannot hang the session.
text = requests.get(url, timeout=30).text
cf = Selector(text)
# Only rows that carry an id attribute are selected.
rows = cf.xpath("//table[@id='GirdTable2']/tr[@id]")
for row in rows:
    # Paths without a leading slash are evaluated relative to the current row.
    date = row.xpath("td[@class='Item_DateItem']/text()").get()
    pr_scale = row.xpath("td[@class='Item_Price10'][1]/text()").get()
    pr_close = row.xpath("td[@class='Item_Price10'][2]/text()").get()
    # No pause between rows: they are read from the already-downloaded
    # document, so sleeping here would only slow the loop down.
    print(date, pr_scale, pr_close)
15/01/2021 66.60  66.60 
14/01/2021 66.40  66.40 
13/01/2021 66.50  66.50 
12/01/2021 65.80  65.80 
11/01/2021 64.70  64.70 
08/01/2021 63.30  63.30 
07/01/2021 62.50  62.50 
06/01/2021 62.40  62.40 
05/01/2021 62.70  62.70 
04/01/2021 60.20  60.20 
31/12/2020 59.10  59.10 
30/12/2020 58.10  58.10 
29/12/2020 58.60  58.60 
28/12/2020 57.90  57.90 
25/12/2020 57.60  57.60 
24/12/2020 57.00  57.00 
23/12/2020 57.50  57.50 
22/12/2020 58.10  58.10 
21/12/2020 57.50  57.50 
18/12/2020 56.90  56.90 

A look at Vndirect

# Fetch VNDirect's FPT price-history page and parse the returned HTML.
url = 'https://dstock.vndirect.com.vn/lich-su-gia/FPT'
# Bound the request so an unresponsive server cannot hang the session.
text = requests.get(url, timeout=30).text
vndr = Selector(text)
vndr
<Selector xpath=None data='<html><head><meta name="viewport" con...'>
# Collect every <tbody> in the fetched HTML. The recorded result shows two
# empty <tbody></tbody> elements, i.e. the downloaded document has no rows.
# NOTE(review): presumably the rows are filled in client-side by JavaScript,
# which a plain HTTP fetch does not execute - confirm in a browser.
tbs = vndr.xpath("//table/tbody")
tbs
[<Selector xpath='//table/tbody' data='<tbody></tbody>'>,
 <Selector xpath='//table/tbody' data='<tbody></tbody>'>]
# Print the first row of each tbody (None when there is none).
# The path must be relative ("./tr"): an XPath that starts with "/" is
# evaluated from the document root rather than from `tb`, so "/tr" can never
# match a row inside the tbody. The tbody elements shown in the recorded
# output are empty, so None is still expected for this page.
for tb in tbs:
    print(tb.xpath("./tr").get())
    
None
None

A look at BDS VN

# Listing categories on batdongsan.com.vn mapped to their landing-page URLs.
# Insertion order is the order in which the pages are visited.
urls = {
    'All': 'https://batdongsan.com.vn/nha-dat-ban',
    'Apartment': 'https://batdongsan.com.vn/ban-can-ho-chung-cu',
    'All House': 'https://batdongsan.com.vn/ban-nha-dat',
    'Normal House': 'https://batdongsan.com.vn/ban-nha-rieng',
    'Villa House': 'https://batdongsan.com.vn/ban-nha-biet-thu-lien-ke',
    'Biz House': 'https://batdongsan.com.vn/ban-nha-mat-pho',
    'All Land': 'https://batdongsan.com.vn/ban-dat-dat-nen',
    'Project Land': 'https://batdongsan.com.vn/ban-dat-nen-du-an',
    'Normal Land': 'https://batdongsan.com.vn/ban-dat',
    'Farm': 'https://batdongsan.com.vn/ban-trang-trai-khu-nghi-duong',
    'Warehouse': 'https://batdongsan.com.vn/ban-kho-nha-xuong',
    'Other': 'https://batdongsan.com.vn/ban-loai-bat-dong-san-khac',
}
# `date` is not set in this section: it still holds the value assigned in the
# last iteration of the CafeF row loop (the final row printed there).
date
'18/12/2020'
# For every listing category, fetch its landing page and record the page
# heading and the listing count shown in the 'count-number' span.
# NOTE(review): `date` is the variable left over from the CafeF row loop, so
# every record is stamped with that table's last date rather than the day the
# counts were scraped - confirm this is intended.
bdses = []
for cat, url in urls.items():
    # Bound each request so one unresponsive page cannot hang the whole run.
    bds = Selector(requests.get(url, timeout=30).text)
    title = bds.xpath("//div/h1/text()").get()
    # The count is kept as scraped text (e.g. '191,499'), not converted to int.
    number = bds.xpath("//div/span[@id='count-number']/text()").get()
    bdses.append([cat, title, date, number])
    print(cat, title, date, number)
    # Pause between requests to avoid hitting the site in rapid succession.
    time.sleep(1)
All Mua bán nhà đất toàn quốc 18/12/2020 191,499
Apartment Bán căn hộ chung cư tại Việt Nam 18/12/2020 43,442
All House Nhà bán tại Việt Nam 18/12/2020 81,289
Normal House Bán nhà riêng tại Việt Nam 18/12/2020 44,433
Villa House Bán nhà biệt thự, liền kề tại Việt Nam 18/12/2020 14,450
Biz House Bán nhà mặt phố tại Việt Nam 18/12/2020 22,400
All Land Đất bán tại Việt Nam 18/12/2020 64,971
Project Land Bán đất nền dự án tại Việt Nam 18/12/2020 18,763
Normal Land Bán đất tại Việt Nam 18/12/2020 46,216
Farm Bán trang trại, khu nghỉ dưỡng tại Việt Nam 18/12/2020 424
Warehouse Bán kho, nhà xưởng tại Việt Nam 18/12/2020 551
Other Bán loại bất động sản khác tại Việt Nam 18/12/2020 830
# Tabulate the scraped records; each entry of `bdses` is
# [category, title, date, count]. Count holds the scraped text
# (e.g. '191,499'), not a number.
df = pd.DataFrame(
    data=bdses,
    columns=['Type', 'Desc', 'Date', 'Count'],
)
df
Type Desc Date Count
0 All Mua bán nhà đất toàn quốc 18/12/2020 191,499
1 Apartment Bán căn hộ chung cư tại Việt Nam 18/12/2020 43,442
2 All House Nhà bán tại Việt Nam 18/12/2020 81,289
3 Normal House Bán nhà riêng tại Việt Nam 18/12/2020 44,433
4 Villa House Bán nhà biệt thự, liền kề tại Việt Nam 18/12/2020 14,450
5 Biz House Bán nhà mặt phố tại Việt Nam 18/12/2020 22,400
6 All Land Đất bán tại Việt Nam 18/12/2020 64,971
7 Project Land Bán đất nền dự án tại Việt Nam 18/12/2020 18,763
8 Normal Land Bán đất tại Việt Nam 18/12/2020 46,216
9 Farm Bán trang trại, khu nghỉ dưỡng tại Việt Nam 18/12/2020 424
10 Warehouse Bán kho, nhà xưởng tại Việt Nam 18/12/2020 551
11 Other Bán loại bất động sản khác tại Việt Nam 18/12/2020 830

Selenium

from selenium import webdriver
from selenium.webdriver import *
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

import os

# Location of the chromedriver binary handed to webdriver.Chrome. The default
# is specific to one machine; set the CHROMEDRIVER_PATH environment variable
# to point at a different binary without editing this file.
PATH = os.environ.get('CHROMEDRIVER_PATH', '/home/danph/drivers/chromedriver')

def get_driver(url):
    """Start a headless Chrome session and load *url*.

    Args:
        url: Address of the page to open.

    Returns:
        The ``webdriver.Chrome`` instance with *url* loaded. The caller owns
        the session and should call ``quit()`` on it when finished.

    Raises:
        Whatever ``driver.get`` raises; the browser is shut down first.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(
        executable_path=PATH,
        options=chrome_options)
    try:
        driver.get(url)
    except Exception:
        # Navigation failed: shut the browser down so no orphaned Chrome
        # process is left behind, then let the caller see the error.
        driver.quit()
        raise
    return driver

def get_driver2(url):
    """Start headless Chrome from explicit desired capabilities and load *url*.

    Variant of ``get_driver`` that converts the Chrome options into a
    capabilities mapping before creating the driver.

    Args:
        url: Address of the page to open.

    Returns:
        The ``webdriver.Chrome`` instance with *url* loaded. The caller owns
        the session and should call ``quit()`` on it when finished.

    Raises:
        Whatever ``driver.get`` raises; the browser is shut down first.
    """
    # Use headless option to not open a new browser window
    options = webdriver.ChromeOptions()
    options.add_argument("headless")
    desired_capabilities = options.to_capabilities()
    driver = webdriver.Chrome(executable_path=PATH, desired_capabilities=desired_capabilities)
    try:
        driver.get(url)
    except Exception:
        # Navigation failed: shut the browser down so no orphaned Chrome
        # process is left behind, then let the caller see the error.
        driver.quit()
        raise
    return driver

def get_table_results(driver):
    """Print the first three cells of every data row in the price table.

    Rows are the ``tr`` elements carrying an ``id`` attribute inside any table
    whose id contains 'GirdTable'.

    Args:
        driver: Object exposing ``find_elements_by_xpath`` (a Selenium
            WebDriver) positioned on the page that holds the table.

    Returns:
        None. Each row is printed as a list of cell texts.
    """
    rows = driver.find_elements_by_xpath("//table[contains(@id,'GirdTable')]//tr[@id]")
    for row in rows:
        # No pause between rows: the cells are read from the page that is
        # already loaded, so a per-row sleep would only slow the loop down.
        cells = row.find_elements_by_xpath("./td[position()<=3]")
        print([cell.text for cell in cells])
import time
# Current time, current date, and a combined timestamp.
# %m is the month; %M is the minute and belongs only in the time-of-day part
# (using %M in the date put the minute where the month should be).
time.strftime('%H:%M:%S'), time.strftime('%Y-%m-%d'), time.strftime('%Y-%m-%d_%H:%M:%S')
('15:30:42', '2021-30-17', '2021-30-17_15:30:42')
# Open the first page of MVB's price history in headless Chrome and print
# its rows.
URL = 'https://s.cafef.vn/Lich-su-giao-dich-MVB-1.chn'
driver = get_driver(URL)
# Reuse the helper rather than repeating its row-printing loop inline.
get_table_results(driver)
['15/01/2021', '13.80 ', '13.80 ']
['14/01/2021', '12.60 ', '12.60 ']
['13/01/2021', '12.50 ', '12.50 ']
['12/01/2021', '12.70 ', '12.70 ']
['11/01/2021', '12.60 ', '12.60 ']
['08/01/2021', '12.90 ', '12.90 ']
['07/01/2021', '12.40 ', '12.40 ']
['06/01/2021', '12.40 ', '12.40 ']
['05/01/2021', '12.40 ', '12.40 ']
['04/01/2021', '12.00 ', '12.00 ']
['31/12/2020', '12.00 ', '12.00 ']
['30/12/2020', '12.00 ', '12.00 ']
['29/12/2020', '12.00 ', '12.00 ']
['28/12/2020', '12.00 ', '12.00 ']
['25/12/2020', '12.00 ', '12.00 ']
['24/12/2020', '12.00 ', '12.00 ']
['23/12/2020', '12.00 ', '12.00 ']
['22/12/2020', '12.00 ', '12.00 ']
['21/12/2020', '12.00 ', '12.00 ']
['18/12/2020', '11.60 ', '11.60 ']
# Walk the paginated price history: print the current page number and its
# rows, then follow the "Next" link. The loop ends on the first error raised
# by a lookup or click.
# NOTE(review): presumably the usual exit is the missing "Next" link on the
# last page - confirm, since any other failure ends the loop the same way.
while True:
    try:
        page_number = driver.find_element_by_xpath("//table[@class='CafeF_Paging']//td/span").text
        print("Page #" + page_number)
        get_table_results(driver)

        next_link = driver.find_element_by_xpath("//table[@class='CafeF_Paging']//a[contains(@title,'Next to Page')]")
        next_link.click()
        # Fixed pause to give the next page time to load before it is read.
        time.sleep(10)
        #WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH,"//table[@id='GirdTable2']")))
    except Exception as exc:
        # Show the error that was actually raised; printing the `Exception`
        # class itself says nothing about why the loop stopped.
        print(repr(exc))
        break
Page #1
['22/12/2020', '29.50 ', '29.50 ']
['21/12/2020', '30.20 ', '30.20 ']
['18/12/2020', '31.05 ', '31.05 ']
['17/12/2020', '30.90 ', '30.90 ']
['16/12/2020', '31.20 ', '31.20 ']
['15/12/2020', '29.80 ', '29.80 ']
['14/12/2020', '29.60 ', '29.60 ']
['11/12/2020', '29.50 ', '29.50 ']
['10/12/2020', '29.65 ', '29.65 ']
['09/12/2020', '30.40 ', '30.40 ']
['08/12/2020', '30.30 ', '30.30 ']
['07/12/2020', '30.60 ', '30.60 ']
['04/12/2020', '30.35 ', '30.35 ']
['03/12/2020', '30.80 ', '30.80 ']
['02/12/2020', '30.80 ', '30.80 ']
['01/12/2020', '30.45 ', '30.45 ']
['30/11/2020', '30.30 ', '30.30 ']
['27/11/2020', '29.70 ', '29.70 ']
['26/11/2020', '29.10 ', '29.10 ']
['25/11/2020', '28.60 ', '28.60 ']
Page #2
['23/11/2020', '28.70 ', '28.70 ']
['20/11/2020', '28.60 ', '28.60 ']
['19/11/2020', '28.50 ', '28.50 ']
['18/11/2020', '28.80 ', '28.80 ']
['17/11/2020', '29.00 ', '29.00 ']
['16/11/2020', '28.80 ', '28.80 ']
['13/11/2020', '29.10 ', '29.10 ']
['12/11/2020', '29.10 ', '29.10 ']
['11/11/2020', '29.30 ', '29.30 ']
['10/11/2020', '29.50 ', '29.50 ']
['09/11/2020', '29.60 ', '29.60 ']
['06/11/2020', '29.20 ', '29.20 ']
['05/11/2020', '29.40 ', '29.40 ']
['04/11/2020', '29.45 ', '29.45 ']
['03/11/2020', '29.30 ', '29.30 ']
['02/11/2020', '28.25 ', '28.25 ']
['30/10/2020', '28.10 ', '28.10 ']
['29/10/2020', '28.00 ', '28.00 ']
['28/10/2020', '28.20 ', '28.20 ']
['27/10/2020', '28.70 ', '28.70 ']
Page #3
['26/10/2020', '29.00 ', '29.00 ']
['23/10/2020', '29.30 ', '29.30 ']
['22/10/2020', '29.30 ', '29.30 ']
['21/10/2020', '29.20 ', '29.20 ']
['20/10/2020', '29.20 ', '29.20 ']
['19/10/2020', '29.30 ', '29.30 ']
['16/10/2020', '29.50 ', '29.50 ']
['15/10/2020', '29.90 ', '29.90 ']
['14/10/2020', '30.00 ', '30.00 ']
['13/10/2020', '30.20 ', '30.20 ']
['12/10/2020', '30.00 ', '30.00 ']
['09/10/2020', '30.35 ', '30.35 ']
['08/10/2020', '30.35 ', '30.35 ']
['07/10/2020', '30.25 ', '30.25 ']
['06/10/2020', '30.45 ', '30.45 ']
['05/10/2020', '30.35 ', '30.35 ']
['02/10/2020', '30.05 ', '30.05 ']
['01/10/2020', '30.70 ', '30.70 ']
['30/09/2020', '29.45 ', '29.45 ']
['29/09/2020', '29.50 ', '29.50 ']
Page #4
['28/09/2020', '29.70 ', '29.70 ']
['25/09/2020', '29.75 ', '29.75 ']
['24/09/2020', '29.60 ', '29.60 ']
['23/09/2020', '29.80 ', '29.80 ']
['22/09/2020', '29.55 ', '29.55 ']
['21/09/2020', '29.50 ', '29.50 ']
['18/09/2020', '29.50 ', '29.50 ']
['17/09/2020', '29.25 ', '29.25 ']
['16/09/2020', '29.65 ', '29.65 ']
['15/09/2020', '29.80 ', '29.80 ']
['14/09/2020', '30.05 ', '30.05 ']
['11/09/2020', '29.60 ', '29.60 ']
['10/09/2020', '29.75 ', '29.75 ']
['09/09/2020', '29.50 ', '29.50 ']
['08/09/2020', '28.65 ', '28.65 ']
['07/09/2020', '28.40 ', '28.40 ']
['04/09/2020', '28.25 ', '28.25 ']
['03/09/2020', '28.05 ', '28.05 ']
['01/09/2020', '28.45 ', '28.45 ']
['31/08/2020', '27.85 ', '27.85 ']
Page #5
['28/08/2020', '28.15 ', '28.15 ']
['27/08/2020', '27.15 ', '27.15 ']
['26/08/2020', '25.73 ', '28.30 ']
['25/08/2020', '25.68 ', '28.25 ']
['24/08/2020', '25.86 ', '28.45 ']
['21/08/2020', '25.73 ', '28.30 ']
['20/08/2020', '25.50 ', '28.05 ']
['19/08/2020', '25.64 ', '28.20 ']
['18/08/2020', '25.91 ', '28.50 ']
['17/08/2020', '25.82 ', '28.40 ']
['14/08/2020', '25.91 ', '28.50 ']
['13/08/2020', '26.00 ', '28.60 ']
['12/08/2020', '25.55 ', '28.10 ']
['11/08/2020', '24.91 ', '27.40 ']
['10/08/2020', '24.09 ', '26.50 ']
['07/08/2020', '24.41 ', '26.85 ']
['06/08/2020', '24.36 ', '26.80 ']
['05/08/2020', '24.27 ', '26.70 ']
['04/08/2020', '24.09 ', '26.50 ']
['03/08/2020', '24.09 ', '26.50 ']
Page #6
['31/07/2020', '24.00 ', '26.40 ']
['30/07/2020', '23.91 ', '26.30 ']
['29/07/2020', '24.36 ', '26.80 ']
['28/07/2020', '23.91 ', '26.30 ']
['27/07/2020', '23.64 ', '26.00 ']
['24/07/2020', '24.09 ', '26.50 ']
['23/07/2020', '24.45 ', '26.90 ']
['22/07/2020', '24.36 ', '26.80 ']
['21/07/2020', '24.23 ', '26.65 ']
['20/07/2020', '24.09 ', '26.50 ']
['17/07/2020', '24.18 ', '26.60 ']
['16/07/2020', '24.27 ', '26.70 ']
['15/07/2020', '24.18 ', '26.60 ']
['14/07/2020', '24.00 ', '26.40 ']
['13/07/2020', '24.00 ', '26.40 ']
['10/07/2020', '24.09 ', '26.50 ']
['09/07/2020', '24.27 ', '26.70 ']
['08/07/2020', '24.09 ', '26.50 ']
['07/07/2020', '24.18 ', '26.60 ']
['06/07/2020', '24.55 ', '27.00 ']
Page #7
['03/07/2020', '24.45 ', '26.90 ']
['02/07/2020', '24.73 ', '27.20 ']
['01/07/2020', '24.82 ', '27.30 ']
['30/06/2020', '24.73 ', '27.20 ']
['29/06/2020', '24.73 ', '27.20 ']
['26/06/2020', '25.27 ', '27.80 ']
['25/06/2020', '25.27 ', '27.80 ']
['24/06/2020', '25.55 ', '28.10 ']
['23/06/2020', '24.95 ', '27.45 ']
['22/06/2020', '24.91 ', '27.40 ']
['19/06/2020', '24.91 ', '27.40 ']
['18/06/2020', '24.64 ', '27.10 ']
['17/06/2020', '24.64 ', '27.10 ']
['16/06/2020', '24.64 ', '27.10 ']
['15/06/2020', '24.45 ', '26.90 ']
['12/06/2020', '24.64 ', '27.10 ']
['11/06/2020', '24.64 ', '27.10 ']
['10/06/2020', '24.82 ', '27.30 ']
['09/06/2020', '24.82 ', '27.30 ']
['08/06/2020', '25.00 ', '27.50 ']
Page #8
['05/06/2020', '24.73 ', '27.20 ']
['04/06/2020', '24.64 ', '27.10 ']
['03/06/2020', '24.64 ', '27.10 ']
['02/06/2020', '24.64 ', '27.10 ']
['01/06/2020', '24.73 ', '27.20 ']
['29/05/2020', '24.73 ', '27.20 ']
['28/05/2020', '24.77 ', '27.25 ']
['27/05/2020', '24.64 ', '27.10 ']
['26/05/2020', '24.82 ', '27.30 ']
['25/05/2020', '24.73 ', '27.20 ']
['22/05/2020', '24.73 ', '27.20 ']
['21/05/2020', '25.00 ', '27.50 ']
['20/05/2020', '24.91 ', '27.40 ']
['19/05/2020', '24.91 ', '27.40 ']
['18/05/2020', '24.86 ', '27.35 ']
['15/05/2020', '24.86 ', '27.35 ']
['14/05/2020', '24.82 ', '27.30 ']
['13/05/2020', '24.82 ', '27.30 ']
['12/05/2020', '24.91 ', '27.40 ']
['11/05/2020', '24.91 ', '27.40 ']
Page #9
['08/05/2020', '24.68 ', '27.15 ']
['07/05/2020', '24.64 ', '27.10 ']
['06/05/2020', '24.64 ', '27.10 ']
['05/05/2020', '24.82 ', '27.30 ']
['04/05/2020', '24.82 ', '27.30 ']
['29/04/2020', '25.27 ', '27.80 ']
['28/04/2020', '23.95 ', '26.35 ']
['27/04/2020', '24.09 ', '26.50 ']
['24/04/2020', '24.27 ', '26.70 ']
['23/04/2020', '24.73 ', '27.20 ']
['22/04/2020', '24.91 ', '27.40 ']
['21/04/2020', '25.09 ', '27.60 ']
['20/04/2020', '25.50 ', '28.05 ']
['17/04/2020', '25.64 ', '28.20 ']
['16/04/2020', '25.36 ', '27.90 ']
['15/04/2020', '26.18 ', '28.80 ']
['14/04/2020', '26.36 ', '29.00 ']
['13/04/2020', '26.82 ', '29.50 ']
['10/04/2020', '25.73 ', '28.30 ']
['09/04/2020', '25.91 ', '28.50 ']
Page #10
['08/04/2020', '26.05 ', '28.65 ']
['07/04/2020', '25.82 ', '28.40 ']
['06/04/2020', '25.64 ', '28.20 ']
['03/04/2020', '25.45 ', '28.00 ']
['01/04/2020', '24.09 ', '26.50 ']
['31/03/2020', '23.91 ', '26.30 ']
['30/03/2020', '23.95 ', '26.35 ']
['27/03/2020', '24.41 ', '26.85 ']
['26/03/2020', '24.73 ', '27.20 ']
['25/03/2020', '24.73 ', '27.20 ']
['24/03/2020', '24.09 ', '26.50 ']
['23/03/2020', '23.73 ', '26.10 ']
['20/03/2020', '24.05 ', '26.45 ']
['19/03/2020', '23.73 ', '26.10 ']
['18/03/2020', '23.82 ', '26.20 ']
['17/03/2020', '23.86 ', '26.25 ']
['16/03/2020', '24.18 ', '26.60 ']
['13/03/2020', '24.18 ', '26.60 ']
['12/03/2020', '24.09 ', '26.50 ']
['11/03/2020', '24.14 ', '26.55 ']
Page #11
['10/03/2020', '24.64 ', '27.10 ']
['09/03/2020', '24.59 ', '27.05 ']
['06/03/2020', '25.27 ', '27.80 ']
['05/03/2020', '25.32 ', '27.85 ']
['04/03/2020', '25.45 ', '28.00 ']
['03/03/2020', '25.41 ', '27.95 ']
['02/03/2020', '25.18 ', '27.70 ']
['28/02/2020', '24.95 ', '27.45 ']
['27/02/2020', '25.77 ', '28.35 ']
['26/02/2020', '25.95 ', '28.55 ']
['25/02/2020', '26.14 ', '28.75 ']
['24/02/2020', '26.09 ', '28.70 ']
['21/02/2020', '26.18 ', '28.80 ']
['20/02/2020', '26.18 ', '28.80 ']
['19/02/2020', '26.27 ', '28.90 ']
['18/02/2020', '26.23 ', '28.85 ']
['17/02/2020', '26.36 ', '29.00 ']
['14/02/2020', '26.23 ', '28.85 ']
['13/02/2020', '26.32 ', '28.95 ']
['12/02/2020', '26.41 ', '29.05 ']
Page #12
['11/02/2020', '26.64 ', '29.30 ']
['10/02/2020', '26.86 ', '29.55 ']
['07/02/2020', '26.77 ', '29.45 ']
['06/02/2020', '26.95 ', '29.65 ']
['05/02/2020', '26.91 ', '29.60 ']
['04/02/2020', '27.00 ', '29.70 ']
['03/02/2020', '26.82 ', '29.50 ']
['31/01/2020', '27.05 ', '29.75 ']
['30/01/2020', '27.45 ', '30.20 ']
['22/01/2020', '27.36 ', '30.10 ']
['21/01/2020', '27.09 ', '29.80 ']
['20/01/2020', '26.95 ', '29.65 ']
['17/01/2020', '27.00 ', '29.70 ']
['16/01/2020', '27.14 ', '29.85 ']
['15/01/2020', '26.95 ', '29.65 ']
['14/01/2020', '26.82 ', '29.50 ']
['13/01/2020', '27.41 ', '30.15 ']
['10/01/2020', '27.64 ', '30.40 ']
['09/01/2020', '27.27 ', '30.00 ']
<class 'Exception'>