Help required in Selenium and Web Scraping fo r django project

apoorvthedude · December 6, 2022, 6:50pm


#!/usr/bin/python -tt
import selenium
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager

#importing time libraries to add wait times
from datetime import datetime
from time import sleep

#importing beautiful soup to read the page html source code
from bs4 import BeautifulSoup

#to create csv file where we'll scrape the content
import pandas as pd

#we'll also add the options functionality to disable notifications
chrome_options = Options()
#disable notifications
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-infobars")

#Writing three empty list

time_list = []  # Timestamp of the post
likes_list = [] # No of Likes of the post
name_list = []  # Name of the post

driver = webdriver.Chrome(ChromeDriverManager().install(),options=chrome_options)
driver.get("https://facebook.com")
driver.maximize_window()
sleep(2)

#Accept cookies
#cookies = WebDriverWait(driver,30).until(EC.element_to_be_clickable((By.XPATH,'')))

email = driver.find_element_by_id("email")
email.send_keys("XXXXXXX@gmail.com") #Email-id

password = driver.find_element_by_id("pass")
password.send_keys("XXXXXXX") # Password
sleep(2)
login=driver.find_element_by_name("login")
login.click()
sleep(2)
driver.get("https://www.facebook.com/millerreporting") 
sleep(4)

while True:
    soup = BeautifulSoup(driver.page_source,"html.parser")
    all_posts = soup.find_all("div",{"class":"x1yztbdb x1n2onr6 xh8yej3 x1ja2u2z"})
    for post in all_posts:
        try:
            name=post.find("a",{"class":"x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz xt0b8zv xzsf02u x1s688f"})
        except:
            name = "Post not found"
        print(name)
        try:
            time=post.find("a",{"class":"x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz x1heor9g xt0b8zv xo1l8bm"})
        except:
            time="Time not found"
        print(time)
        try:
            likes=post.find("span",{"class":"xrbpyxo x6ikm8r x10wlt62 xlyipyv x1exxlbk"})
        except:
            likes = "Likes not found"
        print(likes)

        name_list.append(name)
        time_list.append(time)
        likes_list.append(likes)
        df=pd.DataFrame({"name":name_list,"time":time_list,"likes":likes_list})
        #df.drop_duplicates(subset="content",keep="first",inplace = True)
        df.to_csv("facebook_data1.csv")

        if df.shape[0]>10:
            break
    if df.shape[0]>10:
        break

    sleep(5)
    y = 500
    for timer in range(0,20):
        driver.execute_script("window.scrollTo(0," + str(y) + ")")
        y +=500
        sleep(3)


#print("You are inside the fb group & ready to close the browsr")
#driver.quit()

Aim: I want to scrape timestamp of all the posts & likes etc.

I’m scraping ( Miller Reporting Group )

First Problem is that it is not stopping at last post of the FB page.
Second in the csv file it is not extracting data,instead giving HTML.
CSV file : (facebook_data1.csv - Google Drive)

Help Needed.

Topic		Replies	Views
Need help with the Tests - Django tutorial part - 5 Getting Started	6	224	February 2, 2024
Help taking input and displaying it. Getting Started	3	111	November 15, 2023
issues with include() Using Django	5	1190	August 28, 2021
The view didn't return an HttpResponse object. It returned None instead. Getting Started	1	3547	August 25, 2022
Tutorial building index template Using Django	38	1832	July 4, 2020

Help required in Selenium and Web Scraping fo r django project

Related Topics