Hello everyone,
I want to web scrape the RSS of a website and display it on a created website of my own. The web scraping part is working when I use just a scraping.py file and write the response into a json file.
So, I created a model News with title, link, description and published_date. The code of the scraping.py file was copied and modified into the views.py.
I understand my code in the views.py file as following:
get the content of the xml file with provided link. store all into the articles variable. loop through the articles and find the title, link, description and published_date. Create new variable article and declare to News model. Initialize the article with data and after save the article. With return redirect I redirect to the function save_article for next steps (perhaps not necessary?)
In save_article function I create news and initialize it to a News model to get all saved articles. And render it in the html file.
To solve the issue I tried already several ways I could find but I get every time the error: The view didn't return an HttpResponse object. It returned None instead.
I will show you two different solutions:
1)
from django.shortcuts import render, redirect
from .models import News
from django.http import HttpResponse
import requests
from bs4 import BeautifulSoup as bs4
import json
def dw_rss(request):
try:
r = requests.get('https://rss.dw.com/rdf/rss-en-eu')
soup = bs4(r.content, features='xml')
articles = soup.findAll('item')
for a in articles:
title = a.find('title').text
link = a.find('link').text
description = a.find('description').text
published_date = a.find('dc:date').text
article = News()
article = {
'title': title,
'link': link,
'description': description,
'published_date': published_date
}
article.save()
return redirect(save_articles)
except Exception as err:
print('Scraping failed! Exception: ')
print(err)
def save_articles(request):
news_json = [{'news': news}]
return HttpResponse(news_json, 'application.json')
from django.shortcuts import render, redirect
from .models import News
from django.http import HttpResponse
import requests
from bs4 import BeautifulSoup as bs4
import json
def dw_rss(request):
try:
r = requests.get('https://rss.dw.com/rdf/rss-en-eu')
soup = bs4(r.content, features='xml')
articles = soup.findAll('item')
for a in articles:
title = a.find('title').text
link = a.find('link').text
description = a.find('description').text
published_date = a.find('dc:date').text
article = News()
article = {
'title': title,
'link': link,
'description': description,
'published_date': published_date
}
article.save()
return redirect(save_articles)
except Exception as err:
print('Scraping failed! Exception: ')
print(err)
def save_articles(request):
news = News.objects.all()
return render(request, 'news.html', {'news':news})
the scraping.py file:
import requests
from bs4 import BeautifulSoup as bs4
import json
def dw_rss():
article_list = []
try:
r = requests.get('https://rss.dw.com/rdf/rss-en-eu')
soup = bs4(r.content, features='xml')
articles = soup.findAll('item')
for a in articles:
title = a.find('title').text
link = a.find('link').text
description = a.find('description').text
published_date = a.find('dc:date').text
article = {
'title': title,
'link': link,
'description': description,
'published_date': published_date
}
article_list.append(article)
return save_articles(article_list)
except Exception as err:
print('Scraping failed! Exception: ')
print(err)
def save_articles(article_list):
with open('articles.json', 'w') as file:
json.dump(article_list, file, indent=4)
dw_rss()
I am looking forward to your help.
Thank you
Doro