The BeautifulSoup Package
Installation
pip install beautifulsoup4 # note the 4 at the end - this is the latest version
Usage
import requests
from bs4 import BeautifulSoup # note that the import package command is `bs4`
# Download the Project Gutenberg author listing page. A timeout keeps the
# script from hanging forever if the server never answers.
response = requests.get("https://www.gutenberg.org/ebooks/author/65", timeout=10)
response.raise_for_status()  # fail loudly on 4xx/5xx instead of parsing an error page
response_html = response.text

# Name the parser explicitly: without it BeautifulSoup guesses based on what
# is installed, emits a GuessedAtParserWarning, and may parse differently
# from one machine to the next.
soup = BeautifulSoup(response_html, "html.parser")

# A plain string as the second argument is matched against the CSS class,
# so this finds every <span class="title">.
titles = soup.find_all("span", "title")
print(type(titles)) #> <class 'bs4.element.ResultSet'> (like a list)
print(titles[5]) #> <span class="title">Romeo and Juliet</span>
print(titles[5].text) #> Romeo and Juliet

# Each <li class="booklink"> is expected to hold one book's title, subtitle
# (author) and download count; items missing any of these are skipped.
booklinks = soup.find_all("li", "booklink")
books = []
for list_item in booklinks:
    try:
        title = list_item.find("span", "title").text #> "Shakespeare's Sonnets"
        author = list_item.find("span", "subtitle").text #> "William Shakespeare"
        downloads = list_item.find("span", "extra").text #> '830 downloads'
        downloads_count = int(downloads.replace(" downloads", "")) #> 830
    except (AttributeError, ValueError) as err:
        # AttributeError: find() returned None because a span is missing.
        # ValueError: the download text was not a plain integer.
        print("OOPS", type(err), err, "SKIPPING...")
        continue

    book = {"title": title, "author": author, "downloads": downloads_count}
    print(book)
    books.append(book)
print(books[2]["title"]) #> Romeo and Juliet
Last updated