Code
try:
import wikipedia
except ImportError:
! pip install wikipedia

May 1, 2023
I wanted to get the first image of a Wikipedia page and the coordinates.

ChatGPT was very helpful here as well.
The code ChatGPT provided worked with the examples it used. I made only some small adjustments to the code below.
ChatGPT added comments to every line of the code. I combined them into blocks to make the snippet more concise.
import requests
import wikipedia
from bs4 import BeautifulSoup
# Download the first infobox image of a Wikipedia article to image.jpg.

# Set the Wikipedia page title and get page
page_title = "Berlin"
page = wikipedia.page(page_title)

# Get html content of page and parse it with BeautifulSoup
html = page.html()
soup = BeautifulSoup(html, "html.parser")

# Find the infobox image element. Not every article has an infobox (or an
# image inside it), so fail with a clear message instead of an
# AttributeError on None.
infobox = soup.find("table", class_="infobox")
image_element = infobox.find("img") if infobox is not None else None
if image_element is None:
    raise LookupError(f"No infobox image found on page {page_title!r}")

# Build the image url: only prepend the scheme when the src attribute does
# not already carry one (e.g. a protocol-relative "//host/path" value).
image_src = image_element["src"]
if image_src.startswith(("http://", "https://")):
    image_url = image_src
else:
    image_url = "https:" + image_src

# Download the image with proper headers
headers = {'User-Agent': 'Mozilla/5.0'}
response = requests.get(image_url, headers=headers, timeout=30)
# Abort on HTTP errors so an error page is never saved as the image.
response.raise_for_status()

# NOTE: the output name is fixed to image.jpg regardless of the actual
# format of the downloaded file.
with open('image.jpg', 'wb') as f:
    f.write(response.content)
import requests
import json
def get_coordinates(title):
    """Return the first coordinates entry the MediaWiki API reports for *title*.

    Sends an ``action=query`` / ``prop=coordinates`` request to the English
    Wikipedia API and scans the returned pages.

    Args:
        title: Title of the Wikipedia article to look up.

    Returns:
        The first item of the ``coordinates`` list of the first page that
        has one, or ``None`` when no returned page carries coordinates.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            API answers with an HTTP error status.
    """
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "prop": "coordinates",
        "titles": title,
        "format": "json",
        # formatversion 2 returns "pages" as a list rather than a dict
        # keyed by page id, which is what the loop below relies on.
        "formatversion": 2,
    }
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, params=params, headers=headers, timeout=30)
    # Surface HTTP errors here instead of failing later on a missing key.
    response.raise_for_status()
    data = response.json()

    # An API-level error response may lack "query"/"pages"; treat that the
    # same as "no coordinates" rather than raising KeyError.
    pages = data.get("query", {}).get("pages", [])
    for page in pages:
        found = page.get("coordinates")
        if found:  # skips pages with a missing or empty coordinates list
            return found[0]
    return None
# example usage
title = "Berlin"
coordinates = get_coordinates(title)
# get_coordinates returns None when the article has no coordinates, so
# guard before subscripting.
if coordinates is None:
    print(f"No coordinates found for {title!r}")
else:
    print(coordinates["lat"], coordinates["lon"])
# Output shown in the original post for "Berlin": 52.52 13.405