#!/usr/bin/env python3
'''
Popular Things - A script to find popular items for a specific date by web scraping.

This program scrapes Billboard charts to find the most popular songs, albums, and artists
for a given year, month, or specific date.

Usage:
    python popular_things.py --year YEAR [--month MONTH] [--day DAY] [--limit LIMIT] [--category CATEGORY]

Examples:
    python popular_things.py --year 2023 --month 12 --day 25
    python popular_things.py --year 2023 --month 6 --category albums
    python popular_things.py --year 2022 --limit 15
'''
import argparse
import sys
import requests
import datetime
import calendar
from bs4 import BeautifulSoup
from typing import List, Dict, Any, Optional
def validate_date(year: int, month: Optional[int] = None, day: Optional[int] = None) -> bool:
    """
    Validate the provided date components.

    Args:
        year: The year (1958 through the current year).
        month: The month (1-12), optional.
        day: The day of the month, optional; only accepted together with ``month``.

    Returns:
        bool: True if the components form a real date (or year / year-month
        period) that is not in the future, False otherwise.
    """
    today = datetime.date.today()

    # Billboard data starts around 1958
    if year < 1958 or year > today.year:
        return False

    if month is None:
        # Day provided without month is invalid; a bare year is fine.
        return day is None

    if month < 1 or month > 12:
        return False

    if day is None:
        # Month-only query: reject months that have not started yet.
        return (year, month) <= (today.year, today.month)

    try:
        # Raises ValueError when the day does not exist in that month/year
        # (e.g. February 30, or February 29 in a non-leap year).
        requested = datetime.date(year, month, day)
    except ValueError:
        return False

    # A single comparison covers the future-year, future-month and future-day cases.
    return requested <= today
def scrape_billboard(year: int, month: Optional[int] = None,
                     day: Optional[int] = None,
                     category: str = 'hot-100',
                     limit: int = 10) -> List[Dict[str, str]]:
    """
    Scrape Billboard charts for popular items.

    Args:
        year: The year
        month: The month (1-12), optional; defaults to December
        day: The day of the month, optional; defaults to the last day of the month
        category: The chart category (hot-100, billboard-200, artist-100) or one
            of the friendly aliases (songs, albums, artists)
        limit: Maximum number of items to return; values below 1 yield no items

    Returns:
        List of dictionaries containing rank, title, and artist. For the
        artist chart, 'title' holds the artist name and 'artist' is empty.
        An unrecognised chart category yields an empty list.

    Raises:
        ValueError: If year/month/day do not form a real calendar date
        Exception: If the HTTP request fails or the page cannot be parsed
    """
    # Billboard requires a specific date, so default to end of month/year if not provided
    if month is None:
        month = 12
    if day is None:
        # monthrange() returns (weekday_of_first_day, days_in_month) and
        # already accounts for leap years.
        day = calendar.monthrange(year, month)[1]

    chart_date = datetime.date(year, month, day)
    # The end-of-month/year default can land in the future for the current
    # year; ask for today's chart instead of a date that has no chart yet.
    today = datetime.date.today()
    if chart_date > today:
        chart_date = today

    # Format the date for the URL (YYYY-MM-DD)
    date_str = chart_date.isoformat()

    # Map friendly category names to URL paths; unknown names pass through unchanged
    category_map = {
        'songs': 'hot-100',
        'albums': 'billboard-200',
        'artists': 'artist-100'
    }
    chart_category = category_map.get(category.lower(), category)

    # Billboard chart URL
    url = f'https://www.billboard.com/charts/{chart_category}/{date_str}/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        raise Exception(f'Web scraping failed: {e}') from e

    try:
        soup = BeautifulSoup(response.text, 'html.parser')
        popular_items: List[Dict[str, str]] = []

        # Song and album charts carry a separate artist label per row; the
        # artist chart only has the name in the row heading.
        has_artist_column = chart_category in ('hot-100', 'billboard-200')
        if not has_artist_column and chart_category != 'artist-100':
            return popular_items

        chart_rows = soup.select('.o-chart-results-list-row')
        # Clamp so a negative limit cannot slice from the end of the list.
        for position, row in enumerate(chart_rows[:max(limit, 0)], 1):
            try:
                # The rank is in a span with class 'c-label'; fall back to
                # the row position when the element is missing.
                rank_elem = row.select_one('.o-chart-results-list__item--rank .c-label')
                rank = rank_elem.text.strip() if rank_elem is not None else str(position)

                # The title (or artist name on the artist chart) is in an h3 with class 'c-title'
                title_elem = row.select_one('h3.c-title')
                title = title_elem.text.strip() if title_elem is not None else 'Unknown'

                if has_artist_column:
                    # The artist is in a span with class 'c-label a-font-primary-s'
                    artist_elem = row.select_one('span.c-label.a-font-primary-s')
                    artist = artist_elem.text.strip() if artist_elem is not None else 'Unknown'
                else:
                    artist = ''  # No separate artist field for this chart

                popular_items.append({
                    'rank': rank,
                    'title': title,
                    'artist': artist
                })
            except Exception as e:
                # Best effort: one malformed row should not discard the rest.
                print(f'Error parsing item: {e}')
                continue

        return popular_items
    except Exception as e:
        raise Exception(f'Failed to parse webpage: {e}') from e
def format_date_label(year: int, month: Optional[int] = None, day: Optional[int] = None) -> str:
    """
    Format a nice date label based on the provided components.

    Args:
        year: The year
        month: The month (1-12), optional
        day: The day of the month, optional

    Returns:
        str: 'Month D, YYYY' when month and day are given, 'Month YYYY' when
        only the month is given, otherwise just the year.
    """
    if not month:
        # Without a month a day cannot be labelled, so only the year is shown.
        return str(year)

    month_name = calendar.month_name[month]
    if day:
        return f'{month_name} {day}, {year}'
    return f'{month_name} {year}'
def display_results(items: List[Dict[str, str]], date_label: str, category: str) -> None:
    """
    Display the formatted results.

    Args:
        items: List of item dictionaries (rank, title, artist)
        date_label: String representing the date period
        category: The category of items (songs, albums, artists) or the
            Billboard chart name (hot-100, billboard-200, artist-100)
    """
    if not items:
        print(f'No popular {category} found for {date_label}')
        return

    # Chart names get a friendly heading; friendly names are just capitalised.
    chart_titles = {
        'hot-100': 'Songs',
        'billboard-200': 'Albums',
        'artist-100': 'Artists'
    }
    category_key = category.lower()
    category_title = chart_titles.get(category_key, category.capitalize())
    print(f'\nMost Popular {category_title} for {date_label}\n')

    # Artist charts have no separate title column
    if category_key in ('artists', 'artist-100'):
        print(f"{'Rank':<6}{'Artist'}")
        print('-' * 50)
        for item in items:
            print(f"{item['rank']:<6}{item['title']}")
        return

    title_width = 30
    # Keep one column free so a long title never touches the artist name.
    max_title_len = title_width - 1

    print(f"{'Rank':<6}{'Title':<{title_width}}{'Artist'}")
    print('-' * 70)
    for item in items:
        title = item['title']
        if len(title) > max_title_len:
            # Truncate long titles; the three-character suffix keeps the
            # result at exactly max_title_len characters.
            title = title[:max_title_len - 3] + '...'
        print(f"{item['rank']:<6}{title:<{title_width}}{item['artist']}")
def main() -> None:
    """Main function to handle argument parsing and program flow.

    Parses the command line, validates the requested date, scrapes the
    matching Billboard chart and prints the results. Exits with status 1 on
    an invalid date or a scraping failure; argparse exits with status 2 on
    malformed arguments.
    """
    parser = argparse.ArgumentParser(
        description='Find popular things (songs, albums, artists) for a specific date by web scraping Billboard charts.'
    )
    # Option names must start with '--' (ASCII hyphens) for argparse to treat
    # them as optional flags rather than positionals.
    parser.add_argument('--year', type=int, required=True,
                        help='Year to find popular items from (1958-present)')
    parser.add_argument('--month', type=int,
                        help='Month to find popular items from (1-12)')
    parser.add_argument('--day', type=int,
                        help='Day to find popular items from')
    parser.add_argument('--limit', type=int, default=10,
                        help='Number of items to display (default: 10)')
    parser.add_argument('--category', type=str, default='songs',
                        choices=['songs', 'albums', 'artists', 'hot-100', 'billboard-200', 'artist-100'],
                        help='Category of popular items to find (default: songs)')
    args = parser.parse_args()

    # A zero or negative limit can never produce a meaningful listing.
    if args.limit < 1:
        parser.error('--limit must be a positive integer')

    # Validate the date
    if not validate_date(args.year, args.month, args.day):
        print('Error: Invalid date provided. Please check the date and try again.', file=sys.stderr)
        print('Note: Billboard data is generally available from 1958 onward.', file=sys.stderr)
        sys.exit(1)

    try:
        # scrape_billboard maps friendly names (songs/albums/artists) to the
        # Billboard chart names itself, so the raw choice is passed through.
        items = scrape_billboard(
            args.year, args.month, args.day, args.category, args.limit
        )
        # Format the date for display
        date_label = format_date_label(args.year, args.month, args.day)
        # Display the results
        display_results(items, date_label, args.category)
    except Exception as e:
        # Top-level boundary: report the failure with hints instead of a traceback.
        print(f'Error: {e}', file=sys.stderr)
        print('\nTips:', file=sys.stderr)
        print('- Ensure you have internet connectivity', file=sys.stderr)
        print('- Try a different date (Billboard data starts around 1958)', file=sys.stderr)
        print('- Some charts may not be available for specific dates', file=sys.stderr)
        sys.exit(1)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()