{{tag>projects cloud club computing virtualization machines VMs AWS Azure GCP}} [[python_club|About the Club]]\\ ==== Python Club Topics - Exercise: Popular Music ==== ==== Exercise: Display popular music for a given date ==== - Output: Obtain a date as arguments from the user before the program runs. Collect music information from the billboard.com on the internet and display it. - What you learn from the example: - Get information as arguments supplied on the command line - Scrape data from public websites - Organize the data obtained ==== Solution ==== #!/usr/bin/env python3 ''' Popular Things - A script to find popular items for a specific date by web scraping. This program scrapes Billboard charts to find the most popular songs, albums, and artists for a given year, month, or specific date. Usage: python popular_things.py --year YEAR [--month MONTH] [--day DAY] [--limit LIMIT] [--category CATEGORY] Examples: python popular_things.py --year 2023 --month 12 --day 25 python popular_things.py --year 2023 --month 6 --category albums python popular_things.py --year 2022 --limit 15 ''' import argparse import sys import requests import datetime import calendar from bs4 import BeautifulSoup from typing import List, Dict, Any, Optional def validate_date(year: int, month: Optional[int] = None, day: Optional[int] = None) -> bool: ''' Validate the provided date components. Args: year: The year (1958-current) month: The month (1-12), optional day: The day of the month, optional Returns: bool: True if date is valid, False otherwise ''' current_date = datetime.datetime.now() # Billboard data starts around 1958 if year < 1958 or year > current_date.year: return False # If month provided, validate month if month is not None: if month < 1 or month > 12: return False # Check if date is in the future if year == current_date.year and month > current_date.month: return False # If day provided, validate day if day is not None: if not month: return False # Day provided without month is invalid try: # Check if the day is valid for the given month and year datetime.datetime(year, month, day) except ValueError: return False # Check if date is in the future if (year == current_date.year and month == current_date.month and day > current_date.day): return False return True def scrape_billboard(year: int, month: Optional[int] = None, day: Optional[int] = None, category: str = 'hot-100', limit: int = 10) -> List[Dict[str, str]]: ''' Scrape Billboard charts for popular items. Args: year: The year month: The month (1-12), optional day: The day of the month, optional category: The chart category (hot-100, billboard-200, artist-100) limit: Maximum number of items to return Returns: List of dictionaries containing rank, title, and artist Raises: Exception: If the scraping fails ''' # Billboard requires a specific date, so default to end of month/year if not provided if month is None: month = 12 if day is None: # Last day of the month if month == 2 and (year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)): day = 29 # Leap year elif month == 2: day = 28 elif month in [4, 6, 9, 11]: day = 30 else: day = 31 # Format the date for the URL date_str = f'{year:04d}-{month:02d}-{day:02d}' # Map category names to URL paths category_map = { 'songs': 'hot-100', 'albums': 'billboard-200', 'artists': 'artist-100' } # Get the URL path for the category chart_category = category_map.get(category.lower(), category) # Billboard chart URL url = f'https://www.billboard.com/charts/{chart_category}/{date_str}/' try: headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' } response = requests.get(url, headers=headers) response.raise_for_status() soup = BeautifulSoup(response.text, 'html.parser') popular_items = [] # Different charts have slightly different HTML structures if chart_category in ['hot-100', 'billboard-200']: # Find chart items chart_items = soup.select('.o-chart-results-list-row') for i, item in enumerate(chart_items[:limit], 1): # Extract rank, title, and artist try: # The rank is in a span with class 'c-label' rank_elem = item.select_one('.o-chart-results-list__item--rank .c-label') rank = rank_elem.text.strip() if rank_elem else str(i) # The title is in an h3 with class 'c-title' title_elem = item.select_one('h3.c-title') title = title_elem.text.strip() if title_elem else 'Unknown' # The artist is in a span with class 'c-label a-font-primary-s' artist_elem = item.select_one('span.c-label.a-font-primary-s') artist = artist_elem.text.strip() if artist_elem else 'Unknown' popular_items.append({ 'rank': rank, 'title': title, 'artist': artist }) except Exception as e: print(f'Error parsing item: {e}') continue elif chart_category == 'artist-100': # Artist chart has a different structure chart_items = soup.select('.o-chart-results-list-row') for i, item in enumerate(chart_items[:limit], 1): try: # The rank is in a span with class 'c-label' rank_elem = item.select_one('.o-chart-results-list__item--rank .c-label') rank = rank_elem.text.strip() if rank_elem else str(i) # The artist name is in an h3 with class 'c-title' artist_elem = item.select_one('h3.c-title') artist = artist_elem.text.strip() if artist_elem else 'Unknown' popular_items.append({ 'rank': rank, 'title': artist, # For artists, the title is the artist name 'artist': '' # No separate artist field for this chart }) except Exception as e: print(f'Error parsing item: {e}') continue return popular_items except requests.exceptions.RequestException as e: raise Exception(f'Web scraping failed: {e}') except Exception as e: raise Exception(f'Failed to parse webpage: {e}') def format_date_label(year: int, month: Optional[int] = None, day: Optional[int] = None) -> str: ''' Format a nice date label based on the provided components. Args: year: The year month: The month (1-12), optional day: The day of the month, optional Returns: str: Formatted date label ''' if day and month: return f'{calendar.month_name[month]} {day}, {year}' elif month: return f'{calendar.month_name[month]} {year}' else: return str(year) def display_results(items: List[Dict[str, str]], date_label: str, category: str) -> None: ''' Display the formatted results. Args: items: List of item dictionaries (rank, title, artist) date_label: String representing the date period category: The category of items (songs, albums, artists) ''' if not items: print(f'No popular {category} found for {date_label}') return category_title = category.capitalize() if category.lower() in ['hot-100', 'billboard-200', 'artist-100']: if category.lower() == 'hot-100': category_title = 'Songs' elif category.lower() == 'billboard-200': category_title = 'Albums' elif category.lower() == 'artist-100': category_title = 'Artists' print(f'\nMost Popular {category_title} for {date_label}\n') # Print header based on category if category.lower() in ['artists', 'artist-100']: print(f'{'Rank':<6}{'Artist'}') print('-' * 50) for item in items: print(f'{item['rank']:<6}{item['title']}') else: print(f'{'Rank':<6}{'Title':<30}{'Artist'}') print('-' * 70) for item in items: # Truncate long titles title = item['title'][:27] + '...' if len(item['title']) > 30 else item['title'].ljust(30) print(f'{item['rank']:<6}{title}{item['artist']}') def main() -> None: '''Main function to handle argument parsing and program flow.''' parser = argparse.ArgumentParser( description='Find popular things (songs, albums, artists) for a specific date by web scraping Billboard charts.' ) parser.add_argument('--year', type=int, required=True, help='Year to find popular items from (1958-present)') parser.add_argument('--month', type=int, help='Month to find popular items from (1-12)') parser.add_argument('--day', type=int, help='Day to find popular items from') parser.add_argument('--limit', type=int, default=10, help='Number of items to display (default: 10)') parser.add_argument('--category', type=str, default='songs', choices=['songs', 'albums', 'artists', 'hot-100', 'billboard-200', 'artist-100'], help='Category of popular items to find (default: songs)') args = parser.parse_args() # Validate the date if not validate_date(args.year, args.month, args.day): print('Error: Invalid date provided. Please check the date and try again.') print('Note: Billboard data is generally available from 1958 onward.') sys.exit(1) try: # Map friendly category names to Billboard chart names category_map = { 'songs': 'hot-100', 'albums': 'billboard-200', 'artists': 'artist-100' } # Use the mapped category or the original if not in the map category = category_map.get(args.category.lower(), args.category) # Scrape the Billboard charts items = scrape_billboard( args.year, args.month, args.day, category, args.limit ) # Format the date for display date_label = format_date_label(args.year, args.month, args.day) # Display the results display_results(items, date_label, args.category) except Exception as e: print(f'Error: {e}') print('\nTips:') print('- Ensure you have internet connectivity') print('- Try a different date (Billboard data starts around 1958)') print('- Some charts may not be available for specific dates') sys.exit(1) if __name__ == '__main__': main()