User Tools

Site Tools


clubs:python_club:python_club_ex_popular_music
Home | clubs :: cloud club :: python_club :: 3D-Printing | projects :: Proxmox | Kubernetes | scripting | utilities | games

About the Club

Python Club Topics - Exercise: Popular Music

  1. Output: Read a date from the command-line arguments supplied when the program is started. Collect music chart information for that date from billboard.com and display it.
  2. What you learn from the example:
    1. Get information as arguments supplied on the command line
    2. Scrape data from public websites
    3. Organize the data obtained

Solution

[1] code:python show
#!/usr/bin/env python3
'''
Popular Things - A script to find popular items for a specific date by web scraping.

This program scrapes Billboard charts to find the most popular songs, albums, and artists
for a given year, month, or specific date.

Usage:
    python popular_things.py --year YEAR [--month MONTH] [--day DAY] [--limit LIMIT] [--category CATEGORY]

Examples:
    python popular_things.py --year 2023 --month 12 --day 25
    python popular_things.py --year 2023 --month 6 --category albums
    python popular_things.py --year 2022 --limit 15
'''

import argparse
import sys
import requests
import datetime
import calendar
from bs4 import BeautifulSoup
from typing import List, Dict, Any, Optional


def validate_date(year: int, month: Optional[int] = None, day: Optional[int] = None) -> bool:
    '''
    Check that the given date components describe a real, non-future date.

    Args:
        year: The year; must lie between 1958 and the current year inclusive
        month: The month (1-12), optional
        day: The day of the month, optional; only accepted together with a month

    Returns:
        bool: True if date is valid, False otherwise
    '''
    today = datetime.date.today()

    # Billboard data starts around 1958
    if not 1958 <= year <= today.year:
        return False

    if month is None:
        # A day without a month cannot be placed on the calendar
        return day is None

    if not 1 <= month <= 12:
        return False

    if day is None:
        # Reject months of the current year that have not started yet
        return (year, month) <= (today.year, today.month)

    try:
        # date() raises ValueError for days that do not exist (e.g. Feb 30)
        requested = datetime.date(year, month, day)
    except ValueError:
        return False

    # A single comparison covers future years, months and days alike
    return requested <= today


def scrape_billboard(year: int, month: Optional[int] = None,
                     day: Optional[int] = None,
                     category: str = 'hot-100',
                     limit: int = 10,
                     timeout: float = 30.0) -> List[Dict[str, str]]:
    '''
    Scrape Billboard charts for popular items.

    Args:
        year: The year
        month: The month (1-12), optional; defaults to December
        day: The day of the month, optional; defaults to the last day of the month
        category: The chart category (hot-100, billboard-200, artist-100) or one
            of the friendly aliases (songs, albums, artists)
        limit: Maximum number of items to return; values below 1 yield no items
        timeout: Seconds to wait for the HTTP request before giving up

    Returns:
        List of dictionaries containing rank, title, and artist. For the
        artist chart the artist name is stored under 'title' and 'artist' is
        an empty string. Categories other than the three known charts yield
        an empty list.

    Raises:
        ValueError: If month is outside 1-12 while day is omitted
        Exception: If the HTTP request fails or the page cannot be parsed
    '''
    # Billboard requires a specific date, so default to end of month/year if not provided
    if month is None:
        month = 12

    if day is None:
        # monthrange() returns (weekday of first day, number of days in month)
        day = calendar.monthrange(year, month)[1]

    # Format the date for the URL
    date_str = f'{year:04d}-{month:02d}-{day:02d}'

    # Map category names to URL paths
    category_map = {
        'songs': 'hot-100',
        'albums': 'billboard-200',
        'artists': 'artist-100'
    }

    # Get the URL path for the category
    chart_category = category_map.get(category.lower(), category)

    # Billboard chart URL
    url = f'https://www.billboard.com/charts/{chart_category}/{date_str}/'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        # Without a timeout a stalled connection would block forever
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        raise Exception(f'Web scraping failed: {e}') from e

    try:
        soup = BeautifulSoup(response.text, 'html.parser')

        if chart_category not in ('hot-100', 'billboard-200', 'artist-100'):
            # No parsing rules exist for other charts
            return []

        # The artist chart lists only names, so it has no separate artist column
        has_artist_column = chart_category != 'artist-100'

        popular_items = []
        chart_items = soup.select('.o-chart-results-list-row')

        # Clamp so a negative limit cannot slice from the end of the list
        for position, item in enumerate(chart_items[:max(limit, 0)], 1):
            try:
                # NOTE(review): the '--rank' modifier was restored from a copy in
                # which the double hyphen had been turned into an en dash; confirm
                # against the live markup. If nothing matches, the row's position
                # in the list is used as the rank.
                rank_elem = item.select_one('.o-chart-results-list__item--rank .c-label')
                rank = rank_elem.text.strip() if rank_elem else str(position)

                # The title (or artist name on the artist chart) is in an h3 with class 'c-title'
                title_elem = item.select_one('h3.c-title')
                title = title_elem.text.strip() if title_elem else 'Unknown'

                if has_artist_column:
                    # The artist is in a span with class 'c-label a-font-primary-s'
                    artist_elem = item.select_one('span.c-label.a-font-primary-s')
                    artist = artist_elem.text.strip() if artist_elem else 'Unknown'
                else:
                    artist = ''

                popular_items.append({
                    'rank': rank,
                    'title': title,
                    'artist': artist
                })
            except Exception as e:
                # Best effort: one malformed row should not discard the whole chart
                print(f'Error parsing item: {e}')
                continue

        return popular_items

    except Exception as e:
        raise Exception(f'Failed to parse webpage: {e}') from e


def format_date_label(year: int, month: Optional[int] = None, day: Optional[int] = None) -> str:
    '''
    Build a human-readable label from the provided date components.

    Args:
        year: The year
        month: The month (1-12), optional
        day: The day of the month, optional; ignored when no month is given

    Returns:
        str: 'Month D, YYYY' when month and day are given, 'Month YYYY' when
        only the month is given, otherwise just the year
    '''
    if not month:
        return str(year)

    month_name = calendar.month_name[month]
    if day:
        return f'{month_name} {day}, {year}'
    return f'{month_name} {year}'


def display_results(items: List[Dict[str, str]], date_label: str, category: str) -> None:
    '''
    Print the chart entries as an aligned table.

    Args:
        items: List of item dictionaries (rank, title, artist)
        date_label: String representing the date period
        category: The category of items (songs, albums, artists) or the
            Billboard chart name (hot-100, billboard-200, artist-100)
    '''
    if not items:
        print(f'No popular {category} found for {date_label}')
        return

    category_key = category.lower()

    # Chart names get a friendly heading; friendly names are just capitalized
    chart_titles = {
        'hot-100': 'Songs',
        'billboard-200': 'Albums',
        'artist-100': 'Artists'
    }
    category_title = chart_titles.get(category_key, category.capitalize())

    print(f'\nMost Popular {category_title} for {date_label}\n')

    # Outer double quotes keep these f-strings valid before Python 3.12,
    # which does not allow reusing the enclosing quote inside a replacement field.
    if category_key in ('artists', 'artist-100'):
        # Artist charts carry the artist name in the 'title' field
        print(f"{'Rank':<6}{'Artist'}")
        print('-' * 50)
        for item in items:
            print(f"{item['rank']:<6}{item['title']}")
        return

    print(f"{'Rank':<6}{'Title':<30}{'Artist'}")
    print('-' * 70)
    for item in items:
        title = item['title']
        if len(title) > 30:
            # 27 characters plus a three-character marker fills the 30-wide column
            title = title[:27] + '...'
        else:
            title = title.ljust(30)
        print(f"{item['rank']:<6}{title}{item['artist']}")


def main() -> None:
    '''
    Parse the command-line arguments, fetch the requested chart and print it.

    Exits with status 1 when the date is invalid or the chart cannot be
    retrieved; argparse exits with status 2 on malformed arguments.
    '''
    parser = argparse.ArgumentParser(
        description='Find popular things (songs, albums, artists) for a specific date by web scraping Billboard charts.'
    )

    # Option strings must start with '--'; any other leading character makes
    # argparse treat the name as a positional argument.
    parser.add_argument('--year', type=int, required=True,
                        help='Year to find popular items from (1958-present)')
    parser.add_argument('--month', type=int,
                        help='Month to find popular items from (1-12)')
    parser.add_argument('--day', type=int,
                        help='Day to find popular items from')
    parser.add_argument('--limit', type=int, default=10,
                        help='Number of items to display (default: 10)')
    parser.add_argument('--category', type=str, default='songs',
                        choices=['songs', 'albums', 'artists', 'hot-100', 'billboard-200', 'artist-100'],
                        help='Category of popular items to find (default: songs)')

    args = parser.parse_args()

    if args.limit < 1:
        parser.error('--limit must be a positive integer')

    # Validate the date
    if not validate_date(args.year, args.month, args.day):
        print('Error: Invalid date provided. Please check the date and try again.')
        print('Note: Billboard data is generally available from 1958 onward.')
        sys.exit(1)

    try:
        # scrape_billboard resolves the friendly names (songs/albums/artists)
        # to chart names itself, so the category is passed through unchanged.
        items = scrape_billboard(
            args.year, args.month, args.day, args.category, args.limit
        )

        # Format the date for display
        date_label = format_date_label(args.year, args.month, args.day)

        # Display the results
        display_results(items, date_label, args.category)

    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero
        print(f'Error: {e}')
        print('\nTips:')
        print('- Ensure you have internet connectivity')
        print('- Try a different date (Billboard data starts around 1958)')
        print('- Some charts may not be available for specific dates')
        sys.exit(1)


# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()

clubs/python_club/python_club_ex_popular_music.txt · Last modified: by 127.0.0.1