help@rskworld.in +91 93305 39277
RSK World
  • Home
  • Development
    • Web Development
    • Mobile Apps
    • Software
    • Games
    • Project
  • Technologies
    • Data Science
    • AI Development
    • Cloud Development
    • Blockchain
    • Cyber Security
    • Dev Tools
    • Testing Tools
  • About
  • Contact

Theme Settings

Color Scheme
Display Options
Font Size
100%
Back to Project
RSK World
polars-fastdataframes
/
scripts
RSK World
polars-fastdataframes
High-performance DataFrames with Polars
scripts
  • __pycache__
  • advanced_queries.py (7 KB)
  • basic_operations.py (3 KB)
  • data_generator.py (4.2 KB)
  • lazy_evaluation.py (3.2 KB)
  • performance_comparison.py (7.3 KB)
advanced_queries.py
scripts/advanced_queries.py
Raw Download
Find: Go to:
"""
Advanced Queries with Polars
Comprehensive examples of advanced Polars operations and patterns

Author: RSK World
Website: https://rskworld.in
Email: help@rskworld.in
Phone: +91 93305 39277
"""

import polars as pl
import numpy as np
from datetime import datetime, timedelta
import time

def demonstrate_time_series():
    """Show lag, difference, percent-change and rolling-mean expressions.

    Builds one row per day of 2023 with a random-walk ``value`` and a random
    ``category`` label, evaluates each expression per category through
    ``.over('category')``, and prints the first ten rows of the result.
    """
    rule = "=" * 60
    print(rule)
    print("TIME SERIES OPERATIONS")
    print(rule)

    # One entry per day from 2023-01-01 through 2023-12-31.
    dates = pl.date_range(datetime(2023, 1, 1), datetime(2023, 12, 31), '1d', eager=True)
    n_days = len(dates)

    # Draw order is kept (values first, then labels) because both calls
    # consume the same global NumPy random stream.
    random_walk = np.random.randn(n_days).cumsum() * 100 + 1000
    labels = np.random.choice(['A', 'B', 'C'], n_days)
    ts_data = pl.DataFrame({
        'date': dates,
        'value': random_walk,
        'category': labels,
    })

    # Every derived column is windowed over 'category', so each one is
    # computed within its own category rather than across the whole frame.
    value = pl.col('value')
    per_category = [
        value.shift(1).over('category').alias('lag_1'),
        value.diff().over('category').alias('diff'),
        value.pct_change().over('category').alias('pct_change'),
        value.rolling_mean(window_size=7).over('category').alias('rolling_mean_7d'),
    ]
    ts_ops = ts_data.with_columns(per_category)

    print("\nTime series operations sample:")
    print(ts_ops.head(10))

def demonstrate_missing_data():
    """Print a small frame containing nulls, its null counts, and a filled copy.

    Fill rules applied to the copy: a missing ``name`` becomes the literal
    'Unknown', a missing ``age`` becomes the mean of the ``age`` column, and a
    missing ``salary`` is filled with the 'forward' strategy.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("MISSING DATA HANDLING")
    print(rule)

    names = ['Alice', None, 'Charlie', 'David', None, 'Frank', 'Grace', None, 'Ivy', 'Jack']
    ages = [25, 30, None, 28, 32, None, 29, 31, None, 33]
    salaries = [50000, None, 70000, 55000, 65000, None, 58000, 62000, 51000, None]
    data_with_nulls = pl.DataFrame({
        'id': range(1, 11),
        'name': names,
        'age': ages,
        'salary': salaries,
    })

    print("\nOriginal data:")
    print(data_with_nulls)
    null_counts = data_with_nulls.null_count()
    print(f"\nNull counts: {null_counts}")

    # No aliases: each expression keeps its source column name, so the filled
    # values replace the original columns instead of adding new ones.
    age = pl.col('age')
    fill_rules = [
        pl.col('name').fill_null('Unknown'),
        age.fill_null(age.mean()),
        pl.col('salary').fill_null(strategy='forward'),
    ]
    filled = data_with_nulls.with_columns(fill_rules)

    print("\nAfter filling nulls:")
    print(filled)

def demonstrate_advanced_strings():
    """Print upper-casing, length, regex-extract and substring-match results.

    Note that the ``domain`` column holds only the run of word characters
    immediately after '@' (capture group 1 of ``@(\\w+)``), so the capture
    stops at the first dot of the address.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("ADVANCED STRING OPERATIONS")
    print(rule)

    phrases = ['Hello World', 'Python Programming', 'Data Science', 'Machine Learning', 'Deep Learning']
    addresses = ['user1@example.com', 'user2@test.org', 'admin@company.com', 'info@website.net', 'contact@business.io']
    text_data = pl.DataFrame({
        'id': range(1, 6),
        'text': phrases,
        'email': addresses,
    })

    text = pl.col('text')
    derived = [
        text.str.to_uppercase().alias('upper'),
        text.str.len_chars().alias('char_count'),
        pl.col('email').str.extract(r'@(\w+)', 1).alias('domain'),
        text.str.contains('Learning').alias('has_learning'),
    ]
    string_ops = text_data.with_columns(derived)

    print("\nString operations:")
    print(string_ops)

def demonstrate_joins():
    """Demonstrate inner, left and anti joins between customers and orders.

    The sample data is arranged so each join type gives a visibly different
    result: customer 1 has two orders, customer 5 (Eve) has none, and order
    106 refers to customer_id 99, which does not exist in ``customers``.
    """
    print("\n" + "=" * 60)
    print("ADVANCED JOIN TYPES")
    print("=" * 60)

    customers = pl.DataFrame({
        'customer_id': [1, 2, 3, 4, 5],
        'name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve']
    })

    orders = pl.DataFrame({
        'order_id': [101, 102, 103, 104, 105, 106],
        'customer_id': [1, 2, 1, 4, 3, 99],
        'amount': [100, 200, 150, 300, 250, 400]
    })

    # Only customers that have at least one order; order 106 is dropped
    # because customer_id 99 has no match.
    print("\nInner Join:")
    print(customers.join(orders, on='customer_id', how='inner'))

    # Print the complete result. It has six rows (customer 1 matches twice),
    # and the row for Eve with null order columns is exactly what separates a
    # left join from an inner join; head()'s default of five rows would cut
    # the output short and could hide it.
    print("\nLeft Join:")
    print(customers.join(orders, on='customer_id', how='left'))

    print("\nAnti Join (customers with no orders):")
    print(customers.join(orders, on='customer_id', how='anti'))

def demonstrate_performance():
    """Time the same filter + group-by aggregation eagerly and lazily.

    Builds a 100,000-row frame of random data, runs the query once through the
    eager API and once through ``lazy()`` ... ``collect()``, then prints both
    elapsed times and their ratio. Each variant is timed with a single run, so
    the figures are indicative only.
    """
    print("\n" + "=" * 60)
    print("PERFORMANCE OPTIMIZATION")
    print("=" * 60)

    large_data = pl.DataFrame({
        'id': range(1, 100001),
        'category': np.random.choice(['A', 'B', 'C', 'D', 'E'], 100000),
        'value1': np.random.randn(100000) * 100,
        'value2': np.random.randn(100000) * 50
    })

    print(f"\nDataset shape: {large_data.shape}")

    # Eager vs Lazy
    # perf_counter() is the high-resolution clock intended for measuring short
    # intervals; time.time() can be too coarse and report 0.0 for fast queries.
    start = time.perf_counter()
    eager_result = large_data.filter(pl.col('value1') > 50).group_by('category').agg([pl.col('value1').mean()])
    eager_time = time.perf_counter() - start

    start = time.perf_counter()
    lazy_result = (large_data.lazy()
        .filter(pl.col('value1') > 50)
        .group_by('category')
        .agg([pl.col('value1').mean()])
        .collect()
    )
    lazy_time = time.perf_counter() - start

    print(f"\nEager execution: {eager_time:.4f} seconds")
    print(f"Lazy execution: {lazy_time:.4f} seconds")
    # Guard the ratio: a zero elapsed time would raise ZeroDivisionError.
    if lazy_time > 0:
        print(f"Speedup: {eager_time / lazy_time:.2f}x")
    else:
        print("Speedup: n/a (lazy run was too fast to measure)")

def demonstrate_ecommerce_analytics():
    """Generate 500 synthetic orders and print revenue per product and region.

    Seeds NumPy's global random generator with 42 before generating the data.
    Each order's ``total`` is unit price times quantity, reduced by the
    discount fraction; the summary is sorted by revenue, highest first.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("E-COMMERCE ANALYTICS EXAMPLE")
    print(rule)

    np.random.seed(42)
    dates = pl.date_range(datetime(2023, 1, 1), datetime(2023, 3, 31), '1d', eager=True)

    # The draws below stay in this exact order because they all consume the
    # same seeded global random stream.
    n_orders = 500
    order_dates = np.random.choice(dates, n_orders)
    products = np.random.choice(['Laptop', 'Phone', 'Tablet'], n_orders)
    regions = np.random.choice(['North', 'South', 'East', 'West'], n_orders)
    quantities = np.random.randint(1, 10, n_orders)
    unit_prices = np.random.uniform(100, 1000, n_orders)
    discounts = np.random.uniform(0, 0.3, n_orders)

    ecommerce = pl.DataFrame({
        'order_id': range(1, n_orders + 1),
        'date': order_dates,
        'product': products,
        'region': regions,
        'quantity': quantities,
        'unit_price': unit_prices,
        'discount': discounts,
    })

    # Order value after the fractional discount is applied.
    net_total = pl.col('unit_price') * pl.col('quantity') * (1 - pl.col('discount'))
    ecommerce = ecommerce.with_columns([net_total.alias('total')])

    # Analytics
    summary_columns = [
        pl.col('total').sum().alias('revenue'),
        pl.col('order_id').n_unique().alias('num_orders'),
        pl.col('quantity').sum().alias('total_sold'),
    ]
    grouped = ecommerce.group_by(['product', 'region'])
    analytics = grouped.agg(summary_columns).sort('revenue', descending=True)

    print("\nProduct and Region Analytics:")
    print(analytics)

def run_all_demonstrations():
    """Run every demonstration in sequence, then print the closing summary."""
    rule = "=" * 60
    print("\n" + rule)
    print("ADVANCED POLARS QUERIES - COMPREHENSIVE DEMONSTRATION")
    print(rule)

    # Executed in the listed order.
    demonstrations = (
        demonstrate_time_series,
        demonstrate_missing_data,
        demonstrate_advanced_strings,
        demonstrate_joins,
        demonstrate_performance,
        demonstrate_ecommerce_analytics,
    )
    for demonstration in demonstrations:
        demonstration()

    print("\n" + rule)
    print("All demonstrations complete!")
    print(rule)
    print("\nKey Takeaways:")
    takeaways = (
        "- Polars provides powerful operations for time series data",
        "- Missing data handling is straightforward and flexible",
        "- String operations are comprehensive and efficient",
        "- Multiple join types available for different use cases",
        "- Lazy evaluation provides significant performance benefits",
        "- Real-world analytics can be done efficiently with Polars",
    )
    for takeaway in takeaways:
        print(takeaway)

# Run the full demonstration suite only when this file is executed as a
# script; importing the module does not trigger it.
if __name__ == "__main__":
    run_all_demonstrations()

210 lines • 7 KB
python

About RSK World

Founded by Molla Samser, with Designer & Tester Rima Khatun, RSK World is your one-stop destination for free programming resources, source code, and development tools.

Founder: Molla Samser
Designer & Tester: Rima Khatun

Development

  • Game Development
  • Web Development
  • Mobile Development
  • AI Development
  • Development Tools

Legal

  • Terms & Conditions
  • Privacy Policy
  • Disclaimer

Contact Info

Nutanhat, Mongolkote
Purba Burdwan, West Bengal
India, 713147

+91 93305 39277

hello@rskworld.in
support@rskworld.in

© 2026 RSK World. All rights reserved.

Content used for educational purposes only. View Disclaimer