help@rskworld.in +91 93305 39277
RSK World
  • Home
  • Development
    • Web Development
    • Mobile Apps
    • Software
    • Games
    • Project
  • Technologies
    • Data Science
    • AI Development
    • Cloud Development
    • Blockchain
    • Cyber Security
    • Dev Tools
    • Testing Tools
  • About
  • Contact

Theme Settings

Color Scheme
Display Options
Font Size
100%
Back to Project
RSK World
dask-parallel
/
scripts
RSK World
dask-parallel
Parallel and distributed computing with Dask
scripts
  • advanced_data_processing.py (6.6 KB)
  • create_basic_data.py (4.9 KB)
  • distributed_workflow.py (4.3 KB)
  • generate_advanced_data.py (6.8 KB)
  • memory_efficient_ops.py (3.6 KB)
  • parallel_processing.py (2.2 KB)
  • performance_profiling.py (6 KB)
memory_efficient_ops.py
scripts/memory_efficient_ops.py
Raw Download
Find: Go to:
#!/usr/bin/env python3
"""
Memory-Efficient Operations with Dask
Author: Molla Samser
Designer & Tester: Rima Khatun
Website: https://rskworld.in
Email: help@rskworld.in, support@rskworld.in
Phone: +91 93305 39277
"""

import dask.array as da
import dask.dataframe as dd
import numpy as np
import pandas as pd
import time


def memory_efficient_array_operations():
    """Demonstrate a chunked reduction over a large Dask array.

    Builds a lazy 10,000 x 10,000 random array split into 1,000 x 1,000
    chunks, computes the sum of its squared elements, and prints the array
    layout, the result, and the elapsed time.

    Returns:
        None. All output goes to stdout.
    """
    print("=" * 60)
    print("Memory-Efficient Array Operations")
    print("=" * 60)

    # Keep the dimensions in one place so the status message below can never
    # drift from the array that is actually created.
    shape = (10000, 10000)
    chunks = (1000, 1000)

    # Create a large array (chunked); nothing is computed yet.
    print(f"\nCreating large chunked array ({shape[0]:,} x {shape[1]:,})...")
    large_array = da.random.random(shape, chunks=chunks)

    print(f"Array shape: {large_array.shape}")
    print(f"Chunk size: {large_array.chunksize}")
    print(f"Number of chunks: {large_array.numblocks}")

    # Perform operations without loading entire array into memory
    print("\nPerforming operations on chunks...")
    # perf_counter is monotonic, so the measured interval is not affected by
    # system clock adjustments the way time.time() can be.
    start_time = time.perf_counter()

    # Building the expression is lazy; compute() triggers the chunk-wise work.
    result = (large_array ** 2).sum()
    final_result = result.compute()

    end_time = time.perf_counter()

    print(f"Result: {final_result}")
    print(f"Computation time: {end_time - start_time:.2f} seconds")
    print("Note: Entire array never loaded into memory!")
    print()


def memory_efficient_dataframe_operations():
    """Demonstrate a partitioned groupby over a CSV file with Dask.

    Generates one million rows of sample data with pandas, writes them to
    ``data/large_data.csv`` (creating the ``data`` directory if needed),
    reads the file back as a Dask DataFrame, and prints the per-category
    mean of ``value`` together with the elapsed time.

    Returns:
        None. Output goes to stdout; the CSV file is left on disk.
    """
    # Local import: only this function needs ``os``.
    import os

    print("=" * 60)
    print("Memory-Efficient DataFrame Operations")
    print("=" * 60)

    # Create sample data
    print("\nCreating sample data...")
    n_rows = 1000000
    csv_path = 'data/large_data.csv'
    data = {
        'id': range(n_rows),
        'value': np.random.randn(n_rows),
        'category': np.random.choice(['A', 'B', 'C'], n_rows)
    }
    df = pd.DataFrame(data)

    # to_csv does not create missing parent directories; without this the
    # write fails with OSError on a fresh checkout that has no data/ folder.
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    df.to_csv(csv_path, index=False)

    # Read with Dask (chunked)
    print("Reading CSV with Dask (chunked)...")
    dask_df = dd.read_csv(csv_path)

    # NOTE(review): Dask reports the row count lazily, so the first element
    # of ``shape`` is expected to print as a deferred object, not a number.
    print(f"DataFrame shape: {dask_df.shape}")
    print(f"Number of partitions: {dask_df.npartitions}")

    # Perform operations
    print("\nPerforming groupby operation...")
    # perf_counter is monotonic, which makes it the right clock for intervals.
    start_time = time.perf_counter()

    grouped = dask_df.groupby('category')['value'].mean().compute()

    end_time = time.perf_counter()

    print(f"Groupby result:\n{grouped}")
    print(f"Computation time: {end_time - start_time:.2f} seconds")
    print("Note: Data processed in chunks, not all at once!")
    print()


def streaming_processing():
    """Show batch-style processing built from ``dask.delayed`` tasks.

    Ten independent tasks are created, each summing a freshly generated
    1000 x 100 random matrix. They are evaluated together and the batch
    count and grand total are printed.
    """
    banner = "=" * 60
    print(banner)
    print("Streaming Processing")
    print(banner)

    print("\nProcessing data in batches...")

    def process_batch(batch_num):
        """Return the sum of one random batch; ``batch_num`` is not used."""
        return np.random.rand(1000, 100).sum()

    # Imported here, as in the original, so Dask's task API is only loaded
    # once this demo actually runs.
    from dask import delayed, compute

    # Wrap the worker lazily, then schedule one task per batch index.
    pending = list(map(delayed(process_batch), range(10)))
    results = compute(*pending)

    batch_count = len(results)
    grand_total = sum(results)
    print(f"Processed {batch_count} batches")
    print(f"Total sum: {grand_total}")
    print()


def main():
    """Run every memory-efficiency demo in order, framed by banners."""
    rule = "=" * 60

    print("\nDask Memory-Efficient Operations Examples")
    print(rule)

    # The demos are independent; they run in the order listed.
    demos = (
        memory_efficient_array_operations,
        memory_efficient_dataframe_operations,
        streaming_processing,
    )
    for run_demo in demos:
        run_demo()

    print(rule)
    print("All examples completed!")
    print(rule)


# Run the demos only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

128 lines • 3.6 KB
python

About RSK World

Founded by Molla Samser, with Designer & Tester Rima Khatun, RSK World is your one-stop destination for free programming resources, source code, and development tools.

Founder: Molla Samser
Designer & Tester: Rima Khatun

Development

  • Game Development
  • Web Development
  • Mobile Development
  • AI Development
  • Development Tools

Legal

  • Terms & Conditions
  • Privacy Policy
  • Disclaimer

Contact Info

Nutanhat, Mongolkote
Purba Burdwan, West Bengal
India, 713147

+91 93305 39277

hello@rskworld.in
support@rskworld.in

© 2026 RSK World. All rights reserved.

Content used for educational purposes only. View Disclaimer