# RSK World - Speech Recognition Dataset - Project Files Browser | RSK World

# scripts/example_usage.py

"""
============================================================================
Speech Recognition Dataset - Example Usage
============================================================================

Project: Speech Recognition Dataset
Description: Audio speech recognition dataset with labeled speech samples 
             for training speech-to-text and voice recognition models.

============================================================================
DEVELOPER INFORMATION
============================================================================
Website: https://rskworld.in
Founded by: Molla Samser
Designer & Tester: Rima Khatun
Email: help@rskworld.in
Support: support@rskworld.in
Phone: +91 93305 39277
Address: Nutanhat, Mongolkote, Purba Burdwan, West Bengal, India, 713147

============================================================================
COPYRIGHT NOTICE
============================================================================
© 2026 RSK World. All rights reserved.
This dataset is provided for educational and research purposes.

============================================================================

This script demonstrates how to use the Speech Recognition Dataset.
"""

import sys
from pathlib import Path

# Add parent directory to path
sys.path.append(str(Path(__file__).parent.parent))

from scripts.load_dataset import SpeechRecognitionDataset
from scripts.preprocess import SpeechRecognitionPreprocessor

def example_load_dataset():
    """Example: load the dataset and run a few basic queries against it.

    Creates a ``SpeechRecognitionDataset`` for the ``data`` directory, then
    prints, in order: the entries returned by ``get_statistics()``, a
    three-item sample (skipped when ``sample()`` returns ``None``), and the
    number of files reported for ``Speaker_001`` and for the ``Greeting``
    category.
    """
    rule = "=" * 60
    print(rule)
    print("Example 1: Loading Dataset")
    print(rule)

    # Open the dataset
    ds = SpeechRecognitionDataset(data_dir='data')

    # Statistics are fetched before the heading is printed, so anything the
    # call itself emits appears above the heading.
    statistics = ds.get_statistics()
    print("\nDataset Statistics:")
    for name, val in statistics.items():
        print(f"  {name}: {val}")

    # Preview a few entries, restricted to the most informative columns
    preview_columns = ['id', 'file_name', 'speaker', 'duration', 'transcript']
    print("\nSample files:")
    preview = ds.sample(n=3)
    if preview is not None:
        print(preview[preview_columns])

    # Count the files recorded by one speaker
    print("\nFiles by Speaker_001:")
    by_speaker = ds.get_files_by_speaker('Speaker_001')
    speaker_count = len(by_speaker)
    print(f"  Found {speaker_count} files")

    # Count the files belonging to one category
    print("\nFiles by Category (Greeting):")
    by_category = ds.get_files_by_category('Greeting')
    category_count = len(by_category)
    print(f"  Found {category_count} files")


def example_preprocessing():
    """Example: configure the audio preprocessor and show its settings.

    Builds a ``SpeechRecognitionPreprocessor`` with explicit audio directory,
    output directory and sample rate, then prints the ``audio_dir``,
    ``output_dir`` and ``sr`` attributes read back from the instance.
    Processing the full dataset is left commented out on purpose.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Example 2: Preprocessing Audio Files")
    print(rule)

    # Constructor arguments, gathered in one place
    settings = {
        'audio_dir': 'data/audio',
        'output_dir': 'data/features',
        'sr': 16000,
    }
    prep = SpeechRecognitionPreprocessor(**settings)

    # Report the values as stored on the instance, not the ones passed in
    print("\nPreprocessor initialized with:")
    print(f"  Audio directory: {prep.audio_dir}")
    print(f"  Output directory: {prep.output_dir}")
    print(f"  Sample rate: {prep.sr} Hz")

    # Note: Uncomment the following line to process the entire dataset
    # prep.process_dataset(metadata_path='data/metadata.csv')

    print("\nTo process the dataset, uncomment the process_dataset() call")


def example_feature_extraction():
    """Example: extract features from a single audio file.

    Looks for ``data/audio/audio_001.wav``. If the file is missing, a hint
    is printed and the function returns without creating a preprocessor.
    Otherwise a default-constructed ``SpeechRecognitionPreprocessor`` is used
    to extract MFCC, mel spectrogram and chroma features, and the ``shape``
    of each result is printed.

    Returns:
        None. All results are reported on standard output.
    """
    print("\n" + "=" * 60)
    print("Example 3: Feature Extraction")
    print("=" * 60)

    # ``Path`` comes from the module-level import; no local re-import needed.
    audio_file = Path('data/audio/audio_001.wav')

    # Guard clause: without the sample file there is nothing to extract.
    if not audio_file.exists():
        print(f"\nAudio file not found: {audio_file}")
        print("Please add audio files to data/audio/ directory")
        return

    preprocessor = SpeechRecognitionPreprocessor()

    print(f"\nExtracting features from: {audio_file}")

    # The extractors are given the path as a plain string.
    audio_path = str(audio_file)

    # Extract MFCC
    mfcc = preprocessor.extract_mfcc(audio_path)
    print(f"  MFCC shape: {mfcc.shape}")

    # Extract Mel Spectrogram
    mel_spec = preprocessor.extract_mel_spectrogram(audio_path)
    print(f"  Mel Spectrogram shape: {mel_spec.shape}")

    # Extract Chroma
    chroma = preprocessor.extract_chroma(audio_path)
    print(f"  Chroma shape: {chroma.shape}")


def example_model_training():
    """Example: print the steps for training a model.

    Nothing is trained here; the function only prints the section banner,
    the two-step workflow and the command that starts training.
    """
    rule = "=" * 60
    messages = (
        "\n" + rule,
        "Example 4: Model Training",
        rule,
        "\nTo train a model:",
        "1. First, extract features using preprocess.py",
        "2. Then, run train_model.py",
        "\nExample command:",
        "  python scripts/train_model.py",
    )
    # One print per entry keeps the output line-for-line unchanged.
    for message in messages:
        print(message)


def main():
    """Run every example in sequence, framed by a banner and a footer."""
    rule = "=" * 60

    # Opening banner
    print("\n" + rule)
    print("SPEECH RECOGNITION DATASET - USAGE EXAMPLES")
    print(rule)
    print("\nWebsite: https://rskworld.in")
    print("© 2026 RSK World. All rights reserved.\n")

    # The examples, in the order they are presented to the user
    examples = (
        example_load_dataset,
        example_preprocessing,
        example_feature_extraction,
        example_model_training,
    )
    for run_example in examples:
        run_example()

    # Closing footer
    print("\n" + rule)
    print("Examples completed!")
    print(rule)
    print("\nFor more information, visit: https://rskworld.in")


# Run the examples only when this file is executed as a script; importing it
# as a module defines the functions without running them.
if __name__ == '__main__':
    main()

# 165 lines•5.3 KB

# python

# Theme Settings

# Color Scheme

# Display Options

# Font Size