help@rskworld.in +91 93305 39277
RSK World
  • Home
  • Development
    • Web Development
    • Mobile Apps
    • Software
    • Games
    • Project
  • Technologies
    • Data Science
    • AI Development
    • Cloud Development
    • Blockchain
    • Cyber Security
    • Dev Tools
    • Testing Tools
  • About
  • Contact

Theme Settings

Color Scheme
Display Options
Font Size
100%
Back to Project
RSK World
statsmodels-statistical
RSK World
statsmodels-statistical
Statistical Modeling with Statsmodels
statsmodels-statistical
  • __pycache__
  • data
  • examples
  • notebooks
  • .gitignore (458 B)
  • CHANGELOG.md (4 KB)
  • FEATURES.md (6.3 KB)
  • LICENSE (1.2 KB)
  • PROJECT_INFO.md (2.2 KB)
  • PROJECT_SUMMARY.md (4.2 KB)
  • README.md (7.4 KB)
  • RELEASE_NOTES_v1.0.0.md (6.5 KB)
  • UNIQUE_FEATURES.md (5.3 KB)
  • advanced_time_series.py (9.8 KB)
  • automated_reporting.py (8.3 KB)
  • bayesian_statistics.py (7.5 KB)
  • data_preprocessing.py (8.2 KB)
  • econometric_modeling.py (9.8 KB)
  • hypothesis_testing.py (12.5 KB)
  • index.html (10.8 KB)
  • model_evaluation.py (9.1 KB)
  • model_persistence.py (6.5 KB)
  • model_selection.py (9.7 KB)
  • panel_data_analysis.py (7.3 KB)
  • performance_benchmarking.py (7.3 KB)
  • regression_analysis.py (9 KB)
  • requirements.txt (361 B)
  • statistical_diagnostics.py (13.8 KB)
  • statsmodels-statistical.png (284 B)
  • time_series_analysis.py (10.3 KB)
  • visualization_utils.py (8.9 KB)
visualization_utils.py
visualization_utils.py
Raw Download
Find: Go to:
"""
Advanced Visualization Utilities

Author: RSK World
Website: https://rskworld.in
Email: help@rskworld.in
Phone: +91 93305 39277
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.graphics.regressionplots import plot_regress_exog
import warnings
warnings.filterwarnings('ignore')


class StatisticalVisualizations:
    """
    Advanced Statistical Visualizations

    A namespace of static plotting helpers built on matplotlib, seaborn and
    statsmodels. Every ``plot_*`` method draws on a new figure, calls
    ``plt.show()`` and returns ``None``. Methods that validate their
    arguments do so before the figure is created, so a rejected call does
    not leave an empty figure behind.

    Author: RSK World
    Website: https://rskworld.in
    Email: help@rskworld.in
    Phone: +91 93305 39277
    """

    @staticmethod
    def plot_correlation_matrix(data, figsize=(10, 8), cmap='coolwarm'):
        """Plot a correlation-matrix heatmap.

        Args:
            data: A DataFrame, or anything ``pd.DataFrame`` accepts.
            figsize: Figure size passed to ``plt.figure``.
            cmap: Colormap name passed to ``sns.heatmap``.

        Raises:
            ValueError: If no column of ``data`` can be correlated.
        """
        frame = data if isinstance(data, pd.DataFrame) else pd.DataFrame(data)
        try:
            corr = frame.corr()
        except (TypeError, ValueError):
            # Newer pandas versions raise on non-numeric columns instead of
            # dropping them; fall back to the numeric/bool columns only.
            numeric = frame.select_dtypes(include=[np.number, 'bool'])
            corr = numeric.corr() if numeric.shape[1] else pd.DataFrame()
        if corr.empty:
            raise ValueError(
                "plot_correlation_matrix needs at least one numeric column"
            )

        plt.figure(figsize=figsize)
        sns.heatmap(corr, annot=True, cmap=cmap, center=0,
                    square=True, linewidths=1, cbar_kws={"shrink": 0.8})
        plt.title('Correlation Matrix')
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_distribution(data, bins=30, kde=True, title="Distribution",
                          figsize=(10, 6)):
        """Plot a histogram of ``data``, optionally with a KDE overlay.

        Args:
            data: Values passed to ``sns.histplot``.
            bins: Histogram bin specification.
            kde: Whether to overlay a kernel density estimate.
            title: Figure title.
            figsize: Figure size; the default matches the previously
                hard-coded ``(10, 6)``.
        """
        plt.figure(figsize=figsize)
        sns.histplot(data, bins=bins, kde=kde, alpha=0.7)
        plt.xlabel('Value')
        plt.ylabel('Frequency')
        plt.title(title)
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_time_series(data, title="Time Series", figsize=(12, 6)):
        """Plot a single time series as a line chart.

        Args:
            data: A ``pd.Series`` (plotted against its index) or any other
                object ``plt.plot`` accepts as its only data argument.
            title: Figure title.
            figsize: Figure size passed to ``plt.figure``.
        """
        plt.figure(figsize=figsize)
        if isinstance(data, pd.Series):
            plt.plot(data.index, data.values, linewidth=1.5)
        else:
            plt.plot(data, linewidth=1.5)
        plt.xlabel('Time')
        plt.ylabel('Value')
        plt.title(title)
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_residual_analysis(residuals, fitted_values, figsize=(14, 10)):
        """Draw a 2x2 grid of residual diagnostic plots.

        Panels: residuals vs fitted, normal Q-Q plot, residual histogram and
        a scale-location plot (square root of absolute residuals vs fitted).

        Args:
            residuals: Model residuals, one per observation.
            fitted_values: Fitted values, same length as ``residuals``.
            figsize: Figure size passed to ``plt.subplots``.

        Raises:
            ValueError: If the two inputs differ in length.
        """
        # Imported locally, as in the original, so scipy is only needed by
        # callers of this method.
        from scipy import stats

        if len(residuals) != len(fitted_values):
            raise ValueError(
                "residuals and fitted_values must have the same length, got "
                f"{len(residuals)} and {len(fitted_values)}"
            )

        fig, axes = plt.subplots(2, 2, figsize=figsize)

        # Residuals vs Fitted
        axes[0, 0].scatter(fitted_values, residuals, alpha=0.6)
        axes[0, 0].axhline(y=0, color='r', linestyle='--')
        axes[0, 0].set_xlabel('Fitted Values')
        axes[0, 0].set_ylabel('Residuals')
        axes[0, 0].set_title('Residuals vs Fitted')
        axes[0, 0].grid(True, alpha=0.3)

        # Q-Q Plot
        stats.probplot(residuals, dist="norm", plot=axes[0, 1])
        axes[0, 1].set_title('Q-Q Plot')
        axes[0, 1].grid(True, alpha=0.3)

        # Residuals Histogram
        axes[1, 0].hist(residuals, bins=30, edgecolor='black', alpha=0.7)
        axes[1, 0].set_xlabel('Residuals')
        axes[1, 0].set_ylabel('Frequency')
        axes[1, 0].set_title('Residuals Distribution')
        axes[1, 0].grid(True, alpha=0.3)

        # Scale-Location Plot
        # The residuals are used as given (no standardization is applied
        # here), so the axis label names them as plain residuals.
        sqrt_abs_residuals = np.sqrt(np.abs(residuals))
        axes[1, 1].scatter(fitted_values, sqrt_abs_residuals, alpha=0.6)
        axes[1, 1].set_xlabel('Fitted Values')
        axes[1, 1].set_ylabel('√|Residuals|')
        axes[1, 1].set_title('Scale-Location Plot')
        axes[1, 1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_acf_pacf(data, lags=40, figsize=(14, 6)):
        """Plot the ACF and PACF of ``data`` side by side.

        Args:
            data: Sized sequence of observations.
            lags: Lags to display. An integer is capped so that short
                series still plot: at ``len(data) - 1`` for the ACF and at
                ``len(data) // 2 - 1`` for the PACF. Any other value
                (``None`` or an array of lags) is forwarded unchanged.
            figsize: Figure size passed to ``plt.subplots``.

        Raises:
            ValueError: If ``lags`` is an integer and ``data`` has fewer
                than four observations.
        """
        acf_lags = pacf_lags = lags
        if isinstance(lags, (int, np.integer)) and not isinstance(lags, bool):
            n_obs = len(data)
            # statsmodels rejects PACF lag counts around half the sample
            # size (the exact bound differs between versions), so stay
            # strictly below it.
            pacf_limit = n_obs // 2 - 1
            if pacf_limit < 1:
                raise ValueError(
                    "plot_acf_pacf needs at least 4 observations, "
                    f"got {n_obs}"
                )
            acf_lags = min(lags, n_obs - 1)
            pacf_lags = min(lags, pacf_limit)

        fig, axes = plt.subplots(1, 2, figsize=figsize)

        plot_acf(data, lags=acf_lags, ax=axes[0])
        axes[0].set_title('Autocorrelation Function (ACF)')

        plot_pacf(data, lags=pacf_lags, ax=axes[1])
        axes[1].set_title('Partial Autocorrelation Function (PACF)')

        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_forecast_comparison(actual, forecast, conf_int=None,
                                 title="Forecast Comparison", figsize=(12, 6)):
        """Plot actual values and a forecast, with an optional interval band.

        When ``actual`` and ``forecast`` are both ``pd.Series`` each is drawn
        against its own index. In every other combination both are drawn
        positionally, with the forecast starting right after the last actual
        observation, so a forecast is never silently omitted.

        Args:
            actual: Observed values (Series or sized sequence).
            forecast: Forecast values (Series or sized sequence). ``None``
                draws only ``actual``.
            conf_int: Optional two-column bounds (lower, upper) with one row
                per forecast value; a DataFrame, ndarray or nested sequence.
            title: Figure title.
            figsize: Figure size; the default matches the previously
                hard-coded ``(12, 6)``.

        Raises:
            ValueError: If ``conf_int`` is not two-dimensional with at least
                two columns, or its row count differs from ``len(forecast)``.
        """
        has_forecast = forecast is not None

        # Normalise the bounds once so DataFrame and ndarray inputs work
        # regardless of which x-axis mode is chosen below.
        bounds = None
        if has_forecast and conf_int is not None:
            bounds = np.asarray(conf_int, dtype=float)
            if bounds.ndim != 2 or bounds.shape[1] < 2:
                raise ValueError(
                    "conf_int must be two-dimensional with lower and upper "
                    "bound columns"
                )
            if bounds.shape[0] != len(forecast):
                raise ValueError(
                    f"conf_int has {bounds.shape[0]} rows but forecast has "
                    f"{len(forecast)} values"
                )

        use_index = isinstance(actual, pd.Series) and (
            not has_forecast or isinstance(forecast, pd.Series)
        )
        if use_index:
            actual_x = actual.index
            forecast_x = forecast.index if has_forecast else None
        else:
            n_actual = len(actual)
            actual_x = np.arange(n_actual)
            forecast_x = (
                np.arange(n_actual, n_actual + len(forecast))
                if has_forecast else None
            )

        plt.figure(figsize=figsize)
        plt.plot(actual_x, np.asarray(actual), label='Actual', linewidth=2)
        if has_forecast:
            plt.plot(forecast_x, np.asarray(forecast), label='Forecast',
                     linewidth=2, color='red')
            if bounds is not None:
                plt.fill_between(forecast_x, bounds[:, 0], bounds[:, 1],
                                 alpha=0.3, color='red')

        plt.xlabel('Time')
        plt.ylabel('Value')
        plt.title(title)
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_model_comparison(models_dict, metric='AIC', figsize=(10, 6)):
        """Plot a bar chart comparing one metric across models.

        Args:
            models_dict: Mapping of model name to metric value.
            metric: Metric name, used for the y-axis label and the title.
            figsize: Figure size passed to ``plt.figure``.
        """
        names = list(models_dict.keys())
        values = list(models_dict.values())

        plt.figure(figsize=figsize)
        bars = plt.bar(names, values, color='steelblue', alpha=0.7)
        plt.xlabel('Model')
        plt.ylabel(metric)
        plt.title(f'Model Comparison: {metric}')
        plt.xticks(rotation=45, ha='right')
        plt.grid(True, alpha=0.3, axis='y')

        # Add value labels on bars
        for bar in bars:
            height = bar.get_height()
            # Negative bars extend downwards, so hang their label below the
            # bar end instead of drawing it inside the bar.
            plt.text(bar.get_x() + bar.get_width() / 2., height,
                     f'{height:.2f}', ha='center',
                     va='bottom' if height >= 0 else 'top')

        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_feature_importance(coefficients, feature_names=None, figsize=(10, 6)):
        """Plot coefficients as a horizontal bar chart.

        Bars are green for coefficients greater than zero and red otherwise.

        Args:
            coefficients: Sized sequence of coefficient values.
            feature_names: Optional labels, one per coefficient. Defaults to
                ``'Feature 1'``, ``'Feature 2'``, ...
            figsize: Figure size passed to ``plt.figure``.

        Raises:
            ValueError: If ``feature_names`` is given with a different
                length than ``coefficients``.
        """
        n_coefficients = len(coefficients)
        if feature_names is None:
            feature_names = [f'Feature {i+1}' for i in range(n_coefficients)]
        elif len(feature_names) != n_coefficients:
            raise ValueError(
                f"got {len(feature_names)} feature names for "
                f"{n_coefficients} coefficients"
            )

        colors = ['green' if x > 0 else 'red' for x in coefficients]

        plt.figure(figsize=figsize)
        plt.barh(feature_names, coefficients, color=colors, alpha=0.7)
        plt.xlabel('Coefficient Value')
        plt.title('Feature Importance (Coefficients)')
        plt.axvline(x=0, color='black', linestyle='-', linewidth=0.5)
        plt.grid(True, alpha=0.3, axis='x')
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_learning_curve(train_sizes, train_scores, val_scores, figsize=(10, 6)):
        """Plot training and validation scores against training-set size.

        Args:
            train_sizes: X values shared by both curves.
            train_scores: Training score at each size.
            val_scores: Validation score at each size.
            figsize: Figure size passed to ``plt.figure``.
        """
        plt.figure(figsize=figsize)
        plt.plot(train_sizes, train_scores, 'o-', label='Training Score', linewidth=2)
        plt.plot(train_sizes, val_scores, 'o-', label='Validation Score', linewidth=2)
        plt.xlabel('Training Set Size')
        plt.ylabel('Score')
        plt.title('Learning Curve')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_multiple_time_series(data_dict, figsize=(14, 8), title="Multiple Time Series"):
        """Plot several named series on the same axes.

        Args:
            data_dict: Mapping of legend label to series. A ``pd.Series`` is
                drawn against its index; anything else is handed to
                ``plt.plot`` as its only data argument.
            figsize: Figure size passed to ``plt.figure``.
            title: Figure title.
        """
        plt.figure(figsize=figsize)

        for name, series in data_dict.items():
            if isinstance(series, pd.Series):
                plt.plot(series.index, series.values, label=name, linewidth=1.5)
            else:
                plt.plot(series, label=name, linewidth=1.5)

        plt.xlabel('Time')
        plt.ylabel('Value')
        plt.title(title)
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()


if __name__ == "__main__":
    # Demo: draw one histogram and one dated line chart from the same
    # seeded random sample.
    banner = "=" * 70
    print("Visualization Utilities Example")
    print(banner)

    # Fixed seed so repeated runs of the demo draw the same sample.
    np.random.seed(42)
    sample = np.random.randn(100)

    plotter = StatisticalVisualizations()

    # Histogram of the raw sample.
    plotter.plot_distribution(sample, title="Sample Distribution")

    # The same values again, indexed by consecutive days.
    daily_index = pd.date_range('2020-01-01', periods=100, freq='D')
    dated_sample = pd.Series(sample, index=daily_index)
    plotter.plot_time_series(dated_sample, title="Sample Time Series")

252 lines • 8.9 KB
python

About RSK World

Founded by Molla Samser, with Designer & Tester Rima Khatun, RSK World is your one-stop destination for free programming resources, source code, and development tools.

Founder: Molla Samser
Designer & Tester: Rima Khatun

Development

  • Game Development
  • Web Development
  • Mobile Development
  • AI Development
  • Development Tools

Legal

  • Terms & Conditions
  • Privacy Policy
  • Disclaimer

Contact Info

Nutanhat, Mongolkote
Purba Burdwan, West Bengal
India, 713147

+91 93305 39277

hello@rskworld.in
support@rskworld.in

© 2026 RSK World. All rights reserved.

Content used for educational purposes only. View Disclaimer