help@rskworld.in +91 93305 39277
RSK World
  • Home
  • Development
    • Web Development
    • Mobile Apps
    • Software
    • Games
    • Project
  • Technologies
    • Data Science
    • AI Development
    • Cloud Development
    • Blockchain
    • Cyber Security
    • Dev Tools
    • Testing Tools
  • About
  • Contact

Theme Settings

Color Scheme
Display Options
Font Size
100%
Back to Project
RSK World
language-translation
/
scripts
RSK World
language-translation
Language Translation Dataset - Machine Translation + Multilingual NLP + Parallel Corpus + Transformers
scripts
  • __pycache__
  • analyze_dataset.py (4.6 KB)
  • build_local_dictionary.py (6.7 KB)
  • convert_format.py (3.6 KB)
  • create_zip.py (4.1 KB)
  • download_translation_data.py (17.6 KB)
  • process_data.py (3.9 KB)
create_zip.py
scripts/create_zip.py
Raw Download
Find: Go to:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Language Translation Dataset - Create ZIP Archive Script
Author: RSK World
Website: https://rskworld.in
Email: help@rskworld.in
Phone: +91 93305 39277
Copyright © 2016 RSK World. All rights reserved.

This script creates a ZIP archive of the entire project for download.
"""

import fnmatch
import os
import zipfile
from datetime import datetime
from pathlib import Path

# Project root: two levels up from this script's own path.
BASE_DIR = Path(__file__).parent.parent
# The archive is written directly into the project root.
ZIP_FILE = BASE_DIR.joinpath('language-translation.zip')

# Files and directories to include.
# NOTE: the functions shown in this script do not read this list; it
# records the intended archive contents.
INCLUDE_PATTERNS = [
    # Dataset files
    'data/*.json',
    'data/*.tsv',
    'data/*.txt',
    # Source code
    'scripts/*.py',
    'examples/*.py',
    '*.py',
    # Top-level documents and pages
    '*.md',
    '*.txt',
    '*.html',
    # Project metadata
    'LICENSE',
    'requirements.txt',
]

# Files and directories to exclude.
# Each entry is a shell-style glob that should_include() matches against
# every individual path component (directory or file name), never against
# the path string as a whole.
EXCLUDE_PATTERNS = [
    '__pycache__',
    '*.pyc',
    '*.pyo',
    '.git',
    '.DS_Store',
    '*.zip',
    'tatoeba_*.csv',  # Exclude large downloaded files
    'opus_*.zip'
]

def should_include(file_path):
    """Check if file should be included in ZIP.

    A file is rejected when its name starts with a dot, or when any
    component of its path matches one of EXCLUDE_PATTERNS. Otherwise it
    is accepted only if its suffix is one of the archived file types.

    Args:
        file_path: A pathlib path object (pure or concrete) for the
            candidate file.

    Returns:
        True if the file belongs in the archive, False otherwise.
    """
    # Hidden files are never archived, regardless of the pattern list.
    if file_path.name.startswith('.'):
        return False

    # Match per component with glob semantics. A plain substring test on
    # the whole path would (a) never match entries such as '*.pyc', and
    # (b) reject every file when an unrelated directory name merely
    # contains '.git' (for example a checkout under 'site.github.io').
    for part in file_path.parts:
        for pattern in EXCLUDE_PATTERNS:
            if fnmatch.fnmatchcase(part, pattern):
                return False

    # Only these file types are considered archive content.
    return file_path.suffix in ('.json', '.tsv', '.txt', '.py', '.md', '.html')

def create_zip():
    """Build the project ZIP archive at ZIP_FILE and print a summary.

    Any existing archive is deleted first. Files are then added in four
    passes: every ``*.py`` under BASE_DIR (recursive), the regular files
    directly inside ``BASE_DIR / 'data'``, the top-level ``*.md`` files,
    and finally a fixed list of named files. The first three passes are
    filtered through should_include(); the named files are added whenever
    they exist. Each archive member is stored under its path relative to
    BASE_DIR and is written at most once.
    """
    rule = "=" * 60
    print(rule)
    print("Creating Language Translation Dataset ZIP Archive")
    print("Author: RSK World (https://rskworld.in)")
    print(rule)

    # Start from a clean slate so stale entries never survive.
    if ZIP_FILE.exists():
        ZIP_FILE.unlink()
        print("[OK] Removed existing ZIP file")

    with zipfile.ZipFile(ZIP_FILE, 'w', zipfile.ZIP_DEFLATED) as archive:
        # Member names already written; the passes below can overlap
        # (e.g. config.py is both a *.py file and a named file).
        seen = set()

        def store(path):
            """Write *path* into the archive unless it is already there."""
            member = str(path.relative_to(BASE_DIR))
            if member in seen:
                return
            archive.write(path, member)
            seen.add(member)
            print(f"  Added: {member}")

        # Pass 1: Python sources anywhere in the project tree.
        for source in BASE_DIR.rglob('*.py'):
            if should_include(source):
                store(source)

        # Pass 2: files sitting directly in the data directory.
        data_dir = BASE_DIR / 'data'
        if data_dir.exists():
            for entry in data_dir.iterdir():
                if entry.is_file() and should_include(entry):
                    store(entry)

        # Pass 3: top-level Markdown documentation.
        for document in BASE_DIR.glob('*.md'):
            if should_include(document):
                store(document)

        # Pass 4: named files, added without the should_include() filter.
        for file_name in ('index.html', 'LICENSE', 'requirements.txt', 'config.py'):
            candidate = BASE_DIR / file_name
            if candidate.exists():
                store(candidate)

    # Report the size of the finished archive.
    size_bytes = ZIP_FILE.stat().st_size
    size_mb = size_bytes / (1024 * 1024)

    print("\n" + rule)
    print("[OK] ZIP archive created successfully!")
    print(f"File: {ZIP_FILE.name}")
    print(f"Size: {size_mb:.2f} MB ({size_bytes:,} bytes)")
    print(f"Location: {ZIP_FILE}")
    print(rule)

# Build the archive only when this file is run as a script, not on import.
if __name__ == '__main__':
    create_zip()

135 lines • 4.1 KB
python

About RSK World

Founded by Molla Samser, with Designer & Tester Rima Khatun, RSK World is your one-stop destination for free programming resources, source code, and development tools.

Founder: Molla Samser
Designer & Tester: Rima Khatun

Development

  • Game Development
  • Web Development
  • Mobile Development
  • AI Development
  • Development Tools

Legal

  • Terms & Conditions
  • Privacy Policy
  • Disclaimer

Contact Info

Nutanhat, Mongolkote
Purba Burdwan, West Bengal
India, 713147

+91 93305 39277

hello@rskworld.in
support@rskworld.in

© 2026 RSK World. All rights reserved.

Content used for educational purposes only. View Disclaimer