help@rskworld.in +91 93305 39277
RSK World
  • Home
  • Development
    • Web Development
    • Mobile Apps
    • Software
    • Games
    • Project
  • Technologies
    • Data Science
    • AI Development
    • Cloud Development
    • Blockchain
    • Cyber Security
    • Dev Tools
    • Testing Tools
  • About
  • Contact

Theme Settings

Color Scheme
Display Options
Font Size
100%
Back to Project
RSK World
language-translation
/
scripts
RSK World
language-translation
Language Translation Dataset - Machine Translation + Multilingual NLP + Parallel Corpus + Transformers
scripts
  • __pycache__
  • analyze_dataset.py (4.6 KB)
  • build_local_dictionary.py (6.7 KB)
  • convert_format.py (3.6 KB)
  • create_zip.py (4.1 KB)
  • download_translation_data.py (17.6 KB)
  • process_data.py (3.9 KB)
build_local_dictionary.py
scripts/build_local_dictionary.py
Raw Download
Find: Go to:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Language Translation Dataset - Build Local Dictionary Script
Author: RSK World
Website: https://rskworld.in
Email: help@rskworld.in
Phone: +91 93305 39277
Copyright © 2016 RSK World. All rights reserved.

This script builds a comprehensive local dictionary with reverse translations.
"""

import json
from pathlib import Path

# Project root: two levels up from this file, i.e. the parent of the
# directory that contains this script.
BASE_DIR = Path(__file__).parent.parent
# Directory holding train.json and local_dictionary.json, which this script
# reads from and writes to.
DATA_DIR = BASE_DIR / 'data'

def _reverse_pair(dictionary, source_key, target_key):
    """Mirror every ``source_key`` entry into ``target_key`` (value -> key)."""
    if source_key not in dictionary:
        return
    target = dictionary.setdefault(target_key, {})
    for phrase, translation in dictionary[source_key].items():
        # Lookup keys are stored lower-cased throughout the dictionary.
        target[translation.lower()] = phrase


def _bridge_via_english(dictionary, source_lang, target_lang):
    """Fill ``source_lang-target_lang`` by pivoting each entry through English."""
    english_to_source = f'en-{source_lang}'
    english_to_target = f'en-{target_lang}'
    if english_to_source not in dictionary or english_to_target not in dictionary:
        return
    pivot = dictionary[english_to_target]
    bridged = dictionary.setdefault(f'{source_lang}-{target_lang}', {})
    # '<source_lang>-en' is guaranteed to exist here: the caller reverses
    # 'en-<source_lang>' (which is present) before bridging.
    for phrase, english in dictionary[f'{source_lang}-en'].items():
        english_key = english.lower()
        if english_key in pivot:
            bridged[phrase] = pivot[english_key]


def build_reverse_dictionary(dictionary):
    """Build reverse and English-bridged translations for all language pairs.

    The mapping is modified in place and also returned. Three steps run in
    order:

    1. Every forward pair (``en-es``, ``en-fr``, ``en-de``, ``es-fr``,
       ``es-de``, ``fr-de``) present in ``dictionary`` is mirrored into its
       reverse pair, keyed by the lower-cased translation.
    2. ``es-fr``, ``es-de`` and ``fr-de`` are extended by pivoting through
       English whenever both required ``en-*`` pairs are present.
    3. The cross-language pairs are mirrored again so that entries created
       in step 2 also appear in ``fr-es``, ``de-es`` and ``de-fr``.

    Without step 3 the bridged entries would only be mirrored on a later
    run over the saved dictionary.

    Args:
        dictionary: Mapping of pair codes such as ``'en-es'`` to a dict of
            ``{source phrase: translation}``. Translations must be strings.

    Returns:
        The same ``dictionary`` object, with the derived entries added.
    """
    english_pairs = (
        ('en-es', 'es-en'),
        ('en-fr', 'fr-en'),
        ('en-de', 'de-en'),
    )
    cross_pairs = (
        ('es-fr', 'fr-es'),
        ('es-de', 'de-es'),
        ('fr-de', 'de-fr'),
    )

    # Step 1: mirror everything that is already present.
    for source_key, target_key in english_pairs + cross_pairs:
        _reverse_pair(dictionary, source_key, target_key)

    # Step 2: derive cross-language entries through the English bridge.
    for source_lang, target_lang in (('es', 'fr'), ('es', 'de'), ('fr', 'de')):
        _bridge_via_english(dictionary, source_lang, target_lang)

    # Step 3: mirror the cross pairs again to pick up the bridged entries.
    for source_key, target_key in cross_pairs:
        _reverse_pair(dictionary, source_key, target_key)

    return dictionary

def load_train_data():
    """Collect direct translation pairs from ``train.json``.

    Reads ``DATA_DIR / 'train.json'`` if it exists and, for every record and
    every ordered pair of distinct languages whose fields are both non-empty,
    stores ``lower-cased source text -> target text`` under the pair code
    (for example ``'en-es'``). Later records overwrite earlier ones that
    share the same lower-cased source text.

    The file is expected to hold a list of objects with optional
    ``english``, ``spanish``, ``french`` and ``german`` fields.

    Returns:
        A dict containing all twelve pair codes. Every sub-dict is empty
        when ``train.json`` is missing.
    """
    # (pair-code prefix, record field name) for each supported language.
    languages = (
        ('en', 'english'),
        ('es', 'spanish'),
        ('fr', 'french'),
        ('de', 'german'),
    )
    dictionary = {
        f'{src_code}-{dst_code}': {}
        for src_code, _ in languages
        for dst_code, _ in languages
        if src_code != dst_code
    }

    train_file = DATA_DIR / 'train.json'
    if not train_file.exists():
        return dictionary

    with open(train_file, 'r', encoding='utf-8') as handle:
        records = json.load(handle)

    for record in records:
        for src_code, src_field in languages:
            if not record.get(src_field):
                continue
            for dst_code, dst_field in languages:
                if dst_code == src_code or not record.get(dst_field):
                    continue
                lookup = record[src_field].lower()
                dictionary[f'{src_code}-{dst_code}'][lookup] = record[dst_field]

    return dictionary

def main():
    """Build the local translation dictionary and save it as JSON.

    Steps:
        1. Load ``DATA_DIR / 'local_dictionary.json'`` if it exists,
           otherwise start from twelve empty language pairs.
        2. Merge the pairs returned by ``load_train_data()``; train entries
           overwrite existing ones with the same key.
        3. Add derived entries via ``build_reverse_dictionary()``.
        4. Write the result back to ``local_dictionary.json`` and print
           per-pair and total entry counts.
    """
    print("=" * 60)
    print("Building Local Translation Dictionary")
    print("Author: RSK World (https://rskworld.in)")
    print("=" * 60)

    # Load existing local dictionary
    local_dict_file = DATA_DIR / 'local_dictionary.json'
    if local_dict_file.exists():
        with open(local_dict_file, 'r', encoding='utf-8') as f:
            dictionary = json.load(f)
        print("[OK] Loaded existing local dictionary")
    else:
        dictionary = {
            'en-es': {}, 'en-fr': {}, 'en-de': {},
            'es-en': {}, 'es-fr': {}, 'es-de': {},
            'fr-en': {}, 'fr-es': {}, 'fr-de': {},
            'de-en': {}, 'de-es': {}, 'de-fr': {}
        }

    # Load from train.json
    train_dict = load_train_data()

    # Merge train data into dictionary. Iterate over the train pairs rather
    # than the loaded file's keys, so a pair missing from an existing
    # local_dictionary.json is created instead of being dropped.
    for pair, entries in train_dict.items():
        dictionary.setdefault(pair, {}).update(entries)

    # Build reverse translations
    dictionary = build_reverse_dictionary(dictionary)

    # Save the complete dictionary. Create the data directory first so the
    # write does not fail when it is missing.
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    with open(local_dict_file, 'w', encoding='utf-8') as f:
        json.dump(dictionary, f, ensure_ascii=False, indent=2)

    # Print statistics
    print("\nDictionary Statistics:")
    for key, value in dictionary.items():
        print(f"  {key}: {len(value)} entries")

    total = sum(len(v) for v in dictionary.values())
    print(f"\n[OK] Total translation entries: {total}")
    print(f"[OK] Dictionary saved to {local_dict_file}")

# Run the build only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

180 lines • 6.7 KB
python

About RSK World

Founded by Molla Samser, with Designer & Tester Rima Khatun, RSK World is your one-stop destination for free programming resources, source code, and development tools.

Founder: Molla Samser
Designer & Tester: Rima Khatun

Development

  • Game Development
  • Web Development
  • Mobile Development
  • AI Development
  • Development Tools

Legal

  • Terms & Conditions
  • Privacy Policy
  • Disclaimer

Contact Info

Nutanhat, Mongolkote
Purba Burdwan, West Bengal
India, 713147

+91 93305 39277

hello@rskworld.in
support@rskworld.in

© 2026 RSK World. All rights reserved.

Content used for educational purposes only. View Disclaimer