help@rskworld.in +91 93305 39277
RSK World
  • Home
  • Development
    • Web Development
    • Mobile Apps
    • Software
    • Games
    • Project
  • Technologies
    • Data Science
    • AI Development
    • Cloud Development
    • Blockchain
    • Cyber Security
    • Dev Tools
    • Testing Tools
  • About
  • Contact

Theme Settings

Color Scheme
Display Options
Font Size
100%
Back to Project
RSK World
language-translation
/
scripts
RSK World
language-translation
Language Translation Dataset - Machine Translation + Multilingual NLP + Parallel Corpus + Transformers
scripts
  • __pycache__
  • analyze_dataset.py (4.6 KB)
  • build_local_dictionary.py (6.7 KB)
  • convert_format.py (3.6 KB)
  • create_zip.py (4.1 KB)
  • download_translation_data.py (17.6 KB)
  • process_data.py (3.9 KB)
tokenized_data.json | .gitkeep | styles.css | cleaned_data.csv | convert_format.py
scripts/convert_format.py
Raw Download
Find: Go to:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Language Translation Dataset - Format Conversion Script
Author: RSK World
Website: https://rskworld.in
Email: help@rskworld.in
Phone: +91 93305 39277
Copyright © 2016 RSK World. All rights reserved.

This script converts between TSV and JSON formats for the language translation dataset.
"""

import argparse
import json
import sys
from pathlib import Path

import pandas as pd

def convert_tsv_to_json(input_path, output_path):
    """
    Convert TSV file to JSON format.

    Every TSV row becomes one JSON object keyed by the header names; the
    objects are written as a single UTF-8 encoded, indented JSON array.
    Empty cells are written as JSON ``null``.

    Args:
        input_path: Path to input TSV file
        output_path: Path to output JSON file
    """
    print(f"Converting TSV to JSON...")
    print(f"Input: {input_path}")
    print(f"Output: {output_path}")

    # Load TSV. Only truly empty cells are treated as missing: pandas'
    # default NA tokens ("NA", "null", "nan", ...) are ordinary words in a
    # translation corpus and must survive the conversion unchanged.
    df = pd.read_csv(
        input_path,
        sep='\t',
        encoding='utf-8',
        keep_default_na=False,
        na_values=[''],
    )
    print(f"Loaded {len(df)} rows from TSV")

    # Convert to JSON records. Missing cells are mapped to None so they are
    # serialized as `null`; a float NaN would be emitted as the bare token
    # `NaN`, which strict JSON parsers reject.
    data = [
        {column: (None if pd.isna(value) else value) for column, value in row.items()}
        for row in df.to_dict('records')
    ]

    # Make sure the destination directory exists before opening the file.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    # Save JSON
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

    print(f"Successfully converted to JSON: {len(data)} records")

def convert_json_to_tsv(input_path, output_path):
    """
    Convert JSON file to TSV format.

    The JSON document is loaded into a pandas DataFrame and written out
    tab-separated, UTF-8 encoded, with a header row and no index column.

    Args:
        input_path: Path to input JSON file
        output_path: Path to output TSV file
    """
    print(f"Converting JSON to TSV...")
    print(f"Input: {input_path}")
    print(f"Output: {output_path}")

    # Load JSON
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    print(f"Loaded {len(data)} records from JSON")

    # Convert to DataFrame
    df = pd.DataFrame(data)

    # Make sure the destination directory exists; to_csv refuses to write
    # into a directory that has not been created yet.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    # Save TSV
    df.to_csv(output_path, sep='\t', index=False, encoding='utf-8')

    print(f"Successfully converted to TSV: {len(df)} rows")

def main():
    """Main conversion function.

    Parses ``--input`` and ``--output`` from the command line, chooses the
    conversion direction from the two file extensions (case-insensitive)
    and runs the matching converter.

    Returns:
        0 when the conversion succeeded; 1 when the input file is missing,
        the extension pair is unsupported, or the conversion itself failed.
        Error messages are written to stderr.
    """
    parser = argparse.ArgumentParser(
        description='Convert between TSV and JSON formats for language translation dataset',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python convert_format.py --input data/train.tsv --output data/train.json
  python convert_format.py --input data/train.json --output data/train.tsv
        """
    )

    parser.add_argument(
        '--input',
        type=str,
        required=True,
        help='Input file path (TSV or JSON)'
    )

    parser.add_argument(
        '--output',
        type=str,
        required=True,
        help='Output file path (TSV or JSON)'
    )

    args = parser.parse_args()

    input_path = Path(args.input)
    output_path = Path(args.output)

    # Check that the input is an existing regular file (a directory with a
    # matching name would otherwise slip through and fail later).
    if not input_path.is_file():
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        return 1

    # Determine conversion direction
    input_ext = input_path.suffix.lower()
    output_ext = output_path.suffix.lower()

    print("=" * 60)
    print("Language Translation Dataset - Format Converter")
    print("Author: RSK World (https://rskworld.in)")
    print("=" * 60)

    if input_ext == '.tsv' and output_ext == '.json':
        convert = convert_tsv_to_json
    elif input_ext == '.json' and output_ext == '.tsv':
        convert = convert_json_to_tsv
    else:
        print(f"Error: Unsupported conversion from {input_ext} to {output_ext}", file=sys.stderr)
        print("Supported conversions: TSV <-> JSON", file=sys.stderr)
        return 1

    try:
        convert(input_path, output_path)
    except (OSError, ValueError) as exc:
        # OSError covers unreadable/unwritable paths; ValueError covers
        # malformed JSON, undecodable bytes and pandas parser errors.
        print(f"Error: Conversion failed: {exc}", file=sys.stderr)
        return 1

    print("=" * 60)
    print("Conversion complete!")
    print("=" * 60)
    return 0

if __name__ == '__main__':
    # Propagate the result so shell scripts and CI can detect failures.
    sys.exit(main())

130 lines • 3.6 KB
python

About RSK World

Founded by Molla Samser, with Designer & Tester Rima Khatun, RSK World is your one-stop destination for free programming resources, source code, and development tools.

Founder: Molla Samser
Designer & Tester: Rima Khatun

Development

  • Game Development
  • Web Development
  • Mobile Development
  • AI Development
  • Development Tools

Legal

  • Terms & Conditions
  • Privacy Policy
  • Disclaimer

Contact Info

Nutanhat, Mongolkote
Purba Burdwan, West Bengal
India, 713147

+91 93305 39277

hello@rskworld.in
support@rskworld.in

© 2026 RSK World. All rights reserved.

Content used for educational purposes only. View Disclaimer