Translate Multiple Document Files

import requests
import datetime
import time

# Authentication token
auth_token = "<YOUR_API_TOKEN>"

headers = {'Authorization': f'Bearer {auth_token}'}

# Constants for URLs
BASE_URL = "https://doctranslate-api.doctranslate.io"
TRANSLATE_DOCUMENT_URL = f"{BASE_URL}/v1/process"
GET_RESULT_ENDPOINT = f"{BASE_URL}/v1/result/"
UPLOAD_FILE_URL = f"{BASE_URL}/v1/upload"

# Seconds to wait on the upload request before giving up. Without a timeout
# `requests` waits indefinitely if the server stops responding.
UPLOAD_REQUEST_TIMEOUT = 120

# File paths
input_file_paths = ['/data/example.docx', '/data/example_2.docx']  # Input file paths

# UPLOAD FILE PART
TASK_TYPE = 'document'

form_data = {
    'task_type': TASK_TYPE,
}

# Metadata returned by the upload endpoint (iterated below as one entry per
# uploaded file). Stays empty if the upload fails.
meta_files = []

# (form field name, open binary file handle) pairs sent as multipart data.
files = []
try:
    # Open the files inside the `try` so the `finally` clause closes every
    # handle that was opened, even if a later `open` or the request fails.
    for file_path in input_file_paths:
        files.append(('files', open(file_path, 'rb')))

    response = requests.post(UPLOAD_FILE_URL, data=form_data, files=files,
                             headers=headers, timeout=UPLOAD_REQUEST_TIMEOUT)
    response.raise_for_status()  # Turn HTTP 4xx/5xx into an exception

    response_data = response.json()
    # `or []` also covers a response whose "data" field is present but null.
    meta_files = response_data.get('data') or []
    for metadata in meta_files:
        # `.get` so that a missing key reaches the failure message below
        # instead of raising KeyError.
        task_id = metadata.get("task_id")
        if task_id:
            print(f'Task ID: {task_id}')
        else:
            print('Failed to get the task ID from the response.')
except requests.exceptions.RequestException as e:
    print(f'An error occurred: {e}')
finally:
    # Release the file handles whether or not the upload succeeded.
    for _, file_handle in files:
        file_handle.close()

# TRANSLATE DOCUMENT PART
# Translation settings
# NOTE(review): `original_language` and `publish_api` are declared here but are
# not included in the request payload below - confirm the expected field names
# against the API reference before sending them.
original_language = None  # Automatically detect language if not specified
destination_language = 'vi'
process_mode = 'append'
translate_type = 'Professional'
publish_api = True
# NOTE(review): the entries map English words to Vietnamese ones, yet
# "ori_lang" is 'vi' and "des_lang" is 'en' - the two look swapped; verify
# against the API reference.
dictionary = [{"ori_word": "Hello", "des_word": "Chào mừng", "des_lang": "en", "ori_lang": "vi"},
                   {"ori_word": "You", "des_word": "Cậu", "des_lang": "en", "ori_lang": "vi"}]
custom_prompt = None    # Edit the translation as desired. For example, 'Uppercase the nouns in the text.'

# Seconds to wait on the process request before giving up. Without a timeout
# `requests` waits indefinitely if the server stops responding.
PROCESS_REQUEST_TIMEOUT = 60

# Request body for the process endpoint; `meta_files` carries the metadata
# returned by the upload step.
json_data = {
    'task_type': TASK_TYPE,
    'dest_lang': destination_language,
    'process_mode': process_mode,
    'translate_type': translate_type,
    'is_translate_images': False,
    'bilingual_text_style__font': 'Helvetica',
    'bilingual_text_style__color': '#96d35f',
    'meta_files': meta_files,
    "dictionary": dictionary,
    'custom_prompt': custom_prompt
}

# Defined up front so the check after the request works both when the request
# raises and when the response simply carries no task ID.
parent_task_id = None
try:
    response = requests.post(TRANSLATE_DOCUMENT_URL, json=json_data,
                             headers=headers, timeout=PROCESS_REQUEST_TIMEOUT)
    response.raise_for_status()  # Turn HTTP 4xx/5xx into an exception

    response_data = response.json()
    # `or {}` also covers a response whose "data" field is present but null.
    parent_task_id = (response_data.get('data') or {}).get('task_id')

    if parent_task_id:
        print(f'Parent Task ID: {parent_task_id}')
    else:
        print('Failed to get the parent task ID from the response.')
except requests.exceptions.RequestException as e:
    print(f'An error occurred: {e}')

# Stop here unless a usable parent task ID was obtained; polling needs it.
if not parent_task_id:
    print('Error: missing parent_task_id')
    raise SystemExit(1)  # Non-zero status so callers can detect the failure

# POLL FOR RESULTS PART
timeout = 600  # Overall polling budget in seconds (10 minutes)
# Seconds to wait on a single status request before giving up.
POLL_REQUEST_TIMEOUT = 30
# A monotonic clock is unaffected by wall-clock adjustments, and comparing
# against a deadline avoids `timedelta.seconds`, which is only the seconds
# component of the delta rather than the total elapsed time.
deadline = time.monotonic() + timeout
url_download = ''  # Last download URL reported; stays empty if none was seen

printed_tasks = set()  # Set to store printed task_ids

while True:
    if time.monotonic() >= deadline:
        print('Error: Processing timed out.')
        break

    try:
        response = requests.get(f"{GET_RESULT_ENDPOINT}{parent_task_id}",
                                headers=headers, timeout=POLL_REQUEST_TIMEOUT)
        response.raise_for_status()  # Ensure HTTP request success
        response_data = response.json()

        # `or {}` also covers a response whose "data" field is present but null.
        results = response_data.get('data') or {}

        # Only dict-valued entries are treated as per-file task results; this
        # skips scalar keys such as the "status": null entry.
        # NOTE(review): assumes every dict-valued entry under "data" is a task
        # result - confirm against the API reference.
        task_results = {task_id: result for task_id, result in results.items()
                        if isinstance(result, dict)}

        for task_id, result in task_results.items():
            if result.get('status') == 'done' and task_id not in printed_tasks:
                download_url = result.get('url_download')
                if download_url:
                    # Only record non-empty URLs so that a finished task without
                    # a URL cannot erase one that was already reported.
                    url_download = download_url
                    print(f"URL Download file {result.get('filename')}: {url_download}")
                    printed_tasks.add(task_id)  # Mark this task_id as printed

        # Stop once at least one task is known and all of them were printed.
        # NOTE(review): a task that never reaches 'done' keeps the loop polling
        # until the timeout - confirm which other statuses the API can report.
        if task_results and printed_tasks.issuperset(task_results):
            break

    except requests.exceptions.RequestException as e:
        print(f'An error occurred while checking task status: {e}')
        break

    time.sleep(5)  # Avoid overloading the server

if not url_download:
    print('Error: missing URL for download.')
Previous: Upload Multiple Files | Next: Text Translation
Last updated 4 months ago