Translate Multiple Document Files
import requests
import datetime
import time
# --- Credentials -----------------------------------------------------------
# Replace the placeholder with a real API token before running the script.
auth_token = "<YOUR_API_TOKEN>"
# Every request below sends the token as a Bearer credential.
headers = {'Authorization': 'Bearer ' + auth_token}

# --- Service endpoints -----------------------------------------------------
BASE_URL = "https://doctranslate-api.doctranslate.io"
UPLOAD_FILE_URL = BASE_URL + "/v1/upload"
TRANSLATE_DOCUMENT_URL = BASE_URL + "/v1/process"
# Ends with a slash so a task id can be appended directly.
GET_RESULT_ENDPOINT = BASE_URL + "/v1/result/"

# --- Input -----------------------------------------------------------------
# Documents to upload and translate in a single batch.
input_file_paths = [
    '/data/example.docx',
    '/data/example_2.docx',
]
# UPLOAD FILE PART
# Uploads every input document in one multipart request and keeps the
# per-file metadata returned by the server in `meta_files`, which the
# translate request later sends back.
TASK_TYPE = 'document'
form_data = {
    'task_type': TASK_TYPE,
}
meta_files = []
# Track the opened handles so they can be closed whether or not the upload
# succeeds; previously they were left open for the life of the process.
file_handles = []
try:
    for file_path in input_file_paths:
        file_handles.append(open(file_path, 'rb'))
    # Add the files to the form data: each one is sent under the 'files' field.
    files = [('files', handle) for handle in file_handles]
    response = requests.post(UPLOAD_FILE_URL, data=form_data, files=files,
                             headers=headers)
    response.raise_for_status()  # Turn HTTP error statuses into exceptions
    response_data = response.json()
    # The loop below treats 'data' as a list of metadata dicts, so fall back
    # to an empty list (not a dict) when it is missing or null.
    meta_files = response_data.get('data') or []
    for metadata in meta_files:
        # .get() avoids a KeyError that the handler below would not catch.
        task_id = metadata.get("task_id")
        if task_id:
            print(f'Task ID: {task_id}')
        else:
            print('Failed to get the task ID from the response.')
except requests.exceptions.RequestException as e:
    print(f'An error occurred: {e}')
finally:
    for handle in file_handles:
        handle.close()
# TRANSLATE DOCUMENT PART
# Translation settings
# NOTE(review): original_language and publish_api are declared here but are
# not included in json_data below - confirm whether the API expects them.
original_language = None  # Automatically detect language if not specified
destination_language = 'vi'
process_mode = 'append'
translate_type = 'Professional'
publish_api = True
# Custom glossary entries mapping a source word to its required translation.
# The source words are English and the replacements Vietnamese (matching
# destination_language), so ori_lang is "en" and des_lang is "vi"; the two
# labels were previously swapped.
dictionary = [
    {"ori_word": "Hello", "des_word": "Chào mừng", "des_lang": "vi", "ori_lang": "en"},
    {"ori_word": "You", "des_word": "Cậu", "des_lang": "vi", "ori_lang": "en"},
]
custom_prompt = None  # Edit the translation as desired. For example, 'Uppercase the nouns in the text.'
# Request body for the translate call; meta_files carries the metadata
# returned by the upload step.
json_data = {
    'task_type': TASK_TYPE,
    'dest_lang': destination_language,
    'process_mode': process_mode,
    'translate_type': translate_type,
    'is_translate_images': False,
    'bilingual_text_style__font': 'Helvetica',
    'bilingual_text_style__color': '#96d35f',
    'meta_files': meta_files,
    "dictionary": dictionary,
    'custom_prompt': custom_prompt,
}
# Submit the translation job as a JSON body and remember the parent task id
# the server returns for it.
# parent_task_id is only bound when the request itself succeeds.
try:
    response = requests.post(
        TRANSLATE_DOCUMENT_URL,
        headers=headers,
        json=json_data,
    )
    response.raise_for_status()  # Turn HTTP error statuses into exceptions
    response_data = response.json()
    job_info = response_data.get('data', {})
    parent_task_id = job_info.get('task_id')
    if not parent_task_id:
        print('Failed to get the parent task ID from the response.')
    else:
        print(f'Parent Task ID: {parent_task_id}')
except requests.exceptions.RequestException as exc:
    print(f'An error occurred: {exc}')
# GET RESULT PART
# Stop here unless the translate step produced a usable task id. The name is
# unbound when the request failed and None/empty when the response carried no
# task id; both cases must abort, otherwise the loop would poll ".../None".
parent_task_id = locals().get('parent_task_id')
if not parent_task_id:
    print('Error: missing parent_task_id')
    raise SystemExit(1)

timeout = 600  # Overall polling budget: 10 minutes
poll_interval = 5  # Seconds between polls, to avoid overloading the server
request_timeout = 30  # Per-request limit so a hung connection cannot outlive `timeout`
start_time = datetime.datetime.now()
url_download = ''  # Most recent download URL that was reported
printed_tasks = set()  # Task ids whose download URL has already been printed

while True:
    # total_seconds() is the full elapsed time; .seconds is only the seconds
    # component of the timedelta and ignores whole days.
    if (datetime.datetime.now() - start_time).total_seconds() >= timeout:
        print('Error: Processing timed out.')
        break
    try:
        response = requests.get(f"{GET_RESULT_ENDPOINT}{parent_task_id}",
                                headers=headers, timeout=request_timeout)
        response.raise_for_status()  # Ensure HTTP request success
        response_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException.
        print(f'An error occurred while checking task status: {e}')
        break

    # 'data' maps task ids to result dicts and also holds non-task entries
    # (e.g. "status": null); keep only the per-task dicts.
    results = response_data.get('data') or {}
    task_results = {
        task_id: result
        for task_id, result in results.items()
        if isinstance(result, dict)
    }
    for task_id, result in task_results.items():
        if result.get('status') != 'done' or task_id in printed_tasks:
            continue
        task_url = result.get('url_download')
        if task_url:
            # Only overwrite url_download with a real URL so a later task
            # without one cannot trigger a false "missing URL" error.
            url_download = task_url
            print(f"URL Download file {result.get('filename')}: {task_url}")
            printed_tasks.add(task_id)  # Mark this task_id as printed

    # Stop once every task present in the response has been printed.
    if task_results and printed_tasks.issuperset(task_results):
        break
    time.sleep(poll_interval)

if not url_download:
    print('Error: missing URL for download.')
Last updated