#!/usr/bin/env python3
"""
Dataverse Uploader

A tool that generates test files and uploads them to a Dataverse dataset,
intended for measuring upload performance.

Options:
- file size (--size)
- number of files (--num-files)
- disable tabular ingest (--disable-tab-ingest)
- create a zip file before uploading (--zip)
- debug mode for detailed error information (--debug)
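
Example invocation (script name illustrative; settings are read from .env):
    python dataverse_uploader.py --size 100 --num-files 3 --zip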
"""

import argparse
import datetime
import os
import time
import traceback
import zipfile

import requests
from dotenv import load_dotenv

# Load environment variables from .env
load_dotenv()

# Default configuration from .env file
DATAVERSE_SERVER_URL = os.environ.get("DATAVERSE_SERVER_URL")
DATASET_PID = os.environ.get("DATASET_PID")
API_TOKEN = os.environ.get("API_TOKEN")
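
# Expected .env layout (values below are illustrative only):
#   DATAVERSE_SERVER_URL=https://demo.dataverse.org
#   DATASET_PID=doi:10.5072/FK2/EXAMPLE
#   API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx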


def create_test_file(size_mb=10, file_index=1):
    """Create a test file with unique content and return its path."""
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    file_path = f"dvuploader_test_{size_mb}MB_{timestamp}_{file_index}.bin"
    
    print(f"Generating {size_mb}MB file: {file_path}")
    
    pattern = b"0123456789ABCDEF" * 64  # 16 bytes * 64 = 1 KiB
    total_bytes = size_mb * 1024 * 1024
    
    with open(file_path, "wb") as f:
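        # A timestamped header keeps each file's content (and checksum) unique,
        # so repeat runs are not mistaken for duplicates of earlier uploads.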
        header = f"TEST FILE: {file_path}\nTIMESTAMP: {timestamp}\n".encode('utf-8')
        f.write(header)
        bytes_written = len(header)
        
        start_time = time.time()
        last_progress_time = start_time
        
        while bytes_written < total_bytes:
            bytes_to_write = min(len(pattern), total_bytes - bytes_written)
            f.write(pattern[:bytes_to_write])
            bytes_written += bytes_to_write
            
            current_time = time.time()
            if current_time - last_progress_time >= 1.0:
                percent_complete = (bytes_written / total_bytes) * 100
                print(f"  Progress: {bytes_written / (1024*1024):.1f}MB / {size_mb}MB ({percent_complete:.1f}%)")
                last_progress_time = current_time
    
    file_size = os.path.getsize(file_path)
    print(f"Finished generating file: {file_path} ({file_size / (1024*1024):.1f}MB)")
    return file_path


def zip_files(file_paths):
    """Zips multiple files into a single zip to avoid tabular ingest."""
    base_file = os.path.basename(file_paths[0])
    zip_basename = f"{os.path.splitext(base_file)[0]}_multiple_files.zip"
    
    print(f"\nZipping files into: {zip_basename}")
    
    with zipfile.ZipFile(zip_basename, 'w', zipfile.ZIP_STORED) as zipf:
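        # ZIP_STORED writes entries uncompressed, so the archive size matches
        # the raw inputs and upload-throughput numbers stay comparable.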
        for fp in file_paths:
            print(f"  Adding: {fp}")
            zipf.write(fp, os.path.basename(fp))
    
    zip_size = os.path.getsize(zip_basename)
    print(f"Created zip file: {zip_basename}")
    print(f"  Zip size: {zip_size / (1024*1024):.2f}MB\n")
    
    return zip_basename


def upload_file(file_path, dataset_pid, server_url, api_token, disable_tab_ingest=False, debug=False):
    """
    Upload a file to Dataverse using the direct API.
    
    Args:
        file_path: Path to the file to upload
        dataset_pid: Persistent ID of the dataset
        server_url: URL of the Dataverse server
        api_token: API token for authentication
        disable_tab_ingest: Whether to disable tabular ingest
        debug: Whether to save detailed debug information
        
    Returns:
        True if upload was successful, False otherwise
    """
    print(f"Uploading file: {file_path} to dataset {dataset_pid}")
    print(f"Tab ingest: {'disabled' if disable_tab_ingest else 'enabled'}")
    
    start_time = time.time()
    
    try:
        # Ensure the dataset PID carries a recognized scheme prefix
        if not dataset_pid.startswith(("doi:", "hdl:", "perma:")):
            dataset_pid = f"doi:{dataset_pid}"
            
        # Dataverse native API endpoint for adding a file to a dataset by
        # persistent ID; tabIngest=false skips tabular ingest, which would
        # otherwise lock the dataset while CSV/spreadsheet files are processed.
        url = f"{server_url}/api/datasets/:persistentId/add?persistentId={dataset_pid}"
        if disable_tab_ingest:
            url += "&tabIngest=false"
            
        headers = {"X-Dataverse-key": api_token}
        
        print(f"Using API URL: {url}")
        
        with open(file_path, "rb") as f:
            files = {"file": (os.path.basename(file_path), f)}
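            # The add-file endpoint also accepts an optional "jsonData" form
            # field for per-file metadata (description, directoryLabel, etc.);
            # omitted here since these are throwaway test files.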
            print("Sending request...")
            
            # Set a longer timeout for large files
            file_size_mb = os.path.getsize(file_path) / (1024*1024)
            timeout = max(300, int(file_size_mb * 2))  # 2 seconds per MB, minimum 5 minutes
            print(f"Request timeout set to {timeout} seconds")
            
            response = requests.post(url, headers=headers, files=files, timeout=timeout)
            
            if response.ok:
                print(f"Upload successful with status {response.status_code}")
                
                # Save the server's response for debugging if requested
                if debug:
                    debug_file = f"debug_response_{os.path.basename(file_path)}_{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}.json"
                    try:
                        with open(debug_file, 'w') as df:
                            df.write(response.text)
                        print(f"Debug response saved to {debug_file}")
                    except Exception as e:
                        print(f"Could not save debug response: {e}")
            else:
                print(f"Upload failed with status {response.status_code}")
                print(f"Response: {response.text}")
                
                # Save error response for debugging
                error_file = f"error_response_{os.path.basename(file_path)}_{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}.txt"
                try:
                    with open(error_file, 'w') as ef:
                        ef.write(f"Status code: {response.status_code}\n\n")
                        ef.write(f"Headers:\n{response.headers}\n\n")
                        ef.write(f"Response:\n{response.text}")
                    print(f"Error details saved to {error_file}")
                except Exception as e:
                    print(f"Could not save error details: {e}")
                
                return False
        
        # Calculate elapsed time and speed
        elapsed = time.time() - start_time
        upload_speed_mbps = (file_size_mb * 8) / elapsed if elapsed > 0 else 0
        
        print(f"Upload completed for {file_path}")
        print(f"  Time: {elapsed:.2f} seconds ({elapsed/60:.2f} minutes)")
        print(f"  Speed: {upload_speed_mbps:.2f} Mbps ({upload_speed_mbps/8:.2f} MB/s)")
        
        return True
    
    except requests.exceptions.Timeout:
        elapsed = time.time() - start_time
        print(f"ERROR: Request timed out after {elapsed:.2f} seconds")
        print(f"This may indicate that the server is overloaded or that the file is too large.")
        print(f"Consider using a smaller file size or trying again later.")
        return False
    
    except requests.exceptions.ConnectionError:
        elapsed = time.time() - start_time
        print(f"ERROR: Connection error after {elapsed:.2f} seconds")
        print(f"Could not connect to the server. Please check your internet connection and server URL.")
        return False
    
    except Exception as e:
        elapsed = time.time() - start_time
        print(f"Exception during upload of {file_path}: {e}")
        print(f"  Time elapsed before error: {elapsed:.2f} seconds ({elapsed/60:.2f} minutes)")
        
        # Print full traceback for debugging
        traceback.print_exc()
        
        return False


def main():
    parser = argparse.ArgumentParser(description='Dataverse Uploader - A tool for uploading files to Dataverse')
    parser.add_argument('--size', type=int, default=10, help='Size of each test file in MB (default: 10)')
    parser.add_argument('--num-files', type=int, default=1, help='Number of files to create (default: 1)')
    parser.add_argument('--disable-tab-ingest', action='store_true', help='Disable tabular ingest (prevents dataset locks)')
    parser.add_argument('--zip', action='store_true', help='Create a zip file before uploading (alternative way to avoid tabular ingest)')
    parser.add_argument('--debug', action='store_true', help='Save detailed debug information for successful uploads')
    
    args = parser.parse_args()
    
    # Check if required configuration is available
    if not DATAVERSE_SERVER_URL or not API_TOKEN or not DATASET_PID:
        print("ERROR: Missing required configuration in .env file:")
        if not DATAVERSE_SERVER_URL:
            print("  - DATAVERSE_SERVER_URL")
        if not API_TOKEN:
            print("  - API_TOKEN")
        if not DATASET_PID:
            print("  - DATASET_PID")
        return
    
    # Print configuration
    print("=== Configuration ===")
    print(f"DATAVERSE_SERVER_URL: {DATAVERSE_SERVER_URL}")
    print(f"DATASET_PID: {DATASET_PID}")
    print(f"API_TOKEN: {API_TOKEN[:5]}...{API_TOKEN[-5:]}")
    print(f"FILE_SIZE: {args.size}MB")
    print(f"NUMBER_OF_FILES: {args.num_files}")
    print(f"DISABLE_TAB_INGEST: {'Yes' if args.disable_tab_ingest else 'No'}")
    print(f"ZIP_BEFORE_UPLOAD: {'Yes' if args.zip else 'No'}")
    print(f"DEBUG_MODE: {'Yes' if args.debug else 'No'}")
    print("====================")
    
    try:
        # Create the requested number of files
        file_paths = []
        for i in range(1, args.num_files + 1):
            fp = create_test_file(size_mb=args.size, file_index=i)
            file_paths.append(fp)
        
        # Bundle the files into a single zip, or upload each one individually
        if args.zip:
            upload_paths = [zip_files(file_paths)]
        else:
            upload_paths = file_paths
        
        # Disable tab ingest when explicitly requested or when uploading a zip
        disable_tab_ingest = args.disable_tab_ingest or args.zip
        
        success = True
        for upload_path in upload_paths:
            if not upload_file(upload_path, DATASET_PID, DATAVERSE_SERVER_URL, API_TOKEN, disable_tab_ingest, args.debug):
                success = False
        
        # Clean up the zip if it was created and successfully uploaded
        if success and args.zip and os.path.exists(upload_paths[0]):
            try:
                print(f"Cleaning up temporary zip file: {upload_paths[0]}")
                os.remove(upload_paths[0])
            except Exception as e:
                print(f"Warning: Could not remove temporary zip file: {e}")
        
        if success:
            print("All uploads completed successfully.")
        else:
            print("One or more uploads failed.")
    
    except Exception as e:
        print(f"ERROR: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    print(f"Starting Dataverse Uploader at: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    main()
