#!/usr/bin/env python3
"""
DXF Drawing Text Index Builder

This script extracts all text content from DXF files and creates a searchable JSON index.
The index maps each drawing file to all text strings found within it.

Usage:
    python build_drawing_index.py

Output:
    drawing_index.json - JSON file containing the full text index
    drawing_index_keywords.json - JSON file listing which drawings mention each keyword

Written by Douglas Millner of NERX Power Consultants LLC - Social, Website, and Blog - https://linktr.ee/nerxpower

DISCLAIMER: The software is free for distribution. The user assumes all risk.
"""

import os
import sys
import json
import re
from pathlib import Path
from collections import defaultdict

# Always run from the script's directory rather than whatever directory the
# interpreter happened to be launched from (e.g. system32 on Windows).
# This is a deliberate import-time side effect: the os.getcwd() calls and the
# relative output file names used later in this file resolve against it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
os.chdir(SCRIPT_DIR)

def count_dxf_files(root_dir):
    """Return the number of DXF files found anywhere beneath root_dir.

    The whole directory tree is walked; a file counts when its name ends
    with ``.dxf``, compared case-insensitively.
    """
    return sum(
        1
        for _dirpath, _dirnames, filenames in os.walk(root_dir)
        for name in filenames
        if name.lower().endswith('.dxf')
    )

def show_welcome_screen(file_count):
    """Display the welcome screen and wait for the user to confirm.

    Clears the terminal, prints what the script does, where it searches,
    how many DXF files were found and rough time/size estimates derived
    from that count, then blocks until ENTER is pressed. The terminal is
    cleared again before returning so progress output starts on a clean
    screen.

    Args:
        file_count: Number of DXF files detected; drives the estimates.

    Raises:
        SystemExit: With status 0 when the user presses Ctrl+C at the prompt.
    """
    # Clear screen (works on Windows and Unix)
    clear_command = 'cls' if os.name == 'nt' else 'clear'
    os.system(clear_command)

    # Calculate estimates based on actual file count
    # Assuming ~0.08-0.15 seconds per file for processing
    min_time_seconds = file_count * 0.08
    max_time_seconds = file_count * 0.15
    min_time_minutes = min_time_seconds / 60
    max_time_minutes = max_time_seconds / 60

    # Estimate index size: typically 8-10 KB per file
    min_size_mb = (file_count * 8) / 1024
    max_size_mb = (file_count * 10) / 1024

    print("=" * 80)
    print(" " * 25 + "DXF DRAWING TEXT INDEX BUILDER")
    print("=" * 80)
    print()
    print("📋 WHAT THIS SCRIPT DOES:")
    print("-" * 80)
    print("  • Scans ALL .dxf files in THIS directory and ALL subdirectories")
    print("  • Recursively searches through every folder and subfolder")
    print("  • Extracts all text content from each drawing")
    print("  • Creates a searchable JSON index for instant searching")
    print("  • Generates keyword statistics for common electrical terms")
    print()
    print("📁 SEARCH LOCATION:")
    print("-" * 80)
    print(f"  Starting from: {os.getcwd()}")
    print("  Searching: THIS DIRECTORY + ALL SUBDIRECTORIES (recursive)")
    print()
    print("📊 FILES FOUND:")
    print("-" * 80)
    print(f"  • {file_count:,} DXF files detected")
    print()
    print("📊 EXPECTED OUTPUT FILES:")
    print("-" * 80)
    print(f"  • drawing_index.json - Full searchable index ({file_count:,} files)")
    print("  • drawing_index_keywords.json - Keyword statistics")
    print()
    print("⏱️  ESTIMATED TIME:")
    print("-" * 80)
    if max_time_minutes < 1:
        # NOTE(review): this branch reports max..max*1.2 rather than min..max;
        # kept as-is so the displayed estimate does not change.
        print(f"  • Processing time: {int(max_time_seconds)} - {int(max_time_seconds * 1.2)} seconds")
    elif max_time_minutes < 2:
        print("  • Processing time: ~1-2 minutes")
    else:
        print(f"  • Processing time: ~{int(min_time_minutes)}-{int(max_time_minutes)} minutes")
    print("  • Progress will be shown as files are processed")
    print()
    print("💾 ESTIMATED FILE SIZE:")
    print("-" * 80)
    print(f"  • Index size: ~{min_size_mb:.1f}-{max_size_mb:.1f} MB (depending on text content)")
    print()
    print("🔄 RECURSIVE SEARCH:")
    print("-" * 80)
    print("  • YES - All subdirectories will be searched")
    print("  • All nested folders will be included")
    print()
    print("=" * 80)
    print("Written by Douglas Millner of NERX Power Consultants LLC")
    print("Social, Website, and Blog - https://linktr.ee/nerxpower")
    print("DISCLAIMER: The software is free for distribution. The user assumes all risk.")
    print("=" * 80)
    print()
    print("⌨️  Press ENTER to continue or Ctrl+C to cancel...")
    print()

    try:
        input()
    except KeyboardInterrupt:
        print("\n\n❌ Operation cancelled by user.")
        # sys.exit instead of the site-provided exit(), which is meant for the
        # interactive shell and is absent under `python -S` or frozen builds.
        sys.exit(0)
    except EOFError:
        # stdin is closed or redirected (non-interactive run): there is nobody
        # to wait for, so continue instead of crashing with a traceback.
        pass

    # Clear screen again before showing progress
    os.system(clear_command)

# Group codes whose value line is harvested as text. In the DXF group-code
# table 1 is the primary text value, 3 is "other text or name values" and
# 7 is the text style name.
_TEXT_GROUP_CODES = frozenset({'1', '3', '7'})


def extract_text_from_dxf(filepath):
    """
    Extract all text content from an ASCII DXF file.

    An ASCII DXF file is a sequence of two-line records: a group-code line
    followed by its value line. The file is read pair by pair, so a *value*
    that happens to be "1", "3" or "7" is never mistaken for a group code
    (which would cause the following group code to be indexed as text).

    Values are kept when they follow group code 1, 3 or 7, are longer than
    one character and do not start with 'AcDb'. '\\P' (DXF line break) is
    replaced by a space and backslash + uppercase-letter formatting codes
    are removed.

    Args:
        filepath: Path of the DXF file to read.

    Returns:
        A sorted list of the unique text strings found. An empty list is
        returned when the file cannot be read; the error is printed rather
        than raised so one bad file does not abort a whole index build.
    """
    text_content = set()  # Use set to avoid duplicates

    try:
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
            for code_line in f:
                # Consume the value line belonging to this group code so the
                # iteration stays aligned on (code, value) pairs.
                value_line = next(f, None)
                if value_line is None:
                    break  # Dangling group code at end of file

                if code_line.strip() not in _TEXT_GROUP_CODES:
                    continue

                text = value_line.strip()
                # Only keep non-empty, meaningful text
                if len(text) <= 1 or text.startswith('AcDb'):
                    continue

                text = text.replace('\\P', ' ')  # DXF line break
                text = re.sub(r'\\[A-Z]', '', text)  # Remove DXF formatting codes
                if text:
                    text_content.add(text)

    except Exception as e:
        # Deliberately best-effort: report the problem and skip this file.
        print(f"  ⚠️ Error reading {filepath}: {e}")
        return []

    return sorted(text_content)

def build_index(root_dir, output_file='drawing_index.json'):
    """
    Build a searchable index of all DXF files in the directory tree.

    Walks root_dir recursively, extracts the text of every ``.dxf`` file
    (case-insensitive extension match) and writes the result as JSON.
    Files that yield no text are counted as processed but get no index entry.

    Args:
        root_dir: Directory to scan.
        output_file: Path of the JSON file to write.

    Returns:
        The index dict, keyed by the file's path relative to root_dir with
        forward slashes. Each value holds 'filename', 'path', 'text_count'
        and 'text'.
    """
    print("=" * 80)
    print("STARTING INDEX BUILD")
    print("=" * 80)
    print(f"📂 Scanning directory: {root_dir}")
    print("⏳ This may take several minutes...")
    print()

    index = {}
    file_count = 0

    # Find all DXF files
    for root, dirs, files in os.walk(root_dir):
        for filename in files:
            if not filename.lower().endswith('.dxf'):
                continue

            filepath = os.path.join(root, filename)
            relative_path = os.path.relpath(filepath, root_dir)

            # Normalize path separators to forward slashes for cross-platform compatibility
            # This ensures paths work correctly in JSON, URLs, and web browsers
            relative_path = relative_path.replace('\\', '/')

            # Show progress
            file_count += 1
            if file_count % 50 == 0:
                print(f"  ✓ Processed {file_count} files...")
            else:
                print(f"  Processing: {relative_path}")

            # Extract text from the file
            text_content = extract_text_from_dxf(filepath)

            if text_content:
                index[relative_path] = {
                    'filename': filename,
                    'path': relative_path,
                    'text_count': len(text_content),
                    'text': text_content
                }

    print()
    print(f"💾 Saving index to {output_file}...")
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(index, f, indent=2, ensure_ascii=False)

    print()
    print("=" * 80)
    print("✅ INDEX CREATED SUCCESSFULLY!")
    print("=" * 80)
    print(f"📊 Total files indexed: {file_count}")
    print(f"📁 Output file: {output_file}")
    print("=" * 80)

    # Create summary statistics
    total_text_entries = sum(item['text_count'] for item in index.values())
    # Guard the average: a tree without any DXF files would otherwise raise
    # ZeroDivisionError after the index has already been written.
    average_per_file = total_text_entries / file_count if file_count else 0.0
    print()
    print("📈 STATISTICS:")
    print("-" * 80)
    print(f"  Total unique text entries: {total_text_entries:,}")
    print(f"  Average text per file: {average_per_file:.1f}")
    print()

    return index

def create_keyword_summary(index, keywords):
    """
    Map each keyword to the indexed files whose text mentions it.

    For every index entry the text strings are joined with spaces and
    lower-cased; a keyword matches when its lower-cased form occurs anywhere
    in that string (plain substring test). Keywords with no match get no key.

    Returns a defaultdict(list) of keyword -> list of index keys.
    """
    matches = defaultdict(list)

    for path, entry in index.items():
        haystack = ' '.join(entry['text']).lower()
        found = [word for word in keywords if word.lower() in haystack]
        for word in found:
            matches[word].append(path)

    return matches

if __name__ == '__main__':
    # Script entry point: count the DXF files, show the welcome screen,
    # build the text index, then write and print the keyword summary.

    # Get the current directory
    current_dir = os.getcwd()

    # Quick scan to count files
    print("Scanning for DXF files...")
    file_count = count_dxf_files(current_dir)

    # Show welcome screen with smart estimates
    show_welcome_screen(file_count)

    print("=" * 80)
    print("DXF DRAWING TEXT INDEX BUILDER - RUNNING")
    print("=" * 80)
    print(f"📂 Working directory: {current_dir}")
    print()

    # Build the index
    index = build_index(current_dir)

    # Create a keyword summary for common electrical terms
    keywords = [
        'transformer', 'xfmr', 'ct', 'bus', 'breaker',
        'mva', 'amps', 'voltage', 'current', 'relay',
        'generator', 'switchgear', 'panel', 'motor'
    ]

    print("🔍 Creating keyword summary...")
    summary = create_keyword_summary(index, keywords)

    # Save keyword summary
    with open('drawing_index_keywords.json', 'w', encoding='utf-8') as f:
        json.dump(summary, f, indent=2)

    print()
    print("📊 KEYWORD SUMMARY:")
    print("-" * 80)
    # Most frequently matched keywords first
    for keyword, files in sorted(summary.items(), key=lambda x: len(x[1]), reverse=True):
        print(f"  {keyword:15s}: {len(files):4d} files")

    print()
    print("=" * 80)
    print("✅ ALL DONE!")
    print("=" * 80)
    print()
    print("📝 NEXT STEPS:")
    print("  1. Open search_drawings_local.html in your browser")
    print("  2. Load the drawing_index.json file")
    print("  3. Start searching!")
    print()
    print("⌨️  Press ENTER to exit...")

    # Keep the console window open until the user acknowledges. Only the two
    # outcomes a prompt can reasonably produce are ignored (closed stdin or
    # Ctrl+C) instead of a bare except that would hide any other error.
    try:
        input()
    except (EOFError, KeyboardInterrupt):
        pass