# OutageGen/OutageGen.py

import argparse
import csv
import random
from datetime import datetime, timedelta

def read_csv(file_path):
    """Load every data row of a CSV file as a dictionary.

    The file is opened as UTF-8 text and parsed with ``csv.DictReader``, so
    each row is keyed by the column names taken from the header line.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list with one dict per data row, or ``None`` if the file does not
        exist or any other error occurs while reading. In both failure cases
        a message is printed to standard output.
    """
    try:
        with open(file_path, mode='r', newline='', encoding='utf-8') as handle:
            # Consume the reader while the file is still open.
            rows = list(csv.DictReader(handle))
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        return None
    except Exception as exc:
        print(f"An error occurred: {exc}")
        return None

    return rows

def add_additional_fields(records, additional_fields):
    """Make sure every record carries each of the additional fields.

    Records are updated in place and nothing is returned. A field that a
    record already has keeps its current value; a missing field is added
    with the value ``None``.

    Args:
        records: Iterable of dict records to update.
        additional_fields: Iterable of field names to ensure on each record.
    """
    for row in records:
        for name in additional_fields:
            # setdefault only inserts when the key is absent.
            row.setdefault(name, None)

def generate_random_timestamp(start_time, end_time):
    """Return a random moment between two datetimes as a formatted string.

    The offset from ``start_time`` is a whole number of seconds drawn
    uniformly from 0 up to the (truncated) length of the interval, so both
    endpoints are possible results.

    Args:
        start_time: Earliest datetime that may be returned.
        end_time: Latest datetime that may be returned.

    Returns:
        The chosen moment formatted as ``MM/DD/YYYY hh:mm:ss AM/PM``.
    """
    span_seconds = int((end_time - start_time).total_seconds())
    offset = timedelta(seconds=random.randint(0, span_seconds))
    chosen = start_time + offset
    return chosen.strftime("%m/%d/%Y %I:%M:%S %p")

def distribute_records(records, total_jobs, start_job_number):
    """Randomly split records into consecutively numbered outage jobs.

    ``records`` is shuffled in place and then cut into ``total_jobs``
    contiguous groups whose sizes differ by at most one (the first
    ``len(records) % total_jobs`` groups get the extra record). Every record
    in group ``i`` has its ``'OutageJobNumber'`` key set to
    ``start_job_number + i``.

    Args:
        records: List of dict records; shuffled and updated in place.
        total_jobs: Number of groups to create; must be at least 1.
        start_job_number: Job number assigned to the first group.

    Returns:
        A list of ``total_jobs`` lists of records. When ``total_jobs``
        exceeds the number of records, the trailing groups are empty.

    Raises:
        ValueError: If ``total_jobs`` is less than 1.
    """
    # Without this guard 0 would fail with a bare ZeroDivisionError and a
    # negative value would silently return no groups at all.
    if total_jobs < 1:
        raise ValueError(f"total_jobs must be at least 1, got {total_jobs}")

    random.shuffle(records)  # Randomize which records end up in which job

    base_size, extra = divmod(len(records), total_jobs)

    distribution = []
    start_index = 0
    for offset in range(total_jobs):
        # The first `extra` jobs absorb the remainder, one record each.
        end_index = start_index + base_size + (1 if offset < extra else 0)
        job_records = records[start_index:end_index]
        job_number = start_job_number + offset
        for record in job_records:
            record['OutageJobNumber'] = job_number
        distribution.append(job_records)
        start_index = end_index

    return distribution

def assign_outage_start_dt(distributed_records):
    """Give each job's records one shared, random ``OutageStartDt``.

    A separate timestamp is drawn for every job from the twelve hours
    leading up to the moment this function is called, and written to the
    ``'OutageStartDt'`` key of each record in that job. Records are updated
    in place and nothing is returned.

    Args:
        distributed_records: Iterable of per-job lists of dict records.
    """
    window_end = datetime.now()
    window_start = window_end - timedelta(hours=12)

    for group in distributed_records:
        # One draw per job, so all records of the job agree on the start.
        start_dt = generate_random_timestamp(window_start, window_end)
        for row in group:
            row['OutageStartDt'] = start_dt

def main():
    """Command-line entry point.

    Parses the CSV path and the starting job number from the command line,
    loads the records, pads them with the additional outage fields, splits
    them across a random number of jobs (between 1 and the record count),
    assigns each job a random start timestamp and prints the result grouped
    by job number. Prints a message and stops early if the file cannot be
    read or contains no records.
    """
    parser = argparse.ArgumentParser(description='Distribute outage records randomly among TotalJobs with incremental OutageJobNumber starting from a specified number.')
    parser.add_argument('file_path', type=str, help='Path to the CSV file')
    parser.add_argument('start_job_number', type=int, help='Starting Outage Job Number')
    args = parser.parse_args()

    # Fields every record must carry; missing ones are filled with None.
    additional_fields = [
        'AffectedDeviceLatitude',
        'Status',
        'CustomerLongitude',
        'Municipality',
        'AffectedDeviceLongitude',
        'CriticalFacilityName',
        'AffectedDevice',
        'IsCustomerIsLifeSupportEquipment',
        'County',
        'CrewAssignDt',
        'ActualRestorationDt',
        'CrewStatus',
        'CrewOnsiteDt',
        'OutageDescription',
        'OutageStartDt',
        'IsCriticalFacility',
        'Cause',
        'SubCause',
        'OpDiv',
        'UCBatchId',
        'UtilityId',
        'OutageJobNumber',
        'CustomerLatitude',
        'EstimatedRestorationDt',
    ]

    records = read_csv(args.file_path)
    if records is None:
        print("Failed to read records from the CSV file.")
        return

    add_additional_fields(records, additional_fields)

    if not records:
        print("No records found in the CSV file.")
        return

    # Pick how many jobs to create: anywhere from one job to one per record.
    total_jobs = random.randint(1, len(records))
    print(f"TotalJobs: {total_jobs}")

    distributed_records = distribute_records(records, total_jobs, args.start_job_number)
    assign_outage_start_dt(distributed_records)

    # Report each job followed by the records assigned to it.
    for job_records in distributed_records:
        # total_jobs never exceeds the record count, so no group is empty.
        print(f"\nOutage Job Number: {job_records[0]['OutageJobNumber']}")
        for record in job_records:
            print(record)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()