Add functions to read CSV, generate time stamps.

2024-08-18 12:18:39 -04:00 · 2024-08-18 12:18:39 -04:00 · bebab601d4
commit bebab601d4
parent fb0c920d8a
1 changed files with 118 additions and 0 deletions
--- a/OutageGen.py
+++ b/OutageGen.py
@@ -0,0 +1,118 @@
+import argparse
+import csv
+import random
+from datetime import datetime, timedelta
+
+def read_csv(file_path):
+    """Read a CSV file and return its rows as a list of dictionaries.
+
+    The first row is treated as the header; every following row becomes a
+    dict keyed by those header names (``csv.DictReader`` semantics).
+
+    Args:
+        file_path: Path to the CSV file to read.
+
+    Returns:
+        A list of row dictionaries (empty when the file has no data rows),
+        or ``None`` when the file could not be opened, decoded or parsed.
+        Failures are reported with ``print`` rather than raised.
+    """
+    try:
+        # 'utf-8-sig' drops a leading byte-order mark (commonly written by
+        # Excel) so it is not glued to the first column name; files without
+        # a BOM decode exactly as they would with plain 'utf-8'.
+        with open(file_path, mode='r', newline='', encoding='utf-8-sig') as file:
+            return list(csv.DictReader(file))
+    except FileNotFoundError:
+        print(f"Error: The file '{file_path}' was not found.")
+    except (OSError, ValueError, csv.Error) as e:
+        # OSError: permission / I/O problems. ValueError: undecodable bytes
+        # (UnicodeDecodeError) or an invalid path. csv.Error: malformed CSV.
+        print(f"An error occurred: {e}")
+    return None
+
+def add_additional_fields(records, additional_fields):
+    """Ensure every record contains each of the additional fields.
+
+    Records are modified in place: a field missing from a record is added
+    with the value ``None``; a field the record already has keeps its value.
+    """
+    for row in records:
+        for name in additional_fields:
+            # setdefault only inserts when the key is absent.
+            row.setdefault(name, None)
+
+def generate_random_timestamp(start_time, end_time):
+    """Return a random moment between start_time and end_time as a string.
+
+    The offset from start_time is a whole number of seconds drawn from the
+    inclusive range covered by the two datetimes. The result is formatted as
+    ``MM/DD/YYYY HH:MM:SS AM/PM`` (12-hour clock).
+    """
+    span_seconds = int((end_time - start_time).total_seconds())
+    offset = timedelta(seconds=random.randint(0, span_seconds))
+    return (start_time + offset).strftime("%m/%d/%Y %I:%M:%S %p")
+
+def distribute_records(records, total_jobs, start_job_number):
+    """Distribute records randomly among consecutive outage job numbers.
+
+    ``records`` is shuffled in place and then cut into ``total_jobs``
+    contiguous groups whose sizes differ by at most one (the first
+    ``len(records) % total_jobs`` groups receive the extra record). Every
+    record in group ``i`` gets ``record['OutageJobNumber']`` set to
+    ``start_job_number + i``.
+
+    Args:
+        records: List of record dicts; shuffled and updated in place.
+        total_jobs: Number of groups to create; must be at least 1.
+        start_job_number: Job number assigned to the first group.
+
+    Returns:
+        A list of ``total_jobs`` lists of records. Trailing groups are empty
+        when ``total_jobs`` exceeds ``len(records)``.
+
+    Raises:
+        ValueError: If ``total_jobs`` is less than 1.
+    """
+    # Reject bad counts up front: 0 would otherwise fail with an opaque
+    # ZeroDivisionError, and a negative value would silently return [].
+    if total_jobs < 1:
+        raise ValueError(f"total_jobs must be at least 1, got {total_jobs}")
+
+    random.shuffle(records)  # Shuffle the records to randomize distribution
+
+    records_per_job, remainder = divmod(len(records), total_jobs)
+
+    distribution = []
+    start_index = 0
+    for offset in range(total_jobs):
+        # The first `remainder` groups absorb one leftover record each.
+        end_index = start_index + records_per_job + (1 if offset < remainder else 0)
+        job_records = records[start_index:end_index]
+        for record in job_records:
+            record['OutageJobNumber'] = start_job_number + offset
+        distribution.append(job_records)
+        start_index = end_index
+
+    return distribution
+
+def assign_outage_start_dt(distributed_records):
+    """Stamp each job group with one random OutageStartDt from the last 12 hours.
+
+    A single timestamp is drawn per group and written to the
+    'OutageStartDt' key of every record in that group, so all records that
+    share a job also share a start time. Records are modified in place.
+    """
+    window_end = datetime.now()
+    window_start = window_end - timedelta(hours=12)
+
+    for group in distributed_records:
+        # Draw once per group, not once per record.
+        start_dt = generate_random_timestamp(window_start, window_end)
+        for row in group:
+            row['OutageStartDt'] = start_dt
+
+def main():
+    """Command-line entry point.
+
+    Reads the CSV given on the command line, pads every record with the
+    additional outage fields, splits the records across a random number of
+    jobs numbered from ``start_job_number``, gives each job a random
+    OutageStartDt, and prints the result grouped by job.
+    """
+    parser = argparse.ArgumentParser(description='Distribute outage records randomly among TotalJobs with incremental OutageJobNumber starting from a specified number.')
+    parser.add_argument('file_path', type=str, help='Path to the CSV file')
+    parser.add_argument('start_job_number', type=int, help='Starting Outage Job Number')
+    args = parser.parse_args()
+
+    # Fields every output record must carry, filled with None when absent.
+    additional_fields = [
+        'AffectedDeviceLatitude', 'Status', 'CustomerLongitude', 'Municipality', 'AffectedDeviceLongitude',
+        'CriticalFacilityName', 'AffectedDevice', 'IsCustomerIsLifeSupportEquipment', 'County', 'CrewAssignDt',
+        'ActualRestorationDt', 'CrewStatus', 'CrewOnsiteDt', 'OutageDescription', 'OutageStartDt', 'IsCriticalFacility',
+        'Cause', 'SubCause', 'OpDiv', 'UCBatchId', 'UtilityId', 'OutageJobNumber', 'CustomerLatitude', 'EstimatedRestorationDt'
+    ]
+
+    records = read_csv(args.file_path)
+    if records is None:
+        print("Failed to read records from the CSV file.")
+        return
+
+    add_additional_fields(records, additional_fields)
+
+    if not records:
+        print("No records found in the CSV file.")
+        return
+
+    # Pick how many jobs to create: anywhere from one job for everything up
+    # to one job per record.
+    total_jobs = random.randint(1, len(records))
+    print(f"TotalJobs: {total_jobs}")
+
+    distributed_records = distribute_records(records, total_jobs, args.start_job_number)
+    assign_outage_start_dt(distributed_records)
+
+    # Report each job followed by the records assigned to it.
+    for job_records in distributed_records:
+        job_number = job_records[0]['OutageJobNumber']
+        print(f"\nOutage Job Number: {job_number}")
+        for record in job_records:
+            print(record)
+
+# Run the command-line interface only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()