Add functions to read a CSV file and generate timestamps.

This commit is contained in:
Nick Hepler 2024-08-18 12:18:39 -04:00
parent fb0c920d8a
commit bebab601d4

118
OutageGen.py Normal file
View File

@ -0,0 +1,118 @@
import argparse
import csv
import random
from datetime import datetime, timedelta
def read_csv(file_path):
    """Load every data row of a CSV file as a dictionary keyed by column name.

    Args:
        file_path: Path of the CSV file; it is opened as UTF-8 text.

    Returns:
        A list with one dictionary per data row, or None when the file does
        not exist or any other error is raised while reading it. In both
        failure cases a message describing the problem is printed.
    """
    try:
        with open(file_path, mode='r', newline='', encoding='utf-8') as handle:
            # DictReader uses the first row as the keys for each later row.
            rows = list(csv.DictReader(handle))
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        return None
    except Exception as exc:
        print(f"An error occurred: {exc}")
        return None
    return rows
def add_additional_fields(records, additional_fields):
    """Ensure every record has each of the given fields, defaulting to None.

    Records are modified in place. A field that a record already contains
    keeps its current value; only missing fields are added.

    Args:
        records: Iterable of dictionaries to update.
        additional_fields: Iterable of field names that must be present.
    """
    for entry in records:
        for name in additional_fields:
            # setdefault only inserts when the key is absent.
            entry.setdefault(name, None)
def generate_random_timestamp(start_time, end_time):
    """Pick a random moment between start_time and end_time and format it.

    The offset added to start_time is a whole number of seconds drawn with
    random.randint from 0 up to the interval length truncated to an integer,
    both ends inclusive.

    Args:
        start_time: datetime at the beginning of the interval.
        end_time: datetime at the end of the interval.

    Returns:
        The chosen moment formatted with "%m/%d/%Y %I:%M:%S %p"
        (month/day/year followed by a 12-hour clock time and AM/PM marker).
    """
    span_seconds = int((end_time - start_time).total_seconds())
    offset = timedelta(seconds=random.randint(0, span_seconds))
    chosen = start_time + offset
    return chosen.strftime("%m/%d/%Y %I:%M:%S %p")
def distribute_records(records, total_jobs, start_job_number):
    """Shuffle records and split them into consecutively numbered job groups.

    The records list is shuffled in place, then cut into total_jobs
    contiguous slices whose sizes differ by at most one (the earliest groups
    receive the extra records). Every record in a group has its
    'OutageJobNumber' key set to that group's job number.

    Args:
        records: List of dictionaries to distribute; shuffled and updated
            in place.
        total_jobs: Number of groups to create; must be at least 1.
        start_job_number: Job number given to the first group; each later
            group gets the previous number plus one.

    Returns:
        A list of total_jobs lists of records. Groups are empty when
        total_jobs exceeds the number of records.

    Raises:
        ValueError: If total_jobs is less than 1.
    """
    # Validate before shuffling so a bad call leaves the input untouched.
    # Without this check, 0 fails with ZeroDivisionError and a negative
    # value silently returns an empty distribution.
    if total_jobs < 1:
        raise ValueError(f"total_jobs must be at least 1, got {total_jobs}")

    random.shuffle(records)  # Randomize which records land in which job
    base_size, extra = divmod(len(records), total_jobs)

    distribution = []
    start_index = 0
    for offset in range(total_jobs):
        # The first `extra` groups absorb the remainder, one record each.
        end_index = start_index + base_size + (1 if offset < extra else 0)
        job_records = records[start_index:end_index]
        job_number = start_job_number + offset
        for record in job_records:
            record['OutageJobNumber'] = job_number
        distribution.append(job_records)
        start_index = end_index
    return distribution
def assign_outage_start_dt(distributed_records):
    """Give each job group one shared, randomly chosen 'OutageStartDt'.

    A single timestamp is drawn per group from the twelve hours ending at
    the moment this function is called, and written to every record in that
    group. Records are updated in place.

    Args:
        distributed_records: Iterable of record lists, one list per job.
    """
    window_end = datetime.now()
    window_start = window_end - timedelta(hours=12)
    for group in distributed_records:
        # One draw per group so all records of a job share the same start.
        shared_start = generate_random_timestamp(window_start, window_end)
        for entry in group:
            entry['OutageStartDt'] = shared_start
def main():
    """Command-line entry point.

    Reads the CSV file named on the command line, pads each record with the
    additional outage fields, splits the records across a random number of
    jobs numbered upward from the given starting job number, assigns each
    job a random start timestamp, and prints the result grouped by job.
    """
    parser = argparse.ArgumentParser(
        description=(
            'Distribute outage records randomly among TotalJobs with '
            'incremental OutageJobNumber starting from a specified number.'
        )
    )
    parser.add_argument('file_path', type=str, help='Path to the CSV file')
    parser.add_argument('start_job_number', type=int, help='Starting Outage Job Number')
    args = parser.parse_args()

    # Fields every record must carry; any that are missing are set to None.
    additional_fields = [
        'AffectedDeviceLatitude', 'Status', 'CustomerLongitude', 'Municipality', 'AffectedDeviceLongitude',
        'CriticalFacilityName', 'AffectedDevice', 'IsCustomerIsLifeSupportEquipment', 'County', 'CrewAssignDt',
        'ActualRestorationDt', 'CrewStatus', 'CrewOnsiteDt', 'OutageDescription', 'OutageStartDt', 'IsCriticalFacility',
        'Cause', 'SubCause', 'OpDiv', 'UCBatchId', 'UtilityId', 'OutageJobNumber', 'CustomerLatitude', 'EstimatedRestorationDt',
    ]

    records = read_csv(args.file_path)
    if records is None:
        print("Failed to read records from the CSV file.")
        return

    add_additional_fields(records, additional_fields)
    if not records:
        print("No records found in the CSV file.")
        return

    # The job count is random but never exceeds the record count, so every
    # job receives at least one record.
    total_jobs = random.randint(1, len(records))
    print(f"TotalJobs: {total_jobs}")

    distributed_records = distribute_records(records, total_jobs, args.start_job_number)
    assign_outage_start_dt(distributed_records)

    # Report each job followed by the records assigned to it.
    for job_records in distributed_records:
        print(f"\nOutage Job Number: {job_records[0]['OutageJobNumber']}")
        for record in job_records:
            print(record)
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()