# OutageGen.py
# Reconstructed from a whitespace-collapsed patch:
#   commit  bebab601d41dca10707abb27e29026633fb4b3a4
#   author  Nick Hepler, Sun, 18 Aug 2024 12:18:39 -0400
#   subject Add functions to read CSV, generate time stamps.
"""Distribute CSV records randomly across consecutively numbered outage jobs.

The script reads every row of a CSV file, pads each row with the outage
fields it does not already have, splits the rows randomly into a random
number of jobs numbered upward from a caller-supplied start, gives every
job one random start timestamp from the recent past, and prints the result.
"""

import argparse
import csv
import random
import sys
from datetime import datetime, timedelta

# Output format of every generated timestamp, e.g. "08/18/2024 12:18:39 PM".
TIMESTAMP_FORMAT = "%m/%d/%Y %I:%M:%S %p"

# Fields guaranteed to exist on every record after add_additional_fields().
# The order is kept as-is because it determines the key order of the
# printed dictionaries.
ADDITIONAL_FIELDS = (
    'AffectedDeviceLatitude', 'Status', 'CustomerLongitude', 'Municipality', 'AffectedDeviceLongitude',
    'CriticalFacilityName', 'AffectedDevice', 'IsCustomerIsLifeSupportEquipment', 'County', 'CrewAssignDt',
    'ActualRestorationDt', 'CrewStatus', 'CrewOnsiteDt', 'OutageDescription', 'OutageStartDt', 'IsCriticalFacility',
    'Cause', 'SubCause', 'OpDiv', 'UCBatchId', 'UtilityId', 'OutageJobNumber', 'CustomerLatitude', 'EstimatedRestorationDt',
)


def read_csv(file_path):
    """Read a CSV file and return its rows as a list of dictionaries.

    The first row is used as the header (``csv.DictReader``).

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list with one dictionary per data row, or ``None`` when the file
        is missing, unreadable, or cannot be decoded/parsed. A message
        describing the failure is printed in that case.
    """
    try:
        # 'utf-8-sig' reads plain UTF-8 unchanged but also strips a leading
        # byte-order mark, which would otherwise be glued onto the first
        # column name.
        with open(file_path, mode='r', newline='', encoding='utf-8-sig') as file:
            return list(csv.DictReader(file))
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
    except (OSError, ValueError, csv.Error) as e:
        # OSError: permissions, directories, I/O failures.
        # ValueError: undecodable bytes (UnicodeDecodeError) or a bad path.
        # csv.Error: malformed CSV content.
        print(f"An error occurred: {e}")
    return None


def add_additional_fields(records, additional_fields):
    """Add each missing field to every record with a value of ``None``.

    Records are modified in place; fields that are already present keep
    their current value.

    Args:
        records: Iterable of dictionaries to update.
        additional_fields: Iterable of field names that must exist.
    """
    for record in records:
        for field in additional_fields:
            record.setdefault(field, None)


def generate_random_timestamp(start_time, end_time):
    """Return a random timestamp between two datetimes, as a string.

    The offset from ``start_time`` is a whole number of seconds, and both
    endpoints are possible results.

    Args:
        start_time: Earliest ``datetime`` that may be produced.
        end_time: Latest ``datetime`` that may be produced.

    Returns:
        The chosen moment formatted as ``MM/DD/YYYY HH:MM:SS AM/PM``.

    Raises:
        ValueError: If ``end_time`` is earlier than ``start_time``.
    """
    span_seconds = int((end_time - start_time).total_seconds())
    if span_seconds < 0:
        raise ValueError("end_time must not be earlier than start_time")
    chosen = start_time + timedelta(seconds=random.randint(0, span_seconds))
    return chosen.strftime(TIMESTAMP_FORMAT)


def distribute_records(records, total_jobs, start_job_number):
    """Split records randomly into ``total_jobs`` groups and number them.

    Group sizes differ by at most one; the earlier groups receive the
    extra records. Every record gets its group's number stored under
    ``'OutageJobNumber'``. The caller's list is not reordered, but the
    record dictionaries themselves are updated.

    Args:
        records: List of record dictionaries.
        total_jobs: Number of groups to create; must be at least 1.
        start_job_number: Job number assigned to the first group; each
            following group gets the next integer.

    Returns:
        A list of ``total_jobs`` lists of records. Groups are empty when
        there are more jobs than records.

    Raises:
        ValueError: If ``total_jobs`` is less than 1.
    """
    if total_jobs < 1:
        raise ValueError(f"total_jobs must be at least 1, got {total_jobs}")

    # Shuffle a copy so the caller's list keeps its order.
    shuffled = list(records)
    random.shuffle(shuffled)

    base_size, extra = divmod(len(shuffled), total_jobs)
    distribution = []
    start_index = 0
    for offset in range(total_jobs):
        # The first `extra` groups take one additional record each.
        end_index = start_index + base_size + (1 if offset < extra else 0)
        job_records = shuffled[start_index:end_index]
        for record in job_records:
            record['OutageJobNumber'] = start_job_number + offset
        distribution.append(job_records)
        start_index = end_index

    return distribution


def assign_outage_start_dt(distributed_records, window_hours=12, now=None):
    """Give every job one random ``'OutageStartDt'`` shared by its records.

    Args:
        distributed_records: List of record groups, one per job.
        window_hours: Length in hours of the period, ending at ``now``,
            from which timestamps are drawn.
        now: End of that period; defaults to the current local time.
    """
    window_end = datetime.now() if now is None else now
    window_start = window_end - timedelta(hours=window_hours)

    for job_records in distributed_records:
        # One timestamp per job, so all of its records agree.
        timestamp = generate_random_timestamp(window_start, window_end)
        for record in job_records:
            record['OutageStartDt'] = timestamp


def main():
    """Run the command-line tool.

    Returns:
        ``0`` on success (including an empty CSV), ``1`` when the CSV file
        could not be read.
    """
    parser = argparse.ArgumentParser(description='Distribute outage records randomly among TotalJobs with incremental OutageJobNumber starting from a specified number.')
    parser.add_argument('file_path', type=str, help='Path to the CSV file')
    parser.add_argument('start_job_number', type=int, help='Starting Outage Job Number')
    args = parser.parse_args()

    records = read_csv(args.file_path)
    if records is None:
        print("Failed to read records from the CSV file.")
        return 1

    add_additional_fields(records, ADDITIONAL_FIELDS)

    if not records:
        print("No records found in the CSV file.")
        return 0

    # Randomly determine TotalJobs between 1 and the number of records.
    total_jobs = random.randint(1, len(records))
    print(f"TotalJobs: {total_jobs}")

    distributed_records = distribute_records(records, total_jobs, args.start_job_number)
    assign_outage_start_dt(distributed_records)

    for offset, job_records in enumerate(distributed_records):
        # Derive the number from the position rather than from the first
        # record, so an empty group cannot raise IndexError.
        print(f"\nOutage Job Number: {args.start_job_number + offset}")
        for record in job_records:
            print(record)
    return 0


if __name__ == "__main__":
    sys.exit(main())