# Source listing metadata: Python, 119 lines, 4.6 KiB.
import argparse
|
|
import csv
|
|
import random
|
|
from datetime import datetime, timedelta
|
|
|
|
def read_csv(file_path):
    """Read a CSV file and return its rows as a list of dictionaries.

    The first row of the file is treated as the header; every following row
    becomes a dict keyed by those column names (``csv.DictReader``).

    Args:
        file_path: Path of the CSV file to read. The file is opened as
            UTF-8 text.

    Returns:
        A list with one dict per data row (empty if the file has no data
        rows), or ``None`` when the file cannot be opened, decoded, or
        parsed. In the ``None`` case an error message is printed.
    """
    try:
        # newline='' leaves newline handling to the csv module, as its
        # documentation recommends for quoted fields containing newlines.
        with open(file_path, mode='r', newline='', encoding='utf-8') as file:
            return list(csv.DictReader(file))
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
    except (OSError, UnicodeDecodeError, csv.Error) as e:
        # Only I/O, decoding and CSV-format failures are expected here;
        # anything else is a programming error and is allowed to propagate.
        print(f"An error occurred: {e}")
    return None
|
|
|
|
def add_additional_fields(records, additional_fields):
    """Ensure every record has each of the given fields.

    Records are modified in place. A field that a record already contains
    keeps its current value; a missing field is added with the value
    ``None``.

    Args:
        records: Iterable of dict records to update.
        additional_fields: Field names that each record must contain.
    """
    for entry in records:
        for name in additional_fields:
            # setdefault only inserts when the key is absent.
            entry.setdefault(name, None)
|
|
|
|
def generate_random_timestamp(start_time, end_time):
    """Return a random moment between two datetimes as a formatted string.

    A whole number of seconds, chosen uniformly from 0 up to the truncated
    length of the interval (inclusive), is added to ``start_time``.

    Args:
        start_time: Beginning of the interval (``datetime``).
        end_time: End of the interval (``datetime``).

    Returns:
        The chosen moment formatted as ``MM/DD/YYYY HH:MM:SS AM/PM``
        (12-hour clock).
    """
    span_seconds = int((end_time - start_time).total_seconds())
    offset = timedelta(seconds=random.randint(0, span_seconds))
    chosen = start_time + offset
    return chosen.strftime("%m/%d/%Y %I:%M:%S %p")
|
|
|
|
def distribute_records(records, total_jobs, start_job_number):
    """Randomly split records into consecutive-numbered outage jobs.

    The input list is shuffled in place, then cut into ``total_jobs``
    contiguous groups whose sizes differ by at most one (the first
    ``len(records) % total_jobs`` groups receive the extra record). Each
    record in a group gets its ``'OutageJobNumber'`` key set to that group's
    job number; job numbers start at ``start_job_number`` and increase by
    one per group.

    Args:
        records: List of dict records. It is shuffled in place and its
            records are mutated.
        total_jobs: Number of groups to create; must be at least 1.
        start_job_number: Job number assigned to the first group.

    Returns:
        A list of ``total_jobs`` lists of records. Groups are empty when
        ``total_jobs`` exceeds the number of records.

    Raises:
        ValueError: If ``total_jobs`` is less than 1.
    """
    # Validate before shuffling so a bad call leaves the input untouched.
    # Without this check, 0 fails with an opaque ZeroDivisionError and a
    # negative value silently returns no groups at all.
    if total_jobs < 1:
        raise ValueError(f"total_jobs must be at least 1, got {total_jobs}")

    random.shuffle(records)  # Randomize which records land in which job

    base_size, extra = divmod(len(records), total_jobs)

    distribution = []
    start_index = 0
    for offset in range(total_jobs):
        # The first `extra` groups absorb the remainder, one record each.
        end_index = start_index + base_size + (1 if offset < extra else 0)
        job_records = records[start_index:end_index]
        job_number = start_job_number + offset
        for record in job_records:
            record['OutageJobNumber'] = job_number
        distribution.append(job_records)
        start_index = end_index

    return distribution
|
|
|
|
def assign_outage_start_dt(distributed_records):
    """Give every job one random start time shared by all of its records.

    For each group in ``distributed_records`` a single timestamp is drawn
    from the 12 hours preceding the moment this function is called, and
    stored under the ``'OutageStartDt'`` key of every record in that group.

    Args:
        distributed_records: Iterable of groups, each an iterable of dict
            records. Records are modified in place.
    """
    window_end = datetime.now()
    window_start = window_end - timedelta(hours=12)

    for group in distributed_records:
        # One draw per group, so all records of a job share the timestamp.
        shared_stamp = generate_random_timestamp(window_start, window_end)
        for entry in group:
            entry['OutageStartDt'] = shared_stamp
|
|
|
|
def main():
    """Command-line entry point.

    Reads the CSV named on the command line, pads each record with the
    standard outage fields, splits the records across a random number of
    jobs (between 1 and the number of records) numbered from the given
    starting job number, assigns each job a random start time, and prints
    the result grouped by job.
    """
    parser = argparse.ArgumentParser(description='Distribute outage records randomly among TotalJobs with incremental OutageJobNumber starting from a specified number.')
    parser.add_argument('file_path', type=str, help='Path to the CSV file')
    parser.add_argument('start_job_number', type=int, help='Starting Outage Job Number')
    args = parser.parse_args()

    # Fields every output record must carry; ones missing from the CSV are
    # added with a None value.
    additional_fields = [
        'AffectedDeviceLatitude', 'Status', 'CustomerLongitude', 'Municipality', 'AffectedDeviceLongitude',
        'CriticalFacilityName', 'AffectedDevice', 'IsCustomerIsLifeSupportEquipment', 'County', 'CrewAssignDt',
        'ActualRestorationDt', 'CrewStatus', 'CrewOnsiteDt', 'OutageDescription', 'OutageStartDt', 'IsCriticalFacility',
        'Cause', 'SubCause', 'OpDiv', 'UCBatchId', 'UtilityId', 'OutageJobNumber', 'CustomerLatitude', 'EstimatedRestorationDt'
    ]

    records = read_csv(args.file_path)
    if records is None:
        print("Failed to read records from the CSV file.")
        return

    add_additional_fields(records, additional_fields)

    record_count = len(records)
    if record_count <= 0:
        print("No records found in the CSV file.")
        return

    # Pick how many jobs to create: anywhere from 1 to one job per record.
    total_jobs = random.randint(1, record_count)
    print(f"TotalJobs: {total_jobs}")

    groups = distribute_records(records, total_jobs, args.start_job_number)
    assign_outage_start_dt(groups)

    # Report each job followed by the records assigned to it.
    for group in groups:
        job_number = group[0]['OutageJobNumber']
        print(f"\nOutage Job Number: {job_number}")
        for entry in group:
            print(entry)
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|