"""Distribute outage records from a CSV file across randomly sized jobs.

The script reads every row of a CSV file, makes sure each row carries the
full set of outage fields, splits the rows into a random number of jobs
numbered consecutively from a user-supplied starting job number, gives each
job one random start timestamp from the last twelve hours, and prints the
result.
"""

import argparse
import csv
import random
from datetime import datetime, timedelta

# Fields every record must expose; any field missing from the CSV row is
# added with a value of None.
ADDITIONAL_FIELDS = (
    'AffectedDeviceLatitude', 'Status', 'CustomerLongitude', 'Municipality',
    'AffectedDeviceLongitude', 'CriticalFacilityName', 'AffectedDevice',
    'IsCustomerIsLifeSupportEquipment', 'County', 'CrewAssignDt',
    'ActualRestorationDt', 'CrewStatus', 'CrewOnsiteDt', 'OutageDescription',
    'OutageStartDt', 'IsCriticalFacility', 'Cause', 'SubCause', 'OpDiv',
    'UCBatchId', 'UtilityId', 'OutageJobNumber', 'CustomerLatitude',
    'EstimatedRestorationDt',
)

# Output format of generated timestamps, e.g. "01/31/2024 09:15:42 PM".
TIMESTAMP_FORMAT = "%m/%d/%Y %I:%M:%S %p"


def read_csv(file_path):
    """Read a CSV file and return its rows as a list of dictionaries.

    Args:
        file_path: Path of the CSV file; the first row is used as the header.

    Returns:
        A list with one dict per data row (keyed by column name), or None
        when the file is missing or any other error occurs while reading.
        An error message is printed in the failure case.
    """
    try:
        with open(file_path, mode='r', newline='', encoding='utf-8') as file:
            # DictReader maps each row onto the header's column names.
            return list(csv.DictReader(file))
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
    except Exception as e:
        # Deliberately broad: this is the script's I/O boundary and callers
        # rely on getting None (not an exception) for any read failure.
        print(f"An error occurred: {e}")
    return None


def add_additional_fields(records, additional_fields):
    """Ensure every record has each of the given fields.

    Fields already present in a record keep their value; missing ones are
    added with None. The records are modified in place.

    Args:
        records: Iterable of dict records to update.
        additional_fields: Iterable of field names to guarantee.
    """
    for record in records:
        for field in additional_fields:
            record.setdefault(field, None)


def generate_random_timestamp(start_time, end_time):
    """Return a random timestamp between start_time and end_time as a string.

    The offset from start_time is a whole number of seconds, chosen uniformly
    from the inclusive range covered by the two datetimes.

    Args:
        start_time: Lower bound (datetime).
        end_time: Upper bound (datetime).

    Returns:
        The chosen moment formatted as "MM/DD/YYYY HH:MM:SS AM/PM".

    Raises:
        ValueError: If end_time lies a second or more before start_time.
    """
    span_seconds = int((end_time - start_time).total_seconds())
    if span_seconds < 0:
        raise ValueError("end_time must not be earlier than start_time")
    chosen = start_time + timedelta(seconds=random.randint(0, span_seconds))
    return chosen.strftime(TIMESTAMP_FORMAT)


def distribute_records(records, total_jobs, start_job_number):
    """Randomly split records into total_jobs consecutive job groups.

    The list is shuffled in place, then cut into total_jobs slices whose
    sizes differ by at most one (the first groups receive the leftover
    records). Every record in a slice gets its 'OutageJobNumber' set to that
    slice's job number; job numbers count up from start_job_number.

    Args:
        records: List of dict records; reordered and updated in place.
        total_jobs: Number of groups to create; must be at least 1.
        start_job_number: Job number assigned to the first group.

    Returns:
        A list of total_jobs lists of records. Groups are empty when
        total_jobs exceeds the number of records.

    Raises:
        ValueError: If total_jobs is smaller than 1.
    """
    if total_jobs < 1:
        raise ValueError(f"total_jobs must be at least 1, got {total_jobs}")

    random.shuffle(records)  # Shuffle the records to randomize distribution

    base_size, remainder = divmod(len(records), total_jobs)
    distribution = []
    start_index = 0
    for offset in range(total_jobs):
        # The first `remainder` groups absorb one extra record each.
        end_index = start_index + base_size + (1 if offset < remainder else 0)
        job_records = records[start_index:end_index]
        for record in job_records:
            record['OutageJobNumber'] = start_job_number + offset
        distribution.append(job_records)
        start_index = end_index
    return distribution


def assign_outage_start_dt(distributed_records):
    """Give every job group one random 'OutageStartDt' from the last 12 hours.

    All records of the same group share the same timestamp string. Records
    are updated in place.

    Args:
        distributed_records: Iterable of record lists, one list per job.
    """
    now = datetime.now()
    twelve_hours_ago = now - timedelta(hours=12)
    for job_records in distributed_records:
        # Generate a random timestamp for the current OutageJobNumber
        timestamp = generate_random_timestamp(twelve_hours_ago, now)
        for record in job_records:
            record['OutageStartDt'] = timestamp


def main():
    """Parse the command line, distribute the CSV records and print them."""
    parser = argparse.ArgumentParser(
        description='Distribute outage records randomly among TotalJobs with '
                    'incremental OutageJobNumber starting from a specified number.'
    )
    parser.add_argument('file_path', type=str, help='Path to the CSV file')
    parser.add_argument('start_job_number', type=int, help='Starting Outage Job Number')
    args = parser.parse_args()

    # Read the CSV file and get all records
    records = read_csv(args.file_path)
    if records is None:
        print("Failed to read records from the CSV file.")
        return

    # Add additional fields to each record
    add_additional_fields(records, ADDITIONAL_FIELDS)

    num_records = len(records)
    if num_records == 0:
        print("No records found in the CSV file.")
        return

    # Randomly determine TotalJobs between 1 and the number of records, so
    # every job ends up with at least one record.
    total_jobs = random.randint(1, num_records)
    print(f"TotalJobs: {total_jobs}")

    # Distribute records among TotalJobs
    distributed_records = distribute_records(records, total_jobs, args.start_job_number)

    # Assign OutageStartDt to each record
    assign_outage_start_dt(distributed_records)

    # Print the distribution
    for job_records in distributed_records:
        job_number = job_records[0]['OutageJobNumber']
        print(f"\nOutage Job Number: {job_number}")
        for record in job_records:
            print(record)


if __name__ == "__main__":
    main()