From ca223ff3f489f8ce5905c5f4f8d318e4994680b1 Mon Sep 17 00:00:00 2001 From: Ash Garcia Date: Mon, 12 Aug 2024 16:59:47 -0700 Subject: [PATCH] make data_dir required & save output to {data_dir}/output by default --- anonymize_entries.py | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/anonymize_entries.py b/anonymize_entries.py index f6d5107..cfa28e3 100644 --- a/anonymize_entries.py +++ b/anonymize_entries.py @@ -31,13 +31,13 @@ from simfile.types import Simfile class AnonymizeEntriesRawArgs: - data_dir: str | None + data_dir: str csv: str | None file_uploads: str | None deanonymized: bool dry_run: bool emails: str - output: str + output: str | None regenerate: bool seed: str @@ -47,6 +47,7 @@ class AnonymizeEntriesArgs(AnonymizeEntriesRawArgs): csv: str file_uploads: str + output: str def argparser(): @@ -74,21 +75,26 @@ def argparser(): ) parser.add_argument( "data_dir", - nargs="?", type=str, - help="directory containing both the CSV form data and the file responses (uploads)", + help="working directory - used to find the form responses CSV, file responses directory, and for output", ) parser.add_argument( "-c", "--csv", type=str, - help="override path to the CSV file of form responses", + help="override CSV form responses path (defaults to first file matching {data_dir}/*.csv)", ) parser.add_argument( "-f", "--file-uploads", type=str, - help="override path to the directory of file responses (uploads)", + help="override file responses directory path (defaults to first subdirectory matching {data_dir}/*/*.zip)", + ) + parser.add_argument( + "-o", + "--output", + type=str, + help="override output path (defaults to {data_dir}/output)", ) parser.add_argument( "-d", @@ -108,13 +114,6 @@ def argparser(): type=str, help="limit output to files from the specified emails (comma-separated)", ) - parser.add_argument( - "-o", - "--output", - type=str, - default="output/", - help="output directory", - ) parser.add_argument( "-r", "--regenerate", @@ -176,12 +175,22 @@ def process_args(args: AnonymizeEntriesRawArgs) -> AnonymizeEntriesArgs: for dir_entry in os.scandir(args.data_dir): if not args.csv and dir_entry.is_file() and dir_entry.name.endswith(".csv"): args.csv = dir_entry.path + print( + f"Using {repr(dir_entry.name)} for form responses (override with --csv)" + ) if not args.file_uploads and dir_entry.is_dir(): if any( subdir_entry.name.endswith(".zip") for subdir_entry in os.scandir(dir_entry.path) ): args.file_uploads = dir_entry.path + print( + f"Using {repr(dir_entry.name)} for file responses (override with --file-uploads)" + ) + + if not args.output: + args.output = os.path.join(args.data_dir, "output") + print(f"Using {args.output} for output (override with --output)") assert args.csv, "Unable to find a CSV file in the provided directory" assert ( @@ -300,7 +309,8 @@ def maybe_mark_resubmitted_entries( row[KnownColumns.IgnoreFile] = "resubmitted" if resubmitted else "" if resubmitted: resubmitted_total += 1 - print(f"Marked {resubmitted_total} resubmitted files to be ignored") + s = "" if resubmitted_total == 1 else "s" + print(f"Marked {resubmitted_total} resubmitted file{s} to be ignored") return True