make data_dir required & save output to {data_dir}/output by default

This commit is contained in:
Ash Garcia 2024-08-12 16:59:47 -07:00
parent ec379045cc
commit ca223ff3f4

View file

@ -31,13 +31,13 @@ from simfile.types import Simfile
class AnonymizeEntriesRawArgs:
data_dir: str | None
data_dir: str
csv: str | None
file_uploads: str | None
deanonymized: bool
dry_run: bool
emails: str
output: str
output: str | None
regenerate: bool
seed: str
@ -47,6 +47,7 @@ class AnonymizeEntriesArgs(AnonymizeEntriesRawArgs):
csv: str
file_uploads: str
output: str
def argparser():
@ -74,21 +75,26 @@ def argparser():
)
parser.add_argument(
"data_dir",
nargs="?",
type=str,
help="directory containing both the CSV form data and the file responses (uploads)",
help="working directory - used to find the form responses CSV, file responses directory, and for output",
)
parser.add_argument(
"-c",
"--csv",
type=str,
help="override path to the CSV file of form responses",
help="override CSV form responses path (defaults to first file matching {data_dir}/*.csv)",
)
parser.add_argument(
"-f",
"--file-uploads",
type=str,
help="override path to the directory of file responses (uploads)",
help="override file responses directory path (defaults to first subdirectory matching {data_dir}/*/*.zip)",
)
parser.add_argument(
"-o",
"--output",
type=str,
help="override output path (defaults to {data_dir}/output)",
)
parser.add_argument(
"-d",
@ -108,13 +114,6 @@ def argparser():
type=str,
help="limit output to files from the specified emails (comma-separated)",
)
parser.add_argument(
"-o",
"--output",
type=str,
default="output/",
help="output directory",
)
parser.add_argument(
"-r",
"--regenerate",
@ -176,12 +175,22 @@ def process_args(args: AnonymizeEntriesRawArgs) -> AnonymizeEntriesArgs:
for dir_entry in os.scandir(args.data_dir):
if not args.csv and dir_entry.is_file() and dir_entry.name.endswith(".csv"):
args.csv = dir_entry.path
print(
f"Using {repr(dir_entry.name)} for form responses (override with --csv)"
)
if not args.file_uploads and dir_entry.is_dir():
if any(
subdir_entry.name.endswith(".zip")
for subdir_entry in os.scandir(dir_entry.path)
):
args.file_uploads = dir_entry.path
print(
f"Using {repr(dir_entry.name)} for file responses (override with --file-uploads)"
)
if not args.output:
args.output = os.path.join(args.data_dir, "output")
print(f"Using {args.output} for output (override with --output)")
assert args.csv, "Unable to find a CSV file in the provided directory"
assert (
@ -300,7 +309,8 @@ def maybe_mark_resubmitted_entries(
row[KnownColumns.IgnoreFile] = "resubmitted" if resubmitted else ""
if resubmitted:
resubmitted_total += 1
print(f"Marked {resubmitted_total} resubmitted files to be ignored")
s = "" if resubmitted_total == 1 else "s"
print(f"Marked {resubmitted_total} resubmitted file{s} to be ignored")
return True