make data_dir required & save output to {data_dir}/output by default

This commit is contained in:
Ash Garcia 2024-08-12 16:59:47 -07:00
parent ec379045cc
commit ca223ff3f4

View file

@ -31,13 +31,13 @@ from simfile.types import Simfile
class AnonymizeEntriesRawArgs: class AnonymizeEntriesRawArgs:
data_dir: str | None data_dir: str
csv: str | None csv: str | None
file_uploads: str | None file_uploads: str | None
deanonymized: bool deanonymized: bool
dry_run: bool dry_run: bool
emails: str emails: str
output: str output: str | None
regenerate: bool regenerate: bool
seed: str seed: str
@ -47,6 +47,7 @@ class AnonymizeEntriesArgs(AnonymizeEntriesRawArgs):
csv: str csv: str
file_uploads: str file_uploads: str
output: str
def argparser(): def argparser():
@ -74,21 +75,26 @@ def argparser():
) )
parser.add_argument( parser.add_argument(
"data_dir", "data_dir",
nargs="?",
type=str, type=str,
help="directory containing both the CSV form data and the file responses (uploads)", help="working directory - used to find the form responses CSV, file responses directory, and for output",
) )
parser.add_argument( parser.add_argument(
"-c", "-c",
"--csv", "--csv",
type=str, type=str,
help="override path to the CSV file of form responses", help="override CSV form responses path (defaults to first file matching {data_dir}/*.csv)",
) )
parser.add_argument( parser.add_argument(
"-f", "-f",
"--file-uploads", "--file-uploads",
type=str, type=str,
help="override path to the directory of file responses (uploads)", help="override file responses directory path (defaults to first subdirectory matching {data_dir}/*/*.zip)",
)
parser.add_argument(
"-o",
"--output",
type=str,
help="override output path (defaults to {data_dir}/output)",
) )
parser.add_argument( parser.add_argument(
"-d", "-d",
@ -108,13 +114,6 @@ def argparser():
type=str, type=str,
help="limit output to files from the specified emails (comma-separated)", help="limit output to files from the specified emails (comma-separated)",
) )
parser.add_argument(
"-o",
"--output",
type=str,
default="output/",
help="output directory",
)
parser.add_argument( parser.add_argument(
"-r", "-r",
"--regenerate", "--regenerate",
@ -176,12 +175,22 @@ def process_args(args: AnonymizeEntriesRawArgs) -> AnonymizeEntriesArgs:
for dir_entry in os.scandir(args.data_dir): for dir_entry in os.scandir(args.data_dir):
if not args.csv and dir_entry.is_file() and dir_entry.name.endswith(".csv"): if not args.csv and dir_entry.is_file() and dir_entry.name.endswith(".csv"):
args.csv = dir_entry.path args.csv = dir_entry.path
print(
f"Using {repr(dir_entry.name)} for form responses (override with --csv)"
)
if not args.file_uploads and dir_entry.is_dir(): if not args.file_uploads and dir_entry.is_dir():
if any( if any(
subdir_entry.name.endswith(".zip") subdir_entry.name.endswith(".zip")
for subdir_entry in os.scandir(dir_entry.path) for subdir_entry in os.scandir(dir_entry.path)
): ):
args.file_uploads = dir_entry.path args.file_uploads = dir_entry.path
print(
f"Using {repr(dir_entry.name)} for file responses (override with --file-uploads)"
)
if not args.output:
args.output = os.path.join(args.data_dir, "output")
print(f"Using {args.output} for output (override with --output)")
assert args.csv, "Unable to find a CSV file in the provided directory" assert args.csv, "Unable to find a CSV file in the provided directory"
assert ( assert (
@ -300,7 +309,8 @@ def maybe_mark_resubmitted_entries(
row[KnownColumns.IgnoreFile] = "resubmitted" if resubmitted else "" row[KnownColumns.IgnoreFile] = "resubmitted" if resubmitted else ""
if resubmitted: if resubmitted:
resubmitted_total += 1 resubmitted_total += 1
print(f"Marked {resubmitted_total} resubmitted files to be ignored") s = "" if resubmitted_total == 1 else "s"
print(f"Marked {resubmitted_total} resubmitted file{s} to be ignored")
return True return True