Compare commits

..

2 commits

Author SHA1 Message Date
Ash Garcia
d3204cee88 update readme 2024-08-12 17:01:30 -07:00
Ash Garcia
ca223ff3f4 make data_dir required & save output to {data_dir}/output by default 2024-08-12 16:59:47 -07:00
2 changed files with 31 additions and 21 deletions

View file

@@ -21,26 +21,26 @@ python ./anonymize_entries.py path/to/folder
### Help contents
```
usage: anonymize_entries.py [-h] [-c CSV] [-f FILE_UPLOADS] [-d | --dry-run | --no-dry-run] [-D | --deanonymized | --no-deanonymized] [-e EMAILS] [-o OUTPUT]
usage: anonymize_entries.py [-h] [-c CSV] [-f FILE_UPLOADS] [-o OUTPUT] [-d | --dry-run | --no-dry-run] [-D | --deanonymized | --no-deanonymized] [-e EMAILS]
[-r | --regenerate | --no-regenerate] [-s SEED]
[data_dir]
data_dir
positional arguments:
data_dir directory containing both the CSV form data and the file responses (uploads)
data_dir working directory - used to find the form responses CSV, file responses directory, and for output
options:
-h, --help show this help message and exit
-c CSV, --csv CSV override path to the CSV file of form responses
-c CSV, --csv CSV override CSV form responses path (defaults to first file matching {data_dir}/*.csv)
-f FILE_UPLOADS, --file-uploads FILE_UPLOADS
override path to the directory of file responses (uploads)
override file responses directory path (defaults to first subdirectory matching {data_dir}/*/*.zip)
-o OUTPUT, --output OUTPUT
override output path (defaults to {data_dir}/output)
-d, --dry-run, --no-dry-run
do not create or modify any files
-D, --deanonymized, --no-deanonymized
skip anonymization of files, simply package them as-is
-e EMAILS, --emails EMAILS
limit output to files from the specified emails (comma-separated)
-o OUTPUT, --output OUTPUT
output directory
-r, --regenerate, --no-regenerate
force-update generated CSV columns
-s SEED, --seed SEED specify random seed for alias generation (treat this like a password & change it for each round)

View file

@@ -31,13 +31,13 @@ from simfile.types import Simfile
class AnonymizeEntriesRawArgs:
data_dir: str | None
data_dir: str
csv: str | None
file_uploads: str | None
deanonymized: bool
dry_run: bool
emails: str
output: str
output: str | None
regenerate: bool
seed: str
@@ -47,6 +47,7 @@ class AnonymizeEntriesArgs(AnonymizeEntriesRawArgs):
csv: str
file_uploads: str
output: str
def argparser():
@@ -74,21 +75,26 @@ def argparser():
)
parser.add_argument(
"data_dir",
nargs="?",
type=str,
help="directory containing both the CSV form data and the file responses (uploads)",
help="working directory - used to find the form responses CSV, file responses directory, and for output",
)
parser.add_argument(
"-c",
"--csv",
type=str,
help="override path to the CSV file of form responses",
help="override CSV form responses path (defaults to first file matching {data_dir}/*.csv)",
)
parser.add_argument(
"-f",
"--file-uploads",
type=str,
help="override path to the directory of file responses (uploads)",
help="override file responses directory path (defaults to first subdirectory matching {data_dir}/*/*.zip)",
)
parser.add_argument(
"-o",
"--output",
type=str,
help="override output path (defaults to {data_dir}/output)",
)
parser.add_argument(
"-d",
@@ -108,13 +114,6 @@ def argparser():
type=str,
help="limit output to files from the specified emails (comma-separated)",
)
parser.add_argument(
"-o",
"--output",
type=str,
default="output/",
help="output directory",
)
parser.add_argument(
"-r",
"--regenerate",
@@ -176,12 +175,22 @@ def process_args(args: AnonymizeEntriesRawArgs) -> AnonymizeEntriesArgs:
for dir_entry in os.scandir(args.data_dir):
if not args.csv and dir_entry.is_file() and dir_entry.name.endswith(".csv"):
args.csv = dir_entry.path
print(
f"Using {repr(dir_entry.name)} for form responses (override with --csv)"
)
if not args.file_uploads and dir_entry.is_dir():
if any(
subdir_entry.name.endswith(".zip")
for subdir_entry in os.scandir(dir_entry.path)
):
args.file_uploads = dir_entry.path
print(
f"Using {repr(dir_entry.name)} for file responses (override with --file-uploads)"
)
if not args.output:
args.output = os.path.join(args.data_dir, "output")
print(f"Using {args.output} for output (override with --output)")
assert args.csv, "Unable to find a CSV file in the provided directory"
assert (
@@ -300,7 +309,8 @@ def maybe_mark_resubmitted_entries(
row[KnownColumns.IgnoreFile] = "resubmitted" if resubmitted else ""
if resubmitted:
resubmitted_total += 1
print(f"Marked {resubmitted_total} resubmitted files to be ignored")
s = "" if resubmitted_total == 1 else "s"
print(f"Marked {resubmitted_total} resubmitted file{s} to be ignored")
return True