Compare commits

...

2 commits

Author SHA1 Message Date
Ash Garcia
d3204cee88 update readme 2024-08-12 17:01:30 -07:00
Ash Garcia
ca223ff3f4 make data_dir required & save output to {data_dir}/output by default 2024-08-12 16:59:47 -07:00
2 changed files with 31 additions and 21 deletions

View file

@ -21,26 +21,26 @@ python ./anonymize_entries.py path/to/folder
### Help contents ### Help contents
``` ```
usage: anonymize_entries.py [-h] [-c CSV] [-f FILE_UPLOADS] [-d | --dry-run | --no-dry-run] [-D | --deanonymized | --no-deanonymized] [-e EMAILS] [-o OUTPUT] usage: anonymize_entries.py [-h] [-c CSV] [-f FILE_UPLOADS] [-o OUTPUT] [-d | --dry-run | --no-dry-run] [-D | --deanonymized | --no-deanonymized] [-e EMAILS]
[-r | --regenerate | --no-regenerate] [-s SEED] [-r | --regenerate | --no-regenerate] [-s SEED]
[data_dir] data_dir
positional arguments: positional arguments:
data_dir directory containing both the CSV form data and the file responses (uploads) data_dir working directory - used to find the form responses CSV, file responses directory, and for output
options: options:
-h, --help show this help message and exit -h, --help show this help message and exit
-c CSV, --csv CSV override path to the CSV file of form responses -c CSV, --csv CSV override CSV form responses path (defaults to first file matching {data_dir}/*.csv)
-f FILE_UPLOADS, --file-uploads FILE_UPLOADS -f FILE_UPLOADS, --file-uploads FILE_UPLOADS
override path to the directory of file responses (uploads) override file responses directory path (defaults to first subdirectory matching {data_dir}/*/*.zip)
-o OUTPUT, --output OUTPUT
override output path (defaults to {data_dir}/output)
-d, --dry-run, --no-dry-run -d, --dry-run, --no-dry-run
do not create or modify any files do not create or modify any files
-D, --deanonymized, --no-deanonymized -D, --deanonymized, --no-deanonymized
skip anonymization of files, simply package them as-is skip anonymization of files, simply package them as-is
-e EMAILS, --emails EMAILS -e EMAILS, --emails EMAILS
limit output to files from the specified emails (comma-separated) limit output to files from the specified emails (comma-separated)
-o OUTPUT, --output OUTPUT
output directory
-r, --regenerate, --no-regenerate -r, --regenerate, --no-regenerate
force-update generated CSV columns force-update generated CSV columns
-s SEED, --seed SEED specify random seed for alias generation (treat this like a password & change it for each round) -s SEED, --seed SEED specify random seed for alias generation (treat this like a password & change it for each round)

View file

@ -31,13 +31,13 @@ from simfile.types import Simfile
class AnonymizeEntriesRawArgs: class AnonymizeEntriesRawArgs:
data_dir: str | None data_dir: str
csv: str | None csv: str | None
file_uploads: str | None file_uploads: str | None
deanonymized: bool deanonymized: bool
dry_run: bool dry_run: bool
emails: str emails: str
output: str output: str | None
regenerate: bool regenerate: bool
seed: str seed: str
@ -47,6 +47,7 @@ class AnonymizeEntriesArgs(AnonymizeEntriesRawArgs):
csv: str csv: str
file_uploads: str file_uploads: str
output: str
def argparser(): def argparser():
@ -74,21 +75,26 @@ def argparser():
) )
parser.add_argument( parser.add_argument(
"data_dir", "data_dir",
nargs="?",
type=str, type=str,
help="directory containing both the CSV form data and the file responses (uploads)", help="working directory - used to find the form responses CSV, file responses directory, and for output",
) )
parser.add_argument( parser.add_argument(
"-c", "-c",
"--csv", "--csv",
type=str, type=str,
help="override path to the CSV file of form responses", help="override CSV form responses path (defaults to first file matching {data_dir}/*.csv)",
) )
parser.add_argument( parser.add_argument(
"-f", "-f",
"--file-uploads", "--file-uploads",
type=str, type=str,
help="override path to the directory of file responses (uploads)", help="override file responses directory path (defaults to first subdirectory matching {data_dir}/*/*.zip)",
)
parser.add_argument(
"-o",
"--output",
type=str,
help="override output path (defaults to {data_dir}/output)",
) )
parser.add_argument( parser.add_argument(
"-d", "-d",
@ -108,13 +114,6 @@ def argparser():
type=str, type=str,
help="limit output to files from the specified emails (comma-separated)", help="limit output to files from the specified emails (comma-separated)",
) )
parser.add_argument(
"-o",
"--output",
type=str,
default="output/",
help="output directory",
)
parser.add_argument( parser.add_argument(
"-r", "-r",
"--regenerate", "--regenerate",
@ -176,12 +175,22 @@ def process_args(args: AnonymizeEntriesRawArgs) -> AnonymizeEntriesArgs:
for dir_entry in os.scandir(args.data_dir): for dir_entry in os.scandir(args.data_dir):
if not args.csv and dir_entry.is_file() and dir_entry.name.endswith(".csv"): if not args.csv and dir_entry.is_file() and dir_entry.name.endswith(".csv"):
args.csv = dir_entry.path args.csv = dir_entry.path
print(
f"Using {repr(dir_entry.name)} for form responses (override with --csv)"
)
if not args.file_uploads and dir_entry.is_dir(): if not args.file_uploads and dir_entry.is_dir():
if any( if any(
subdir_entry.name.endswith(".zip") subdir_entry.name.endswith(".zip")
for subdir_entry in os.scandir(dir_entry.path) for subdir_entry in os.scandir(dir_entry.path)
): ):
args.file_uploads = dir_entry.path args.file_uploads = dir_entry.path
print(
f"Using {repr(dir_entry.name)} for file responses (override with --file-uploads)"
)
if not args.output:
args.output = os.path.join(args.data_dir, "output")
print(f"Using {args.output} for output (override with --output)")
assert args.csv, "Unable to find a CSV file in the provided directory" assert args.csv, "Unable to find a CSV file in the provided directory"
assert ( assert (
@ -300,7 +309,8 @@ def maybe_mark_resubmitted_entries(
row[KnownColumns.IgnoreFile] = "resubmitted" if resubmitted else "" row[KnownColumns.IgnoreFile] = "resubmitted" if resubmitted else ""
if resubmitted: if resubmitted:
resubmitted_total += 1 resubmitted_total += 1
print(f"Marked {resubmitted_total} resubmitted files to be ignored") s = "" if resubmitted_total == 1 else "s"
print(f"Marked {resubmitted_total} resubmitted file{s} to be ignored")
return True return True