Compare commits
2 commits
ec379045cc
...
d3204cee88
Author | SHA1 | Date | |
---|---|---|---|
|
d3204cee88 | ||
|
ca223ff3f4 |
2 changed files with 31 additions and 21 deletions
14
README.md
14
README.md
|
@ -21,26 +21,26 @@ python ./anonymize_entries.py path/to/folder
|
||||||
### Help contents
|
### Help contents
|
||||||
|
|
||||||
```
|
```
|
||||||
usage: anonymize_entries.py [-h] [-c CSV] [-f FILE_UPLOADS] [-d | --dry-run | --no-dry-run] [-D | --deanonymized | --no-deanonymized] [-e EMAILS] [-o OUTPUT]
|
usage: anonymize_entries.py [-h] [-c CSV] [-f FILE_UPLOADS] [-o OUTPUT] [-d | --dry-run | --no-dry-run] [-D | --deanonymized | --no-deanonymized] [-e EMAILS]
|
||||||
[-r | --regenerate | --no-regenerate] [-s SEED]
|
[-r | --regenerate | --no-regenerate] [-s SEED]
|
||||||
[data_dir]
|
data_dir
|
||||||
|
|
||||||
positional arguments:
|
positional arguments:
|
||||||
data_dir directory containing both the CSV form data and the file responses (uploads)
|
data_dir working directory - used to find the form responses CSV, file responses directory, and for output
|
||||||
|
|
||||||
options:
|
options:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
-c CSV, --csv CSV override path to the CSV file of form responses
|
-c CSV, --csv CSV override CSV form responses path (defaults to first file matching {data_dir}/*.csv)
|
||||||
-f FILE_UPLOADS, --file-uploads FILE_UPLOADS
|
-f FILE_UPLOADS, --file-uploads FILE_UPLOADS
|
||||||
override path to the directory of file responses (uploads)
|
override file responses directory path (defaults to first subdirectory matching {data_dir}/*/*.zip)
|
||||||
|
-o OUTPUT, --output OUTPUT
|
||||||
|
override output path (defaults to {data_dir}/output)
|
||||||
-d, --dry-run, --no-dry-run
|
-d, --dry-run, --no-dry-run
|
||||||
do not create or modify any files
|
do not create or modify any files
|
||||||
-D, --deanonymized, --no-deanonymized
|
-D, --deanonymized, --no-deanonymized
|
||||||
skip anonymization of files, simply package them as-is
|
skip anonymization of files, simply package them as-is
|
||||||
-e EMAILS, --emails EMAILS
|
-e EMAILS, --emails EMAILS
|
||||||
limit output to files from the specified emails (comma-separated)
|
limit output to files from the specified emails (comma-separated)
|
||||||
-o OUTPUT, --output OUTPUT
|
|
||||||
output directory
|
|
||||||
-r, --regenerate, --no-regenerate
|
-r, --regenerate, --no-regenerate
|
||||||
force-update generated CSV columns
|
force-update generated CSV columns
|
||||||
-s SEED, --seed SEED specify random seed for alias generation (treat this like a password & change it for each round)
|
-s SEED, --seed SEED specify random seed for alias generation (treat this like a password & change it for each round)
|
||||||
|
|
|
@ -31,13 +31,13 @@ from simfile.types import Simfile
|
||||||
|
|
||||||
|
|
||||||
class AnonymizeEntriesRawArgs:
|
class AnonymizeEntriesRawArgs:
|
||||||
data_dir: str | None
|
data_dir: str
|
||||||
csv: str | None
|
csv: str | None
|
||||||
file_uploads: str | None
|
file_uploads: str | None
|
||||||
deanonymized: bool
|
deanonymized: bool
|
||||||
dry_run: bool
|
dry_run: bool
|
||||||
emails: str
|
emails: str
|
||||||
output: str
|
output: str | None
|
||||||
regenerate: bool
|
regenerate: bool
|
||||||
seed: str
|
seed: str
|
||||||
|
|
||||||
|
@ -47,6 +47,7 @@ class AnonymizeEntriesArgs(AnonymizeEntriesRawArgs):
|
||||||
|
|
||||||
csv: str
|
csv: str
|
||||||
file_uploads: str
|
file_uploads: str
|
||||||
|
output: str
|
||||||
|
|
||||||
|
|
||||||
def argparser():
|
def argparser():
|
||||||
|
@ -74,21 +75,26 @@ def argparser():
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"data_dir",
|
"data_dir",
|
||||||
nargs="?",
|
|
||||||
type=str,
|
type=str,
|
||||||
help="directory containing both the CSV form data and the file responses (uploads)",
|
help="working directory - used to find the form responses CSV, file responses directory, and for output",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-c",
|
"-c",
|
||||||
"--csv",
|
"--csv",
|
||||||
type=str,
|
type=str,
|
||||||
help="override path to the CSV file of form responses",
|
help="override CSV form responses path (defaults to first file matching {data_dir}/*.csv)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-f",
|
"-f",
|
||||||
"--file-uploads",
|
"--file-uploads",
|
||||||
type=str,
|
type=str,
|
||||||
help="override path to the directory of file responses (uploads)",
|
help="override file responses directory path (defaults to first subdirectory matching {data_dir}/*/*.zip)",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-o",
|
||||||
|
"--output",
|
||||||
|
type=str,
|
||||||
|
help="override output path (defaults to {data_dir}/output)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-d",
|
"-d",
|
||||||
|
@ -108,13 +114,6 @@ def argparser():
|
||||||
type=str,
|
type=str,
|
||||||
help="limit output to files from the specified emails (comma-separated)",
|
help="limit output to files from the specified emails (comma-separated)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"-o",
|
|
||||||
"--output",
|
|
||||||
type=str,
|
|
||||||
default="output/",
|
|
||||||
help="output directory",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-r",
|
"-r",
|
||||||
"--regenerate",
|
"--regenerate",
|
||||||
|
@ -176,12 +175,22 @@ def process_args(args: AnonymizeEntriesRawArgs) -> AnonymizeEntriesArgs:
|
||||||
for dir_entry in os.scandir(args.data_dir):
|
for dir_entry in os.scandir(args.data_dir):
|
||||||
if not args.csv and dir_entry.is_file() and dir_entry.name.endswith(".csv"):
|
if not args.csv and dir_entry.is_file() and dir_entry.name.endswith(".csv"):
|
||||||
args.csv = dir_entry.path
|
args.csv = dir_entry.path
|
||||||
|
print(
|
||||||
|
f"Using {repr(dir_entry.name)} for form responses (override with --csv)"
|
||||||
|
)
|
||||||
if not args.file_uploads and dir_entry.is_dir():
|
if not args.file_uploads and dir_entry.is_dir():
|
||||||
if any(
|
if any(
|
||||||
subdir_entry.name.endswith(".zip")
|
subdir_entry.name.endswith(".zip")
|
||||||
for subdir_entry in os.scandir(dir_entry.path)
|
for subdir_entry in os.scandir(dir_entry.path)
|
||||||
):
|
):
|
||||||
args.file_uploads = dir_entry.path
|
args.file_uploads = dir_entry.path
|
||||||
|
print(
|
||||||
|
f"Using {repr(dir_entry.name)} for file responses (override with --file-uploads)"
|
||||||
|
)
|
||||||
|
|
||||||
|
if not args.output:
|
||||||
|
args.output = os.path.join(args.data_dir, "output")
|
||||||
|
print(f"Using {args.output} for output (override with --output)")
|
||||||
|
|
||||||
assert args.csv, "Unable to find a CSV file in the provided directory"
|
assert args.csv, "Unable to find a CSV file in the provided directory"
|
||||||
assert (
|
assert (
|
||||||
|
@ -300,7 +309,8 @@ def maybe_mark_resubmitted_entries(
|
||||||
row[KnownColumns.IgnoreFile] = "resubmitted" if resubmitted else ""
|
row[KnownColumns.IgnoreFile] = "resubmitted" if resubmitted else ""
|
||||||
if resubmitted:
|
if resubmitted:
|
||||||
resubmitted_total += 1
|
resubmitted_total += 1
|
||||||
print(f"Marked {resubmitted_total} resubmitted files to be ignored")
|
s = "" if resubmitted_total == 1 else "s"
|
||||||
|
print(f"Marked {resubmitted_total} resubmitted file{s} to be ignored")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue