update readme

make data_dir required & save output to {data_dir}/output by default
2024-08-12 17:01:30 -07:00 · 2024-08-12 16:59:47 -07:00
2 changed files with 31 additions and 21 deletions
--- a/README.md
+++ b/README.md
@@ -21,26 +21,26 @@ python ./anonymize_entries.py path/to/folder
 ### Help contents

 ```
-usage: anonymize_entries.py [-h] [-c CSV] [-f FILE_UPLOADS] [-d | --dry-run | --no-dry-run] [-D | --deanonymized | --no-deanonymized] [-e EMAILS] [-o OUTPUT]
+usage: anonymize_entries.py [-h] [-c CSV] [-f FILE_UPLOADS] [-o OUTPUT] [-d | --dry-run | --no-dry-run] [-D | --deanonymized | --no-deanonymized] [-e EMAILS]
                            [-r | --regenerate | --no-regenerate] [-s SEED]
-                            [data_dir]
+                            data_dir

 positional arguments:
-  data_dir              directory containing both the CSV form data and the file responses (uploads)
+  data_dir              working directory - used to find the form responses CSV, file responses directory, and for output

 options:
  -h, --help            show this help message and exit
-  -c CSV, --csv CSV     override path to the CSV file of form responses
+  -c CSV, --csv CSV     override CSV form responses path (defaults to first file matching {data_dir}/*.csv)
  -f FILE_UPLOADS, --file-uploads FILE_UPLOADS
-                        override path to the directory of file responses (uploads)
+                        override file responses directory path (defaults to first subdirectory matching {data_dir}/*/*.zip)
+  -o OUTPUT, --output OUTPUT
+                        override output path (defaults to {data_dir}/output)
  -d, --dry-run, --no-dry-run
                        do not create or modify any files
  -D, --deanonymized, --no-deanonymized
                        skip anonymization of files, simply package them as-is
  -e EMAILS, --emails EMAILS
                        limit output to files from the specified emails (comma-separated)
-  -o OUTPUT, --output OUTPUT
-                        output directory
  -r, --regenerate, --no-regenerate
                        force-update generated CSV columns
  -s SEED, --seed SEED  specify random seed for alias generation (treat this like a password & change it for each round)
--- a/anonymize_entries.py
+++ b/anonymize_entries.py
@@ -31,13 +31,13 @@ from simfile.types import Simfile


 class AnonymizeEntriesRawArgs:
-    data_dir: str | None
+    data_dir: str
    csv: str | None
    file_uploads: str | None
    deanonymized: bool
    dry_run: bool
    emails: str
-    output: str
+    output: str | None
    regenerate: bool
    seed: str

@@ -47,6 +47,7 @@ class AnonymizeEntriesArgs(AnonymizeEntriesRawArgs):

    csv: str
    file_uploads: str
+    output: str


 def argparser():
@@ -74,21 +75,26 @@ def argparser():
    )
    parser.add_argument(
        "data_dir",
-        nargs="?",
        type=str,
-        help="directory containing both the CSV form data and the file responses (uploads)",
+        help="working directory - used to find the form responses CSV, file responses directory, and for output",
    )
    parser.add_argument(
        "-c",
        "--csv",
        type=str,
-        help="override path to the CSV file of form responses",
+        help="override CSV form responses path (defaults to first file matching {data_dir}/*.csv)",
    )
    parser.add_argument(
        "-f",
        "--file-uploads",
        type=str,
-        help="override path to the directory of file responses (uploads)",
+        help="override file responses directory path (defaults to first subdirectory matching {data_dir}/*/*.zip)",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        type=str,
+        help="override output path (defaults to {data_dir}/output)",
    )
    parser.add_argument(
        "-d",
@@ -108,13 +114,6 @@ def argparser():
        type=str,
        help="limit output to files from the specified emails (comma-separated)",
    )
-    parser.add_argument(
-        "-o",
-        "--output",
-        type=str,
-        default="output/",
-        help="output directory",
-    )
    parser.add_argument(
        "-r",
        "--regenerate",
@@ -176,12 +175,22 @@ def process_args(args: AnonymizeEntriesRawArgs) -> AnonymizeEntriesArgs:
        for dir_entry in os.scandir(args.data_dir):
            if not args.csv and dir_entry.is_file() and dir_entry.name.endswith(".csv"):
                args.csv = dir_entry.path
+                print(
+                    f"Using {repr(dir_entry.name)} for form responses (override with --csv)"
+                )
            if not args.file_uploads and dir_entry.is_dir():
                if any(
                    subdir_entry.name.endswith(".zip")
                    for subdir_entry in os.scandir(dir_entry.path)
                ):
                    args.file_uploads = dir_entry.path
+                    print(
+                        f"Using {repr(dir_entry.name)} for file responses (override with --file-uploads)"
+                    )
+
+    if not args.output:
+        args.output = os.path.join(args.data_dir, "output")
+        print(f"Using {args.output} for output (override with --output)")

    assert args.csv, "Unable to find a CSV file in the provided directory"
    assert (
@@ -300,7 +309,8 @@ def maybe_mark_resubmitted_entries(
                    row[KnownColumns.IgnoreFile] = "resubmitted" if resubmitted else ""
                    if resubmitted:
                        resubmitted_total += 1
-        print(f"Marked {resubmitted_total} resubmitted files to be ignored")
+        s = "" if resubmitted_total == 1 else "s"
+        print(f"Marked {resubmitted_total} resubmitted file{s} to be ignored")
        return True
Author	SHA1	Message	Date
Ash Garcia	d3204cee88	update readme	2024-08-12 17:01:30 -07:00
Ash Garcia	ca223ff3f4	make data_dir required & save output to {data_dir}/output by default	2024-08-12 16:59:47 -07:00