resolve alias collisions

This commit is contained in:
Ash Garcia 2024-08-11 16:54:15 -07:00
parent d91e90b145
commit dabc89f588

View file

@@ -145,16 +145,24 @@ def maybe_generate_aliases(
if reuse_aliases: if reuse_aliases:
print("Reusing generated aliases") print("Reusing generated aliases")
return False return False
else:
for row in csv_contents: alias_to_email_address = {}
random = Random(
"; ".join([row[KnownColumns.EmailAddress], args.csv, args.files]) for row in csv_contents:
rnd = Random("; ".join([row[KnownColumns.EmailAddress], args.csv, args.files]))
random_alias = f"{rnd.choice(alias_parts[0])} {rnd.choice(alias_parts[0])}"
while (
random_alias in alias_to_email_address
and alias_to_email_address[random_alias] != row[KnownColumns.EmailAddress]
):
print(
f"WARNING: rerolling alias for {row[KnownColumns.EmailAddress]} due to collision with {alias_to_email_address[random_alias]}"
) )
row[KnownColumns.GeneratedAlias] = ( random_alias = f"{rnd.choice(alias_parts[0])} {rnd.choice(alias_parts[0])}"
f"{random.choice(alias_parts[0])} {random.choice(alias_parts[0])}" row[KnownColumns.GeneratedAlias] = random_alias
)
print("Generated an alias for each entry") print("Generated an alias for each entry")
return True return True
def maybe_mark_resubmitted_entries(csv_contents: CsvContents) -> ChangedCsvContents: def maybe_mark_resubmitted_entries(csv_contents: CsvContents) -> ChangedCsvContents: