Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Old databases migrations #1038

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
8ae2ac9
Revised kaspar migrating script.
vlejd Mar 22, 2017
1ed292a
kms migration skript + improved mihration structure
vlejd Mar 23, 2017
e399f1f
Good enough migration scripts.
vlejd Mar 26, 2017
b16f898
Whole pipeline is working on wet run.
vlejd Mar 31, 2017
c45a524
pep-8 fixes
vlejd Mar 31, 2017
fd387f8
Fixed lint errors and added some options
vlejd Apr 1, 2017
3937866
Minor parameter changes.
vlejd Apr 15, 2017
1f62905
Revised kaspar migrating script.
vlejd Mar 22, 2017
dcd9d35
kms migration skript + improved mihration structure
vlejd Mar 23, 2017
d149519
Good enough migration scripts.
vlejd Mar 26, 2017
e7e472e
Whole pipeline is working on wet run.
vlejd Mar 31, 2017
f4f4039
pep-8 fixes
vlejd Mar 31, 2017
dc73736
Fixed lint errors and added some options
vlejd Apr 1, 2017
adffa56
Minor parameter changes.
vlejd Apr 15, 2017
fd07ac2
Merge branch 'kaspar-migration' of github.com:trojsten/web into kaspa…
vlejd Oct 22, 2017
f362320
Merge branch 'master' into kaspar-migration
vlejd Oct 30, 2017
b176aa0
Review fixes part 1.
vlejd Oct 30, 2017
f3d7fb6
Fix typo.
mhozza Jul 9, 2019
8e08386
Merge branch 'master' of github.com:trojsten/web into kaspar-migration
mhozza Jul 9, 2019
4094bda
black and isort.
mhozza Jul 9, 2019
48944c6
Fix some review comments.
mhozza Jul 9, 2019
e476954
Use raw string literals for regexps.
mhozza Jul 9, 2019
0320953
Use raw string literals for strings containing regexps and ignore var…
mhozza Jul 9, 2019
df2bdbd
Merge branch 'master' into kaspar-migration
mhozza Jul 10, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions trojsten/people/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,26 @@
MAILING_OPTION_SCHOOL = "S"
MAILING_OPTION_OTHER = "O"

OTHER_SCHOOL_ID = 1

DEENVELOPING_NOT_REVIEWED_SYMBOL = "*"

# User properties
# Each value below is the name under which a user property key is looked up
# (and created when missing) by the people migration commands.
# User id in the old fks database
FKS_ID_PROPERTY_KEY = "FKS ID"
# User id in the old kms database
KMS_ID_PROPERTY_KEY = "KMS ID"
# User id in the old ksp database
KASPAR_ID_PROPERTY_KEY = "KSP ID"
# User id in the csv file
CSV_ID_PROPERTY_KEY = "csv ID"
# Phone number (filled from the "Telefon" csv column)
MOBIL_PROPERTY_KEY = "Mobil"
# Nickname
NICKNAME_PROPERTY_KEY = "Prezyvka"
# Birth (maiden) name
BIRTH_NAME_PROPERTY_KEY = "Rodne Meno"
# Year of the last contact with the person (validated as four digits)
LAST_CONTACT_PROPERTY_KEY = "Posledny kontakt"
# Presumably the number of KMS camps attended (validated as digits) - TODO confirm
KMS_CAMPS_PROPERTY_KEY = "KMS sustredenia"
# Free-text note, presumably carried over from the old ksp database - TODO confirm
KASPAR_NOTE_PROPERTY_KEY = "KSP note"
# Presumably the number of KSP camps attended (validated as digits) - TODO confirm
KSP_CAMPS_PROPERTY_KEY = "KSP sustredenia"
# Free-text memories (filled from the "spomienka" csv column)
MEMORY_PROPERTY_KEY = "Spomienky"
# Workplace (filled from the "Posobisko" csv column)
COMPANY_PROPERTY_KEY = "Posobisko"
# Position / role (filled from the "Pozicia" csv column)
AFFILIATION_PROPERTY_KEY = "Pozicia"
1 change: 1 addition & 0 deletions trojsten/people/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

def get_similar_users(user):
    """Return the other users whose first and last name both equal ``user``'s.

    The given user itself (matched by primary key) is excluded from the result.
    """
    # TODO check birth day as well. (if defined, filter different)
    everyone_else = User.objects.exclude(pk=user.pk)
    same_name = {"first_name": user.first_name, "last_name": user.last_name}
    return everyone_else.filter(**same_name)
Expand Down
38 changes: 38 additions & 0 deletions trojsten/people/management/commands/migrate_30rokovfks1_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from __future__ import unicode_literals

import csv

from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand


class Command(MigrateBaseCommand):
    """Management command importing people from the "30rokovFKS1" csv file.

    Every csv row becomes one person identified by the synthetic id
    ``30rokovFKS1_<row number>`` (rows are numbered from 1).
    """

    help = "Imports people and their related info from fks_csv."

    def add_arguments(self, parser):
        """Add the positional ``file`` argument (path to the csv) to the base options."""
        super(Command, self).add_arguments(parser)
        parser.add_argument("file", type=str)

    def handle(self, **options):
        """Import every row of the csv file named by ``options["file"]``.

        Expected columns: ``kontaktovany?``, ``Meno``, ``Priezvisko``, ``Email``,
        ``Telefon``, ``Rodne priezvisko`` and ``Prezyvka``.
        """
        super(Command, self).handle(**options)
        participants_file = options["file"]

        # Use a context manager so the file handle is always closed, even when
        # importing a row raises; the reader is consumed inside the block.
        with open(participants_file) as csv_file:
            for row_number, row in enumerate(csv.DictReader(csv_file), start=1):
                csv_id = "30rokovFKS1_{0:d}".format(row_number)
                # People marked as contacted get the hard-coded year 2014 as
                # their last contact.
                if row["kontaktovany?"] == "ano":
                    self.last_contact[csv_id].append(2014)

                user = {
                    "first_name": row["Meno"],
                    "last_name": row["Priezvisko"],
                    "email": row["Email"],
                }
                user_properties = [
                    (self.MOBIL_PROPERTY, self.fix_string(row["Telefon"])),
                    (self.BIRTH_NAME_PROPERTY, row["Rodne priezvisko"]),
                    (self.NICKNAME_PROPERTY, row["Prezyvka"]),
                ]

                self.process_person(user, user_properties, self.CSV_ID_PROPERTY, csv_id)

        self.print_stats()
41 changes: 41 additions & 0 deletions trojsten/people/management/commands/migrate_30rokovfks2_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from __future__ import unicode_literals

import csv

from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand


class Command(MigrateBaseCommand):
    """Management command importing people from the "30rokovFKS2" csv file.

    Every csv row with a first name becomes one person identified by the
    synthetic id ``30rokovFKS2_<row number>`` (rows are numbered from 1 and
    skipped rows still consume a number).
    """

    help = "Imports people and their related info from fks_csv."

    def add_arguments(self, parser):
        """Add the positional ``file`` argument (path to the csv) to the base options."""
        super(Command, self).add_arguments(parser)
        parser.add_argument("file", type=str)

    def handle(self, **options):
        """Import every named row of the csv file given by ``options["file"]``.

        Expected columns: ``Meno``, ``Priezvisko``, ``E-mail``, ``Telefon``,
        ``Rodne priezvisko``, ``Prezyvka``, ``Posobisko``, ``Pozicia`` and
        ``spomienka``.
        """
        super(Command, self).handle(**options)
        participants_file = options["file"]

        # Use a context manager so the file handle is always closed, even when
        # importing a row raises; the reader is consumed inside the block.
        with open(participants_file) as csv_file:
            for row_number, row in enumerate(csv.DictReader(csv_file), start=1):
                csv_id = "30rokovFKS2_{0:d}".format(row_number)
                # Rows without a first name are skipped.
                if not row["Meno"]:
                    continue

                # Everybody in this file gets the hard-coded year 2014 as
                # their last contact.
                self.last_contact[csv_id].append(2014)
                user = {
                    "first_name": row["Meno"],
                    "last_name": row["Priezvisko"],
                    "email": row["E-mail"],
                }
                user_properties = [
                    (self.MOBIL_PROPERTY, self.fix_string(row["Telefon"])),
                    (self.BIRTH_NAME_PROPERTY, row["Rodne priezvisko"]),
                    (self.NICKNAME_PROPERTY, row["Prezyvka"]),
                    (self.COMPANY_PROPERTY, row["Posobisko"]),
                    (self.AFFILIATION_PROPERTY, row["Pozicia"]),
                    (self.MEMORY_PROPERTY, row["spomienka"]),
                ]
                # TODO: the address ("Adresa") is not imported yet.

                self.process_person(user, user_properties, self.CSV_ID_PROPERTY, csv_id)

        self.print_stats()
253 changes: 253 additions & 0 deletions trojsten/people/management/commands/migrate_base_class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
from collections import defaultdict
from datetime import datetime

from django.core.management import BaseCommand as NoArgsCommand
from django.db import transaction
from django.db.models import Q
from django.utils.six.moves import input

from trojsten.people import constants
from trojsten.people.helpers import get_similar_users
from trojsten.people.models import Address, DuplicateUser, User, UserProperty, UserPropertyKey
from trojsten.schools.models import School


class MigrateBaseCommand(NoArgsCommand):
    """Shared base for the management commands that import people from old data sources."""

    help = "Base class for importing people."
    # Caps on how many schools / users are processed when the --fast option is given.
    NUMBER_OF_SCHOOLS_IN_FAST_RUN = 100
    NUMBER_OF_USERS_IN_FAST_RUN = 100

def add_arguments(self, parser):
    """Register the ``--wet_run`` and ``--fast`` switches on ``parser``.

    ``--wet_run`` clears the ``dry`` option (which defaults to True) and
    ``--fast`` sets the ``fast`` option (which defaults to False).
    """
    fast_help = "Create only the first {} users and {} schools".format(
        self.NUMBER_OF_USERS_IN_FAST_RUN, self.NUMBER_OF_SCHOOLS_IN_FAST_RUN
    )
    # A dry run is the default; passing the flag turns it off.
    parser.add_argument(
        "--wet_run",
        dest="dry",
        action="store_false",
        default=True,
        help="Actually write something to DB",
    )
    parser.add_argument(
        "--fast", dest="fast", action="store_true", default=False, help=fast_help
    )

def handle(self, **options):
    """Reset the per-run state and resolve every user property key.

    Reads ``dry``, ``fast`` and ``verbosity`` from ``options``, zeroes the
    user/school counters, empties the bookkeeping containers and stores one
    property key per ``*_PROPERTY`` attribute via ``process_property``.
    """
    self.dry = options["dry"]
    self.fast = options["fast"]
    self.done_users = 0
    self.done_schools = 0
    if self.dry:
        self.stderr.write("Running dry run!")

    self.verbosity = options["verbosity"]
    self.similar_users = []
    self.school_id_map = {}
    self.last_contact = defaultdict(list)

    # (attribute name, property key name, validation regexp), resolved in
    # exactly this order.
    property_specs = (
        ("CSV_ID_PROPERTY", constants.CSV_ID_PROPERTY_KEY, r"(.{1,20}_)?\d+"),
        ("MOBIL_PROPERTY", constants.MOBIL_PROPERTY_KEY, r"\+?\d+\/?\d+"),
        ("NICKNAME_PROPERTY", constants.NICKNAME_PROPERTY_KEY, r".{1,30}"),
        ("BIRTH_NAME_PROPERTY", constants.BIRTH_NAME_PROPERTY_KEY, r".{1,30}"),
        # TODO: clean up False and other nonsensical last-contact values.
        ("LAST_CONTACT_PROPERTY", constants.LAST_CONTACT_PROPERTY_KEY, r"\d\d\d\d"),
        ("FKS_ID_PROPERTY", constants.FKS_ID_PROPERTY_KEY, r"\d+"),
        ("KMS_ID_PROPERTY", constants.KMS_ID_PROPERTY_KEY, r"\d+"),
        ("KMS_CAMPS_PROPERTY", constants.KMS_CAMPS_PROPERTY_KEY, r"\d+"),
        ("KASPAR_ID_PROPERTY", constants.KASPAR_ID_PROPERTY_KEY, r"\d+"),
        ("KASPAR_NOTE_PROPERTY", constants.KASPAR_NOTE_PROPERTY_KEY, r".*"),
        ("KSP_CAMPS_PROPERTY", constants.KSP_CAMPS_PROPERTY_KEY, r"\d+"),
        ("MEMORY_PROPERTY", constants.MEMORY_PROPERTY_KEY, r".*"),
        ("COMPANY_PROPERTY", constants.COMPANY_PROPERTY_KEY, r".*"),
        ("AFFILIATION_PROPERTY", constants.AFFILIATION_PROPERTY_KEY, r".*"),
    )
    for attribute, key_name, pattern in property_specs:
        setattr(self, attribute, self.process_property(key_name, pattern))

@transaction.atomic
def process_school(self, old_id, abbr, name, addr_name, street, city, zip_code):
    """Map a school from the old data source onto a ``School`` record.

    The outcome is stored in ``self.school_id_map[old_id]``: an existing school
    matched by abbreviation, a newly created one, or None when ``abbr`` is
    empty. When several existing schools match, the operator is asked to pick
    one; an empty, non-numeric or out-of-range answer creates a new school.

    Args:
        old_id: id of the school in the old data source (key of the map).
        abbr (str): school abbreviation used for matching.
        name (str): verbose school name.
        addr_name (str): name used in the postal address.
        street (str): street part of the address.
        city (str): city part of the address.
        zip_code (str): postal code; spaces are removed before use.

    Returns:
        None. Nothing is recorded once the --fast limit has been exceeded.
    """
    # Every call counts towards the --fast limit, including the skipped ones.
    self.done_schools += 1
    if self.fast and self.done_schools > self.NUMBER_OF_SCHOOLS_IN_FAST_RUN:
        return None
    # TODO improve this, do not work with abbreviations
    if not abbr:
        self.school_id_map[old_id] = None
        return None

    # Newly created schools carry a trailing "?" in their abbreviation, so
    # match that variant as well. Materialise the result once: it is counted,
    # listed and indexed below.
    candidates = list(
        School.objects.filter(
            Q(abbreviation__iexact=abbr) | Q(abbreviation__iexact=abbr + "?")
        )
    )
    row = (abbr, name, addr_name, street, city, self.fix_string(zip_code))
    if len(candidates) == 1:
        if self.verbosity >= 2:
            self.stderr.write("Matched %r to %s" % (row, candidates[0]))
        self.school_id_map[old_id] = candidates[0]
    elif candidates:
        self.stderr.write(
            "Multiple candidates for %r:\n%s"
            % (
                row,
                "\n".join(
                    "%02d: %s" % (i, candidate) for i, candidate in enumerate(candidates)
                ),
            )
        )
        # int() raises ValueError (not KeyError) on empty or non-numeric
        # input; treat that, and any index outside the list, as "create new"
        # exactly as the prompt promises.
        try:
            choice = int(input("Choice (empty or invalid to create new): "))
        except ValueError:
            choice = -1
        if 0 <= choice < len(candidates):
            self.school_id_map[old_id] = candidates[choice]
        else:
            self.school_id_map[old_id] = self.create_school(*row)
    else:
        self.school_id_map[old_id] = self.create_school(*row)
    return None

def create_school(self, abbr, name, addr_name, street, city, zip_code):
    """Build a new ``School`` flagged for review and return it.

    On a dry run the school is only instantiated; otherwise it is created in
    the database. A message is written to stderr when verbosity is 2 or more.
    """
    # Zip codes longer than 10 characters are replaced by 0; per the original
    # author the db model does not allow them (Swiss codes were named as the
    # example).
    # NOTE(review): reviewers asked whether the model should be changed to
    # support such codes instead, and pointed out that Swiss postal codes seem
    # to have only 4 digits - TODO revisit this rule.
    if len(zip_code) > 10:
        zip_code = 0

    school_fields = {
        # Question mark denotes schools needing review.
        "abbreviation": abbr + "?",
        "verbose_name": name,
        "addr_name": addr_name,
        "street": street,
        "city": city,
        "zip_code": zip_code,
    }
    if self.dry:
        school = School(**school_fields)
    else:
        school = School.objects.create(**school_fields)

    if self.verbosity >= 2:
        self.stderr.write("Created new school %s" % school)
    return school

@transaction.atomic
def process_person(
    self, user_args, user_properties, old_user_id_field, old_user_id, address=None
):
    """Create one inactive user with its properties unless it was already imported.

    Args:
        user_args (dict): keyword arguments for the ``User`` constructor. May
            contain first_name, last_name, graduation, email, birth_date and
            school_id; ``school_id`` is replaced by the ``school`` found in
            ``self.school_id_map``. The dict itself is not modified.
        user_properties (list(tuple(UserPropertyKey, string))): additional user
            properties to create; ``None`` entries are ignored. The list itself
            is not modified.
        old_user_id_field (UserPropertyKey): property key holding the user id
            from the old source (kaspar id / kms id ...), used for faster
            deduplication.
        old_user_id (int/string): id of the user in the old source.
        address (dict): optional home address with the keys street, town,
            postal_code and country; only used on a wet run.

    Returns:
        The new ``User`` (not saved on a dry run), or None when the user was
        skipped because of the --fast limit or because a user with the same
        old id already exists.
    """
    # If we run in the fast mode and we already processed enough users, we skip this one.
    self.done_users += 1
    if self.fast and self.done_users > self.NUMBER_OF_USERS_IN_FAST_RUN:
        return None

    if old_user_id:
        old_id_property = UserProperty.objects.filter(key=old_user_id_field, value=old_user_id)
    else:
        old_id_property = UserProperty.objects.none()

    first_name = user_args["first_name"]
    last_name = user_args["last_name"]
    if old_id_property.exists():
        if self.verbosity >= 2:
            self.stderr.write("Skipping user %s %s" % (first_name, last_name))
        return None

    # Work on copies so the caller's dict and list are left untouched.
    user_args = dict(user_args)
    user_properties = [prop for prop in user_properties if prop is not None]

    user_args["is_active"] = False
    if "school_id" in user_args:
        user_args["school"] = self.school_id_map.get(user_args.pop("school_id"))

    if self.verbosity >= 2:
        self.stderr.write("Creating user %s %s" % (first_name, last_name))

    if self.dry:
        new_user = User(**user_args)
    else:
        if address:
            user_args["home_address"] = Address.objects.create(
                street=address["street"],
                town=address["town"],
                postal_code=address["postal_code"],
                country=address["country"],
            )
        new_user = User.objects.create(**user_args)

    # TODO: last-contact handling is provisional; a reviewer asked for a
    # tracking issue to be created and linked here.
    if old_user_id in self.last_contact:
        # Build a real list: on Python 3 filter() returns an iterator that is
        # always truthy, which made max() fail when no year was valid.
        valid_contacts = [
            year for year in self.last_contact[old_user_id] if 1900 < year < 2017
        ]
        if valid_contacts:
            user_properties.append((self.LAST_CONTACT_PROPERTY, max(valid_contacts)))

    # A dry run must not write anything, and the user is not saved then, so
    # properties are only stored on a wet run.
    if not self.dry:
        new_user.properties.create(key=old_user_id_field, value=old_user_id)
        for key, value in user_properties:
            new_user.properties.create(key=key, value=value)

    similar_users = list(get_similar_users(new_user))
    if similar_users:
        names_of_similar = [(user.first_name, user.last_name) for user in similar_users]
        self.similar_users.append(((first_name, last_name), names_of_similar))
        if self.verbosity >= 2:
            self.stderr.write("Similar users: %s" % str(names_of_similar))
        if not self.dry:
            DuplicateUser.objects.create(user=new_user)

    return new_user

def print_stats(self):
    """Write every recorded name conflict, then the number of conflicts, to stderr."""
    report = self.stderr.write
    conflicts = self.similar_users
    for conflict in conflicts:
        # str() keeps the tuple from being unpacked as format arguments.
        report("Conflicts: %s" % str(conflict))

    report("Conflict users: %d" % len(conflicts))

def parse_dot_date(self, date_string):
    """Parse a ``day.month.year`` string into a ``datetime``.

    Space characters anywhere in the input are dropped before parsing.
    """
    compact = "".join(date_string.split(" "))
    # All dates are assumed to share this one format.
    return datetime.strptime(compact, "%d.%m.%Y")

def parse_dash_date(self, date_string):
    """Parse a ``year-month-day`` string into a ``datetime``.

    Space characters are dropped first. The placeholders ``0000-00-00`` and
    ``NULL`` stand for a missing date and yield None.
    """
    compact = date_string.replace(" ", "")
    if compact in ("0000-00-00", "NULL"):
        return None
    return datetime.strptime(compact, "%Y-%m-%d")

def process_property(self, key_name, regexp=None):
    """Return the ``UserPropertyKey`` named ``key_name``, creating it when missing.

    A missing key is created with ``regexp`` as its regex; on a dry run it is
    only instantiated and not written to the database.
    """
    # NOTE(review): a reviewer suggested using get() and handling the
    # not-found exception instead, assuming key_name is unique - TODO confirm.
    matches = UserPropertyKey.objects.filter(key_name=key_name)
    if matches.exists():
        return matches.first()
    if self.dry:
        return UserPropertyKey(key_name=key_name, regex=regexp)
    return UserPropertyKey.objects.create(key_name=key_name, regex=regexp)

def fix_string(self, string):
    """Return ``string`` with every space removed and the remaining edge whitespace stripped."""
    without_spaces = "".join(string.split(" "))
    return without_spaces.strip()
Loading