Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Old databases migrations #1038

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
8ae2ac9
Revised kaspar migrating script.
vlejd Mar 22, 2017
1ed292a
kms migration skript + improved mihration structure
vlejd Mar 23, 2017
e399f1f
Good enough migration scripts.
vlejd Mar 26, 2017
b16f898
Whole pipeline is working on wet run.
vlejd Mar 31, 2017
c45a524
pep-8 fixes
vlejd Mar 31, 2017
fd387f8
Fixed lint errors and added some options
vlejd Apr 1, 2017
3937866
Minor parameter changes.
vlejd Apr 15, 2017
1f62905
Revised kaspar migrating script.
vlejd Mar 22, 2017
dcd9d35
kms migration skript + improved mihration structure
vlejd Mar 23, 2017
d149519
Good enough migration scripts.
vlejd Mar 26, 2017
e7e472e
Whole pipeline is working on wet run.
vlejd Mar 31, 2017
f4f4039
pep-8 fixes
vlejd Mar 31, 2017
dc73736
Fixed lint errors and added some options
vlejd Apr 1, 2017
adffa56
Minor parameter changes.
vlejd Apr 15, 2017
fd07ac2
Merge branch 'kaspar-migration' of github.com:trojsten/web into kaspa…
vlejd Oct 22, 2017
f362320
Merge branch 'master' into kaspar-migration
vlejd Oct 30, 2017
b176aa0
Review fixes part 1.
vlejd Oct 30, 2017
f3d7fb6
Fix typo.
mhozza Jul 9, 2019
8e08386
Merge branch 'master' of github.com:trojsten/web into kaspar-migration
mhozza Jul 9, 2019
4094bda
black and isort.
mhozza Jul 9, 2019
48944c6
Fix some review comments.
mhozza Jul 9, 2019
e476954
Use raw string literals for regexps.
mhozza Jul 9, 2019
0320953
Use raw string literals for strings containing regexps and ignore var…
mhozza Jul 9, 2019
df2bdbd
Merge branch 'master' into kaspar-migration
mhozza Jul 10, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions trojsten/people/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

def get_similar_users(user):
"""Returns a list of users similar to the specified user."""
# TODO check birth day as well. (if defined, filter different)
return User.objects.exclude(pk=user.pk).filter(
first_name=user.first_name,
last_name=user.last_name,
Expand Down
43 changes: 43 additions & 0 deletions trojsten/people/management/commands/migrate_30rokovfks1_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from __future__ import unicode_literals

import csv

from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand


class Command(MigrateBaceCommand):
    """Import people from a CSV file, one ``process_person`` call per row.

    Rows are numbered from 1 in file order; the number is formatted into the
    row's CSV id (``30rokovFKS1_<n>``), which is passed to ``process_person``
    together with ``self.CSV_ID_PROPERTY``.
    """

    help = 'Imports people and their related info from fks_csv.'

    def add_arguments(self, parser):
        """Register the base-class options plus the positional ``file`` path."""
        super(Command, self).add_arguments(parser)
        parser.add_argument('file', type=str)

    def handle_noargs(self, **options):
        """Read the CSV named by ``options['file']`` and process every row.

        Columns read: ``kontaktovany?``, ``Meno``, ``Priezvisko``, ``Email``,
        ``Telefon``, ``Rodne priezvisko`` and ``Prezyvka``. When
        ``kontaktovany?`` equals ``'ano'``, the year 2014 is appended to
        ``self.last_contact`` for that row's CSV id. Statistics are printed
        once all rows have been handled.
        """
        super(Command, self).handle_noargs(**options)
        participants_file = options['file']

        # csv.DictReader reads lazily, so the whole loop has to run while the
        # file is still open; the with-block guarantees it is closed afterwards
        # even if processing a row raises.
        with open(participants_file) as csv_file:
            for row_number, row in enumerate(csv.DictReader(csv_file), start=1):
                csv_id = "30rokovFKS1_{0:d}".format(row_number)
                if row['kontaktovany?'] == 'ano':
                    self.last_contact[csv_id].append(2014)

                user = {
                    'first_name': row['Meno'],
                    'last_name': row['Priezvisko'],
                    'email': row['Email'],
                }
                user_properties = [
                    # Phone numbers are stored without any spaces.
                    (self.MOBIL_PROPERTY, row['Telefon'].replace(" ", "").strip()),
                    (self.BIRTH_NAME_PROPERTY, row['Rodne priezvisko']),
                    (self.NICKNAME_PROPERTY, row['Prezyvka']),
                ]

                self.process_person(user, user_properties, self.CSV_ID_PROPERTY,
                                    csv_id)

        self.print_stats()
46 changes: 46 additions & 0 deletions trojsten/people/management/commands/migrate_30rokovfks2_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from __future__ import unicode_literals

import csv

from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand


class Command(MigrateBaceCommand):
    """Import people from a CSV file, one ``process_person`` call per row.

    Rows are numbered from 1 in file order; the number is formatted into the
    row's CSV id (``30rokovFKS2_<n>``), which is passed to ``process_person``
    together with ``self.CSV_ID_PROPERTY``. Rows with an empty ``Meno`` column
    are skipped but still consume a number.
    """

    help = 'Imports people and their related info from fks_csv.'

    def add_arguments(self, parser):
        """Register the base-class options plus the positional ``file`` path."""
        super(Command, self).add_arguments(parser)
        parser.add_argument('file', type=str)

    def handle_noargs(self, **options):
        """Read the CSV named by ``options['file']`` and process every row.

        Columns read: ``Meno``, ``Priezvisko``, ``E-mail``, ``Telefon``,
        ``Rodne priezvisko``, ``Prezyvka``, ``Posobisko``, ``Pozicia`` and
        ``spomienka``. For every processed row the year 2014 is appended to
        ``self.last_contact`` under that row's CSV id. Statistics are printed
        once all rows have been handled.
        """
        super(Command, self).handle_noargs(**options)
        participants_file = options['file']

        # csv.DictReader reads lazily, so the whole loop has to run while the
        # file is still open; the with-block guarantees it is closed afterwards
        # even if processing a row raises.
        with open(participants_file) as csv_file:
            # enumerate() advances for skipped rows too, so ids stay tied to
            # the row position in the file.
            for row_number, row in enumerate(csv.DictReader(csv_file), start=1):
                csv_id = "30rokovFKS2_{0:d}".format(row_number)
                if not row['Meno']:
                    continue

                self.last_contact[csv_id].append(2014)
                user = {
                    'first_name': row['Meno'],
                    'last_name': row['Priezvisko'],
                    'email': row['E-mail'],
                }
                user_properties = [
                    # Phone numbers are stored without any spaces.
                    (self.MOBIL_PROPERTY, row['Telefon'].replace(" ", "").strip()),
                    (self.BIRTH_NAME_PROPERTY, row['Rodne priezvisko']),
                    (self.NICKNAME_PROPERTY, row['Prezyvka']),
                    (self.COMPANY_PROPERTY, row['Posobisko']),
                    (self.AFFILIATION_PROPERTY, row['Pozicia']),
                    (self.MEMORY_PROPERTY, row['spomienka']),
                ]
                # TODO: the address is not imported yet (original note: "Adresa").

                self.process_person(user, user_properties, self.CSV_ID_PROPERTY,
                                    csv_id)

        self.print_stats()
259 changes: 259 additions & 0 deletions trojsten/people/management/commands/migrate_base_class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
from __future__ import unicode_literals

from datetime import datetime
from imp import reload
from collections import defaultdict
import sys

from django.core.management.base import NoArgsCommand
from django.db import transaction
from django.db.models import Q
from django.utils.six.moves import input

from trojsten.people.helpers import get_similar_users
from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey, UserProperty, Address

reload(sys)
sys.setdefaultencoding("utf-8")


class MigrateBaceCommand(NoArgsCommand):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Base

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

help = 'Base class for importing people.'

def add_arguments(self, parser):
parser.add_argument('--wet_run',
action='store_false',
dest='dry',
default=True,
help='Actually write something to DB')
parser.add_argument('--fast',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Toto chce lepsi description - co je to "few users"?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

action='store_true',
dest='fast',
default=False,
help='Create only a few users')

def handle_noargs(self, **options):
self.dry = options['dry']
self.fast = options['fast']
self.done_users = 0
self.done_schools = 0
if self.dry:
self.stdout.write("Running dry run!")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nechceme taketo message pchat do stderr?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok


self.verbosity = options['verbosity']
self.similar_users = []
self.school_id_map = {}
self.last_contact = defaultdict(list)

CSV_ID_KEY = "csv ID"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tieto konstanty by som vydrbal do nejakeho constants.py

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

self.CSV_ID_PROPERTY = self.process_property(CSV_ID_KEY, "(.{1,20}_)?\d+")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vyrabanie properties by som urcite dal do zvlast metody. Nie som si ale isty ci to vobec patri sem do tejto classy. Skor by som si to vedel predstavit ako fixture, ktoru naladujeme do databazy. Tuna by som mozno nechal asserty na to ci existuju tie co potrebujeme.

MOBIL_KEY = "Mobil"
self.MOBIL_PROPERTY = self.process_property(MOBIL_KEY, "\+?\d+\/?\d+")
NICKNAME_KEY = "Prezyvka"
self.NICKNAME_PROPERTY = self.process_property(NICKNAME_KEY, ".{1,30}")
BIRTH_NAME_KEY = "Rodne Meno"
self.BIRTH_NAME_PROPERTY = self.process_property(BIRTH_NAME_KEY, ".{1,30}")
LAST_CONTACT_KEY = "Posledny kontakt"
# TODO fix False and stupid values
self.LAST_CONTACT_PROPERTY = self.process_property(LAST_CONTACT_KEY, "\d\d\d\d")
FKS_ID_KEY = "FKS ID"
self.FKS_ID_PROPERTY = self.process_property(FKS_ID_KEY, "\d+")
KMS_ID_KEY = "KMS ID"
self.KMS_ID_PROPERTY = self.process_property(KMS_ID_KEY, "\d+")
KMS_CAMPS_KEY = "KMS sustredenia"
self.KMS_CAMPS_PROPERTY = self.process_property(KMS_CAMPS_KEY, "\d+")
KASPAR_ID_KEY = "KSP ID"
self.KASPAR_ID_PROPERTY = self.process_property(KASPAR_ID_KEY, "\d+")
KASPAR_NOTE_KEY = "KSP note"
self.KASPAR_NOTE_PROPERTY = self.process_property(KASPAR_NOTE_KEY, ".*")
KSP_CAMPS_KEY = "KSP sustredenia"
self.KSP_CAMPS_PROPERTY = self.process_property(KSP_CAMPS_KEY, "\d+")
MEMORY_KEY = "Spomienky"
self.MEMORY_PROPERTY = self.process_property(MEMORY_KEY, ".*")
COMPANY_KEY = "Posobisko"
self.COMPANY_PROPERTY = self.process_property(COMPANY_KEY, ".*")
AFFILIATION_KEY = "Pozicia"
self.AFFILIATION_PROPERTY = self.process_property(AFFILIATION_KEY, ".*")

@transaction.atomic
def process_address(self, street, town, postal_code, country):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Toto je preco zvlast metoda? Inlinut obsah namiesto volania je cca rovnako vela kodu/informacie.
Ani atomic mi tu nedava velmi zmysel, kedze je to jedna db query.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

return Address.objects.create(street=street, town=town, postal_code=postal_code, country=country)

@transaction.atomic
def process_school(self, old_id, abbr, name, addr_name, street,
city, zip_code):

self.done_schools += 1
if self.fast and self.done_schools > 100:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

zo 100 mozes urobit konstantu

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

return None
# TODO improve this, do not work with abbreviations
if not abbr:
self.school_id_map[old_id] = None
return

candidates = School.objects.filter(
Q(abbreviation__iexact=abbr) |
Q(abbreviation__iexact=abbr + '?')
)
row = (abbr, name, addr_name, street, city, self.fix_string(zip_code))
if len(candidates) == 1:
if self.verbosity >= 2:
self.stdout.write("Matched %r to %s" % (row,
candidates[0]))
self.school_id_map[old_id] = candidates[0]
elif len(candidates) > 1:
self.stdout.write("Multiple candidates for %r:\n%s" % (
row,
"\n".join("%02d: %s" % (i, candidate)
for i, candidate in enumerate(candidates))
))
try:
choice = int(input("Choice (empty or invalid to create new): "))
self.school_id_map[old_id] = candidates[choice]
except (ValueError, KeyError):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Preco ValueError?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

zaujimave. vyhodil som.

self.school_id_map[old_id] = self.create_school(*row)
else:
self.school_id_map[old_id] = self.create_school(*row)

def create_school(self, abbr, name, addr_name, street,
city, zip_code):
abbr += '?' # Question mark denotes schools needing review.
school = None
if len(zip_code) > 10:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tato logika si zasluzi viac komentaru. No idea o co sa toto snazi.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

# Swiss zip codes
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tento comment patri k comu?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ten comment sa snazil vysvetlit pointu toho ifu. Dal som tam lepsi koment

zip_code = 0

if self.dry:
school = School(abbreviation=abbr,
verbose_name=name,
addr_name=addr_name,
street=street,
city=city,
zip_code=zip_code)
else:
school = School.objects.create(abbreviation=abbr,
verbose_name=name,
addr_name=addr_name,
street=street,
city=city,
zip_code=zip_code)
if self.verbosity >= 2:
self.stdout.write("Created new school %s" % school)
return school

@transaction.atomic
def process_person(self, user_args, user_properties, old_user_id_field, old_user_id, address=None):
"""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Skus prvy riadok docstringu mat ako strucny popis metody (hned za """).

Args: moze byt kludne odsadene rovnako ako """.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

Args:
user_args (dict): will be used for user constructor as is. Except for school_id.
user_properties (list(tuple(UserPropertyKey, string))):
will create additional user properties
old_user_id_field (UserPropertyKey): old field that contained oser id
(kaspar_id/ kms id ...), used for faster deduplication.
old_user_id (int/string): old id
user_args can have
first_name, last_name, graduation, email, birth_date, school_id
"""
# If the user already exists in our database, skip.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tento comment mi nesedi s kodom co je rovno pod nim.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

self.done_users += 1
if self.fast and self.done_users > 100:
return None

old_id_property = None
if old_user_id:
old_id_property = UserProperty.objects.filter(key=old_user_id_field, value=old_user_id)
else:
old_id_property = UserProperty.objects.none()

first_name = user_args['first_name']
last_name = user_args['last_name']
if old_id_property.exists():
if self.verbosity >= 2:
self.stdout.write("Skipping user %s %s" % (first_name,
last_name))
return None

# The username needs to be unique, thus the ID.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tento comment mi tiez nesedi s kodom pod nim.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ani mne.

user_args['is_active'] = False

if 'school_id' in user_args:
school_id = user_args['school_id']
del user_args['school_id']
user_args['school'] = self.school_id_map.get(school_id)

if self.verbosity >= 2:
self.stdout.write("Creating user %s %s" % (first_name, last_name))

new_user = None
if self.dry:
new_user = User(**user_args)
else:
addr = None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

naco je tu tato premenna - ak dobre vidim, addr sa pouzije len na riadku 197 a medzitym je aj tak prepisana.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

if address:
addr = self.process_address(address['street'],
address['town'],
address['postal_code'],
address['country'])
user_args['home_address'] = addr
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tu mozes rovno priradit to co priradujes do addr a addr vyhodit

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok


new_user = User.objects.create(**user_args)

new_user.properties.create(key=old_user_id_field, value=old_user_id)

# TODO last_contacted
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

toto todo tiez nie je moc velavravne, ideane k tomu treba spravit github issue a linknut ju tuna.

if old_user_id in self.last_contact:
contacts = self.last_contact[old_user_id]
valid_contacts = filter(lambda c: 1900 < c and c < 2017, contacts)
if valid_contacts:
user_properties.append([self.LAST_CONTACT_PROPERTY, max(valid_contacts)])

user_properties = list(filter(lambda x: x, user_properties))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Toto je velmi obskurne. Co napriklad:
user_properties = [prop for prop in user_properties if prop is not None]

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

for key, value in user_properties:
new_user.properties.create(key=key, value=value)

similar_users = get_similar_users(new_user)
if len(similar_users):
names_of_similar = [(x.first_name, x.last_name) for x in similar_users]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nie som uplne fanusik pouzivania premennej x na vsetko ale whatever.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

self.similar_users.append(((first_name, last_name), names_of_similar))
if self.verbosity >= 2:
self.stdout.write('Similar users: %s' % str(names_of_similar))
if self.dry:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

co napr. if not self.dry:

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

pass
else:
DuplicateUser.objects.create(user=new_user)

return new_user

def print_stats(self):
    """Write each entry of ``self.similar_users`` and then their count to stdout."""
    conflicts = self.similar_users
    emit = self.stdout.write

    for entry in conflicts:
        # str() first: entries may be tuples, which "%s" would try to unpack.
        emit("Conflicts: %s" % str(entry))

    emit("Conflict users: %d" % len(conflicts))

def parse_dot_date(self, date_string):
    """Parse a ``day.month.year`` string into a ``datetime``.

    Space characters anywhere in the input are dropped before parsing.
    ``datetime.strptime`` raises ``ValueError`` for input in any other format.
    """
    # Drop every space character, e.g. "1. 2. 2000" -> "1.2.2000".
    compact = ''.join(date_string.split(' '))
    # All dates are expected to share this one format.
    return datetime.strptime(compact, '%d.%m.%Y')

def parse_dash_date(self, date_string):
    """Parse a ``year-month-day`` string into a ``datetime``.

    Space characters anywhere in the input are dropped first. The
    placeholders ``"0000-00-00"`` and ``"NULL"`` yield ``None``; any other
    input that does not match the format makes ``datetime.strptime`` raise
    ``ValueError``.
    """
    # Drop every space character before comparing or parsing.
    compact = ''.join(date_string.split(' '))
    if compact in ("0000-00-00", "NULL"):
        return None
    return datetime.strptime(compact, '%Y-%m-%d')

def process_property(self, key_name, regexp=None):
user_property = UserPropertyKey.objects.filter(key_name=key_name)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nechceme tuna get a ohandlovat notfound exception? key_name je kluc/unique, nie?

if not user_property.exists():
if self.dry:
user_property = UserPropertyKey(key_name=key_name, regex=regexp)
else:
user_property = UserPropertyKey.objects.create(key_name=key_name, regex=regexp)
else:
user_property = user_property.first()
return user_property

def fix_string(self, string):
return string.replace(" ", "").strip()
Loading