-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Old databases migrations #1038
base: master
Are you sure you want to change the base?
Old databases migrations #1038
Changes from 7 commits
8ae2ac9
1ed292a
e399f1f
b16f898
c45a524
fd387f8
3937866
1f62905
dcd9d35
d149519
e7e472e
f4f4039
dc73736
adffa56
fd07ac2
f362320
b176aa0
f3d7fb6
8e08386
4094bda
48944c6
e476954
0320953
df2bdbd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from __future__ import unicode_literals | ||
|
||
import csv | ||
|
||
from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand | ||
|
||
|
||
class Command(MigrateBaceCommand):
    """Import people from the first "30 rokov FKS" CSV export.

    Every CSV row becomes one ``process_person`` call. Rows are identified by
    a synthetic id ``30rokovFKS1_<row number>`` (numbering starts at 1).
    """
    help = 'Imports people and their related info from fks_csv.'

    def add_arguments(self, parser):
        """Add the positional ``file`` argument (path to the CSV file)."""
        super(Command, self).add_arguments(parser)
        parser.add_argument('file', type=str)

    def handle_noargs(self, **options):
        """Read the CSV named by ``options['file']`` and import each row."""
        super(Command, self).handle_noargs(**options)
        participants_file = options['file']

        # The context manager closes the file even if a row fails to import.
        with open(participants_file) as csv_file:
            participants = csv.DictReader(csv_file)
            for idd, row in enumerate(participants, start=1):
                csv_id = "30rokovFKS1_{0:d}".format(idd)
                # Rows marked as contacted get 2014 recorded as a contact year.
                if row['kontaktovany?'] == 'ano':
                    self.last_contact[csv_id].append(2014)

                user = {
                    'first_name': row['Meno'],
                    'last_name': row['Priezvisko'],
                    'email': row['Email'],
                }
                user_properties = [
                    # fix_string performs the same space removal + strip that
                    # used to be spelled out inline here.
                    (self.MOBIL_PROPERTY, self.fix_string(row['Telefon'])),
                    (self.BIRTH_NAME_PROPERTY, row['Rodne priezvisko']),
                    (self.NICKNAME_PROPERTY, row['Prezyvka']),
                ]

                self.process_person(user, user_properties,
                                    self.CSV_ID_PROPERTY, csv_id)

        self.print_stats()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from __future__ import unicode_literals | ||
|
||
import csv | ||
|
||
from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand | ||
|
||
|
||
class Command(MigrateBaceCommand):
    """Import people from the second "30 rokov FKS" CSV export.

    Every CSV row with a non-empty ``Meno`` column becomes one
    ``process_person`` call. Rows are identified by a synthetic id
    ``30rokovFKS2_<row number>``; skipped rows still consume a number.
    """
    help = 'Imports people and their related info from fks_csv.'

    def add_arguments(self, parser):
        """Add the positional ``file`` argument (path to the CSV file)."""
        super(Command, self).add_arguments(parser)
        parser.add_argument('file', type=str)

    def handle_noargs(self, **options):
        """Read the CSV named by ``options['file']`` and import each row."""
        super(Command, self).handle_noargs(**options)
        participants_file = options['file']

        # The context manager closes the file even if a row fails to import.
        with open(participants_file) as csv_file:
            participants = csv.DictReader(csv_file)
            for idd, row in enumerate(participants, start=1):
                csv_id = "30rokovFKS2_{0:d}".format(idd)
                # Rows without a first name are skipped entirely.
                if not row['Meno']:
                    continue

                # Every imported row gets 2014 recorded as a contact year.
                self.last_contact[csv_id].append(2014)
                user = {
                    'first_name': row['Meno'],
                    'last_name': row['Priezvisko'],
                    'email': row['E-mail'],
                }
                user_properties = [
                    # fix_string performs the same space removal + strip that
                    # used to be spelled out inline here.
                    (self.MOBIL_PROPERTY, self.fix_string(row['Telefon'])),
                    (self.BIRTH_NAME_PROPERTY, row['Rodne priezvisko']),
                    (self.NICKNAME_PROPERTY, row['Prezyvka']),
                    (self.COMPANY_PROPERTY, row['Posobisko']),
                    (self.AFFILIATION_PROPERTY, row['Pozicia']),
                    (self.MEMORY_PROPERTY, row['spomienka']),
                ]
                # TODO: the address is not imported yet.

                self.process_person(user, user_properties,
                                    self.CSV_ID_PROPERTY, csv_id)

        self.print_stats()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,259 @@ | ||
from __future__ import unicode_literals | ||
|
||
from datetime import datetime | ||
from imp import reload | ||
from collections import defaultdict | ||
import sys | ||
|
||
from django.core.management.base import NoArgsCommand | ||
from django.db import transaction | ||
from django.db.models import Q | ||
from django.utils.six.moves import input | ||
|
||
from trojsten.people.helpers import get_similar_users | ||
from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey, UserProperty, Address | ||
|
||
# Python 2 only: site.py removes sys.setdefaultencoding at start-up, so the
# module has to be reloaded before the default encoding can be switched to
# UTF-8. Python 3 has no such function (and already defaults to UTF-8), so
# the hack is skipped there instead of crashing at import time.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding("utf-8")
|
||
|
||
class MigrateBaceCommand(NoArgsCommand): | ||
help = 'Base class for importing people.' | ||
|
||
def add_arguments(self, parser):
    """Register the options shared by all people-import commands.

    Args:
        parser: argument parser to extend.

    Options added:
        --wet_run: stores ``False`` in ``dry`` (``dry`` defaults to ``True``).
        --fast: stores ``True`` in ``fast`` (defaults to ``False``).
    """
    parser.add_argument('--wet_run',
                        action='store_false',
                        dest='dry',
                        default=True,
                        help='Actually write something to DB')
    # The help text spells out the limit enforced by process_person and
    # process_school, so "fast" is no longer a vague "few users".
    parser.add_argument('--fast',
                        action='store_true',
                        dest='fast',
                        default=False,
                        help='Process only the first 100 users and the '
                             'first 100 schools (for quick trial runs)')
|
||
def handle_noargs(self, **options):
    """Initialise the run state and resolve every user property key.

    Args:
        **options: parsed command-line options; ``dry``, ``fast`` and
            ``verbosity`` are read.

    Sets on ``self``: the option values, the ``done_users`` /
    ``done_schools`` counters, the ``similar_users`` list, the
    ``school_id_map`` dict, the ``last_contact`` mapping (id -> list) and
    one ``*_PROPERTY`` attribute per entry in the table below, each obtained
    from ``self.process_property``.
    """
    self.dry = options['dry']
    self.fast = options['fast']
    self.done_users = 0
    self.done_schools = 0
    if self.dry:
        # A diagnostic notice, not command output: keep it off stdout.
        self.stderr.write("Running dry run!")

    self.verbosity = options['verbosity']
    self.similar_users = []
    self.school_id_map = {}
    self.last_contact = defaultdict(list)

    # (attribute set on self, property key name, regex passed along with it).
    # Raw strings keep the patterns byte-identical while avoiding invalid
    # escape sequences such as "\d" in ordinary string literals.
    # NOTE(review): reviewers suggested moving these constants to a separate
    # module and loading the keys from a fixture instead.
    property_specs = (
        ('CSV_ID_PROPERTY', "csv ID", r"(.{1,20}_)?\d+"),
        ('MOBIL_PROPERTY', "Mobil", r"\+?\d+\/?\d+"),
        ('NICKNAME_PROPERTY', "Prezyvka", r".{1,30}"),
        ('BIRTH_NAME_PROPERTY', "Rodne Meno", r".{1,30}"),
        # TODO fix False and stupid values
        ('LAST_CONTACT_PROPERTY', "Posledny kontakt", r"\d\d\d\d"),
        ('FKS_ID_PROPERTY', "FKS ID", r"\d+"),
        ('KMS_ID_PROPERTY', "KMS ID", r"\d+"),
        ('KMS_CAMPS_PROPERTY', "KMS sustredenia", r"\d+"),
        ('KASPAR_ID_PROPERTY', "KSP ID", r"\d+"),
        ('KASPAR_NOTE_PROPERTY', "KSP note", r".*"),
        ('KSP_CAMPS_PROPERTY', "KSP sustredenia", r"\d+"),
        ('MEMORY_PROPERTY', "Spomienky", r".*"),
        ('COMPANY_PROPERTY', "Posobisko", r".*"),
        ('AFFILIATION_PROPERTY', "Pozicia", r".*"),
    )
    # Keys are resolved in table order, the same order as before.
    for attribute, key_name, regexp in property_specs:
        setattr(self, attribute, self.process_property(key_name, regexp))
|
||
@transaction.atomic
def process_address(self, street, town, postal_code, country):
    """Create an ``Address`` via ``Address.objects.create`` and return it.

    NOTE(review): a reviewer suggested inlining this thin wrapper at its
    single call site.
    """
    address_fields = {
        'street': street,
        'town': town,
        'postal_code': postal_code,
        'country': country,
    }
    return Address.objects.create(**address_fields)
|
||
@transaction.atomic
def process_school(self, old_id, abbr, name, addr_name, street,
                   city, zip_code):
    """Map a legacy school id onto a ``School`` in ``self.school_id_map``.

    Schools are matched case-insensitively on the abbreviation, with or
    without the trailing '?' that ``create_school`` appends. One match is
    used directly; several matches are resolved interactively; no match
    creates a new school.

    Args:
        old_id: legacy id, used as the key in ``self.school_id_map``.
        abbr: school abbreviation; a falsy value maps ``old_id`` to None.
        name, addr_name, street, city, zip_code: passed on to
            ``create_school`` (``zip_code`` after ``fix_string``).

    Returns:
        None; the result is stored in ``self.school_id_map``.
    """
    self.done_schools += 1
    # With --fast only the first 100 schools are processed.
    if self.fast and self.done_schools > 100:
        return None
    # TODO improve this, do not work with abbreviations
    if not abbr:
        self.school_id_map[old_id] = None
        return None

    # Evaluate the query once; the result is counted, listed and indexed.
    candidates = list(School.objects.filter(
        Q(abbreviation__iexact=abbr) |
        Q(abbreviation__iexact=abbr + '?')
    ))
    row = (abbr, name, addr_name, street, city, self.fix_string(zip_code))

    if len(candidates) == 1:
        if self.verbosity >= 2:
            self.stdout.write("Matched %r to %s" % (row, candidates[0]))
        self.school_id_map[old_id] = candidates[0]
    elif len(candidates) > 1:
        self.stdout.write("Multiple candidates for %r:\n%s" % (
            row,
            "\n".join("%02d: %s" % (i, candidate)
                      for i, candidate in enumerate(candidates))
        ))
        try:
            choice = int(input("Choice (empty or invalid to create new): "))
        except ValueError:
            # Empty or non-numeric answer: the prompt promises a new school.
            choice = None
        # Out-of-range and negative numbers count as "invalid" too. They
        # used to escape the old `except KeyError` and abort the import.
        if choice is not None and 0 <= choice < len(candidates):
            self.school_id_map[old_id] = candidates[choice]
        else:
            self.school_id_map[old_id] = self.create_school(*row)
    else:
        self.school_id_map[old_id] = self.create_school(*row)
    return None
|
||
def create_school(self, abbr, name, addr_name, street,
                  city, zip_code):
    """Build a ``School`` flagged for review and return it.

    The abbreviation gets a trailing '?' (marks schools needing review).
    In a dry run the instance is only constructed; otherwise it is created
    through ``School.objects.create``.
    """
    if len(zip_code) > 10:
        # Zip codes longer than 10 characters are replaced by 0. The
        # original comment attributes them to Swiss zip codes; presumably
        # they do not fit the zip_code field (TODO confirm).
        zip_code = 0

    school_fields = dict(
        abbreviation=abbr + '?',  # Question mark denotes schools needing review.
        verbose_name=name,
        addr_name=addr_name,
        street=street,
        city=city,
        zip_code=zip_code,
    )
    if self.dry:
        school = School(**school_fields)
    else:
        school = School.objects.create(**school_fields)

    if self.verbosity >= 2:
        self.stdout.write("Created new school %s" % school)
    return school
|
||
@transaction.atomic
def process_person(self, user_args, user_properties, old_user_id_field, old_user_id, address=None):
    """Create an inactive user from legacy data unless already imported.

    Args:
        user_args (dict): used for the ``User`` constructor as is, except
            that ``school_id`` is translated to ``school`` through
            ``self.school_id_map``. May contain first_name, last_name,
            graduation, email, birth_date, school_id.
        user_properties (list(tuple(UserPropertyKey, string))):
            additional user properties to create.
        old_user_id_field (UserPropertyKey): key under which the legacy
            user id (kaspar id / kms id ...) is stored; used for faster
            deduplication.
        old_user_id (int/string): the legacy id.
        address (dict): optional, with keys street, town, postal_code and
            country; only used outside dry runs.

    Returns:
        The new user, or None when the person is skipped (the --fast limit
        was reached or the legacy id is already stored).
    """
    self.done_users += 1
    # With --fast only the first 100 people are processed.
    if self.fast and self.done_users > 100:
        return None

    first_name = user_args['first_name']
    last_name = user_args['last_name']

    # A stored property with the same legacy id means this person has
    # already been imported. A falsy id is never looked up.
    already_imported = bool(old_user_id) and UserProperty.objects.filter(
        key=old_user_id_field, value=old_user_id).exists()
    if already_imported:
        if self.verbosity >= 2:
            self.stdout.write("Skipping user %s %s" % (first_name,
                                                       last_name))
        return None

    # Work on copies so the caller's dict and list are left untouched.
    user_args = dict(user_args)
    user_properties = list(user_properties)

    # Imported users start out deactivated.
    user_args['is_active'] = False

    if 'school_id' in user_args:
        school_id = user_args.pop('school_id')
        user_args['school'] = self.school_id_map.get(school_id)

    if self.verbosity >= 2:
        self.stdout.write("Creating user %s %s" % (first_name, last_name))

    if self.dry:
        new_user = User(**user_args)
    else:
        if address:
            user_args['home_address'] = self.process_address(
                address['street'],
                address['town'],
                address['postal_code'],
                address['country'])
        new_user = User.objects.create(**user_args)

    # NOTE(review): this and the loop below also run in dry runs, on an
    # unsaved user. Confirm that dry runs really leave the DB untouched.
    new_user.properties.create(key=old_user_id_field, value=old_user_id)

    # TODO last_contacted
    # NOTE(review): a reviewer asked for a tracked issue for this TODO.
    if old_user_id in self.last_contact:
        # A real list (not a lazy filter object), so the emptiness check
        # below also works on Python 3.
        valid_contacts = [year for year in self.last_contact[old_user_id]
                          if 1900 < year < 2017]
        if valid_contacts:
            user_properties.append(
                (self.LAST_CONTACT_PROPERTY, max(valid_contacts)))

    # Falsy entries (e.g. None placeholders) are ignored.
    for key, value in (prop for prop in user_properties if prop):
        new_user.properties.create(key=key, value=value)

    similar_users = get_similar_users(new_user)
    if len(similar_users):
        names_of_similar = [(similar.first_name, similar.last_name)
                            for similar in similar_users]
        self.similar_users.append(((first_name, last_name), names_of_similar))
        if self.verbosity >= 2:
            self.stdout.write('Similar users: %s' % str(names_of_similar))
        if not self.dry:
            DuplicateUser.objects.create(user=new_user)

    return new_user
|
||
def print_stats(self):
    """Write every recorded name conflict, then the total count, to stdout."""
    write = self.stdout.write
    conflicts = self.similar_users
    for entry in conflicts:
        write("Conflicts: %s" % str(entry))

    write("Conflict users: %d" % len(conflicts))
|
||
def parse_dot_date(self, date_string):
    """Parse a ``DD.MM.YYYY`` date, ignoring whitespace anywhere in it.

    Args:
        date_string: text such as ``"1. 2. 2000"``.

    Returns:
        datetime.datetime for the given day.

    Raises:
        ValueError: if the cleaned text does not match ``%d.%m.%Y``.
    """
    # split() without arguments breaks on every kind of whitespace (spaces,
    # tabs, newlines), so joining the pieces removes all of it.
    compact = ''.join(date_string.split())
    # Just hope that all dates are in the same format.
    return datetime.strptime(compact, '%d.%m.%Y')
|
||
def parse_dash_date(self, date_string):
    """Parse a ``YYYY-MM-DD`` date, ignoring whitespace anywhere in it.

    Args:
        date_string: text such as ``"2001-02-03"``.

    Returns:
        datetime.datetime, or None when the value is empty or one of the
        "no date" placeholders ``0000-00-00`` / ``NULL``.

    Raises:
        ValueError: if the cleaned text does not match ``%Y-%m-%d``.
    """
    # split() without arguments breaks on every kind of whitespace, so
    # placeholders followed by a tab or newline are recognised as well.
    compact = ''.join(date_string.split())
    if compact in ("", "0000-00-00", "NULL"):
        return None
    return datetime.strptime(compact, '%Y-%m-%d')
|
||
def process_property(self, key_name, regexp=None):
    """Return the ``UserPropertyKey`` named ``key_name``, making one if absent.

    Args:
        key_name: value of the key's ``key_name`` field.
        regexp: passed as ``regex`` when a new key has to be made.

    Returns:
        The existing key if there is one. Otherwise a new key: only
        constructed in a dry run, created through
        ``UserPropertyKey.objects.create`` in a wet run.
    """
    # first() yields None for an empty queryset, so one query covers both
    # the existence check and the fetch.
    # NOTE(review): a reviewer suggested get() with not-found handling if
    # key_name is unique. Uniqueness is not visible here, so first() stays.
    existing = UserPropertyKey.objects.filter(key_name=key_name).first()
    if existing is not None:
        return existing
    if self.dry:
        return UserPropertyKey(key_name=key_name, regex=regexp)
    return UserPropertyKey.objects.create(key_name=key_name, regex=regexp)
|
||
def fix_string(self, string):
    """Return ``string`` with all spaces removed and its ends stripped."""
    without_spaces = string.replace(" ", "")
    # strip() additionally drops other leading/trailing whitespace.
    return without_spaces.strip()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Base
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done