From 8ae2ac993e6947e92acf17bbc27b404b8d9ffba0 Mon Sep 17 00:00:00 2001 From: vlejd Date: Wed, 22 Mar 2017 22:08:43 +0100 Subject: [PATCH 01/20] Revised kaspar migrating script. --- trojsten/people/helpers.py | 1 + .../commands/migrate_people_from_kaspar.py | 70 +++++++++++++++---- 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/trojsten/people/helpers.py b/trojsten/people/helpers.py index c1a9c885d..3ac704642 100644 --- a/trojsten/people/helpers.py +++ b/trojsten/people/helpers.py @@ -5,6 +5,7 @@ def get_similar_users(user): """Returns a list of users similar to the specified user.""" + #TODO check birth day as well. (if defined, filter different) return User.objects.exclude(pk=user.pk).filter( first_name=user.first_name, last_name=user.last_name, diff --git a/trojsten/people/management/commands/migrate_people_from_kaspar.py b/trojsten/people/management/commands/migrate_people_from_kaspar.py index 4abeb5109..ecfc0411c 100644 --- a/trojsten/people/management/commands/migrate_people_from_kaspar.py +++ b/trojsten/people/management/commands/migrate_people_from_kaspar.py @@ -17,11 +17,22 @@ KASPAR_ID_LABEL = "kaspar ID" KASPAR_NOTE_LABEL = "kaspar note" - class Command(NoArgsCommand): help = 'Imports people and their related info from kaspar.' + def add_arguments(self, parser): + parser.add_argument('--wet_run', + action='store_false', + dest='dry', + default=True, + help='Actually write something to DB') + def handle_noargs(self, **options): + self.similar_users = [] + self.dry = options['dry'] + if self.dry: + self.stdout.write("Running dry run!") + self.verbosity = options['verbosity'] self.kaspar = connections['kaspar'] c = self.kaspar.cursor() @@ -57,9 +68,21 @@ def handle_noargs(self, **options): for row in c: self.process_person(*row) + for conflict in self.similar_users: + self.stdout.write("Conflicts: %s" % str(conflict)) + self.stdout.write("Conflict users: %d" % len(self.similar_users)) + @transaction.atomic def process_school(self, kaspar_id, abbr, name, addr_name, street, city, zip_code): + + if not abbr: + print("empty") + print(kaspar_id, abbr, name, street) + x = input() + self.school_id_map[kaspar_id] = None + return + candidates = School.objects.filter( Q(abbreviation__iexact=abbr) | Q(abbreviation__iexact=abbr + '?') @@ -87,12 +110,21 @@ def process_school(self, kaspar_id, abbr, name, addr_name, street, def create_school(self, kaspar_id, abbr, name, addr_name, street, city, zip_code): abbr += '?' # Question mark denotes schools needing review. - school = School.objects.create(abbreviation=abbr, - verbose_name=name, - addr_name=addr_name, - street=street, - city=city, - zip_code=zip_code) + school = None + if self.dry: + school = School(abbreviation=abbr, + verbose_name=name, + addr_name=addr_name, + street=street, + city=city, + zip_code=zip_code) + else: + school = School.objects.create(abbreviation=abbr, + verbose_name=name, + addr_name=addr_name, + street=street, + city=city, + zip_code=zip_code) if self.verbosity >= 2: self.stdout.write("Created new school %s" % school) return school @@ -105,7 +137,7 @@ def process_person(self, man_id, first_name, last_name, school_id, if self.verbosity >= 2: self.stdout.write("Skipping user %s %s" % (first_name, last_name)) - return + return new_user_args = { 'first_name': first_name, @@ -139,17 +171,25 @@ def process_person(self, man_id, first_name, last_name, school_id, if self.verbosity >= 2: self.stdout.write("Creating user %s %s" % (first_name, last_name)) - new_user = User.objects.create(**new_user_args) - self.man_id_map[man_id] = new_user + new_user = None + if self.dry: + new_user = User(**new_user_args) + else: + new_user = User.objects.create(**new_user_args) + new_user.properties.create(key=self.kaspar_id_key, value=man_id) + if note: + new_user.properties.create(key=self.kaspar_note_key, value=note) - new_user.properties.create(key=self.kaspar_id_key, value=man_id) - if note: - new_user.properties.create(key=self.kaspar_note_key, value=note) similar_users = get_similar_users(new_user) if len(similar_users): + names_of_similar = [(x.first_name, x.last_name ) for x in similar_users] + self.similar_users.append(((first_name, last_name), names_of_similar)) if self.verbosity >= 2: - self.stdout.write('Similar users: %s' % str(similar_users)) - DuplicateUser.objects.create(user=new_user) + self.stdout.write('Similar users: %s' % str(names_of_similar)) + if self.dry: + pass + else: + DuplicateUser.objects.create(user=new_user) def parse_date(self, date_string): # Remove any whitespace inside the string. From 1ed292adbf99b0b53b71d642619e3ef8bf80f973 Mon Sep 17 00:00:00 2001 From: vlejd Date: Thu, 23 Mar 2017 23:03:37 +0100 Subject: [PATCH 02/20] kms migration skript + improved mihration structure --- .../management/commands/migrate_base_class.py | 178 ++++++++++++++++++ .../management/commands/migrate_kms_csv.py | 95 ++++++++++ 2 files changed, 273 insertions(+) create mode 100644 trojsten/people/management/commands/migrate_base_class.py create mode 100644 trojsten/people/management/commands/migrate_kms_csv.py diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py new file mode 100644 index 000000000..e52d8a238 --- /dev/null +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -0,0 +1,178 @@ +from __future__ import unicode_literals + +from datetime import datetime + +from django.core.management.base import NoArgsCommand +from django.db import connections, transaction +from django.db.models import Q +from django.utils.six.moves import input + +from trojsten.people.helpers import get_similar_users +from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey, UserProperty + +import sys +reload(sys) +sys.setdefaultencoding("utf-8") + +# Kaspar property IDs +EMAIL_PROP = 1 +BIRTHDAY_PROP = 2 +# Labels for auto-generated properties +KASPAR_ID_LABEL = "kaspar ID" +KASPAR_NOTE_LABEL = "kaspar note" + +class MigrateBaceCommand(NoArgsCommand): + help = 'Base class for importing people.' + + def add_arguments(self, parser): + parser.add_argument('--wet_run', + action='store_false', + dest='dry', + default=True, + help='Actually write something to DB') + + def handle_noargs(self, **options): + self.dry = options['dry'] + if self.dry: + self.stdout.write("Running dry run!") + + self.verbosity = options['verbosity'] + self.similar_users = [] + self.school_id_map={} + + @transaction.atomic + def process_school(self, old_id, abbr, name, addr_name, street, + city, zip_code): + + if not abbr: + print("empty") + print(old_id, abbr, name, street) + x = input() + self.school_id_map[old_id] = None + return + + candidates = School.objects.filter( + Q(abbreviation__iexact=abbr) | + Q(abbreviation__iexact=abbr + '?') + ) + row = (abbr, name, addr_name, street, city, zip_code) + if len(candidates) == 1: + if self.verbosity >= 2: + self.stdout.write("Matched %r to %s" % (row, + candidates[0])) + self.school_id_map[old_id] = candidates[0] + elif len(candidates) > 1: + self.stdout.write("Multiple candidates for %r:\n%s" % ( + row, + "\n".join("%02d: %s" % (i, candidate) + for i, candidate in enumerate(candidates)) + )) + try: + choice = int(input("Choice (empty or invalid to create new): ")) + self.school_id_map[old_id] = candidates[choice] + except (ValueError, KeyError): + self.school_id_map[old_id] = self.create_school(*row) + else: + self.school_id_map[old_id] = self.create_school(*row) + + def create_school(self, abbr, name, addr_name, street, + city, zip_code): + abbr += '?' # Question mark denotes schools needing review. + school = None + if self.dry: + school = School(abbreviation=abbr, + verbose_name=name, + addr_name=addr_name, + street=street, + city=city, + zip_code=zip_code) + else: + school = School.objects.create(abbreviation=abbr, + verbose_name=name, + addr_name=addr_name, + street=street, + city=city, + zip_code=zip_code) + if self.verbosity >= 2: + self.stdout.write("Created new school %s" % school) + return school + + @transaction.atomic + def process_person(self, user_args, user_properties, old_user_id_field, old_user_id): + """ + Args: + user_args (dict): will be uset for user constructor as is. + user_properties (list(tuple(UserPropertyKey, string))): will create additional user properties + old_user_id_field (UserPropertyKey): old field that contained oser id + (kaspar_id/ kms id ...), used for faster deduplication. + old_user_id (int/string): old id + user_args can have + first_name + last_name + graduation + email + birth_date + school_id + """ + # If the user already exists in our database, skip. + old_id_property = None + if old_user_id: + old_id_property = UserProperty.objects.filter(key=old_user_id_field, value=old_user_id) + else: + old_id_property = UserProperty.objects.none() + + first_name = user_args['first_name'] + last_name = user_args['last_name'] + if old_id_property.exists(): + if self.verbosity >= 2: + self.stdout.write("Skipping user %s %s" % (first_name, + last_name)) + return + + # The username needs to be unique, thus the ID. + user_args['username'] = u'{0:s}{1:s}{2:d}'.format(first_name, last_name, old_user_id), + user_args['is_active'] = False + + #TODO fix school + if 'school_id' in user_args: + school_id = user_args['school_id'] + del user_args['school_id'] + user_args['school'] = self.school_id_map.get(school_id) + + if self.verbosity >= 2: + self.stdout.write("Creating user %s %s" % (first_name, last_name)) + + new_user = None + if self.dry: + new_user = User(**user_args) + else: + new_user = User.objects.create(**user_args) + + if old_user_id: + new_user.properties.create(key=old_user_id_field, value=old_user_id) + + for key, value in user_properties: + new_user.properties.create(key=key, value=value) + + similar_users = get_similar_users(new_user) + if len(similar_users): + names_of_similar = [(x.first_name, x.last_name ) for x in similar_users] + self.similar_users.append(((first_name, last_name), names_of_similar)) + if self.verbosity >= 2: + self.stdout.write('Similar users: %s' % str(names_of_similar)) + if self.dry: + pass + else: + DuplicateUser.objects.create(user=new_user) + + def print_stats(self): + for conflict in self.similar_users: + self.stdout.write("Conflicts: %s" % str(conflict)) + + self.stdout.write("Conflict users: %d" % len(self.similar_users)) + + def parse_date(self, date_string): + # Remove any whitespace inside the string. + date_string = date_string.replace(' ', '') + # Just hope that all dates are in the same format. + return datetime.strptime(date_string, '%d.%m.%Y') diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py new file mode 100644 index 000000000..3b867e1eb --- /dev/null +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -0,0 +1,95 @@ +from __future__ import unicode_literals + +import csv +from collections import defaultdict +from datetime import datetime +import os + + +from django.core.management.base import NoArgsCommand +from django.db import connections, transaction +from django.db.models import Q +from django.utils.six.moves import input + +from trojsten.people.helpers import get_similar_users +from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey +from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand + + +class Command(MigrateBaceCommand): + help = 'Imports people and their related info from kms_csv.' + + def add_arguments(self, parser): + super(Command, self).add_arguments(parser) + parser.add_argument('file', type=str) + + def handle_noargs(self, **options): + super(Command, self).handle_noargs(**options) + base = options['file'] + riesitelia_file = os.path.join(base, "riesitelia.csv") + riesitelia = csv.DictReader(open(riesitelia_file)) + sustredenia_file = os.path.join(base, "sustredenia.csv") + sustredenia = csv.DictReader(open(sustredenia_file)) + ucasti = defaultdict(int) + last_kontakt = {} + for sustredko in sustredenia: + idcko = sustredko['id_riesitela'].strip() + ucasti[idcko]+=1 + if sustredko['rok']: + last_kontakt[idcko] = max(last_kontakt.get(idcko,0), int(sustredko['rok'])) + + + skoly_file = os.path.join(base, "skoly.csv") + skoly = csv.DictReader(open(skoly_file)) + for skola in skoly: + abbr = skola['skratka'].split(' ', 1)[0] + addr_name = skola['nazov'] + ", " + skola['ulica'] + self.process_school(skola['id'], abbr, skola['nazov'], addr_name, skola['ulica'], + skola['mesto'], skola['PSC']) + + + kms_id_key, _ = UserPropertyKey.objects.get_or_create(key_name="KMS ID") + kms_sustredka, _ = UserPropertyKey.objects.get_or_create(key_name="KMS sustredenia") + mobil, _ = UserPropertyKey.objects.get_or_create(key_name="Mobil") + trojsten_contact, _ = UserPropertyKey.objects.get_or_create(key_name="Posledny kontakt") + + + for l in riesitelia: + if not l['meno']: + continue + idcko = l['id'] + last_kontakt[idcko] = max(last_kontakt.get(idcko,0), int(l['matura'])-3) + user = { + 'first_name': l['meno'], + 'last_name': l['priezvisko'], + 'graduation': l['matura'], + 'email': l['email'], + 'birth_date': self.parse_date(l['datnar']), + 'school_id': l['id_skoly'] + } + + #TODO treba poparsovat adresy, + 'adresa_domov' + 'adresa_kores' + + user_properties = [ + (mobil, l['mobil']), + (kms_sustredka, ucasti[idcko]), + (trojsten_contact, last_kontakt[idcko]) + ] + self.process_person(user, user_properties, kms_id_key, int(l['id'])) + + #TODO akcie, sustredenia + self.print_stats() + + + + def parse_date(self, date_string): + # Remove any whitespace inside the string. + date_string = date_string.replace(' ', '') + if date_string == "0000-00-00": + return None + else: + return datetime.strptime(date_string, '%Y-%m-%d') + + From e399f1f84b829975a344768260290e72237ed731 Mon Sep 17 00:00:00 2001 From: vlejd Date: Sun, 26 Mar 2017 19:19:37 +0200 Subject: [PATCH 03/20] Good enough migration scripts. --- .../commands/migrate_30rokovfks1_csv.py | 68 ++++++ .../commands/migrate_30rokovfks2_csv.py | 73 +++++++ .../management/commands/migrate_base_class.py | 92 ++++++-- .../management/commands/migrate_fks_csv.py | 111 ++++++++++ .../management/commands/migrate_kms_csv.py | 83 +++----- .../management/commands/migrate_ksp_kaspar.py | 137 ++++++++++++ .../commands/migrate_people_from_kaspar.py | 198 ------------------ 7 files changed, 492 insertions(+), 270 deletions(-) create mode 100644 trojsten/people/management/commands/migrate_30rokovfks1_csv.py create mode 100644 trojsten/people/management/commands/migrate_30rokovfks2_csv.py create mode 100644 trojsten/people/management/commands/migrate_fks_csv.py create mode 100644 trojsten/people/management/commands/migrate_ksp_kaspar.py delete mode 100644 trojsten/people/management/commands/migrate_people_from_kaspar.py diff --git a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py new file mode 100644 index 000000000..9cb8e8d52 --- /dev/null +++ b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py @@ -0,0 +1,68 @@ +from __future__ import unicode_literals + +import csv +from collections import defaultdict +from datetime import datetime +import os + + +from django.core.management.base import NoArgsCommand +from django.db import connections, transaction +from django.db.models import Q +from django.utils.six.moves import input + +from trojsten.people.helpers import get_similar_users +from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey +from trojsten.people.management.commands.migrate_base_class import * + + +""" +Restore the mysql database dump and run (replace and ) +Alternatively you can export these tables from phpAdmin. + +for tn in adresa osoba riesitel skola +do +mysql -u -p fks -B -e "select * from \`$tn\`;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv +done + +mysql -u -p fks -B -e "select riesitel_id, termin from seria as s, priklad as p, riesitel_priklady as rp, riesitel as r where s.id = p.seria_id and rp.priklad_id = p.id and rp.riesitel_id = r.id;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > aktivita.csv +""" + +#TODO vvysledkovky + +class Command(MigrateBaceCommand): + help = 'Imports people and their related info from fks_csv.' + + def add_arguments(self, parser): + super(Command, self).add_arguments(parser) + parser.add_argument('file', type=str) + + def handle_noargs(self, **options): + super(Command, self).handle_noargs(**options) + base = options['file'] + + participants_file = os.path.join(base, "FKS_30_rokov_1.csv") + participants = csv.DictReader(open(participants_file)) + + idd = 0 + for l in participants: + idd+=1 + contacted = l['kontaktovany?']=='ano' + user = { + 'first_name': l['Meno'], + 'last_name': l['Priezvisko'], + 'email': l['Email'], + } + user_properties = [ + (MOBIL_PROPERTY, l['Telefon']), + (BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), + (NICKNAME_PROPERTY, l['Prezyvka']), + (LAST_CONTACT_PROPERTY, 2014 if contacted else False) + ] + + self.process_person(user, user_properties, CSV_ID_PROPERTY, "30rokovFKS1_{0:d}".format(idd)) + + self.print_stats() + + + diff --git a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py new file mode 100644 index 000000000..8fe6f3fc3 --- /dev/null +++ b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py @@ -0,0 +1,73 @@ +from __future__ import unicode_literals + +import csv +from collections import defaultdict +from datetime import datetime +import os + + +from django.core.management.base import NoArgsCommand +from django.db import connections, transaction +from django.db.models import Q +from django.utils.six.moves import input + +from trojsten.people.helpers import get_similar_users +from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey +from trojsten.people.management.commands.migrate_base_class import * + + +""" +Restore the mysql database dump and run (replace and ) +Alternatively you can export these tables from phpAdmin. + +for tn in adresa osoba riesitel skola +do +mysql -u -p fks -B -e "select * from \`$tn\`;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv +done + +mysql -u -p fks -B -e "select riesitel_id, termin from seria as s, priklad as p, riesitel_priklady as rp, riesitel as r where s.id = p.seria_id and rp.priklad_id = p.id and rp.riesitel_id = r.id;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > aktivita.csv +""" + +#TODO vvysledkovky + +class Command(MigrateBaceCommand): + help = 'Imports people and their related info from fks_csv.' + + def add_arguments(self, parser): + super(Command, self).add_arguments(parser) + parser.add_argument('file', type=str) + + def handle_noargs(self, **options): + super(Command, self).handle_noargs(**options) + base = options['file'] + + participants_file = os.path.join(base, "FKS_30_rokov_2.csv") + participants = csv.DictReader(open(participants_file)) + idd = 0 + for l in participants: + idd+=1 + if not l['Meno']: + continue + + user = { + 'first_name': l['Meno'], + 'last_name': l['Priezvisko'], + 'email': l['E-mail'], + } + user_properties = [ + (MOBIL_PROPERTY, l['Telefon']), + (BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), + (NICKNAME_PROPERTY, l['Prezyvka']), + (COMPANY_PROPERTY, l['Posobisko']), + (AFFILIATION_PROPERTY, l['Pozicia']), + (MEMORY_PROPERTY, l['spomienka']), + (LAST_CONTACT_PROPERTY, 2014), + ] + #TODO Adresa + + self.process_person(user, user_properties, CSV_ID_PROPERTY, "30rokovFKS2_{0:d}".format(idd)) + + self.print_stats() + + + diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py index e52d8a238..145699037 100644 --- a/trojsten/people/management/commands/migrate_base_class.py +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -14,13 +14,6 @@ reload(sys) sys.setdefaultencoding("utf-8") -# Kaspar property IDs -EMAIL_PROP = 1 -BIRTHDAY_PROP = 2 -# Labels for auto-generated properties -KASPAR_ID_LABEL = "kaspar ID" -KASPAR_NOTE_LABEL = "kaspar note" - class MigrateBaceCommand(NoArgsCommand): help = 'Base class for importing people.' @@ -40,14 +33,17 @@ def handle_noargs(self, **options): self.similar_users = [] self.school_id_map={} + @transaction.atomic + def process_address(self, street, town, postal_code, country): + return Address.objects.create(street=street, town=town, postal_code=postal_code, country=country) + + @transaction.atomic def process_school(self, old_id, abbr, name, addr_name, street, city, zip_code): + # TODO improve this, do not work with abbreviations if not abbr: - print("empty") - print(old_id, abbr, name, street) - x = input() self.school_id_map[old_id] = None return @@ -98,21 +94,16 @@ def create_school(self, abbr, name, addr_name, street, return school @transaction.atomic - def process_person(self, user_args, user_properties, old_user_id_field, old_user_id): + def process_person(self, user_args, user_properties, old_user_id_field, old_user_id, address=None): """ Args: - user_args (dict): will be uset for user constructor as is. + user_args (dict): will be used for user constructor as is. Except for school_id. user_properties (list(tuple(UserPropertyKey, string))): will create additional user properties old_user_id_field (UserPropertyKey): old field that contained oser id (kaspar_id/ kms id ...), used for faster deduplication. old_user_id (int/string): old id user_args can have - first_name - last_name - graduation - email - birth_date - school_id + first_name, last_name, graduation, email, birth_date, school_id """ # If the user already exists in our database, skip. old_id_property = None @@ -127,13 +118,12 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user if self.verbosity >= 2: self.stdout.write("Skipping user %s %s" % (first_name, last_name)) - return + return None # The username needs to be unique, thus the ID. - user_args['username'] = u'{0:s}{1:s}{2:d}'.format(first_name, last_name, old_user_id), + user_args['username'] = u'{0:s}{1:s}_{2:s}'.format(first_name, last_name, str(old_user_id)), user_args['is_active'] = False - #TODO fix school if 'school_id' in user_args: school_id = user_args['school_id'] del user_args['school_id'] @@ -146,11 +136,20 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user if self.dry: new_user = User(**user_args) else: + addr = None + if address: + addr = process_address(address['street'], + address['town'], + address['postal_code'], + address['country']) + user_args['home_address'] = addr + new_user = User.objects.create(**user_args) if old_user_id: new_user.properties.create(key=old_user_id_field, value=old_user_id) + user_properties = list(filter(lambda x: x, user_properties)) for key, value in user_properties: new_user.properties.create(key=key, value=value) @@ -165,14 +164,63 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user else: DuplicateUser.objects.create(user=new_user) + return new_user + def print_stats(self): for conflict in self.similar_users: self.stdout.write("Conflicts: %s" % str(conflict)) self.stdout.write("Conflict users: %d" % len(self.similar_users)) - def parse_date(self, date_string): + def parse_dot_date(self, date_string): # Remove any whitespace inside the string. date_string = date_string.replace(' ', '') # Just hope that all dates are in the same format. return datetime.strptime(date_string, '%d.%m.%Y') + + def parse_dash_date(self, date_string): + # Remove any whitespace inside the string. + date_string = date_string.replace(' ', '') + if date_string == "0000-00-00" or date_string == "NULL": + return None + else: + return datetime.strptime(date_string, '%Y-%m-%d') + + def process_property(self, key_name): + #TODO handle regexp + hiddne, if does not exists, ask and create + #WARNING this is will create object in db even for dry run. + user_property, _ = UserPropertyKey.objects.get_or_create(key_name=key_name) + return user_property + + +COMMAND = MigrateBaceCommand() + +CSV_ID_KEY = "csv ID" +CSV_ID_PROPERTY = COMMAND.process_property(CSV_ID_KEY) +MOBIL_KEY = "Mobil" +MOBIL_PROPERTY = COMMAND.process_property(MOBIL_KEY) +NICKNAME_KEY = "Prezyvka" +NICKNAME_PROPERTY = COMMAND.process_property(NICKNAME_KEY) +BIRTH_NAME_KEY = "Rodne Meno" +BIRTH_NAME_PROPERTY = COMMAND.process_property(BIRTH_NAME_KEY) +LAST_CONTACT_KEY = "Posledny kontakt" +LAST_CONTACT_PROPERTY = COMMAND.process_property(LAST_CONTACT_KEY) + +FKS_ID_KEY = "FKS ID" +FKS_ID_PROPERTY = COMMAND.process_property(FKS_ID_KEY) +KMS_ID_KEY = "KMS ID" +KMS_ID_PROPERTY = COMMAND.process_property(KMS_ID_KEY) +KMS_CAMPS_KEY = "KMS sustredenia" +KMS_CAMPS_PROPERTY = COMMAND.process_property(KMS_CAMPS_KEY) +KASPAR_ID_KEY = "KSP ID" +KASPAR_ID_PROPERTY = COMMAND.process_property(KASPAR_ID_KEY) +KASPAR_NOTE_KEY = "KSP note" +KASPAR_NOTE_PROPERTY = COMMAND.process_property(KASPAR_NOTE_KEY) +KSP_CAMPS_KEY = "KSP sustredenia" +KSP_CAMPS_PROPERTY = COMMAND.process_property(KSP_CAMPS_KEY) +MEMORY_KEY = "Spomienky" +MEMORY_PROPERTY = COMMAND.process_property(MEMORY_KEY) +COMPANY_KEY = "Posobisko" +COMPANY_PROPERTY = COMMAND.process_property(COMPANY_KEY) +AFFILIATION_KEY = "Pozicia" +AFFILIATION_PROPERTY = COMMAND.process_property(AFFILIATION_KEY) diff --git a/trojsten/people/management/commands/migrate_fks_csv.py b/trojsten/people/management/commands/migrate_fks_csv.py new file mode 100644 index 000000000..090e376d7 --- /dev/null +++ b/trojsten/people/management/commands/migrate_fks_csv.py @@ -0,0 +1,111 @@ +from __future__ import unicode_literals + +import csv +from collections import defaultdict +from datetime import datetime +import os + + +from django.core.management.base import NoArgsCommand +from django.db import connections, transaction +from django.db.models import Q +from django.utils.six.moves import input + +from trojsten.people.helpers import get_similar_users +from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey +from trojsten.people.management.commands.migrate_base_class import * + + +""" +Restore the mysql database dump and run (replace and ) +Alternatively you can export these tables from phpAdmin. + +for tn in adresa osoba riesitel skola +do +mysql -u -p fks -B -e "select * from \`$tn\`;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv +done + +mysql -u -p fks -B -e "select riesitel_id, termin from seria as s, priklad as p, riesitel_priklady as rp, riesitel as r where s.id = p.seria_id and rp.priklad_id = p.id and rp.riesitel_id = r.id;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > aktivita.csv +""" + +#TODO vvysledkovky + +class Command(MigrateBaceCommand): + help = 'Imports people and their related info from fks_csv.' + + def add_arguments(self, parser): + super(Command, self).add_arguments(parser) + parser.add_argument('file', type=str) + + def handle_noargs(self, **options): + super(Command, self).handle_noargs(**options) + base = options['file'] + + addresses_file = os.path.join(base, "adresa.csv") + addresses = csv.DictReader(open(addresses_file)) + address_by_id = {} + for address in addresses: + address_by_id[address['id']] = address + + schools_file = os.path.join(base, "skola.csv") + schools = csv.DictReader(open(schools_file)) + for school in schools: + abbr = school['skratka'].split(' ', 1)[0] + addr = address_by_id[school['adresa_id']] + + street = addr['ulica'] + + addr_name = school['nazov'] + ", " + street + self.process_school(school['id'], abbr, school['nazov'], addr_name, street, + addr['mesto'], addr['psc']) + + activity_file = os.path.join(base, "aktivita.csv") + activity = csv.DictReader(open(activity_file)) + last_contact = {} + for act in activity: + idd = act['riesitel_id'] + date = self.parse_dash_date(act['termin']) + last_contact[idd] = max(last_contact.get(idd, 0), date.year) + + + people_file = os.path.join(base, "osoba.csv") + people = csv.DictReader(open(people_file)) + + people_by_id = {} + for person in people: + people_by_id[person['id']] = person + + participants_file = os.path.join(base, "riesitel.csv") + participants = csv.DictReader(open(participants_file)) + + for l in participants: + idd = l['osoba_id'] + person = people_by_id[idd] + matura = l['rok_maturity'] + last_contact[idd] = max(last_contact.get(idd,0), int(matura)-3) + address = address_by_id[person['adresa_id']] + parsed_address = { + 'street': address['ulica'], + 'town': address['mesto'], + 'postal_code': address['psc'], + 'country': address['stat'], + } + user = { + 'first_name': person['meno'], + 'last_name': person['priezvisko'], + 'graduation': matura, + 'email': person['email'], + 'birth_date': self.parse_dash_date(person['datum_narodenia']), + 'school_id': l['skola_id'], + } + + user_properties = [ + (MOBIL_PROPERTY, person['telefon']), + (LAST_CONTACT_PROPERTY, last_contact[idd]) + ] + self.process_person(user, user_properties, FKS_ID_PROPERTY, int(idd), address=parsed_address) + + self.print_stats() + + + diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py index 3b867e1eb..0fb3978da 100644 --- a/trojsten/people/management/commands/migrate_kms_csv.py +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -13,7 +13,7 @@ from trojsten.people.helpers import get_similar_users from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey -from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand +from trojsten.people.management.commands.migrate_base_class import * class Command(MigrateBaceCommand): @@ -26,70 +26,53 @@ def add_arguments(self, parser): def handle_noargs(self, **options): super(Command, self).handle_noargs(**options) base = options['file'] - riesitelia_file = os.path.join(base, "riesitelia.csv") - riesitelia = csv.DictReader(open(riesitelia_file)) - sustredenia_file = os.path.join(base, "sustredenia.csv") - sustredenia = csv.DictReader(open(sustredenia_file)) - ucasti = defaultdict(int) - last_kontakt = {} - for sustredko in sustredenia: - idcko = sustredko['id_riesitela'].strip() - ucasti[idcko]+=1 - if sustredko['rok']: - last_kontakt[idcko] = max(last_kontakt.get(idcko,0), int(sustredko['rok'])) - - - skoly_file = os.path.join(base, "skoly.csv") - skoly = csv.DictReader(open(skoly_file)) - for skola in skoly: - abbr = skola['skratka'].split(' ', 1)[0] - addr_name = skola['nazov'] + ", " + skola['ulica'] - self.process_school(skola['id'], abbr, skola['nazov'], addr_name, skola['ulica'], - skola['mesto'], skola['PSC']) - - - kms_id_key, _ = UserPropertyKey.objects.get_or_create(key_name="KMS ID") - kms_sustredka, _ = UserPropertyKey.objects.get_or_create(key_name="KMS sustredenia") - mobil, _ = UserPropertyKey.objects.get_or_create(key_name="Mobil") - trojsten_contact, _ = UserPropertyKey.objects.get_or_create(key_name="Posledny kontakt") - - - for l in riesitelia: + participants_file = os.path.join(base, "riesitelia.csv") + participants = csv.DictReader(open(participants_file)) + camps_file = os.path.join(base, "sustredenia.csv") + camps = csv.DictReader(open(camps_file)) + camps_survived = defaultdict(int) + last_contact = {} + for camp in camps: + idd = camp['id_riesitela'].strip() + camps_survived[idd]+=1 + if camp['rok']: + last_contact[idd] = max(last_contact.get(idd,0), int(camp['rok'])) + + + schools_file = os.path.join(base, "skoly.csv") + schools = csv.DictReader(open(schools_file)) + for school in schools: + abbr = school['skratka'].split(' ', 1)[0] + addr_name = school['nazov'] + ", " + school['ulica'] + self.process_school(school['id'], abbr, school['nazov'], addr_name, school['ulica'], + school['mesto'], school['PSC']) + + + for l in participants: if not l['meno']: continue - idcko = l['id'] - last_kontakt[idcko] = max(last_kontakt.get(idcko,0), int(l['matura'])-3) + idd = l['id'] + last_contact[idd] = max(last_contact.get(idd,0), int(l['matura'])-3) user = { 'first_name': l['meno'], 'last_name': l['priezvisko'], 'graduation': l['matura'], 'email': l['email'], - 'birth_date': self.parse_date(l['datnar']), + 'birth_date': self.parse_dash_date(l['datnar']), 'school_id': l['id_skoly'] } - #TODO treba poparsovat adresy, + #TODO parse addresses from string. 'adresa_domov' 'adresa_kores' user_properties = [ - (mobil, l['mobil']), - (kms_sustredka, ucasti[idcko]), - (trojsten_contact, last_kontakt[idcko]) + (MOBIL_PROPERTY, l['mobil']), + (KMS_CAMPS_PROPERTY, camps_survived[idd]), + (LAST_CONTACT_PROPERTY, last_contact[idd]) ] - self.process_person(user, user_properties, kms_id_key, int(l['id'])) + self.process_person(user, user_properties, KMS_ID_PROPERTY, int(idd)) - #TODO akcie, sustredenia + #TODO parse camps more precisely self.print_stats() - - - def parse_date(self, date_string): - # Remove any whitespace inside the string. - date_string = date_string.replace(' ', '') - if date_string == "0000-00-00": - return None - else: - return datetime.strptime(date_string, '%Y-%m-%d') - - diff --git a/trojsten/people/management/commands/migrate_ksp_kaspar.py b/trojsten/people/management/commands/migrate_ksp_kaspar.py new file mode 100644 index 000000000..fa6f6d82a --- /dev/null +++ b/trojsten/people/management/commands/migrate_ksp_kaspar.py @@ -0,0 +1,137 @@ +from __future__ import unicode_literals + +import csv +from collections import defaultdict +from datetime import datetime +import os + + +from django.core.management.base import NoArgsCommand +from django.db import connections, transaction +from django.db.models import Q +from django.utils.six.moves import input + +from trojsten.people.helpers import get_similar_users +from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey +from trojsten.people.management.commands.migrate_base_class import * + +# Kaspar property IDs +EMAIL_PROP = 1 +BIRTHDAY_PROP = 2 + +class Command(MigrateBaceCommand): + help = 'Imports people and their related info from kaspar.' + + def handle_noargs(self, **options): + super(Command, self).handle_noargs(**options) + kaspar = connections['kaspar'] + + if self.verbosity >= 1: + self.stdout.write("Migrating schools...") + + c = kaspar.cursor() + c.execute(""" + SELECT school_id, short, name, addr_name, addr_street, + addr_city, addr_zip + FROM schools; + """) + self.school_id_map = dict() + for row in c: + self.process_school(*row) + + #TODO sustredka + + if self.verbosity >= 1: + self.stdout.write("Dumping veducis") + + c.execute(""" + SELECT man_id + FROM veduci + """) + + veduci = set() + for l in c: + veduci.add(l[0]) + + if self.verbosity >= 1: + self.stdout.write("Dumping participations") + + c.execute(""" + SELECT action_id, name, date_start, date_end + FROM actions + """) + + actions = {} + for action in c: + actions[action[0]] = { + "name": action[1], + "start": action[2], + "end": action[3] + } + + c.execute(""" + SELECT action_id, man_id, task, note + FROM participants + """) + + last_contact = {} + camps_survived = {} + for participant in c: + man_id = participant[1] + action = actions[participant[0]] + last_contact[man_id] = max(last_contact.get(man_id,0), action['end'].year) + camps_survived[man_id] = camps_survived.get(man_id, 0) + 1 + + + if self.verbosity >= 1: + self.stdout.write("Creating/retrieving required UserPropertyKeys...") + + if self.verbosity >= 1: + self.stdout.write("Migrating people...") + + + fields = ["man_id", "firstname", "lastname", "school_id", "finish", "note"] + c.execute(""" + SELECT %s + FROM people; + """ % (', '.join(fields))) + + for l in c: + l = dict(zip(fields, l)) + idcko = l['man_id'] + last_contact[idcko] = max(last_contact.get(idcko,0), int(l['finish'])-3) + + user = { + 'first_name': l['firstname'], + 'last_name': l['lastname'], + 'graduation': l['finish'], + 'school_id': l['school_id'] + } + cc = kaspar.cursor() + cc.execute(""" + SELECT ppt_id, value + FROM people_prop + WHERE people_prop.man_id = %s AND ppt_id IN (%s, %s); + """, (idcko, EMAIL_PROP, BIRTHDAY_PROP)) + for prop_id, value in cc: + if prop_id == EMAIL_PROP: + user['email'] = value + elif prop_id == BIRTHDAY_PROP: + try: + user['birth_date'] = self.parse_dot_date(value) + except ValueError: + # If we can't parse the date, give up. + pass + cc.close() + + user_properties = [ + (LAST_CONTACT_PROPERTY, last_contact[idcko]), + (KASPAR_NOTE_PROPERTY, l['note']), + (KSP_CAMPS_PROPERTY, camps_survived.get(idcko,0)) + ] + userObject = self.process_person(user, user_properties, KASPAR_ID_PROPERTY, int(idcko)) + if idcko in veduci: + #TODO userObject add to group veduci + pass + + self.print_stats() diff --git a/trojsten/people/management/commands/migrate_people_from_kaspar.py b/trojsten/people/management/commands/migrate_people_from_kaspar.py deleted file mode 100644 index ecfc0411c..000000000 --- a/trojsten/people/management/commands/migrate_people_from_kaspar.py +++ /dev/null @@ -1,198 +0,0 @@ -from __future__ import unicode_literals - -from datetime import datetime - -from django.core.management.base import NoArgsCommand -from django.db import connections, transaction -from django.db.models import Q -from django.utils.six.moves import input - -from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey - -# Kaspar property IDs -EMAIL_PROP = 1 -BIRTHDAY_PROP = 2 -# Labels for auto-generated properties -KASPAR_ID_LABEL = "kaspar ID" -KASPAR_NOTE_LABEL = "kaspar note" - -class Command(NoArgsCommand): - help = 'Imports people and their related info from kaspar.' - - def add_arguments(self, parser): - parser.add_argument('--wet_run', - action='store_false', - dest='dry', - default=True, - help='Actually write something to DB') - - def handle_noargs(self, **options): - self.similar_users = [] - self.dry = options['dry'] - if self.dry: - self.stdout.write("Running dry run!") - - self.verbosity = options['verbosity'] - self.kaspar = connections['kaspar'] - c = self.kaspar.cursor() - - if self.verbosity >= 1: - self.stdout.write("Migrating schools...") - - c.execute(""" - SELECT school_id, short, name, addr_name, addr_street, - addr_city, addr_zip - FROM schools; - """) - self.school_id_map = dict() - for row in c: - self.process_school(*row) - - if self.verbosity >= 1: - self.stdout.write("Creating/retrieving required UserPropertyKeys...") - - self.kaspar_id_key, _ = UserPropertyKey.objects.get_or_create(key_name=KASPAR_ID_LABEL) - self.kaspar_note_key, _ = UserPropertyKey.objects.get_or_create(key_name=KASPAR_NOTE_LABEL) - - if self.verbosity >= 1: - self.stdout.write("Migrating people...") - - c.execute(""" - SELECT man_id, firstname, lastname, school_id, finish, note - FROM people; - """) - self.man_id_map = dict() - # This loop takes O(N) queries and I don't care -- it's a one-time - # background job anyway. - for row in c: - self.process_person(*row) - - for conflict in self.similar_users: - self.stdout.write("Conflicts: %s" % str(conflict)) - self.stdout.write("Conflict users: %d" % len(self.similar_users)) - - @transaction.atomic - def process_school(self, kaspar_id, abbr, name, addr_name, street, - city, zip_code): - - if not abbr: - print("empty") - print(kaspar_id, abbr, name, street) - x = input() - self.school_id_map[kaspar_id] = None - return - - candidates = School.objects.filter( - Q(abbreviation__iexact=abbr) | - Q(abbreviation__iexact=abbr + '?') - ) - row = (kaspar_id, abbr, name, addr_name, street, city, zip_code) - if len(candidates) == 1: - if self.verbosity >= 2: - self.stdout.write("Matched %r to %s" % (row, - candidates[0])) - self.school_id_map[kaspar_id] = candidates[0] - elif len(candidates) > 1: - self.stdout.write("Multiple candidates for %r:\n%s" % ( - row, - "\n".join("%02d: %s" % (i, candidate) - for i, candidate in enumerate(candidates)) - )) - try: - choice = int(input("Choice (empty or invalid to create new): ")) - self.school_id_map[kaspar_id] = candidates[choice] - except (ValueError, KeyError): - self.school_id_map[kaspar_id] = self.create_school(*row) - else: - self.school_id_map[kaspar_id] = self.create_school(*row) - - def create_school(self, kaspar_id, abbr, name, addr_name, street, - city, zip_code): - abbr += '?' # Question mark denotes schools needing review. - school = None - if self.dry: - school = School(abbreviation=abbr, - verbose_name=name, - addr_name=addr_name, - street=street, - city=city, - zip_code=zip_code) - else: - school = School.objects.create(abbreviation=abbr, - verbose_name=name, - addr_name=addr_name, - street=street, - city=city, - zip_code=zip_code) - if self.verbosity >= 2: - self.stdout.write("Created new school %s" % school) - return school - - @transaction.atomic - def process_person(self, man_id, first_name, last_name, school_id, - grad_year, note): - # If the user already exists in our database, skip. - if self.kaspar_id_key.properties.filter(value=man_id).exists(): - if self.verbosity >= 2: - self.stdout.write("Skipping user %s %s" % (first_name, - last_name)) - return - - new_user_args = { - 'first_name': first_name, - 'last_name': last_name, - # The username needs to be unique, thus the ID. - 'username': '%s%s%d' % (first_name, last_name, man_id), - 'is_active': False, - 'school': self.school_id_map[school_id] - } - - if grad_year: - new_user_args['graduation'] = grad_year - - c = self.kaspar.cursor() - c.execute(""" - SELECT ppt_id, value - FROM people_prop - WHERE people_prop.man_id = %s AND ppt_id IN (%s, %s); - """, (man_id, EMAIL_PROP, BIRTHDAY_PROP)) - for prop_id, value in c: - if prop_id == EMAIL_PROP: - new_user_args['email'] = value - elif prop_id == BIRTHDAY_PROP: - try: - new_user_args['birth_date'] = self.parse_date(value) - except ValueError: - # If we can't parse the date, give up. - pass - c.close() - - if self.verbosity >= 2: - self.stdout.write("Creating user %s %s" % (first_name, last_name)) - - new_user = None - if self.dry: - new_user = User(**new_user_args) - else: - new_user = User.objects.create(**new_user_args) - new_user.properties.create(key=self.kaspar_id_key, value=man_id) - if note: - new_user.properties.create(key=self.kaspar_note_key, value=note) - - similar_users = get_similar_users(new_user) - if len(similar_users): - names_of_similar = [(x.first_name, x.last_name ) for x in similar_users] - self.similar_users.append(((first_name, last_name), names_of_similar)) - if self.verbosity >= 2: - self.stdout.write('Similar users: %s' % str(names_of_similar)) - if self.dry: - pass - else: - DuplicateUser.objects.create(user=new_user) - - def parse_date(self, date_string): - # Remove any whitespace inside the string. - date_string = date_string.replace(' ', '') - # Just hope that all dates are in the same format. - return datetime.strptime(date_string, '%d.%m.%Y') From b16f898bf3892744527245e38a77f08987ff188c Mon Sep 17 00:00:00 2001 From: vlejd Date: Sat, 1 Apr 2017 00:41:35 +0200 Subject: [PATCH 04/20] Whole pipeline is working on wet run. --- .../commands/migrate_30rokovfks1_csv.py | 5 ++-- .../commands/migrate_30rokovfks2_csv.py | 5 ++-- .../management/commands/migrate_base_class.py | 25 +++++++++++-------- .../management/commands/migrate_fks_csv.py | 2 +- .../management/commands/migrate_kms_csv.py | 2 +- .../management/commands/migrate_ksp_kaspar.py | 14 ----------- 6 files changed, 21 insertions(+), 32 deletions(-) diff --git a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py index 9cb8e8d52..1afd5da31 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py @@ -39,9 +39,8 @@ def add_arguments(self, parser): def handle_noargs(self, **options): super(Command, self).handle_noargs(**options) - base = options['file'] + participants_file = options['file'] - participants_file = os.path.join(base, "FKS_30_rokov_1.csv") participants = csv.DictReader(open(participants_file)) idd = 0 @@ -54,7 +53,7 @@ def handle_noargs(self, **options): 'email': l['Email'], } user_properties = [ - (MOBIL_PROPERTY, l['Telefon']), + (MOBIL_PROPERTY, l['Telefon'].replace(" ","").strip()), (BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), (NICKNAME_PROPERTY, l['Prezyvka']), (LAST_CONTACT_PROPERTY, 2014 if contacted else False) diff --git a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py index 8fe6f3fc3..1728417d5 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py @@ -39,9 +39,8 @@ def add_arguments(self, parser): def handle_noargs(self, **options): super(Command, self).handle_noargs(**options) - base = options['file'] + participants_file = options['file'] - participants_file = os.path.join(base, "FKS_30_rokov_2.csv") participants = csv.DictReader(open(participants_file)) idd = 0 for l in participants: @@ -55,7 +54,7 @@ def handle_noargs(self, **options): 'email': l['E-mail'], } user_properties = [ - (MOBIL_PROPERTY, l['Telefon']), + (MOBIL_PROPERTY, l['Telefon'].replace(" ", "").strip()), (BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), (NICKNAME_PROPERTY, l['Prezyvka']), (COMPANY_PROPERTY, l['Posobisko']), diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py index 145699037..dbbcd3569 100644 --- a/trojsten/people/management/commands/migrate_base_class.py +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -8,7 +8,7 @@ from django.utils.six.moves import input from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey, UserProperty +from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey, UserProperty, Address import sys reload(sys) @@ -51,7 +51,7 @@ def process_school(self, old_id, abbr, name, addr_name, street, Q(abbreviation__iexact=abbr) | Q(abbreviation__iexact=abbr + '?') ) - row = (abbr, name, addr_name, street, city, zip_code) + row = (abbr, name, addr_name, street, city, self.fix_string(zip_code)) if len(candidates) == 1: if self.verbosity >= 2: self.stdout.write("Matched %r to %s" % (row, @@ -75,6 +75,10 @@ def create_school(self, abbr, name, addr_name, street, city, zip_code): abbr += '?' # Question mark denotes schools needing review. school = None + if len(zip_code) > 10: + # Swiss zip codes + zip_code = 0 + if self.dry: school = School(abbreviation=abbr, verbose_name=name, @@ -121,7 +125,6 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user return None # The username needs to be unique, thus the ID. - user_args['username'] = u'{0:s}{1:s}_{2:s}'.format(first_name, last_name, str(old_user_id)), user_args['is_active'] = False if 'school_id' in user_args: @@ -138,10 +141,10 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user else: addr = None if address: - addr = process_address(address['street'], - address['town'], - address['postal_code'], - address['country']) + addr = self.process_address(address['street'], + address['town'], + address['postal_code'], + address['country']) user_args['home_address'] = addr new_user = User.objects.create(**user_args) @@ -186,19 +189,21 @@ def parse_dash_date(self, date_string): else: return datetime.strptime(date_string, '%Y-%m-%d') - def process_property(self, key_name): + def process_property(self, key_name, regexp=None): #TODO handle regexp + hiddne, if does not exists, ask and create #WARNING this is will create object in db even for dry run. user_property, _ = UserPropertyKey.objects.get_or_create(key_name=key_name) return user_property + def fix_string(self, string): + return string.replace(" ", "").strip() COMMAND = MigrateBaceCommand() CSV_ID_KEY = "csv ID" -CSV_ID_PROPERTY = COMMAND.process_property(CSV_ID_KEY) +CSV_ID_PROPERTY = COMMAND.process_property(CSV_ID_KEY, "(.*_)?\d+") MOBIL_KEY = "Mobil" -MOBIL_PROPERTY = COMMAND.process_property(MOBIL_KEY) +MOBIL_PROPERTY = COMMAND.process_property(MOBIL_KEY, ".?.?\d*\\?") NICKNAME_KEY = "Prezyvka" NICKNAME_PROPERTY = COMMAND.process_property(NICKNAME_KEY) BIRTH_NAME_KEY = "Rodne Meno" diff --git a/trojsten/people/management/commands/migrate_fks_csv.py b/trojsten/people/management/commands/migrate_fks_csv.py index 090e376d7..680a86218 100644 --- a/trojsten/people/management/commands/migrate_fks_csv.py +++ b/trojsten/people/management/commands/migrate_fks_csv.py @@ -100,7 +100,7 @@ def handle_noargs(self, **options): } user_properties = [ - (MOBIL_PROPERTY, person['telefon']), + (MOBIL_PROPERTY, person['telefon'].replace(" ", "").strip()), (LAST_CONTACT_PROPERTY, last_contact[idd]) ] self.process_person(user, user_properties, FKS_ID_PROPERTY, int(idd), address=parsed_address) diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py index 0fb3978da..0cf821970 100644 --- a/trojsten/people/management/commands/migrate_kms_csv.py +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -67,7 +67,7 @@ def handle_noargs(self, **options): 'adresa_kores' user_properties = [ - (MOBIL_PROPERTY, l['mobil']), + (MOBIL_PROPERTY, l['mobil'].replace(" ", "").strip()), (KMS_CAMPS_PROPERTY, camps_survived[idd]), (LAST_CONTACT_PROPERTY, last_contact[idd]) ] diff --git a/trojsten/people/management/commands/migrate_ksp_kaspar.py b/trojsten/people/management/commands/migrate_ksp_kaspar.py index fa6f6d82a..408f4c3ed 100644 --- a/trojsten/people/management/commands/migrate_ksp_kaspar.py +++ b/trojsten/people/management/commands/migrate_ksp_kaspar.py @@ -41,17 +41,6 @@ def handle_noargs(self, **options): #TODO sustredka - if self.verbosity >= 1: - self.stdout.write("Dumping veducis") - - c.execute(""" - SELECT man_id - FROM veduci - """) - - veduci = set() - for l in c: - veduci.add(l[0]) if self.verbosity >= 1: self.stdout.write("Dumping participations") @@ -130,8 +119,5 @@ def handle_noargs(self, **options): (KSP_CAMPS_PROPERTY, camps_survived.get(idcko,0)) ] userObject = self.process_person(user, user_properties, KASPAR_ID_PROPERTY, int(idcko)) - if idcko in veduci: - #TODO userObject add to group veduci - pass self.print_stats() From c45a5243b4e0903ef9469614cd3befff7fccffd9 Mon Sep 17 00:00:00 2001 From: vlejd Date: Sat, 1 Apr 2017 01:12:17 +0200 Subject: [PATCH 05/20] pep-8 fixes --- .../commands/migrate_30rokovfks1_csv.py | 26 ++++--------------- .../commands/migrate_30rokovfks2_csv.py | 24 +++-------------- .../management/commands/migrate_base_class.py | 19 +++++++------- .../management/commands/migrate_fks_csv.py | 22 +++++++++------- .../management/commands/migrate_kms_csv.py | 26 ++++++++++++------- .../management/commands/migrate_ksp_kaspar.py | 12 ++++----- 6 files changed, 53 insertions(+), 76 deletions(-) diff --git a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py index 1afd5da31..bb5becc04 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py @@ -16,20 +16,6 @@ from trojsten.people.management.commands.migrate_base_class import * -""" -Restore the mysql database dump and run (replace and ) -Alternatively you can export these tables from phpAdmin. - -for tn in adresa osoba riesitel skola -do -mysql -u -p fks -B -e "select * from \`$tn\`;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv -done - -mysql -u -p fks -B -e "select riesitel_id, termin from seria as s, priklad as p, riesitel_priklady as rp, riesitel as r where s.id = p.seria_id and rp.priklad_id = p.id and rp.riesitel_id = r.id;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > aktivita.csv -""" - -#TODO vvysledkovky - class Command(MigrateBaceCommand): help = 'Imports people and their related info from fks_csv.' @@ -45,23 +31,21 @@ def handle_noargs(self, **options): idd = 0 for l in participants: - idd+=1 - contacted = l['kontaktovany?']=='ano' + idd += 1 + contacted = l['kontaktovany?'] == 'ano' user = { 'first_name': l['Meno'], 'last_name': l['Priezvisko'], 'email': l['Email'], } user_properties = [ - (MOBIL_PROPERTY, l['Telefon'].replace(" ","").strip()), + (MOBIL_PROPERTY, l['Telefon'].replace(" ", "").strip()), (BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), (NICKNAME_PROPERTY, l['Prezyvka']), (LAST_CONTACT_PROPERTY, 2014 if contacted else False) ] - self.process_person(user, user_properties, CSV_ID_PROPERTY, "30rokovFKS1_{0:d}".format(idd)) + self.process_person(user, user_properties, CSV_ID_PROPERTY, + "30rokovFKS1_{0:d}".format(idd)) self.print_stats() - - - diff --git a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py index 1728417d5..d96d6796d 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py @@ -16,20 +16,6 @@ from trojsten.people.management.commands.migrate_base_class import * -""" -Restore the mysql database dump and run (replace and ) -Alternatively you can export these tables from phpAdmin. - -for tn in adresa osoba riesitel skola -do -mysql -u -p fks -B -e "select * from \`$tn\`;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv -done - -mysql -u -p fks -B -e "select riesitel_id, termin from seria as s, priklad as p, riesitel_priklady as rp, riesitel as r where s.id = p.seria_id and rp.priklad_id = p.id and rp.riesitel_id = r.id;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > aktivita.csv -""" - -#TODO vvysledkovky - class Command(MigrateBaceCommand): help = 'Imports people and their related info from fks_csv.' @@ -44,7 +30,7 @@ def handle_noargs(self, **options): participants = csv.DictReader(open(participants_file)) idd = 0 for l in participants: - idd+=1 + idd += 1 if not l['Meno']: continue @@ -62,11 +48,9 @@ def handle_noargs(self, **options): (MEMORY_PROPERTY, l['spomienka']), (LAST_CONTACT_PROPERTY, 2014), ] - #TODO Adresa + # TODO Adresa - self.process_person(user, user_properties, CSV_ID_PROPERTY, "30rokovFKS2_{0:d}".format(idd)) + self.process_person(user, user_properties, CSV_ID_PROPERTY, + "30rokovFKS2_{0:d}".format(idd)) self.print_stats() - - - diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py index dbbcd3569..5a09e9deb 100644 --- a/trojsten/people/management/commands/migrate_base_class.py +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -14,6 +14,7 @@ reload(sys) sys.setdefaultencoding("utf-8") + class MigrateBaceCommand(NoArgsCommand): help = 'Base class for importing people.' @@ -31,13 +32,12 @@ def handle_noargs(self, **options): self.verbosity = options['verbosity'] self.similar_users = [] - self.school_id_map={} + self.school_id_map = {} @transaction.atomic def process_address(self, street, town, postal_code, country): return Address.objects.create(street=street, town=town, postal_code=postal_code, country=country) - @transaction.atomic def process_school(self, old_id, abbr, name, addr_name, street, city, zip_code): @@ -102,7 +102,8 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user """ Args: user_args (dict): will be used for user constructor as is. Except for school_id. - user_properties (list(tuple(UserPropertyKey, string))): will create additional user properties + user_properties (list(tuple(UserPropertyKey, string))): + will create additional user properties old_user_id_field (UserPropertyKey): old field that contained oser id (kaspar_id/ kms id ...), used for faster deduplication. old_user_id (int/string): old id @@ -142,9 +143,9 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user addr = None if address: addr = self.process_address(address['street'], - address['town'], - address['postal_code'], - address['country']) + address['town'], + address['postal_code'], + address['country']) user_args['home_address'] = addr new_user = User.objects.create(**user_args) @@ -158,7 +159,7 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user similar_users = get_similar_users(new_user) if len(similar_users): - names_of_similar = [(x.first_name, x.last_name ) for x in similar_users] + names_of_similar = [(x.first_name, x.last_name) for x in similar_users] self.similar_users.append(((first_name, last_name), names_of_similar)) if self.verbosity >= 2: self.stdout.write('Similar users: %s' % str(names_of_similar)) @@ -190,8 +191,8 @@ def parse_dash_date(self, date_string): return datetime.strptime(date_string, '%Y-%m-%d') def process_property(self, key_name, regexp=None): - #TODO handle regexp + hiddne, if does not exists, ask and create - #WARNING this is will create object in db even for dry run. + # TODO handle regexp + hiddne, if does not exists, ask and create + # WARNING this is will create object in db even for dry run. user_property, _ = UserPropertyKey.objects.get_or_create(key_name=key_name) return user_property diff --git a/trojsten/people/management/commands/migrate_fks_csv.py b/trojsten/people/management/commands/migrate_fks_csv.py index 680a86218..59d4d0172 100644 --- a/trojsten/people/management/commands/migrate_fks_csv.py +++ b/trojsten/people/management/commands/migrate_fks_csv.py @@ -22,13 +22,18 @@ for tn in adresa osoba riesitel skola do -mysql -u -p fks -B -e "select * from \`$tn\`;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv +mysql -u -p fks -B -e "select * from \`$tn\`;" \ +| sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv done -mysql -u -p fks -B -e "select riesitel_id, termin from seria as s, priklad as p, riesitel_priklady as rp, riesitel as r where s.id = p.seria_id and rp.priklad_id = p.id and rp.riesitel_id = r.id;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > aktivita.csv +mysql -u -p fks -B -e " \ +select riesitel_id, termin \ +from seria as s, priklad as p, riesitel_priklady as rp, riesitel as r \ +where s.id = p.seria_id and rp.priklad_id = p.id and rp.riesitel_id = r.id;" \ +| sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > aktivita.csv """ +# TODO vvysledkovky -#TODO vvysledkovky class Command(MigrateBaceCommand): help = 'Imports people and their related info from fks_csv.' @@ -57,7 +62,7 @@ def handle_noargs(self, **options): addr_name = school['nazov'] + ", " + street self.process_school(school['id'], abbr, school['nazov'], addr_name, street, - addr['mesto'], addr['psc']) + addr['mesto'], addr['psc']) activity_file = os.path.join(base, "aktivita.csv") activity = csv.DictReader(open(activity_file)) @@ -67,7 +72,6 @@ def handle_noargs(self, **options): date = self.parse_dash_date(act['termin']) last_contact[idd] = max(last_contact.get(idd, 0), date.year) - people_file = os.path.join(base, "osoba.csv") people = csv.DictReader(open(people_file)) @@ -82,7 +86,7 @@ def handle_noargs(self, **options): idd = l['osoba_id'] person = people_by_id[idd] matura = l['rok_maturity'] - last_contact[idd] = max(last_contact.get(idd,0), int(matura)-3) + last_contact[idd] = max(last_contact.get(idd, 0), int(matura)-3) address = address_by_id[person['adresa_id']] parsed_address = { 'street': address['ulica'], @@ -103,9 +107,7 @@ def handle_noargs(self, **options): (MOBIL_PROPERTY, person['telefon'].replace(" ", "").strip()), (LAST_CONTACT_PROPERTY, last_contact[idd]) ] - self.process_person(user, user_properties, FKS_ID_PROPERTY, int(idd), address=parsed_address) + self.process_person(user, user_properties, FKS_ID_PROPERTY, int(idd), + address=parsed_address) self.print_stats() - - - diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py index 0cf821970..a0524a907 100644 --- a/trojsten/people/management/commands/migrate_kms_csv.py +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -15,6 +15,17 @@ from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey from trojsten.people.management.commands.migrate_base_class import * +""" +Restore the mysql database dump and run (replace and ) +Alternatively you can export these tables from phpAdmin. + +for tn in akcie riesitelia skoly sustredenia +do +mysql -u -p fks -B -e "select * from \`$tn\`;" \ +| sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv +done +""" + class Command(MigrateBaceCommand): help = 'Imports people and their related info from kms_csv.' @@ -34,10 +45,9 @@ def handle_noargs(self, **options): last_contact = {} for camp in camps: idd = camp['id_riesitela'].strip() - camps_survived[idd]+=1 + camps_survived[idd] += 1 if camp['rok']: - last_contact[idd] = max(last_contact.get(idd,0), int(camp['rok'])) - + last_contact[idd] = max(last_contact.get(idd, 0), int(camp['rok'])) schools_file = os.path.join(base, "skoly.csv") schools = csv.DictReader(open(schools_file)) @@ -45,14 +55,13 @@ def handle_noargs(self, **options): abbr = school['skratka'].split(' ', 1)[0] addr_name = school['nazov'] + ", " + school['ulica'] self.process_school(school['id'], abbr, school['nazov'], addr_name, school['ulica'], - school['mesto'], school['PSC']) - + school['mesto'], school['PSC']) for l in participants: if not l['meno']: continue idd = l['id'] - last_contact[idd] = max(last_contact.get(idd,0), int(l['matura'])-3) + last_contact[idd] = max(last_contact.get(idd, 0), int(l['matura'])-3) user = { 'first_name': l['meno'], 'last_name': l['priezvisko'], @@ -62,7 +71,7 @@ def handle_noargs(self, **options): 'school_id': l['id_skoly'] } - #TODO parse addresses from string. + # TODO parse addresses from string. 'adresa_domov' 'adresa_kores' @@ -73,6 +82,5 @@ def handle_noargs(self, **options): ] self.process_person(user, user_properties, KMS_ID_PROPERTY, int(idd)) - #TODO parse camps more precisely + # TODO parse camps more precisely self.print_stats() - diff --git a/trojsten/people/management/commands/migrate_ksp_kaspar.py b/trojsten/people/management/commands/migrate_ksp_kaspar.py index 408f4c3ed..b8aff8134 100644 --- a/trojsten/people/management/commands/migrate_ksp_kaspar.py +++ b/trojsten/people/management/commands/migrate_ksp_kaspar.py @@ -19,6 +19,7 @@ EMAIL_PROP = 1 BIRTHDAY_PROP = 2 + class Command(MigrateBaceCommand): help = 'Imports people and their related info from kaspar.' @@ -39,8 +40,7 @@ def handle_noargs(self, **options): for row in c: self.process_school(*row) - #TODO sustredka - + # TODO sustredka if self.verbosity >= 1: self.stdout.write("Dumping participations") @@ -68,17 +68,15 @@ def handle_noargs(self, **options): for participant in c: man_id = participant[1] action = actions[participant[0]] - last_contact[man_id] = max(last_contact.get(man_id,0), action['end'].year) + last_contact[man_id] = max(last_contact.get(man_id, 0), action['end'].year) camps_survived[man_id] = camps_survived.get(man_id, 0) + 1 - if self.verbosity >= 1: self.stdout.write("Creating/retrieving required UserPropertyKeys...") if self.verbosity >= 1: self.stdout.write("Migrating people...") - fields = ["man_id", "firstname", "lastname", "school_id", "finish", "note"] c.execute(""" SELECT %s @@ -88,7 +86,7 @@ def handle_noargs(self, **options): for l in c: l = dict(zip(fields, l)) idcko = l['man_id'] - last_contact[idcko] = max(last_contact.get(idcko,0), int(l['finish'])-3) + last_contact[idcko] = max(last_contact.get(idcko, 0), int(l['finish'])-3) user = { 'first_name': l['firstname'], @@ -116,7 +114,7 @@ def handle_noargs(self, **options): user_properties = [ (LAST_CONTACT_PROPERTY, last_contact[idcko]), (KASPAR_NOTE_PROPERTY, l['note']), - (KSP_CAMPS_PROPERTY, camps_survived.get(idcko,0)) + (KSP_CAMPS_PROPERTY, camps_survived.get(idcko, 0)) ] userObject = self.process_person(user, user_properties, KASPAR_ID_PROPERTY, int(idcko)) From fd387f8680b94b6f60e1d1e1306687e549dccb50 Mon Sep 17 00:00:00 2001 From: vlejd Date: Sat, 1 Apr 2017 14:38:50 +0200 Subject: [PATCH 06/20] Fixed lint errors and added some options --- trojsten/people/helpers.py | 2 +- .../commands/migrate_30rokovfks1_csv.py | 28 ++--- .../commands/migrate_30rokovfks2_csv.py | 32 ++---- .../management/commands/migrate_base_class.py | 105 +++++++++++------- .../management/commands/migrate_fks_csv.py | 22 +--- .../management/commands/migrate_kms_csv.py | 23 +--- .../management/commands/migrate_ksp_kaspar.py | 28 ++--- 7 files changed, 106 insertions(+), 134 deletions(-) diff --git a/trojsten/people/helpers.py b/trojsten/people/helpers.py index 3ac704642..d82b23b3e 100644 --- a/trojsten/people/helpers.py +++ b/trojsten/people/helpers.py @@ -5,7 +5,7 @@ def get_similar_users(user): """Returns a list of users similar to the specified user.""" - #TODO check birth day as well. (if defined, filter different) + # TODO check birth day as well. (if defined, filter different) return User.objects.exclude(pk=user.pk).filter( first_name=user.first_name, last_name=user.last_name, diff --git a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py index bb5becc04..67b0c5a22 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py @@ -1,19 +1,8 @@ from __future__ import unicode_literals import csv -from collections import defaultdict -from datetime import datetime -import os - -from django.core.management.base import NoArgsCommand -from django.db import connections, transaction -from django.db.models import Q -from django.utils.six.moves import input - -from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey -from trojsten.people.management.commands.migrate_base_class import * +from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand class Command(MigrateBaceCommand): @@ -32,20 +21,23 @@ def handle_noargs(self, **options): idd = 0 for l in participants: idd += 1 + csv_id = "30rokovFKS1_{0:d}".format(idd) contacted = l['kontaktovany?'] == 'ano' + if contacted: + self.last_contact[csv_id].append(2014) + user = { 'first_name': l['Meno'], 'last_name': l['Priezvisko'], 'email': l['Email'], } user_properties = [ - (MOBIL_PROPERTY, l['Telefon'].replace(" ", "").strip()), - (BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), - (NICKNAME_PROPERTY, l['Prezyvka']), - (LAST_CONTACT_PROPERTY, 2014 if contacted else False) + (self.MOBIL_PROPERTY, l['Telefon'].replace(" ", "").strip()), + (self.BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), + (self.NICKNAME_PROPERTY, l['Prezyvka']) ] - self.process_person(user, user_properties, CSV_ID_PROPERTY, - "30rokovFKS1_{0:d}".format(idd)) + self.process_person(user, user_properties, self.CSV_ID_PROPERTY, + csv_id) self.print_stats() diff --git a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py index d96d6796d..bebfae18e 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py @@ -1,19 +1,8 @@ from __future__ import unicode_literals import csv -from collections import defaultdict -from datetime import datetime -import os - -from django.core.management.base import NoArgsCommand -from django.db import connections, transaction -from django.db.models import Q -from django.utils.six.moves import input - -from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey -from trojsten.people.management.commands.migrate_base_class import * +from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand class Command(MigrateBaceCommand): @@ -31,26 +20,27 @@ def handle_noargs(self, **options): idd = 0 for l in participants: idd += 1 + csv_id = "30rokovFKS2_{0:d}".format(idd) if not l['Meno']: continue + self.last_contact[csv_id].append(2014) user = { 'first_name': l['Meno'], 'last_name': l['Priezvisko'], 'email': l['E-mail'], } user_properties = [ - (MOBIL_PROPERTY, l['Telefon'].replace(" ", "").strip()), - (BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), - (NICKNAME_PROPERTY, l['Prezyvka']), - (COMPANY_PROPERTY, l['Posobisko']), - (AFFILIATION_PROPERTY, l['Pozicia']), - (MEMORY_PROPERTY, l['spomienka']), - (LAST_CONTACT_PROPERTY, 2014), + (self.MOBIL_PROPERTY, l['Telefon'].replace(" ", "").strip()), + (self.BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), + (self.NICKNAME_PROPERTY, l['Prezyvka']), + (self.COMPANY_PROPERTY, l['Posobisko']), + (self.AFFILIATION_PROPERTY, l['Pozicia']), + (self.MEMORY_PROPERTY, l['spomienka']) ] # TODO Adresa - self.process_person(user, user_properties, CSV_ID_PROPERTY, - "30rokovFKS2_{0:d}".format(idd)) + self.process_person(user, user_properties, self.CSV_ID_PROPERTY, + csv_id) self.print_stats() diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py index 5a09e9deb..e5f77eef7 100644 --- a/trojsten/people/management/commands/migrate_base_class.py +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -1,16 +1,18 @@ from __future__ import unicode_literals from datetime import datetime +from imp import reload +from collections import defaultdict +import sys from django.core.management.base import NoArgsCommand -from django.db import connections, transaction +from django.db import transaction from django.db.models import Q from django.utils.six.moves import input from trojsten.people.helpers import get_similar_users from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey, UserProperty, Address -import sys reload(sys) sys.setdefaultencoding("utf-8") @@ -24,15 +26,54 @@ def add_arguments(self, parser): dest='dry', default=True, help='Actually write something to DB') + parser.add_argument('--fast', + action='store_true', + dest='fast', + default=False, + help='Create only a few users') def handle_noargs(self, **options): self.dry = options['dry'] + self.fast = options['fast'] + self.done_users = 0 + self.done_schools = 0 if self.dry: self.stdout.write("Running dry run!") self.verbosity = options['verbosity'] self.similar_users = [] self.school_id_map = {} + self.last_contact = defaultdict(list) + + CSV_ID_KEY = "csv ID" + self.CSV_ID_PROPERTY = self.process_property(CSV_ID_KEY, "(.{1,20}_)?\d+") + MOBIL_KEY = "Mobil" + self.MOBIL_PROPERTY = self.process_property(MOBIL_KEY, "\+?\d+\/?\d+") + NICKNAME_KEY = "Prezyvka" + self.NICKNAME_PROPERTY = self.process_property(NICKNAME_KEY, ".{1,30}") + BIRTH_NAME_KEY = "Rodne Meno" + self.BIRTH_NAME_PROPERTY = self.process_property(BIRTH_NAME_KEY, ".{1,30}") + LAST_CONTACT_KEY = "Posledny kontakt" + # TODO fix False and stupid values + self.LAST_CONTACT_PROPERTY = self.process_property(LAST_CONTACT_KEY, "\d\d\d\d") + FKS_ID_KEY = "FKS ID" + self.FKS_ID_PROPERTY = self.process_property(FKS_ID_KEY, "\d+") + KMS_ID_KEY = "KMS ID" + self.KMS_ID_PROPERTY = self.process_property(KMS_ID_KEY, "\d+") + KMS_CAMPS_KEY = "KMS sustredenia" + self.KMS_CAMPS_PROPERTY = self.process_property(KMS_CAMPS_KEY, "\d+") + KASPAR_ID_KEY = "KSP ID" + self.KASPAR_ID_PROPERTY = self.process_property(KASPAR_ID_KEY, "\d+") + KASPAR_NOTE_KEY = "KSP note" + self.KASPAR_NOTE_PROPERTY = self.process_property(KASPAR_NOTE_KEY, ".*") + KSP_CAMPS_KEY = "KSP sustredenia" + self.KSP_CAMPS_PROPERTY = self.process_property(KSP_CAMPS_KEY, "\d+") + MEMORY_KEY = "Spomienky" + self.MEMORY_PROPERTY = self.process_property(MEMORY_KEY, ".*") + COMPANY_KEY = "Posobisko" + self.COMPANY_PROPERTY = self.process_property(COMPANY_KEY, ".*") + AFFILIATION_KEY = "Pozicia" + self.AFFILIATION_PROPERTY = self.process_property(AFFILIATION_KEY, ".*") @transaction.atomic def process_address(self, street, town, postal_code, country): @@ -42,6 +83,9 @@ def process_address(self, street, town, postal_code, country): def process_school(self, old_id, abbr, name, addr_name, street, city, zip_code): + self.done_schools += 1 + if self.fast and self.done_schools > 100: + return None # TODO improve this, do not work with abbreviations if not abbr: self.school_id_map[old_id] = None @@ -111,6 +155,10 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user first_name, last_name, graduation, email, birth_date, school_id """ # If the user already exists in our database, skip. + self.done_users += 1 + if self.fast and self.done_users > 100: + return None + old_id_property = None if old_user_id: old_id_property = UserProperty.objects.filter(key=old_user_id_field, value=old_user_id) @@ -150,8 +198,14 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user new_user = User.objects.create(**user_args) - if old_user_id: - new_user.properties.create(key=old_user_id_field, value=old_user_id) + new_user.properties.create(key=old_user_id_field, value=old_user_id) + + # TODO last_contacted + if old_user_id in self.last_contact: + contacts = self.last_contact[old_user_id] + valid_contacts = filter(lambda c: 1900 < c and c < 2017, contacts) + if valid_contacts: + user_properties.append([self.LAST_CONTACT_PROPERTY, max(valid_contacts)]) user_properties = list(filter(lambda x: x, user_properties)) for key, value in user_properties: @@ -191,42 +245,15 @@ def parse_dash_date(self, date_string): return datetime.strptime(date_string, '%Y-%m-%d') def process_property(self, key_name, regexp=None): - # TODO handle regexp + hiddne, if does not exists, ask and create - # WARNING this is will create object in db even for dry run. - user_property, _ = UserPropertyKey.objects.get_or_create(key_name=key_name) + user_property = UserPropertyKey.objects.filter(key_name=key_name) + if not user_property.exists(): + if self.dry: + user_property = UserPropertyKey(key_name=key_name, regex=regexp) + else: + user_property = UserPropertyKey.objects.create(key_name=key_name, regex=regexp) + else: + user_property = user_property.first() return user_property def fix_string(self, string): return string.replace(" ", "").strip() - -COMMAND = MigrateBaceCommand() - -CSV_ID_KEY = "csv ID" -CSV_ID_PROPERTY = COMMAND.process_property(CSV_ID_KEY, "(.*_)?\d+") -MOBIL_KEY = "Mobil" -MOBIL_PROPERTY = COMMAND.process_property(MOBIL_KEY, ".?.?\d*\\?") -NICKNAME_KEY = "Prezyvka" -NICKNAME_PROPERTY = COMMAND.process_property(NICKNAME_KEY) -BIRTH_NAME_KEY = "Rodne Meno" -BIRTH_NAME_PROPERTY = COMMAND.process_property(BIRTH_NAME_KEY) -LAST_CONTACT_KEY = "Posledny kontakt" -LAST_CONTACT_PROPERTY = COMMAND.process_property(LAST_CONTACT_KEY) - -FKS_ID_KEY = "FKS ID" -FKS_ID_PROPERTY = COMMAND.process_property(FKS_ID_KEY) -KMS_ID_KEY = "KMS ID" -KMS_ID_PROPERTY = COMMAND.process_property(KMS_ID_KEY) -KMS_CAMPS_KEY = "KMS sustredenia" -KMS_CAMPS_PROPERTY = COMMAND.process_property(KMS_CAMPS_KEY) -KASPAR_ID_KEY = "KSP ID" -KASPAR_ID_PROPERTY = COMMAND.process_property(KASPAR_ID_KEY) -KASPAR_NOTE_KEY = "KSP note" -KASPAR_NOTE_PROPERTY = COMMAND.process_property(KASPAR_NOTE_KEY) -KSP_CAMPS_KEY = "KSP sustredenia" -KSP_CAMPS_PROPERTY = COMMAND.process_property(KSP_CAMPS_KEY) -MEMORY_KEY = "Spomienky" -MEMORY_PROPERTY = COMMAND.process_property(MEMORY_KEY) -COMPANY_KEY = "Posobisko" -COMPANY_PROPERTY = COMMAND.process_property(COMPANY_KEY) -AFFILIATION_KEY = "Pozicia" -AFFILIATION_PROPERTY = COMMAND.process_property(AFFILIATION_KEY) diff --git a/trojsten/people/management/commands/migrate_fks_csv.py b/trojsten/people/management/commands/migrate_fks_csv.py index 59d4d0172..ca63bd03d 100644 --- a/trojsten/people/management/commands/migrate_fks_csv.py +++ b/trojsten/people/management/commands/migrate_fks_csv.py @@ -1,19 +1,9 @@ from __future__ import unicode_literals import csv -from collections import defaultdict -from datetime import datetime import os - -from django.core.management.base import NoArgsCommand -from django.db import connections, transaction -from django.db.models import Q -from django.utils.six.moves import input - -from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey -from trojsten.people.management.commands.migrate_base_class import * +from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand """ @@ -66,11 +56,10 @@ def handle_noargs(self, **options): activity_file = os.path.join(base, "aktivita.csv") activity = csv.DictReader(open(activity_file)) - last_contact = {} for act in activity: idd = act['riesitel_id'] date = self.parse_dash_date(act['termin']) - last_contact[idd] = max(last_contact.get(idd, 0), date.year) + self.last_contact[idd].append(int(date.year)) people_file = os.path.join(base, "osoba.csv") people = csv.DictReader(open(people_file)) @@ -86,7 +75,7 @@ def handle_noargs(self, **options): idd = l['osoba_id'] person = people_by_id[idd] matura = l['rok_maturity'] - last_contact[idd] = max(last_contact.get(idd, 0), int(matura)-3) + self.last_contact[idd].append(int(matura)-3) address = address_by_id[person['adresa_id']] parsed_address = { 'street': address['ulica'], @@ -104,10 +93,9 @@ def handle_noargs(self, **options): } user_properties = [ - (MOBIL_PROPERTY, person['telefon'].replace(" ", "").strip()), - (LAST_CONTACT_PROPERTY, last_contact[idd]) + (self.MOBIL_PROPERTY, person['telefon'].replace(" ", "").strip()) ] - self.process_person(user, user_properties, FKS_ID_PROPERTY, int(idd), + self.process_person(user, user_properties, self.FKS_ID_PROPERTY, idd, address=parsed_address) self.print_stats() diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py index a0524a907..e78b3c2b5 100644 --- a/trojsten/people/management/commands/migrate_kms_csv.py +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -2,18 +2,9 @@ import csv from collections import defaultdict -from datetime import datetime import os - -from django.core.management.base import NoArgsCommand -from django.db import connections, transaction -from django.db.models import Q -from django.utils.six.moves import input - -from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey -from trojsten.people.management.commands.migrate_base_class import * +from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand """ Restore the mysql database dump and run (replace and ) @@ -42,12 +33,11 @@ def handle_noargs(self, **options): camps_file = os.path.join(base, "sustredenia.csv") camps = csv.DictReader(open(camps_file)) camps_survived = defaultdict(int) - last_contact = {} for camp in camps: idd = camp['id_riesitela'].strip() camps_survived[idd] += 1 if camp['rok']: - last_contact[idd] = max(last_contact.get(idd, 0), int(camp['rok'])) + self.last_contact[idd].append(int(camp['rok'])) schools_file = os.path.join(base, "skoly.csv") schools = csv.DictReader(open(schools_file)) @@ -61,7 +51,7 @@ def handle_noargs(self, **options): if not l['meno']: continue idd = l['id'] - last_contact[idd] = max(last_contact.get(idd, 0), int(l['matura'])-3) + self.last_contact[idd].append(int(l['matura'])-3) user = { 'first_name': l['meno'], 'last_name': l['priezvisko'], @@ -76,11 +66,10 @@ def handle_noargs(self, **options): 'adresa_kores' user_properties = [ - (MOBIL_PROPERTY, l['mobil'].replace(" ", "").strip()), - (KMS_CAMPS_PROPERTY, camps_survived[idd]), - (LAST_CONTACT_PROPERTY, last_contact[idd]) + (self.MOBIL_PROPERTY, l['mobil'].replace(" ", "").strip()), + (self.KMS_CAMPS_PROPERTY, camps_survived[idd]) ] - self.process_person(user, user_properties, KMS_ID_PROPERTY, int(idd)) + self.process_person(user, user_properties, self.KMS_ID_PROPERTY, idd) # TODO parse camps more precisely self.print_stats() diff --git a/trojsten/people/management/commands/migrate_ksp_kaspar.py b/trojsten/people/management/commands/migrate_ksp_kaspar.py index b8aff8134..48278c8a8 100644 --- a/trojsten/people/management/commands/migrate_ksp_kaspar.py +++ b/trojsten/people/management/commands/migrate_ksp_kaspar.py @@ -1,19 +1,7 @@ from __future__ import unicode_literals -import csv -from collections import defaultdict -from datetime import datetime -import os - - -from django.core.management.base import NoArgsCommand -from django.db import connections, transaction -from django.db.models import Q -from django.utils.six.moves import input - -from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey -from trojsten.people.management.commands.migrate_base_class import * +from django.db import connections +from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand # Kaspar property IDs EMAIL_PROP = 1 @@ -63,12 +51,11 @@ def handle_noargs(self, **options): FROM participants """) - last_contact = {} camps_survived = {} for participant in c: man_id = participant[1] action = actions[participant[0]] - last_contact[man_id] = max(last_contact.get(man_id, 0), action['end'].year) + self.last_contact[man_id].append(int(action['end'].year)) camps_survived[man_id] = camps_survived.get(man_id, 0) + 1 if self.verbosity >= 1: @@ -86,7 +73,7 @@ def handle_noargs(self, **options): for l in c: l = dict(zip(fields, l)) idcko = l['man_id'] - last_contact[idcko] = max(last_contact.get(idcko, 0), int(l['finish'])-3) + self.last_contact[idcko].append(int(l['finish'])-3) user = { 'first_name': l['firstname'], @@ -112,10 +99,9 @@ def handle_noargs(self, **options): cc.close() user_properties = [ - (LAST_CONTACT_PROPERTY, last_contact[idcko]), - (KASPAR_NOTE_PROPERTY, l['note']), - (KSP_CAMPS_PROPERTY, camps_survived.get(idcko, 0)) + (self.KASPAR_NOTE_PROPERTY, l['note']), + (self.KSP_CAMPS_PROPERTY, camps_survived.get(idcko, 0)) ] - userObject = self.process_person(user, user_properties, KASPAR_ID_PROPERTY, int(idcko)) + self.process_person(user, user_properties, self.KASPAR_ID_PROPERTY, idcko) self.print_stats() From 3937866ecd3c1b725dfe1621d9d321eaaa422f30 Mon Sep 17 00:00:00 2001 From: vlejd Date: Sun, 16 Apr 2017 00:08:06 +0200 Subject: [PATCH 07/20] Minor parameter changes. --- trojsten/people/management/commands/migrate_fks_csv.py | 5 +++-- trojsten/people/management/commands/migrate_kms_csv.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/trojsten/people/management/commands/migrate_fks_csv.py b/trojsten/people/management/commands/migrate_fks_csv.py index ca63bd03d..49e0faad4 100644 --- a/trojsten/people/management/commands/migrate_fks_csv.py +++ b/trojsten/people/management/commands/migrate_fks_csv.py @@ -30,11 +30,12 @@ class Command(MigrateBaceCommand): def add_arguments(self, parser): super(Command, self).add_arguments(parser) - parser.add_argument('file', type=str) + parser.add_argument('csv_directory', type=str, + help="Directory containing all csv files.") def handle_noargs(self, **options): super(Command, self).handle_noargs(**options) - base = options['file'] + base = options['csv_directory'] addresses_file = os.path.join(base, "adresa.csv") addresses = csv.DictReader(open(addresses_file)) diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py index e78b3c2b5..8d87ac71c 100644 --- a/trojsten/people/management/commands/migrate_kms_csv.py +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -23,11 +23,12 @@ class Command(MigrateBaceCommand): def add_arguments(self, parser): super(Command, self).add_arguments(parser) - parser.add_argument('file', type=str) + parser.add_argument('csv_directory', type=str, + help="Directory containing all csv files.") def handle_noargs(self, **options): super(Command, self).handle_noargs(**options) - base = options['file'] + base = options['csv_directory'] participants_file = os.path.join(base, "riesitelia.csv") participants = csv.DictReader(open(participants_file)) camps_file = os.path.join(base, "sustredenia.csv") From 1f629053a83d67af0711352df6674f51021da07e Mon Sep 17 00:00:00 2001 From: vlejd Date: Wed, 22 Mar 2017 22:08:43 +0100 Subject: [PATCH 08/20] Revised kaspar migrating script. --- trojsten/people/helpers.py | 1 + .../commands/migrate_people_from_kaspar.py | 70 +++++++++++++++---- 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/trojsten/people/helpers.py b/trojsten/people/helpers.py index f42fe911c..84c222b79 100644 --- a/trojsten/people/helpers.py +++ b/trojsten/people/helpers.py @@ -9,6 +9,7 @@ def get_similar_users(user): """Returns a list of users similar to the specified user.""" + #TODO check birth day as well. (if defined, filter different) return User.objects.exclude(pk=user.pk).filter( first_name=user.first_name, last_name=user.last_name, diff --git a/trojsten/people/management/commands/migrate_people_from_kaspar.py b/trojsten/people/management/commands/migrate_people_from_kaspar.py index 4abeb5109..ecfc0411c 100644 --- a/trojsten/people/management/commands/migrate_people_from_kaspar.py +++ b/trojsten/people/management/commands/migrate_people_from_kaspar.py @@ -17,11 +17,22 @@ KASPAR_ID_LABEL = "kaspar ID" KASPAR_NOTE_LABEL = "kaspar note" - class Command(NoArgsCommand): help = 'Imports people and their related info from kaspar.' + def add_arguments(self, parser): + parser.add_argument('--wet_run', + action='store_false', + dest='dry', + default=True, + help='Actually write something to DB') + def handle_noargs(self, **options): + self.similar_users = [] + self.dry = options['dry'] + if self.dry: + self.stdout.write("Running dry run!") + self.verbosity = options['verbosity'] self.kaspar = connections['kaspar'] c = self.kaspar.cursor() @@ -57,9 +68,21 @@ def handle_noargs(self, **options): for row in c: self.process_person(*row) + for conflict in self.similar_users: + self.stdout.write("Conflicts: %s" % str(conflict)) + self.stdout.write("Conflict users: %d" % len(self.similar_users)) + @transaction.atomic def process_school(self, kaspar_id, abbr, name, addr_name, street, city, zip_code): + + if not abbr: + print("empty") + print(kaspar_id, abbr, name, street) + x = input() + self.school_id_map[kaspar_id] = None + return + candidates = School.objects.filter( Q(abbreviation__iexact=abbr) | Q(abbreviation__iexact=abbr + '?') @@ -87,12 +110,21 @@ def process_school(self, kaspar_id, abbr, name, addr_name, street, def create_school(self, kaspar_id, abbr, name, addr_name, street, city, zip_code): abbr += '?' # Question mark denotes schools needing review. - school = School.objects.create(abbreviation=abbr, - verbose_name=name, - addr_name=addr_name, - street=street, - city=city, - zip_code=zip_code) + school = None + if self.dry: + school = School(abbreviation=abbr, + verbose_name=name, + addr_name=addr_name, + street=street, + city=city, + zip_code=zip_code) + else: + school = School.objects.create(abbreviation=abbr, + verbose_name=name, + addr_name=addr_name, + street=street, + city=city, + zip_code=zip_code) if self.verbosity >= 2: self.stdout.write("Created new school %s" % school) return school @@ -105,7 +137,7 @@ def process_person(self, man_id, first_name, last_name, school_id, if self.verbosity >= 2: self.stdout.write("Skipping user %s %s" % (first_name, last_name)) - return + return new_user_args = { 'first_name': first_name, @@ -139,17 +171,25 @@ def process_person(self, man_id, first_name, last_name, school_id, if self.verbosity >= 2: self.stdout.write("Creating user %s %s" % (first_name, last_name)) - new_user = User.objects.create(**new_user_args) - self.man_id_map[man_id] = new_user + new_user = None + if self.dry: + new_user = User(**new_user_args) + else: + new_user = User.objects.create(**new_user_args) + new_user.properties.create(key=self.kaspar_id_key, value=man_id) + if note: + new_user.properties.create(key=self.kaspar_note_key, value=note) - new_user.properties.create(key=self.kaspar_id_key, value=man_id) - if note: - new_user.properties.create(key=self.kaspar_note_key, value=note) similar_users = get_similar_users(new_user) if len(similar_users): + names_of_similar = [(x.first_name, x.last_name ) for x in similar_users] + self.similar_users.append(((first_name, last_name), names_of_similar)) if self.verbosity >= 2: - self.stdout.write('Similar users: %s' % str(similar_users)) - DuplicateUser.objects.create(user=new_user) + self.stdout.write('Similar users: %s' % str(names_of_similar)) + if self.dry: + pass + else: + DuplicateUser.objects.create(user=new_user) def parse_date(self, date_string): # Remove any whitespace inside the string. From dcd9d3512154bc47a6a7fb5f903249fd53c6df46 Mon Sep 17 00:00:00 2001 From: vlejd Date: Thu, 23 Mar 2017 23:03:37 +0100 Subject: [PATCH 09/20] kms migration skript + improved mihration structure --- .../management/commands/migrate_base_class.py | 178 ++++++++++++++++++ .../management/commands/migrate_kms_csv.py | 95 ++++++++++ 2 files changed, 273 insertions(+) create mode 100644 trojsten/people/management/commands/migrate_base_class.py create mode 100644 trojsten/people/management/commands/migrate_kms_csv.py diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py new file mode 100644 index 000000000..e52d8a238 --- /dev/null +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -0,0 +1,178 @@ +from __future__ import unicode_literals + +from datetime import datetime + +from django.core.management.base import NoArgsCommand +from django.db import connections, transaction +from django.db.models import Q +from django.utils.six.moves import input + +from trojsten.people.helpers import get_similar_users +from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey, UserProperty + +import sys +reload(sys) +sys.setdefaultencoding("utf-8") + +# Kaspar property IDs +EMAIL_PROP = 1 +BIRTHDAY_PROP = 2 +# Labels for auto-generated properties +KASPAR_ID_LABEL = "kaspar ID" +KASPAR_NOTE_LABEL = "kaspar note" + +class MigrateBaceCommand(NoArgsCommand): + help = 'Base class for importing people.' + + def add_arguments(self, parser): + parser.add_argument('--wet_run', + action='store_false', + dest='dry', + default=True, + help='Actually write something to DB') + + def handle_noargs(self, **options): + self.dry = options['dry'] + if self.dry: + self.stdout.write("Running dry run!") + + self.verbosity = options['verbosity'] + self.similar_users = [] + self.school_id_map={} + + @transaction.atomic + def process_school(self, old_id, abbr, name, addr_name, street, + city, zip_code): + + if not abbr: + print("empty") + print(old_id, abbr, name, street) + x = input() + self.school_id_map[old_id] = None + return + + candidates = School.objects.filter( + Q(abbreviation__iexact=abbr) | + Q(abbreviation__iexact=abbr + '?') + ) + row = (abbr, name, addr_name, street, city, zip_code) + if len(candidates) == 1: + if self.verbosity >= 2: + self.stdout.write("Matched %r to %s" % (row, + candidates[0])) + self.school_id_map[old_id] = candidates[0] + elif len(candidates) > 1: + self.stdout.write("Multiple candidates for %r:\n%s" % ( + row, + "\n".join("%02d: %s" % (i, candidate) + for i, candidate in enumerate(candidates)) + )) + try: + choice = int(input("Choice (empty or invalid to create new): ")) + self.school_id_map[old_id] = candidates[choice] + except (ValueError, KeyError): + self.school_id_map[old_id] = self.create_school(*row) + else: + self.school_id_map[old_id] = self.create_school(*row) + + def create_school(self, abbr, name, addr_name, street, + city, zip_code): + abbr += '?' # Question mark denotes schools needing review. + school = None + if self.dry: + school = School(abbreviation=abbr, + verbose_name=name, + addr_name=addr_name, + street=street, + city=city, + zip_code=zip_code) + else: + school = School.objects.create(abbreviation=abbr, + verbose_name=name, + addr_name=addr_name, + street=street, + city=city, + zip_code=zip_code) + if self.verbosity >= 2: + self.stdout.write("Created new school %s" % school) + return school + + @transaction.atomic + def process_person(self, user_args, user_properties, old_user_id_field, old_user_id): + """ + Args: + user_args (dict): will be uset for user constructor as is. + user_properties (list(tuple(UserPropertyKey, string))): will create additional user properties + old_user_id_field (UserPropertyKey): old field that contained oser id + (kaspar_id/ kms id ...), used for faster deduplication. + old_user_id (int/string): old id + user_args can have + first_name + last_name + graduation + email + birth_date + school_id + """ + # If the user already exists in our database, skip. + old_id_property = None + if old_user_id: + old_id_property = UserProperty.objects.filter(key=old_user_id_field, value=old_user_id) + else: + old_id_property = UserProperty.objects.none() + + first_name = user_args['first_name'] + last_name = user_args['last_name'] + if old_id_property.exists(): + if self.verbosity >= 2: + self.stdout.write("Skipping user %s %s" % (first_name, + last_name)) + return + + # The username needs to be unique, thus the ID. + user_args['username'] = u'{0:s}{1:s}{2:d}'.format(first_name, last_name, old_user_id), + user_args['is_active'] = False + + #TODO fix school + if 'school_id' in user_args: + school_id = user_args['school_id'] + del user_args['school_id'] + user_args['school'] = self.school_id_map.get(school_id) + + if self.verbosity >= 2: + self.stdout.write("Creating user %s %s" % (first_name, last_name)) + + new_user = None + if self.dry: + new_user = User(**user_args) + else: + new_user = User.objects.create(**user_args) + + if old_user_id: + new_user.properties.create(key=old_user_id_field, value=old_user_id) + + for key, value in user_properties: + new_user.properties.create(key=key, value=value) + + similar_users = get_similar_users(new_user) + if len(similar_users): + names_of_similar = [(x.first_name, x.last_name ) for x in similar_users] + self.similar_users.append(((first_name, last_name), names_of_similar)) + if self.verbosity >= 2: + self.stdout.write('Similar users: %s' % str(names_of_similar)) + if self.dry: + pass + else: + DuplicateUser.objects.create(user=new_user) + + def print_stats(self): + for conflict in self.similar_users: + self.stdout.write("Conflicts: %s" % str(conflict)) + + self.stdout.write("Conflict users: %d" % len(self.similar_users)) + + def parse_date(self, date_string): + # Remove any whitespace inside the string. + date_string = date_string.replace(' ', '') + # Just hope that all dates are in the same format. + return datetime.strptime(date_string, '%d.%m.%Y') diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py new file mode 100644 index 000000000..3b867e1eb --- /dev/null +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -0,0 +1,95 @@ +from __future__ import unicode_literals + +import csv +from collections import defaultdict +from datetime import datetime +import os + + +from django.core.management.base import NoArgsCommand +from django.db import connections, transaction +from django.db.models import Q +from django.utils.six.moves import input + +from trojsten.people.helpers import get_similar_users +from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey +from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand + + +class Command(MigrateBaceCommand): + help = 'Imports people and their related info from kms_csv.' + + def add_arguments(self, parser): + super(Command, self).add_arguments(parser) + parser.add_argument('file', type=str) + + def handle_noargs(self, **options): + super(Command, self).handle_noargs(**options) + base = options['file'] + riesitelia_file = os.path.join(base, "riesitelia.csv") + riesitelia = csv.DictReader(open(riesitelia_file)) + sustredenia_file = os.path.join(base, "sustredenia.csv") + sustredenia = csv.DictReader(open(sustredenia_file)) + ucasti = defaultdict(int) + last_kontakt = {} + for sustredko in sustredenia: + idcko = sustredko['id_riesitela'].strip() + ucasti[idcko]+=1 + if sustredko['rok']: + last_kontakt[idcko] = max(last_kontakt.get(idcko,0), int(sustredko['rok'])) + + + skoly_file = os.path.join(base, "skoly.csv") + skoly = csv.DictReader(open(skoly_file)) + for skola in skoly: + abbr = skola['skratka'].split(' ', 1)[0] + addr_name = skola['nazov'] + ", " + skola['ulica'] + self.process_school(skola['id'], abbr, skola['nazov'], addr_name, skola['ulica'], + skola['mesto'], skola['PSC']) + + + kms_id_key, _ = UserPropertyKey.objects.get_or_create(key_name="KMS ID") + kms_sustredka, _ = UserPropertyKey.objects.get_or_create(key_name="KMS sustredenia") + mobil, _ = UserPropertyKey.objects.get_or_create(key_name="Mobil") + trojsten_contact, _ = UserPropertyKey.objects.get_or_create(key_name="Posledny kontakt") + + + for l in riesitelia: + if not l['meno']: + continue + idcko = l['id'] + last_kontakt[idcko] = max(last_kontakt.get(idcko,0), int(l['matura'])-3) + user = { + 'first_name': l['meno'], + 'last_name': l['priezvisko'], + 'graduation': l['matura'], + 'email': l['email'], + 'birth_date': self.parse_date(l['datnar']), + 'school_id': l['id_skoly'] + } + + #TODO treba poparsovat adresy, + 'adresa_domov' + 'adresa_kores' + + user_properties = [ + (mobil, l['mobil']), + (kms_sustredka, ucasti[idcko]), + (trojsten_contact, last_kontakt[idcko]) + ] + self.process_person(user, user_properties, kms_id_key, int(l['id'])) + + #TODO akcie, sustredenia + self.print_stats() + + + + def parse_date(self, date_string): + # Remove any whitespace inside the string. + date_string = date_string.replace(' ', '') + if date_string == "0000-00-00": + return None + else: + return datetime.strptime(date_string, '%Y-%m-%d') + + From d149519497e71479c65c5ecc6da6d0cad870168c Mon Sep 17 00:00:00 2001 From: vlejd Date: Sun, 26 Mar 2017 19:19:37 +0200 Subject: [PATCH 10/20] Good enough migration scripts. --- .../commands/migrate_30rokovfks1_csv.py | 68 ++++++ .../commands/migrate_30rokovfks2_csv.py | 73 +++++++ .../management/commands/migrate_base_class.py | 92 ++++++-- .../management/commands/migrate_fks_csv.py | 111 ++++++++++ .../management/commands/migrate_kms_csv.py | 83 +++----- .../management/commands/migrate_ksp_kaspar.py | 137 ++++++++++++ .../commands/migrate_people_from_kaspar.py | 198 ------------------ 7 files changed, 492 insertions(+), 270 deletions(-) create mode 100644 trojsten/people/management/commands/migrate_30rokovfks1_csv.py create mode 100644 trojsten/people/management/commands/migrate_30rokovfks2_csv.py create mode 100644 trojsten/people/management/commands/migrate_fks_csv.py create mode 100644 trojsten/people/management/commands/migrate_ksp_kaspar.py delete mode 100644 trojsten/people/management/commands/migrate_people_from_kaspar.py diff --git a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py new file mode 100644 index 000000000..9cb8e8d52 --- /dev/null +++ b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py @@ -0,0 +1,68 @@ +from __future__ import unicode_literals + +import csv +from collections import defaultdict +from datetime import datetime +import os + + +from django.core.management.base import NoArgsCommand +from django.db import connections, transaction +from django.db.models import Q +from django.utils.six.moves import input + +from trojsten.people.helpers import get_similar_users +from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey +from trojsten.people.management.commands.migrate_base_class import * + + +""" +Restore the mysql database dump and run (replace and ) +Alternatively you can export these tables from phpAdmin. + +for tn in adresa osoba riesitel skola +do +mysql -u -p fks -B -e "select * from \`$tn\`;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv +done + +mysql -u -p fks -B -e "select riesitel_id, termin from seria as s, priklad as p, riesitel_priklady as rp, riesitel as r where s.id = p.seria_id and rp.priklad_id = p.id and rp.riesitel_id = r.id;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > aktivita.csv +""" + +#TODO vvysledkovky + +class Command(MigrateBaceCommand): + help = 'Imports people and their related info from fks_csv.' + + def add_arguments(self, parser): + super(Command, self).add_arguments(parser) + parser.add_argument('file', type=str) + + def handle_noargs(self, **options): + super(Command, self).handle_noargs(**options) + base = options['file'] + + participants_file = os.path.join(base, "FKS_30_rokov_1.csv") + participants = csv.DictReader(open(participants_file)) + + idd = 0 + for l in participants: + idd+=1 + contacted = l['kontaktovany?']=='ano' + user = { + 'first_name': l['Meno'], + 'last_name': l['Priezvisko'], + 'email': l['Email'], + } + user_properties = [ + (MOBIL_PROPERTY, l['Telefon']), + (BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), + (NICKNAME_PROPERTY, l['Prezyvka']), + (LAST_CONTACT_PROPERTY, 2014 if contacted else False) + ] + + self.process_person(user, user_properties, CSV_ID_PROPERTY, "30rokovFKS1_{0:d}".format(idd)) + + self.print_stats() + + + diff --git a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py new file mode 100644 index 000000000..8fe6f3fc3 --- /dev/null +++ b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py @@ -0,0 +1,73 @@ +from __future__ import unicode_literals + +import csv +from collections import defaultdict +from datetime import datetime +import os + + +from django.core.management.base import NoArgsCommand +from django.db import connections, transaction +from django.db.models import Q +from django.utils.six.moves import input + +from trojsten.people.helpers import get_similar_users +from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey +from trojsten.people.management.commands.migrate_base_class import * + + +""" +Restore the mysql database dump and run (replace and ) +Alternatively you can export these tables from phpAdmin. + +for tn in adresa osoba riesitel skola +do +mysql -u -p fks -B -e "select * from \`$tn\`;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv +done + +mysql -u -p fks -B -e "select riesitel_id, termin from seria as s, priklad as p, riesitel_priklady as rp, riesitel as r where s.id = p.seria_id and rp.priklad_id = p.id and rp.riesitel_id = r.id;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > aktivita.csv +""" + +#TODO vvysledkovky + +class Command(MigrateBaceCommand): + help = 'Imports people and their related info from fks_csv.' + + def add_arguments(self, parser): + super(Command, self).add_arguments(parser) + parser.add_argument('file', type=str) + + def handle_noargs(self, **options): + super(Command, self).handle_noargs(**options) + base = options['file'] + + participants_file = os.path.join(base, "FKS_30_rokov_2.csv") + participants = csv.DictReader(open(participants_file)) + idd = 0 + for l in participants: + idd+=1 + if not l['Meno']: + continue + + user = { + 'first_name': l['Meno'], + 'last_name': l['Priezvisko'], + 'email': l['E-mail'], + } + user_properties = [ + (MOBIL_PROPERTY, l['Telefon']), + (BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), + (NICKNAME_PROPERTY, l['Prezyvka']), + (COMPANY_PROPERTY, l['Posobisko']), + (AFFILIATION_PROPERTY, l['Pozicia']), + (MEMORY_PROPERTY, l['spomienka']), + (LAST_CONTACT_PROPERTY, 2014), + ] + #TODO Adresa + + self.process_person(user, user_properties, CSV_ID_PROPERTY, "30rokovFKS2_{0:d}".format(idd)) + + self.print_stats() + + + diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py index e52d8a238..145699037 100644 --- a/trojsten/people/management/commands/migrate_base_class.py +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -14,13 +14,6 @@ reload(sys) sys.setdefaultencoding("utf-8") -# Kaspar property IDs -EMAIL_PROP = 1 -BIRTHDAY_PROP = 2 -# Labels for auto-generated properties -KASPAR_ID_LABEL = "kaspar ID" -KASPAR_NOTE_LABEL = "kaspar note" - class MigrateBaceCommand(NoArgsCommand): help = 'Base class for importing people.' @@ -40,14 +33,17 @@ def handle_noargs(self, **options): self.similar_users = [] self.school_id_map={} + @transaction.atomic + def process_address(self, street, town, postal_code, country): + return Address.objects.create(street=street, town=town, postal_code=postal_code, country=country) + + @transaction.atomic def process_school(self, old_id, abbr, name, addr_name, street, city, zip_code): + # TODO improve this, do not work with abbreviations if not abbr: - print("empty") - print(old_id, abbr, name, street) - x = input() self.school_id_map[old_id] = None return @@ -98,21 +94,16 @@ def create_school(self, abbr, name, addr_name, street, return school @transaction.atomic - def process_person(self, user_args, user_properties, old_user_id_field, old_user_id): + def process_person(self, user_args, user_properties, old_user_id_field, old_user_id, address=None): """ Args: - user_args (dict): will be uset for user constructor as is. + user_args (dict): will be used for user constructor as is. Except for school_id. user_properties (list(tuple(UserPropertyKey, string))): will create additional user properties old_user_id_field (UserPropertyKey): old field that contained oser id (kaspar_id/ kms id ...), used for faster deduplication. old_user_id (int/string): old id user_args can have - first_name - last_name - graduation - email - birth_date - school_id + first_name, last_name, graduation, email, birth_date, school_id """ # If the user already exists in our database, skip. old_id_property = None @@ -127,13 +118,12 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user if self.verbosity >= 2: self.stdout.write("Skipping user %s %s" % (first_name, last_name)) - return + return None # The username needs to be unique, thus the ID. - user_args['username'] = u'{0:s}{1:s}{2:d}'.format(first_name, last_name, old_user_id), + user_args['username'] = u'{0:s}{1:s}_{2:s}'.format(first_name, last_name, str(old_user_id)), user_args['is_active'] = False - #TODO fix school if 'school_id' in user_args: school_id = user_args['school_id'] del user_args['school_id'] @@ -146,11 +136,20 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user if self.dry: new_user = User(**user_args) else: + addr = None + if address: + addr = process_address(address['street'], + address['town'], + address['postal_code'], + address['country']) + user_args['home_address'] = addr + new_user = User.objects.create(**user_args) if old_user_id: new_user.properties.create(key=old_user_id_field, value=old_user_id) + user_properties = list(filter(lambda x: x, user_properties)) for key, value in user_properties: new_user.properties.create(key=key, value=value) @@ -165,14 +164,63 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user else: DuplicateUser.objects.create(user=new_user) + return new_user + def print_stats(self): for conflict in self.similar_users: self.stdout.write("Conflicts: %s" % str(conflict)) self.stdout.write("Conflict users: %d" % len(self.similar_users)) - def parse_date(self, date_string): + def parse_dot_date(self, date_string): # Remove any whitespace inside the string. date_string = date_string.replace(' ', '') # Just hope that all dates are in the same format. return datetime.strptime(date_string, '%d.%m.%Y') + + def parse_dash_date(self, date_string): + # Remove any whitespace inside the string. + date_string = date_string.replace(' ', '') + if date_string == "0000-00-00" or date_string == "NULL": + return None + else: + return datetime.strptime(date_string, '%Y-%m-%d') + + def process_property(self, key_name): + #TODO handle regexp + hiddne, if does not exists, ask and create + #WARNING this is will create object in db even for dry run. + user_property, _ = UserPropertyKey.objects.get_or_create(key_name=key_name) + return user_property + + +COMMAND = MigrateBaceCommand() + +CSV_ID_KEY = "csv ID" +CSV_ID_PROPERTY = COMMAND.process_property(CSV_ID_KEY) +MOBIL_KEY = "Mobil" +MOBIL_PROPERTY = COMMAND.process_property(MOBIL_KEY) +NICKNAME_KEY = "Prezyvka" +NICKNAME_PROPERTY = COMMAND.process_property(NICKNAME_KEY) +BIRTH_NAME_KEY = "Rodne Meno" +BIRTH_NAME_PROPERTY = COMMAND.process_property(BIRTH_NAME_KEY) +LAST_CONTACT_KEY = "Posledny kontakt" +LAST_CONTACT_PROPERTY = COMMAND.process_property(LAST_CONTACT_KEY) + +FKS_ID_KEY = "FKS ID" +FKS_ID_PROPERTY = COMMAND.process_property(FKS_ID_KEY) +KMS_ID_KEY = "KMS ID" +KMS_ID_PROPERTY = COMMAND.process_property(KMS_ID_KEY) +KMS_CAMPS_KEY = "KMS sustredenia" +KMS_CAMPS_PROPERTY = COMMAND.process_property(KMS_CAMPS_KEY) +KASPAR_ID_KEY = "KSP ID" +KASPAR_ID_PROPERTY = COMMAND.process_property(KASPAR_ID_KEY) +KASPAR_NOTE_KEY = "KSP note" +KASPAR_NOTE_PROPERTY = COMMAND.process_property(KASPAR_NOTE_KEY) +KSP_CAMPS_KEY = "KSP sustredenia" +KSP_CAMPS_PROPERTY = COMMAND.process_property(KSP_CAMPS_KEY) +MEMORY_KEY = "Spomienky" +MEMORY_PROPERTY = COMMAND.process_property(MEMORY_KEY) +COMPANY_KEY = "Posobisko" +COMPANY_PROPERTY = COMMAND.process_property(COMPANY_KEY) +AFFILIATION_KEY = "Pozicia" +AFFILIATION_PROPERTY = COMMAND.process_property(AFFILIATION_KEY) diff --git a/trojsten/people/management/commands/migrate_fks_csv.py b/trojsten/people/management/commands/migrate_fks_csv.py new file mode 100644 index 000000000..090e376d7 --- /dev/null +++ b/trojsten/people/management/commands/migrate_fks_csv.py @@ -0,0 +1,111 @@ +from __future__ import unicode_literals + +import csv +from collections import defaultdict +from datetime import datetime +import os + + +from django.core.management.base import NoArgsCommand +from django.db import connections, transaction +from django.db.models import Q +from django.utils.six.moves import input + +from trojsten.people.helpers import get_similar_users +from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey +from trojsten.people.management.commands.migrate_base_class import * + + +""" +Restore the mysql database dump and run (replace and ) +Alternatively you can export these tables from phpAdmin. + +for tn in adresa osoba riesitel skola +do +mysql -u -p fks -B -e "select * from \`$tn\`;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv +done + +mysql -u -p fks -B -e "select riesitel_id, termin from seria as s, priklad as p, riesitel_priklady as rp, riesitel as r where s.id = p.seria_id and rp.priklad_id = p.id and rp.riesitel_id = r.id;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > aktivita.csv +""" + +#TODO vvysledkovky + +class Command(MigrateBaceCommand): + help = 'Imports people and their related info from fks_csv.' + + def add_arguments(self, parser): + super(Command, self).add_arguments(parser) + parser.add_argument('file', type=str) + + def handle_noargs(self, **options): + super(Command, self).handle_noargs(**options) + base = options['file'] + + addresses_file = os.path.join(base, "adresa.csv") + addresses = csv.DictReader(open(addresses_file)) + address_by_id = {} + for address in addresses: + address_by_id[address['id']] = address + + schools_file = os.path.join(base, "skola.csv") + schools = csv.DictReader(open(schools_file)) + for school in schools: + abbr = school['skratka'].split(' ', 1)[0] + addr = address_by_id[school['adresa_id']] + + street = addr['ulica'] + + addr_name = school['nazov'] + ", " + street + self.process_school(school['id'], abbr, school['nazov'], addr_name, street, + addr['mesto'], addr['psc']) + + activity_file = os.path.join(base, "aktivita.csv") + activity = csv.DictReader(open(activity_file)) + last_contact = {} + for act in activity: + idd = act['riesitel_id'] + date = self.parse_dash_date(act['termin']) + last_contact[idd] = max(last_contact.get(idd, 0), date.year) + + + people_file = os.path.join(base, "osoba.csv") + people = csv.DictReader(open(people_file)) + + people_by_id = {} + for person in people: + people_by_id[person['id']] = person + + participants_file = os.path.join(base, "riesitel.csv") + participants = csv.DictReader(open(participants_file)) + + for l in participants: + idd = l['osoba_id'] + person = people_by_id[idd] + matura = l['rok_maturity'] + last_contact[idd] = max(last_contact.get(idd,0), int(matura)-3) + address = address_by_id[person['adresa_id']] + parsed_address = { + 'street': address['ulica'], + 'town': address['mesto'], + 'postal_code': address['psc'], + 'country': address['stat'], + } + user = { + 'first_name': person['meno'], + 'last_name': person['priezvisko'], + 'graduation': matura, + 'email': person['email'], + 'birth_date': self.parse_dash_date(person['datum_narodenia']), + 'school_id': l['skola_id'], + } + + user_properties = [ + (MOBIL_PROPERTY, person['telefon']), + (LAST_CONTACT_PROPERTY, last_contact[idd]) + ] + self.process_person(user, user_properties, FKS_ID_PROPERTY, int(idd), address=parsed_address) + + self.print_stats() + + + diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py index 3b867e1eb..0fb3978da 100644 --- a/trojsten/people/management/commands/migrate_kms_csv.py +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -13,7 +13,7 @@ from trojsten.people.helpers import get_similar_users from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey -from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand +from trojsten.people.management.commands.migrate_base_class import * class Command(MigrateBaceCommand): @@ -26,70 +26,53 @@ def add_arguments(self, parser): def handle_noargs(self, **options): super(Command, self).handle_noargs(**options) base = options['file'] - riesitelia_file = os.path.join(base, "riesitelia.csv") - riesitelia = csv.DictReader(open(riesitelia_file)) - sustredenia_file = os.path.join(base, "sustredenia.csv") - sustredenia = csv.DictReader(open(sustredenia_file)) - ucasti = defaultdict(int) - last_kontakt = {} - for sustredko in sustredenia: - idcko = sustredko['id_riesitela'].strip() - ucasti[idcko]+=1 - if sustredko['rok']: - last_kontakt[idcko] = max(last_kontakt.get(idcko,0), int(sustredko['rok'])) - - - skoly_file = os.path.join(base, "skoly.csv") - skoly = csv.DictReader(open(skoly_file)) - for skola in skoly: - abbr = skola['skratka'].split(' ', 1)[0] - addr_name = skola['nazov'] + ", " + skola['ulica'] - self.process_school(skola['id'], abbr, skola['nazov'], addr_name, skola['ulica'], - skola['mesto'], skola['PSC']) - - - kms_id_key, _ = UserPropertyKey.objects.get_or_create(key_name="KMS ID") - kms_sustredka, _ = UserPropertyKey.objects.get_or_create(key_name="KMS sustredenia") - mobil, _ = UserPropertyKey.objects.get_or_create(key_name="Mobil") - trojsten_contact, _ = UserPropertyKey.objects.get_or_create(key_name="Posledny kontakt") - - - for l in riesitelia: + participants_file = os.path.join(base, "riesitelia.csv") + participants = csv.DictReader(open(participants_file)) + camps_file = os.path.join(base, "sustredenia.csv") + camps = csv.DictReader(open(camps_file)) + camps_survived = defaultdict(int) + last_contact = {} + for camp in camps: + idd = camp['id_riesitela'].strip() + camps_survived[idd]+=1 + if camp['rok']: + last_contact[idd] = max(last_contact.get(idd,0), int(camp['rok'])) + + + schools_file = os.path.join(base, "skoly.csv") + schools = csv.DictReader(open(schools_file)) + for school in schools: + abbr = school['skratka'].split(' ', 1)[0] + addr_name = school['nazov'] + ", " + school['ulica'] + self.process_school(school['id'], abbr, school['nazov'], addr_name, school['ulica'], + school['mesto'], school['PSC']) + + + for l in participants: if not l['meno']: continue - idcko = l['id'] - last_kontakt[idcko] = max(last_kontakt.get(idcko,0), int(l['matura'])-3) + idd = l['id'] + last_contact[idd] = max(last_contact.get(idd,0), int(l['matura'])-3) user = { 'first_name': l['meno'], 'last_name': l['priezvisko'], 'graduation': l['matura'], 'email': l['email'], - 'birth_date': self.parse_date(l['datnar']), + 'birth_date': self.parse_dash_date(l['datnar']), 'school_id': l['id_skoly'] } - #TODO treba poparsovat adresy, + #TODO parse addresses from string. 'adresa_domov' 'adresa_kores' user_properties = [ - (mobil, l['mobil']), - (kms_sustredka, ucasti[idcko]), - (trojsten_contact, last_kontakt[idcko]) + (MOBIL_PROPERTY, l['mobil']), + (KMS_CAMPS_PROPERTY, camps_survived[idd]), + (LAST_CONTACT_PROPERTY, last_contact[idd]) ] - self.process_person(user, user_properties, kms_id_key, int(l['id'])) + self.process_person(user, user_properties, KMS_ID_PROPERTY, int(idd)) - #TODO akcie, sustredenia + #TODO parse camps more precisely self.print_stats() - - - def parse_date(self, date_string): - # Remove any whitespace inside the string. - date_string = date_string.replace(' ', '') - if date_string == "0000-00-00": - return None - else: - return datetime.strptime(date_string, '%Y-%m-%d') - - diff --git a/trojsten/people/management/commands/migrate_ksp_kaspar.py b/trojsten/people/management/commands/migrate_ksp_kaspar.py new file mode 100644 index 000000000..fa6f6d82a --- /dev/null +++ b/trojsten/people/management/commands/migrate_ksp_kaspar.py @@ -0,0 +1,137 @@ +from __future__ import unicode_literals + +import csv +from collections import defaultdict +from datetime import datetime +import os + + +from django.core.management.base import NoArgsCommand +from django.db import connections, transaction +from django.db.models import Q +from django.utils.six.moves import input + +from trojsten.people.helpers import get_similar_users +from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey +from trojsten.people.management.commands.migrate_base_class import * + +# Kaspar property IDs +EMAIL_PROP = 1 +BIRTHDAY_PROP = 2 + +class Command(MigrateBaceCommand): + help = 'Imports people and their related info from kaspar.' + + def handle_noargs(self, **options): + super(Command, self).handle_noargs(**options) + kaspar = connections['kaspar'] + + if self.verbosity >= 1: + self.stdout.write("Migrating schools...") + + c = kaspar.cursor() + c.execute(""" + SELECT school_id, short, name, addr_name, addr_street, + addr_city, addr_zip + FROM schools; + """) + self.school_id_map = dict() + for row in c: + self.process_school(*row) + + #TODO sustredka + + if self.verbosity >= 1: + self.stdout.write("Dumping veducis") + + c.execute(""" + SELECT man_id + FROM veduci + """) + + veduci = set() + for l in c: + veduci.add(l[0]) + + if self.verbosity >= 1: + self.stdout.write("Dumping participations") + + c.execute(""" + SELECT action_id, name, date_start, date_end + FROM actions + """) + + actions = {} + for action in c: + actions[action[0]] = { + "name": action[1], + "start": action[2], + "end": action[3] + } + + c.execute(""" + SELECT action_id, man_id, task, note + FROM participants + """) + + last_contact = {} + camps_survived = {} + for participant in c: + man_id = participant[1] + action = actions[participant[0]] + last_contact[man_id] = max(last_contact.get(man_id,0), action['end'].year) + camps_survived[man_id] = camps_survived.get(man_id, 0) + 1 + + + if self.verbosity >= 1: + self.stdout.write("Creating/retrieving required UserPropertyKeys...") + + if self.verbosity >= 1: + self.stdout.write("Migrating people...") + + + fields = ["man_id", "firstname", "lastname", "school_id", "finish", "note"] + c.execute(""" + SELECT %s + FROM people; + """ % (', '.join(fields))) + + for l in c: + l = dict(zip(fields, l)) + idcko = l['man_id'] + last_contact[idcko] = max(last_contact.get(idcko,0), int(l['finish'])-3) + + user = { + 'first_name': l['firstname'], + 'last_name': l['lastname'], + 'graduation': l['finish'], + 'school_id': l['school_id'] + } + cc = kaspar.cursor() + cc.execute(""" + SELECT ppt_id, value + FROM people_prop + WHERE people_prop.man_id = %s AND ppt_id IN (%s, %s); + """, (idcko, EMAIL_PROP, BIRTHDAY_PROP)) + for prop_id, value in cc: + if prop_id == EMAIL_PROP: + user['email'] = value + elif prop_id == BIRTHDAY_PROP: + try: + user['birth_date'] = self.parse_dot_date(value) + except ValueError: + # If we can't parse the date, give up. + pass + cc.close() + + user_properties = [ + (LAST_CONTACT_PROPERTY, last_contact[idcko]), + (KASPAR_NOTE_PROPERTY, l['note']), + (KSP_CAMPS_PROPERTY, camps_survived.get(idcko,0)) + ] + userObject = self.process_person(user, user_properties, KASPAR_ID_PROPERTY, int(idcko)) + if idcko in veduci: + #TODO userObject add to group veduci + pass + + self.print_stats() diff --git a/trojsten/people/management/commands/migrate_people_from_kaspar.py b/trojsten/people/management/commands/migrate_people_from_kaspar.py deleted file mode 100644 index ecfc0411c..000000000 --- a/trojsten/people/management/commands/migrate_people_from_kaspar.py +++ /dev/null @@ -1,198 +0,0 @@ -from __future__ import unicode_literals - -from datetime import datetime - -from django.core.management.base import NoArgsCommand -from django.db import connections, transaction -from django.db.models import Q -from django.utils.six.moves import input - -from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey - -# Kaspar property IDs -EMAIL_PROP = 1 -BIRTHDAY_PROP = 2 -# Labels for auto-generated properties -KASPAR_ID_LABEL = "kaspar ID" -KASPAR_NOTE_LABEL = "kaspar note" - -class Command(NoArgsCommand): - help = 'Imports people and their related info from kaspar.' - - def add_arguments(self, parser): - parser.add_argument('--wet_run', - action='store_false', - dest='dry', - default=True, - help='Actually write something to DB') - - def handle_noargs(self, **options): - self.similar_users = [] - self.dry = options['dry'] - if self.dry: - self.stdout.write("Running dry run!") - - self.verbosity = options['verbosity'] - self.kaspar = connections['kaspar'] - c = self.kaspar.cursor() - - if self.verbosity >= 1: - self.stdout.write("Migrating schools...") - - c.execute(""" - SELECT school_id, short, name, addr_name, addr_street, - addr_city, addr_zip - FROM schools; - """) - self.school_id_map = dict() - for row in c: - self.process_school(*row) - - if self.verbosity >= 1: - self.stdout.write("Creating/retrieving required UserPropertyKeys...") - - self.kaspar_id_key, _ = UserPropertyKey.objects.get_or_create(key_name=KASPAR_ID_LABEL) - self.kaspar_note_key, _ = UserPropertyKey.objects.get_or_create(key_name=KASPAR_NOTE_LABEL) - - if self.verbosity >= 1: - self.stdout.write("Migrating people...") - - c.execute(""" - SELECT man_id, firstname, lastname, school_id, finish, note - FROM people; - """) - self.man_id_map = dict() - # This loop takes O(N) queries and I don't care -- it's a one-time - # background job anyway. - for row in c: - self.process_person(*row) - - for conflict in self.similar_users: - self.stdout.write("Conflicts: %s" % str(conflict)) - self.stdout.write("Conflict users: %d" % len(self.similar_users)) - - @transaction.atomic - def process_school(self, kaspar_id, abbr, name, addr_name, street, - city, zip_code): - - if not abbr: - print("empty") - print(kaspar_id, abbr, name, street) - x = input() - self.school_id_map[kaspar_id] = None - return - - candidates = School.objects.filter( - Q(abbreviation__iexact=abbr) | - Q(abbreviation__iexact=abbr + '?') - ) - row = (kaspar_id, abbr, name, addr_name, street, city, zip_code) - if len(candidates) == 1: - if self.verbosity >= 2: - self.stdout.write("Matched %r to %s" % (row, - candidates[0])) - self.school_id_map[kaspar_id] = candidates[0] - elif len(candidates) > 1: - self.stdout.write("Multiple candidates for %r:\n%s" % ( - row, - "\n".join("%02d: %s" % (i, candidate) - for i, candidate in enumerate(candidates)) - )) - try: - choice = int(input("Choice (empty or invalid to create new): ")) - self.school_id_map[kaspar_id] = candidates[choice] - except (ValueError, KeyError): - self.school_id_map[kaspar_id] = self.create_school(*row) - else: - self.school_id_map[kaspar_id] = self.create_school(*row) - - def create_school(self, kaspar_id, abbr, name, addr_name, street, - city, zip_code): - abbr += '?' # Question mark denotes schools needing review. - school = None - if self.dry: - school = School(abbreviation=abbr, - verbose_name=name, - addr_name=addr_name, - street=street, - city=city, - zip_code=zip_code) - else: - school = School.objects.create(abbreviation=abbr, - verbose_name=name, - addr_name=addr_name, - street=street, - city=city, - zip_code=zip_code) - if self.verbosity >= 2: - self.stdout.write("Created new school %s" % school) - return school - - @transaction.atomic - def process_person(self, man_id, first_name, last_name, school_id, - grad_year, note): - # If the user already exists in our database, skip. - if self.kaspar_id_key.properties.filter(value=man_id).exists(): - if self.verbosity >= 2: - self.stdout.write("Skipping user %s %s" % (first_name, - last_name)) - return - - new_user_args = { - 'first_name': first_name, - 'last_name': last_name, - # The username needs to be unique, thus the ID. - 'username': '%s%s%d' % (first_name, last_name, man_id), - 'is_active': False, - 'school': self.school_id_map[school_id] - } - - if grad_year: - new_user_args['graduation'] = grad_year - - c = self.kaspar.cursor() - c.execute(""" - SELECT ppt_id, value - FROM people_prop - WHERE people_prop.man_id = %s AND ppt_id IN (%s, %s); - """, (man_id, EMAIL_PROP, BIRTHDAY_PROP)) - for prop_id, value in c: - if prop_id == EMAIL_PROP: - new_user_args['email'] = value - elif prop_id == BIRTHDAY_PROP: - try: - new_user_args['birth_date'] = self.parse_date(value) - except ValueError: - # If we can't parse the date, give up. - pass - c.close() - - if self.verbosity >= 2: - self.stdout.write("Creating user %s %s" % (first_name, last_name)) - - new_user = None - if self.dry: - new_user = User(**new_user_args) - else: - new_user = User.objects.create(**new_user_args) - new_user.properties.create(key=self.kaspar_id_key, value=man_id) - if note: - new_user.properties.create(key=self.kaspar_note_key, value=note) - - similar_users = get_similar_users(new_user) - if len(similar_users): - names_of_similar = [(x.first_name, x.last_name ) for x in similar_users] - self.similar_users.append(((first_name, last_name), names_of_similar)) - if self.verbosity >= 2: - self.stdout.write('Similar users: %s' % str(names_of_similar)) - if self.dry: - pass - else: - DuplicateUser.objects.create(user=new_user) - - def parse_date(self, date_string): - # Remove any whitespace inside the string. - date_string = date_string.replace(' ', '') - # Just hope that all dates are in the same format. - return datetime.strptime(date_string, '%d.%m.%Y') From e7e472ea1e5e2379b09f453c6a6ead28b3eea481 Mon Sep 17 00:00:00 2001 From: vlejd Date: Sat, 1 Apr 2017 00:41:35 +0200 Subject: [PATCH 11/20] Whole pipeline is working on wet run. --- .../commands/migrate_30rokovfks1_csv.py | 5 ++-- .../commands/migrate_30rokovfks2_csv.py | 5 ++-- .../management/commands/migrate_base_class.py | 25 +++++++++++-------- .../management/commands/migrate_fks_csv.py | 2 +- .../management/commands/migrate_kms_csv.py | 2 +- .../management/commands/migrate_ksp_kaspar.py | 14 ----------- 6 files changed, 21 insertions(+), 32 deletions(-) diff --git a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py index 9cb8e8d52..1afd5da31 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py @@ -39,9 +39,8 @@ def add_arguments(self, parser): def handle_noargs(self, **options): super(Command, self).handle_noargs(**options) - base = options['file'] + participants_file = options['file'] - participants_file = os.path.join(base, "FKS_30_rokov_1.csv") participants = csv.DictReader(open(participants_file)) idd = 0 @@ -54,7 +53,7 @@ def handle_noargs(self, **options): 'email': l['Email'], } user_properties = [ - (MOBIL_PROPERTY, l['Telefon']), + (MOBIL_PROPERTY, l['Telefon'].replace(" ","").strip()), (BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), (NICKNAME_PROPERTY, l['Prezyvka']), (LAST_CONTACT_PROPERTY, 2014 if contacted else False) diff --git a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py index 8fe6f3fc3..1728417d5 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py @@ -39,9 +39,8 @@ def add_arguments(self, parser): def handle_noargs(self, **options): super(Command, self).handle_noargs(**options) - base = options['file'] + participants_file = options['file'] - participants_file = os.path.join(base, "FKS_30_rokov_2.csv") participants = csv.DictReader(open(participants_file)) idd = 0 for l in participants: @@ -55,7 +54,7 @@ def handle_noargs(self, **options): 'email': l['E-mail'], } user_properties = [ - (MOBIL_PROPERTY, l['Telefon']), + (MOBIL_PROPERTY, l['Telefon'].replace(" ", "").strip()), (BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), (NICKNAME_PROPERTY, l['Prezyvka']), (COMPANY_PROPERTY, l['Posobisko']), diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py index 145699037..dbbcd3569 100644 --- a/trojsten/people/management/commands/migrate_base_class.py +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -8,7 +8,7 @@ from django.utils.six.moves import input from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey, UserProperty +from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey, UserProperty, Address import sys reload(sys) @@ -51,7 +51,7 @@ def process_school(self, old_id, abbr, name, addr_name, street, Q(abbreviation__iexact=abbr) | Q(abbreviation__iexact=abbr + '?') ) - row = (abbr, name, addr_name, street, city, zip_code) + row = (abbr, name, addr_name, street, city, self.fix_string(zip_code)) if len(candidates) == 1: if self.verbosity >= 2: self.stdout.write("Matched %r to %s" % (row, @@ -75,6 +75,10 @@ def create_school(self, abbr, name, addr_name, street, city, zip_code): abbr += '?' # Question mark denotes schools needing review. school = None + if len(zip_code) > 10: + # Swiss zip codes + zip_code = 0 + if self.dry: school = School(abbreviation=abbr, verbose_name=name, @@ -121,7 +125,6 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user return None # The username needs to be unique, thus the ID. - user_args['username'] = u'{0:s}{1:s}_{2:s}'.format(first_name, last_name, str(old_user_id)), user_args['is_active'] = False if 'school_id' in user_args: @@ -138,10 +141,10 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user else: addr = None if address: - addr = process_address(address['street'], - address['town'], - address['postal_code'], - address['country']) + addr = self.process_address(address['street'], + address['town'], + address['postal_code'], + address['country']) user_args['home_address'] = addr new_user = User.objects.create(**user_args) @@ -186,19 +189,21 @@ def parse_dash_date(self, date_string): else: return datetime.strptime(date_string, '%Y-%m-%d') - def process_property(self, key_name): + def process_property(self, key_name, regexp=None): #TODO handle regexp + hiddne, if does not exists, ask and create #WARNING this is will create object in db even for dry run. user_property, _ = UserPropertyKey.objects.get_or_create(key_name=key_name) return user_property + def fix_string(self, string): + return string.replace(" ", "").strip() COMMAND = MigrateBaceCommand() CSV_ID_KEY = "csv ID" -CSV_ID_PROPERTY = COMMAND.process_property(CSV_ID_KEY) +CSV_ID_PROPERTY = COMMAND.process_property(CSV_ID_KEY, "(.*_)?\d+") MOBIL_KEY = "Mobil" -MOBIL_PROPERTY = COMMAND.process_property(MOBIL_KEY) +MOBIL_PROPERTY = COMMAND.process_property(MOBIL_KEY, ".?.?\d*\\?") NICKNAME_KEY = "Prezyvka" NICKNAME_PROPERTY = COMMAND.process_property(NICKNAME_KEY) BIRTH_NAME_KEY = "Rodne Meno" diff --git a/trojsten/people/management/commands/migrate_fks_csv.py b/trojsten/people/management/commands/migrate_fks_csv.py index 090e376d7..680a86218 100644 --- a/trojsten/people/management/commands/migrate_fks_csv.py +++ b/trojsten/people/management/commands/migrate_fks_csv.py @@ -100,7 +100,7 @@ def handle_noargs(self, **options): } user_properties = [ - (MOBIL_PROPERTY, person['telefon']), + (MOBIL_PROPERTY, person['telefon'].replace(" ", "").strip()), (LAST_CONTACT_PROPERTY, last_contact[idd]) ] self.process_person(user, user_properties, FKS_ID_PROPERTY, int(idd), address=parsed_address) diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py index 0fb3978da..0cf821970 100644 --- a/trojsten/people/management/commands/migrate_kms_csv.py +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -67,7 +67,7 @@ def handle_noargs(self, **options): 'adresa_kores' user_properties = [ - (MOBIL_PROPERTY, l['mobil']), + (MOBIL_PROPERTY, l['mobil'].replace(" ", "").strip()), (KMS_CAMPS_PROPERTY, camps_survived[idd]), (LAST_CONTACT_PROPERTY, last_contact[idd]) ] diff --git a/trojsten/people/management/commands/migrate_ksp_kaspar.py b/trojsten/people/management/commands/migrate_ksp_kaspar.py index fa6f6d82a..408f4c3ed 100644 --- a/trojsten/people/management/commands/migrate_ksp_kaspar.py +++ b/trojsten/people/management/commands/migrate_ksp_kaspar.py @@ -41,17 +41,6 @@ def handle_noargs(self, **options): #TODO sustredka - if self.verbosity >= 1: - self.stdout.write("Dumping veducis") - - c.execute(""" - SELECT man_id - FROM veduci - """) - - veduci = set() - for l in c: - veduci.add(l[0]) if self.verbosity >= 1: self.stdout.write("Dumping participations") @@ -130,8 +119,5 @@ def handle_noargs(self, **options): (KSP_CAMPS_PROPERTY, camps_survived.get(idcko,0)) ] userObject = self.process_person(user, user_properties, KASPAR_ID_PROPERTY, int(idcko)) - if idcko in veduci: - #TODO userObject add to group veduci - pass self.print_stats() From f4f403921e2b7539c074398819957941796c17fa Mon Sep 17 00:00:00 2001 From: vlejd Date: Sat, 1 Apr 2017 01:12:17 +0200 Subject: [PATCH 12/20] pep-8 fixes --- .../commands/migrate_30rokovfks1_csv.py | 26 ++++--------------- .../commands/migrate_30rokovfks2_csv.py | 24 +++-------------- .../management/commands/migrate_base_class.py | 19 +++++++------- .../management/commands/migrate_fks_csv.py | 22 +++++++++------- .../management/commands/migrate_kms_csv.py | 26 ++++++++++++------- .../management/commands/migrate_ksp_kaspar.py | 12 ++++----- 6 files changed, 53 insertions(+), 76 deletions(-) diff --git a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py index 1afd5da31..bb5becc04 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py @@ -16,20 +16,6 @@ from trojsten.people.management.commands.migrate_base_class import * -""" -Restore the mysql database dump and run (replace and ) -Alternatively you can export these tables from phpAdmin. - -for tn in adresa osoba riesitel skola -do -mysql -u -p fks -B -e "select * from \`$tn\`;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv -done - -mysql -u -p fks -B -e "select riesitel_id, termin from seria as s, priklad as p, riesitel_priklady as rp, riesitel as r where s.id = p.seria_id and rp.priklad_id = p.id and rp.riesitel_id = r.id;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > aktivita.csv -""" - -#TODO vvysledkovky - class Command(MigrateBaceCommand): help = 'Imports people and their related info from fks_csv.' @@ -45,23 +31,21 @@ def handle_noargs(self, **options): idd = 0 for l in participants: - idd+=1 - contacted = l['kontaktovany?']=='ano' + idd += 1 + contacted = l['kontaktovany?'] == 'ano' user = { 'first_name': l['Meno'], 'last_name': l['Priezvisko'], 'email': l['Email'], } user_properties = [ - (MOBIL_PROPERTY, l['Telefon'].replace(" ","").strip()), + (MOBIL_PROPERTY, l['Telefon'].replace(" ", "").strip()), (BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), (NICKNAME_PROPERTY, l['Prezyvka']), (LAST_CONTACT_PROPERTY, 2014 if contacted else False) ] - self.process_person(user, user_properties, CSV_ID_PROPERTY, "30rokovFKS1_{0:d}".format(idd)) + self.process_person(user, user_properties, CSV_ID_PROPERTY, + "30rokovFKS1_{0:d}".format(idd)) self.print_stats() - - - diff --git a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py index 1728417d5..d96d6796d 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py @@ -16,20 +16,6 @@ from trojsten.people.management.commands.migrate_base_class import * -""" -Restore the mysql database dump and run (replace and ) -Alternatively you can export these tables from phpAdmin. - -for tn in adresa osoba riesitel skola -do -mysql -u -p fks -B -e "select * from \`$tn\`;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv -done - -mysql -u -p fks -B -e "select riesitel_id, termin from seria as s, priklad as p, riesitel_priklady as rp, riesitel as r where s.id = p.seria_id and rp.priklad_id = p.id and rp.riesitel_id = r.id;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > aktivita.csv -""" - -#TODO vvysledkovky - class Command(MigrateBaceCommand): help = 'Imports people and their related info from fks_csv.' @@ -44,7 +30,7 @@ def handle_noargs(self, **options): participants = csv.DictReader(open(participants_file)) idd = 0 for l in participants: - idd+=1 + idd += 1 if not l['Meno']: continue @@ -62,11 +48,9 @@ def handle_noargs(self, **options): (MEMORY_PROPERTY, l['spomienka']), (LAST_CONTACT_PROPERTY, 2014), ] - #TODO Adresa + # TODO Adresa - self.process_person(user, user_properties, CSV_ID_PROPERTY, "30rokovFKS2_{0:d}".format(idd)) + self.process_person(user, user_properties, CSV_ID_PROPERTY, + "30rokovFKS2_{0:d}".format(idd)) self.print_stats() - - - diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py index dbbcd3569..5a09e9deb 100644 --- a/trojsten/people/management/commands/migrate_base_class.py +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -14,6 +14,7 @@ reload(sys) sys.setdefaultencoding("utf-8") + class MigrateBaceCommand(NoArgsCommand): help = 'Base class for importing people.' @@ -31,13 +32,12 @@ def handle_noargs(self, **options): self.verbosity = options['verbosity'] self.similar_users = [] - self.school_id_map={} + self.school_id_map = {} @transaction.atomic def process_address(self, street, town, postal_code, country): return Address.objects.create(street=street, town=town, postal_code=postal_code, country=country) - @transaction.atomic def process_school(self, old_id, abbr, name, addr_name, street, city, zip_code): @@ -102,7 +102,8 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user """ Args: user_args (dict): will be used for user constructor as is. Except for school_id. - user_properties (list(tuple(UserPropertyKey, string))): will create additional user properties + user_properties (list(tuple(UserPropertyKey, string))): + will create additional user properties old_user_id_field (UserPropertyKey): old field that contained oser id (kaspar_id/ kms id ...), used for faster deduplication. old_user_id (int/string): old id @@ -142,9 +143,9 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user addr = None if address: addr = self.process_address(address['street'], - address['town'], - address['postal_code'], - address['country']) + address['town'], + address['postal_code'], + address['country']) user_args['home_address'] = addr new_user = User.objects.create(**user_args) @@ -158,7 +159,7 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user similar_users = get_similar_users(new_user) if len(similar_users): - names_of_similar = [(x.first_name, x.last_name ) for x in similar_users] + names_of_similar = [(x.first_name, x.last_name) for x in similar_users] self.similar_users.append(((first_name, last_name), names_of_similar)) if self.verbosity >= 2: self.stdout.write('Similar users: %s' % str(names_of_similar)) @@ -190,8 +191,8 @@ def parse_dash_date(self, date_string): return datetime.strptime(date_string, '%Y-%m-%d') def process_property(self, key_name, regexp=None): - #TODO handle regexp + hiddne, if does not exists, ask and create - #WARNING this is will create object in db even for dry run. + # TODO handle regexp + hiddne, if does not exists, ask and create + # WARNING this is will create object in db even for dry run. user_property, _ = UserPropertyKey.objects.get_or_create(key_name=key_name) return user_property diff --git a/trojsten/people/management/commands/migrate_fks_csv.py b/trojsten/people/management/commands/migrate_fks_csv.py index 680a86218..59d4d0172 100644 --- a/trojsten/people/management/commands/migrate_fks_csv.py +++ b/trojsten/people/management/commands/migrate_fks_csv.py @@ -22,13 +22,18 @@ for tn in adresa osoba riesitel skola do -mysql -u -p fks -B -e "select * from \`$tn\`;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv +mysql -u -p fks -B -e "select * from \`$tn\`;" \ +| sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv done -mysql -u -p fks -B -e "select riesitel_id, termin from seria as s, priklad as p, riesitel_priklady as rp, riesitel as r where s.id = p.seria_id and rp.priklad_id = p.id and rp.riesitel_id = r.id;" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > aktivita.csv +mysql -u -p fks -B -e " \ +select riesitel_id, termin \ +from seria as s, priklad as p, riesitel_priklady as rp, riesitel as r \ +where s.id = p.seria_id and rp.priklad_id = p.id and rp.riesitel_id = r.id;" \ +| sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > aktivita.csv """ +# TODO vvysledkovky -#TODO vvysledkovky class Command(MigrateBaceCommand): help = 'Imports people and their related info from fks_csv.' @@ -57,7 +62,7 @@ def handle_noargs(self, **options): addr_name = school['nazov'] + ", " + street self.process_school(school['id'], abbr, school['nazov'], addr_name, street, - addr['mesto'], addr['psc']) + addr['mesto'], addr['psc']) activity_file = os.path.join(base, "aktivita.csv") activity = csv.DictReader(open(activity_file)) @@ -67,7 +72,6 @@ def handle_noargs(self, **options): date = self.parse_dash_date(act['termin']) last_contact[idd] = max(last_contact.get(idd, 0), date.year) - people_file = os.path.join(base, "osoba.csv") people = csv.DictReader(open(people_file)) @@ -82,7 +86,7 @@ def handle_noargs(self, **options): idd = l['osoba_id'] person = people_by_id[idd] matura = l['rok_maturity'] - last_contact[idd] = max(last_contact.get(idd,0), int(matura)-3) + last_contact[idd] = max(last_contact.get(idd, 0), int(matura)-3) address = address_by_id[person['adresa_id']] parsed_address = { 'street': address['ulica'], @@ -103,9 +107,7 @@ def handle_noargs(self, **options): (MOBIL_PROPERTY, person['telefon'].replace(" ", "").strip()), (LAST_CONTACT_PROPERTY, last_contact[idd]) ] - self.process_person(user, user_properties, FKS_ID_PROPERTY, int(idd), address=parsed_address) + self.process_person(user, user_properties, FKS_ID_PROPERTY, int(idd), + address=parsed_address) self.print_stats() - - - diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py index 0cf821970..a0524a907 100644 --- a/trojsten/people/management/commands/migrate_kms_csv.py +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -15,6 +15,17 @@ from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey from trojsten.people.management.commands.migrate_base_class import * +""" +Restore the mysql database dump and run (replace and ) +Alternatively you can export these tables from phpAdmin. + +for tn in akcie riesitelia skoly sustredenia +do +mysql -u -p fks -B -e "select * from \`$tn\`;" \ +| sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > $tn.csv +done +""" + class Command(MigrateBaceCommand): help = 'Imports people and their related info from kms_csv.' @@ -34,10 +45,9 @@ def handle_noargs(self, **options): last_contact = {} for camp in camps: idd = camp['id_riesitela'].strip() - camps_survived[idd]+=1 + camps_survived[idd] += 1 if camp['rok']: - last_contact[idd] = max(last_contact.get(idd,0), int(camp['rok'])) - + last_contact[idd] = max(last_contact.get(idd, 0), int(camp['rok'])) schools_file = os.path.join(base, "skoly.csv") schools = csv.DictReader(open(schools_file)) @@ -45,14 +55,13 @@ def handle_noargs(self, **options): abbr = school['skratka'].split(' ', 1)[0] addr_name = school['nazov'] + ", " + school['ulica'] self.process_school(school['id'], abbr, school['nazov'], addr_name, school['ulica'], - school['mesto'], school['PSC']) - + school['mesto'], school['PSC']) for l in participants: if not l['meno']: continue idd = l['id'] - last_contact[idd] = max(last_contact.get(idd,0), int(l['matura'])-3) + last_contact[idd] = max(last_contact.get(idd, 0), int(l['matura'])-3) user = { 'first_name': l['meno'], 'last_name': l['priezvisko'], @@ -62,7 +71,7 @@ def handle_noargs(self, **options): 'school_id': l['id_skoly'] } - #TODO parse addresses from string. + # TODO parse addresses from string. 'adresa_domov' 'adresa_kores' @@ -73,6 +82,5 @@ def handle_noargs(self, **options): ] self.process_person(user, user_properties, KMS_ID_PROPERTY, int(idd)) - #TODO parse camps more precisely + # TODO parse camps more precisely self.print_stats() - diff --git a/trojsten/people/management/commands/migrate_ksp_kaspar.py b/trojsten/people/management/commands/migrate_ksp_kaspar.py index 408f4c3ed..b8aff8134 100644 --- a/trojsten/people/management/commands/migrate_ksp_kaspar.py +++ b/trojsten/people/management/commands/migrate_ksp_kaspar.py @@ -19,6 +19,7 @@ EMAIL_PROP = 1 BIRTHDAY_PROP = 2 + class Command(MigrateBaceCommand): help = 'Imports people and their related info from kaspar.' @@ -39,8 +40,7 @@ def handle_noargs(self, **options): for row in c: self.process_school(*row) - #TODO sustredka - + # TODO sustredka if self.verbosity >= 1: self.stdout.write("Dumping participations") @@ -68,17 +68,15 @@ def handle_noargs(self, **options): for participant in c: man_id = participant[1] action = actions[participant[0]] - last_contact[man_id] = max(last_contact.get(man_id,0), action['end'].year) + last_contact[man_id] = max(last_contact.get(man_id, 0), action['end'].year) camps_survived[man_id] = camps_survived.get(man_id, 0) + 1 - if self.verbosity >= 1: self.stdout.write("Creating/retrieving required UserPropertyKeys...") if self.verbosity >= 1: self.stdout.write("Migrating people...") - fields = ["man_id", "firstname", "lastname", "school_id", "finish", "note"] c.execute(""" SELECT %s @@ -88,7 +86,7 @@ def handle_noargs(self, **options): for l in c: l = dict(zip(fields, l)) idcko = l['man_id'] - last_contact[idcko] = max(last_contact.get(idcko,0), int(l['finish'])-3) + last_contact[idcko] = max(last_contact.get(idcko, 0), int(l['finish'])-3) user = { 'first_name': l['firstname'], @@ -116,7 +114,7 @@ def handle_noargs(self, **options): user_properties = [ (LAST_CONTACT_PROPERTY, last_contact[idcko]), (KASPAR_NOTE_PROPERTY, l['note']), - (KSP_CAMPS_PROPERTY, camps_survived.get(idcko,0)) + (KSP_CAMPS_PROPERTY, camps_survived.get(idcko, 0)) ] userObject = self.process_person(user, user_properties, KASPAR_ID_PROPERTY, int(idcko)) From dc7373615f438ecb2da73a09a470b8a93fac36eb Mon Sep 17 00:00:00 2001 From: vlejd Date: Sat, 1 Apr 2017 14:38:50 +0200 Subject: [PATCH 13/20] Fixed lint errors and added some options --- trojsten/people/helpers.py | 2 +- .../commands/migrate_30rokovfks1_csv.py | 28 ++--- .../commands/migrate_30rokovfks2_csv.py | 32 ++---- .../management/commands/migrate_base_class.py | 105 +++++++++++------- .../management/commands/migrate_fks_csv.py | 22 +--- .../management/commands/migrate_kms_csv.py | 23 +--- .../management/commands/migrate_ksp_kaspar.py | 28 ++--- 7 files changed, 106 insertions(+), 134 deletions(-) diff --git a/trojsten/people/helpers.py b/trojsten/people/helpers.py index 84c222b79..59c6083d5 100644 --- a/trojsten/people/helpers.py +++ b/trojsten/people/helpers.py @@ -9,7 +9,7 @@ def get_similar_users(user): """Returns a list of users similar to the specified user.""" - #TODO check birth day as well. (if defined, filter different) + # TODO check birth day as well. (if defined, filter different) return User.objects.exclude(pk=user.pk).filter( first_name=user.first_name, last_name=user.last_name, diff --git a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py index bb5becc04..67b0c5a22 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py @@ -1,19 +1,8 @@ from __future__ import unicode_literals import csv -from collections import defaultdict -from datetime import datetime -import os - -from django.core.management.base import NoArgsCommand -from django.db import connections, transaction -from django.db.models import Q -from django.utils.six.moves import input - -from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey -from trojsten.people.management.commands.migrate_base_class import * +from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand class Command(MigrateBaceCommand): @@ -32,20 +21,23 @@ def handle_noargs(self, **options): idd = 0 for l in participants: idd += 1 + csv_id = "30rokovFKS1_{0:d}".format(idd) contacted = l['kontaktovany?'] == 'ano' + if contacted: + self.last_contact[csv_id].append(2014) + user = { 'first_name': l['Meno'], 'last_name': l['Priezvisko'], 'email': l['Email'], } user_properties = [ - (MOBIL_PROPERTY, l['Telefon'].replace(" ", "").strip()), - (BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), - (NICKNAME_PROPERTY, l['Prezyvka']), - (LAST_CONTACT_PROPERTY, 2014 if contacted else False) + (self.MOBIL_PROPERTY, l['Telefon'].replace(" ", "").strip()), + (self.BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), + (self.NICKNAME_PROPERTY, l['Prezyvka']) ] - self.process_person(user, user_properties, CSV_ID_PROPERTY, - "30rokovFKS1_{0:d}".format(idd)) + self.process_person(user, user_properties, self.CSV_ID_PROPERTY, + csv_id) self.print_stats() diff --git a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py index d96d6796d..bebfae18e 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py @@ -1,19 +1,8 @@ from __future__ import unicode_literals import csv -from collections import defaultdict -from datetime import datetime -import os - -from django.core.management.base import NoArgsCommand -from django.db import connections, transaction -from django.db.models import Q -from django.utils.six.moves import input - -from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey -from trojsten.people.management.commands.migrate_base_class import * +from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand class Command(MigrateBaceCommand): @@ -31,26 +20,27 @@ def handle_noargs(self, **options): idd = 0 for l in participants: idd += 1 + csv_id = "30rokovFKS2_{0:d}".format(idd) if not l['Meno']: continue + self.last_contact[csv_id].append(2014) user = { 'first_name': l['Meno'], 'last_name': l['Priezvisko'], 'email': l['E-mail'], } user_properties = [ - (MOBIL_PROPERTY, l['Telefon'].replace(" ", "").strip()), - (BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), - (NICKNAME_PROPERTY, l['Prezyvka']), - (COMPANY_PROPERTY, l['Posobisko']), - (AFFILIATION_PROPERTY, l['Pozicia']), - (MEMORY_PROPERTY, l['spomienka']), - (LAST_CONTACT_PROPERTY, 2014), + (self.MOBIL_PROPERTY, l['Telefon'].replace(" ", "").strip()), + (self.BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), + (self.NICKNAME_PROPERTY, l['Prezyvka']), + (self.COMPANY_PROPERTY, l['Posobisko']), + (self.AFFILIATION_PROPERTY, l['Pozicia']), + (self.MEMORY_PROPERTY, l['spomienka']) ] # TODO Adresa - self.process_person(user, user_properties, CSV_ID_PROPERTY, - "30rokovFKS2_{0:d}".format(idd)) + self.process_person(user, user_properties, self.CSV_ID_PROPERTY, + csv_id) self.print_stats() diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py index 5a09e9deb..e5f77eef7 100644 --- a/trojsten/people/management/commands/migrate_base_class.py +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -1,16 +1,18 @@ from __future__ import unicode_literals from datetime import datetime +from imp import reload +from collections import defaultdict +import sys from django.core.management.base import NoArgsCommand -from django.db import connections, transaction +from django.db import transaction from django.db.models import Q from django.utils.six.moves import input from trojsten.people.helpers import get_similar_users from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey, UserProperty, Address -import sys reload(sys) sys.setdefaultencoding("utf-8") @@ -24,15 +26,54 @@ def add_arguments(self, parser): dest='dry', default=True, help='Actually write something to DB') + parser.add_argument('--fast', + action='store_true', + dest='fast', + default=False, + help='Create only a few users') def handle_noargs(self, **options): self.dry = options['dry'] + self.fast = options['fast'] + self.done_users = 0 + self.done_schools = 0 if self.dry: self.stdout.write("Running dry run!") self.verbosity = options['verbosity'] self.similar_users = [] self.school_id_map = {} + self.last_contact = defaultdict(list) + + CSV_ID_KEY = "csv ID" + self.CSV_ID_PROPERTY = self.process_property(CSV_ID_KEY, "(.{1,20}_)?\d+") + MOBIL_KEY = "Mobil" + self.MOBIL_PROPERTY = self.process_property(MOBIL_KEY, "\+?\d+\/?\d+") + NICKNAME_KEY = "Prezyvka" + self.NICKNAME_PROPERTY = self.process_property(NICKNAME_KEY, ".{1,30}") + BIRTH_NAME_KEY = "Rodne Meno" + self.BIRTH_NAME_PROPERTY = self.process_property(BIRTH_NAME_KEY, ".{1,30}") + LAST_CONTACT_KEY = "Posledny kontakt" + # TODO fix False and stupid values + self.LAST_CONTACT_PROPERTY = self.process_property(LAST_CONTACT_KEY, "\d\d\d\d") + FKS_ID_KEY = "FKS ID" + self.FKS_ID_PROPERTY = self.process_property(FKS_ID_KEY, "\d+") + KMS_ID_KEY = "KMS ID" + self.KMS_ID_PROPERTY = self.process_property(KMS_ID_KEY, "\d+") + KMS_CAMPS_KEY = "KMS sustredenia" + self.KMS_CAMPS_PROPERTY = self.process_property(KMS_CAMPS_KEY, "\d+") + KASPAR_ID_KEY = "KSP ID" + self.KASPAR_ID_PROPERTY = self.process_property(KASPAR_ID_KEY, "\d+") + KASPAR_NOTE_KEY = "KSP note" + self.KASPAR_NOTE_PROPERTY = self.process_property(KASPAR_NOTE_KEY, ".*") + KSP_CAMPS_KEY = "KSP sustredenia" + self.KSP_CAMPS_PROPERTY = self.process_property(KSP_CAMPS_KEY, "\d+") + MEMORY_KEY = "Spomienky" + self.MEMORY_PROPERTY = self.process_property(MEMORY_KEY, ".*") + COMPANY_KEY = "Posobisko" + self.COMPANY_PROPERTY = self.process_property(COMPANY_KEY, ".*") + AFFILIATION_KEY = "Pozicia" + self.AFFILIATION_PROPERTY = self.process_property(AFFILIATION_KEY, ".*") @transaction.atomic def process_address(self, street, town, postal_code, country): @@ -42,6 +83,9 @@ def process_address(self, street, town, postal_code, country): def process_school(self, old_id, abbr, name, addr_name, street, city, zip_code): + self.done_schools += 1 + if self.fast and self.done_schools > 100: + return None # TODO improve this, do not work with abbreviations if not abbr: self.school_id_map[old_id] = None @@ -111,6 +155,10 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user first_name, last_name, graduation, email, birth_date, school_id """ # If the user already exists in our database, skip. + self.done_users += 1 + if self.fast and self.done_users > 100: + return None + old_id_property = None if old_user_id: old_id_property = UserProperty.objects.filter(key=old_user_id_field, value=old_user_id) @@ -150,8 +198,14 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user new_user = User.objects.create(**user_args) - if old_user_id: - new_user.properties.create(key=old_user_id_field, value=old_user_id) + new_user.properties.create(key=old_user_id_field, value=old_user_id) + + # TODO last_contacted + if old_user_id in self.last_contact: + contacts = self.last_contact[old_user_id] + valid_contacts = filter(lambda c: 1900 < c and c < 2017, contacts) + if valid_contacts: + user_properties.append([self.LAST_CONTACT_PROPERTY, max(valid_contacts)]) user_properties = list(filter(lambda x: x, user_properties)) for key, value in user_properties: @@ -191,42 +245,15 @@ def parse_dash_date(self, date_string): return datetime.strptime(date_string, '%Y-%m-%d') def process_property(self, key_name, regexp=None): - # TODO handle regexp + hiddne, if does not exists, ask and create - # WARNING this is will create object in db even for dry run. - user_property, _ = UserPropertyKey.objects.get_or_create(key_name=key_name) + user_property = UserPropertyKey.objects.filter(key_name=key_name) + if not user_property.exists(): + if self.dry: + user_property = UserPropertyKey(key_name=key_name, regex=regexp) + else: + user_property = UserPropertyKey.objects.create(key_name=key_name, regex=regexp) + else: + user_property = user_property.first() return user_property def fix_string(self, string): return string.replace(" ", "").strip() - -COMMAND = MigrateBaceCommand() - -CSV_ID_KEY = "csv ID" -CSV_ID_PROPERTY = COMMAND.process_property(CSV_ID_KEY, "(.*_)?\d+") -MOBIL_KEY = "Mobil" -MOBIL_PROPERTY = COMMAND.process_property(MOBIL_KEY, ".?.?\d*\\?") -NICKNAME_KEY = "Prezyvka" -NICKNAME_PROPERTY = COMMAND.process_property(NICKNAME_KEY) -BIRTH_NAME_KEY = "Rodne Meno" -BIRTH_NAME_PROPERTY = COMMAND.process_property(BIRTH_NAME_KEY) -LAST_CONTACT_KEY = "Posledny kontakt" -LAST_CONTACT_PROPERTY = COMMAND.process_property(LAST_CONTACT_KEY) - -FKS_ID_KEY = "FKS ID" -FKS_ID_PROPERTY = COMMAND.process_property(FKS_ID_KEY) -KMS_ID_KEY = "KMS ID" -KMS_ID_PROPERTY = COMMAND.process_property(KMS_ID_KEY) -KMS_CAMPS_KEY = "KMS sustredenia" -KMS_CAMPS_PROPERTY = COMMAND.process_property(KMS_CAMPS_KEY) -KASPAR_ID_KEY = "KSP ID" -KASPAR_ID_PROPERTY = COMMAND.process_property(KASPAR_ID_KEY) -KASPAR_NOTE_KEY = "KSP note" -KASPAR_NOTE_PROPERTY = COMMAND.process_property(KASPAR_NOTE_KEY) -KSP_CAMPS_KEY = "KSP sustredenia" -KSP_CAMPS_PROPERTY = COMMAND.process_property(KSP_CAMPS_KEY) -MEMORY_KEY = "Spomienky" -MEMORY_PROPERTY = COMMAND.process_property(MEMORY_KEY) -COMPANY_KEY = "Posobisko" -COMPANY_PROPERTY = COMMAND.process_property(COMPANY_KEY) -AFFILIATION_KEY = "Pozicia" -AFFILIATION_PROPERTY = COMMAND.process_property(AFFILIATION_KEY) diff --git a/trojsten/people/management/commands/migrate_fks_csv.py b/trojsten/people/management/commands/migrate_fks_csv.py index 59d4d0172..ca63bd03d 100644 --- a/trojsten/people/management/commands/migrate_fks_csv.py +++ b/trojsten/people/management/commands/migrate_fks_csv.py @@ -1,19 +1,9 @@ from __future__ import unicode_literals import csv -from collections import defaultdict -from datetime import datetime import os - -from django.core.management.base import NoArgsCommand -from django.db import connections, transaction -from django.db.models import Q -from django.utils.six.moves import input - -from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey -from trojsten.people.management.commands.migrate_base_class import * +from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand """ @@ -66,11 +56,10 @@ def handle_noargs(self, **options): activity_file = os.path.join(base, "aktivita.csv") activity = csv.DictReader(open(activity_file)) - last_contact = {} for act in activity: idd = act['riesitel_id'] date = self.parse_dash_date(act['termin']) - last_contact[idd] = max(last_contact.get(idd, 0), date.year) + self.last_contact[idd].append(int(date.year)) people_file = os.path.join(base, "osoba.csv") people = csv.DictReader(open(people_file)) @@ -86,7 +75,7 @@ def handle_noargs(self, **options): idd = l['osoba_id'] person = people_by_id[idd] matura = l['rok_maturity'] - last_contact[idd] = max(last_contact.get(idd, 0), int(matura)-3) + self.last_contact[idd].append(int(matura)-3) address = address_by_id[person['adresa_id']] parsed_address = { 'street': address['ulica'], @@ -104,10 +93,9 @@ def handle_noargs(self, **options): } user_properties = [ - (MOBIL_PROPERTY, person['telefon'].replace(" ", "").strip()), - (LAST_CONTACT_PROPERTY, last_contact[idd]) + (self.MOBIL_PROPERTY, person['telefon'].replace(" ", "").strip()) ] - self.process_person(user, user_properties, FKS_ID_PROPERTY, int(idd), + self.process_person(user, user_properties, self.FKS_ID_PROPERTY, idd, address=parsed_address) self.print_stats() diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py index a0524a907..e78b3c2b5 100644 --- a/trojsten/people/management/commands/migrate_kms_csv.py +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -2,18 +2,9 @@ import csv from collections import defaultdict -from datetime import datetime import os - -from django.core.management.base import NoArgsCommand -from django.db import connections, transaction -from django.db.models import Q -from django.utils.six.moves import input - -from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey -from trojsten.people.management.commands.migrate_base_class import * +from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand """ Restore the mysql database dump and run (replace and ) @@ -42,12 +33,11 @@ def handle_noargs(self, **options): camps_file = os.path.join(base, "sustredenia.csv") camps = csv.DictReader(open(camps_file)) camps_survived = defaultdict(int) - last_contact = {} for camp in camps: idd = camp['id_riesitela'].strip() camps_survived[idd] += 1 if camp['rok']: - last_contact[idd] = max(last_contact.get(idd, 0), int(camp['rok'])) + self.last_contact[idd].append(int(camp['rok'])) schools_file = os.path.join(base, "skoly.csv") schools = csv.DictReader(open(schools_file)) @@ -61,7 +51,7 @@ def handle_noargs(self, **options): if not l['meno']: continue idd = l['id'] - last_contact[idd] = max(last_contact.get(idd, 0), int(l['matura'])-3) + self.last_contact[idd].append(int(l['matura'])-3) user = { 'first_name': l['meno'], 'last_name': l['priezvisko'], @@ -76,11 +66,10 @@ def handle_noargs(self, **options): 'adresa_kores' user_properties = [ - (MOBIL_PROPERTY, l['mobil'].replace(" ", "").strip()), - (KMS_CAMPS_PROPERTY, camps_survived[idd]), - (LAST_CONTACT_PROPERTY, last_contact[idd]) + (self.MOBIL_PROPERTY, l['mobil'].replace(" ", "").strip()), + (self.KMS_CAMPS_PROPERTY, camps_survived[idd]) ] - self.process_person(user, user_properties, KMS_ID_PROPERTY, int(idd)) + self.process_person(user, user_properties, self.KMS_ID_PROPERTY, idd) # TODO parse camps more precisely self.print_stats() diff --git a/trojsten/people/management/commands/migrate_ksp_kaspar.py b/trojsten/people/management/commands/migrate_ksp_kaspar.py index b8aff8134..48278c8a8 100644 --- a/trojsten/people/management/commands/migrate_ksp_kaspar.py +++ b/trojsten/people/management/commands/migrate_ksp_kaspar.py @@ -1,19 +1,7 @@ from __future__ import unicode_literals -import csv -from collections import defaultdict -from datetime import datetime -import os - - -from django.core.management.base import NoArgsCommand -from django.db import connections, transaction -from django.db.models import Q -from django.utils.six.moves import input - -from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey -from trojsten.people.management.commands.migrate_base_class import * +from django.db import connections +from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand # Kaspar property IDs EMAIL_PROP = 1 @@ -63,12 +51,11 @@ def handle_noargs(self, **options): FROM participants """) - last_contact = {} camps_survived = {} for participant in c: man_id = participant[1] action = actions[participant[0]] - last_contact[man_id] = max(last_contact.get(man_id, 0), action['end'].year) + self.last_contact[man_id].append(int(action['end'].year)) camps_survived[man_id] = camps_survived.get(man_id, 0) + 1 if self.verbosity >= 1: @@ -86,7 +73,7 @@ def handle_noargs(self, **options): for l in c: l = dict(zip(fields, l)) idcko = l['man_id'] - last_contact[idcko] = max(last_contact.get(idcko, 0), int(l['finish'])-3) + self.last_contact[idcko].append(int(l['finish'])-3) user = { 'first_name': l['firstname'], @@ -112,10 +99,9 @@ def handle_noargs(self, **options): cc.close() user_properties = [ - (LAST_CONTACT_PROPERTY, last_contact[idcko]), - (KASPAR_NOTE_PROPERTY, l['note']), - (KSP_CAMPS_PROPERTY, camps_survived.get(idcko, 0)) + (self.KASPAR_NOTE_PROPERTY, l['note']), + (self.KSP_CAMPS_PROPERTY, camps_survived.get(idcko, 0)) ] - userObject = self.process_person(user, user_properties, KASPAR_ID_PROPERTY, int(idcko)) + self.process_person(user, user_properties, self.KASPAR_ID_PROPERTY, idcko) self.print_stats() From adffa567bbf85921984479d626f79bd9267a0344 Mon Sep 17 00:00:00 2001 From: vlejd Date: Sun, 16 Apr 2017 00:08:06 +0200 Subject: [PATCH 14/20] Minor parameter changes. --- trojsten/people/management/commands/migrate_fks_csv.py | 5 +++-- trojsten/people/management/commands/migrate_kms_csv.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/trojsten/people/management/commands/migrate_fks_csv.py b/trojsten/people/management/commands/migrate_fks_csv.py index ca63bd03d..49e0faad4 100644 --- a/trojsten/people/management/commands/migrate_fks_csv.py +++ b/trojsten/people/management/commands/migrate_fks_csv.py @@ -30,11 +30,12 @@ class Command(MigrateBaceCommand): def add_arguments(self, parser): super(Command, self).add_arguments(parser) - parser.add_argument('file', type=str) + parser.add_argument('csv_directory', type=str, + help="Directory containing all csv files.") def handle_noargs(self, **options): super(Command, self).handle_noargs(**options) - base = options['file'] + base = options['csv_directory'] addresses_file = os.path.join(base, "adresa.csv") addresses = csv.DictReader(open(addresses_file)) diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py index e78b3c2b5..8d87ac71c 100644 --- a/trojsten/people/management/commands/migrate_kms_csv.py +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -23,11 +23,12 @@ class Command(MigrateBaceCommand): def add_arguments(self, parser): super(Command, self).add_arguments(parser) - parser.add_argument('file', type=str) + parser.add_argument('csv_directory', type=str, + help="Directory containing all csv files.") def handle_noargs(self, **options): super(Command, self).handle_noargs(**options) - base = options['file'] + base = options['csv_directory'] participants_file = os.path.join(base, "riesitelia.csv") participants = csv.DictReader(open(participants_file)) camps_file = os.path.join(base, "sustredenia.csv") From b176aa0ed3581f4e407a790f6e57a322f25e7092 Mon Sep 17 00:00:00 2001 From: vlejd Date: Mon, 30 Oct 2017 14:36:22 +0100 Subject: [PATCH 15/20] Review fixes part 1. --- trojsten/people/constants.py | 20 +++ .../commands/migrate_30rokovfks1_csv.py | 8 +- .../commands/migrate_30rokovfks2_csv.py | 8 +- .../management/commands/migrate_base_class.py | 147 +++++++++--------- .../management/commands/migrate_fks_csv.py | 8 +- .../management/commands/migrate_kms_csv.py | 8 +- .../management/commands/migrate_ksp_kaspar.py | 82 +++++----- 7 files changed, 155 insertions(+), 126 deletions(-) diff --git a/trojsten/people/constants.py b/trojsten/people/constants.py index a753bbe31..a79482a5e 100644 --- a/trojsten/people/constants.py +++ b/trojsten/people/constants.py @@ -16,3 +16,23 @@ OTHER_SCHOOL_ID = 1 DEENVELOPING_NOT_REVIEWED_SYMBOL = '*' + +# User properties +# User id in the old fks database +FKS_ID_PROPERTY_KEY = "FKS ID" +# User id in the old kms database +KMS_ID_PROPERTY_KEY = "KMS ID" +# User id in the old ksp database +KASPAR_ID_PROPERTY_KEY = "KSP ID" +# User id in the csv file +CSV_ID_PROPERTY_KEY = "csv ID" +MOBIL_PROPERTY_KEY = "Mobil" +NICKNAME_PROPERTY_KEY = "Prezyvka" +BIRTH_NAME_PROPERTY_KEY = "Rodne Meno" +LAST_CONTACT_PROPERTY_KEY = "Posledny kontakt" +KMS_CAMPS_PROPERTY_KEY = "KMS sustredenia" +KASPAR_NOTE_PROPERTY_KEY = "KSP note" +KSP_CAMPS_PROPERTY_KEY = "KSP sustredenia" +MEMORY_PROPERTY_KEY = "Spomienky" +COMPANY_PROPERTY_KEY = "Posobisko" +AFFILIATION_PROPERTY_KEY = "Pozicia" diff --git a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py index 67b0c5a22..cf6919497 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py @@ -2,18 +2,18 @@ import csv -from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand +from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand -class Command(MigrateBaceCommand): +class Command(MigrateBaseCommand): help = 'Imports people and their related info from fks_csv.' def add_arguments(self, parser): super(Command, self).add_arguments(parser) parser.add_argument('file', type=str) - def handle_noargs(self, **options): - super(Command, self).handle_noargs(**options) + def handle(self, **options): + super(Command, self).handle(**options) participants_file = options['file'] participants = csv.DictReader(open(participants_file)) diff --git a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py index bebfae18e..6471bf02e 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py @@ -2,18 +2,18 @@ import csv -from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand +from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand -class Command(MigrateBaceCommand): +class Command(MigrateBaseCommand): help = 'Imports people and their related info from fks_csv.' def add_arguments(self, parser): super(Command, self).add_arguments(parser) parser.add_argument('file', type=str) - def handle_noargs(self, **options): - super(Command, self).handle_noargs(**options) + def handle(self, **options): + super(Command, self).handle(**options) participants_file = options['file'] participants = csv.DictReader(open(participants_file)) diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py index e5f77eef7..2f8748896 100644 --- a/trojsten/people/management/commands/migrate_base_class.py +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -5,20 +5,24 @@ from collections import defaultdict import sys -from django.core.management.base import NoArgsCommand +from django.core.management import BaseCommand as NoArgsCommand from django.db import transaction from django.db.models import Q from django.utils.six.moves import input from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey, UserProperty, Address +from trojsten.people import constants +from trojsten.schools.models import School +from trojsten.people.models import DuplicateUser, User, UserPropertyKey, UserProperty, Address reload(sys) sys.setdefaultencoding("utf-8") -class MigrateBaceCommand(NoArgsCommand): +class MigrateBaseCommand(NoArgsCommand): help = 'Base class for importing people.' + SCHOOLS_INF_FAST_RUN = 100 + USER_IN_FAST_RUN = 100 def add_arguments(self, parser): parser.add_argument('--wet_run', @@ -30,61 +34,58 @@ def add_arguments(self, parser): action='store_true', dest='fast', default=False, - help='Create only a few users') + help='Create only the first {} users and {} schools'.format( + self.USER_IN_FAST_RUN, self.SCHOOLS_INF_FAST_RUN)) - def handle_noargs(self, **options): + def handle(self, **options): self.dry = options['dry'] self.fast = options['fast'] self.done_users = 0 self.done_schools = 0 if self.dry: - self.stdout.write("Running dry run!") + self.stderr.write("Running dry run!") self.verbosity = options['verbosity'] self.similar_users = [] self.school_id_map = {} self.last_contact = defaultdict(list) - CSV_ID_KEY = "csv ID" - self.CSV_ID_PROPERTY = self.process_property(CSV_ID_KEY, "(.{1,20}_)?\d+") - MOBIL_KEY = "Mobil" - self.MOBIL_PROPERTY = self.process_property(MOBIL_KEY, "\+?\d+\/?\d+") - NICKNAME_KEY = "Prezyvka" - self.NICKNAME_PROPERTY = self.process_property(NICKNAME_KEY, ".{1,30}") - BIRTH_NAME_KEY = "Rodne Meno" - self.BIRTH_NAME_PROPERTY = self.process_property(BIRTH_NAME_KEY, ".{1,30}") - LAST_CONTACT_KEY = "Posledny kontakt" + self.CSV_ID_PROPERTY = self.process_property( + constants.CSV_ID_PROPERTY_KEY, "(.{1,20}_)?\d+") + self.MOBIL_PROPERTY = self.process_property( + constants.MOBIL_PROPERTY_KEY, "\+?\d+\/?\d+") + self.NICKNAME_PROPERTY = self.process_property( + constants.NICKNAME_PROPERTY_KEY, ".{1,30}") + self.BIRTH_NAME_PROPERTY = self.process_property( + constants.BIRTH_NAME_PROPERTY_KEY, ".{1,30}") # TODO fix False and stupid values - self.LAST_CONTACT_PROPERTY = self.process_property(LAST_CONTACT_KEY, "\d\d\d\d") - FKS_ID_KEY = "FKS ID" - self.FKS_ID_PROPERTY = self.process_property(FKS_ID_KEY, "\d+") - KMS_ID_KEY = "KMS ID" - self.KMS_ID_PROPERTY = self.process_property(KMS_ID_KEY, "\d+") - KMS_CAMPS_KEY = "KMS sustredenia" - self.KMS_CAMPS_PROPERTY = self.process_property(KMS_CAMPS_KEY, "\d+") - KASPAR_ID_KEY = "KSP ID" - self.KASPAR_ID_PROPERTY = self.process_property(KASPAR_ID_KEY, "\d+") - KASPAR_NOTE_KEY = "KSP note" - self.KASPAR_NOTE_PROPERTY = self.process_property(KASPAR_NOTE_KEY, ".*") - KSP_CAMPS_KEY = "KSP sustredenia" - self.KSP_CAMPS_PROPERTY = self.process_property(KSP_CAMPS_KEY, "\d+") - MEMORY_KEY = "Spomienky" - self.MEMORY_PROPERTY = self.process_property(MEMORY_KEY, ".*") - COMPANY_KEY = "Posobisko" - self.COMPANY_PROPERTY = self.process_property(COMPANY_KEY, ".*") - AFFILIATION_KEY = "Pozicia" - self.AFFILIATION_PROPERTY = self.process_property(AFFILIATION_KEY, ".*") - - @transaction.atomic - def process_address(self, street, town, postal_code, country): - return Address.objects.create(street=street, town=town, postal_code=postal_code, country=country) + self.LAST_CONTACT_PROPERTY = self.process_property( + constants.LAST_CONTACT_PROPERTY_KEY, "\d\d\d\d") + self.FKS_ID_PROPERTY = self.process_property( + constants.FKS_ID_PROPERTY_KEY, "\d+") + self.KMS_ID_PROPERTY = self.process_property( + constants.KMS_ID_PROPERTY_KEY, "\d+") + self.KMS_CAMPS_PROPERTY = self.process_property( + constants.KMS_CAMPS_PROPERTY_KEY, "\d+") + self.KASPAR_ID_PROPERTY = self.process_property( + constants.KASPAR_ID_PROPERTY_KEY, "\d+") + self.KASPAR_NOTE_PROPERTY = self.process_property( + constants.KASPAR_NOTE_PROPERTY_KEY, ".*") + self.KSP_CAMPS_PROPERTY = self.process_property( + constants.KSP_CAMPS_PROPERTY_KEY, "\d+") + self.MEMORY_PROPERTY = self.process_property( + constants.MEMORY_PROPERTY_KEY, ".*") + self.COMPANY_PROPERTY = self.process_property( + constants.COMPANY_PROPERTY_KEY, ".*") + self.AFFILIATION_PROPERTY = self.process_property( + constants.AFFILIATION_PROPERTY_KEY, ".*") @transaction.atomic def process_school(self, old_id, abbr, name, addr_name, street, city, zip_code): self.done_schools += 1 - if self.fast and self.done_schools > 100: + if self.fast and self.done_schools > self.SCHOOLS_INF_FAST_RUN: return None # TODO improve this, do not work with abbreviations if not abbr: @@ -98,11 +99,11 @@ def process_school(self, old_id, abbr, name, addr_name, street, row = (abbr, name, addr_name, street, city, self.fix_string(zip_code)) if len(candidates) == 1: if self.verbosity >= 2: - self.stdout.write("Matched %r to %s" % (row, + self.stderr.write("Matched %r to %s" % (row, candidates[0])) self.school_id_map[old_id] = candidates[0] elif len(candidates) > 1: - self.stdout.write("Multiple candidates for %r:\n%s" % ( + self.stderr.write("Multiple candidates for %r:\n%s" % ( row, "\n".join("%02d: %s" % (i, candidate) for i, candidate in enumerate(candidates)) @@ -110,7 +111,7 @@ def process_school(self, old_id, abbr, name, addr_name, street, try: choice = int(input("Choice (empty or invalid to create new): ")) self.school_id_map[old_id] = candidates[choice] - except (ValueError, KeyError): + except (KeyError): self.school_id_map[old_id] = self.create_school(*row) else: self.school_id_map[old_id] = self.create_school(*row) @@ -120,7 +121,8 @@ def create_school(self, abbr, name, addr_name, street, abbr += '?' # Question mark denotes schools needing review. school = None if len(zip_code) > 10: - # Swiss zip codes + # Swiss zip codes are longer than 10 chars, but our db model does not allow + # them so we skip them. zip_code = 0 if self.dry: @@ -138,25 +140,30 @@ def create_school(self, abbr, name, addr_name, street, city=city, zip_code=zip_code) if self.verbosity >= 2: - self.stdout.write("Created new school %s" % school) + self.stderr.write("Created new school %s" % school) return school @transaction.atomic - def process_person(self, user_args, user_properties, old_user_id_field, old_user_id, address=None): + def process_person(self, + user_args, + user_properties, + old_user_id_field, + old_user_id, + address=None): """ - Args: - user_args (dict): will be used for user constructor as is. Except for school_id. - user_properties (list(tuple(UserPropertyKey, string))): - will create additional user properties - old_user_id_field (UserPropertyKey): old field that contained oser id - (kaspar_id/ kms id ...), used for faster deduplication. - old_user_id (int/string): old id - user_args can have - first_name, last_name, graduation, email, birth_date, school_id + Args: + user_args (dict): will be used for user constructor as is. Except for school_id. + user_properties (list(tuple(UserPropertyKey, string))): + will create additional user properties + old_user_id_field (UserPropertyKey): old field that contained oser id + (kaspar_id/ kms id ...), used for faster deduplication. + old_user_id (int/string): old id + user_args can have + first_name, last_name, graduation, email, birth_date, school_id """ - # If the user already exists in our database, skip. + # If we run in the fast mode and we already processed enough users, we skip this one. self.done_users += 1 - if self.fast and self.done_users > 100: + if self.fast and self.done_users > self.USER_IN_FAST_RUN: return None old_id_property = None @@ -169,11 +176,10 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user last_name = user_args['last_name'] if old_id_property.exists(): if self.verbosity >= 2: - self.stdout.write("Skipping user %s %s" % (first_name, + self.stderr.write("Skipping user %s %s" % (first_name, last_name)) return None - # The username needs to be unique, thus the ID. user_args['is_active'] = False if 'school_id' in user_args: @@ -182,19 +188,16 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user user_args['school'] = self.school_id_map.get(school_id) if self.verbosity >= 2: - self.stdout.write("Creating user %s %s" % (first_name, last_name)) + self.stderr.write("Creating user %s %s" % (first_name, last_name)) new_user = None if self.dry: new_user = User(**user_args) else: - addr = None if address: - addr = self.process_address(address['street'], - address['town'], - address['postal_code'], - address['country']) - user_args['home_address'] = addr + user_args['home_address'] = Address.objects.create( + street=address['street'], town=address['town'], + postal_code=address['postal_code'], country=address['country']) new_user = User.objects.create(**user_args) @@ -207,28 +210,26 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user if valid_contacts: user_properties.append([self.LAST_CONTACT_PROPERTY, max(valid_contacts)]) - user_properties = list(filter(lambda x: x, user_properties)) + user_properties = [prop for prop in user_properties if prop is not None] for key, value in user_properties: new_user.properties.create(key=key, value=value) similar_users = get_similar_users(new_user) if len(similar_users): - names_of_similar = [(x.first_name, x.last_name) for x in similar_users] + names_of_similar = [(user.first_name, user.last_name) for user in similar_users] self.similar_users.append(((first_name, last_name), names_of_similar)) if self.verbosity >= 2: - self.stdout.write('Similar users: %s' % str(names_of_similar)) - if self.dry: - pass - else: + self.stderr.write('Similar users: %s' % str(names_of_similar)) + if not self.dry: DuplicateUser.objects.create(user=new_user) return new_user def print_stats(self): for conflict in self.similar_users: - self.stdout.write("Conflicts: %s" % str(conflict)) + self.stderr.write("Conflicts: %s" % str(conflict)) - self.stdout.write("Conflict users: %d" % len(self.similar_users)) + self.stderr.write("Conflict users: %d" % len(self.similar_users)) def parse_dot_date(self, date_string): # Remove any whitespace inside the string. diff --git a/trojsten/people/management/commands/migrate_fks_csv.py b/trojsten/people/management/commands/migrate_fks_csv.py index 49e0faad4..6ca060c77 100644 --- a/trojsten/people/management/commands/migrate_fks_csv.py +++ b/trojsten/people/management/commands/migrate_fks_csv.py @@ -3,7 +3,7 @@ import csv import os -from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand +from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand """ @@ -25,7 +25,7 @@ # TODO vvysledkovky -class Command(MigrateBaceCommand): +class Command(MigrateBaseCommand): help = 'Imports people and their related info from fks_csv.' def add_arguments(self, parser): @@ -33,8 +33,8 @@ def add_arguments(self, parser): parser.add_argument('csv_directory', type=str, help="Directory containing all csv files.") - def handle_noargs(self, **options): - super(Command, self).handle_noargs(**options) + def handle(self, **options): + super(Command, self).handle(**options) base = options['csv_directory'] addresses_file = os.path.join(base, "adresa.csv") diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py index 8d87ac71c..062320fb3 100644 --- a/trojsten/people/management/commands/migrate_kms_csv.py +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -4,7 +4,7 @@ from collections import defaultdict import os -from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand +from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand """ Restore the mysql database dump and run (replace and ) @@ -18,7 +18,7 @@ """ -class Command(MigrateBaceCommand): +class Command(MigrateBaseCommand): help = 'Imports people and their related info from kms_csv.' def add_arguments(self, parser): @@ -26,8 +26,8 @@ def add_arguments(self, parser): parser.add_argument('csv_directory', type=str, help="Directory containing all csv files.") - def handle_noargs(self, **options): - super(Command, self).handle_noargs(**options) + def handle(self, **options): + super(Command, self).handle(**options) base = options['csv_directory'] participants_file = os.path.join(base, "riesitelia.csv") participants = csv.DictReader(open(participants_file)) diff --git a/trojsten/people/management/commands/migrate_ksp_kaspar.py b/trojsten/people/management/commands/migrate_ksp_kaspar.py index 48278c8a8..0c065e5da 100644 --- a/trojsten/people/management/commands/migrate_ksp_kaspar.py +++ b/trojsten/people/management/commands/migrate_ksp_kaspar.py @@ -1,76 +1,63 @@ from __future__ import unicode_literals from django.db import connections -from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand +from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand # Kaspar property IDs EMAIL_PROP = 1 BIRTHDAY_PROP = 2 -class Command(MigrateBaceCommand): +class Command(MigrateBaseCommand): help = 'Imports people and their related info from kaspar.' - def handle_noargs(self, **options): - super(Command, self).handle_noargs(**options) - kaspar = connections['kaspar'] - - if self.verbosity >= 1: - self.stdout.write("Migrating schools...") - - c = kaspar.cursor() - c.execute(""" + def process_schools(self): + cursor = self.kaspar.cursor() + cursor.execute(""" SELECT school_id, short, name, addr_name, addr_street, addr_city, addr_zip FROM schools; """) - self.school_id_map = dict() - for row in c: + for row in cursor: self.process_school(*row) - # TODO sustredka - - if self.verbosity >= 1: - self.stdout.write("Dumping participations") - - c.execute(""" + def process_particiaptions(self): + cursor = self.kaspar.cursor() + cursor.execute(""" SELECT action_id, name, date_start, date_end FROM actions """) actions = {} - for action in c: + for action in cursor: actions[action[0]] = { "name": action[1], "start": action[2], "end": action[3] } - c.execute(""" + cursor.execute(""" SELECT action_id, man_id, task, note FROM participants """) - camps_survived = {} - for participant in c: + self.camps_survived = {} + for participant in cursor: man_id = participant[1] action = actions[participant[0]] self.last_contact[man_id].append(int(action['end'].year)) - camps_survived[man_id] = camps_survived.get(man_id, 0) + 1 - - if self.verbosity >= 1: - self.stdout.write("Creating/retrieving required UserPropertyKeys...") - - if self.verbosity >= 1: - self.stdout.write("Migrating people...") + self.camps_survived[man_id] = self.camps_survived.get(man_id, 0) + 1 + def process_people(self): + cursor1 = self.kaspar.cursor() + cursor2 = self.kaspar.cursor() fields = ["man_id", "firstname", "lastname", "school_id", "finish", "note"] - c.execute(""" + cursor1.execute(""" SELECT %s FROM people; """ % (', '.join(fields))) - for l in c: + for l in cursor1: l = dict(zip(fields, l)) idcko = l['man_id'] self.last_contact[idcko].append(int(l['finish'])-3) @@ -81,13 +68,12 @@ def handle_noargs(self, **options): 'graduation': l['finish'], 'school_id': l['school_id'] } - cc = kaspar.cursor() - cc.execute(""" + cursor2.execute(""" SELECT ppt_id, value FROM people_prop WHERE people_prop.man_id = %s AND ppt_id IN (%s, %s); """, (idcko, EMAIL_PROP, BIRTHDAY_PROP)) - for prop_id, value in cc: + for prop_id, value in cursor2: if prop_id == EMAIL_PROP: user['email'] = value elif prop_id == BIRTHDAY_PROP: @@ -96,12 +82,34 @@ def handle_noargs(self, **options): except ValueError: # If we can't parse the date, give up. pass - cc.close() user_properties = [ (self.KASPAR_NOTE_PROPERTY, l['note']), - (self.KSP_CAMPS_PROPERTY, camps_survived.get(idcko, 0)) + (self.KSP_CAMPS_PROPERTY, self.camps_survived.get(idcko, 0)) ] self.process_person(user, user_properties, self.KASPAR_ID_PROPERTY, idcko) + cursor1.close() + cursor2.close() + + def handle(self, **options): + super(Command, self).handle(**options) + self.kaspar = connections['kaspar'] + + if self.verbosity >= 1: + self.stderr.write("Migrating schools...") + + self.process_schools() + + # TODO sustredka + + if self.verbosity >= 1: + self.stderr.write("Dumping participations") + + self.process_particiaptions() + + if self.verbosity >= 1: + self.stderr.write("Migrating people...") + + self.process_people() self.print_stats() From f3d7fb6b4cdeb2df6c21ade009ee9ed015aebe0e Mon Sep 17 00:00:00 2001 From: Michal Hozza Date: Tue, 9 Jul 2019 19:20:23 +0100 Subject: [PATCH 16/20] Fix typo. --- trojsten/people/management/commands/migrate_base_class.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py index 2f8748896..a84c96f7d 100644 --- a/trojsten/people/management/commands/migrate_base_class.py +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -21,7 +21,7 @@ class MigrateBaseCommand(NoArgsCommand): help = 'Base class for importing people.' - SCHOOLS_INF_FAST_RUN = 100 + SCHOOLS_IN_FAST_RUN = 100 USER_IN_FAST_RUN = 100 def add_arguments(self, parser): @@ -35,7 +35,7 @@ def add_arguments(self, parser): dest='fast', default=False, help='Create only the first {} users and {} schools'.format( - self.USER_IN_FAST_RUN, self.SCHOOLS_INF_FAST_RUN)) + self.USER_IN_FAST_RUN, self.SCHOOLS_IN_FAST_RUN)) def handle(self, **options): self.dry = options['dry'] @@ -85,7 +85,7 @@ def process_school(self, old_id, abbr, name, addr_name, street, city, zip_code): self.done_schools += 1 - if self.fast and self.done_schools > self.SCHOOLS_INF_FAST_RUN: + if self.fast and self.done_schools > self.SCHOOLS_IN_FAST_RUN: return None # TODO improve this, do not work with abbreviations if not abbr: From 4094bdad6fdab804b30f1545b39f5d64dbeacef3 Mon Sep 17 00:00:00 2001 From: Michal Hozza Date: Tue, 9 Jul 2019 21:25:32 +0100 Subject: [PATCH 17/20] black and isort. --- .../commands/migrate_30rokovfks1_csv.py | 23 +-- .../commands/migrate_30rokovfks2_csv.py | 29 ++- .../management/commands/migrate_base_class.py | 186 +++++++++--------- .../management/commands/migrate_fks_csv.py | 66 +++---- .../management/commands/migrate_kms_csv.py | 57 +++--- .../management/commands/migrate_ksp_kaspar.py | 65 +++--- 6 files changed, 215 insertions(+), 211 deletions(-) diff --git a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py index cf6919497..d481daac7 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks1_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks1_csv.py @@ -6,15 +6,15 @@ class Command(MigrateBaseCommand): - help = 'Imports people and their related info from fks_csv.' + help = "Imports people and their related info from fks_csv." def add_arguments(self, parser): super(Command, self).add_arguments(parser) - parser.add_argument('file', type=str) + parser.add_argument("file", type=str) def handle(self, **options): super(Command, self).handle(**options) - participants_file = options['file'] + participants_file = options["file"] participants = csv.DictReader(open(participants_file)) @@ -22,22 +22,17 @@ def handle(self, **options): for l in participants: idd += 1 csv_id = "30rokovFKS1_{0:d}".format(idd) - contacted = l['kontaktovany?'] == 'ano' + contacted = l["kontaktovany?"] == "ano" if contacted: self.last_contact[csv_id].append(2014) - user = { - 'first_name': l['Meno'], - 'last_name': l['Priezvisko'], - 'email': l['Email'], - } + user = {"first_name": l["Meno"], "last_name": l["Priezvisko"], "email": l["Email"]} user_properties = [ - (self.MOBIL_PROPERTY, l['Telefon'].replace(" ", "").strip()), - (self.BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), - (self.NICKNAME_PROPERTY, l['Prezyvka']) + (self.MOBIL_PROPERTY, l["Telefon"].replace(" ", "").strip()), + (self.BIRTH_NAME_PROPERTY, l["Rodne priezvisko"]), + (self.NICKNAME_PROPERTY, l["Prezyvka"]), ] - self.process_person(user, user_properties, self.CSV_ID_PROPERTY, - csv_id) + self.process_person(user, user_properties, self.CSV_ID_PROPERTY, csv_id) self.print_stats() diff --git a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py index 6471bf02e..5075c4a53 100644 --- a/trojsten/people/management/commands/migrate_30rokovfks2_csv.py +++ b/trojsten/people/management/commands/migrate_30rokovfks2_csv.py @@ -6,41 +6,36 @@ class Command(MigrateBaseCommand): - help = 'Imports people and their related info from fks_csv.' + help = "Imports people and their related info from fks_csv." def add_arguments(self, parser): super(Command, self).add_arguments(parser) - parser.add_argument('file', type=str) + parser.add_argument("file", type=str) def handle(self, **options): super(Command, self).handle(**options) - participants_file = options['file'] + participants_file = options["file"] participants = csv.DictReader(open(participants_file)) idd = 0 for l in participants: idd += 1 csv_id = "30rokovFKS2_{0:d}".format(idd) - if not l['Meno']: + if not l["Meno"]: continue self.last_contact[csv_id].append(2014) - user = { - 'first_name': l['Meno'], - 'last_name': l['Priezvisko'], - 'email': l['E-mail'], - } + user = {"first_name": l["Meno"], "last_name": l["Priezvisko"], "email": l["E-mail"]} user_properties = [ - (self.MOBIL_PROPERTY, l['Telefon'].replace(" ", "").strip()), - (self.BIRTH_NAME_PROPERTY, l['Rodne priezvisko']), - (self.NICKNAME_PROPERTY, l['Prezyvka']), - (self.COMPANY_PROPERTY, l['Posobisko']), - (self.AFFILIATION_PROPERTY, l['Pozicia']), - (self.MEMORY_PROPERTY, l['spomienka']) + (self.MOBIL_PROPERTY, l["Telefon"].replace(" ", "").strip()), + (self.BIRTH_NAME_PROPERTY, l["Rodne priezvisko"]), + (self.NICKNAME_PROPERTY, l["Prezyvka"]), + (self.COMPANY_PROPERTY, l["Posobisko"]), + (self.AFFILIATION_PROPERTY, l["Pozicia"]), + (self.MEMORY_PROPERTY, l["spomienka"]), ] # TODO Adresa - self.process_person(user, user_properties, self.CSV_ID_PROPERTY, - csv_id) + self.process_person(user, user_properties, self.CSV_ID_PROPERTY, csv_id) self.print_stats() diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py index a84c96f7d..fce2cd9a8 100644 --- a/trojsten/people/management/commands/migrate_base_class.py +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -1,88 +1,84 @@ from __future__ import unicode_literals +import sys +from collections import defaultdict from datetime import datetime from imp import reload -from collections import defaultdict -import sys from django.core.management import BaseCommand as NoArgsCommand from django.db import transaction from django.db.models import Q from django.utils.six.moves import input -from trojsten.people.helpers import get_similar_users from trojsten.people import constants +from trojsten.people.helpers import get_similar_users +from trojsten.people.models import Address, DuplicateUser, User, UserProperty, UserPropertyKey from trojsten.schools.models import School -from trojsten.people.models import DuplicateUser, User, UserPropertyKey, UserProperty, Address reload(sys) sys.setdefaultencoding("utf-8") class MigrateBaseCommand(NoArgsCommand): - help = 'Base class for importing people.' + help = "Base class for importing people." SCHOOLS_IN_FAST_RUN = 100 USER_IN_FAST_RUN = 100 def add_arguments(self, parser): - parser.add_argument('--wet_run', - action='store_false', - dest='dry', - default=True, - help='Actually write something to DB') - parser.add_argument('--fast', - action='store_true', - dest='fast', - default=False, - help='Create only the first {} users and {} schools'.format( - self.USER_IN_FAST_RUN, self.SCHOOLS_IN_FAST_RUN)) + parser.add_argument( + "--wet_run", + action="store_false", + dest="dry", + default=True, + help="Actually write something to DB", + ) + parser.add_argument( + "--fast", + action="store_true", + dest="fast", + default=False, + help="Create only the first {} users and {} schools".format( + self.USER_IN_FAST_RUN, self.SCHOOLS_IN_FAST_RUN + ), + ) def handle(self, **options): - self.dry = options['dry'] - self.fast = options['fast'] + self.dry = options["dry"] + self.fast = options["fast"] self.done_users = 0 self.done_schools = 0 if self.dry: self.stderr.write("Running dry run!") - self.verbosity = options['verbosity'] + self.verbosity = options["verbosity"] self.similar_users = [] self.school_id_map = {} self.last_contact = defaultdict(list) self.CSV_ID_PROPERTY = self.process_property( - constants.CSV_ID_PROPERTY_KEY, "(.{1,20}_)?\d+") - self.MOBIL_PROPERTY = self.process_property( - constants.MOBIL_PROPERTY_KEY, "\+?\d+\/?\d+") - self.NICKNAME_PROPERTY = self.process_property( - constants.NICKNAME_PROPERTY_KEY, ".{1,30}") + constants.CSV_ID_PROPERTY_KEY, "(.{1,20}_)?\d+" + ) + self.MOBIL_PROPERTY = self.process_property(constants.MOBIL_PROPERTY_KEY, "\+?\d+\/?\d+") + self.NICKNAME_PROPERTY = self.process_property(constants.NICKNAME_PROPERTY_KEY, ".{1,30}") self.BIRTH_NAME_PROPERTY = self.process_property( - constants.BIRTH_NAME_PROPERTY_KEY, ".{1,30}") + constants.BIRTH_NAME_PROPERTY_KEY, ".{1,30}" + ) # TODO fix False and stupid values self.LAST_CONTACT_PROPERTY = self.process_property( - constants.LAST_CONTACT_PROPERTY_KEY, "\d\d\d\d") - self.FKS_ID_PROPERTY = self.process_property( - constants.FKS_ID_PROPERTY_KEY, "\d+") - self.KMS_ID_PROPERTY = self.process_property( - constants.KMS_ID_PROPERTY_KEY, "\d+") - self.KMS_CAMPS_PROPERTY = self.process_property( - constants.KMS_CAMPS_PROPERTY_KEY, "\d+") - self.KASPAR_ID_PROPERTY = self.process_property( - constants.KASPAR_ID_PROPERTY_KEY, "\d+") - self.KASPAR_NOTE_PROPERTY = self.process_property( - constants.KASPAR_NOTE_PROPERTY_KEY, ".*") - self.KSP_CAMPS_PROPERTY = self.process_property( - constants.KSP_CAMPS_PROPERTY_KEY, "\d+") - self.MEMORY_PROPERTY = self.process_property( - constants.MEMORY_PROPERTY_KEY, ".*") - self.COMPANY_PROPERTY = self.process_property( - constants.COMPANY_PROPERTY_KEY, ".*") - self.AFFILIATION_PROPERTY = self.process_property( - constants.AFFILIATION_PROPERTY_KEY, ".*") + constants.LAST_CONTACT_PROPERTY_KEY, "\d\d\d\d" + ) + self.FKS_ID_PROPERTY = self.process_property(constants.FKS_ID_PROPERTY_KEY, "\d+") + self.KMS_ID_PROPERTY = self.process_property(constants.KMS_ID_PROPERTY_KEY, "\d+") + self.KMS_CAMPS_PROPERTY = self.process_property(constants.KMS_CAMPS_PROPERTY_KEY, "\d+") + self.KASPAR_ID_PROPERTY = self.process_property(constants.KASPAR_ID_PROPERTY_KEY, "\d+") + self.KASPAR_NOTE_PROPERTY = self.process_property(constants.KASPAR_NOTE_PROPERTY_KEY, ".*") + self.KSP_CAMPS_PROPERTY = self.process_property(constants.KSP_CAMPS_PROPERTY_KEY, "\d+") + self.MEMORY_PROPERTY = self.process_property(constants.MEMORY_PROPERTY_KEY, ".*") + self.COMPANY_PROPERTY = self.process_property(constants.COMPANY_PROPERTY_KEY, ".*") + self.AFFILIATION_PROPERTY = self.process_property(constants.AFFILIATION_PROPERTY_KEY, ".*") @transaction.atomic - def process_school(self, old_id, abbr, name, addr_name, street, - city, zip_code): + def process_school(self, old_id, abbr, name, addr_name, street, city, zip_code): self.done_schools += 1 if self.fast and self.done_schools > self.SCHOOLS_IN_FAST_RUN: @@ -93,21 +89,23 @@ def process_school(self, old_id, abbr, name, addr_name, street, return candidates = School.objects.filter( - Q(abbreviation__iexact=abbr) | - Q(abbreviation__iexact=abbr + '?') + Q(abbreviation__iexact=abbr) | Q(abbreviation__iexact=abbr + "?") ) row = (abbr, name, addr_name, street, city, self.fix_string(zip_code)) if len(candidates) == 1: if self.verbosity >= 2: - self.stderr.write("Matched %r to %s" % (row, - candidates[0])) + self.stderr.write("Matched %r to %s" % (row, candidates[0])) self.school_id_map[old_id] = candidates[0] elif len(candidates) > 1: - self.stderr.write("Multiple candidates for %r:\n%s" % ( - row, - "\n".join("%02d: %s" % (i, candidate) - for i, candidate in enumerate(candidates)) - )) + self.stderr.write( + "Multiple candidates for %r:\n%s" + % ( + row, + "\n".join( + "%02d: %s" % (i, candidate) for i, candidate in enumerate(candidates) + ), + ) + ) try: choice = int(input("Choice (empty or invalid to create new): ")) self.school_id_map[old_id] = candidates[choice] @@ -116,9 +114,8 @@ def process_school(self, old_id, abbr, name, addr_name, street, else: self.school_id_map[old_id] = self.create_school(*row) - def create_school(self, abbr, name, addr_name, street, - city, zip_code): - abbr += '?' # Question mark denotes schools needing review. + def create_school(self, abbr, name, addr_name, street, city, zip_code): + abbr += "?" # Question mark denotes schools needing review. school = None if len(zip_code) > 10: # Swiss zip codes are longer than 10 chars, but our db model does not allow @@ -126,30 +123,31 @@ def create_school(self, abbr, name, addr_name, street, zip_code = 0 if self.dry: - school = School(abbreviation=abbr, - verbose_name=name, - addr_name=addr_name, - street=street, - city=city, - zip_code=zip_code) + school = School( + abbreviation=abbr, + verbose_name=name, + addr_name=addr_name, + street=street, + city=city, + zip_code=zip_code, + ) else: - school = School.objects.create(abbreviation=abbr, - verbose_name=name, - addr_name=addr_name, - street=street, - city=city, - zip_code=zip_code) + school = School.objects.create( + abbreviation=abbr, + verbose_name=name, + addr_name=addr_name, + street=street, + city=city, + zip_code=zip_code, + ) if self.verbosity >= 2: self.stderr.write("Created new school %s" % school) return school @transaction.atomic - def process_person(self, - user_args, - user_properties, - old_user_id_field, - old_user_id, - address=None): + def process_person( + self, user_args, user_properties, old_user_id_field, old_user_id, address=None + ): """ Args: user_args (dict): will be used for user constructor as is. Except for school_id. @@ -172,20 +170,19 @@ def process_person(self, else: old_id_property = UserProperty.objects.none() - first_name = user_args['first_name'] - last_name = user_args['last_name'] + first_name = user_args["first_name"] + last_name = user_args["last_name"] if old_id_property.exists(): if self.verbosity >= 2: - self.stderr.write("Skipping user %s %s" % (first_name, - last_name)) + self.stderr.write("Skipping user %s %s" % (first_name, last_name)) return None - user_args['is_active'] = False + user_args["is_active"] = False - if 'school_id' in user_args: - school_id = user_args['school_id'] - del user_args['school_id'] - user_args['school'] = self.school_id_map.get(school_id) + if "school_id" in user_args: + school_id = user_args["school_id"] + del user_args["school_id"] + user_args["school"] = self.school_id_map.get(school_id) if self.verbosity >= 2: self.stderr.write("Creating user %s %s" % (first_name, last_name)) @@ -195,9 +192,12 @@ def process_person(self, new_user = User(**user_args) else: if address: - user_args['home_address'] = Address.objects.create( - street=address['street'], town=address['town'], - postal_code=address['postal_code'], country=address['country']) + user_args["home_address"] = Address.objects.create( + street=address["street"], + town=address["town"], + postal_code=address["postal_code"], + country=address["country"], + ) new_user = User.objects.create(**user_args) @@ -219,7 +219,7 @@ def process_person(self, names_of_similar = [(user.first_name, user.last_name) for user in similar_users] self.similar_users.append(((first_name, last_name), names_of_similar)) if self.verbosity >= 2: - self.stderr.write('Similar users: %s' % str(names_of_similar)) + self.stderr.write("Similar users: %s" % str(names_of_similar)) if not self.dry: DuplicateUser.objects.create(user=new_user) @@ -233,17 +233,17 @@ def print_stats(self): def parse_dot_date(self, date_string): # Remove any whitespace inside the string. - date_string = date_string.replace(' ', '') + date_string = date_string.replace(" ", "") # Just hope that all dates are in the same format. - return datetime.strptime(date_string, '%d.%m.%Y') + return datetime.strptime(date_string, "%d.%m.%Y") def parse_dash_date(self, date_string): # Remove any whitespace inside the string. - date_string = date_string.replace(' ', '') + date_string = date_string.replace(" ", "") if date_string == "0000-00-00" or date_string == "NULL": return None else: - return datetime.strptime(date_string, '%Y-%m-%d') + return datetime.strptime(date_string, "%Y-%m-%d") def process_property(self, key_name, regexp=None): user_property = UserPropertyKey.objects.filter(key_name=key_name) diff --git a/trojsten/people/management/commands/migrate_fks_csv.py b/trojsten/people/management/commands/migrate_fks_csv.py index 6ca060c77..166e7a08e 100644 --- a/trojsten/people/management/commands/migrate_fks_csv.py +++ b/trojsten/people/management/commands/migrate_fks_csv.py @@ -5,7 +5,6 @@ from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand - """ Restore the mysql database dump and run (replace and ) Alternatively you can export these tables from phpAdmin. @@ -26,40 +25,40 @@ class Command(MigrateBaseCommand): - help = 'Imports people and their related info from fks_csv.' + help = "Imports people and their related info from fks_csv." def add_arguments(self, parser): super(Command, self).add_arguments(parser) - parser.add_argument('csv_directory', type=str, - help="Directory containing all csv files.") + parser.add_argument("csv_directory", type=str, help="Directory containing all csv files.") def handle(self, **options): super(Command, self).handle(**options) - base = options['csv_directory'] + base = options["csv_directory"] addresses_file = os.path.join(base, "adresa.csv") addresses = csv.DictReader(open(addresses_file)) address_by_id = {} for address in addresses: - address_by_id[address['id']] = address + address_by_id[address["id"]] = address schools_file = os.path.join(base, "skola.csv") schools = csv.DictReader(open(schools_file)) for school in schools: - abbr = school['skratka'].split(' ', 1)[0] - addr = address_by_id[school['adresa_id']] + abbr = school["skratka"].split(" ", 1)[0] + addr = address_by_id[school["adresa_id"]] - street = addr['ulica'] + street = addr["ulica"] - addr_name = school['nazov'] + ", " + street - self.process_school(school['id'], abbr, school['nazov'], addr_name, street, - addr['mesto'], addr['psc']) + addr_name = school["nazov"] + ", " + street + self.process_school( + school["id"], abbr, school["nazov"], addr_name, street, addr["mesto"], addr["psc"] + ) activity_file = os.path.join(base, "aktivita.csv") activity = csv.DictReader(open(activity_file)) for act in activity: - idd = act['riesitel_id'] - date = self.parse_dash_date(act['termin']) + idd = act["riesitel_id"] + date = self.parse_dash_date(act["termin"]) self.last_contact[idd].append(int(date.year)) people_file = os.path.join(base, "osoba.csv") @@ -67,36 +66,35 @@ def handle(self, **options): people_by_id = {} for person in people: - people_by_id[person['id']] = person + people_by_id[person["id"]] = person participants_file = os.path.join(base, "riesitel.csv") participants = csv.DictReader(open(participants_file)) for l in participants: - idd = l['osoba_id'] + idd = l["osoba_id"] person = people_by_id[idd] - matura = l['rok_maturity'] - self.last_contact[idd].append(int(matura)-3) - address = address_by_id[person['adresa_id']] + matura = l["rok_maturity"] + self.last_contact[idd].append(int(matura) - 3) + address = address_by_id[person["adresa_id"]] parsed_address = { - 'street': address['ulica'], - 'town': address['mesto'], - 'postal_code': address['psc'], - 'country': address['stat'], + "street": address["ulica"], + "town": address["mesto"], + "postal_code": address["psc"], + "country": address["stat"], } user = { - 'first_name': person['meno'], - 'last_name': person['priezvisko'], - 'graduation': matura, - 'email': person['email'], - 'birth_date': self.parse_dash_date(person['datum_narodenia']), - 'school_id': l['skola_id'], + "first_name": person["meno"], + "last_name": person["priezvisko"], + "graduation": matura, + "email": person["email"], + "birth_date": self.parse_dash_date(person["datum_narodenia"]), + "school_id": l["skola_id"], } - user_properties = [ - (self.MOBIL_PROPERTY, person['telefon'].replace(" ", "").strip()) - ] - self.process_person(user, user_properties, self.FKS_ID_PROPERTY, idd, - address=parsed_address) + user_properties = [(self.MOBIL_PROPERTY, person["telefon"].replace(" ", "").strip())] + self.process_person( + user, user_properties, self.FKS_ID_PROPERTY, idd, address=parsed_address + ) self.print_stats() diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py index 062320fb3..3c02eb6a2 100644 --- a/trojsten/people/management/commands/migrate_kms_csv.py +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -1,11 +1,12 @@ from __future__ import unicode_literals import csv -from collections import defaultdict import os +from collections import defaultdict from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand + """ Restore the mysql database dump and run (replace and ) Alternatively you can export these tables from phpAdmin. @@ -19,56 +20,62 @@ class Command(MigrateBaseCommand): - help = 'Imports people and their related info from kms_csv.' + help = "Imports people and their related info from kms_csv." def add_arguments(self, parser): super(Command, self).add_arguments(parser) - parser.add_argument('csv_directory', type=str, - help="Directory containing all csv files.") + parser.add_argument("csv_directory", type=str, help="Directory containing all csv files.") def handle(self, **options): super(Command, self).handle(**options) - base = options['csv_directory'] + base = options["csv_directory"] participants_file = os.path.join(base, "riesitelia.csv") participants = csv.DictReader(open(participants_file)) camps_file = os.path.join(base, "sustredenia.csv") camps = csv.DictReader(open(camps_file)) camps_survived = defaultdict(int) for camp in camps: - idd = camp['id_riesitela'].strip() + idd = camp["id_riesitela"].strip() camps_survived[idd] += 1 - if camp['rok']: - self.last_contact[idd].append(int(camp['rok'])) + if camp["rok"]: + self.last_contact[idd].append(int(camp["rok"])) schools_file = os.path.join(base, "skoly.csv") schools = csv.DictReader(open(schools_file)) for school in schools: - abbr = school['skratka'].split(' ', 1)[0] - addr_name = school['nazov'] + ", " + school['ulica'] - self.process_school(school['id'], abbr, school['nazov'], addr_name, school['ulica'], - school['mesto'], school['PSC']) + abbr = school["skratka"].split(" ", 1)[0] + addr_name = school["nazov"] + ", " + school["ulica"] + self.process_school( + school["id"], + abbr, + school["nazov"], + addr_name, + school["ulica"], + school["mesto"], + school["PSC"], + ) for l in participants: - if not l['meno']: + if not l["meno"]: continue - idd = l['id'] - self.last_contact[idd].append(int(l['matura'])-3) + idd = l["id"] + self.last_contact[idd].append(int(l["matura"]) - 3) user = { - 'first_name': l['meno'], - 'last_name': l['priezvisko'], - 'graduation': l['matura'], - 'email': l['email'], - 'birth_date': self.parse_dash_date(l['datnar']), - 'school_id': l['id_skoly'] + "first_name": l["meno"], + "last_name": l["priezvisko"], + "graduation": l["matura"], + "email": l["email"], + "birth_date": self.parse_dash_date(l["datnar"]), + "school_id": l["id_skoly"], } # TODO parse addresses from string. - 'adresa_domov' - 'adresa_kores' + "adresa_domov" + "adresa_kores" user_properties = [ - (self.MOBIL_PROPERTY, l['mobil'].replace(" ", "").strip()), - (self.KMS_CAMPS_PROPERTY, camps_survived[idd]) + (self.MOBIL_PROPERTY, l["mobil"].replace(" ", "").strip()), + (self.KMS_CAMPS_PROPERTY, camps_survived[idd]), ] self.process_person(user, user_properties, self.KMS_ID_PROPERTY, idd) diff --git a/trojsten/people/management/commands/migrate_ksp_kaspar.py b/trojsten/people/management/commands/migrate_ksp_kaspar.py index 0c065e5da..bc6b45197 100644 --- a/trojsten/people/management/commands/migrate_ksp_kaspar.py +++ b/trojsten/people/management/commands/migrate_ksp_kaspar.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from django.db import connections + from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand # Kaspar property IDs @@ -9,83 +10,91 @@ class Command(MigrateBaseCommand): - help = 'Imports people and their related info from kaspar.' + help = "Imports people and their related info from kaspar." def process_schools(self): cursor = self.kaspar.cursor() - cursor.execute(""" + cursor.execute( + """ SELECT school_id, short, name, addr_name, addr_street, addr_city, addr_zip FROM schools; - """) + """ + ) for row in cursor: self.process_school(*row) def process_particiaptions(self): cursor = self.kaspar.cursor() - cursor.execute(""" + cursor.execute( + """ SELECT action_id, name, date_start, date_end FROM actions - """) + """ + ) actions = {} for action in cursor: - actions[action[0]] = { - "name": action[1], - "start": action[2], - "end": action[3] - } + actions[action[0]] = {"name": action[1], "start": action[2], "end": action[3]} - cursor.execute(""" + cursor.execute( + """ SELECT action_id, man_id, task, note FROM participants - """) + """ + ) self.camps_survived = {} for participant in cursor: man_id = participant[1] action = actions[participant[0]] - self.last_contact[man_id].append(int(action['end'].year)) + self.last_contact[man_id].append(int(action["end"].year)) self.camps_survived[man_id] = self.camps_survived.get(man_id, 0) + 1 def process_people(self): cursor1 = self.kaspar.cursor() cursor2 = self.kaspar.cursor() fields = ["man_id", "firstname", "lastname", "school_id", "finish", "note"] - cursor1.execute(""" + cursor1.execute( + """ SELECT %s FROM people; - """ % (', '.join(fields))) + """ + % (", ".join(fields)) + ) for l in cursor1: l = dict(zip(fields, l)) - idcko = l['man_id'] - self.last_contact[idcko].append(int(l['finish'])-3) + idcko = l["man_id"] + self.last_contact[idcko].append(int(l["finish"]) - 3) user = { - 'first_name': l['firstname'], - 'last_name': l['lastname'], - 'graduation': l['finish'], - 'school_id': l['school_id'] + "first_name": l["firstname"], + "last_name": l["lastname"], + "graduation": l["finish"], + "school_id": l["school_id"], } - cursor2.execute(""" + cursor2.execute( + """ SELECT ppt_id, value FROM people_prop WHERE people_prop.man_id = %s AND ppt_id IN (%s, %s); - """, (idcko, EMAIL_PROP, BIRTHDAY_PROP)) + """, + (idcko, EMAIL_PROP, BIRTHDAY_PROP), + ) for prop_id, value in cursor2: if prop_id == EMAIL_PROP: - user['email'] = value + user["email"] = value elif prop_id == BIRTHDAY_PROP: try: - user['birth_date'] = self.parse_dot_date(value) + user["birth_date"] = self.parse_dot_date(value) except ValueError: # If we can't parse the date, give up. pass user_properties = [ - (self.KASPAR_NOTE_PROPERTY, l['note']), - (self.KSP_CAMPS_PROPERTY, self.camps_survived.get(idcko, 0)) + (self.KASPAR_NOTE_PROPERTY, l["note"]), + (self.KSP_CAMPS_PROPERTY, self.camps_survived.get(idcko, 0)), ] self.process_person(user, user_properties, self.KASPAR_ID_PROPERTY, idcko) @@ -94,7 +103,7 @@ def process_people(self): def handle(self, **options): super(Command, self).handle(**options) - self.kaspar = connections['kaspar'] + self.kaspar = connections["kaspar"] if self.verbosity >= 1: self.stderr.write("Migrating schools...") From 48944c6734eea36c423cf97a5b3f4817ed4f5f88 Mon Sep 17 00:00:00 2001 From: Michal Hozza Date: Tue, 9 Jul 2019 21:31:50 +0100 Subject: [PATCH 18/20] Fix some review comments. --- .../management/commands/migrate_base_class.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py index fce2cd9a8..dd90f969b 100644 --- a/trojsten/people/management/commands/migrate_base_class.py +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -1,9 +1,5 @@ -from __future__ import unicode_literals - -import sys from collections import defaultdict from datetime import datetime -from imp import reload from django.core.management import BaseCommand as NoArgsCommand from django.db import transaction @@ -15,14 +11,11 @@ from trojsten.people.models import Address, DuplicateUser, User, UserProperty, UserPropertyKey from trojsten.schools.models import School -reload(sys) -sys.setdefaultencoding("utf-8") - class MigrateBaseCommand(NoArgsCommand): help = "Base class for importing people." - SCHOOLS_IN_FAST_RUN = 100 - USER_IN_FAST_RUN = 100 + NUMBER_OF_SCHOOLS_IN_FAST_RUN = 100 + NUMBER_OF_USERS_IN_FAST_RUN = 100 def add_arguments(self, parser): parser.add_argument( @@ -38,7 +31,7 @@ def add_arguments(self, parser): dest="fast", default=False, help="Create only the first {} users and {} schools".format( - self.USER_IN_FAST_RUN, self.SCHOOLS_IN_FAST_RUN + self.NUMBER_OF_USERS_IN_FAST_RUN, self.NUMBER_OF_SCHOOLS_IN_FAST_RUN ), ) @@ -81,7 +74,7 @@ def handle(self, **options): def process_school(self, old_id, abbr, name, addr_name, street, city, zip_code): self.done_schools += 1 - if self.fast and self.done_schools > self.SCHOOLS_IN_FAST_RUN: + if self.fast and self.done_schools > self.NUMBER_OF_SCHOOLS_IN_FAST_RUN: return None # TODO improve this, do not work with abbreviations if not abbr: @@ -161,7 +154,7 @@ def process_person( """ # If we run in the fast mode and we already processed enough users, we skip this one. self.done_users += 1 - if self.fast and self.done_users > self.USER_IN_FAST_RUN: + if self.fast and self.done_users > self.NUMBER_OF_USERS_IN_FAST_RUN: return None old_id_property = None From e476954be94c654ba74bbb9a67762844359b4580 Mon Sep 17 00:00:00 2001 From: Michal Hozza Date: Tue, 9 Jul 2019 21:36:43 +0100 Subject: [PATCH 19/20] Use raw string literals for regexps. --- .../management/commands/migrate_base_class.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/trojsten/people/management/commands/migrate_base_class.py b/trojsten/people/management/commands/migrate_base_class.py index dd90f969b..fde2c94b0 100644 --- a/trojsten/people/management/commands/migrate_base_class.py +++ b/trojsten/people/management/commands/migrate_base_class.py @@ -49,26 +49,26 @@ def handle(self, **options): self.last_contact = defaultdict(list) self.CSV_ID_PROPERTY = self.process_property( - constants.CSV_ID_PROPERTY_KEY, "(.{1,20}_)?\d+" + constants.CSV_ID_PROPERTY_KEY, r"(.{1,20}_)?\d+" ) - self.MOBIL_PROPERTY = self.process_property(constants.MOBIL_PROPERTY_KEY, "\+?\d+\/?\d+") - self.NICKNAME_PROPERTY = self.process_property(constants.NICKNAME_PROPERTY_KEY, ".{1,30}") + self.MOBIL_PROPERTY = self.process_property(constants.MOBIL_PROPERTY_KEY, r"\+?\d+\/?\d+") + self.NICKNAME_PROPERTY = self.process_property(constants.NICKNAME_PROPERTY_KEY, r".{1,30}") self.BIRTH_NAME_PROPERTY = self.process_property( - constants.BIRTH_NAME_PROPERTY_KEY, ".{1,30}" + constants.BIRTH_NAME_PROPERTY_KEY, r".{1,30}" ) # TODO fix False and stupid values self.LAST_CONTACT_PROPERTY = self.process_property( - constants.LAST_CONTACT_PROPERTY_KEY, "\d\d\d\d" + constants.LAST_CONTACT_PROPERTY_KEY, r"\d\d\d\d" ) - self.FKS_ID_PROPERTY = self.process_property(constants.FKS_ID_PROPERTY_KEY, "\d+") - self.KMS_ID_PROPERTY = self.process_property(constants.KMS_ID_PROPERTY_KEY, "\d+") - self.KMS_CAMPS_PROPERTY = self.process_property(constants.KMS_CAMPS_PROPERTY_KEY, "\d+") - self.KASPAR_ID_PROPERTY = self.process_property(constants.KASPAR_ID_PROPERTY_KEY, "\d+") - self.KASPAR_NOTE_PROPERTY = self.process_property(constants.KASPAR_NOTE_PROPERTY_KEY, ".*") - self.KSP_CAMPS_PROPERTY = self.process_property(constants.KSP_CAMPS_PROPERTY_KEY, "\d+") - self.MEMORY_PROPERTY = self.process_property(constants.MEMORY_PROPERTY_KEY, ".*") - self.COMPANY_PROPERTY = self.process_property(constants.COMPANY_PROPERTY_KEY, ".*") - self.AFFILIATION_PROPERTY = self.process_property(constants.AFFILIATION_PROPERTY_KEY, ".*") + self.FKS_ID_PROPERTY = self.process_property(constants.FKS_ID_PROPERTY_KEY, r"\d+") + self.KMS_ID_PROPERTY = self.process_property(constants.KMS_ID_PROPERTY_KEY, r"\d+") + self.KMS_CAMPS_PROPERTY = self.process_property(constants.KMS_CAMPS_PROPERTY_KEY, r"\d+") + self.KASPAR_ID_PROPERTY = self.process_property(constants.KASPAR_ID_PROPERTY_KEY, r"\d+") + self.KASPAR_NOTE_PROPERTY = self.process_property(constants.KASPAR_NOTE_PROPERTY_KEY, r".*") + self.KSP_CAMPS_PROPERTY = self.process_property(constants.KSP_CAMPS_PROPERTY_KEY, r"\d+") + self.MEMORY_PROPERTY = self.process_property(constants.MEMORY_PROPERTY_KEY, r".*") + self.COMPANY_PROPERTY = self.process_property(constants.COMPANY_PROPERTY_KEY, r".*") + self.AFFILIATION_PROPERTY = self.process_property(constants.AFFILIATION_PROPERTY_KEY, r".*") @transaction.atomic def process_school(self, old_id, abbr, name, addr_name, street, city, zip_code): From 03209537b5de14bf229a216eb541fb884df91414 Mon Sep 17 00:00:00 2001 From: Michal Hozza Date: Tue, 9 Jul 2019 21:43:28 +0100 Subject: [PATCH 20/20] Use raw string literals for strings containing regexps and ignore variable reuse. --- trojsten/people/management/commands/migrate_fks_csv.py | 2 +- trojsten/people/management/commands/migrate_kms_csv.py | 2 +- trojsten/people/management/commands/migrate_ksp_kaspar.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/trojsten/people/management/commands/migrate_fks_csv.py b/trojsten/people/management/commands/migrate_fks_csv.py index 166e7a08e..f986f3d5a 100644 --- a/trojsten/people/management/commands/migrate_fks_csv.py +++ b/trojsten/people/management/commands/migrate_fks_csv.py @@ -5,7 +5,7 @@ from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand -""" +r""" Restore the mysql database dump and run (replace and ) Alternatively you can export these tables from phpAdmin. diff --git a/trojsten/people/management/commands/migrate_kms_csv.py b/trojsten/people/management/commands/migrate_kms_csv.py index 3c02eb6a2..b3bd9ea1f 100644 --- a/trojsten/people/management/commands/migrate_kms_csv.py +++ b/trojsten/people/management/commands/migrate_kms_csv.py @@ -7,7 +7,7 @@ from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand -""" +r""" Restore the mysql database dump and run (replace and ) Alternatively you can export these tables from phpAdmin. diff --git a/trojsten/people/management/commands/migrate_ksp_kaspar.py b/trojsten/people/management/commands/migrate_ksp_kaspar.py index bc6b45197..97f5ddc25 100644 --- a/trojsten/people/management/commands/migrate_ksp_kaspar.py +++ b/trojsten/people/management/commands/migrate_ksp_kaspar.py @@ -64,7 +64,7 @@ def process_people(self): ) for l in cursor1: - l = dict(zip(fields, l)) + l = dict(zip(fields, l)) # noqa: E741 idcko = l["man_id"] self.last_contact[idcko].append(int(l["finish"]) - 3)