-
Notifications
You must be signed in to change notification settings - Fork 255
/
Copy pathrepmgr-action-standby.c
9312 lines (7739 loc) · 253 KB
/
repmgr-action-standby.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* repmgr-action-standby.c
*
* Implements standby actions for the repmgr command line utility
*
* Copyright (c) EnterpriseDB Corporation, 2010-2021
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>
#include "repmgr.h"
#include "dirutil.h"
#include "compat.h"
#include "controldata.h"
#include "repmgr-client-global.h"
#include "repmgr-action-standby.h"
#include "pgbackupapi.h"
typedef struct TablespaceDataListCell
{
struct TablespaceDataListCell *next;
char *name;
char *oid;
char *location;
/* Optional pointer to a file containing a list of tablespace files to copy from Barman */
FILE *fptr;
} TablespaceDataListCell;
typedef struct TablespaceDataList
{
TablespaceDataListCell *head;
TablespaceDataListCell *tail;
} TablespaceDataList;
typedef struct
{
int reachable_sibling_node_count;
int reachable_sibling_nodes_with_slot_count;
int unreachable_sibling_node_count;
int min_required_wal_senders;
int min_required_free_slots;
} SiblingNodeStats;
#define T_SIBLING_NODES_STATS_INITIALIZER { \
0, \
0, \
0, \
0, \
0 \
}
static PGconn *primary_conn = NULL;
static PGconn *source_conn = NULL;
static char local_data_directory[MAXPGPATH] = "";
static bool upstream_conninfo_found = false;
static int upstream_node_id = UNKNOWN_NODE_ID;
static t_conninfo_param_list recovery_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
static char recovery_conninfo_str[MAXLEN] = "";
static char upstream_repluser[NAMEDATALEN] = "";
static char upstream_user[NAMEDATALEN] = "";
static int source_server_version_num = UNKNOWN_SERVER_VERSION_NUM;
static t_configfile_list config_files = T_CONFIGFILE_LIST_INITIALIZER;
static standy_clone_mode mode = pg_basebackup;
/* used by barman mode */
static char local_repmgr_tmp_directory[MAXPGPATH] = "";
static char datadir_list_filename[MAXLEN] = "";
static char barman_command_buf[MAXLEN] = "";
/*
* To enable "standby clone" to run with lowest possible user
* privileges, we'll need to determine which actions need to
* be run and which of the available users, which will be one
* of the repmgr user, the replication user (if available) or
* the superuser (if available).
*
* This is determined in check_source_server().
*/
static t_user_type SettingsUser = REPMGR_USER;
static void _do_standby_promote_internal(PGconn *conn);
static void _do_create_replication_conf(void);
static void check_barman_config(void);
static void check_source_server(void);
static void check_source_server_via_barman(void);
static bool check_upstream_config(PGconn *conn, int server_version_num, t_node_info *node_info, bool exit_on_error);
static void check_primary_standby_version_match(PGconn *conn, PGconn *primary_conn);
static void check_recovery_type(PGconn *conn);
static void initialise_direct_clone(t_node_info *local_node_record, t_node_info *upstream_node_record);
static int run_basebackup(t_node_info *node_record);
static int run_file_backup(t_node_info *node_record);
static int run_pg_backupapi(t_node_info *node_record);
static void copy_configuration_files(bool delete_after_copy);
static void tablespace_data_append(TablespaceDataList *list, const char *name, const char *oid, const char *location);
static void get_barman_property(char *dst, char *name, char *local_repmgr_directory);
static int get_tablespace_data_barman(char *, TablespaceDataList *);
static char *make_barman_ssh_command(char *buf);
static bool create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_conninfo, int server_version_num, char *dest, bool as_file);
static void write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list);
static bool check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);
static bool check_free_wal_senders(int available_wal_senders, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success);
static bool check_free_slots(t_node_info *local_node_record, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success);
static void sibling_nodes_follow(t_node_info *local_node_record, NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);
static t_remote_error_type parse_remote_error(const char *error);
static CheckStatus parse_check_status(const char *status_str);
static NodeStatus parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint);
static CheckStatus parse_node_check_archiver(const char *node_check_output, int *files, int *threshold, t_remote_error_type *remote_error);
static ConnectionStatus parse_remote_node_replication_connection(const char *node_check_output);
static bool parse_data_directory_config(const char *node_check_output, t_remote_error_type *remote_error);
static bool parse_replication_config_owner(const char *node_check_output);
static CheckStatus parse_db_connection(const char *db_connection);
/*
* STANDBY CLONE
*
* Event(s):
* - standby_clone
*
* Parameters:
* --upstream-conninfo
* --upstream-node-id
* --no-upstream-connection
* -F/--force
* --dry-run
* -c/--fast-checkpoint
* --copy-external-config-files
* -R/--remote-user
* --replication-user (only required if no upstream record)
* --without-barman
* --replication-conf-only (--recovery-conf-only)
* --verify-backup (PostgreSQL 13 and later)
*/
void
do_standby_clone(void)
{
PQExpBufferData event_details;
int r = 0;
/* dummy node record */
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
bool local_data_directory_provided = false;
initialize_conninfo_params(&recovery_conninfo, false);
/*
* Copy the provided data directory; if a configuration file was provided,
* use the (mandatory) value from that; if -D/--pgdata was provided, use
* that.
*
* Note that barman mode requires -D/--pgdata.
*/
get_node_data_directory(local_data_directory);
if (local_data_directory[0] != '\0')
{
local_data_directory_provided = true;
log_notice(_("destination directory \"%s\" provided"),
local_data_directory);
}
else
{
/*
* If a configuration file is provided, repmgr will error out after
* parsing it if no data directory is provided; this check is for
* niche use-cases where no configuration file is provided.
*/
log_error(_("no data directory provided"));
log_hint(_("use -D/--pgdata to explicitly specify a data directory"));
exit(ERR_BAD_CONFIG);
}
/*
* --replication-conf-only provided - we'll handle that separately
*/
if (runtime_options.replication_conf_only == true)
{
return _do_create_replication_conf();
}
/*
* conninfo params for the actual upstream node (which might be different
* to the node we're cloning from) to write to recovery.conf
*/
mode = get_standby_clone_mode();
if (mode == barman)
{
/*
* Not currently possible to use --verify-backup with Barman
*/
if (runtime_options.verify_backup == true)
{
log_error(_("--verify-backup option cannot be used when cloning from Barman backups"));
exit(ERR_BAD_CONFIG);
}
/*
* Sanity-check barman connection and installation;
* this will exit with ERR_BARMAN if problems found.
*/
check_barman_config();
}
init_node_record(&local_node_record);
local_node_record.type = STANDBY;
/*
* Initialise list of conninfo parameters which will later be used to
* create the "primary_conninfo" recovery parameter.
*
* We'll initialise it with the host settings specified on the command
* line. As it's possible the standby will be cloned from a node different
* to its intended upstream, we'll later attempt to fetch the upstream
* node record and overwrite the values set here with those from the
* upstream node record (excluding that record's application_name)
*/
copy_conninfo_params(&recovery_conninfo, &source_conninfo);
/* Set the default application name to this node's name */
if (config_file_options.node_id != UNKNOWN_NODE_ID)
{
char application_name[MAXLEN] = "";
param_set(&recovery_conninfo, "application_name", config_file_options.node_name);
get_conninfo_value(config_file_options.conninfo, "application_name", application_name);
if (strlen(application_name) && strncmp(application_name, config_file_options.node_name, sizeof(config_file_options.node_name)) != 0)
{
log_notice(_("\"application_name\" is set in repmgr.conf but will be replaced by the node name"));
}
}
else
{
/*
* this will only happen in corner cases where the node is being
* cloned without a configuration file; fall back to "repmgr" if no
* application_name provided
*/
char *application_name = param_get(&source_conninfo, "application_name");
if (application_name == NULL)
param_set(&recovery_conninfo, "application_name", "repmgr");
}
/*
* Do some sanity checks on the proposed data directory; if it exists:
* - check it's openable
* - check if there's an instance running
*
* We do this here so the check can be part of a --dry-run.
*/
switch (check_dir(local_data_directory))
{
case DIR_ERROR:
log_error(_("unable to access specified data directory \"%s\""), local_data_directory);
log_detail("%s", strerror(errno));
exit(ERR_BAD_CONFIG);
break;
case DIR_NOENT:
/*
* directory doesn't exist
* TODO: in --dry-run mode, attempt to create and delete?
*/
break;
case DIR_EMPTY:
/* Present but empty */
break;
case DIR_NOT_EMPTY:
/* Present but not empty */
if (is_pg_dir(local_data_directory))
{
/* even -F/--force is not enough to overwrite an active directory... */
if (is_pg_running(local_data_directory))
{
log_error(_("specified data directory \"%s\" appears to contain a running PostgreSQL instance"),
local_data_directory);
log_hint(_("ensure the target data directory does not contain a running PostgreSQL instance"));
exit(ERR_BAD_CONFIG);
}
}
break;
default:
break;
}
/*
* By default attempt to connect to the source node. This will fail if no
* connection is possible, unless in Barman mode, in which case we can
* fall back to connecting to the source node via Barman (if available).
*/
if (runtime_options.no_upstream_connection == false)
{
RecordStatus record_status = RECORD_NOT_FOUND;
/*
* This connects to the source node and performs sanity checks, also
* sets "recovery_conninfo_str", "upstream_repluser", "upstream_user" and
* "upstream_node_id" and creates a connection handle in "source_conn".
*
* Will error out if source connection not possible and not in
* "barman" mode.
*/
check_source_server();
if (runtime_options.verify_backup == true)
{
/*
* --verify-backup available for PostgreSQL 13 and later
*/
if (PQserverVersion(source_conn) < 130000)
{
log_error(_("--verify-backup available for PostgreSQL 13 and later"));
exit(ERR_BAD_CONFIG);
}
}
/* attempt to retrieve upstream node record */
record_status = get_node_record(source_conn,
upstream_node_id,
&upstream_node_record);
if (record_status != RECORD_FOUND)
{
log_error(_("unable to retrieve record for upstream node %i"),
upstream_node_id);
exit(ERR_BAD_CONFIG);
}
}
else
{
upstream_node_id = runtime_options.upstream_node_id;
}
/*
* if --upstream-conninfo was supplied, use that (will overwrite value set
* by check_source_server(), but that's OK)
*/
if (runtime_options.upstream_conninfo[0] != '\0')
{
strncpy(recovery_conninfo_str, runtime_options.upstream_conninfo, MAXLEN);
upstream_conninfo_found = true;
}
else if (mode == barman && PQstatus(source_conn) != CONNECTION_OK)
{
/*
* Here we don't have a connection to the upstream node (either
* because --no-upstream-connection was supplied, or
* check_source_server() was unable to make a connection, and
* --upstream-conninfo wasn't supplied.
*
* As we're executing in Barman mode we can try and connect via the
* Barman server to extract the upstream node's conninfo string.
*
* To do this we need to extract Barman's conninfo string, replace the
* database name with the repmgr one (they could well be different)
* and remotely execute psql.
*
* This attempts to set "recovery_conninfo_str".
*/
check_source_server_via_barman();
}
if (recovery_conninfo_str[0] == '\0')
{
log_error(_("unable to determine a connection string to use as \"primary_conninfo\""));
log_hint(_("use \"--upstream-conninfo\" to explicitly provide a value for \"primary_conninfo\""));
if (PQstatus(source_conn) == CONNECTION_OK)
PQfinish(source_conn);
exit(ERR_BAD_CONFIG);
}
if (upstream_conninfo_found == true)
{
/*
* parse returned upstream conninfo string to recovery
* primary_conninfo params
*/
char *errmsg = NULL;
bool parse_success = false;
log_verbose(LOG_DEBUG, "parsing upstream conninfo string \"%s\"", recovery_conninfo_str);
/*
* parse_conninfo_string() here will remove the upstream's
* `application_name`, if set
*/
parse_success = parse_conninfo_string(recovery_conninfo_str, &recovery_conninfo, &errmsg, true);
if (parse_success == false)
{
log_error(_("unable to parse conninfo string \"%s\" for upstream node"),
recovery_conninfo_str);
log_detail("%s", errmsg);
if (PQstatus(source_conn) == CONNECTION_OK)
PQfinish(source_conn);
exit(ERR_BAD_CONFIG);
}
if (upstream_repluser[0] != '\0')
{
/* Write the replication user from the node's upstream record */
param_set(&recovery_conninfo, "user", upstream_repluser);
}
}
else
{
/*
* If no upstream node record found, we'll abort with an error here,
* unless -F/--force is used, in which case we'll use the parameters
* provided on the command line (and assume the user knows what
* they're doing).
*/
if (upstream_node_id == UNKNOWN_NODE_ID)
{
log_error(_("unable to determine upstream node"));
if (PQstatus(source_conn) == CONNECTION_OK)
PQfinish(source_conn);
exit(ERR_BAD_CONFIG);
}
if (!runtime_options.force)
{
log_error(_("no record found for upstream node (upstream_node_id: %i)"),
upstream_node_id);
log_hint(_("use -F/--force to create \"primary_conninfo\" based on command-line parameters"));
if (PQstatus(source_conn) == CONNECTION_OK)
PQfinish(source_conn);
exit(ERR_BAD_CONFIG);
}
}
/*
* If copying of external configuration files requested, and any are
* detected, perform sanity checks
*/
if (PQstatus(source_conn) == CONNECTION_OK && runtime_options.copy_external_config_files == true)
{
PGconn *superuser_conn = NULL;
PGconn *privileged_conn = NULL;
bool external_config_files = false;
int i = 0;
/*
* Obtain configuration file locations
*
* We'll check to see whether the configuration files are in the data
* directory - if not we'll have to copy them via SSH, if copying
* requested.
*
* This will require superuser permissions, so we'll attempt to
* connect as -S/--superuser (if provided), otherwise check the
* current connection user has superuser rights.
*
* XXX: if configuration files are symlinks to targets outside the
* data directory, they won't be copied by pg_basebackup, but we can't
* tell this from the below query; we'll probably need to add a check
* for their presence and if missing force copy by SSH
*/
if (SettingsUser == REPMGR_USER)
{
privileged_conn = source_conn;
}
else
{
get_superuser_connection(&source_conn, &superuser_conn, &privileged_conn);
}
if (get_configuration_file_locations(privileged_conn, &config_files) == false)
{
log_notice(_("unable to proceed without establishing configuration file locations"));
PQfinish(source_conn);
if (superuser_conn != NULL)
PQfinish(superuser_conn);
exit(ERR_BAD_CONFIG);
}
/* check if any files actually outside the data directory */
for (i = 0; i < config_files.entries; i++)
{
t_configfile_info *file = config_files.files[i];
if (file->in_data_directory == false)
{
external_config_files = true;
break;
}
}
if (external_config_files == true)
{
r = 0;
PQExpBufferData msg;
initPQExpBuffer(&msg);
appendPQExpBuffer(&msg,
_("external configuration files detected, checking SSH connection to host \"%s\""),
runtime_options.host);
if (runtime_options.dry_run == true)
{
log_notice("%s", msg.data);
}
else
{
log_verbose(LOG_INFO, "%s", msg.data);
}
termPQExpBuffer(&msg);
r = test_ssh_connection(runtime_options.host, runtime_options.remote_user);
if (r != 0)
{
log_error(_("remote host \"%s\" is not reachable via SSH - unable to copy external configuration files"),
runtime_options.host);
if (superuser_conn != NULL)
PQfinish(superuser_conn);
PQfinish(source_conn);
exit(ERR_BAD_CONFIG);
}
initPQExpBuffer(&msg);
appendPQExpBuffer(&msg,
_("SSH connection to host \"%s\" succeeded"),
runtime_options.host);
if (runtime_options.dry_run == true)
{
log_info("%s", msg.data);
}
else
{
log_verbose(LOG_INFO, "%s", msg.data);
}
termPQExpBuffer(&msg);
/*
* Here we'll attempt an initial test copy of the detected external
* files, to detect any issues before we run the base backup.
*
* Note this will exit with an error, unless -F/--force supplied.
*
* We don't do this during a --dry-run as it may introduce unexpected changes
* on the local node; during an actual clone operation, any problems with
* copying files will be detected early and the operation aborted before
* the actual database cloning commences.
*
* TODO: put the files in a temporary directory and move to their final
* destination once the database has been cloned.
*/
if (runtime_options.dry_run == false)
{
if (runtime_options.copy_external_config_files_destination == CONFIG_FILE_SAMEPATH)
{
/*
* Files will be placed in the same path as on the source server;
* don't delete after copying.
*/
copy_configuration_files(false);
}
else
{
/*
* Files will be placed in the data directory - delete after copying.
* They'll be copied again later; see TODO above.
*/
copy_configuration_files(true);
}
}
}
if (superuser_conn != NULL)
PQfinish(superuser_conn);
}
if (runtime_options.dry_run == true)
{
/*
* If replication slots in use, sanity-check whether we can create them
* with the available user permissions.
*/
if (config_file_options.use_replication_slots == true && PQstatus(source_conn) == CONNECTION_OK)
{
PQExpBufferData msg;
bool success = true;
initPQExpBuffer(&msg);
/*
* "create_replication_slot()" knows about --dry-run mode and
* will perform checks but not actually create the slot.
*/
success = create_replication_slot(source_conn,
local_node_record.slot_name,
&upstream_node_record,
&msg);
if (success == false)
{
log_error(_("prerequisites not met for creating a replication slot on upstream node %i"),
upstream_node_record.node_id);
termPQExpBuffer(&msg);
exit(ERR_BAD_CONFIG);
}
termPQExpBuffer(&msg);
}
if (upstream_node_id != UNKNOWN_NODE_ID)
{
log_notice(_("standby will attach to upstream node %i"), upstream_node_id);
}
else
{
log_warning(_("unable to determine a valid upstream node id"));
}
if (mode == pg_basebackup && runtime_options.fast_checkpoint == false)
{
log_hint(_("consider using the -c/--fast-checkpoint option"));
}
if (mode == pg_basebackup)
{
/*
* In --dry-run mode, this will just output the pg_basebackup command which
* would be executed.
*/
run_basebackup(&local_node_record);
}
PQfinish(source_conn);
log_info(_("all prerequisites for \"standby clone\" are met"));
exit(SUCCESS);
}
switch (mode)
{
case pg_basebackup:
initialise_direct_clone(&local_node_record, &upstream_node_record);
log_notice(_("starting backup (using pg_basebackup)..."));
break;
case barman:
log_notice(_("retrieving backup from Barman..."));
break;
case pg_backupapi:
log_notice(_("starting backup (using pg_backupapi)..."));
break;
default:
/* should never reach here */
log_error(_("unknown clone mode"));
}
if (mode == pg_basebackup)
{
if (runtime_options.fast_checkpoint == false)
{
log_hint(_("this may take some time; consider using the -c/--fast-checkpoint option"));
}
}
switch (mode)
{
case pg_basebackup:
r = run_basebackup(&local_node_record);
break;
case barman:
r = run_file_backup(&local_node_record);
break;
case pg_backupapi:
r = run_pg_backupapi(&local_node_record);
break;
default:
/* should never reach here */
log_error(_("unknown clone mode"));
}
/* If the backup failed then exit */
if (r != SUCCESS)
{
/* If a replication slot was previously created, drop it */
if (config_file_options.use_replication_slots == true)
{
/*
* In the case where a standby is being cloned from a node other than its
* intended upstream, We can't be sure of the source node's node_id. This
* is only required by "drop_replication_slot_if_exists()" to determine
* from the node's record whether it has a different replication user, and
* as in this case that would need to be supplied via "--replication-user"
* it's not a problem.
*/
drop_replication_slot_if_exists(source_conn, UNKNOWN_NODE_ID, local_node_record.slot_name);
}
log_error(_("unable to take a base backup of the source server"));
log_hint(_("data directory (\"%s\") may need to be cleaned up manually"),
local_data_directory);
PQfinish(source_conn);
exit(r);
}
/*
* Run pg_verifybackup here if requested, before any alterations are made
* to the data directory.
*/
if (mode == pg_basebackup && runtime_options.verify_backup == true)
{
PQExpBufferData command;
r = 0;
struct stat st;
initPQExpBuffer(&command);
make_pg_path(&command, "pg_verifybackup");
/* check command actually exists */
if (stat(command.data, &st) != 0)
{
log_error(_("unable to find expected binary \"%s\""), command.data);
log_detail("%s", strerror(errno));
exit(ERR_BAD_CONFIG);
}
appendPQExpBufferChar(&command, ' ');
/* Somewhat inconsistent, but pg_verifybackup doesn't accept a -D option */
appendShellString(&command,
local_data_directory);
log_debug("executing:\n %s", command.data);
r = system(command.data);
termPQExpBuffer(&command);
if (r != 0)
{
log_error(_("unable to verify backup"));
exit(ERR_BAD_BASEBACKUP);
}
log_verbose(LOG_INFO, _("backup successfully verified"));
}
/*
* If `--copy-external-config-files` was provided, copy any configuration
* files detected to the appropriate location. Any errors encountered will
* not be treated as fatal.
*
* This won't run in Barman mode as "config_files" is only populated in
* "initialise_direct_clone()", which isn't called in Barman mode.
*/
if (runtime_options.copy_external_config_files == true && config_files.entries > 0)
{
/*
* If "--copy-external-config-files=samepath" was used, the files will already
* have been copied.
*/
if (runtime_options.copy_external_config_files_destination == CONFIG_FILE_PGDATA)
copy_configuration_files(false);
}
/* Write the recovery.conf file */
if (create_recovery_file(&local_node_record,
&recovery_conninfo,
source_server_version_num,
local_data_directory,
true) == false)
{
/* create_recovery_file() will log an error */
if (source_server_version_num >= 120000)
{
log_notice(_("unable to write replication configuration; see preceding error messages"));
}
else
{
log_notice(_("unable to create recovery.conf; see preceding error messages"));
}
log_hint(_("data directory (\"%s\") may need to be cleaned up manually"),
local_data_directory);
PQfinish(source_conn);
exit(ERR_BAD_CONFIG);
}
switch (mode)
{
case pg_basebackup:
log_notice(_("standby clone (using pg_basebackup) complete"));
break;
case barman:
log_notice(_("standby clone (from Barman) complete"));
break;
case pg_backupapi:
log_notice(_("standby clone (from pg_backupapi) complete"));
break;
}
/*
* Do a final check on the data directory permissions - if the user
* is cloning into an existing directory set to 0750, and the server
* is Pg10 or earlier, Pg will refuse to start. We might not have
* known the server version when creating the data directory
* (mainly if cloning from Barman with no upstream connection), hence
* the additional check here.
*/
set_dir_permissions(local_data_directory, source_server_version_num);
/*
* TODO: It might be nice to provide an option to have repmgr start the
* PostgreSQL server automatically
*/
log_notice(_("you can now start your PostgreSQL server"));
if (config_file_options.service_start_command[0] != '\0')
{
log_hint(_("for example: %s"),
config_file_options.service_start_command);
}
else if (local_data_directory_provided)
{
log_hint(_("for example: pg_ctl -D %s start"),
local_data_directory);
}
else
{
log_hint(_("for example: /etc/init.d/postgresql start"));
}
/*
* XXX forgetting to (re) register the standby is a frequent cause of
* error; we should consider having repmgr automatically register the
* standby, either by default with an option "--no-register", or an option
* "--register".
*
* Note that "repmgr standby register" requires the standby to be running
* - if not, and we just update the node record, we'd have an incorrect
* representation of the replication cluster. Best combined with an
* automatic start of the server (see note above)
*/
/*
* Check for an existing node record, and output the appropriate command
* for registering or re-registering.
*/
{
t_node_info node_record = T_NODE_INFO_INITIALIZER;
RecordStatus record_status = RECORD_NOT_FOUND;
record_status = get_node_record(primary_conn,
config_file_options.node_id,
&node_record);
if (record_status == RECORD_FOUND)
{
log_hint(_("after starting the server, you need to re-register this standby with \"repmgr standby register --force\" to update the existing node record"));
}
else
{
log_hint(_("after starting the server, you need to register this standby with \"repmgr standby register\""));
}
}
/* Log the event */
initPQExpBuffer(&event_details);
/* Add details about relevant runtime options used */
appendPQExpBuffer(&event_details,
_("cloned from host \"%s\", port %s"),
runtime_options.host,
runtime_options.port);
appendPQExpBufferStr(&event_details,
_("; backup method: "));
switch (mode)
{
case pg_basebackup:
appendPQExpBufferStr(&event_details, "pg_basebackup");
break;
case barman:
appendPQExpBufferStr(&event_details, "barman");
break;
case pg_backupapi:
appendPQExpBufferStr(&event_details, "pg_backupapi");
break;
}
appendPQExpBuffer(&event_details,
_("; --force: %s"),
runtime_options.force ? "Y" : "N");
create_event_notification(primary_conn,
&config_file_options,
config_file_options.node_id,
"standby_clone",
true,
event_details.data);
if (primary_conn != source_conn && PQstatus(primary_conn) == CONNECTION_OK)
PQfinish(primary_conn);
if (PQstatus(source_conn) == CONNECTION_OK)
PQfinish(source_conn);
exit(r);
}
void
check_barman_config(void)
{
PQExpBufferData command;
bool command_ok = false;
/*
* Check that there is at least one valid backup
*/
log_info(_("connecting to Barman server to verify backup for \"%s\""), config_file_options.barman_server);
initPQExpBuffer(&command);
appendPQExpBuffer(&command, "%s show-backup %s latest > /dev/null",
make_barman_ssh_command(barman_command_buf),
config_file_options.barman_server);
command_ok = local_command(command.data, NULL);
if (command_ok == false)
{
log_error(_("no valid backup for server \"%s\" was found in the Barman catalogue"),
config_file_options.barman_server);
log_detail(_("command executed was:\n %s"), command.data),
log_hint(_("refer to the Barman documentation for more information"));
termPQExpBuffer(&command);
exit(ERR_BARMAN);