-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathnirmata_test.sh
executable file
·1149 lines (1088 loc) · 38.8 KB
/
nirmata_test.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/bin/bash
# shellcheck disable=SC1117,SC2086,SC2001
# Health-check script with three independent modes (run them separately even
# when combining would seem to make sense):
#   --local    check this host for Kubernetes compatibility / basic sanity
#   --cluster  check basic Kubernetes cluster sanity via kubectl
#   --nirmata  check a Nirmata installation, mainly mongodb
# Any Nirmata installation that isn't HA is reported as a warning.
# (Python/ruby would be nicer, but customer hosts/containers may lack them.)
version=1.1
# Where --update pulls a fresh copy of this script from.
script_url='https://raw.githubusercontent.com/silborynirmata/k8_test/master/nirmata_test.sh'
# 0 = self-update before running
update=1
# Test targets: an external DNS name and an in-cluster service name.
DNSTARGET=nirmata.com
SERVICETARGET=kubernetes.default.svc.cluster.local
# 0 = test every namespace; default is only $namespace
allns=1
# 0 = also curl the service URL
curl=1
# Namespace the Nirmata services live in.
namespace="nirmata"
# "no" = abort on the first error
CONTINUE="yes"
# "yes" = suppress success messages
QUIET="no"
# Throughout this script 0 means "enabled" and 1 means "disabled".
# Local host compatibility tests:
run_local=1
# kubectl-driven cluster tests:
run_remote=1
# Nirmata application tests (mongo/zookeeper/kafka). Maybe these belong in a
# separate script some day.
run_mongo=1
run_zoo=1
run_kafka=1
# Set to 1 once any error / warning has been reported.
export error=0
export warn=0
# Default to no ssh fan-out.
nossh=0
# Arguments forwarded to re-invocations of this script.
script_args=""
# shellcheck disable=SC2124
all_args="$@"
# No mode flag at all? Default to running the Nirmata application tests.
if [[ ! $all_args == *--cluster* && ! $all_args == *--local* && ! $all_args == *--nirmata* ]] ; then
    run_mongo=0
    run_zoo=0
    run_kafka=0
fi
# Email reporting defaults. Note sendemail is a container image, NOT sendmail!
email=1
sendemail='ssilbory/sendemail'
alwaysemail=1
localmail=1
# 0 = attempt to repair local issues automatically
fix_issues=1
# 0 = warnings do not produce exit status 2
warnok=1
# Extra arguments appended to every kubectl invocation.
add_kubectl=""
# Free-space thresholds (percent used) before we complain:
# nirmata pod volumes
df_free=80
# mongo seems to run out of space more easily during syncs
df_free_mongo=50
# docker partition
df_free_root=85
# Are we running inside a container?
if [ -f /.dockerenv ]; then
    export INDOCKER=0
else
    export INDOCKER=1
fi
# Report an error in red and record that one happened (global error=1).
# When CONTINUE=no this also removes the nirmata-net-test-all DaemonSets
# from every namespace and aborts the whole script with exit 1.
error(){
    error=1
    # shellcheck disable=SC2145
    printf '\033[31mError: %s\033[0m\n' "${*}"
    if [ "$CONTINUE" = "no" ];then
        # THIS EXITS THE SCRIPT
        printf '\033[31mContinue is not set exiting on error!\033[0m\n'
        for ns in $(kubectl get ns --no-headers | awk '{print $1}');do
            kubectl --namespace=$ns delete ds nirmata-net-test-all --ignore-not-found=true &>/dev/null
        done
        kubectl --namespace=$namespace delete ds nirmata-net-test-all --ignore-not-found=true &>/dev/null
        # THIS EXITS THE SCRIPT
        exit 1
    fi
}
# Report a warning in yellow and record that one happened (global warn=1).
warn(){
    warn=1
    # shellcheck disable=SC2145
    printf '\033[33mWarn: %s\033[0m\n' "${*}"
}
# Report success in green, unless -q (QUIET=yes) suppressed success output.
good(){
    [ "$QUIET" = "yes" ] && return 0
    # shellcheck disable=SC2145
    printf '\033[32mGOOD: %s\033[0m\n' "${*}"
}
# Print a command line, then execute it. The expansion is deliberately left
# unquoted so the printed string and the executed words are the same.
echo_cmd(){
    printf '%s\n' "${*}"
    # shellcheck disable=SC2068
    ${@}
}
# Print usage/help text to stdout. Interpolates current defaults
# ($version, $namespace, $SERVICETARGET, $script_url) so the text tracks
# whatever was set above. (Email addresses below appear redacted to
# "[email protected]" in this copy of the script.)
helpfunction(){
echo "Note that this script requires access to the following containers:"
echo "nicolaka/netshoot for cluster tests."
echo "ssilbory/sendemail for sending email."
echo "Usage: $0"
echo "--version Reports version ($version)"
echo "--allns Test all namespaces (Default is only \"$namespace\")"
echo '--dns-target dns.name (Default nirmata.com)'
#echo '--exit Exit on errors'
echo '--https Curl the service with https.'
echo '--http Curl the service with http.'
echo '--local Run local tests'
echo '--nirmata Run Nirmata app tests'
echo '-q Do not report success'
echo "--warnok Do not exit 2 on warnings."
echo "--namespace namespace_name (Default is \"$namespace\")."
echo '--cluster Run Nirmata K8 cluster tests'
echo "--service service_target (Default $SERVICETARGET)."
echo "--fix Attempt to fix issues (local only)"
echo "--ssh \"[email protected]\" Ssh to a space-separated list of systems and run local tests"
echo "--update Update script from $script_url"
echo "Note that --ssh does not return non-zero on failure on ssh targets. Parse for:"
echo " 'Test completed with errors'"
echo " 'Test completed with warnings'"
echo
echo "Email Settings (Note that these options are incompatible with --ssh.)"
echo "--email Enables email reporting on error"
echo "--to [email protected] Sets the to address. Required"
echo "--from [email protected] Sets the from address. (default [email protected])"
echo "--subject 'something' Sets the email subject. (default 'K8 test script error')"
echo "--smtp smtp.server Set your smtp server. Required"
echo "--user user.name Sets your user name. Optional"
echo "--passwd 'L33TPASSW)RD' Set your password. Optional"
echo "--email-opts '-o tls=yes' Additional options to send to the sendemail program."
echo "--always-email Send emails on warning and good test results"
echo "--sendemail Set the container used to send email."
echo "--mail-local Use mail command to send email."
echo "Simple open smtp server:"
echo "$0 --email --to [email protected] --smtp smtp.example.com"
echo "Authenication with an smtp server:"
echo "--email --to [email protected] --smtp smtp.example.com --user sam.silbory --passwd 'foo!foo'"
echo "Authenication with gmail: (Requires an app password be used!)"
echo "--email --to [email protected] --smtp smtp.gmail.com:587 --user sam.silbory --passwd 'foo!foo'"
}
# deal with args
# Args are getting out of control it might be worth using getops or something.
# NOTE(review): this loop iterates over a fixed copy of "$@" ($i) but the
# handlers read the *live* positionals $1/$2 and shift them. That only lines
# up because each argument is visited in order; a flag value that itself
# starts with "-" (e.g. --subject '-foo') will later match the catch-all -*)
# arm and exit. Flag values are also re-visited by the loop as $i, which is
# harmless only while no value collides with a flag name. Fragile — confirm
# before restructuring.
for i in "$@";do
case $i in
--version)
echo "$0 version $version"
exit 0
;;
--dns-target)
script_args=" $script_args $1 $2 "
DNSTARGET=$2
shift
shift
echo DNSTARGET is $DNSTARGET
;;
--service)
script_args=" $script_args $1 $2 "
SERVICETARGET=$2
shift
shift
echo SERVICETARGET is $SERVICETARGET
;;
--continue|-c)
script_args=" $script_args $1 "
CONTINUE="yes"
shift
;;
--allns)
script_args=" $script_args $1 "
allns=0
shift
;;
--https)
script_args=" $script_args $1 "
curl=0
http=1
shift
;;
--http)
script_args=" $script_args $1 "
curl=0
http=0
shift
;;
--namespace)
script_args=" $script_args $1 $2 "
namespace=$2
shift
shift
;;
# Mode flags: 0 = enabled. Each mode flag also re-disables the other modes
# unless those flags are present anywhere in $all_args.
--local)
script_args=" $script_args $1 "
run_local=0
if [[ ! $all_args == *--cluster* ]] ; then
run_remote=1
fi
shift
;;
--cluster)
script_args=" $script_args $1 "
if [[ ! $all_args == *--local* ]] ; then
run_local=1
fi
run_remote=0
shift
;;
--nirmata)
script_args=" $script_args $1 "
run_mongo=0
run_zoo=0
run_kafka=0
if [[ ! $all_args == *--cluster* ]] ; then
run_remote=1
fi
if [[ ! $all_args == *--local* ]] ; then
run_local=1
fi
shift
;;
--exit)
script_args=" $script_args $1 "
CONTINUE="no"
shift
;;
# NOTE(review): --insecure records $2 in script_args but takes no value and
# only shifts once — looks like a copy/paste remnant; verify intent.
--insecure)
script_args=" $script_args $1 $2 "
add_kubectl=" $add_kubectl --insecure-skip-tls-verify=false "
shift
;;
--client-cert)
add_kubectl=" $add_kubectl --client-certificate=$2"
shift
shift
;;
-q)
script_args=" $script_args $1 "
QUIET="yes"
shift
;;
--ssh)
ssh_hosts=$2
nossh=1
shift
shift
;;
--nossh)
script_args=" $script_args $1 "
nossh=0
shift
;;
--fix)
fix_issues=0
shift
;;
--logfile)
script_args=" $script_args $1 $2 "
logfile=$2
shift
shift
;;
--email)
script_args=" $script_args $1 "
email=0
shift
;;
# Multiple --to flags accumulate into a space-separated list.
--to)
script_args=" $script_args $1 $2 "
TO="$2 $TO"
shift
shift
;;
--from)
script_args=" $script_args $1 $2 "
FROM=$2
shift
shift
;;
--subject)
script_args=" $script_args $1 $2 "
SUBJECT=$2
shift
shift
;;
--smtp)
script_args=" $script_args $1 $2 "
SMTP_SERVER=$2
shift
shift
;;
--user)
script_args=" $script_args $1 $2 "
EMAIL_USER=$2
shift
shift
;;
--passwd)
script_args=" $script_args $1 $2 "
EMAIL_PASSWD=$2
shift
shift
;;
--sendemail)
sendemail=$2
shift
shift
;;
--always-email)
alwaysemail=0
shift
;;
--mail-local)
localmail=0
shift
;;
--warnok)
script_args=" $script_args $1 "
warnok=0
shift
;;
--update)
update=0
shift
;;
#--email-opts)
# script_args=" $script_args $1 $2 "
# EMAIL_OPTS="\'$2\'"
# shift
# shift
#;;
-h|--help)
helpfunction
exit 0
;;
# Remember that shifting doesn't remove later args from the loop
# We will exit on any arg with a - even if we shift it away.
-*)
helpfunction
exit 1
;;
esac
done
# We don't ever want to pass --ssh or --update. We might get inception, but without DiCaprio.
script_args=$(echo $script_args |sed -e 's/--ssh//' -e 's/--update//')
# Self-update: fetch a fresh copy of this script, back up the current one,
# install the download in its place, then re-exec it with the filtered args.
if [[ $update == 0 ]];then
    rm -f /tmp/nirmata_test.sh.download.$$
    if [ -x "$(command -v wget)" ];then
        # On failure remove the partial/empty file wget leaves behind, so we
        # never install a broken script below.
        wget -O /tmp/nirmata_test.sh.download.$$ $script_url || \
            { error "Download failed of $script_url"; rm -f /tmp/nirmata_test.sh.download.$$; }
    elif [ -x "$(command -v curl)" ];then
        # -f makes curl fail on HTTP errors instead of saving an error page.
        curl -f $script_url -o /tmp/nirmata_test.sh.download.$$ || \
            { error "Download failed of $script_url"; rm -f /tmp/nirmata_test.sh.download.$$; }
    else
        error "Unable to download $script_url as we can't find curl or wget"
    fi
    if [ -e /tmp/nirmata_test.sh.download.$$ ];then
        basename=$(basename $0)
        dirname=$(dirname $0)
        fullname="$dirname/$basename"
        cp -f $fullname $fullname.bak
        cp -f /tmp/nirmata_test.sh.download.$$ $fullname
        rm -f /tmp/nirmata_test.sh.download.$$
        $fullname $script_args
        exit $?
    else
        error "Failed to update script"
    fi
fi
# Aliases are NOT expanded in non-interactive shells unless explicitly
# enabled; without this the alias below (and therefore --insecure and
# --client-cert) silently had no effect.
shopt -s expand_aliases
# shellcheck disable=SC2139
alias kubectl="kubectl $add_kubectl "
# Test mongodb pods
# Checks each MongoDB pod's replica-set role (master/slave), data-volume
# usage against $df_free_mongo, WiredTiger lookaside size, rs.status()
# state, pod count (expects 3 for HA), and that exactly one master exists.
# All data comes via kubectl exec; error/warn/good set the global flags.
mongo_test(){
echo "Testing MongoDB Pods"
# NOTE(review): the namespace is taken from the first labelled pod found
# anywhere in the cluster — assumes all mongodb pods share one namespace.
mongo_ns=$(kubectl get pod --all-namespaces -l nirmata.io/service.name=mongodb --no-headers | awk '{print $1}'|head -1)
mongos=$(kubectl get pod --namespace=$mongo_ns -l nirmata.io/service.name=mongodb --no-headers | awk '{print $1}')
mongo_num=0
# The mongo master (or masters ?!!?)
mongo_master=""
# Number of masters (ideally one)
mongo_masters=0
mongo_error=0
for mongo in $mongos; do
# Depending on the version of mongo we might have a sidecar. We want to give kubectl the right container.
if kubectl -n $mongo_ns get pod $mongo --no-headers |awk '{ print $2 }' |grep -q '[0-2]/2'; then
mongo_container="-c mongodb"
else
mongo_container=""
fi
# "ismaster" from db.serverStatus() gives this pod's replica role.
cur_mongo=$(kubectl -n $mongo_ns exec $mongo $mongo_container -- sh -c 'echo "db.serverStatus()" |mongo' 2>&1|grep '"ismaster"')
if [[ $cur_mongo =~ "true" ]];then
echo "$mongo is master"
mongo_master="$mongo_master $mongo"
mongo_masters=$((mongo_masters+ 1));
else
if [[ $cur_mongo =~ "false" ]];then
echo "$mongo is a slave"
else
# Neither true nor false — exec failed or mongo is standalone.
warn "$mongo is not master or slave! (Are we standalone?)"
mongo_error=1
kubectl -n $mongo_ns get pod $mongo --no-headers -o wide
fi
fi
# Percent used on the mongo data volume (trailing % stripped).
mongo_df=$(kubectl -n $mongo_ns exec $mongo $mongo_container -- df /data/db | awk '{ print $5; }' |tail -1|sed s/%//)
if [[ $mongo_df -gt $df_free_mongo ]];then
error "Found MongoDB volume at ${mongo_df}% usage on $mongo"
kubectl -n $mongo_ns exec $mongo $mongo_container -- du --all -h /data/db/ |grep '^[0-9,.]*G'
else
good "Found MongoDB volume at ${mongo_df}% usage on $mongo"
fi
# A multi-gigabyte lookaside file suggests mongo is memory-starved.
kubectl -n $mongo_ns exec $mongo $mongo_container -- du -h /data/db/WiredTigerLAS.wt |grep '[0-9]G' && \
warn "WiredTiger lookaside file is very large on $mongo. Consider increasing Mongodb memory."
mongo_num=$((mongo_num + 1));
# rs.status() stateStr flags members that are down/recovering/starting.
mongo_stateStr_full=$(kubectl -n $mongo_ns exec $mongo $mongo_container -- sh -c 'echo "rs.status()" |mongo' 2>&1)
mongo_stateStr=$(echo $mongo_stateStr_full |grep stateStr)
if [[ $mongo_stateStr =~ RECOVERING || $mongo_stateStr =~ DOWN || $mongo_stateStr =~ STARTUP ]];then
echo $mongo_stateStr_full
# NOTE(review): STARTUP sets mongo_error=2, which still fails the
# "-eq 0" check below but is otherwise never distinguished from 1.
if [[ $mongo_stateStr =~ RECOVERING ]];then warn "Detected recovering Mongodb from this node!"; mongo_error=1; fi
if [[ $mongo_stateStr =~ DOWN ]];then error "Detected Mongodb in down state from this node!"; mongo_error=1 ; fi
if [[ $mongo_stateStr =~ STARTUP ]];then warn "Detected Mongodb in startup state from this node!"; mongo_error=2; fi
fi
done
if [[ $mongo_num -gt 3 ]];then
# Are we ever going to run more than 3 pods?
error "Found $mongo_num Mongo Pods $mongos!!"
mongo_error=1
fi
if [[ $mongo_num -eq 0 ]];then
error "Found Mongo Pods $mongo_num!!" && mongo_error=1
else
# Fewer than 3 pods means we are not HA.
[[ $mongo_num -lt 3 ]] && warn "Found $mongo_num Mongo Pods" && mongo_error=1
fi
if [[ $mongo_masters -lt 1 ]]; then
if [[ $mongo_num -eq 1 ]];then
warn "No Mongo Master found!! (Assuming standalone)"
else
error "No Mongo Master found with multiple mongo nodes!!"
mongo_error=1
fi
else
if [[ $mongo_masters -gt 1 ]];then
error "Found $mongo_masters masters: $mongo_master!!"
mongo_error=1
fi
fi
[ $mongo_error -eq 0 ] && good "MongoDB passed tests"
}
# Zookeeper testing
# Checks each zookeeper pod's mode (leader/follower/standalone), its znode
# count, its data-volume usage, and — via zkCli on the last pod visited —
# how many kafka brokers are registered under /brokers/ids.
# All data comes via kubectl exec; error/warn/good set the global flags.
zoo_test(){
    zoo_error=0
    echo "Testing Zookeeper pods"
    # Namespace is taken from the first labelled pod found anywhere.
    zoo_ns=$(kubectl get pod --all-namespaces -l 'nirmata.io/service.name in (zookeeper, zk)' --no-headers | awk '{print $1}'|head -1)
    zoos=$(kubectl get pod -n $zoo_ns -l 'nirmata.io/service.name in (zookeeper, zk)' --no-headers | awk '{print $1}')
    zoo_num=0
    zoo_leader=""
    for zoo in $zoos; do
        # High node counts indicate a resource issue or a cleanup failure.
        curr_zoo=$(kubectl -n $zoo_ns exec $zoo -- sh -c "/opt/zookeeper-*/bin/zkServer.sh status" 2>&1|grep Mode)
        zoo_node_count=$(kubectl exec $zoo -n $zoo_ns -- sh -c "echo srvr | nc localhost 2181|grep Node.count:" |awk '{ print $3; }')
        # Quoted so an empty count reports an error instead of a broken test.
        if [ "$zoo_node_count" -lt 50000 ];then
            good $zoo node count is $zoo_node_count
        else
            error Error $zoo node count is $zoo_node_count
        fi
        if [[ $curr_zoo =~ "leader" ]];then
            echo "$zoo is zookeeper leader"
            zoo_leader="$zoo_leader $zoo"
        elif [[ $curr_zoo =~ "follower" ]];then
            echo "$zoo is zookeeper follower"
        elif [[ $curr_zoo =~ "standalone" ]];then
            # Standalone works but is not HA; count it as the "leader".
            warn "$zoo is zookeeper standalone!"
            zoo_leader="$zoo_leader $zoo"
        else
            error "$zoo appears to have failed!! (not follower/leader/standalone)"
            kubectl -n $zoo_ns get pod $zoo --no-headers -o wide
            zoo_error=1
        fi
        zoo_num=$((zoo_num + 1));
        zoo_df=$(kubectl -n $zoo_ns exec $zoo -- df /var/lib/zookeeper | awk '{ print $5; }' |tail -1|sed s/%//)
        [[ $zoo_df -gt $df_free ]] && error "Found zookeeper volume at ${zoo_df}% usage on $zoo!!"
    done
    # How many kafkas are connected? Crude parse of "ls /brokers/ids" output
    # on the last pod visited above, but it will do for now.
    zkCli=$(kubectl exec $zoo -n $zoo_ns -- sh -c "ls /opt/zoo*/bin/zkCli.sh|head -1")
    connected_kaf=$(kubectl exec $zoo -n $zoo_ns -- sh -c "echo ls /brokers/ids | $zkCli")
    con_kaf_num=0
    # Quoted =~ operands are literal substring matches, not regexes.
    # shellcheck disable=SC2076
    if [[ $connected_kaf =~ '[0, 1, 2]' ]];then
        con_kaf_num=3
    fi
    # shellcheck disable=SC2076
    if [[ $connected_kaf =~ '[0, 1]' ]];then
        con_kaf_num=2
    fi
    # shellcheck disable=SC2076
    if [[ $connected_kaf =~ '[0]' ]];then
        con_kaf_num=1
    fi
    if [[ $zoo_num -gt 3 ]];then
        error "Found $zoo_num Zookeeper Pods $zoos!!"
        zoo_error=1
    fi
    if [[ $zoo_num -eq 0 ]];then
        error "Found Zero Zookeeper Pods !!"
        zoo_error=1
    else
        [[ $zoo_num -eq 1 ]] && warn "Found One Zookeeper Pod." && zoo_error=1
    fi
    # Fix: $zoo_leader must be quoted — with more than one leader recorded
    # the old unquoted test was a shell error ("binary operator expected").
    if [ -z "$zoo_leader" ];then
        error "No Zookeeper Leader found!!"
        zoo_error=1
    fi
    if [[ $(echo $zoo_leader|wc -w) -gt 1 ]];then
        warn "Found Zookeeper Leaders $zoo_leader!"
        zoo_error=1
    fi
    [ $zoo_error -eq 0 ] && good "Zookeeper passed tests"
    if [[ $con_kaf_num -eq 3 ]];then
        good "Found 3 connected Kafkas"
    elif [[ $con_kaf_num -gt 0 ]];then
        warn "Found $con_kaf_num connected Kafkas!"
    else
        warn "Found no connected Kafkas!"
    fi
}
# testing kafka pods
# Checks the Kafka pods: data-volume usage against $df_free and the expected
# HA replica count of 3. All data comes via kubectl; error/warn/good set the
# global flags.
kafka_test(){
    echo "Testing Kafka pods"
    # Namespace is taken from the first labelled pod found anywhere.
    kafka_ns=$(kubectl get pod --all-namespaces -l nirmata.io/service.name=kafka --no-headers | awk '{print $1}'|head -1)
    kafkas=$(kubectl get pod -n $kafka_ns -l nirmata.io/service.name=kafka --no-headers | awk '{print $1}')
    kaf_num=0
    # Fix: initialize the error flag (it was previously unset).
    kaf_error=0
    for kafka in $kafkas; do
        echo "Found Kafka Pod $kafka"
        # Percent used on the kafka data volume (trailing % stripped).
        kafka_df=$(kubectl -n $kafka_ns exec $kafka -- df /var/lib/kafka | awk '{ print $5; }' |tail -1|sed s/%//)
        [[ $kafka_df -gt $df_free ]] && error "Found Kafka volume at ${kafka_df}% usage on $kafka" && kaf_error=1
        kaf_num=$((kaf_num + 1));
    done
    [[ $kaf_num -gt 3 ]] && error "Found $kaf_num Kafka Pods $kafkas!!!" && kaf_error=1
    if [[ $kaf_num -eq 0 ]];then
        error "Found Zero Kafka Pods!!!"
        kaf_error=1
    else
        # Fix: only flag an error when we actually have fewer than 3 pods.
        # Previously kaf_error=1 was set unconditionally in this branch, so a
        # healthy 3-pod cluster could never report "Kafka passed tests".
        [[ $kaf_num -lt 3 ]] && warn "Found $kaf_num Kafka Pod!" && kaf_error=1
    fi
    [[ $kaf_error -eq 0 ]] && good "Kafka passed tests"
    # Is there more to test is it enough that the zookeeper test verifies the number of connection?
}
#function to email results
# Email the collected log when --email is enabled. Honors --always-email and
# --warnok, and delivers via (in order of preference) the local mail command,
# a sendEmail binary on the PATH, or the $sendemail docker container.
do_email(){
    if [[ $email -eq 0 ]];then
        # Check for certs in the cronjob's container as sendEmail won't use a server that doesn't auth.
        # This won't work for any that isn't debianish.
        if [ -e /certs/ ];then
            cp -f /certs/*.crt /usr/local/share/ca-certificates/
            update-ca-certificates
        fi
        [ -z "$logfile" ] && logfile="/tmp/k8_test.$$"
        [ -z "$EMAIL_USER" ] && EMAIL_USER=""
        [ -z "$EMAIL_PASSWD" ] && EMAIL_PASSWD=""
        [ -z "$TO" ] && error "No TO address given!!!" && exit 1
        [ -z "$SUBJECT" ] && SUBJECT="K8 test script error" && echo -e "\e[33mYou provided no Subject using $SUBJECT \e[0m"
        # Send when always-email is on, or when something went wrong.
        if [[ ${alwaysemail} -eq 0 || ${error} -gt 0 || ${warn} -gt 0 ]]; then
            # With --warnok (and no --always-email), warnings alone are not
            # worth an email — bail out unless a real error occurred.
            if [[ $warnok -eq 0 ]];then
                if [[ ${alwaysemail} -ne 0 ]];then
                    if [[ ${error} -eq 0 ]];then
                        return 0
                    fi
                fi
            fi
            #Let's wait for the file to sync in case tee is buffered
            echo; echo; echo
            sleep 2
            # Strip ANSI color codes and convert to CRLF for mail clients.
            # shellcheck disable=SC1012,SC2028,SC2116
            BODY=$(sed -e 's/\x1b\[[0-9;]*m//g' -e 's/$'"/$(echo \\\r)/" ${logfile})
            for email_to in $TO; do
                if [[ $localmail -eq 0 ]];then
                    echo Using local mail client
                    echo "$BODY" |mail -s \""$SUBJECT"\" "$email_to"
                else
                    [ -z "$FROM" ] && FROM="[email protected]" && warn "You provided no From address using $FROM"
                    [ -z "$SMTP_SERVER" ] && error "No smtp server given!!!" && exit 1
                    if type -P "sendEmail" &>/dev/null; then
                        # Fix: this used to test the never-set $PASSWORD
                        # variable with the branches inverted, so credentials
                        # (possibly empty) were always sent. Authenticate only
                        # when a password was actually provided.
                        if [ -n "$EMAIL_PASSWD" ];then
                            echo $BODY |sendEmail -t "$email_to" -f "$FROM" -u \""$SUBJECT"\" -s "$SMTP_SERVER" -xu "$EMAIL_USER" -xp "$EMAIL_PASSWD" "$EMAIL_OPTS"
                        else
                            echo $BODY |sendEmail -t "$email_to" -f "$FROM" -u \""$SUBJECT"\" -s "$SMTP_SERVER" "$EMAIL_OPTS"
                        fi
                    else
                        docker run $sendemail $email_to $FROM "$SUBJECT" "${BODY}" $SMTP_SERVER "$EMAIL_USER" "$EMAIL_PASSWD" "$EMAIL_OPTS"
                    fi
                    #If they named it something else don't delete
                    rm -f /tmp/k8_test.$$
                fi
            done
        fi
    fi
}
# This tests the sanity of your k8 cluster
# Deploys a nicolaka/netshoot DaemonSet into the target namespace(s), then
# from every node: resolves $DNSTARGET (external) and $SERVICETARGET
# (in-cluster), optionally curls the service, and checks the docker/root
# partition usage against $df_free_root. Cleans the DaemonSets up afterwards.
cluster_test(){
    command -v kubectl &>/dev/null || error 'No kubectl found in path!!!'
    echo "Starting Cluster Tests"
    # Setup a DaemonSet to run tests on all nodes.
    # (YAML indentation restored here — it was lost in this copy of the
    # script, and kubectl apply requires the proper nesting.)
    echo 'apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nirmata-net-test-all
  labels:
    app.kubernetes.io/name: nirmata-net-test-all-app
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: nirmata-net-test-all-app
  template:
    metadata:
      labels:
        app.kubernetes.io/name: nirmata-net-test-all-app
    spec:
      containers:
      - name: nirmata-net-test-node
        image: nicolaka/netshoot
        command: [ "/bin/sh", "-c", "sleep 100000" ]' >/tmp/nirmata-net-test-all.yml
    # Remove any leftover test DaemonSets from earlier runs.
    namespaces="$(kubectl get ns --no-headers | awk '{print $1}')"
    for ns in $namespaces;do
        kubectl --namespace=$ns delete ds nirmata-net-test-all --ignore-not-found=true &>/dev/null
    done
    kubectl --namespace=$namespace delete ds nirmata-net-test-all --ignore-not-found=true &>/dev/null
    # Unless --allns was given, only test $namespace.
    if [ $allns -eq 1 ];then
        namespaces=$namespace
    fi
    for ns in $namespaces;do
        kubectl --namespace=$ns apply -f /tmp/nirmata-net-test-all.yml &>/dev/null
    done
    #check for nodes, and kubectl function
    echo
    echo Found the following nodes:
    if ! kubectl get node --no-headers; then
        error 'Failed to contact cluster!!!'
        echo 'Is the master up? Is kubectl configured?'
    fi
    echo
    if kubectl get no -o jsonpath="{.items[?(@.spec.unschedulable)].metadata.name}"|grep .;then
        warn 'Above nodes are unschedulable!!'
    fi
    # Wait (up to 60s) for one test pod per Ready node per tested namespace.
    times=0
    required_pods=$(kubectl get node --no-headers | awk '{print $2}' |grep -c Ready )
    num_ns=$(echo $namespaces |wc -w)
    required_pods=$((required_pods * num_ns))
    echo -n 'Waiting for nirmata-net-test-all pods to start'
    until [[ $(kubectl get pods -l app.kubernetes.io/name=nirmata-net-test-all-app --no-headers --all-namespaces|awk '{print $4}' |grep -c Running) -ge $required_pods ]]|| \
        [[ $times = 60 ]];do
        sleep 1;
        echo -n .;
        times=$((times + 1));
    done
    echo
    # Do we have at least as many pods as nodes? (Do we care enough to do a compare node to pod?)
    if [[ $(kubectl -n $namespace get pods -l app.kubernetes.io/name=nirmata-net-test-all-app --no-headers |awk '{print $3}' |grep -c Running) -ne \
        $(kubectl get node --no-headers | awk '{print $2}' |grep -c Ready) ]] ;then
        error 'Failed to start nirmata-net-test-all on all nodes!!'
        echo Debugging:
        kubectl get pods -l app.kubernetes.io/name=nirmata-net-test-all-app -o wide
        kubectl get node
    fi
    dns_error=0
    for ns in $namespaces;do
        echo Testing $ns namespace
        for pod in $(kubectl -n $ns get pods -l app.kubernetes.io/name=nirmata-net-test-all-app --no-headers |grep Running |awk '{print $1}');do
            node=$(kubectl get pod $pod -o=custom-columns=NODE:.spec.nodeName -n $ns --no-headers)
            echo "Testing DNS on Node $node in Namespace $ns"
            # nslookup exits 0 even on failure on some images, so grep its
            # output for the various failure phrasings instead.
            if kubectl exec $pod -- nslookup $DNSTARGET 2>&1|grep -e can.t.resolve -e does.not.resolve -e can.t.find -e No.answer;then
                warn "Can not resolve external DNS name $DNSTARGET in $ns."
                kubectl -n $ns get pod $pod -o wide
                kubectl -n $ns exec $pod -- sh -c "nslookup $DNSTARGET"
                echo
            else
                good "DNS test $DNSTARGET on $node in $ns suceeded."
            fi
            if kubectl -n $ns exec $pod -- nslookup $SERVICETARGET 2>&1|grep -e can.t.resolve -e does.not.resolve -e can.t.find -e No.answer;then
                warn "Can not resolve $SERVICETARGET service on $node in $ns"
                echo 'Debugging info:'
                kubectl get pod $pod -o wide
                dns_error=1
                kubectl -n $ns exec $pod -- nslookup $DNSTARGET
                kubectl -n $ns exec $pod -- nslookup $SERVICETARGET
                kubectl -n $ns exec $pod -- cat /etc/resolv.conf
                error "DNS test failed to find $SERVICETARGET service on $node in $ns"
            else
                good "DNS test $SERVICETARGET on $node in $ns suceeded."
            fi
            # Optionally curl the service over http/https from inside the pod.
            if [[ $curl -eq 0 ]];then
                if [[ $http -eq 0 ]];then
                    if kubectl -n $ns exec $pod -- sh -c "if curl --max-time 5 http://$SERVICETARGET; then exit 0; else exit 1; fi" 2>&1|grep -e 'command terminated with exit code 1';then
                        error "http://$SERVICETARGET failed to respond to curl in 5 seconds!"
                    else
                        good "HTTP test $SERVICETARGET on $node in $ns suceeded."
                    fi
                else
                    if kubectl -n $ns exec $pod -- sh -c "if curl --max-time 5 -k https://$SERVICETARGET; then exit 0; else exit 1; fi" 2>&1|grep -e 'command terminated with exit code 1';then
                        error "https://$SERVICETARGET failed to respond to curl in 5 seconds!"
                    else
                        good "HTTPS test $SERVICETARGET on $node in $ns suceeded."
                    fi
                fi
            fi
        done
    done
    # Bare "dns_error" is evaluated arithmetically by [[ -eq ]], i.e. as $dns_error.
    if [[ dns_error -eq 1 ]];then
        warn "DNS issues detected"
        echo 'Additional debugging info:'
        kubectl get svc -n kube-system kube-dns coredns
        kubectl get deployments -n kube-system coredns kube-dns
        echo 'Note you should have either coredns or kube-dns running. Not both.'
    fi
    # NOTE(review): this reuses $ns from the loop above, so only the last
    # tested namespace's pods are checked for disk usage.
    echo Testing space availble on docker partition.
    for pod in $(kubectl -n $ns get pods -l app.kubernetes.io/name=nirmata-net-test-all-app --no-headers |grep Running |awk '{print $1}');do
        root_df=$(kubectl -n $ns exec $pod -- df / | awk '{ print $5; }' |tail -1|sed s/%//)
        node=$(kubectl get pod $pod -o=custom-columns=NODE:.spec.nodeName -n $ns --no-headers)
        if [[ $root_df -gt $df_free_root ]];then
            error "Found docker partition at ${root_df}% usage on $node"
        else
            good "Found docker partition at ${root_df}% usage on $node"
        fi
    done
    # Clean up the test DaemonSets everywhere.
    namespaces="$(kubectl get ns --no-headers | awk '{print $1}')"
    for ns in $namespaces;do
        kubectl --namespace=$ns delete ds nirmata-net-test-all --ignore-not-found=true &>/dev/null
    done
}
# test if your local system can run k8
local_test(){
echo "Starting Local Tests"
# Kubelet generally won't run if swap is enabled.
if [[ $(swapon -s | wc -l) -gt 1 ]] ; then
if [[ $fix_issues -eq 0 ]];then
warn "Found swap enabled"
echo_cmd swapoff -a
echo_cmd sed -i '/[[:space:]]*swap[[:space:]]*swap/d' /etc/fstab
else
error "Found swap enabled!"
echo Consider if you are having issues:
echo "sed -i '/[[:space:]]*swap[[:space:]]*swap/d' /etc/fstab"
echo "swapoff -a"
fi
else
good No swap found
fi
# It's possible to run docker with selinux, but we don't support that.
if type sestatus &>/dev/null;then
if sestatus | grep "Current mode:" |grep -e enforcing ;then
warn 'SELinux enabled'
sestatus
if [[ $fix_issues -eq 0 ]];then
echo "Applying the following fixes"
echo_cmd sed -i s/^SELINUX=.*/SELINUX=permissive/ /etc/selinux/config
echo_cmd setenforce 0
else
echo Consider the following changes to disabled SELinux if you are having issues:
echo ' sed -i s/^SELINUX=.*/SELINUX=permissive/ /etc/selinux/config'
echo ' setenforce 0'
fi
else
good Selinux not enforcing
fi
else
#Assuming debian/ubuntu don't do selinux if no sestatus binary
if [ -e /etc/os-release ] && ! grep -q -i -e debian -e ubuntu /etc/os-release;then
warn 'sestatus binary not found assuming SELinux is disabled.'
else
good "No Selinux found"
fi
fi
#test kernel ip forward settings
if grep -q 0 /proc/sys/net/ipv4/ip_forward;then
if [[ $fix_issues -eq 0 ]];then
warn net.ipv4.ip_forward is set to 0
echo "Applying the following fixes"
echo_cmd sysctl -w net.ipv4.ip_forward=1
echo_cmd echo net.ipv4.ip_forward=1 >> /etc/sysctl.conf
else
error net.ipv4.ip_forward is set to 0
echo Consider the following changes:
echo ' sysctl -w net.ipv4.ip_forward=1'
echo ' echo net.ipv4.ip_forward=1 >> /etc/sysctl.conf'
fi
else
good ip_forward enabled
fi
#check for br netfilter
if [ ! -e /proc/sys/net/bridge/bridge-nf-call-iptables ];then
if [[ $fix_issues -eq 0 ]];then
warn '/proc/sys/net/bridge/bridge-nf-call-iptables does not exist!'
echo "Applying the following fixes"
echo_cmd modprobe br_netfilter
echo_cmd echo "br_netfilter" > /etc/modules-load.d/br_netfilter.conf
else
error '/proc/sys/net/bridge/bridge-nf-call-iptables does not exist!'
echo 'Is the br_netfilter module loaded? "lsmod |grep br_netfilter"'
echo Consider the following changes:
echo ' modprobe br_netfilter'
echo ' echo "br_netfilter" > /etc/modules-load.d/br_netfilter.conf'
fi
else
good bridge-nf-call-iptables module loaded
fi
if grep -q 0 /proc/sys/net/bridge/bridge-nf-call-iptables;then
if [[ $fix_issues -eq 0 ]];then
warn "Bridge netfilter disabled!!"
echo "Applying the following fixes"
echo_cmd sysctl -w net.bridge.bridge-nf-call-iptables=1
echo_cmd echo net.bridge.bridge-nf-call-iptables=1 >> /etc/sysctl.conf
else
error "Bridge netfilter disabled!!"
echo Consider the following changes:
echo ' sysctl -w net.bridge.bridge-nf-call-iptables=1'
echo ' echo net.bridge.bridge-nf-call-iptables=1 >> /etc/sysctl.conf'
fi
else
good bridge-nf-call-iptables enabled
fi
#TODO check for proxy settings, how, what, why
# Do we really need this has anyone complained?
#test for docker
if ! systemctl is-active docker &>/dev/null ; then
warn 'Docker service is not active? Maybe you are using some other CRI??'
if [[ $fix_issues -eq 0 ]];then
echo_cmd sudo systemctl start docker
fi
else
good Docker is running
fi
if ! systemctl is-enabled docker &>/dev/null;then
warn 'Docker service is not starting at boot. Maybe you are using some other CRI??'
if [[ $fix_issues -eq 0 ]];then
echo_cmd sudo systemctl enable docker
fi
else
good Docker is starting at boot
fi
if docker info 2>/dev/null|grep mountpoint;then
warn 'Docker does not have its own mountpoint'
# What is the fix for this??? How does this happen I've never seen it.
fi
# Is the version of docker locked/held if not we are going to suffer death by upgrade.
if [ ! -e /usr/bin/docker ];then
error no /usr/bin/docker
fi
if [ -e /usr/bin/dpkg ];then
dockerpkg=$(dpkg -S /usr/bin/docker |awk '{print $1}' |sed 's/:$//')
if [[ $dockerpkg =~ docker.io ]];then
if sudo apt-mark showhold |grep -q docker.io; then
good docker.io package held
else
warn docker.io package is not held
if [[ $fix_issues -eq 0 ]];then
echo_cmd sudo apt-mark hold docker.io
fi
fi
else
if [[ $dockerpkg =~ docker-ce ]];then
if sudo apt-mark showhold |grep -q docker-ce; then
good docker-ce package held
else
warn docker-ce package is not held
if [[ $fix_issues -eq 0 ]];then
echo_cmd sudo apt-mark hold docker-ce
fi
fi
fi
fi
else
if [ -e /usr/bin/rpm ];then
if yum versionlock list |grep -q docker-ce;then
good docker versionlocked
else
warn docker is not versionlocked
if [[ $fix_issues -eq 0 ]];then
echo_cmd sudo yum versionlock docker-ce
fi
fi
fi
fi
#Customers often have time issues, which can cause cert issues. Ex:cert is in future.
if type chronyc &>/dev/null;then
if chronyc activity |grep -q "^0 sources online";then
warn "Chrony found, but no ntp sources reported!"
else
good Found Chrony with valid ntp sources.
fi
else
if type ntpq &>/dev/null;then
if ntpq -c rv |grep -q 'leap=00,'; then
good Found ntp and we appear to be syncing.
else
warn "Found ntp client, but it appears to not be synced"
fi
else
warn "No ntp client found!!"
fi
fi
# Are we running the agent or kubelet?
if [ -e /etc/systemd/system/nirmata-agent.service ];then
echo Found nirmata-agent.service testing Nirmata agent
test_agent
else
if type kubelet &>/dev/null;then
#test for k8 service
echo Found kubelet binary running local kubernetes tests
echo -e "\e[33mNote if you plan on running the Nirmata agent remove this kubelet!!! \nIf this kubelet is running it will prevent Nirmata's kubelet from running. \e[0m"
if ! systemctl is-active kubelet &>/dev/null;then
error 'Kubelet is not active?'
else
good Kublet is active
fi
if ! systemctl is-enabled kubelet &>/dev/null;then
if [[ $fix_issues -eq 0 ]];then
echo "Applying the following fixes"
echo systectl enable kubelet
systectl enable kubelet
else
error 'Kubelet is not set to run at boot?'
fi
else