From 54473184af4ddaeea5727fec6b3e8870b5c658c5 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Mon, 12 Aug 2019 11:26:37 +0200 Subject: [PATCH 1/5] crashkernel: Add panic=5 Set kernel.panic to 5 to enable reboot after 5 seconds in case of a panic in the crashkernel. --- misc_systemd_units/crashkernel.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc_systemd_units/crashkernel.service b/misc_systemd_units/crashkernel.service index 6f4a92a..d5a944d 100644 --- a/misc_systemd_units/crashkernel.service +++ b/misc_systemd_units/crashkernel.service @@ -6,7 +6,7 @@ ConditionPathExists=/boot/bzImage.crash StandardOutput=kmsg StandardError=kmsg Type=oneshot -ExecStart=/usr/sbin/kexec -p /boot/bzImage.crash --initrd=/boot/grub/initramfs.igz --command-line="root=LABEL=root ro console=ttyS1,115200n8 console=tty0 irqpoll maxcpus=1 reset_devices CRASH" +ExecStart=/usr/sbin/kexec -p /boot/bzImage.crash --initrd=/boot/grub/initramfs.igz --command-line="root=LABEL=root ro console=ttyS1,115200n8 console=tty0 irqpoll maxcpus=1 reset_devices panic=5 CRASH" ExecStop=/usr/sbin/kexec -p -u RemainAfterExit=yes From 8b858666767fe53c160d15727bb6c553d2f95dbb Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Mon, 12 Aug 2019 11:38:26 +0200 Subject: [PATCH 2/5] mxgrub: Change crashkernel size to 512MB Systems like "kreios" panic during initialization of the panic kernel because they run out of memory. Double the memory size for the panic kernel. This is an experiment, because for systems with <8GB we surely don't want to reserve 512 MB. 
--- mxgrub/mxgrub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mxgrub/mxgrub b/mxgrub/mxgrub index 626b2ab..7e96993 100755 --- a/mxgrub/mxgrub +++ b/mxgrub/mxgrub @@ -239,7 +239,7 @@ sub scan_mariux { } } -our $KERNEL_PARAMETER="ro crashkernel=256M console=ttyS0,115200n8 console=ttyS1,115200n8 console=tty0 init=/bin/systemd audit=0"; +our $KERNEL_PARAMETER="ro crashkernel=512M console=ttyS0,115200n8 console=ttyS1,115200n8 console=tty0 init=/bin/systemd audit=0"; sub update_grub_cfg { my $kernellist=''; From d1a89dcf0afd4fb64b9b94549e45b9b0e393c4be Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Mon, 12 Aug 2019 11:44:02 +0200 Subject: [PATCH 3/5] mxgrub: use $KERNEL_PARAMETER everywhere The variable $KERNEL_PARAMETER is supposed to be used everywhere a kernel command line is built. One site was overlooked, so use it there, too. Also remove the ttyS0 console, which doesn't work, from KERNEL_PARAMETER. --- mxgrub/mxgrub | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mxgrub/mxgrub b/mxgrub/mxgrub index 7e96993..4d354c4 100755 --- a/mxgrub/mxgrub +++ b/mxgrub/mxgrub @@ -239,7 +239,7 @@ sub scan_mariux { } } -our $KERNEL_PARAMETER="ro crashkernel=512M console=ttyS0,115200n8 console=ttyS1,115200n8 console=tty0 init=/bin/systemd audit=0"; +our $KERNEL_PARAMETER="ro crashkernel=512M console=ttyS1,115200n8 console=tty0 init=/bin/systemd audit=0"; sub update_grub_cfg { my $kernellist=''; @@ -248,7 +248,7 @@ sub update_grub_cfg { for my $label (@MARIUX) { my $image=label_to_image($label); - $kernellist.="\tmenuentry \"$label\" --unrestricted { save_env chosen ; linux /boot/$image crashkernel=256M root=LABEL=root ro console=ttyS1,115200n8 console=tty0 init=/bin/systemd audit=0 ; initrd /boot/grub/initramfs.igz }\n"; + $kernellist.="\tmenuentry \"$label\" --unrestricted { save_env chosen ; linux /boot/$image root=LABEL=root $KERNEL_PARAMETER ; initrd /boot/grub/initramfs.igz }\n"; } my $GRUB_CFG_NEW=<<"EOF"; From 
1fc354805d3ec10d5345263c741e07f34544ee9c Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Mon, 12 Aug 2019 14:01:15 +0200 Subject: [PATCH 4/5] crashkernel: Move into crashkernel/ We are going to add the script which saves the crashdump to this repository. To keep related files together, create a new directory crashkernel and move the existing service file into it. --- {misc_systemd_units => crashkernel}/crashkernel.service | 0 install.sh | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename {misc_systemd_units => crashkernel}/crashkernel.service (100%) diff --git a/misc_systemd_units/crashkernel.service b/crashkernel/crashkernel.service similarity index 100% rename from misc_systemd_units/crashkernel.service rename to crashkernel/crashkernel.service diff --git a/install.sh b/install.sh index 58c140c..32f7e69 100755 --- a/install.sh +++ b/install.sh @@ -149,5 +149,5 @@ install_data misc_systemd_units/mxvlan.service "$DESTDIR$systemdunitd mkdir -p "$DESTDIR$usr_prefix/share/applications" install_data applications-defaults/defaults.list "$DESTDIR$usr_prefix/share/applications/defaults.list" install_data misc_etc_files/modprobe.d/nfs_slotlimit.conf "$DESTDIR$sysconfdir/modprobe.d/nfs_slotlimit.conf" -install_data misc_systemd_units/crashkernel.service "$DESTDIR$systemdunitdir/crashkernel.service" +install_data crashkernel/crashkernel.service "$DESTDIR$systemdunitdir/crashkernel.service" exit From c331d3df806789b03de00d8a4762a64dff2ce016 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Mon, 12 Aug 2019 14:08:48 +0200 Subject: [PATCH 5/5] crashkernel: Add crash-recovery.sh Import existing /sbin/crash-recovery.sh into repository. 
--- crashkernel/crash-recovery.sh | 77 +++++++++++++++++++++++++++++++++++ install.sh | 1 + 2 files changed, 78 insertions(+) create mode 100644 crashkernel/crash-recovery.sh diff --git a/crashkernel/crash-recovery.sh b/crashkernel/crash-recovery.sh new file mode 100644 index 0000000..0114b17 --- /dev/null +++ b/crashkernel/crash-recovery.sh @@ -0,0 +1,77 @@ +# We are called from initramfs:/init ( /project/admin/initramfs/init ) +# +# Execution environment: +# kernel is the crashkernel +# we are pid 1 +# original root is mounted at / +# root might be mounted "ro" (if we have that in the kernel command line of the crash kernel) +# /dev is mounted (prepopulated by devtmpfs) +# we are executed by bash + +# graphics console is not yet working, because the replaced kernel +# most probably used a framebuffer driver which reinitialized the graphics adapter +# so it no longer runs in a mode initialized by bios/grub. +# These bios-settings can not be restored easily, because this would require +# real mode(true?), which kexec doesn't go through. +# +# We can start the framebuffer driver and get a working console (fbcon) if +# we just load the right driver for the graphics adapter. +# +# However, nouveau currently requires to much memory (because of debugging data) and +# would just create a out of memory kernel panic. +# +#modprobe bochs_drm # DRM Support for bochs dispi vga interface (qemu stdvga) +#modprobe nouveau # nvidia : requires 512MB memory.... +# +# We can't use /dev/kmsg (kernel ring buffer), because it only accepts a very limited +# number of messages per time slot.... +# +#exec >/dev/kmsg 2>&1 +# +# So for now just use our serial console /dev/ttyS1 which hopefully is connected to another system. 
+# +exec >/dev/ttyS1 2>&1 + +reboot() { + test "$1" && echo "$S $1" + echo "+++++++++++++++++++++++++++++++++++++ reboot in 10s" + sleep 10 + /sbin/reboot -f +} + +free_space() { + local path="$1" + echo $(( $(stat -f -c %a "$path") * $(stat -f -c %S "$path") )) +} + +set -x +echo "+++++++++++++++++++++++++++++++++++++ crash recovery" + +mount -n -t proc proc /proc +mount -n -t sysfs sysfs /sys + +test -e /proc/vmcore || reboot "no crashdump" +core_size=$(stat -c %s /proc/vmcore) + +mount -o remount,rw / / + +if mount -L CRASHDUMP /mnt; then + test -e /mnt/crash.vmcore && rm /mnt/crash.vmcore + if (( $(free_space /mnt) - $core_size > 1024*1024*1024 )); then + echo "+++++++++++++++++++++++++++++++++++++ saving crash dump to CRASHDUMP volume" + cp /proc/vmcore /mnt/crash.vmcore + umount /mnt + else + echo "+++++++++++++++++++++++++++++++++++++ not enough free space on CRASHDUMP volume" + fi + reboot +fi; + +test -e /var/crash.vmcore && rm /var/crash.vmcore +if (( $(free_space /var) - $core_size > 1024*1024*1024 )); then + echo "+++++++++++++++++++++++++++++++++++++ saving crash dump to /var" + cp /proc/vmcore /var/crash.vmcore +else + echo "+++++++++++++++++++++++++++++++++++++ not enough free space in /var" +fi +reboot diff --git a/install.sh b/install.sh index 32f7e69..bf9c128 100755 --- a/install.sh +++ b/install.sh @@ -150,4 +150,5 @@ mkdir -p "$DESTDIR$usr_prefix/share/applications" install_data applications-defaults/defaults.list "$DESTDIR$usr_prefix/share/applications/defaults.list" install_data misc_etc_files/modprobe.d/nfs_slotlimit.conf "$DESTDIR$sysconfdir/modprobe.d/nfs_slotlimit.conf" install_data crashkernel/crashkernel.service "$DESTDIR$systemdunitdir/crashkernel.service" +install_exec crashkernel/crash-recovery.sh "$DESTDIR$root_sbindir/crash-recovery.sh" exit