Kernel
Threads by month
- ----- 2025 -----
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
September 2021
- 25 participants
- 125 discussions

[PATCH kernel_4.19 v2 01/30] delayacct: track delays from thrashing cache pages
by Liu Xinpeng 26 Sep '21
by Liu Xinpeng 26 Sep '21
26 Sep '21
mainline inclusion
from mainline-v5.4
commit b1d29ba82cf2bc784f4c963ddd6a2cf29e229b33
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I47QS2
CVE: NA
--------------------------------
Delay accounting already measures the time a task spends in direct reclaim
and waiting for swapin, but in low memory situations tasks spend can spend
a significant amount of their time waiting on thrashing page cache. This
isn't tracked right now.
To know the full impact of memory contention on an individual task,
measure the delay when waiting for a recently evicted active cache page to
read back into memory.
Also update tools/accounting/getdelays.c:
[hannes@computer accounting]$ sudo ./getdelays -d -p 1
print delayacct stats ON
PID 1
CPU count real total virtual total delay total delay average
50318 745000000 847346785 400533713 0.008ms
IO count delay total delay average
435 122601218 0ms
SWAP count delay total delay average
0 0 0ms
RECLAIM count delay total delay average
0 0 0ms
THRASHING count delay total delay average
19 12621439 0ms
Link: http://lkml.kernel.org/r/20180828172258.3185-4-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes(a)cmpxchg.org>
Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Tested-by: Daniel Drake <drake(a)endlessm.com>
Tested-by: Suren Baghdasaryan <surenb(a)google.com>
Cc: Christopher Lameter <cl(a)linux.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Johannes Weiner <jweiner(a)fb.com>
Cc: Mike Galbraith <efault(a)gmx.de>
Cc: Peter Enderborg <peter.enderborg(a)sony.com>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Vinayak Menon <vinmenon(a)codeaurora.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Liu Xinpeng <liuxp11(a)chinatelecom.cn>
Signed-off-by: Ctyun Kernel <ctyuncommiter01(a)chinatelecom.cn>
---
include/linux/delayacct.h | 23 +++++++++++++++++++++++
include/uapi/linux/taskstats.h | 6 +++++-
kernel/delayacct.c | 15 +++++++++++++++
mm/filemap.c | 11 +++++++++++
tools/accounting/getdelays.c | 8 +++++++-
5 files changed, 61 insertions(+), 2 deletions(-)
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 31c865d..577d1b2 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -57,7 +57,12 @@ struct task_delay_info {
u64 freepages_start;
u64 freepages_delay; /* wait for memory reclaim */
+
+ u64 thrashing_start;
+ u64 thrashing_delay; /* wait for thrashing page */
+
u32 freepages_count; /* total count of memory reclaim */
+ u32 thrashing_count; /* total count of thrash waits */
};
#endif
@@ -76,6 +81,8 @@ struct task_delay_info {
extern __u64 __delayacct_blkio_ticks(struct task_struct *);
extern void __delayacct_freepages_start(void);
extern void __delayacct_freepages_end(void);
+extern void __delayacct_thrashing_start(void);
+extern void __delayacct_thrashing_end(void);
static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
{
@@ -156,6 +163,18 @@ static inline void delayacct_freepages_end(void)
__delayacct_freepages_end();
}
+static inline void delayacct_thrashing_start(void)
+{
+ if (current->delays)
+ __delayacct_thrashing_start();
+}
+
+static inline void delayacct_thrashing_end(void)
+{
+ if (current->delays)
+ __delayacct_thrashing_end();
+}
+
#else
static inline void delayacct_set_flag(int flag)
{}
@@ -182,6 +201,10 @@ static inline void delayacct_freepages_start(void)
{}
static inline void delayacct_freepages_end(void)
{}
+static inline void delayacct_thrashing_start(void)
+{}
+static inline void delayacct_thrashing_end(void)
+{}
#endif /* CONFIG_TASK_DELAY_ACCT */
diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
index b7aa7bb..5e8ca16 100644
--- a/include/uapi/linux/taskstats.h
+++ b/include/uapi/linux/taskstats.h
@@ -34,7 +34,7 @@
*/
-#define TASKSTATS_VERSION 8
+#define TASKSTATS_VERSION 9
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
* in linux/sched.h */
@@ -164,6 +164,10 @@ struct taskstats {
/* Delay waiting for memory reclaim */
__u64 freepages_count;
__u64 freepages_delay_total;
+
+ /* Delay waiting for thrashing page */
+ __u64 thrashing_count;
+ __u64 thrashing_delay_total;
};
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index ca8ac28..2a12b98 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -135,9 +135,12 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
+ tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
+ d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
d->blkio_count += tsk->delays->blkio_count;
d->swapin_count += tsk->delays->swapin_count;
d->freepages_count += tsk->delays->freepages_count;
+ d->thrashing_count += tsk->delays->thrashing_count;
raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
return 0;
@@ -169,3 +172,15 @@ void __delayacct_freepages_end(void)
¤t->delays->freepages_count);
}
+void __delayacct_thrashing_start(void)
+{
+ current->delays->thrashing_start = ktime_get_ns();
+}
+
+void __delayacct_thrashing_end(void)
+{
+ delayacct_end(¤t->delays->lock,
+ ¤t->delays->thrashing_start,
+ ¤t->delays->thrashing_delay,
+ ¤t->delays->thrashing_count);
+}
diff --git a/mm/filemap.c b/mm/filemap.c
index 4cf4b09..6c964d8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -36,6 +36,7 @@
#include <linux/cleancache.h>
#include <linux/shmem_fs.h>
#include <linux/rmap.h>
+#include <linux/delayacct.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
@@ -1183,8 +1184,15 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
{
struct wait_page_queue wait_page;
wait_queue_entry_t *wait = &wait_page.wait;
+ bool thrashing = false;
int ret = 0;
+ if (bit_nr == PG_locked && !PageSwapBacked(page) &&
+ !PageUptodate(page) && PageWorkingset(page)) {
+ delayacct_thrashing_start();
+ thrashing = true;
+ }
+
init_wait(wait);
wait->flags = lock ? WQ_FLAG_EXCLUSIVE : 0;
wait->func = wake_page_function;
@@ -1223,6 +1231,9 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
finish_wait(q, wait);
+ if (thrashing)
+ delayacct_thrashing_end();
+
/*
* A signal could leave PageWaiters set. Clearing it here if
* !waitqueue_active would be possible (by open-coding finish_wait),
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 6bf6a20..5ef1c15 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -203,6 +203,8 @@ static void print_delayacct(struct taskstats *t)
"SWAP %15s%15s%15s\n"
" %15llu%15llu%15llums\n"
"RECLAIM %12s%15s%15s\n"
+ " %15llu%15llu%15llums\n"
+ "THRASHING%12s%15s%15s\n"
" %15llu%15llu%15llums\n",
"count", "real total", "virtual total",
"delay total", "delay average",
@@ -222,7 +224,11 @@ static void print_delayacct(struct taskstats *t)
"count", "delay total", "delay average",
(unsigned long long)t->freepages_count,
(unsigned long long)t->freepages_delay_total,
- average_ms(t->freepages_delay_total, t->freepages_count));
+ average_ms(t->freepages_delay_total, t->freepages_count),
+ "count", "delay total", "delay average",
+ (unsigned long long)t->thrashing_count,
+ (unsigned long long)t->thrashing_delay_total,
+ average_ms(t->thrashing_delay_total, t->thrashing_count));
}
static void task_context_switch_counts(struct taskstats *t)
--
1.8.3.1
3
30

24 Sep '21
Support Ramaxel raid controller
Changes since v2:
- Improve IO error handling
- Add new functions to get controller's information:
firmware version, the ability of chip
- Support pass through command of admin
Changes since v1:
- Add "OLK-5.10" to subject
- Add commit header into commit message
- Add CONFIG_RAMAXEL_SPRAID in defconfig
Yanling Song (2):
spraid: support Ramaxel raid controller
spraid: Add CONFIG_RAMAXEL_SPRAID in defconfig of arch arm64 and x86
arch/arm64/configs/openeuler_defconfig | 1 +
arch/x86/configs/openeuler_defconfig | 1 +
drivers/scsi/Kconfig | 1 +
drivers/scsi/Makefile | 1 +
drivers/scsi/spraid/Kconfig | 11 +
drivers/scsi/spraid/Makefile | 7 +
drivers/scsi/spraid/spraid.h | 655 +++++
drivers/scsi/spraid/spraid_main.c | 3616 ++++++++++++++++++++++++
8 files changed, 4293 insertions(+)
create mode 100644 drivers/scsi/spraid/Kconfig
create mode 100644 drivers/scsi/spraid/Makefile
create mode 100644 drivers/scsi/spraid/spraid.h
create mode 100644 drivers/scsi/spraid/spraid_main.c
--
2.17.1
1
2

[PATCH openEuler-1.0-LTS] pid: fix return value when copy_process() failed
by Yang Yingliang 24 Sep '21
by Yang Yingliang 24 Sep '21
24 Sep '21
hulk inclusion
category: bugfix
bugzilla: 182255
CVE: NA
-------------------------------------------------
When allocating futex_exit_mutex fails in copy_process(),
it needs return an error code, or it will lead a null-ptr-deref
when it's used in get_task_pid().
Fixes: 04d0e96b230ac ("futex: sched: fix kabi broken in task_struct")
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Reviewed-by: Cheng Jian <cj.chengjian(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
kernel/fork.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index b4fee9799c153..adc8fc99246fb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2000,8 +2000,10 @@ static __latent_entropy struct task_struct *copy_process(
#endif
futex_init_task(p);
p->futex_exit_mutex = kmalloc(sizeof(struct mutex), GFP_KERNEL);
- if (!p->futex_exit_mutex)
+ if (!p->futex_exit_mutex) {
+ retval = -ENOMEM;
goto bad_fork_free_pid;
+ }
mutex_init(p->futex_exit_mutex);
/*
--
2.25.1
1
0

[PATCH openEuler-1.0-LTS] block: fix NULL pointer in blkcg_drain_queue()
by Yang Yingliang 24 Sep '21
by Yang Yingliang 24 Sep '21
24 Sep '21
From: Laibin Qiu <qiulaibin(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: 182135
CVE: NA
-------------------------------------------------
blkcg_init_queue
blk_iolatency_init
blk_mq_freeze_queue
blk_throtl_drain <- blk_throtl will be
initialized later, so q->td is NULL and
trigger this NULL pointer BUG.
blk_throtl_init
The following is the log of the problem.
------------[ cut here ]------------
[ 8.516269] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.13.0-1ubuntu1.1 04/01/2014
[ 8.516296] RIP: 0010:blk_throtl_drain+0x142/0x760
[ 8.520477] Call Trace:
[ 8.520477] blkcg_drain_queue+0x90/0x120
[ 8.520477] __blk_drain_queue+0x18a/0x760
[ 8.520477] blk_drain_queue+0x45/0x80
[ 8.520477] blk_freeze_queue+0x58/0x70
[ 8.520477] blk_iolatency_init+0xdc/0x3e7
[ 8.520477] blkcg_init_queue+0x214/0x5e0
[ 8.520477] blk_alloc_queue_node+0x826/0xc10
[ 8.520477] ? ramdisk_size+0x27/0x27
[ 8.520477] brd_alloc+0x118/0x540
[ 8.520477] ? ramdisk_size+0x27/0x27
[ 8.520477] brd_init+0x179/0x4be
[ 8.520477] ? do_one_initcall+0x5fe/0x783
[ 8.520477] ? ramdisk_size+0x27/0x27
[ 8.520477] do_one_initcall+0xf7/0x783
[ 8.520477] ? initcall_blacklisted+0x1b0/0x1b0
[ 8.520477] ? __wake_up_common+0x600/0x620
[ 8.520477] ? lock_downgrade+0x720/0x720
[ 8.520477] ? check_preemption_disabled+0x40/0x2a0
[ 8.520477] kernel_init_freeable+0xb34/0xc27
[ 8.520477] ? rest_init+0x41c/0x41c
[ 8.520477] kernel_init+0x10/0x1e0
[ 8.520477] ? rest_init+0x41c/0x41c
[ 8.520477] ret_from_fork+0x24/0x30
[ 8.520477] Modules linked in:
[ 8.520477] Dumping ftrace buffer:
[ 8.520477] (ftrace buffer empty)
[ 8.520477] ---[ end trace 74cf51ecec6ee7a1 ]---
Fix this by judging whether q->td is NULL. If so, Indicates that the
io_throttle module has not been initialized.
Fixes: ee3147b403e5e ("block: fix race between adding/removing rq qos
and normal IO")
Signed-off-by: Laibin Qiu <qiulaibin(a)huawei.com>
Reviewed-by: Hou Tao <houtao1(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
block/blk-cgroup.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 592befc26584f..9511330bfebc7 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1314,6 +1314,13 @@ void blkcg_drain_queue(struct request_queue *q)
if (!q->root_blkg)
return;
+ /*
+ * @q could be exiting and q->td has not been initialized.
+ * If so, don't need drain any throttled bios.
+ */
+ if (!q->td)
+ return;
+
blk_throtl_drain(q);
}
--
2.25.1
1
0
From: Xingang Wang <wangxingang5(a)huawei.com>
This series patches introduce mpam feature for ascend:
- 1. The mpam driver resctrlfs need to add write interface for rmid, so that
the rmid can be configured from userspace.
- 2. Add support to configure SMMU mpam from svm.
bugzilla: https://gitee.com/openeuler/kernel/issues/I49RB2
Xingang Wang (9):
iommu/arm-smmu-v3: Add support to configure mpam in STE/CD context
iommu/arm-smmu-v3: Add support to get SMMU mpam configuration
iommu/arm-smmu-v3: Add support to enable/disable SMMU user_mpam_en
svm: Add support to get svm mpam configuration
svm: Add support to set svm mpam configuration
svm: Add svm_set_user_mpam_en to enable/disable mpam for smmu
mpam: enable rdt_mon_capable for mbw monitor
mpam: Add support for group rmid modify
mpam: update monitor rmid and group configuration
arch/arm64/kernel/mpam/mpam_ctrlmon.c | 33 ++--
arch/arm64/kernel/mpam/mpam_resctrl.c | 104 +++++++++-
arch/arm64/kernel/mpam/mpam_setup.c | 1 +
drivers/char/svm.c | 227 ++++++++++++++++++++++
drivers/iommu/arm-smmu-v3-context.c | 42 ++++
drivers/iommu/arm-smmu-v3.c | 265 ++++++++++++++++++++++++++
include/linux/ascend_smmu.h | 21 ++
7 files changed, 674 insertions(+), 19 deletions(-)
create mode 100644 include/linux/ascend_smmu.h
--
2.19.1
2
10

[PATCH openEuler-1.0-LTS 1/3] block: fix race between adding/removing rq qos and normal IO
by Yang Yingliang 24 Sep '21
by Yang Yingliang 24 Sep '21
24 Sep '21
From: Ming Lei <ming.lei(a)redhat.com>
mainline inclusion
from mainline-v5.14
commit 2cafe29a8d03f02a3d16193bdaae2f3e82a423f9
category: bugfix
bugzilla: 182135
CVE: NA
-------------------------------------------------
Yi reported several kernel panics on:
[16687.001777] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008
...
[16687.163549] pc : __rq_qos_track+0x38/0x60
or
[ 997.690455] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020
...
[ 997.850347] pc : __rq_qos_done+0x2c/0x50
Turns out it is caused by race between adding rq qos(wbt) and normal IO
because rq_qos_add can be run when IO is being submitted, fix this issue
by freezing queue before adding/deleting rq qos to queue.
rq_qos_exit() needn't to freeze queue because it is called after queue
has been frozen.
iolatency calls rq_qos_add() during allocating queue, so freezing won't
add delay because queue usage refcount works at atomic mode at that
time.
iocost calls rq_qos_add() when writing cgroup attribute file, that is
fine to freeze queue at that time since we usually freeze queue when
storing to queue sysfs attribute, meantime iocost only exists on the
root cgroup.
wbt_init calls it in blk_register_queue() and queue sysfs attribute
store(queue_wb_lat_store() when write it 1st time in case of !BLK_WBT_MQ),
the following patch will speedup the queue freezing in wbt_init.
Reported-by: Yi Zhang <yi.zhang(a)redhat.com>
Cc: Bart Van Assche <bvanassche(a)acm.org>
Signed-off-by: Ming Lei <ming.lei(a)redhat.com>
Reviewed-by: Bart Van Assche <bvanassche(a)acm.org>
Tested-by: Yi Zhang <yi.zhang(a)redhat.com>
Link: https://lore.kernel.org/r/20210609015822.103433-2-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Conflict: block/blk-rq-qos.h
Signed-off-by: Laibin Qiu <qiulaibin(a)huawei.com>
Reviewed-by: Hou Tao <houtao1(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
block/blk-rq-qos.h | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 98caba3e962ee..c99b2facd9ffd 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -6,6 +6,7 @@
#include <linux/blk_types.h>
#include <linux/atomic.h>
#include <linux/wait.h>
+#include <linux/blk-mq.h>
enum rq_qos_id {
RQ_QOS_WBT,
@@ -74,20 +75,43 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
{
+ /*
+ * No IO can be in-flight when adding rqos, so freeze queue, which
+ * is fine since we only support rq_qos for blk-mq queue.
+ *
+ * Reuse ->queue_lock for protecting against other concurrent
+ * rq_qos adding/deleting
+ */
+ blk_mq_freeze_queue(q);
+
+ spin_lock_irq(q->queue_lock);
rqos->next = q->rq_qos;
q->rq_qos = rqos;
+ spin_unlock_irq(q->queue_lock);
+
+ blk_mq_unfreeze_queue(q);
}
static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
{
struct rq_qos **cur;
+ /*
+ * See comment in rq_qos_add() about freezing queue & using
+ * ->queue_lock.
+ */
+ blk_mq_freeze_queue(q);
+
+ spin_lock_irq(q->queue_lock);
for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
if (*cur == rqos) {
*cur = rqos->next;
break;
}
}
+ spin_unlock_irq(q->queue_lock);
+
+ blk_mq_unfreeze_queue(q);
}
bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit);
--
2.25.1
1
2

[PATCH openEuler-1.0-LTS] Fix incorrect call to free srb in qla2xxx_mqueuecommand()
by yinxiujiang 23 Sep '21
by yinxiujiang 23 Sep '21
23 Sep '21
From: yin-xiujiang <yinxiujiang(a)kylinos.cn>
Fix incorrect call to free srb in qla2xxx_mqueuecommand(), as srb is now
allocated by upper layers. This fixes smatch warning of srb unintended
free.
Signed-off-by: yin-xiujiang <yinxiujiang(a)kylinos.cn>
---
drivers/scsi/qla2xxx/qla_os.c | 7 -------
1 file changed, 7 deletions(-)
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index bfbf213b15c0..8e9d386146ac 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1028,8 +1028,6 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd,
if (rval != QLA_SUCCESS) {
ql_dbg(ql_dbg_io + ql_dbg_verbose, vha, 0x3078,
"Start scsi failed rval=%d for cmd=%p.\n", rval, cmd);
- if (rval == QLA_INTERFACE_ERROR)
- goto qc24_free_sp_fail_command;
goto qc24_host_busy_free_sp;
}
@@ -1044,11 +1042,6 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd,
qc24_target_busy:
return SCSI_MLQUEUE_TARGET_BUSY;
-qc24_free_sp_fail_command:
- sp->free(sp);
- CMD_SP(cmd) = NULL;
- qla2xxx_rel_qpair_sp(sp->qpair, sp);
-
qc24_fail_command:
cmd->scsi_done(cmd);
--
2.27.0
2
1
support arm64 kexec_file(23):
kexec_file: add kexec_file_load system call to unistd.h
kexec_file: make kexec_image_post_load_cleanup_default()
global
kexec_file: drop arch_kexec_mem_walk()
kexec_file: factor out memblock-based
arch_kexec_walk_mem()
kexec_file: kexec_walk_memblock() only walks a dedicated
region at kdump
kexec_file: add image head flag definitions
kexec_file: cpufeature: add MMFR0 helper functions
kexec_file: enable KEXEC_FILE config
kexec_file: load initrd and device-tree
kexec_file: allow for loading Image-format kernel
kexec_file: invoke the kernel without purgatory
kexec_file: pe.h: remove message[] from mz header definition
kexec_file: add kernel signature verification support
kexec_file: add kaslr support
kexec_file: forbid kdump via kexec_file_load()
kexec_file: include fdt_addresses.c
kexec_file: include linux/vmalloc.h
kexec_file: Refactor setup_dtb() to consolidate error
checking
kexec_file: add rng-seed support
kexec_file: add crash dump support
kexec_file: Update to upstream version v1.5.0-23-g87963ee20693
kexec_file: handle empty command-line
kexec_file: fix memory leakage in create_dtb() when
fdt_open_into() fails
support platform keyring(10):
integrity: Define a trusted platform keyring
integrity: Load certs to the platform keyring
integrity: Add EFI signature data types
integrity: Add an EFI signature blob parser
integrity: Import certificates from UEFI Secure Boot
integrity: Allow the "db" UEFI variable to be suppressed
integrity: Support platform keyring for kernel appraisal
integrity, KEYS: add a reference to platform keyring
integrity: Make use of platform keyring for signature
verify
integrity: Fix __integrity_init_keyring() section
mismatch
fix kabi(1):
kexec_file: fix kabi because of the new system call sys_kexec_file_load
---
arch/arm64/Kconfig | 33 ++
arch/arm64/include/asm/cpufeature.h | 48 +++
arch/arm64/include/asm/image.h | 59 +++
arch/arm64/include/asm/kexec.h | 23 ++
arch/arm64/kernel/Makefile | 3 +-
arch/arm64/kernel/cpu-reset.S | 8 +-
arch/arm64/kernel/head.S | 3 +-
arch/arm64/kernel/image.h | 21 +-
arch/arm64/kernel/kexec_image.c | 126 +++++++
arch/arm64/kernel/machine_kexec.c | 12 +-
arch/arm64/kernel/machine_kexec_file.c | 342 ++++++++++++++++++
arch/arm64/kernel/relocate_kernel.S | 3 +-
arch/powerpc/kernel/machine_kexec_file_64.c | 54 ---
arch/s390/kernel/machine_kexec_file.c | 10 -
arch/x86/kernel/kexec-bzimage64.c | 14 +-
certs/system_keyring.c | 23 +-
include/keys/system_keyring.h | 8 +
include/linux/efi.h | 34 ++
include/linux/kexec.h | 15 +-
include/linux/pe.h | 2 +-
include/linux/verification.h | 1 +
include/uapi/linux/kexec.h | 1 +
kernel/kexec.c | 10 +
kernel/kexec_file.c | 160 +++++++-
lib/Makefile | 2 +-
lib/fdt_addresses.c | 2 +
scripts/dtc/libfdt/fdt_addresses.c | 47 +++
scripts/dtc/libfdt/libfdt.h | 61 ++++
security/integrity/Kconfig | 11 +
security/integrity/Makefile | 5 +
security/integrity/digsig.c | 114 ++++--
security/integrity/ima/ima_appraise.c | 14 +-
security/integrity/integrity.h | 23 +-
.../integrity/platform_certs/efi_parser.c | 108 ++++++
security/integrity/platform_certs/load_uefi.c | 194 ++++++++++
.../platform_certs/platform_keyring.c | 58 +++
36 files changed, 1518 insertions(+), 134 deletions(-)
create mode 100644 arch/arm64/include/asm/image.h
create mode 100644 arch/arm64/kernel/kexec_image.c
create mode 100644 arch/arm64/kernel/machine_kexec_file.c
create mode 100644 lib/fdt_addresses.c
create mode 100644 security/integrity/platform_certs/efi_parser.c
create mode 100644 security/integrity/platform_certs/load_uefi.c
create mode 100644 security/integrity/platform_certs/platform_keyring.c
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9dfbd052b..5381d21bb 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -957,6 +957,39 @@ config KEXEC
but it is independent of the system firmware. And like a reboot
you can start any kernel with it, not just Linux.
+config KEXEC_FILE
+ bool "kexec file based system call"
+ select KEXEC_CORE
+ help
+ This is new version of kexec system call. This system call is
+ file based and takes file descriptors as system call argument
+ for kernel and initramfs as opposed to list of segments as
+ accepted by previous system call.
+
+config KEXEC_VERIFY_SIG
+ bool "Verify kernel signature during kexec_file_load() syscall"
+ depends on KEXEC_FILE
+ help
+ Select this option to verify a signature with loaded kernel
+ image. If configured, any attempt of loading a image without
+ valid signature will fail.
+
+ In addition to that option, you need to enable signature
+ verification for the corresponding kernel image type being
+ loaded in order for this to work.
+
+config KEXEC_IMAGE_VERIFY_SIG
+ bool "Enable Image signature verification support"
+ default y
+ depends on KEXEC_VERIFY_SIG
+ depends on EFI && SIGNED_PE_FILE_VERIFICATION
+ help
+ Enable Image signature verification support.
+
+comment "Support for PE file signature verification disabled"
+ depends on KEXEC_VERIFY_SIG
+ depends on !EFI || !SIGNED_PE_FILE_VERIFICATION
+
config CRASH_DUMP
bool "Build kdump crash kernel"
help
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index c43d02365..211aa994b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -497,11 +497,59 @@ static inline bool system_supports_32bit_el0(void)
return cpus_have_const_cap(ARM64_HAS_32BIT_EL0);
}
+static inline bool system_supports_4kb_granule(void)
+{
+ u64 mmfr0;
+ u32 val;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ val = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_TGRAN4_SHIFT);
+
+ return val == ID_AA64MMFR0_TGRAN4_SUPPORTED;
+}
+
+static inline bool system_supports_64kb_granule(void)
+{
+ u64 mmfr0;
+ u32 val;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ val = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_TGRAN64_SHIFT);
+
+ return val == ID_AA64MMFR0_TGRAN64_SUPPORTED;
+}
+
+static inline bool system_supports_16kb_granule(void)
+{
+ u64 mmfr0;
+ u32 val;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ val = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_TGRAN16_SHIFT);
+
+ return val == ID_AA64MMFR0_TGRAN16_SUPPORTED;
+}
+
static inline bool system_supports_mixed_endian_el0(void)
{
return id_aa64mmfr0_mixed_endian_el0(read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1));
}
+static inline bool system_supports_mixed_endian(void)
+{
+ u64 mmfr0;
+ u32 val;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ val = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_BIGENDEL_SHIFT);
+
+ return val == 0x1;
+}
+
static inline bool system_supports_fpsimd(void)
{
return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h
new file mode 100644
index 000000000..e2c27a227
--- /dev/null
+++ b/arch/arm64/include/asm/image.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_IMAGE_H
+#define __ASM_IMAGE_H
+
+#define ARM64_IMAGE_MAGIC "ARM\x64"
+
+#define ARM64_IMAGE_FLAG_BE_SHIFT 0
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT (ARM64_IMAGE_FLAG_BE_SHIFT + 1)
+#define ARM64_IMAGE_FLAG_PHYS_BASE_SHIFT \
+ (ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT + 2)
+#define ARM64_IMAGE_FLAG_BE_MASK 0x1
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_MASK 0x3
+#define ARM64_IMAGE_FLAG_PHYS_BASE_MASK 0x1
+
+#define ARM64_IMAGE_FLAG_LE 0
+#define ARM64_IMAGE_FLAG_BE 1
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_4K 1
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_16K 2
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_64K 3
+#define ARM64_IMAGE_FLAG_PHYS_BASE 1
+
+#ifndef __ASSEMBLY__
+
+#define arm64_image_flag_field(flags, field) \
+ (((flags) >> field##_SHIFT) & field##_MASK)
+
+/*
+ * struct arm64_image_header - arm64 kernel image header
+ * See Documentation/arm64/booting.txt for details
+ *
+ * @code0: Executable code, or
+ * @mz_header alternatively used for part of MZ header
+ * @code1: Executable code
+ * @text_offset: Image load offset
+ * @image_size: Effective Image size
+ * @flags: kernel flags
+ * @reserved: reserved
+ * @magic: Magic number
+ * @reserved5: reserved, or
+ * @pe_header: alternatively used for PE COFF offset
+ */
+
+struct arm64_image_header {
+ __le32 code0;
+ __le32 code1;
+ __le64 text_offset;
+ __le64 image_size;
+ __le64 flags;
+ __le64 res2;
+ __le64 res3;
+ __le64 res4;
+ __le32 magic;
+ __le32 res5;
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_IMAGE_H */
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 30ab1fa34..286950227 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -101,6 +101,29 @@ static inline void crash_prepare_suspend(void) {}
static inline void crash_post_resume(void) {}
#endif
+#ifdef CONFIG_KEXEC_FILE
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+ void *dtb;
+ unsigned long dtb_mem;
+ /* Core ELF header buffer */
+ void *elf_headers;
+ unsigned long elf_headers_mem;
+ unsigned long elf_headers_sz;
+};
+
+extern const struct kexec_file_ops kexec_image_ops;
+
+struct kimage;
+
+extern int arch_kimage_file_post_load_cleanup(struct kimage *image);
+extern int load_other_segments(struct kimage *image,
+ unsigned long kernel_load_addr, unsigned long kernel_size,
+ char *initrd, unsigned long initrd_len,
+ char *cmdline);
+#endif
+
void machine_kexec_mask_interrupts(void);
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index fe759f364..2e21303b6 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -54,8 +54,9 @@ arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt-spinlocks.o
arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
-arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \
+arm64-obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \
cpu-reset.o
+arm64-obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o
arm64-obj-$(CONFIG_ARM64_CPU_PARK) += cpu-park.o
arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o
arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 8021b46c9..a2be30275 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -22,11 +22,11 @@
* __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for
* cpu_soft_restart.
*
- * @el2_switch: Flag to indicate a swich to EL2 is needed.
+ * @el2_switch: Flag to indicate a switch to EL2 is needed.
* @entry: Location to jump to for soft reset.
- * arg0: First argument passed to @entry.
- * arg1: Second argument passed to @entry.
- * arg2: Third argument passed to @entry.
+ * arg0: First argument passed to @entry. (relocation list)
+ * arg1: Second argument passed to @entry.(physical kernel entry)
+ * arg2: Third argument passed to @entry. (physical dtb address)
*
* Put the CPU into the same state as it would be if it had been reset, and
* branch to what would be the reset vector. It must be executed with the
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 34bd0790d..5edfb9cfd 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -31,6 +31,7 @@
#include <asm/cache.h>
#include <asm/cputype.h>
#include <asm/elf.h>
+#include <asm/image.h>
#include <asm/kernel-pgtable.h>
#include <asm/kvm_arm.h>
#include <asm/memory.h>
@@ -91,7 +92,7 @@ _head:
.quad 0 // reserved
.quad 0 // reserved
.quad 0 // reserved
- .ascii "ARM\x64" // Magic number
+ .ascii ARM64_IMAGE_MAGIC // Magic number
#ifdef CONFIG_EFI
.long pe_header - _head // Offset to the PE header.
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index eff6a564a..b22e8ad07 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -15,13 +15,15 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __ASM_IMAGE_H
-#define __ASM_IMAGE_H
+#ifndef __ARM64_KERNEL_IMAGE_H
+#define __ARM64_KERNEL_IMAGE_H
#ifndef LINKER_SCRIPT
#error This file should only be included in vmlinux.lds.S
#endif
+#include <asm/image.h>
+
/*
* There aren't any ELF relocations we can use to endian-swap values known only
* at link time (e.g. the subtraction of two symbol addresses), so we must get
@@ -47,19 +49,22 @@
sym##_lo32 = DATA_LE32((data) & 0xffffffff); \
sym##_hi32 = DATA_LE32((data) >> 32)
+#define __HEAD_FLAG(field) (__HEAD_FLAG_##field << \
+ ARM64_IMAGE_FLAG_##field##_SHIFT)
+
#ifdef CONFIG_CPU_BIG_ENDIAN
-#define __HEAD_FLAG_BE 1
+#define __HEAD_FLAG_BE ARM64_IMAGE_FLAG_BE
#else
-#define __HEAD_FLAG_BE 0
+#define __HEAD_FLAG_BE ARM64_IMAGE_FLAG_LE
#endif
#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
#define __HEAD_FLAG_PHYS_BASE 1
-#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \
- (__HEAD_FLAG_PAGE_SIZE << 1) | \
- (__HEAD_FLAG_PHYS_BASE << 3))
+#define __HEAD_FLAGS (__HEAD_FLAG(BE) | \
+ __HEAD_FLAG(PAGE_SIZE) | \
+ __HEAD_FLAG(PHYS_BASE))
/*
* These will output as part of the Image header, which should be little-endian
@@ -113,4 +118,4 @@ __efistub_screen_info = screen_info;
#endif
-#endif /* __ASM_IMAGE_H */
+#endif /* __ARM64_KERNEL_IMAGE_H */
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c
new file mode 100644
index 000000000..1ad1d5f8f
--- /dev/null
+++ b/arch/arm64/kernel/kexec_image.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Kexec image loader
+
+ * Copyright (C) 2018 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi(a)linaro.org>
+ */
+
+#define pr_fmt(fmt) "kexec_file(Image): " fmt
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <linux/pe.h>
+#include <linux/string.h>
+#include <linux/verification.h>
+#include <asm/byteorder.h>
+#include <asm/cpufeature.h>
+#include <asm/image.h>
+#include <asm/memory.h>
+
+static int image_probe(const char *kernel_buf, unsigned long kernel_len)
+{
+ const struct arm64_image_header *h =
+ (const struct arm64_image_header *)(kernel_buf);
+
+ if (!h || (kernel_len < sizeof(*h)))
+ return -EINVAL;
+
+ if (memcmp(&h->magic, ARM64_IMAGE_MAGIC, sizeof(h->magic)))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void *image_load(struct kimage *image,
+ char *kernel, unsigned long kernel_len,
+ char *initrd, unsigned long initrd_len,
+ char *cmdline, unsigned long cmdline_len)
+{
+ struct arm64_image_header *h;
+ u64 flags, value;
+ bool be_image, be_kernel;
+ struct kexec_buf kbuf;
+ unsigned long text_offset;
+ struct kexec_segment *kernel_segment;
+ int ret;
+
+ /*
+ * We require a kernel with an unambiguous Image header. Per
+ * Documentation/booting.txt, this is the case when image_size
+ * is non-zero (practically speaking, since v3.17).
+ */
+ h = (struct arm64_image_header *)kernel;
+ if (!h->image_size)
+ return ERR_PTR(-EINVAL);
+
+ /* Check cpu features */
+ flags = le64_to_cpu(h->flags);
+ be_image = arm64_image_flag_field(flags, ARM64_IMAGE_FLAG_BE);
+ be_kernel = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+ if ((be_image != be_kernel) && !system_supports_mixed_endian())
+ return ERR_PTR(-EINVAL);
+
+ value = arm64_image_flag_field(flags, ARM64_IMAGE_FLAG_PAGE_SIZE);
+ if (((value == ARM64_IMAGE_FLAG_PAGE_SIZE_4K) &&
+ !system_supports_4kb_granule()) ||
+ ((value == ARM64_IMAGE_FLAG_PAGE_SIZE_64K) &&
+ !system_supports_64kb_granule()) ||
+ ((value == ARM64_IMAGE_FLAG_PAGE_SIZE_16K) &&
+ !system_supports_16kb_granule()))
+ return ERR_PTR(-EINVAL);
+
+ /* Load the kernel */
+ kbuf.image = image;
+ kbuf.buf_min = 0;
+ kbuf.buf_max = ULONG_MAX;
+ kbuf.top_down = false;
+
+ kbuf.buffer = kernel;
+ kbuf.bufsz = kernel_len;
+ kbuf.mem = 0;
+ kbuf.memsz = le64_to_cpu(h->image_size);
+ text_offset = le64_to_cpu(h->text_offset);
+ kbuf.buf_align = MIN_KIMG_ALIGN;
+
+ /* Adjust kernel segment with TEXT_OFFSET */
+ kbuf.memsz += text_offset;
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ return ERR_PTR(ret);
+
+ kernel_segment = &image->segment[image->nr_segments - 1];
+ kernel_segment->mem += text_offset;
+ kernel_segment->memsz -= text_offset;
+ image->start = kernel_segment->mem;
+
+ pr_debug("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ kernel_segment->mem, kbuf.bufsz,
+ kernel_segment->memsz);
+
+ /* Load additional data */
+ ret = load_other_segments(image,
+ kernel_segment->mem, kernel_segment->memsz,
+ initrd, initrd_len, cmdline);
+
+ return ERR_PTR(ret);
+}
+
+#ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG
+static int image_verify_sig(const char *kernel, unsigned long kernel_len)
+{
+ return verify_pefile_signature(kernel, kernel_len, NULL,
+ VERIFYING_KEXEC_PE_SIGNATURE);
+}
+#endif
+
+const struct kexec_file_ops kexec_image_ops = {
+ .probe = image_probe,
+ .load = image_load,
+#ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG
+ .verify_sig = image_verify_sig,
+#endif
+};
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index b41d6d482..213d0053e 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -214,9 +214,17 @@ void machine_kexec(struct kimage *kimage)
* uses physical addressing to relocate the new image to its final
* position and transfers control to the image entry point when the
* relocation is complete.
+ * In kexec case, kimage->start points to purgatory assuming that
+ * kernel entry and dtb address are embedded in purgatory by
+ * userspace (kexec-tools).
+ * In kexec_file case, the kernel starts directly without purgatory.
*/
-
- cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start, 0);
+ cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start,
+#ifdef CONFIG_KEXEC_FILE
+ kimage->arch.dtb_mem);
+#else
+ 0);
+#endif
BUG(); /* Should never get here. */
}
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
new file mode 100644
index 000000000..ec3b72dc0
--- /dev/null
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * kexec_file for arm64
+ *
+ * Copyright (C) 2018 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi(a)linaro.org>
+ *
+ * Most code is derived from arm64 port of kexec-tools
+ */
+
+#define pr_fmt(fmt) "kexec_file: " fmt
+
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <linux/libfdt.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+#include <asm/byteorder.h>
+
+/* relevant device tree properties */
+#define FDT_PROP_KEXEC_ELFHDR "linux,elfcorehdr"
+#define FDT_PROP_MEM_RANGE "linux,usable-memory-range"
+#define FDT_PROP_INITRD_START "linux,initrd-start"
+#define FDT_PROP_INITRD_END "linux,initrd-end"
+#define FDT_PROP_BOOTARGS "bootargs"
+#define FDT_PROP_KASLR_SEED "kaslr-seed"
+#define FDT_PROP_RNG_SEED "rng-seed"
+#define RNG_SEED_SIZE 128
+
+const struct kexec_file_ops * const kexec_file_loaders[] = {
+ &kexec_image_ops,
+ NULL
+};
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ vfree(image->arch.dtb);
+ image->arch.dtb = NULL;
+
+ vfree(image->arch.elf_headers);
+ image->arch.elf_headers = NULL;
+ image->arch.elf_headers_sz = 0;
+
+ return kexec_image_post_load_cleanup_default(image);
+}
+
+static int setup_dtb(struct kimage *image,
+ unsigned long initrd_load_addr, unsigned long initrd_len,
+ char *cmdline, void *dtb)
+{
+ int off, ret;
+
+ ret = fdt_path_offset(dtb, "/chosen");
+ if (ret < 0)
+ goto out;
+
+ off = ret;
+
+ ret = fdt_delprop(dtb, off, FDT_PROP_KEXEC_ELFHDR);
+ if (ret && ret != -FDT_ERR_NOTFOUND)
+ goto out;
+ ret = fdt_delprop(dtb, off, FDT_PROP_MEM_RANGE);
+ if (ret && ret != -FDT_ERR_NOTFOUND)
+ goto out;
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ /* add linux,elfcorehdr */
+ ret = fdt_appendprop_addrrange(dtb, 0, off,
+ FDT_PROP_KEXEC_ELFHDR,
+ image->arch.elf_headers_mem,
+ image->arch.elf_headers_sz);
+ if (ret)
+ return (ret == -FDT_ERR_NOSPACE ? -ENOMEM : -EINVAL);
+
+ /* add linux,usable-memory-range */
+ ret = fdt_appendprop_addrrange(dtb, 0, off,
+ FDT_PROP_MEM_RANGE,
+ crashk_res.start,
+ crashk_res.end - crashk_res.start + 1);
+ if (ret)
+ return (ret == -FDT_ERR_NOSPACE ? -ENOMEM : -EINVAL);
+ }
+
+ /* add bootargs */
+ if (cmdline) {
+ ret = fdt_setprop_string(dtb, off, FDT_PROP_BOOTARGS, cmdline);
+ if (ret)
+ goto out;
+ } else {
+ ret = fdt_delprop(dtb, off, FDT_PROP_BOOTARGS);
+ if (ret && (ret != -FDT_ERR_NOTFOUND))
+ goto out;
+ }
+
+ /* add initrd-* */
+ if (initrd_load_addr) {
+ ret = fdt_setprop_u64(dtb, off, FDT_PROP_INITRD_START,
+ initrd_load_addr);
+ if (ret)
+ goto out;
+
+ ret = fdt_setprop_u64(dtb, off, FDT_PROP_INITRD_END,
+ initrd_load_addr + initrd_len);
+ if (ret)
+ goto out;
+ } else {
+ ret = fdt_delprop(dtb, off, FDT_PROP_INITRD_START);
+ if (ret && (ret != -FDT_ERR_NOTFOUND))
+ goto out;
+
+ ret = fdt_delprop(dtb, off, FDT_PROP_INITRD_END);
+ if (ret && (ret != -FDT_ERR_NOTFOUND))
+ goto out;
+ }
+
+ /* add kaslr-seed */
+ ret = fdt_delprop(dtb, off, FDT_PROP_KASLR_SEED);
+ if (ret && (ret != -FDT_ERR_NOTFOUND))
+ goto out;
+
+ if (rng_is_initialized()) {
+ u64 seed = get_random_u64();
+ ret = fdt_setprop_u64(dtb, off, FDT_PROP_KASLR_SEED, seed);
+ if (ret)
+ goto out;
+ } else {
+ pr_notice("RNG is not initialised: omitting \"%s\" property\n",
+ FDT_PROP_KASLR_SEED);
+ }
+
+ /* add rng-seed */
+ if (rng_is_initialized()) {
+ u8 rng_seed[RNG_SEED_SIZE];
+ get_random_bytes(rng_seed, RNG_SEED_SIZE);
+ ret = fdt_setprop(dtb, off, FDT_PROP_RNG_SEED, rng_seed,
+ RNG_SEED_SIZE);
+ if (ret)
+ goto out;
+ } else {
+ pr_notice("RNG is not initialised: omitting \"%s\" property\n",
+ FDT_PROP_RNG_SEED);
+ }
+
+out:
+ if (ret)
+ return (ret == -FDT_ERR_NOSPACE) ? -ENOMEM : -EINVAL;
+
+ return 0;
+}
+
+/*
+ * More space needed so that we can add initrd, bootargs, kaslr-seed,
+ * rng-seed, userable-memory-range and elfcorehdr.
+ */
+#define DTB_EXTRA_SPACE 0x1000
+
+static int create_dtb(struct kimage *image,
+ unsigned long initrd_load_addr, unsigned long initrd_len,
+ char *cmdline, void **dtb)
+{
+ void *buf;
+ size_t buf_size;
+ size_t cmdline_len;
+ int ret;
+
+ cmdline_len = cmdline ? strlen(cmdline) : 0;
+ buf_size = fdt_totalsize(initial_boot_params)
+ + cmdline_len + DTB_EXTRA_SPACE;
+
+ for (;;) {
+ buf = vmalloc(buf_size);
+ if (!buf)
+ return -ENOMEM;
+
+ /* duplicate a device tree blob */
+ ret = fdt_open_into(initial_boot_params, buf, buf_size);
+ if (ret) {
+ vfree(buf);
+ return -EINVAL;
+ }
+
+ ret = setup_dtb(image, initrd_load_addr, initrd_len,
+ cmdline, buf);
+ if (ret) {
+ vfree(buf);
+ if (ret == -ENOMEM) {
+ /* unlikely, but just in case */
+ buf_size += DTB_EXTRA_SPACE;
+ continue;
+ } else {
+ return ret;
+ }
+ }
+
+ /* trim it */
+ fdt_pack(buf);
+ *dtb = buf;
+
+ return 0;
+ }
+}
+
+static int prepare_elf_headers(void **addr, unsigned long *sz)
+{
+ struct crash_mem *cmem;
+ unsigned int nr_ranges;
+ int ret;
+ u64 i;
+ phys_addr_t start, end;
+
+ nr_ranges = 1; /* for exclusion of crashkernel region */
+ for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
+ MEMBLOCK_NONE, &start, &end, NULL)
+ nr_ranges++;
+
+ cmem = kmalloc(sizeof(struct crash_mem) +
+ sizeof(struct crash_mem_range) * nr_ranges, GFP_KERNEL);
+ if (!cmem)
+ return -ENOMEM;
+
+ cmem->max_nr_ranges = nr_ranges;
+ cmem->nr_ranges = 0;
+ for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
+ MEMBLOCK_NONE, &start, &end, NULL) {
+ cmem->ranges[cmem->nr_ranges].start = start;
+ cmem->ranges[cmem->nr_ranges].end = end - 1;
+ cmem->nr_ranges++;
+ }
+
+ /* Exclude crashkernel region */
+ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+
+ if (!ret)
+ ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
+
+ kfree(cmem);
+ return ret;
+}
+
+int load_other_segments(struct kimage *image,
+ unsigned long kernel_load_addr,
+ unsigned long kernel_size,
+ char *initrd, unsigned long initrd_len,
+ char *cmdline)
+{
+ struct kexec_buf kbuf;
+ void *headers, *dtb = NULL;
+ unsigned long headers_sz, initrd_load_addr = 0, dtb_len;
+ int ret = 0;
+
+ kbuf.image = image;
+ /* not allocate anything below the kernel */
+ kbuf.buf_min = kernel_load_addr + kernel_size;
+
+ /* load elf core header */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = prepare_elf_headers(&headers, &headers_sz);
+ if (ret) {
+ pr_err("Preparing elf core header failed\n");
+ goto out_err;
+ }
+
+ kbuf.buffer = headers;
+ kbuf.bufsz = headers_sz;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = headers_sz;
+ kbuf.buf_align = SZ_64K; /* largest supported page size */
+ kbuf.buf_max = ULONG_MAX;
+ kbuf.top_down = true;
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ vfree(headers);
+ goto out_err;
+ }
+ image->arch.elf_headers = headers;
+ image->arch.elf_headers_mem = kbuf.mem;
+ image->arch.elf_headers_sz = headers_sz;
+
+ pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ image->arch.elf_headers_mem, headers_sz, headers_sz);
+ }
+
+ /* load initrd */
+ if (initrd) {
+ kbuf.buffer = initrd;
+ kbuf.bufsz = initrd_len;
+ kbuf.mem = 0;
+ kbuf.memsz = initrd_len;
+ kbuf.buf_align = 0;
+ /* within 1GB-aligned window of up to 32GB in size */
+ kbuf.buf_max = round_down(kernel_load_addr, SZ_1G)
+ + (unsigned long)SZ_1G * 32;
+ kbuf.top_down = false;
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out_err;
+ initrd_load_addr = kbuf.mem;
+
+ pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ initrd_load_addr, initrd_len, initrd_len);
+ }
+
+ /* load dtb */
+ ret = create_dtb(image, initrd_load_addr, initrd_len, cmdline, &dtb);
+ if (ret) {
+ pr_err("Preparing for new dtb failed\n");
+ goto out_err;
+ }
+
+ dtb_len = fdt_totalsize(dtb);
+ kbuf.buffer = dtb;
+ kbuf.bufsz = dtb_len;
+ kbuf.mem = 0;
+ kbuf.memsz = dtb_len;
+ /* not across 2MB boundary */
+ kbuf.buf_align = SZ_2M;
+ kbuf.buf_max = ULONG_MAX;
+ kbuf.top_down = true;
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out_err;
+ image->arch.dtb = dtb;
+ image->arch.dtb_mem = kbuf.mem;
+
+ pr_debug("Loaded dtb at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ kbuf.mem, dtb_len, dtb_len);
+
+ return 0;
+
+out_err:
+ vfree(dtb);
+ return ret;
+}
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index f407e422a..95fd94209 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -32,6 +32,7 @@
ENTRY(arm64_relocate_new_kernel)
/* Setup the list loop variables. */
+ mov x18, x2 /* x18 = dtb address */
mov x17, x1 /* x17 = kimage_start */
mov x16, x0 /* x16 = kimage_head */
raw_dcache_line_size x15, x0 /* x15 = dcache line size */
@@ -107,7 +108,7 @@ ENTRY(arm64_relocate_new_kernel)
isb
/* Start new image. */
- mov x0, xzr
+ mov x0, x18
mov x1, xzr
mov x2, xzr
mov x3, xzr
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
index c77e95e9b..0d20c7ad4 100644
--- a/arch/powerpc/kernel/machine_kexec_file_64.c
+++ b/arch/powerpc/kernel/machine_kexec_file_64.c
@@ -24,7 +24,6 @@
#include <linux/slab.h>
#include <linux/kexec.h>
-#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
#include <asm/ima.h>
@@ -46,59 +45,6 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
return kexec_image_probe_default(image, buf, buf_len);
}
-/**
- * arch_kexec_walk_mem - call func(data) for each unreserved memory block
- * @kbuf: Context info for the search. Also passed to @func.
- * @func: Function to call for each memory block.
- *
- * This function is used by kexec_add_buffer and kexec_locate_mem_hole
- * to find unreserved memory to load kexec segments into.
- *
- * Return: The memory walk will stop when func returns a non-zero value
- * and that value will be returned. If all free regions are visited without
- * func returning non-zero, then zero will be returned.
- */
-int arch_kexec_walk_mem(struct kexec_buf *kbuf,
- int (*func)(struct resource *, void *))
-{
- int ret = 0;
- u64 i;
- phys_addr_t mstart, mend;
- struct resource res = { };
-
- if (kbuf->top_down) {
- for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0,
- &mstart, &mend, NULL) {
- /*
- * In memblock, end points to the first byte after the
- * range while in kexec, end points to the last byte
- * in the range.
- */
- res.start = mstart;
- res.end = mend - 1;
- ret = func(&res, kbuf);
- if (ret)
- break;
- }
- } else {
- for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend,
- NULL) {
- /*
- * In memblock, end points to the first byte after the
- * range while in kexec, end points to the last byte
- * in the range.
- */
- res.start = mstart;
- res.end = mend - 1;
- ret = func(&res, kbuf);
- if (ret)
- break;
- }
- }
-
- return ret;
-}
-
/**
* setup_purgatory - initialize the purgatory's global variables
* @image: kexec image.
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index f413f57f8..32023b4f9 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -134,16 +134,6 @@ int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data,
return ret;
}
-/*
- * The kernel is loaded to a fixed location. Turn off kexec_locate_mem_hole
- * and provide kbuf->mem by hand.
- */
-int arch_kexec_walk_mem(struct kexec_buf *kbuf,
- int (*func)(struct resource *, void *))
-{
- return 1;
-}
-
int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
Elf_Shdr *section,
const Elf_Shdr *relsec,
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 70caa60b4..8977de5ad 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -535,9 +535,17 @@ static int bzImage64_cleanup(void *loader_data)
#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
{
- return verify_pefile_signature(kernel, kernel_len,
- VERIFY_USE_SECONDARY_KEYRING,
- VERIFYING_KEXEC_PE_SIGNATURE);
+ int ret;
+
+ ret = verify_pefile_signature(kernel, kernel_len,
+ VERIFY_USE_SECONDARY_KEYRING,
+ VERIFYING_KEXEC_PE_SIGNATURE);
+ if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) {
+ ret = verify_pefile_signature(kernel, kernel_len,
+ VERIFY_USE_PLATFORM_KEYRING,
+ VERIFYING_KEXEC_PE_SIGNATURE);
+ }
+ return ret;
}
#endif
diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 817287175..c05c29ae4 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -24,6 +24,9 @@ static struct key *builtin_trusted_keys;
#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
static struct key *secondary_trusted_keys;
#endif
+#ifdef CONFIG_INTEGRITY_PLATFORM_KEYRING
+static struct key *platform_trusted_keys;
+#endif
extern __initconst const u8 system_certificate_list[];
extern __initconst const unsigned long system_certificate_list_size;
@@ -237,11 +240,22 @@ int verify_pkcs7_signature(const void *data, size_t len,
#else
trusted_keys = builtin_trusted_keys;
#endif
+ } else if (trusted_keys == VERIFY_USE_PLATFORM_KEYRING) {
+#ifdef CONFIG_INTEGRITY_PLATFORM_KEYRING
+ trusted_keys = platform_trusted_keys;
+#else
+ trusted_keys = NULL;
+#endif
+ if (!trusted_keys) {
+ ret = -ENOKEY;
+ pr_devel("PKCS#7 platform keyring is not available\n");
+ goto error;
+ }
}
ret = pkcs7_validate_trust(pkcs7, trusted_keys);
if (ret < 0) {
if (ret == -ENOKEY)
- pr_err("PKCS#7 signature not signed with a trusted key\n");
+ pr_devel("PKCS#7 signature not signed with a trusted key\n");
goto error;
}
@@ -266,3 +280,10 @@ int verify_pkcs7_signature(const void *data, size_t len,
EXPORT_SYMBOL_GPL(verify_pkcs7_signature);
#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
+
+#ifdef CONFIG_INTEGRITY_PLATFORM_KEYRING
+void __init set_platform_trusted_keys(struct key *keyring)
+{
+ platform_trusted_keys = keyring;
+}
+#endif
diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h
index 359c2f936..42a93eda3 100644
--- a/include/keys/system_keyring.h
+++ b/include/keys/system_keyring.h
@@ -61,5 +61,13 @@ static inline struct key *get_ima_blacklist_keyring(void)
}
#endif /* CONFIG_IMA_BLACKLIST_KEYRING */
+#if defined(CONFIG_INTEGRITY_PLATFORM_KEYRING) && \
+ defined(CONFIG_SYSTEM_TRUSTED_KEYRING)
+extern void __init set_platform_trusted_keys(struct key *keyring);
+#else
+static inline void set_platform_trusted_keys(struct key *keyring)
+{
+}
+#endif
#endif /* _KEYS_SYSTEM_KEYRING_H */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 34c255c2a..b8a6df5df 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -676,6 +676,10 @@ void efi_native_runtime_setup(void);
#define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f)
#define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23)
+#define EFI_CERT_SHA256_GUID EFI_GUID(0xc1c41626, 0x504c, 0x4092, 0xac, 0xa9, 0x41, 0xf9, 0x36, 0x93, 0x43, 0x28)
+#define EFI_CERT_X509_GUID EFI_GUID(0xa5c059a1, 0x94e4, 0x4aa7, 0x87, 0xb5, 0xab, 0x15, 0x5c, 0x2b, 0xf0, 0x72)
+#define EFI_CERT_X509_SHA256_GUID EFI_GUID(0x3bd2a492, 0x96c0, 0x4079, 0xb4, 0x20, 0xfc, 0xf9, 0x8e, 0xf1, 0x03, 0xed)
+
/*
* This GUID is used to pass to the kernel proper the struct screen_info
* structure that was populated by the stub based on the GOP protocol instance
@@ -947,6 +951,27 @@ typedef struct {
efi_memory_desc_t entry[0];
} efi_memory_attributes_table_t;
+typedef struct {
+ efi_guid_t signature_owner;
+ u8 signature_data[];
+} efi_signature_data_t;
+
+typedef struct {
+ efi_guid_t signature_type;
+ u32 signature_list_size;
+ u32 signature_header_size;
+ u32 signature_size;
+ u8 signature_header[];
+ /* efi_signature_data_t signatures[][] */
+} efi_signature_list_t;
+
+typedef u8 efi_sha256_hash_t[32];
+
+typedef struct {
+ efi_sha256_hash_t to_be_signed_hash;
+ efi_time_t time_of_revocation;
+} efi_cert_x509_sha256_t;
+
/*
* All runtime access to EFI goes through this structure:
*/
@@ -1129,6 +1154,15 @@ extern int efi_memattr_apply_permissions(struct mm_struct *mm,
char * __init efi_md_typeattr_format(char *buf, size_t size,
const efi_memory_desc_t *md);
+
+typedef void (*efi_element_handler_t)(const char *source,
+ const void *element_data,
+ size_t element_size);
+extern int __init parse_efi_signature_list(
+ const char *source,
+ const void *data, size_t size,
+ efi_element_handler_t (*get_handler_for_guid)(const efi_guid_t *));
+
/**
* efi_range_is_wc - check the WC bit on an address range
* @start: starting kvirt address
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 98cf0fb09..542a15c91 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -147,6 +147,19 @@ extern const struct kexec_file_ops * const kexec_file_loaders[];
int kexec_image_probe_default(struct kimage *image, void *buf,
unsigned long buf_len);
+int kexec_image_post_load_cleanup_default(struct kimage *image);
+
+int wrap_kexec_file_load(int kernel_fd, int initrd_fd,
+ unsigned long cmdline_len, const char __user *cmdline_ptr,
+ unsigned long flags);
+
+/*
+ * If kexec_buf.mem is set to this value, kexec_locate_mem_hole()
+ * will try to allocate free memory. Arch may overwrite it.
+ */
+#ifndef KEXEC_BUF_MEM_UNKNOWN
+#define KEXEC_BUF_MEM_UNKNOWN 0
+#endif
/**
* struct kexec_buf - parameters for finding a place for a buffer in memory
@@ -187,8 +200,6 @@ int __weak arch_kexec_apply_relocations(struct purgatory_info *pi,
const Elf_Shdr *relsec,
const Elf_Shdr *symtab);
-int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
- int (*func)(struct resource *, void *));
extern int kexec_add_buffer(struct kexec_buf *kbuf);
int kexec_locate_mem_hole(struct kexec_buf *kbuf);
diff --git a/include/linux/pe.h b/include/linux/pe.h
index 143ce75be..3482b18a4 100644
--- a/include/linux/pe.h
+++ b/include/linux/pe.h
@@ -166,7 +166,7 @@ struct mz_hdr {
uint16_t oem_info; /* oem specific */
uint16_t reserved1[10]; /* reserved */
uint32_t peaddr; /* address of pe header */
- char message[64]; /* message to print */
+ char message[]; /* message to print */
};
struct mz_reloc {
diff --git a/include/linux/verification.h b/include/linux/verification.h
index cfa4730d6..018fb5f13 100644
--- a/include/linux/verification.h
+++ b/include/linux/verification.h
@@ -17,6 +17,7 @@
* should be used.
*/
#define VERIFY_USE_SECONDARY_KEYRING ((struct key *)1UL)
+#define VERIFY_USE_PLATFORM_KEYRING ((struct key *)2UL)
/*
* The use to which an asymmetric key is being put.
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index ca3cebebd..9385ecba2 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -25,6 +25,7 @@
#define KEXEC_FILE_UNLOAD 0x00000001
#define KEXEC_FILE_ON_CRASH 0x00000002
#define KEXEC_FILE_NO_INITRAMFS 0x00000004
+#define KEXEC_FILE_LOAD_WRAP 0x100000000
/* These values match the ELF architecture values.
* Unless there is a good reason that should continue to be the case.
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 47dfad722..d44a55202 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -238,6 +238,16 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
{
int result;
+ if (flags & KEXEC_FILE_LOAD_WRAP) {
+ int kernel_fd, initrd_fd;
+
+ kernel_fd = entry >> 32;
+ initrd_fd = entry & 0x00000000ffffffff;
+ flags &= ~KEXEC_FILE_LOAD_WRAP;
+ return wrap_kexec_file_load(kernel_fd, initrd_fd, nr_segments,
+ (char __user *)segments, flags);
+ }
+
result = kexec_load_check(nr_segments, flags);
if (result)
return result;
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 04f99368b..2a25db8dd 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -16,6 +16,7 @@
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/kexec.h>
+#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/fs.h>
@@ -78,7 +79,7 @@ void * __weak arch_kexec_kernel_image_load(struct kimage *image)
return kexec_image_load_default(image);
}
-static int kexec_image_post_load_cleanup_default(struct kimage *image)
+int kexec_image_post_load_cleanup_default(struct kimage *image)
{
if (!image->fops || !image->fops->cleanup)
return 0;
@@ -407,6 +408,96 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
return ret;
}
+int wrap_kexec_file_load(int kernel_fd, int initrd_fd,
+ unsigned long cmdline_len, const char __user *cmdline_ptr,
+ unsigned long flags)
+{
+ int ret = 0, i;
+ struct kimage **dest_image, *image;
+
+ /* We only trust the superuser with rebooting the system. */
+ if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
+ return -EPERM;
+
+ /* Make sure we have a legal set of flags */
+ if (flags != (flags & KEXEC_FILE_FLAGS))
+ return -EINVAL;
+
+ image = NULL;
+
+ if (!mutex_trylock(&kexec_mutex))
+ return -EBUSY;
+
+ dest_image = &kexec_image;
+ if (flags & KEXEC_FILE_ON_CRASH) {
+ dest_image = &kexec_crash_image;
+ if (kexec_crash_image)
+ arch_kexec_unprotect_crashkres();
+ }
+
+ if (flags & KEXEC_FILE_UNLOAD)
+ goto exchange;
+
+ /*
+ * In case of crash, new kernel gets loaded in reserved region. It is
+ * same memory where old crash kernel might be loaded. Free any
+ * current crash dump kernel before we corrupt it.
+ */
+ if (flags & KEXEC_FILE_ON_CRASH)
+ kimage_free(xchg(&kexec_crash_image, NULL));
+
+ ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
+ cmdline_len, flags);
+ if (ret)
+ goto out;
+
+ ret = machine_kexec_prepare(image);
+ if (ret)
+ goto out;
+
+ /*
+ * Some architecture(like S390) may touch the crash memory before
+ * machine_kexec_prepare(), we must copy vmcoreinfo data after it.
+ */
+ ret = kimage_crash_copy_vmcoreinfo(image);
+ if (ret)
+ goto out;
+
+ ret = kexec_calculate_store_digests(image);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < image->nr_segments; i++) {
+ struct kexec_segment *ksegment;
+
+ ksegment = &image->segment[i];
+ pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
+ i, ksegment->buf, ksegment->bufsz, ksegment->mem,
+ ksegment->memsz);
+
+ ret = kimage_load_segment(image, &image->segment[i]);
+ if (ret)
+ goto out;
+ }
+
+ kimage_terminate(image);
+
+ /*
+ * Free up any temporary buffers allocated which are not needed
+ * after image has been loaded
+ */
+ kimage_file_post_load_cleanup(image);
+exchange:
+ image = xchg(dest_image, image);
+out:
+ if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
+ arch_kexec_protect_crashkres();
+
+ mutex_unlock(&kexec_mutex);
+ kimage_free(image);
+ return ret;
+}
+
static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
struct kexec_buf *kbuf)
{
@@ -501,8 +592,60 @@ static int locate_mem_hole_callback(struct resource *res, void *arg)
return locate_mem_hole_bottom_up(start, end, kbuf);
}
+#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
+static int kexec_walk_memblock(struct kexec_buf *kbuf,
+ int (*func)(struct resource *, void *))
+{
+ return 0;
+}
+#else
+static int kexec_walk_memblock(struct kexec_buf *kbuf,
+ int (*func)(struct resource *, void *))
+{
+ int ret = 0;
+ u64 i;
+ phys_addr_t mstart, mend;
+ struct resource res = { };
+
+ if (kbuf->image->type == KEXEC_TYPE_CRASH)
+ return func(&crashk_res, kbuf);
+
+ if (kbuf->top_down) {
+ for_each_free_mem_range_reverse(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+ &mstart, &mend, NULL) {
+ /*
+ * In memblock, end points to the first byte after the
+ * range while in kexec, end points to the last byte
+ * in the range.
+ */
+ res.start = mstart;
+ res.end = mend - 1;
+ ret = func(&res, kbuf);
+ if (ret)
+ break;
+ }
+ } else {
+ for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+ &mstart, &mend, NULL) {
+ /*
+ * In memblock, end points to the first byte after the
+ * range while in kexec, end points to the last byte
+ * in the range.
+ */
+ res.start = mstart;
+ res.end = mend - 1;
+ ret = func(&res, kbuf);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+#endif
+
/**
- * arch_kexec_walk_mem - call func(data) on free memory regions
+ * kexec_walk_resources - call func(data) on free memory regions
* @kbuf: Context info for the search. Also passed to @func.
* @func: Function to call for each memory region.
*
@@ -510,8 +653,8 @@ static int locate_mem_hole_callback(struct resource *res, void *arg)
* and that value will be returned. If all free regions are visited without
* func returning non-zero, then zero will be returned.
*/
-int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
- int (*func)(struct resource *, void *))
+static int kexec_walk_resources(struct kexec_buf *kbuf,
+ int (*func)(struct resource *, void *))
{
if (kbuf->image->type == KEXEC_TYPE_CRASH)
return walk_iomem_res_desc(crashk_res.desc,
@@ -534,7 +677,14 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
{
int ret;
- ret = arch_kexec_walk_mem(kbuf, locate_mem_hole_callback);
+ /* Arch knows where to place */
+ if (kbuf->mem != KEXEC_BUF_MEM_UNKNOWN)
+ return 0;
+
+ if (IS_ENABLED(CONFIG_ARCH_DISCARD_MEMBLOCK))
+ ret = kexec_walk_resources(kbuf, locate_mem_hole_callback);
+ else
+ ret = kexec_walk_memblock(kbuf, locate_mem_hole_callback);
return ret == 1 ? 0 : -EADDRNOTAVAIL;
}
diff --git a/lib/Makefile b/lib/Makefile
index 01f8051b4..455797430 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -213,7 +213,7 @@ KASAN_SANITIZE_stackdepot.o := n
KCOV_INSTRUMENT_stackdepot.o := n
libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
- fdt_empty_tree.o
+ fdt_empty_tree.o fdt_addresses.o
$(foreach file, $(libfdt_files), \
$(eval CFLAGS_$(file) = -I$(src)/../scripts/dtc/libfdt))
lib-$(CONFIG_LIBFDT) += $(libfdt_files)
diff --git a/lib/fdt_addresses.c b/lib/fdt_addresses.c
new file mode 100644
index 000000000..23610bcf3
--- /dev/null
+++ b/lib/fdt_addresses.c
@@ -0,0 +1,2 @@
+#include <linux/libfdt_env.h>
+#include "../scripts/dtc/libfdt/fdt_addresses.c"
diff --git a/scripts/dtc/libfdt/fdt_addresses.c b/scripts/dtc/libfdt/fdt_addresses.c
index eff4dbcc7..8211d00ba 100644
--- a/scripts/dtc/libfdt/fdt_addresses.c
+++ b/scripts/dtc/libfdt/fdt_addresses.c
@@ -94,3 +94,50 @@ int fdt_size_cells(const void *fdt, int nodeoffset)
return val;
}
+
+/* This function assumes that [address|size]_cells is 1 or 2 */
+int fdt_appendprop_addrrange(void *fdt, int parent, int nodeoffset,
+ const char *name, uint64_t addr, uint64_t size)
+{
+ int addr_cells, size_cells, ret;
+ uint8_t data[sizeof(fdt64_t) * 2], *prop;
+
+ ret = fdt_address_cells(fdt, parent);
+ if (ret < 0)
+ return ret;
+ addr_cells = ret;
+
+ ret = fdt_size_cells(fdt, parent);
+ if (ret < 0)
+ return ret;
+ size_cells = ret;
+
+ /* check validity of address */
+ prop = data;
+ if (addr_cells == 1) {
+ if ((addr > UINT32_MAX) || ((UINT32_MAX + 1 - addr) < size))
+ return -FDT_ERR_BADVALUE;
+
+ fdt32_st(prop, (uint32_t)addr);
+ } else if (addr_cells == 2) {
+ fdt64_st(prop, addr);
+ } else {
+ return -FDT_ERR_BADNCELLS;
+ }
+
+ /* check validity of size */
+ prop += addr_cells * sizeof(fdt32_t);
+ if (size_cells == 1) {
+ if (size > UINT32_MAX)
+ return -FDT_ERR_BADVALUE;
+
+ fdt32_st(prop, (uint32_t)size);
+ } else if (size_cells == 2) {
+ fdt64_st(prop, size);
+ } else {
+ return -FDT_ERR_BADNCELLS;
+ }
+
+ return fdt_appendprop(fdt, nodeoffset, name, data,
+ (addr_cells + size_cells) * sizeof(fdt32_t));
+}
diff --git a/scripts/dtc/libfdt/libfdt.h b/scripts/dtc/libfdt/libfdt.h
index 1e27780e1..da72b54c4 100644
--- a/scripts/dtc/libfdt/libfdt.h
+++ b/scripts/dtc/libfdt/libfdt.h
@@ -153,6 +153,30 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
+static inline void fdt32_st(void *property, uint32_t value)
+{
+ uint8_t *bp = property;
+
+ bp[0] = value >> 24;
+ bp[1] = (value >> 16) & 0xff;
+ bp[2] = (value >> 8) & 0xff;
+ bp[3] = value & 0xff;
+}
+
+static inline void fdt64_st(void *property, uint64_t value)
+{
+ uint8_t *bp = property;
+
+ bp[0] = value >> 56;
+ bp[1] = (value >> 48) & 0xff;
+ bp[2] = (value >> 40) & 0xff;
+ bp[3] = (value >> 32) & 0xff;
+ bp[4] = (value >> 24) & 0xff;
+ bp[5] = (value >> 16) & 0xff;
+ bp[6] = (value >> 8) & 0xff;
+ bp[7] = value & 0xff;
+}
+
/**********************************************************************/
/* Traversal functions */
/**********************************************************************/
@@ -1765,6 +1789,43 @@ static inline int fdt_appendprop_cell(void *fdt, int nodeoffset,
#define fdt_appendprop_string(fdt, nodeoffset, name, str) \
fdt_appendprop((fdt), (nodeoffset), (name), (str), strlen(str)+1)
+/**
+ * fdt_appendprop_addrrange - append a address range property
+ * @fdt: pointer to the device tree blob
+ * @parent: offset of the parent node
+ * @nodeoffset: offset of the node to add a property at
+ * @name: name of property
+ * @addr: start address of a given range
+ * @size: size of a given range
+ *
+ * fdt_appendprop_addrrange() appends an address range value (start
+ * address and size) to the value of the named property in the given
+ * node, or creates a new property with that value if it does not
+ * already exist.
+ * If "name" is not specified, a default "reg" is used.
+ * Cell sizes are determined by parent's #address-cells and #size-cells.
+ *
+ * This function may insert data into the blob, and will therefore
+ * change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid
+ * #address-cells property
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADVALUE, addr or size doesn't fit to respective cells size
+ * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ * contain a new property
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_appendprop_addrrange(void *fdt, int parent, int nodeoffset,
+ const char *name, uint64_t addr, uint64_t size);
+
/**
* fdt_delprop - delete a property
* @fdt: pointer to the device tree blob
diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig
index da9565891..4b4d2aeef 100644
--- a/security/integrity/Kconfig
+++ b/security/integrity/Kconfig
@@ -51,6 +51,17 @@ config INTEGRITY_TRUSTED_KEYRING
.evm keyrings be signed by a key on the system trusted
keyring.
+config INTEGRITY_PLATFORM_KEYRING
+ bool "Provide keyring for platform/firmware trusted keys"
+ depends on INTEGRITY_ASYMMETRIC_KEYS
+ depends on SYSTEM_BLACKLIST_KEYRING
+ depends on EFI
+ help
+ Provide a separate, distinct keyring for platform trusted keys, which
+ the kernel automatically populates during initialization from values
+ provided by the platform for verifying the kexec'ed kerned image
+ and, possibly, the initramfs signature.
+
config INTEGRITY_AUDIT
bool "Enables integrity auditing support "
depends on AUDIT
diff --git a/security/integrity/Makefile b/security/integrity/Makefile
index 04d6e462b..86df9aba8 100644
--- a/security/integrity/Makefile
+++ b/security/integrity/Makefile
@@ -9,6 +9,11 @@ integrity-y := iint.o
integrity-$(CONFIG_INTEGRITY_AUDIT) += integrity_audit.o
integrity-$(CONFIG_INTEGRITY_SIGNATURE) += digsig.o
integrity-$(CONFIG_INTEGRITY_ASYMMETRIC_KEYS) += digsig_asymmetric.o
+integrity-$(CONFIG_INTEGRITY_PLATFORM_KEYRING) += platform_certs/platform_keyring.o \
+ platform_certs/efi_parser.o \
+ platform_certs/load_uefi.o
+obj-$(CONFIG_LOAD_UEFI_KEYS) += platform_certs/load_uefi.o
+$(obj)/load_uefi.o: KBUILD_CFLAGS += -fshort-wchar
subdir-$(CONFIG_IMA) += ima
obj-$(CONFIG_IMA) += ima/
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index 9bb0a7f28..c585ca8db 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -35,6 +35,7 @@ static const char *keyring_name[INTEGRITY_KEYRING_MAX] = {
".ima",
#endif
"_module",
+ ".platform",
};
#ifdef CONFIG_INTEGRITY_TRUSTED_KEYRING
@@ -79,12 +80,42 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
return -EOPNOTSUPP;
}
-int __init integrity_init_keyring(const unsigned int id)
+static int __init __integrity_init_keyring(const unsigned int id,
+ key_perm_t perm,
+ struct key_restriction *restriction)
{
const struct cred *cred = current_cred();
- struct key_restriction *restriction;
int err = 0;
+ keyring[id] = keyring_alloc(keyring_name[id], KUIDT_INIT(0),
+ KGIDT_INIT(0), cred, perm,
+ KEY_ALLOC_NOT_IN_QUOTA, restriction, NULL);
+ if (IS_ERR(keyring[id])) {
+ err = PTR_ERR(keyring[id]);
+ pr_info("Can't allocate %s keyring (%d)\n",
+ keyring_name[id], err);
+ keyring[id] = NULL;
+ } else {
+ if (id == INTEGRITY_KEYRING_PLATFORM)
+ set_platform_trusted_keys(keyring[id]);
+ }
+
+ return err;
+}
+
+int __init integrity_init_keyring(const unsigned int id)
+{
+ struct key_restriction *restriction;
+ key_perm_t perm;
+
+ perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW
+ | KEY_USR_READ | KEY_USR_SEARCH;
+
+ if (id == INTEGRITY_KEYRING_PLATFORM) {
+ restriction = NULL;
+ goto out;
+ }
+
if (!init_keyring)
return 0;
@@ -93,32 +124,43 @@ int __init integrity_init_keyring(const unsigned int id)
return -ENOMEM;
restriction->check = restrict_link_to_ima;
+ perm |= KEY_USR_WRITE;
- keyring[id] = keyring_alloc(keyring_name[id], KUIDT_INIT(0),
- KGIDT_INIT(0), cred,
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ |
- KEY_USR_WRITE | KEY_USR_SEARCH),
- KEY_ALLOC_NOT_IN_QUOTA,
- restriction, NULL);
- if (IS_ERR(keyring[id])) {
- err = PTR_ERR(keyring[id]);
- pr_info("Can't allocate %s keyring (%d)\n",
- keyring_name[id], err);
- keyring[id] = NULL;
+out:
+ return __integrity_init_keyring(id, perm, restriction);
+}
+
+int __init integrity_add_key(const unsigned int id, const void *data,
+ off_t size, key_perm_t perm)
+{
+ key_ref_t key;
+ int rc = 0;
+
+ if (!keyring[id])
+ return -EINVAL;
+
+ key = key_create_or_update(make_key_ref(keyring[id], 1), "asymmetric",
+ NULL, data, size, perm,
+ KEY_ALLOC_NOT_IN_QUOTA);
+ if (IS_ERR(key)) {
+ rc = PTR_ERR(key);
+ pr_err("Problem loading X.509 certificate %d\n", rc);
+ } else {
+ pr_notice("Loaded X.509 cert '%s'\n",
+ key_ref_to_ptr(key)->description);
+ key_ref_put(key);
}
- return err;
+
+ return rc;
+
}
int __init integrity_load_x509(const unsigned int id, const char *path)
{
- key_ref_t key;
void *data;
loff_t size;
int rc;
-
- if (!keyring[id])
- return -EINVAL;
+ key_perm_t perm;
rc = kernel_read_file_from_path(path, &data, &size, 0,
READING_X509_CERTIFICATE);
@@ -127,23 +169,21 @@ int __init integrity_load_x509(const unsigned int id, const char *path)
return rc;
}
- key = key_create_or_update(make_key_ref(keyring[id], 1),
- "asymmetric",
- NULL,
- data,
- size,
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ),
- KEY_ALLOC_NOT_IN_QUOTA);
- if (IS_ERR(key)) {
- rc = PTR_ERR(key);
- pr_err("Problem loading X.509 certificate (%d): %s\n",
- rc, path);
- } else {
- pr_notice("Loaded X.509 cert '%s': %s\n",
- key_ref_to_ptr(key)->description, path);
- key_ref_put(key);
- }
+ perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ;
+
+ pr_info("Loading X.509 certificate: %s\n", path);
+ rc = integrity_add_key(id, (const void *)data, size, perm);
+
vfree(data);
- return 0;
+ return rc;
+}
+
+int __init integrity_load_cert(const unsigned int id, const char *source,
+ const void *data, size_t len, key_perm_t perm)
+{
+ if (!data)
+ return -EINVAL;
+
+ pr_info("Loading X.509 certificate: %s\n", source);
+ return integrity_add_key(id, data, len, perm);
}
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 08aabf763..ad285f17d 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -294,12 +294,22 @@ int ima_appraise_measurement(enum ima_hooks func,
case EVM_IMA_XATTR_DIGSIG:
set_bit(IMA_DIGSIG, &iint->atomic_flags);
rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA,
- (const char *)xattr_value, rc,
+ (const char *)xattr_value,
+ xattr_len,
iint->ima_hash->digest,
iint->ima_hash->length);
if (rc == -EOPNOTSUPP) {
status = INTEGRITY_UNKNOWN;
- } else if (rc) {
+ break;
+ }
+ if (IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING) && rc &&
+ func == KEXEC_KERNEL_CHECK)
+ rc = integrity_digsig_verify(INTEGRITY_KEYRING_PLATFORM,
+ (const char *)xattr_value,
+ xattr_len,
+ iint->ima_hash->digest,
+ iint->ima_hash->length);
+ if (rc) {
cause = "invalid-signature";
status = INTEGRITY_FAIL;
} else {
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 0ec551223..0750bab0d 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -142,7 +142,8 @@ int integrity_kernel_read(struct file *file, loff_t offset,
#define INTEGRITY_KEYRING_EVM 0
#define INTEGRITY_KEYRING_IMA 1
#define INTEGRITY_KEYRING_MODULE 2
-#define INTEGRITY_KEYRING_MAX 3
+#define INTEGRITY_KEYRING_PLATFORM 3
+#define INTEGRITY_KEYRING_MAX 4
extern struct dentry *integrity_dir;
@@ -153,6 +154,8 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
int __init integrity_init_keyring(const unsigned int id);
int __init integrity_load_x509(const unsigned int id, const char *path);
+int __init integrity_load_cert(const unsigned int id, const char *source,
+ const void *data, size_t len, key_perm_t perm);
#else
static inline int integrity_digsig_verify(const unsigned int id,
@@ -166,6 +169,14 @@ static inline int integrity_init_keyring(const unsigned int id)
{
return 0;
}
+
+static inline int __init integrity_load_cert(const unsigned int id,
+ const char *source,
+ const void *data, size_t len,
+ key_perm_t perm)
+{
+ return 0;
+}
#endif /* CONFIG_INTEGRITY_SIGNATURE */
#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
@@ -222,3 +233,13 @@ integrity_audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type)
}
#endif
+
+#ifdef CONFIG_INTEGRITY_PLATFORM_KEYRING
+void __init add_to_platform_keyring(const char *source, const void *data,
+ size_t len);
+#else
+static inline void __init add_to_platform_keyring(const char *source,
+ const void *data, size_t len)
+{
+}
+#endif
diff --git a/security/integrity/platform_certs/efi_parser.c b/security/integrity/platform_certs/efi_parser.c
new file mode 100644
index 000000000..18f01f36f
--- /dev/null
+++ b/security/integrity/platform_certs/efi_parser.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* EFI signature/key/certificate list parser
+ *
+ * Copyright (C) 2012, 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells(a)redhat.com)
+ */
+
+#define pr_fmt(fmt) "EFI: "fmt
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/err.h>
+#include <linux/efi.h>
+
+/**
+ * parse_efi_signature_list - Parse an EFI signature list for certificates
+ * @source: The source of the key
+ * @data: The data blob to parse
+ * @size: The size of the data blob
+ * @get_handler_for_guid: Get the handler func for the sig type (or NULL)
+ *
+ * Parse an EFI signature list looking for elements of interest. A list is
+ * made up of a series of sublists, where all the elements in a sublist are of
+ * the same type, but sublists can be of different types.
+ *
+ * For each sublist encountered, the @get_handler_for_guid function is called
+ * with the type specifier GUID and returns either a pointer to a function to
+ * handle elements of that type or NULL if the type is not of interest.
+ *
+ * If the sublist is of interest, each element is passed to the handler
+ * function in turn.
+ *
+ * Error EBADMSG is returned if the list doesn't parse correctly and 0 is
+ * returned if the list was parsed correctly. No error can be returned from
+ * the @get_handler_for_guid function or the element handler function it
+ * returns.
+ */
+int __init parse_efi_signature_list(
+ const char *source,
+ const void *data, size_t size,
+ efi_element_handler_t (*get_handler_for_guid)(const efi_guid_t *))
+{
+ efi_element_handler_t handler;
+ unsigned int offs = 0;
+
+ pr_devel("-->%s(,%zu)\n", __func__, size);
+
+ while (size > 0) {
+ const efi_signature_data_t *elem;
+ efi_signature_list_t list;
+ size_t lsize, esize, hsize, elsize;
+
+ if (size < sizeof(list))
+ return -EBADMSG;
+
+ memcpy(&list, data, sizeof(list));
+ pr_devel("LIST[%04x] guid=%pUl ls=%x hs=%x ss=%x\n",
+ offs,
+ list.signature_type.b, list.signature_list_size,
+ list.signature_header_size, list.signature_size);
+
+ lsize = list.signature_list_size;
+ hsize = list.signature_header_size;
+ esize = list.signature_size;
+ elsize = lsize - sizeof(list) - hsize;
+
+ if (lsize > size) {
+ pr_devel("<--%s() = -EBADMSG [overrun @%x]\n",
+ __func__, offs);
+ return -EBADMSG;
+ }
+
+ if (lsize < sizeof(list) ||
+ lsize - sizeof(list) < hsize ||
+ esize < sizeof(*elem) ||
+ elsize < esize ||
+ elsize % esize != 0) {
+ pr_devel("- bad size combo @%x\n", offs);
+ return -EBADMSG;
+ }
+
+ handler = get_handler_for_guid(&list.signature_type);
+ if (!handler) {
+ data += lsize;
+ size -= lsize;
+ offs += lsize;
+ continue;
+ }
+
+ data += sizeof(list) + hsize;
+ size -= sizeof(list) + hsize;
+ offs += sizeof(list) + hsize;
+
+ for (; elsize > 0; elsize -= esize) {
+ elem = data;
+
+ pr_devel("ELEM[%04x]\n", offs);
+ handler(source,
+ &elem->signature_data,
+ esize - sizeof(*elem));
+
+ data += esize;
+ size -= esize;
+ offs += esize;
+ }
+ }
+
+ return 0;
+}
diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c
new file mode 100644
index 000000000..81b19c528
--- /dev/null
+++ b/security/integrity/platform_certs/load_uefi.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+#include <linux/err.h>
+#include <linux/efi.h>
+#include <linux/slab.h>
+#include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
+#include "../integrity.h"
+
+static efi_guid_t efi_cert_x509_guid __initdata = EFI_CERT_X509_GUID;
+static efi_guid_t efi_cert_x509_sha256_guid __initdata =
+ EFI_CERT_X509_SHA256_GUID;
+static efi_guid_t efi_cert_sha256_guid __initdata = EFI_CERT_SHA256_GUID;
+
+/*
+ * Look to see if a UEFI variable called MokIgnoreDB exists and return true if
+ * it does.
+ *
+ * This UEFI variable is set by the shim if a user tells the shim to not use
+ * the certs/hashes in the UEFI db variable for verification purposes. If it
+ * is set, we should ignore the db variable also and the true return indicates
+ * this.
+ */
+static __init bool uefi_check_ignore_db(void)
+{
+ efi_status_t status;
+ unsigned int db = 0;
+ unsigned long size = sizeof(db);
+ efi_guid_t guid = EFI_SHIM_LOCK_GUID;
+
+ status = efi.get_variable(L"MokIgnoreDB", &guid, NULL, &size, &db);
+ return status == EFI_SUCCESS;
+}
+
+/*
+ * Get a certificate list blob from the named EFI variable.
+ */
+static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid,
+ unsigned long *size)
+{
+ efi_status_t status;
+ unsigned long lsize = 4;
+ unsigned long tmpdb[4];
+ void *db;
+
+ status = efi.get_variable(name, guid, NULL, &lsize, &tmpdb);
+ if (status != EFI_BUFFER_TOO_SMALL) {
+ pr_err("Couldn't get size: 0x%lx\n", status);
+ return NULL;
+ }
+
+ db = kmalloc(lsize, GFP_KERNEL);
+ if (!db)
+ return NULL;
+
+ status = efi.get_variable(name, guid, NULL, &lsize, db);
+ if (status != EFI_SUCCESS) {
+ kfree(db);
+ pr_err("Error reading db var: 0x%lx\n", status);
+ return NULL;
+ }
+
+ *size = lsize;
+ return db;
+}
+
+/*
+ * Blacklist a hash.
+ */
+static __init void uefi_blacklist_hash(const char *source, const void *data,
+ size_t len, const char *type,
+ size_t type_len)
+{
+ char *hash, *p;
+
+ hash = kmalloc(type_len + len * 2 + 1, GFP_KERNEL);
+ if (!hash)
+ return;
+ p = memcpy(hash, type, type_len);
+ p += type_len;
+ bin2hex(p, data, len);
+ p += len * 2;
+ *p = 0;
+
+ mark_hash_blacklisted(hash);
+ kfree(hash);
+}
+
+/*
+ * Blacklist an X509 TBS hash.
+ */
+static __init void uefi_blacklist_x509_tbs(const char *source,
+ const void *data, size_t len)
+{
+ uefi_blacklist_hash(source, data, len, "tbs:", 4);
+}
+
+/*
+ * Blacklist the hash of an executable.
+ */
+static __init void uefi_blacklist_binary(const char *source,
+ const void *data, size_t len)
+{
+ uefi_blacklist_hash(source, data, len, "bin:", 4);
+}
+
+/*
+ * Return the appropriate handler for particular signature list types found in
+ * the UEFI db and MokListRT tables.
+ */
+static __init efi_element_handler_t get_handler_for_db(const efi_guid_t *
+ sig_type)
+{
+ if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0)
+ return add_to_platform_keyring;
+ return 0;
+}
+
+/*
+ * Return the appropriate handler for particular signature list types found in
+ * the UEFI dbx and MokListXRT tables.
+ */
+static __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *
+ sig_type)
+{
+ if (efi_guidcmp(*sig_type, efi_cert_x509_sha256_guid) == 0)
+ return uefi_blacklist_x509_tbs;
+ if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0)
+ return uefi_blacklist_binary;
+ return 0;
+}
+
+/*
+ * Load the certs contained in the UEFI databases into the platform trusted
+ * keyring and the UEFI blacklisted X.509 cert SHA256 hashes into the blacklist
+ * keyring.
+ */
+static int __init load_uefi_certs(void)
+{
+ efi_guid_t secure_var = EFI_IMAGE_SECURITY_DATABASE_GUID;
+ efi_guid_t mok_var = EFI_SHIM_LOCK_GUID;
+ void *db = NULL, *dbx = NULL, *mok = NULL;
+ unsigned long dbsize = 0, dbxsize = 0, moksize = 0;
+ int rc = 0;
+
+ if (!efi.get_variable)
+ return false;
+
+ /* Get db, MokListRT, and dbx. They might not exist, so it isn't
+ * an error if we can't get them.
+ */
+ if (!uefi_check_ignore_db()) {
+ db = get_cert_list(L"db", &secure_var, &dbsize);
+ if (!db) {
+ pr_err("MODSIGN: Couldn't get UEFI db list\n");
+ } else {
+ rc = parse_efi_signature_list("UEFI:db",
+ db, dbsize, get_handler_for_db);
+ if (rc)
+ pr_err("Couldn't parse db signatures: %d\n",
+ rc);
+ kfree(db);
+ }
+ }
+
+ mok = get_cert_list(L"MokListRT", &mok_var, &moksize);
+ if (!mok) {
+ pr_info("Couldn't get UEFI MokListRT\n");
+ } else {
+ rc = parse_efi_signature_list("UEFI:MokListRT",
+ mok, moksize, get_handler_for_db);
+ if (rc)
+ pr_err("Couldn't parse MokListRT signatures: %d\n", rc);
+ kfree(mok);
+ }
+
+ dbx = get_cert_list(L"dbx", &secure_var, &dbxsize);
+ if (!dbx) {
+ pr_info("Couldn't get UEFI dbx list\n");
+ } else {
+ rc = parse_efi_signature_list("UEFI:dbx",
+ dbx, dbxsize,
+ get_handler_for_dbx);
+ if (rc)
+ pr_err("Couldn't parse dbx signatures: %d\n", rc);
+ kfree(dbx);
+ }
+
+ return rc;
+}
+late_initcall(load_uefi_certs);
diff --git a/security/integrity/platform_certs/platform_keyring.c b/security/integrity/platform_certs/platform_keyring.c
new file mode 100644
index 000000000..bcafd7387
--- /dev/null
+++ b/security/integrity/platform_certs/platform_keyring.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Platform keyring for firmware/platform keys
+ *
+ * Copyright IBM Corporation, 2018
+ * Author(s): Nayna Jain <nayna(a)linux.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include "../integrity.h"
+
+/**
+ * add_to_platform_keyring - Add to platform keyring without validation.
+ * @source: Source of key
+ * @data: The blob holding the key
+ * @len: The length of the data blob
+ *
+ * Add a key to the platform keyring without checking its trust chain. This
+ * is available only during kernel initialisation.
+ */
+void __init add_to_platform_keyring(const char *source, const void *data,
+ size_t len)
+{
+ key_perm_t perm;
+ int rc;
+
+ perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW;
+
+ rc = integrity_load_cert(INTEGRITY_KEYRING_PLATFORM, source, data, len,
+ perm);
+ if (rc)
+ pr_info("Error adding keys to platform keyring %s\n", source);
+}
+
+/*
+ * Create the trusted keyrings.
+ */
+static __init int platform_keyring_init(void)
+{
+ int rc;
+
+ rc = integrity_init_keyring(INTEGRITY_KEYRING_PLATFORM);
+ if (rc)
+ return rc;
+
+ pr_notice("Platform Keyring initialized\n");
+ return 0;
+}
+
+/*
+ * Must be initialised before we try and load the keys into the keyring.
+ */
+device_initcall(platform_keyring_init);
--
2.27.0
2
35

[PATCH kernel-4.19] scsi: hisi_sas: set sense data when the sas disk's I/O abnormally completed
by Yang Yingliang 23 Sep '21
by Yang Yingliang 23 Sep '21
23 Sep '21
From: yangxingui <yangxingui(a)huawei.com>
driver inclusion
category: bugfix
bugzilla: NA
CVE: NA
---------------------------
The sense data of the sas disk is used by the kernel and upper layers to
perform some policies on disks when the I/O is abnormally completed.
Such as the logs as follow, if the driver transmit sense data to the upper
layer, the disk may be repaired by remap policy of disk management system.
[Wed Sep 15 13:03:04 2021] hisi_sas_v3_hw 0000:74:02.0: erroneous
completion iptt=3342 task= pK-error dev id=0
sas_addr=0x5541310520e0b000 CQ hdr: 0x1503 0xd0e 0x0 0x20000
Error info: 0x1200 0x0 0x0 0x40
[Wed Sep 15 13:03:04 2021] hisi_sas_v3_hw 0000:74:02.0: data underflow,
rsp_code:0x72, sensekey:0x3, ASC:0x11, ASCQ:0x0.
Signed-off-by: yangxingui <yangxingui(a)huawei.com>
Reviewed-by: ouyangdelong < ouyangdelong(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/scsi/hisi_sas/hisi_sas.h | 2 ++
drivers/scsi/hisi_sas/hisi_sas_main.c | 17 +++++++++++++++++
drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 1 +
3 files changed, 20 insertions(+)
diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
index eae0e533ce7fc..193fc960d87fd 100644
--- a/drivers/scsi/hisi_sas/hisi_sas.h
+++ b/drivers/scsi/hisi_sas/hisi_sas.h
@@ -571,6 +571,8 @@ extern void hisi_sas_free(struct hisi_hba *hisi_hba);
extern u8 hisi_sas_get_ata_protocol(struct host_to_dev_fis *fis,
int direction);
extern struct hisi_sas_port *to_hisi_sas_port(struct asd_sas_port *sas_port);
+extern void hisi_sas_set_sense_data(struct sas_task *task,
+ struct hisi_sas_slot *slot);
extern void hisi_sas_sata_done(struct sas_task *task,
struct hisi_sas_slot *slot);
extern int hisi_sas_get_fw_info(struct hisi_hba *hisi_hba);
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index f944371ac3133..51555cfc40578 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -105,6 +105,23 @@ u8 hisi_sas_get_ata_protocol(struct host_to_dev_fis *fis, int direction)
}
EXPORT_SYMBOL_GPL(hisi_sas_get_ata_protocol);
+void hisi_sas_set_sense_data(struct sas_task *task,
+ struct hisi_sas_slot *slot)
+{
+ struct ssp_response_iu *iu =
+ hisi_sas_status_buf_addr_mem(slot) +
+ sizeof(struct hisi_sas_err_record);
+ if (iu->datapres == 2) {
+ struct task_status_struct *ts = &task->task_status;
+
+ ts->buf_valid_size =
+ min_t(int, SAS_STATUS_BUF_SIZE,
+ be32_to_cpu(iu->sense_data_len));
+ memcpy(ts->buf, iu->sense_data, ts->buf_valid_size);
+ }
+}
+EXPORT_SYMBOL_GPL(hisi_sas_set_sense_data);
+
void hisi_sas_sata_done(struct sas_task *task,
struct hisi_sas_slot *slot)
{
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index a9f53dbb91679..06b4f2db62f7b 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -2234,6 +2234,7 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
ts->stat = SAS_OPEN_REJECT;
ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
}
+ hisi_sas_set_sense_data(task, slot);
break;
case SAS_PROTOCOL_SATA:
case SAS_PROTOCOL_STP:
--
2.25.1
1
0

23 Sep '21
From: Yang Yang <yang.yang(a)vivo.com>
mainline inclusion
from mainline-5.12-rc1
commit ffa772cfe9356ce94d3061335c2681f60e7c1c5b
category: bugfix
bugzilla: 182133
CVE: NA
-------------------------------------------------
Hang occurs when user changes the scheduler queue depth, by writing to
the 'nr_requests' sysfs file of that device.
The details of the environment that we found the problem are as follows:
an eMMC block device
total driver tags: 16
default queue_depth: 32
kqd->async_depth initialized in kyber_init_sched() with queue_depth=32
Then we change queue_depth to 256, by writing to the 'nr_requests' sysfs
file. But kqd->async_depth don't be updated after queue_depth changes.
Now the value of async depth is too small for queue_depth=256, this may
cause hang.
This patch introduces kyber_depth_updated(), so that kyber can update
async depth when queue depth changes.
Signed-off-by: Yang Yang <yang.yang(a)vivo.com>
Reviewed-by: Omar Sandoval <osandov(a)fb.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Conflict: block/kyber-iosched.c
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Hou Tao <houtao1(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
block/kyber-iosched.c | 29 +++++++++++++----------------
1 file changed, 13 insertions(+), 16 deletions(-)
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index a1660bafc9124..9eeb60d97f0ca 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -288,15 +288,6 @@ static void kyber_stat_timer_fn(struct blk_stat_callback *cb)
blk_stat_activate_msecs(kqd->cb, 100);
}
-static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
-{
- /*
- * All of the hardware queues have the same depth, so we can just grab
- * the shift of the first one.
- */
- return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
-}
-
static int kyber_bucket_fn(const struct request *rq)
{
return kyber_sched_domain(rq->cmd_flags);
@@ -306,7 +297,6 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
{
struct kyber_queue_data *kqd;
unsigned int max_tokens;
- unsigned int shift;
int ret = -ENOMEM;
int i;
@@ -341,9 +331,6 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
sbitmap_queue_resize(&kqd->domain_tokens[i], kyber_depth[i]);
}
- shift = kyber_sched_tags_shift(kqd);
- kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
-
kqd->read_lat_nsec = 2000000ULL;
kqd->write_lat_nsec = 10000000ULL;
@@ -403,9 +390,19 @@ static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
INIT_LIST_HEAD(&kcq->rq_list[i]);
}
-static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx)
{
struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
+ struct blk_mq_tags *tags = hctx->sched_tags;
+ unsigned int shift = tags->bitmap_tags.sb.shift;
+
+ kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
+
+ sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, kqd->async_depth);
+}
+
+static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+{
struct kyber_hctx_data *khd;
int i;
@@ -446,8 +443,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
khd->batching = 0;
hctx->sched_data = khd;
- sbitmap_queue_min_shallow_depth(&hctx->sched_tags->bitmap_tags,
- kqd->async_depth);
+ kyber_depth_updated(hctx);
return 0;
@@ -966,6 +962,7 @@ static struct elevator_type kyber_sched = {
.completed_request = kyber_completed_request,
.dispatch_request = kyber_dispatch_request,
.has_work = kyber_has_work,
+ .depth_updated = kyber_depth_updated,
},
.uses_mq = true,
#ifdef CONFIG_BLK_DEBUG_FS
--
2.25.1
1
1