
[PATCH openEuler-1.0-LTS 1/3] blk-mq: factor out some helpers to quiesce/unquiesce queue
by Yang Yingliang 09 Dec '21
From: Yu Kuai <yukuai3(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: 173974
CVE: NA
---------------------------
Prepare to support concurrent quiescing of queues between drivers and the
block layer; no functional changes.
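For context, drivers bracket their critical sections with the existing
exported API, which this refactor leaves unchanged; a minimal usage sketch
(hypothetical driver code, not part of this patch):

	#include <linux/blk-mq.h>

	/* Stop dispatch, reconfigure safely, then resume. */
	static void example_reconfigure(struct request_queue *q)
	{
		blk_mq_quiesce_queue(q);	/* waits for in-flight dispatches */
		/* ... no dispatch can run until we unquiesce ... */
		blk_mq_unquiesce_queue(q);	/* clears the flag, reruns hw queues */
	}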
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Hou Tao <houtao1(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
block/blk-mq.c | 58 ++++++++++++++++++++++++++++++++++----------------
1 file changed, 40 insertions(+), 18 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ef62a83314a5d..f9b4b73a2f38d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -211,32 +211,29 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+static void __blk_mq_quiesce_queue_nowait(struct request_queue *q,
+ unsigned int flag)
+{
+ blk_queue_flag_set(flag, q);
+}
+
/*
* FIXME: replace the scsi_internal_device_*block_nowait() calls in the
* mpt3sas driver such that this function can be removed.
*/
void blk_mq_quiesce_queue_nowait(struct request_queue *q)
{
- blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+ __blk_mq_quiesce_queue_nowait(q, QUEUE_FLAG_QUIESCED);
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
-/**
- * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
- * @q: request queue.
- *
- * Note: this function does not prevent that the struct request end_io()
- * callback function is invoked. Once this function is returned, we make
- * sure no dispatch can happen until the queue is unquiesced via
- * blk_mq_unquiesce_queue().
- */
-void blk_mq_quiesce_queue(struct request_queue *q)
+static void __blk_mq_quiesce_queue(struct request_queue *q, unsigned int flag)
{
struct blk_mq_hw_ctx *hctx;
unsigned int i;
bool rcu = false;
- blk_mq_quiesce_queue_nowait(q);
+ __blk_mq_quiesce_queue_nowait(q, flag);
queue_for_each_hw_ctx(q, hctx, i) {
if (hctx->flags & BLK_MQ_F_BLOCKING)
@@ -247,15 +244,30 @@ void blk_mq_quiesce_queue(struct request_queue *q)
if (rcu)
synchronize_rcu();
}
+
+/**
+ * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
+ * @q: request queue.
+ *
+ * Note: this function does not prevent that the struct request end_io()
+ * callback function is invoked. Once this function is returned, we make
+ * sure no dispatch can happen until the queue is unquiesced via
+ * blk_mq_unquiesce_queue().
+ */
+void blk_mq_quiesce_queue(struct request_queue *q)
+{
+ __blk_mq_quiesce_queue(q, QUEUE_FLAG_QUIESCED);
+}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
-bool blk_mq_quiesce_queue_without_rcu(struct request_queue *q)
+static bool __blk_mq_quiesce_queue_without_rcu(struct request_queue *q,
+ unsigned int flag)
{
struct blk_mq_hw_ctx *hctx;
unsigned int i;
bool rcu = false;
- blk_mq_quiesce_queue_nowait(q);
+ __blk_mq_quiesce_queue_nowait(q, flag);
queue_for_each_hw_ctx(q, hctx, i) {
if (hctx->flags & BLK_MQ_F_BLOCKING)
@@ -265,8 +277,21 @@ bool blk_mq_quiesce_queue_without_rcu(struct request_queue *q)
}
return rcu;
}
+
+bool blk_mq_quiesce_queue_without_rcu(struct request_queue *q)
+{
+ return __blk_mq_quiesce_queue_without_rcu(q, QUEUE_FLAG_QUIESCED);
+}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_without_rcu);
+static void __blk_mq_unquiesce_queue(struct request_queue *q, unsigned int flag)
+{
+ blk_queue_flag_clear(flag, q);
+
+ /* dispatch requests which are inserted during quiescing */
+ blk_mq_run_hw_queues(q, true);
+}
+
/*
* blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
* @q: request queue.
@@ -276,10 +301,7 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_without_rcu);
*/
void blk_mq_unquiesce_queue(struct request_queue *q)
{
- blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
-
- /* dispatch requests which are inserted during quiescing */
- blk_mq_run_hw_queues(q, true);
+ __blk_mq_unquiesce_queue(q, QUEUE_FLAG_QUIESCED);
}
EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
--
2.25.1

09 Dec '21
The address printed by %p in the kernel exposes kernel address information,
which is extremely unsafe.
So Linux v4.15 limited the information printed by %p: it now prints a hashed value.
This patchset adds a no_hash_pointers startup parameter which disables that
restriction, so that %p can print the actual kernel address.
I applied these patches along with the associated test modules, and the tests
passed after recompiling.
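As a minimal illustration (a hypothetical snippet, not part of the patchset),
the behaviour being toggled looks like this; %px has printed raw addresses
unconditionally since v4.15, while %p honours the new parameter:

	#include <linux/printk.h>

	static int obj;

	static void show_pointer_hashing(void)
	{
		/* Default: %p prints a per-boot hashed value. */
		pr_info("hashed:   %p\n", &obj);
		/* With no_hash_pointers on the kernel command line, %p prints
		 * the real address, matching the unconditional %px below.
		 */
		pr_info("unhashed: %px\n", &obj);
	}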
Tobin C. Harding (3):
lib/test_printf: Add empty module_exit function
kselftest: Add test module framework header
lib: Use new kselftest header
Timur Tabi (3):
kselftest: add support for skipped tests
lib: use KSTM_MODULE_GLOBALS macro in kselftest drivers
lib/vsprintf: no_hash_pointers prints all addresses as unhashed
.../admin-guide/kernel-parameters.txt | 15 +++
Documentation/dev-tools/kselftest.rst | 94 +++++++++++++++++-
lib/test_bitmap.c | 23 +----
lib/test_printf.c | 29 +++---
lib/vsprintf.c | 36 ++++++-
tools/testing/selftests/kselftest_module.h | 54 ++++++++++
6 files changed, 215 insertions(+), 36 deletions(-)
create mode 100644 tools/testing/selftests/kselftest_module.h
--
2.30.0

The current kernel requires the reboot mode to be provided as a boot parameter.
However, we cannot know the desired mode in advance. The kernel should have a
method to set the reboot options after the system has booted.
This patchset adds handles under <sysfs>/kernel/reboot to read the reboot
configuration and rewrite it, so users can change the reboot mode as they want.
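Such handles follow the usual kobj_attribute show/store pattern; roughly
(a simplified sketch, not the exact kernel/reboot.c code from the patchset):

	static ssize_t mode_show(struct kobject *kobj, struct kobj_attribute *attr,
				 char *buf)
	{
		return sprintf(buf, "%s\n",
			       reboot_mode == REBOOT_WARM ? "warm" : "cold");
	}

	static ssize_t mode_store(struct kobject *kobj, struct kobj_attribute *attr,
				  const char *buf, size_t count)
	{
		if (sysfs_streq(buf, "warm"))
			reboot_mode = REBOOT_WARM;
		else if (sysfs_streq(buf, "cold"))
			reboot_mode = REBOOT_COLD;
		else
			return -EINVAL;
		return count;
	}

	static struct kobj_attribute reboot_mode_attr =
		__ATTR(mode, 0644, mode_show, mode_store);

With such an attribute registered, reading /sys/kernel/reboot/mode returns
the current mode and writing "warm" to it changes it.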
Matteo Croce (1):
reboot: allow to specify reboot mode via sysfs
Nathan Chancellor (1):
reboot: Fix variable assignments in type_store
Documentation/ABI/testing/sysfs-kernel-reboot | 32 +++
kernel/reboot.c | 206 ++++++++++++++++++
2 files changed, 238 insertions(+)
create mode 100644 Documentation/ABI/testing/sysfs-kernel-reboot
--
2.17.1

[PATCH openEuler-1.0-LTS 1/2] bfq: Remove merged request already in bfq_requests_merged()
by Yang Yingliang 09 Dec '21
From: Jan Kara <jack(a)suse.cz>
mainline inclusion
from mainline-v5.14-rc1
commit a921c655f2033dd1ce1379128efe881dda23ea37
category: bugfix
bugzilla: 185777, 185811
CVE: NA
Currently, bfq does very little in bfq_requests_merged() and handles all
the request cleanup in bfq_finish_requeue_request() called from
blk_mq_free_request(). That is currently safe only because
blk_mq_free_request() is called shortly after bfq_requests_merged()
while bfqd->lock is still held. However, to fix a lock inversion between
bfqd->lock and ioc->lock, we need to call blk_mq_free_request() after
dropping bfqd->lock. That would mean that an already merged request could
be seen by other processes inside bfq queues and possibly dispatched to
the device, which is wrong. So move cleanup of the request from
bfq_finish_requeue_request() to bfq_requests_merged().
Acked-by: Paolo Valente <paolo.valente(a)linaro.org>
Signed-off-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20210623093634.27879-2-jack@suse.cz
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
conflict: in bfq_finish_requeue_request, 4.19 does not have the
bfq_update_inject_limit branch.
Signed-off-by: zhangwensheng <zhangwensheng5(a)huawei.com>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
block/bfq-iosched.c | 41 +++++++++++++----------------------------
1 file changed, 13 insertions(+), 28 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 7d77de9a0f5c0..5452d892480ba 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1937,7 +1937,7 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
*next_bfqq = bfq_init_rq(next);
if (!bfqq)
- return;
+ goto remove;
/*
* If next and rq belong to the same bfq_queue and next is older
@@ -1960,6 +1960,14 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
bfqq->next_rq = rq;
bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
+remove:
+ /* Merged request may be in the IO scheduler. Remove it. */
+ if (!RB_EMPTY_NODE(&next->rb_node)) {
+ bfq_remove_request(next->q, next);
+ if (next_bfqq)
+ bfqg_stats_update_io_remove(bfqq_group(next_bfqq),
+ next->cmd_flags);
+ }
}
/* Must be called with bfqq != NULL */
@@ -4876,6 +4884,7 @@ static void bfq_finish_requeue_request(struct request *rq)
{
struct bfq_queue *bfqq = RQ_BFQQ(rq);
struct bfq_data *bfqd;
+ unsigned long flags;
/*
* rq either is not associated with any icq, or is an already
@@ -4893,36 +4902,12 @@ static void bfq_finish_requeue_request(struct request *rq)
rq->io_start_time_ns,
rq->cmd_flags);
+ spin_lock_irqsave(&bfqd->lock, flags);
if (likely(rq->rq_flags & RQF_STARTED)) {
- unsigned long flags;
-
- spin_lock_irqsave(&bfqd->lock, flags);
-
bfq_completed_request(bfqq, bfqd);
- bfq_finish_requeue_request_body(bfqq);
-
- spin_unlock_irqrestore(&bfqd->lock, flags);
- } else {
- /*
- * Request rq may be still/already in the scheduler,
- * in which case we need to remove it (this should
- * never happen in case of requeue). And we cannot
- * defer such a check and removal, to avoid
- * inconsistencies in the time interval from the end
- * of this function to the start of the deferred work.
- * This situation seems to occur only in process
- * context, as a consequence of a merge. In the
- * current version of the code, this implies that the
- * lock is held.
- */
-
- if (!RB_EMPTY_NODE(&rq->rb_node)) {
- bfq_remove_request(rq->q, rq);
- bfqg_stats_update_io_remove(bfqq_group(bfqq),
- rq->cmd_flags);
- }
- bfq_finish_requeue_request_body(bfqq);
}
+ bfq_finish_requeue_request_body(bfqq);
+ spin_unlock_irqrestore(&bfqd->lock, flags);
/*
* Reset private fields. In case of a requeue, this allows
--
2.25.1

[PATCH openEuler-1.0-LTS] md: fix a warning caused by a race between concurrent md_ioctl()s
by Yang Yingliang 09 Dec '21
From: "Dae R. Jeong" <dae.r.jeong(a)kaist.ac.kr>
mainline inclusion
from mainline-v5.11-rc1
commit c731b84b51bf7fe83448bea8f56a6d55006b0615
category: bugfix
bugzilla: 185833
CVE: NA
-----------------------------------------------
Syzkaller reports a warning as below.
WARNING: CPU: 0 PID: 9647 at drivers/md/md.c:7169
...
Call Trace:
...
RIP: 0010:md_ioctl+0x4017/0x5980 drivers/md/md.c:7169
RSP: 0018:ffff888096027950 EFLAGS: 00010293
RAX: ffff88809322c380 RBX: 0000000000000932 RCX: ffffffff84e266f2
RDX: 0000000000000000 RSI: ffffffff84e299f7 RDI: 0000000000000007
RBP: ffff888096027bc0 R08: ffff88809322c380 R09: ffffed101341a482
R10: ffff888096027940 R11: ffff88809a0d240f R12: 0000000000000932
R13: ffff8880a2c14100 R14: ffff88809a0d2268 R15: ffff88809a0d2408
__blkdev_driver_ioctl block/ioctl.c:304 [inline]
blkdev_ioctl+0xece/0x1c10 block/ioctl.c:606
block_ioctl+0xee/0x130 fs/block_dev.c:1930
vfs_ioctl fs/ioctl.c:46 [inline]
file_ioctl fs/ioctl.c:509 [inline]
do_vfs_ioctl+0xd5f/0x1380 fs/ioctl.c:696
ksys_ioctl+0xab/0xd0 fs/ioctl.c:713
__do_sys_ioctl fs/ioctl.c:720 [inline]
__se_sys_ioctl fs/ioctl.c:718 [inline]
__x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718
do_syscall_64+0xfd/0x680 arch/x86/entry/common.c:301
entry_SYSCALL_64_after_hwframe+0x49/0xbe
This is caused by a race between two concurrent md_ioctl()s closing
the array.
CPU1 (md_ioctl()) CPU2 (md_ioctl())
------ ------
set_bit(MD_CLOSING, &mddev->flags);
did_set_md_closing = true;
WARN_ON_ONCE(test_bit(MD_CLOSING,
&mddev->flags));
if(did_set_md_closing)
clear_bit(MD_CLOSING, &mddev->flags);
Fix the warning by returning immediately if the MD_CLOSING bit is set
in &mddev->flags, which indicates that the array is being closed.
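The fix works because test_and_set_bit() checks and claims the bit in one
atomic step, so the window between the test and the set disappears;
schematically:

	/* Racy: another CPU can set MD_CLOSING between the check and the set. */
	WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags));
	set_bit(MD_CLOSING, &mddev->flags);

	/* Atomic: the first caller claims the bit, later callers back off. */
	if (test_and_set_bit(MD_CLOSING, &mddev->flags))
		return -EBUSY;	/* array is already being closed */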
Fixes: 065e519e71b2 ("md: MD_CLOSING needs to be cleared after called md_set_readonly or do_md_stop")
Reported-by: syzbot+1e46a0864c1a6e9bd3d8(a)syzkaller.appspotmail.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Dae R. Jeong <dae.r.jeong(a)kaist.ac.kr>
Signed-off-by: Song Liu <songliubraving(a)fb.com>
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/md/md.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8a2656cf7127d..409ec5ffd28d3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7306,8 +7306,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
err = -EBUSY;
goto out;
}
- WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags));
- set_bit(MD_CLOSING, &mddev->flags);
+ if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
+ mutex_unlock(&mddev->open_mutex);
+ err = -EBUSY;
+ goto out;
+ }
did_set_md_closing = true;
mutex_unlock(&mddev->open_mutex);
sync_blockdev(bdev);
--
2.25.1

09 Dec '21
From: Yonglong Liu <liuyonglong(a)huawei.com>
driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4LD5U
CVE: NA
----------------------------
When multiple users access debugfs at the same time, memory allocation
and release become disordered, causing a kernel crash like this:
[763845.759089] PC is at kfree+0x19c/0x1a0
[763845.759100] LR is at kvfree+0x3c/0x58
[763845.759103] pc : [<ffff00000828878c>] lr : [<ffff00000823432c>] pstate: 60400009
[763845.759105] sp : ffff00003744fc90
[763845.759108] x29: ffff00003744fc90 x28: ffff8027dc87b800
[763845.759115] x27: ffff0000088a1000 x26: ffff000002970f48
[763845.759121] x25: ffff802502600000 x24: 00000000000000af
[763845.759127] x23: 0000000000010000 x22: 0000000013dc0000
[763845.759133] x21: ffff00000823432c x20: ffff802502600000
[763845.759139] x19: ffff802502600000 x18: 0000ffffdaa06b10
[763845.759145] x17: 00000000004201c8 x16: ffff0000082b2b10
[763845.759151] x15: 000000000003013f x14: 0000ffffa462ffe0
[763845.759157] x13: ffffffffffffffff x12: 0433526ae61f3300
[763845.759163] x11: ffff000009694b30 x10: 0000000000000001
[763845.759169] x9 : 000000000007b224 x8 : ffff000009719edc
[763845.759175] x7 : ffff7fe009409800 x6 : 00000045757af8cf
[763845.759181] x5 : ffff8027fced69f0 x4 : 0000000000000000
[763845.759187] x3 : 0000000000000000 x2 : 0433526ae61f3300
[763845.759192] x1 : 0000000000000000 x0 : dead000000000100
[763845.759200] Process cat (pid: 57988, stack limit = 0xffff000037440000)
[763845.759203] Call trace:
[763845.759207] Exception stack(0xffff00003744fb50 to 0xffff00003744fc90)
[763845.759211] fb40: dead000000000100 0000000000000000
[768745.759215] fb60: 0433526ae61f3300 0000000000000000 0000000000000000 ffff8027fced69f0
[763845.759219] fb80: 00000045757af8cf ffff7fe009409800 ffff000009719edc 000000000007b224
[763845.759222] fba0: 0000000000000001 ffff000009694b30 0433526ae61f3300 ffffffffffffffff
[763845.759226] fbc0: 0000ffffa462ffe0 000000000003013f ffff0000082b2b10 00000000004201c8
[763845.759231] fbe0: 0000ffffdaa06b10 ffff802502600000 ffff802502600000 ffff00000823432c
[763845.759235] fc00: 0000000013dc0000 0000000000010000 00000000000000af ffff802502600000
[763845.759238] fc20: ffff000002970f48 ffff0000088a1000 ffff8027dc87b800 ffff00003744fc90
[763845.759243] fc40: ffff00000823432c ffff00003744fc90 ffff00000828878c 0000000060400009
[763845.759247] fc60: ffff00003744feb0 0000000013dc0000 0000ffffffffffff 0000000000000023
[763845.759250] fc80: ffff00003744fc90 ffff00000828878c
[763845.759259] [<ffff00000828878c>] kfree+0x19c/0x1a0
[763845.759263] [<ffff00000823432c>] kvfree+0x3c/0x58
[763845.759306] [<ffff00000295ab94>] hns3_dbg_read+0x94/0x240 [hns3]
[763845.759318] [<ffff000008359550>] full_proxy_read+0x60/0x90
[763845.759324] [<ffff0000082b22a4>] __vfs_read+0x58/0x178
[763845.759327] [<ffff0000082b2454>] vfs_read+0x90/0x14c
[763845.759332] [<ffff0000082b2b70>] SyS_read+0x60/0xc0
This patch adds a mutex lock to fix the race condition. It also needs to
call the hns3_dbg_read_cmd() function when the buffer is NULL to avoid
reading empty data.
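The shape of the fix is the classic mutex-protected lazy allocation; a
condensed sketch of the pattern (not the full hns3_dbg_read()):

	mutex_lock(&handle->dbgfs_lock);
	if (!*save_buf) {
		*save_buf = kvzalloc(buf_len, GFP_KERNEL);
		if (!*save_buf) {
			mutex_unlock(&handle->dbgfs_lock);
			return -ENOMEM;
		}
		/* fill the buffer once, on the first read */
	}
	/* ... copy out of the buffer while still holding the lock ... */
	mutex_unlock(&handle->dbgfs_lock);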
Fixes: c91910efc03a ("net: hns3: refactor the debugfs process")
Signed-off-by: Yonglong Liu <liuyonglong(a)huawei.com>
Reviewed-by: li yongxin <liyongxin1(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 +
.../ethernet/hisilicon/hns3/hns3_debugfs.c | 19 +++++++++++++------
2 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index c9ac1e7cf4492..048de5b367c19 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -764,6 +764,7 @@ struct hnae3_handle {
u8 netdev_flags;
struct dentry *hnae3_dbgfs;
+ struct mutex dbgfs_lock;
/* Network interface message level enabled bits */
u32 msg_enable;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 7f3b7084e382f..c68e5f3d0ba52 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -807,6 +807,7 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer,
if (ret)
return ret;
+ mutex_lock(&handle->dbgfs_lock);
save_buf = &hns3_dbg_cmd[index].buf;
if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) ||
@@ -819,15 +820,15 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer,
read_buf = *save_buf;
} else {
read_buf = kvzalloc(hns3_dbg_cmd[index].buf_len, GFP_KERNEL);
- if (!read_buf)
- return -ENOMEM;
+ if (!read_buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
/* save the buffer addr until the last read operation */
*save_buf = read_buf;
- }
- /* get data ready for the first time to read */
- if (!*ppos) {
+ /* get data ready for the first time to read */
ret = hns3_dbg_read_cmd(dbg_data, hns3_dbg_cmd[index].cmd,
read_buf, hns3_dbg_cmd[index].buf_len);
if (ret)
@@ -836,8 +837,10 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer,
size = simple_read_from_buffer(buffer, count, ppos, read_buf,
strlen(read_buf));
- if (size > 0)
+ if (size > 0) {
+ mutex_unlock(&handle->dbgfs_lock);
return size;
+ }
out:
/* free the buffer for the last read operation */
@@ -846,6 +849,7 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer,
*save_buf = NULL;
}
+ mutex_unlock(&handle->dbgfs_lock);
return ret;
}
@@ -916,6 +920,7 @@ int hns3_dbg_init(struct hnae3_handle *handle)
debugfs_create_dir(hns3_dbg_dentry[i].name,
handle->hnae3_dbgfs);
+ mutex_init(&handle->dbgfs_lock);
for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++) {
if (!hns3_dbg_cmd[i].init) {
dev_err(&handle->pdev->dev,
@@ -936,6 +941,7 @@ int hns3_dbg_init(struct hnae3_handle *handle)
return 0;
out:
+ mutex_destroy(&handle->dbgfs_lock);
debugfs_remove_recursive(handle->hnae3_dbgfs);
handle->hnae3_dbgfs = NULL;
return ret;
@@ -951,6 +957,7 @@ void hns3_dbg_uninit(struct hnae3_handle *handle)
hns3_dbg_cmd[i].buf = NULL;
}
+ mutex_destroy(&handle->dbgfs_lock);
debugfs_remove_recursive(handle->hnae3_dbgfs);
handle->hnae3_dbgfs = NULL;
}
--
2.25.1

09 Dec '21
hulk inclusion
category: bugfix
bugzilla: NA
CVE: NA
---------------------------
If CONFIG_DEBUG_SPINLOCK or CONFIG_DEBUG_LOCK_ALLOC is enabled, there is
no need to fix the KABI breakage.
The breakage was introduced by 93c5c1d15abcd ("af_unix: fix races in sk_peer_pid and sk_peer_cred accesses").
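The pattern overlays the new field on a reserved slot so the struct size and
layout are unchanged; it only works because spinlock_t is no larger than
unsigned long when the debug configs are off, which is exactly why those
configs are excluded. Schematically (a simplified sketch of the diff below):

	#ifndef __GENKSYMS__
		union {
			spinlock_t sk_peer_lock;	/* new field ... */
			unsigned long kabi_reserve1;	/* ... shares the reserved slot */
		};
	#else
		KABI_RESERVE(1)		/* genksyms still sees the old layout */
	#endif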
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Reviewed-by: Cheng Jian <cj.chengjian(a)huawei.com>
Reviewed-by: Yue Haibing <yuehaibing(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
include/net/sock.h | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/include/net/sock.h b/include/net/sock.h
index b90b92882b3b8..803464e66e02c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -472,7 +472,9 @@ struct sock {
u32 sk_ack_backlog;
u32 sk_max_ack_backlog;
kuid_t sk_uid;
+#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
spinlock_t sk_peer_lock;
+#endif
struct pid *sk_peer_pid;
const struct cred *sk_peer_cred;
@@ -513,7 +515,18 @@ struct sock {
struct sock_reuseport __rcu *sk_reuseport_cb;
struct rcu_head sk_rcu;
+#if !defined(CONFIG_DEBUG_SPINLOCK) && !defined(CONFIG_DEBUG_LOCK_ALLOC)
+#ifndef __GENKSYMS__
+ union {
+ spinlock_t sk_peer_lock;
+ unsigned long kabi_reserve1;
+ };
+#else
KABI_RESERVE(1)
+#endif
+#else
+ KABI_RESERVE(1)
+#endif
KABI_RESERVE(2)
KABI_RESERVE(3)
KABI_RESERVE(4)
--
2.25.1

[PATCH openEuler-1.0-LTS 01/31] cifs: fix incorrect check for null pointer in header_assemble
by Yang Yingliang 08 Dec '21
From: Steve French <stfrench(a)microsoft.com>
stable inclusion
from linux-4.19.209
commit 43d2e0fbc67f8bcfb069130f4028a04887ae76b6
--------------------------------
commit 9ed38fd4a15417cac83967360cf20b853bfab9b6 upstream.
Although it is very unlikely that the tlink pointer would be null in this case,
the get_next_mid function can in theory return null (but not an error),
so we need to check for null (not for IS_ERR, which cannot be returned
here).
Address warning:
fs/smbfs_client/connect.c:2392 cifs_match_super()
warn: 'tlink' isn't an ERR_PTR
Pointed out by Dan Carpenter via smatch code analysis tool
CC: stable(a)vger.kernel.org
Reported-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Acked-by: Ronnie Sahlberg <lsahlber(a)redhat.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Acked-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
fs/cifs/connect.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 907be252c5d47..36104dd8eb4dd 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3373,9 +3373,10 @@ cifs_match_super(struct super_block *sb, void *data)
spin_lock(&cifs_tcp_ses_lock);
cifs_sb = CIFS_SB(sb);
tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
- if (IS_ERR(tlink)) {
+ if (tlink == NULL) {
+ /* can not match superblock if tlink were ever null */
spin_unlock(&cifs_tcp_ses_lock);
- return rc;
+ return 0;
}
tcon = tlink_tcon(tlink);
ses = tcon->ses;
--
2.25.1

[PATCH OLK-5.10 107/107] fs/ntfs3: Add ntfs3 module in openeuler_defconfig
by Yin Xiujiang 08 Dec '21
kylin inclusion
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
-------------------------------------------------
This adds the ntfs3 module to openeuler_defconfig.
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
arch/arm64/configs/openeuler_defconfig | 4 ++++
arch/x86/configs/openeuler_defconfig | 4 ++++
2 files changed, 8 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 76d6a118330d..b1e8524eb5e6 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -6163,6 +6163,10 @@ CONFIG_EXFAT_DEFAULT_IOCHARSET="utf8"
CONFIG_NTFS_FS=m
# CONFIG_NTFS_DEBUG is not set
# CONFIG_NTFS_RW is not set
+CONFIG_NTFS3_FS=m
+CONFIG_NTFS3_64BIT_CLUSTER=y
+CONFIG_NTFS3_LZX_XPRESS=y
+CONFIG_NTFS3_FS_POSIX_ACL=y
# end of DOS/FAT/EXFAT/NT Filesystems
#
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index b25d908dc7a1..83e143d139fa 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -7515,6 +7515,10 @@ CONFIG_EXFAT_DEFAULT_IOCHARSET="utf8"
CONFIG_NTFS_FS=m
# CONFIG_NTFS_DEBUG is not set
# CONFIG_NTFS_RW is not set
+CONFIG_NTFS3_FS=m
+CONFIG_NTFS3_64BIT_CLUSTER=y
+CONFIG_NTFS3_LZX_XPRESS=y
+CONFIG_NTFS3_FS_POSIX_ACL=y
# end of DOS/FAT/EXFAT/NT Filesystems
#
--
2.30.0

08 Dec '21
From: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
mainline inclusion
from mainline-v5.15
commit 808bc0a82bcd2cbe32a139613325b1a3e03f35f1
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
There is already a 'u8 mask' defined at the top of the function.
There is no need to define a new one here.
Remove the useless and shadowing new 'mask' variable.
Signed-off-by: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Reviewed-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/bitfunc.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/fs/ntfs3/bitfunc.c b/fs/ntfs3/bitfunc.c
index bf10e2da5c6e..50d838093790 100644
--- a/fs/ntfs3/bitfunc.c
+++ b/fs/ntfs3/bitfunc.c
@@ -119,8 +119,7 @@ bool are_bits_set(const ulong *lmap, size_t bit, size_t nbits)
pos = nbits & 7;
if (pos) {
- u8 mask = fill_mask[pos];
-
+ mask = fill_mask[pos];
if ((*map & mask) != mask)
return false;
}
--
2.30.0

[PATCH OLK-5.10 080/107] fs/ntfs3: Remove a useless test in 'indx_find()'
by Yin Xiujiang 08 Dec '21
From: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
mainline inclusion
from mainline-v5.15
commit d2846bf33c1423ff872c7a7c2afde292ad502c04
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
'fnd' has been dereferenced several times before, so testing it here is
pointless.
Moreover, all callers of 'indx_find()' already have error handling
code that makes sure no NULL 'fnd' is passed.
So, remove the useless test.
Signed-off-by: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Reviewed-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/index.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 4f71a91f07d9..6f81e3a49abf 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -1072,9 +1072,7 @@ int indx_find(struct ntfs_index *indx, struct ntfs_inode *ni,
if (!e)
return -EINVAL;
- if (fnd)
- fnd->root_de = e;
-
+ fnd->root_de = e;
err = 0;
for (;;) {
--
2.30.0

08 Dec '21
From: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
mainline inclusion
from mainline-v5.15
commit 56eaeb10e2619081cc383febf6740a4c3e806777
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
xfstest generic/041 works with 3003 hardlinks.
Because of this we raise the hardlink limit to 4000.
There are no drawbacks or regressions.
Theoretically we could raise it all the way up to 0xffff,
but there is no practical use for this.
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/ntfs.h | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
index 303a162c3158..9cc396b117bf 100644
--- a/fs/ntfs3/ntfs.h
+++ b/fs/ntfs3/ntfs.h
@@ -26,9 +26,11 @@
#define NTFS_NAME_LEN 255
-/* ntfs.sys used 500 maximum links on-disk struct allows up to 0xffff. */
-#define NTFS_LINK_MAX 0x400
-//#define NTFS_LINK_MAX 0xffff
+/*
+ * ntfs.sys used 500 maximum links on-disk struct allows up to 0xffff.
+ * xfstest generic/041 creates 3003 hardlinks.
+ */
+#define NTFS_LINK_MAX 4000
/*
* Activate to use 64 bit clusters instead of 32 bits in ntfs.sys.
--
2.30.0

08 Dec '21
From: Colin Ian King <colin.king(a)canonical.com>
mainline inclusion
from mainline-v5.15
commit 880301bb313295a65523e79bc5666f5cf49eb3ed
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
Currently a failed allocation of sbi->upcase will cause an exit via
the label free_sbi, causing a memory leak on the object opts. Fix this by
re-ordering the exit paths free_opts and free_sbi so that the kfree() calls
occur in reverse allocation order.
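The rule the fix restores is the usual goto-unwind idiom: labels run in
reverse allocation order, and each failure jumps to the label that frees
everything allocated so far. A minimal sketch (allocation sizes are
illustrative):

	opts = kzalloc(sizeof(*opts), GFP_NOFS);
	if (!opts)
		return -ENOMEM;

	sbi = kzalloc(sizeof(*sbi), GFP_NOFS);
	if (!sbi)
		goto free_opts;		/* only opts exists yet */

	sbi->upcase = kvmalloc(0x10000 * sizeof(short), GFP_KERNEL);
	if (!sbi->upcase)
		goto free_sbi;		/* free sbi first, then opts */
	return 0;

free_sbi:
	kfree(sbi);
free_opts:
	kfree(opts);
	return -ENOMEM;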
Addresses-Coverity: ("Resource leak")
Fixes: 27fac77707a1 ("fs/ntfs3: Init spi more in init_fs_context than fill_super")
Signed-off-by: Colin Ian King <colin.king(a)canonical.com>
Reviewed-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/super.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index cefb9ddaf4db..6a535b144ff9 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -1393,10 +1393,10 @@ static int ntfs_init_fs_context(struct fs_context *fc)
fc->ops = &ntfs_context_ops;
return 0;
-free_opts:
- kfree(opts);
free_sbi:
kfree(sbi);
+free_opts:
+ kfree(opts);
return -ENOMEM;
}
--
2.30.0

[PATCH OLK-5.10 048/107] fs/ntfs3: Add missing header and guards to lib/ headers
by Yin Xiujiang 08 Dec '21
From: Kari Argillander <kari.argillander(a)gmail.com>
mainline inclusion
from mainline-v5.15
commit b6ba81034b1b74cf426abcece4becda2611504a4
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
size_t needs a header. Also add the missing include guards so that the
compiler includes each of these headers only once.
Signed-off-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/lib/decompress_common.h | 5 +++++
fs/ntfs3/lib/lib.h | 6 ++++++
2 files changed, 11 insertions(+)
diff --git a/fs/ntfs3/lib/decompress_common.h b/fs/ntfs3/lib/decompress_common.h
index 2d70ae42f1b5..dd7ced000d0e 100644
--- a/fs/ntfs3/lib/decompress_common.h
+++ b/fs/ntfs3/lib/decompress_common.h
@@ -5,6 +5,9 @@
* Copyright (C) 2015 Eric Biggers
*/
+#ifndef _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H
+#define _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H
+
#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/types.h>
@@ -336,3 +339,5 @@ static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend
return dst;
}
+
+#endif /* _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H */
diff --git a/fs/ntfs3/lib/lib.h b/fs/ntfs3/lib/lib.h
index f508fbad2e71..90309a5ae59c 100644
--- a/fs/ntfs3/lib/lib.h
+++ b/fs/ntfs3/lib/lib.h
@@ -7,6 +7,10 @@
* - linux kernel code style
*/
+#ifndef _LINUX_NTFS3_LIB_LIB_H
+#define _LINUX_NTFS3_LIB_LIB_H
+
+#include <linux/types.h>
/* globals from xpress_decompress.c */
struct xpress_decompressor *xpress_allocate_decompressor(void);
@@ -24,3 +28,5 @@ int lzx_decompress(struct lzx_decompressor *__restrict d,
const void *__restrict compressed_data,
size_t compressed_size, void *__restrict uncompressed_data,
size_t uncompressed_size);
+
+#endif /* _LINUX_NTFS3_LIB_LIB_H */
--
2.30.0

[PATCH OLK-5.10 047/107] fs/ntfs3: Add missing headers and forward declarations to ntfs_fs.h
by Yin Xiujiang 08 Dec '21
From: Kari Argillander <kari.argillander(a)gmail.com>
mainline inclusion
from mainline-v5.15
commit f239b3a95dd4f7daba26ea17f339a5b19a7d40a1
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
This file has no header includes at all. We should have them so that not
every .c file needs to include all of the stuff this file needs for building.
This way we can remove some headers from other files and get a better picture
of what is needed, which can save some compilation time. It can also help if
we ever want to split up this one big header.
Also use forward declarations for structs and enums when they are not pulled
in directly by an include and are only used in function declaration inputs.
This prevents the possible compiler warning:
This will prevent possible compiler warning:
xxx declared inside parameter list will not be visible
outside of this definition or declaration
Here is the list I made while parsing this. It does not necessarily contain
every example from this header file, but it proves each header is needed.
<linux/blkdev.h> SECTOR_SHIFT
<linux/buffer_head.h> sb_bread(), put_bh
<linux/cleancache.h> put_page()
<linux/fs.h> struct inode (Just struct ntfs_inode need it)
<linux/highmem.h> kunmap(), kmap()
<linux/kernel.h> cpu_to_leXX() ALIGN
<linux/mm.h> kvfree()
<linux/mutex.h> struct mutex, mutex_(un/try)lock()
<linux/page-flags.h> PageError()
<linux/pagemap.h> read_mapping_page()
<linux/rbtree.h> struct rb_root
<linux/rwsem.h> struct rw_semaphore
<linux/slab.h> krfree(), kzalloc()
<linux/string.h> memset()
<linux/time64.h> struct timespec64
<linux/types.h> uXX, __leXX
<linux/uidgid.h> kuid_t, kgid_t
<asm/div64.h> do_div()
<asm/page.h> PAGE_SIZE
"debug.h" ntfs_err() (Just one entry. Maybe we can drop this)
"ntfs.h" Do you even ask?
Signed-off-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/ntfs_fs.h | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index 372cda697dd4..dae6dd4ac619 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -9,6 +9,37 @@
#ifndef _LINUX_NTFS3_NTFS_FS_H
#define _LINUX_NTFS3_NTFS_FS_H
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/cleancache.h>
+#include <linux/fs.h>
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/page-flags.h>
+#include <linux/pagemap.h>
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/time64.h>
+#include <linux/types.h>
+#include <linux/uidgid.h>
+#include <asm/div64.h>
+#include <asm/page.h>
+
+#include "debug.h"
+#include "ntfs.h"
+
+struct dentry;
+struct fiemap_extent_info;
+struct user_namespace;
+struct page;
+struct writeback_control;
+enum utf16_endian;
+
+
#define MINUS_ONE_T ((size_t)(-1))
/* Biggest MFT / smallest cluster */
#define MAXIMUM_BYTES_PER_MFT 4096
--
2.30.0

08 Dec '21
From: Kari Argillander <kari.argillander(a)gmail.com>
mainline inclusion
from mainline-v5.15
commit 4dfe83320e1e9665b986840b426742ea764e08d7
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
This file has no header includes at all. Add the following headers, each
with an explanation of why it was added. Note that an explanation might not
be complete, but it proves the header is needed.
<linux/blkdev.h> // SECTOR_SHIFT
<linux/build_bug.h> // static_assert()
<linux/kernel.h> // cpu_to_le64, cpu_to_le32, ALIGN
<linux/stddef.h> // offsetof()
<linux/string.h> // memcmp()
<linux/types.h> //__le32, __le16
"debug.h" // PtrOffset(), Add2Ptr()
Signed-off-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/ntfs.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
index 6bb3e595263b..695b684bce20 100644
--- a/fs/ntfs3/ntfs.h
+++ b/fs/ntfs3/ntfs.h
@@ -10,6 +10,15 @@
#ifndef _LINUX_NTFS3_NTFS_H
#define _LINUX_NTFS3_NTFS_H
+#include <linux/blkdev.h>
+#include <linux/build_bug.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "debug.h"
+
/* TODO: Check 4K MFT record and 512 bytes cluster. */
/* Activate this define to use binary search in indexes. */
--
2.30.0

[PATCH OLK-5.10 044/107] fs/ntfs3: Remove redundant initialization of variable err
by Yin Xiujiang 08 Dec '21
From: Colin Ian King <colin.king(a)canonical.com>
mainline inclusion
from mainline-v5.15
commit 0327c6d01a97a3242cf10717819994aa6e095a1d
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
The variable err is being initialized with a value that is never read; it
is updated later on. The assignment is redundant and can be removed.
Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king(a)canonical.com>
Reviewed-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/index.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 0daca9adc54c..b1175542d854 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -1401,7 +1401,7 @@ int indx_find_raw(struct ntfs_index *indx, struct ntfs_inode *ni,
static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
CLST *vbn)
{
- int err = -ENOMEM;
+ int err;
struct ntfs_sb_info *sbi = ni->mi.sbi;
struct ATTRIB *bitmap;
struct ATTRIB *alloc;
--
2.30.0

[PATCH OLK-5.10 042/107] fs/ntfs3: Rename mount option no_acs_rules > (no)acsrules
by Yin Xiujiang 08 Dec '21
From: Kari Argillander <kari.argillander(a)gmail.com>
mainline inclusion
from mainline-v5.15
commit 28a941ffc1404b66d67228cbe8392bbadb94af0d
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
Rename the mount option no_acs_rules to (no)acsrules. This allows us to
mount with the options noacsrules or acsrules.
Acked-by: Christian Brauner <christian.brauner(a)ubuntu.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
Documentation/filesystems/ntfs3.rst | 2 +-
fs/ntfs3/file.c | 2 +-
fs/ntfs3/ntfs_fs.h | 2 +-
fs/ntfs3/super.c | 12 ++++++------
fs/ntfs3/xattr.c | 2 +-
5 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/Documentation/filesystems/ntfs3.rst b/Documentation/filesystems/ntfs3.rst
index ded706474825..7b6afe452197 100644
--- a/Documentation/filesystems/ntfs3.rst
+++ b/Documentation/filesystems/ntfs3.rst
@@ -73,7 +73,7 @@ prealloc Preallocate space for files excessively when file size is
increasing on writes. Decreases fragmentation in case of
parallel write operations to different files.
-no_acs_rules "No access rules" mount option sets access rights for
+noacsrules "No access rules" mount option sets access rights for
files/folders to 777 and owner/group to root. This mount
option absorbs all other permissions:
- permissions change for files/folders will be reported
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index fef57141b161..0743d806c567 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -737,7 +737,7 @@ int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
umode_t mode = inode->i_mode;
int err;
- if (sbi->options->no_acs_rules) {
+ if (sbi->options->noacsrules) {
/* "No access rules" - Force any changes of time etc. */
attr->ia_valid |= ATTR_FORCE;
/* and disable for editing some attributes. */
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index aa18f12b7096..15bab48bc1ad 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -70,7 +70,7 @@ struct ntfs_mount_options {
showmeta : 1, /* Show meta files. */
nohidden : 1, /* Do not show hidden files. */
force : 1, /* Rw mount dirty volume. */
- no_acs_rules : 1, /*Exclude acs rules. */
+ noacsrules : 1, /*Exclude acs rules. */
prealloc : 1 /* Preallocate space when file is growing. */
;
};
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 503e2e23f711..0690e7e4f00d 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -228,7 +228,7 @@ enum Opt {
Opt_acl,
Opt_iocharset,
Opt_prealloc,
- Opt_no_acs_rules,
+ Opt_noacsrules,
Opt_err,
};
@@ -246,7 +246,7 @@ static const struct fs_parameter_spec ntfs_fs_parameters[] = {
fsparam_flag_no("acl", Opt_acl),
fsparam_flag_no("showmeta", Opt_showmeta),
fsparam_flag_no("prealloc", Opt_prealloc),
- fsparam_flag("no_acs_rules", Opt_no_acs_rules),
+ fsparam_flag_no("acsrules", Opt_noacsrules),
fsparam_string("iocharset", Opt_iocharset),
__fsparam(fs_param_is_string,
@@ -358,8 +358,8 @@ static int ntfs_fs_parse_param(struct fs_context *fc,
case Opt_prealloc:
opts->prealloc = result.negated ? 0 : 1;
break;
- case Opt_no_acs_rules:
- opts->no_acs_rules = 1;
+ case Opt_noacsrules:
+ opts->noacsrules = result.negated ? 1 : 0;
break;
default:
/* Should not be here unless we forget add case. */
@@ -547,8 +547,8 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",nohidden");
if (opts->force)
seq_puts(m, ",force");
- if (opts->no_acs_rules)
- seq_puts(m, ",no_acs_rules");
+ if (opts->noacsrules)
+ seq_puts(m, ",noacsrules");
if (opts->prealloc)
seq_puts(m, ",prealloc");
if (sb->s_flags & SB_POSIXACL)
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index ac4b37bf8832..6f88cb77a17f 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -769,7 +769,7 @@ int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct inode *inode)
int ntfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
int mask)
{
- if (ntfs_sb(inode->i_sb)->options->no_acs_rules) {
+ if (ntfs_sb(inode->i_sb)->options->noacsrules) {
/* "No access rules" mode - Allow all changes. */
return 0;
}
--
2.30.0

[PATCH OLK-5.10 041/107] fs/ntfs3: Add iocharset= mount option as alias for nls=
by Yin Xiujiang 08 Dec '21
From: Kari Argillander <kari.argillander(a)gmail.com>
mainline inclusion
from mainline-v5.15
commit e274cde8c7550cac46eb7aba3a77aff44ae0b301
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
Other fs drivers use the iocharset= mount option for specifying the charset.
So add it for ntfs3 as well, and mark the old nls= mount option as deprecated.
Reviewed-by: Pali Rohár <pali(a)kernel.org>
Signed-off-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
Documentation/filesystems/ntfs3.rst | 4 ++--
fs/ntfs3/super.c | 18 +++++++++++-------
2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/Documentation/filesystems/ntfs3.rst b/Documentation/filesystems/ntfs3.rst
index af7158de6fde..ded706474825 100644
--- a/Documentation/filesystems/ntfs3.rst
+++ b/Documentation/filesystems/ntfs3.rst
@@ -32,12 +32,12 @@ generic ones.
===============================================================================
-nls=name This option informs the driver how to interpret path
+iocharset=name This option informs the driver how to interpret path
strings and translate them to Unicode and back. If
this option is not set, the default codepage will be
used (CONFIG_NLS_DEFAULT).
Examples:
- 'nls=utf8'
+ 'iocharset=utf8'
uid=
gid=
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 729ead6f2fac..503e2e23f711 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -226,7 +226,7 @@ enum Opt {
Opt_nohidden,
Opt_showmeta,
Opt_acl,
- Opt_nls,
+ Opt_iocharset,
Opt_prealloc,
Opt_no_acs_rules,
Opt_err,
@@ -245,9 +245,13 @@ static const struct fs_parameter_spec ntfs_fs_parameters[] = {
fsparam_flag_no("hidden", Opt_nohidden),
fsparam_flag_no("acl", Opt_acl),
fsparam_flag_no("showmeta", Opt_showmeta),
- fsparam_string("nls", Opt_nls),
fsparam_flag_no("prealloc", Opt_prealloc),
fsparam_flag("no_acs_rules", Opt_no_acs_rules),
+ fsparam_string("iocharset", Opt_iocharset),
+
+ __fsparam(fs_param_is_string,
+ "nls", Opt_iocharset,
+ fs_param_deprecated, NULL),
{}
};
@@ -346,7 +350,7 @@ static int ntfs_fs_parse_param(struct fs_context *fc,
case Opt_showmeta:
opts->showmeta = result.negated ? 0 : 1;
break;
- case Opt_nls:
+ case Opt_iocharset:
kfree(opts->nls_name);
opts->nls_name = param->string;
param->string = NULL;
@@ -380,11 +384,11 @@ static int ntfs_fs_reconfigure(struct fs_context *fc)
new_opts->nls = ntfs_load_nls(new_opts->nls_name);
if (IS_ERR(new_opts->nls)) {
new_opts->nls = NULL;
- errorf(fc, "ntfs3: Cannot load nls %s", new_opts->nls_name);
+ errorf(fc, "ntfs3: Cannot load iocharset %s", new_opts->nls_name);
return -EINVAL;
}
if (new_opts->nls != sbi->options->nls)
- return invalf(fc, "ntfs3: Cannot use different nls when remounting!");
+ return invalf(fc, "ntfs3: Cannot use different iocharset when remounting!");
sync_filesystem(sb);
@@ -528,9 +532,9 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root)
if (opts->dmask)
seq_printf(m, ",dmask=%04o", ~opts->fs_dmask_inv);
if (opts->nls)
- seq_printf(m, ",nls=%s", opts->nls->charset);
+ seq_printf(m, ",iocharset=%s", opts->nls->charset);
else
- seq_puts(m, ",nls=utf8");
+ seq_puts(m, ",iocharset=utf8");
if (opts->sys_immutable)
seq_puts(m, ",sys_immutable");
if (opts->discard)
--
2.30.0

[PATCH OLK-5.10 040/107] fs/ntfs3: Make mount option nohidden more universal
by Yin Xiujiang 08 Dec '21
From: Kari Argillander <kari.argillander(a)gmail.com>
mainline inclusion
from mainline-v5.15
commit 9d1939f4575f3fda70dd94542dbd4d775e104132
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
If we register Opt_nohidden with just the keyword hidden, then we can use
hidden/nohidden when mounting. We already use this method for almost all
other parameters, so it is only logical that this one uses the same method.
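fsparam_flag_no() registers both the plain keyword and its no- prefixed form,
and result.negated reports which one matched; condensed from the diff below:

	fsparam_flag_no("hidden", Opt_nohidden),  /* matches "hidden" and "nohidden" */

	/* in ntfs_fs_parse_param(): */
	case Opt_nohidden:
		opts->nohidden = result.negated ? 1 : 0;  /* set only by "nohidden" */
		break;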
Acked-by: Christian Brauner <christian.brauner(a)ubuntu.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Pali Rohár <pali(a)kernel.org>
Signed-off-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/super.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 420cd1409170..729ead6f2fac 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -242,7 +242,7 @@ static const struct fs_parameter_spec ntfs_fs_parameters[] = {
fsparam_flag_no("discard", Opt_discard),
fsparam_flag_no("force", Opt_force),
fsparam_flag_no("sparse", Opt_sparse),
- fsparam_flag("nohidden", Opt_nohidden),
+ fsparam_flag_no("hidden", Opt_nohidden),
fsparam_flag_no("acl", Opt_acl),
fsparam_flag_no("showmeta", Opt_showmeta),
fsparam_string("nls", Opt_nls),
@@ -331,7 +331,7 @@ static int ntfs_fs_parse_param(struct fs_context *fc,
opts->sparse = result.negated ? 0 : 1;
break;
case Opt_nohidden:
- opts->nohidden = 1;
+ opts->nohidden = result.negated ? 1 : 0;
break;
case Opt_acl:
if (!result.negated)
--
2.30.0

[PATCH OLK-5.10 035/107] fs/ntfs3: Remove unnecessary mount option noatime
by Yin Xiujiang 08 Dec '21
From: Kari Argillander <kari.argillander(a)gmail.com>
mainline inclusion
from mainline-v5.15
commit b8a30b4171b9a3c22ef0605ed74a21544d00c680
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
Remove the unnecessary mount option noatime because this is handled
by the VFS. Our option parser will never see an option like this.
Acked-by: Christian Brauner <christian.brauner(a)ubuntu.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Pali Rohár <pali(a)kernel.org>
Signed-off-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
Documentation/filesystems/ntfs3.rst | 4 ----
fs/ntfs3/super.c | 7 -------
2 files changed, 11 deletions(-)
diff --git a/Documentation/filesystems/ntfs3.rst b/Documentation/filesystems/ntfs3.rst
index ffe9ea0c1499..af7158de6fde 100644
--- a/Documentation/filesystems/ntfs3.rst
+++ b/Documentation/filesystems/ntfs3.rst
@@ -85,10 +85,6 @@ acl Support POSIX ACLs (Access Control Lists). Effective if
supported by Kernel. Not to be confused with NTFS ACLs.
The option specified as acl enables support for POSIX ACLs.
-noatime All files and directories will not update their last access
- time attribute if a partition is mounted with this parameter.
- This option can speed up file system operation.
-
===============================================================================
ToDo list
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 55bbc9200a10..a18b99a3e3b5 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -223,7 +223,6 @@ enum Opt {
Opt_nohidden,
Opt_showmeta,
Opt_acl,
- Opt_noatime,
Opt_nls,
Opt_prealloc,
Opt_no_acs_rules,
@@ -242,7 +241,6 @@ static const match_table_t ntfs_tokens = {
{ Opt_sparse, "sparse" },
{ Opt_nohidden, "nohidden" },
{ Opt_acl, "acl" },
- { Opt_noatime, "noatime" },
{ Opt_showmeta, "showmeta" },
{ Opt_nls, "nls=%s" },
{ Opt_prealloc, "prealloc" },
@@ -333,9 +331,6 @@ static noinline int ntfs_parse_options(struct super_block *sb, char *options,
ntfs_err(sb, "support for ACL not compiled in!");
return -EINVAL;
#endif
- case Opt_noatime:
- sb->s_flags |= SB_NOATIME;
- break;
case Opt_showmeta:
opts->showmeta = 1;
break;
@@ -587,8 +582,6 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",prealloc");
if (sb->s_flags & SB_POSIXACL)
seq_puts(m, ",acl");
- if (sb->s_flags & SB_NOATIME)
- seq_puts(m, ",noatime");
return 0;
}
--
2.30.0

[PATCH OLK-5.10 031/107] fs/ntfs3: Fix integer overflow in ni_fiemap with fiemap_prep()
by Yin Xiujiang 08 Dec '21
From: Kari Argillander <kari.argillander(a)gmail.com>
mainline inclusion
from mainline-v5.15
commit d4e8e135a9af7d8d939bba1874ab314322fc2dc2
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
Use fiemap_prep() to check for valid flags. It also shrinks the request scope
(@len) to what the fs can actually handle.
This addresses the following Smatch static checker warning:
fs/ntfs3/frecord.c:1894 ni_fiemap()
warn: potential integer overflow from user 'vbo + len'
Because fiemap_prep() shrinks @len, this can no longer happen.
Reported-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Link: lore.kernel.org/ntfs3/20210825080440.GA17407@kili/
Fixes: 4342306f0f0d ("fs/ntfs3: Add file operations and implementation")
Signed-off-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/file.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 89557d60a9b0..f9c9a8c91b46 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -1212,8 +1212,9 @@ int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
int err;
struct ntfs_inode *ni = ntfs_i(inode);
- if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
- return -EOPNOTSUPP;
+ err = fiemap_prep(inode, fieinfo, start, &len, ~FIEMAP_FLAG_XATTR);
+ if (err)
+ return err;
ni_lock(ni);
--
2.30.0

[PATCH OLK-5.10 030/107] fs/ntfs3: Restyle comments to better align with kernel-doc
by Yin Xiujiang 08 Dec '21
From: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
mainline inclusion
from mainline-v5.15
commit d3624466b56dd5b1886c1dff500525b544c19c83
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/attrib.c | 18 +++++++++++-------
fs/ntfs3/bitmap.c | 5 ++---
fs/ntfs3/file.c | 18 +++++++++---------
fs/ntfs3/frecord.c | 27 +++++++++++++--------------
fs/ntfs3/fslog.c | 11 +++++++----
fs/ntfs3/fsntfs.c | 8 ++++----
fs/ntfs3/index.c | 8 +++++---
fs/ntfs3/inode.c | 20 ++++++++++----------
fs/ntfs3/lznt.c | 5 +++--
fs/ntfs3/ntfs.h | 2 +-
fs/ntfs3/ntfs_fs.h | 24 ++++++++++++------------
fs/ntfs3/record.c | 2 +-
fs/ntfs3/super.c | 2 +-
fs/ntfs3/upcase.c | 2 +-
fs/ntfs3/xattr.c | 7 ++++---
15 files changed, 84 insertions(+), 75 deletions(-)
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c
index ffc323bacc9f..34c4cbf7e29b 100644
--- a/fs/ntfs3/attrib.c
+++ b/fs/ntfs3/attrib.c
@@ -199,6 +199,7 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run,
/* Add new fragment into run storage. */
if (!run_add_entry(run, vcn, lcn, flen, opt == ALLOCATE_MFT)) {
+ /* Undo last 'ntfs_look_for_free_space' */
down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
wnd_set_free(wnd, lcn, flen);
up_write(&wnd->rw_lock);
@@ -351,7 +352,6 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr,
run_close(run);
out1:
kfree(attr_s);
- /* Reinsert le. */
out:
return err;
}
@@ -1153,14 +1153,18 @@ int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type,
u16 ro;
attr = ni_find_attr(ni, NULL, NULL, type, name, name_len, &vcn, NULL);
- if (!attr)
+ if (!attr) {
+ /* Is record corrupted? */
return -ENOENT;
+ }
svcn = le64_to_cpu(attr->nres.svcn);
evcn = le64_to_cpu(attr->nres.evcn);
- if (evcn < vcn || vcn < svcn)
+ if (evcn < vcn || vcn < svcn) {
+ /* Is record corrupted? */
return -EINVAL;
+ }
ro = le16_to_cpu(attr->nres.run_off);
err = run_unpack_ex(run, ni->mi.sbi, ni->mi.rno, svcn, evcn, svcn,
@@ -1171,7 +1175,7 @@ int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type,
}
/*
- * attr_wof_load_runs_range - Load runs for given range [from to).
+ * attr_load_runs_range - Load runs for given range [from to).
*/
int attr_load_runs_range(struct ntfs_inode *ni, enum ATTR_TYPE type,
const __le16 *name, u8 name_len, struct runs_tree *run,
@@ -1974,7 +1978,7 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size)
total_size = le64_to_cpu(attr_b->nres.total_size);
if (vbo >= alloc_size) {
- // NOTE: It is allowed.
+ /* NOTE: It is allowed. */
return 0;
}
@@ -1986,9 +1990,9 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size)
bytes -= vbo;
if ((vbo & mask) || (bytes & mask)) {
- /* We have to zero a range(s)*/
+ /* We have to zero a range(s). */
if (frame_size == NULL) {
- /* Caller insists range is aligned */
+ /* Caller insists range is aligned. */
return -EINVAL;
}
*frame_size = mask + 1;
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
index 06ae38adb8ad..831501555009 100644
--- a/fs/ntfs3/bitmap.c
+++ b/fs/ntfs3/bitmap.c
@@ -29,7 +29,6 @@ struct rb_node_key {
size_t key;
};
-/* Tree is sorted by start (key). */
struct e_node {
struct rb_node_key start; /* Tree sorted by start. */
struct rb_node_key count; /* Tree sorted by len. */
@@ -1117,7 +1116,7 @@ size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint,
sb = wnd->sb;
log2_bits = sb->s_blocksize_bits + 3;
- /* At most two ranges [hint, max_alloc) + [0, hint) */
+ /* At most two ranges [hint, max_alloc) + [0, hint). */
Again:
/* TODO: Optimize request for case nbits > wbits. */
@@ -1241,7 +1240,7 @@ size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint,
continue;
}
- /* Read window */
+ /* Read window. */
bh = wnd_map(wnd, iw);
if (IS_ERR(bh)) {
// TODO: Error.
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 62ebfa324bff..89557d60a9b0 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -190,7 +190,8 @@ static int ntfs_extend_initialized_size(struct file *file,
/*
* ntfs_zero_range - Helper function for punch_hole.
- * It zeroes a range [vbo, vbo_to)
+ *
+ * It zeroes a range [vbo, vbo_to).
*/
static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
{
@@ -231,12 +232,12 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
if (!buffer_mapped(bh)) {
ntfs_get_block(inode, iblock, bh, 0);
- /* unmapped? It's a hole - nothing to do */
+ /* Unmapped? It's a hole - nothing to do. */
if (!buffer_mapped(bh))
continue;
}
- /* Ok, it's mapped. Make sure it's up-to-date */
+ /* Ok, it's mapped. Make sure it's up-to-date. */
if (PageUptodate(page))
set_buffer_uptodate(bh);
@@ -272,9 +273,8 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
}
/*
- * ntfs_sparse_cluster
+ * ntfs_sparse_cluster - Helper function to zero a new allocated clusters.
*
- * Helper function to zero a new allocated clusters
* NOTE: 512 <= cluster size <= 2M
*/
void ntfs_sparse_cluster(struct inode *inode, struct page *page0, CLST vcn,
@@ -588,7 +588,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
truncate_pagecache(inode, vbo_down);
if (!is_sparsed(ni) && !is_compressed(ni)) {
- /* normal file */
+ /* Normal file. */
err = ntfs_zero_range(inode, vbo, end);
goto out;
}
@@ -599,7 +599,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
if (err != E_NTFS_NOTALIGNED)
goto out;
- /* process not aligned punch */
+ /* Process not aligned punch. */
mask = frame_size - 1;
vbo_a = (vbo + mask) & ~mask;
end_a = end & ~mask;
@@ -647,7 +647,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
if (err)
goto out;
- /* Wait for existing dio to complete */
+ /* Wait for existing dio to complete. */
inode_dio_wait(inode);
truncate_pagecache(inode, vbo_down);
@@ -1127,7 +1127,7 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out;
if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) {
- /* Should never be here, see ntfs_file_open() */
+ /* Should never be here, see ntfs_file_open(). */
ret = -EOPNOTSUPP;
goto out;
}
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 3f48b612ec96..938b12d56ca6 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -56,7 +56,7 @@ static struct mft_inode *ni_find_mi(struct ntfs_inode *ni, CLST rno)
/*
* ni_add_mi - Add new mft_inode into ntfs_inode.
-*/
+ */
static void ni_add_mi(struct ntfs_inode *ni, struct mft_inode *mi)
{
ni_ins_mi(ni, &ni->mi_tree, mi->rno, &mi->node);
@@ -70,9 +70,8 @@ void ni_remove_mi(struct ntfs_inode *ni, struct mft_inode *mi)
rb_erase(&mi->node, &ni->mi_tree);
}
-/* ni_std
- *
- * Return: Pointer into std_info from primary record.
+/*
+ * ni_std - Return: Pointer into std_info from primary record.
*/
struct ATTR_STD_INFO *ni_std(struct ntfs_inode *ni)
{
@@ -385,7 +384,7 @@ bool ni_add_subrecord(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi)
/*
* ni_remove_attr - Remove all attributes for the given type/name/id.
-*/
+ */
int ni_remove_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
const __le16 *name, size_t name_len, bool base_only,
const __le16 *id)
@@ -740,7 +739,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni)
/*
* ni_create_attr_list - Generates an attribute list for this primary record.
-*/
+ */
int ni_create_attr_list(struct ntfs_inode *ni)
{
struct ntfs_sb_info *sbi = ni->mi.sbi;
@@ -939,7 +938,7 @@ static int ni_ins_attr_ext(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le,
if (is_mft_data &&
(mi_enum_attr(mi, NULL) ||
vbo <= ((u64)mi->rno << sbi->record_bits))) {
- /* We can't accept this record 'case MFT's bootstrapping. */
+ /* We can't accept this record 'cause MFT's bootstrapping. */
continue;
}
if (is_mft &&
@@ -1078,7 +1077,7 @@ static int ni_insert_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
*/
max_free = free;
- /* Estimate the result of moving all possible attributes away.*/
+ /* Estimate the result of moving all possible attributes away. */
attr = NULL;
while ((attr = mi_enum_attr(&ni->mi, attr))) {
@@ -1095,7 +1094,7 @@ static int ni_insert_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
goto out;
}
- /* Start real attribute moving */
+ /* Start real attribute moving. */
attr = NULL;
for (;;) {
@@ -1542,7 +1541,7 @@ int ni_delete_all(struct ntfs_inode *ni)
node = next;
}
- /* Free base record */
+ /* Free base record. */
clear_rec_inuse(ni->mi.mrec);
ni->mi.dirty = true;
err = mi_write(&ni->mi, 0);
@@ -2243,7 +2242,7 @@ int ni_decompress_file(struct ntfs_inode *ni)
}
if (attr->non_res && is_attr_sparsed(attr)) {
- /* Sarsed attribute header is 8 bytes bigger than normal. */
+ /* Sparsed attribute header is 8 bytes bigger than normal. */
struct MFT_REC *rec = mi->mrec;
u32 used = le32_to_cpu(rec->used);
u32 asize = le32_to_cpu(attr->size);
@@ -2324,7 +2323,7 @@ static int decompress_lzx_xpress(struct ntfs_sb_info *sbi, const char *cmpr,
mutex_lock(&sbi->compress.mtx_xpress);
ctx = sbi->compress.xpress;
if (!ctx) {
- /* Lazy initialize Xpress decompress context */
+ /* Lazy initialize Xpress decompress context. */
ctx = xpress_allocate_decompressor();
if (!ctx) {
err = -ENOMEM;
@@ -2348,7 +2347,7 @@ static int decompress_lzx_xpress(struct ntfs_sb_info *sbi, const char *cmpr,
/*
* ni_read_frame
*
- * Pages - array of locked pages.
+ * Pages - Array of locked pages.
*/
int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
u32 pages_per_frame)
@@ -2740,7 +2739,7 @@ int ni_write_frame(struct ntfs_inode *ni, struct page **pages,
lznt = NULL;
}
- /* Compress: frame_mem -> frame_ondisk. */
+ /* Compress: frame_mem -> frame_ondisk */
compr_size = compress_lznt(frame_mem, frame_size, frame_ondisk,
frame_size, sbi->compress.lznt);
mutex_unlock(&sbi->compress.mtx_lznt);
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
index 6f6057129fdd..b5853aed0e25 100644
--- a/fs/ntfs3/fslog.c
+++ b/fs/ntfs3/fslog.c
@@ -1362,7 +1362,8 @@ static void log_create(struct ntfs_log *log, u32 l_size, const u64 last_lsn,
/* Compute the log page values. */
log->data_off = ALIGN(
offsetof(struct RECORD_PAGE_HDR, fixups) +
- sizeof(short) * ((log->page_size >> SECTOR_SHIFT) + 1), 8);
+ sizeof(short) * ((log->page_size >> SECTOR_SHIFT) + 1),
+ 8);
log->data_size = log->page_size - log->data_off;
log->record_header_len = sizeof(struct LFS_RECORD_HDR);
@@ -1372,7 +1373,9 @@ static void log_create(struct ntfs_log *log, u32 l_size, const u64 last_lsn,
/* Compute the restart page values. */
log->ra_off = ALIGN(
offsetof(struct RESTART_HDR, fixups) +
- sizeof(short) * ((log->sys_page_size >> SECTOR_SHIFT) + 1), 8);
+ sizeof(short) *
+ ((log->sys_page_size >> SECTOR_SHIFT) + 1),
+ 8);
log->restart_size = log->sys_page_size - log->ra_off;
log->ra_size = struct_size(log->ra, clients, 1);
log->current_openlog_count = open_log_count;
@@ -5132,8 +5135,8 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
rh->sys_page_size = cpu_to_le32(log->page_size);
rh->page_size = cpu_to_le32(log->page_size);
- t16 = ALIGN(offsetof(struct RESTART_HDR, fixups) +
- sizeof(short) * t16, 8);
+ t16 = ALIGN(offsetof(struct RESTART_HDR, fixups) + sizeof(short) * t16,
+ 8);
rh->ra_off = cpu_to_le16(t16);
rh->minor_ver = cpu_to_le16(1); // 0x1A:
rh->major_ver = cpu_to_le16(1); // 0x1C:
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index 669249439217..91e3743e1442 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -312,7 +312,7 @@ int ntfs_loadlog_and_replay(struct ntfs_inode *ni, struct ntfs_sb_info *sbi)
if (sb_rdonly(sb) || !initialized)
goto out;
- /* Fill LogFile by '-1' if it is initialized.ssss */
+ /* Fill LogFile by '-1' if it is initialized. */
err = ntfs_bio_fill_1(sbi, &ni->file.run);
out:
@@ -960,10 +960,10 @@ int ntfs_set_state(struct ntfs_sb_info *sbi, enum NTFS_DIRTY_FLAGS dirty)
/* verify(!ntfs_update_mftmirr()); */
/*
- * if we used wait=1, sync_inode_metadata waits for the io for the
+ * If we used wait=1, sync_inode_metadata waits for the io for the
* inode to finish. It hangs when media is removed.
* So wait=0 is sent down to sync_inode_metadata
- * and filemap_fdatawrite is used for the data blocks
+ * and filemap_fdatawrite is used for the data blocks.
*/
err = sync_inode_metadata(&ni->vfs_inode, 0);
if (!err)
@@ -1917,7 +1917,7 @@ int ntfs_security_init(struct ntfs_sb_info *sbi)
sbi->security.next_id = SECURITY_ID_FIRST;
/* Always write new security at the end of bucket. */
sbi->security.next_off =
- ALIGN(sds_size - SecurityDescriptorsBlockSize, 16);
+ ALIGN(sds_size - SecurityDescriptorsBlockSize, 16);
off = 0;
ne = NULL;
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 1224b8e42b3e..0daca9adc54c 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -2624,17 +2624,19 @@ int indx_update_dup(struct ntfs_inode *ni, struct ntfs_sb_info *sbi,
e_fname = (struct ATTR_FILE_NAME *)(e + 1);
if (!memcmp(&e_fname->dup, dup, sizeof(*dup))) {
- /* Nothing to update in index! Try to avoid this call. */
+ /*
+ * Nothing to update in index! Try to avoid this call.
+ */
goto out;
}
memcpy(&e_fname->dup, dup, sizeof(*dup));
if (fnd->level) {
- /* directory entry in index */
+ /* Directory entry in index. */
err = indx_write(indx, ni, fnd->nodes[fnd->level - 1], sync);
} else {
- /* directory entry in directory MFT record */
+ /* Directory entry in directory MFT record. */
mi->dirty = true;
if (sync)
err = mi_write(mi, 1);
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 8f72066b3229..db2a5a4c38e4 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -89,7 +89,7 @@ static struct inode *ntfs_read_mft(struct inode *inode,
}
if (le32_to_cpu(rec->total) != sbi->record_size) {
- // Bad inode?
+ /* Bad inode? */
err = -EINVAL;
goto out;
}
@@ -605,7 +605,7 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo,
if (vbo >= valid)
set_buffer_new(bh);
} else if (create) {
- /*normal write*/
+ /* Normal write. */
if (bytes > bh->b_size)
bytes = bh->b_size;
@@ -1091,7 +1091,7 @@ int inode_write_data(struct inode *inode, const void *data, size_t bytes)
/*
* ntfs_reparse_bytes
*
- * Number of bytes to for REPARSE_DATA_BUFFER(IO_REPARSE_TAG_SYMLINK)
+ * Number of bytes for REPARSE_DATA_BUFFER(IO_REPARSE_TAG_SYMLINK)
* for unicode string of @uni_len length.
*/
static inline u32 ntfs_reparse_bytes(u32 uni_len)
@@ -1205,13 +1205,13 @@ struct inode *ntfs_create_inode(struct user_namespace *mnt_userns,
return ERR_PTR(-EINVAL);
if (S_ISDIR(mode)) {
- /* use parent's directory attributes */
+ /* Use parent's directory attributes. */
fa = dir_ni->std_fa | FILE_ATTRIBUTE_DIRECTORY |
FILE_ATTRIBUTE_ARCHIVE;
/*
- * By default child directory inherits parent attributes
- * root directory is hidden + system
- * Make an exception for children in root
+ * By default child directory inherits parent attributes.
+ * Root directory is hidden + system.
+ * Make an exception for children in root.
*/
if (dir->i_ino == MFT_REC_ROOT)
fa &= ~(FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM);
@@ -1220,8 +1220,8 @@ struct inode *ntfs_create_inode(struct user_namespace *mnt_userns,
fa = FILE_ATTRIBUTE_REPARSE_POINT;
/*
- * linux: there are dir/file/symlink and so on.
- * NTFS: symlinks are "dir + reparse" or "file + reparse".
+ * Linux: there are dir/file/symlink and so on.
+ * NTFS: symlinks are "dir + reparse" or "file + reparse"
* It is good idea to create:
* dir + reparse if 'symname' points to directory
* or
@@ -1860,7 +1860,7 @@ static noinline int ntfs_readlink_hlp(struct inode *inode, char *buffer,
default:
if (IsReparseTagMicrosoft(rp->ReparseTag)) {
- /* unknown Microsoft Tag */
+ /* Unknown Microsoft Tag. */
goto out;
}
if (!IsReparseTagNameSurrogate(rp->ReparseTag) ||
diff --git a/fs/ntfs3/lznt.c b/fs/ntfs3/lznt.c
index 3acf0d9f0b15..f1f691a67cc4 100644
--- a/fs/ntfs3/lznt.c
+++ b/fs/ntfs3/lznt.c
@@ -296,8 +296,9 @@ static inline ssize_t decompress_chunk(u8 *unc, u8 *unc_end, const u8 *cmpr,
*/
struct lznt *get_lznt_ctx(int level)
{
- struct lznt *r = kzalloc(level ? offsetof(struct lznt, hash) :
- sizeof(struct lznt), GFP_NOFS);
+ struct lznt *r = kzalloc(level ? offsetof(struct lznt, hash)
+ : sizeof(struct lznt),
+ GFP_NOFS);
if (r)
r->std = !level;
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
index 0fd7bffb98d4..6bb3e595263b 100644
--- a/fs/ntfs3/ntfs.h
+++ b/fs/ntfs3/ntfs.h
@@ -262,7 +262,7 @@ enum RECORD_FLAG {
RECORD_FLAG_UNKNOWN = cpu_to_le16(0x0008),
};
-/* MFT Record structure, */
+/* MFT Record structure. */
struct MFT_REC {
struct NTFS_RECORD_HEADER rhdr; // 'FILE'
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index f9436cbbc347..97e682ebcfb9 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -59,18 +59,18 @@ struct ntfs_mount_options {
u16 fs_fmask_inv;
u16 fs_dmask_inv;
- unsigned uid : 1, /* uid was set. */
- gid : 1, /* gid was set. */
- fmask : 1, /* fmask was set. */
- dmask : 1, /* dmask was set. */
- sys_immutable : 1,/* Immutable system files. */
- discard : 1, /* Issue discard requests on deletions. */
- sparse : 1, /* Create sparse files. */
- showmeta : 1, /* Show meta files. */
- nohidden : 1, /* Do not show hidden files. */
- force : 1, /* Rw mount dirty volume. */
- no_acs_rules : 1,/*Exclude acs rules. */
- prealloc : 1 /* Preallocate space when file is growing. */
+ unsigned uid : 1, /* uid was set. */
+ gid : 1, /* gid was set. */
+ fmask : 1, /* fmask was set. */
+ dmask : 1, /* dmask was set. */
+ sys_immutable : 1, /* Immutable system files. */
+ discard : 1, /* Issue discard requests on deletions. */
+ sparse : 1, /* Create sparse files. */
+ showmeta : 1, /* Show meta files. */
+ nohidden : 1, /* Do not show hidden files. */
+ force : 1, /* Rw mount dirty volume. */
+ no_acs_rules : 1, /*Exclude acs rules. */
+ prealloc : 1 /* Preallocate space when file is growing. */
;
};
diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c
index 61e3f2fb619f..103705c86772 100644
--- a/fs/ntfs3/record.c
+++ b/fs/ntfs3/record.c
@@ -219,7 +219,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr)
asize = le32_to_cpu(attr->size);
if (asize < SIZEOF_RESIDENT) {
- /* Impossible 'cause we should not return such attribute */
+ /* Impossible 'cause we should not return such attribute. */
return NULL;
}
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 2fbab8a931ee..dbecf095da59 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -1053,7 +1053,7 @@ static int ntfs_fill_super(struct super_block *sb, void *data, int silent)
iput(inode);
- /* Load $LogFile to replay. */
+ /* Load LogFile to replay. */
ref.low = cpu_to_le32(MFT_REC_LOG);
ref.seq = cpu_to_le16(MFT_REC_LOG);
inode = ntfs_iget5(sb, &ref, &NAME_LOGFILE);
diff --git a/fs/ntfs3/upcase.c b/fs/ntfs3/upcase.c
index eb65bbd939e8..bbeba778237e 100644
--- a/fs/ntfs3/upcase.c
+++ b/fs/ntfs3/upcase.c
@@ -34,7 +34,7 @@ static inline u16 upcase_unicode_char(const u16 *upcase, u16 chr)
* - Case insensitive
* - If name equals and 'bothcases' then
* - Case sensitive
- * 'Straigth way' code scans input names twice in worst case.
+ * 'Straight way' code scans input names twice in worst case.
* Optimized code scans input names only once.
*/
int ntfs_cmp_names(const __le16 *s1, size_t l1, const __le16 *s2, size_t l2,
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index 22fd5eb32c5b..b15d532e4a17 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -26,9 +26,10 @@
static inline size_t unpacked_ea_size(const struct EA_FULL *ea)
{
return ea->size ? le32_to_cpu(ea->size)
- : ALIGN(struct_size(
- ea, name,
- 1 + ea->name_len + le16_to_cpu(ea->elength)), 4);
+ : ALIGN(struct_size(ea, name,
+ 1 + ea->name_len +
+ le16_to_cpu(ea->elength)),
+ 4);
}
static inline size_t packed_ea_size(const struct EA_FULL *ea)
--
2.30.0
[PATCH OLK-5.10 026/107] fs/ntfs3: Fix error handling in indx_insert_into_root()
by Yin Xiujiang 08 Dec '21
From: Dan Carpenter <dan.carpenter(a)oracle.com>
mainline inclusion
from mainline-v5.15
commit b8155e95de38b25a69dfb03e4731fd6c5a28531e
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
There are three bugs in this code:
1) If indx_get_root() fails, then return -EINVAL instead of success.
2) The "/* make root external */" -EOPNOTSUPP error path should
free "re" but does not, so it leaks memory.
3) If indx_new() fails then it will lead to an error pointer dereference
when we call put_indx_node().
I've re-written the error handling to be more clear.
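The rewrite follows the usual kernel idiom: one descriptively named label
per acquired resource, unwound in reverse order of acquisition. A
self-contained userspace sketch of the pattern (names are illustrative,
not taken from the patch):

#include <errno.h>
#include <stdlib.h>

static int do_insert(int simulate_failure)
{
	char *a_root, *re;
	int err;

	a_root = malloc(32);
	if (!a_root)
		return -ENOMEM;		/* nothing to unwind yet */

	re = malloc(32);
	if (!re) {
		err = -ENOMEM;
		goto out_free_root;	/* only a_root needs freeing */
	}

	if (simulate_failure) {
		err = -EINVAL;
		goto out_free_re;	/* both buffers need freeing */
	}
	err = 0;

out_free_re:
	free(re);
out_free_root:
	free(a_root);
	return err;
}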
Fixes: 82cae269cfa9 ("fs/ntfs3: Add initialization of super block")
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Reviewed-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/index.c | 36 ++++++++++++++++--------------------
1 file changed, 16 insertions(+), 20 deletions(-)
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index f4729aa50671..69c6c4e0b4d9 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -1555,12 +1555,12 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
u32 root_size, new_root_size;
struct ntfs_sb_info *sbi;
int ds_root;
- struct INDEX_ROOT *root, *a_root = NULL;
+ struct INDEX_ROOT *root, *a_root;
/* Get the record this root placed in */
root = indx_get_root(indx, ni, &attr, &mi);
if (!root)
- goto out;
+ return -EINVAL;
/*
* Try easy case:
@@ -1592,10 +1592,8 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
/* Make a copy of root attribute to restore if error */
a_root = kmemdup(attr, asize, GFP_NOFS);
- if (!a_root) {
- err = -ENOMEM;
- goto out;
- }
+ if (!a_root)
+ return -ENOMEM;
/* copy all the non-end entries from the index root to the new buffer.*/
to_move = 0;
@@ -1605,7 +1603,7 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
for (e = e0;; e = hdr_next_de(hdr, e)) {
if (!e) {
err = -EINVAL;
- goto out;
+ goto out_free_root;
}
if (de_is_last(e))
@@ -1613,14 +1611,13 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
to_move += le16_to_cpu(e->size);
}
- n = NULL;
if (!to_move) {
re = NULL;
} else {
re = kmemdup(e0, to_move, GFP_NOFS);
if (!re) {
err = -ENOMEM;
- goto out;
+ goto out_free_root;
}
}
@@ -1637,7 +1634,7 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
if (ds_root > 0 && used + ds_root > sbi->max_bytes_per_attr) {
/* make root external */
err = -EOPNOTSUPP;
- goto out;
+ goto out_free_re;
}
if (ds_root)
@@ -1667,7 +1664,7 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
/* bug? */
ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
err = -EINVAL;
- goto out1;
+ goto out_free_re;
}
if (err) {
@@ -1678,7 +1675,7 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
/* bug? */
ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
}
- goto out1;
+ goto out_free_re;
}
e = (struct NTFS_DE *)(root + 1);
@@ -1689,7 +1686,7 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
n = indx_new(indx, ni, new_vbn, sub_vbn);
if (IS_ERR(n)) {
err = PTR_ERR(n);
- goto out1;
+ goto out_free_re;
}
hdr = &n->index->ihdr;
@@ -1716,7 +1713,7 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
put_indx_node(n);
fnd_clear(fnd);
err = indx_insert_entry(indx, ni, new_de, ctx, fnd);
- goto out;
+ goto out_free_root;
}
/*
@@ -1726,7 +1723,7 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
e = hdr_insert_de(indx, hdr, new_de, NULL, ctx);
if (!e) {
err = -EINVAL;
- goto out1;
+ goto out_put_n;
}
fnd_push(fnd, n, e);
@@ -1735,12 +1732,11 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
n = NULL;
-out1:
+out_put_n:
+ put_indx_node(n);
+out_free_re:
kfree(re);
- if (n)
- put_indx_node(n);
-
-out:
+out_free_root:
kfree(a_root);
return err;
}
--
2.30.0
[PATCH OLK-5.10 025/107] fs/ntfs3: Potential NULL dereference in hdr_find_split()
by Yin Xiujiang 08 Dec '21
From: Dan Carpenter <dan.carpenter(a)oracle.com>
mainline inclusion
from mainline-v5.15
commit 8c83a4851da1c7eda83098ade238665b15774da3
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
The "e" pointer is dereferenced before it has been checked for NULL.
Move the dereference after the NULL check to prevent an Oops.
Fixes: 82cae269cfa9 ("fs/ntfs3: Add initialization of super block")
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Reviewed-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/index.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 5fb41c9c8910..f4729aa50671 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -557,11 +557,12 @@ static const struct NTFS_DE *hdr_find_split(const struct INDEX_HDR *hdr)
size_t o;
const struct NTFS_DE *e = hdr_first_de(hdr);
u32 used_2 = le32_to_cpu(hdr->used) >> 1;
- u16 esize = le16_to_cpu(e->size);
+ u16 esize;
if (!e || de_is_last(e))
return NULL;
+ esize = le16_to_cpu(e->size);
for (o = le32_to_cpu(hdr->de_off) + esize; o < used_2; o += esize) {
const struct NTFS_DE *p = e;
--
2.30.0
08 Dec '21
From: Dan Carpenter <dan.carpenter(a)oracle.com>
mainline inclusion
from mainline-v5.15
commit 04810f000afdbdd37825ca7f563f036119422cb7
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
Return -EINVAL if ni_find_attr() fails. Don't return success.
Fixes: 82cae269cfa9 ("fs/ntfs3: Add initialization of super block")
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Reviewed-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/index.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 334a3cef714b..5fb41c9c8910 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -1500,6 +1500,7 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
alloc = ni_find_attr(ni, NULL, NULL, ATTR_ALLOC, in->name, in->name_len,
NULL, &mi);
if (!alloc) {
+ err = -EINVAL;
if (bmp)
goto out2;
goto out1;
--
2.30.0
[PATCH OLK-5.10 023/107] fs/ntfs3: fix an error code in ntfs_get_acl_ex()
by Yin Xiujiang 08 Dec '21
From: Dan Carpenter <dan.carpenter(a)oracle.com>
mainline inclusion
from mainline-v5.15
commit 2926e4297053c735ab65450192dfba32a4f47fa9
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
The ntfs_get_ea() function returns negative error codes or on success
it returns the length. In the original code a zero length return was
treated as -ENODATA and resulted in a NULL return. But it should be
treated as an invalid length and result in a PTR_ERR(-EINVAL) return.
Fixes: be71b5cba2e6 ("fs/ntfs3: Add attrib operations")
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/xattr.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index af89e50f7b9f..d3d5b9d331d1 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -521,7 +521,7 @@ static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns,
ni_unlock(ni);
/* Translate extended attribute to acl */
- if (err > 0) {
+ if (err >= 0) {
acl = posix_acl_from_xattr(mnt_userns, buf, err);
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
--
2.30.0
08 Dec '21
From: Dan Carpenter <dan.carpenter(a)oracle.com>
mainline inclusion
from mainline-v5.15
commit a1b04d380ab64790a7b4a8eb52e14679e47065ab
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
Add a check for when the kzalloc() in init_rsttbl() fails. Some of
the callers checked for NULL and some did not. I went down the call
tree and added NULL checks wherever they were missing.
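The shape of the fix, as a userspace sketch with hypothetical names (the
kernel code uses kzalloc()/GFP_NOFS): each allocator in the chain may
return NULL, and each caller checks before dereferencing:

#include <stdlib.h>
#include <string.h>

struct table {
	unsigned short esize;
	unsigned short used;
	char data[];
};

/* Analogue of init_rsttbl(): may return NULL on allocation failure. */
static struct table *table_new(unsigned short esize, unsigned short used)
{
	struct table *t = calloc(1, sizeof(*t) + (size_t)esize * used);

	if (!t)
		return NULL;		/* the check this patch adds */
	t->esize = esize;
	t->used = used;
	return t;
}

/* Analogue of extend_rsttbl(): propagates the NULL upward. */
static struct table *table_extend(struct table *old, unsigned short add)
{
	struct table *t = table_new(old->esize, old->used + add);

	if (!t)
		return NULL;
	memcpy(t->data, old->data, (size_t)old->esize * old->used);
	free(old);
	return t;
}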
Fixes: b46acd6a6a62 ("fs/ntfs3: Add NTFS journal")
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Reviewed-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/fslog.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
index 2c213b55979e..7144ea8a9ab8 100644
--- a/fs/ntfs3/fslog.c
+++ b/fs/ntfs3/fslog.c
@@ -809,6 +809,9 @@ static inline struct RESTART_TABLE *init_rsttbl(u16 esize, u16 used)
u32 lf = sizeof(struct RESTART_TABLE) + (used - 1) * esize;
struct RESTART_TABLE *t = kzalloc(bytes, GFP_NOFS);
+ if (!t)
+ return NULL;
+
t->size = cpu_to_le16(esize);
t->used = cpu_to_le16(used);
t->free_goal = cpu_to_le32(~0u);
@@ -831,7 +834,11 @@ static inline struct RESTART_TABLE *extend_rsttbl(struct RESTART_TABLE *tbl,
u16 esize = le16_to_cpu(tbl->size);
__le32 osize = cpu_to_le32(bytes_per_rt(tbl));
u32 used = le16_to_cpu(tbl->used);
- struct RESTART_TABLE *rt = init_rsttbl(esize, used + add);
+ struct RESTART_TABLE *rt;
+
+ rt = init_rsttbl(esize, used + add);
+ if (!rt)
+ return NULL;
memcpy(rt + 1, tbl + 1, esize * used);
@@ -864,8 +871,11 @@ static inline void *alloc_rsttbl_idx(struct RESTART_TABLE **tbl)
__le32 *e;
struct RESTART_TABLE *t = *tbl;
- if (!t->first_free)
+ if (!t->first_free) {
*tbl = t = extend_rsttbl(t, 16, ~0u);
+ if (!t)
+ return NULL;
+ }
off = le32_to_cpu(t->first_free);
@@ -4482,6 +4492,10 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
}
dp = alloc_rsttbl_idx(&dptbl);
+ if (!dp) {
+ err = -ENOMEM;
+ goto out;
+ }
dp->target_attr = cpu_to_le32(t16);
dp->transfer_len = cpu_to_le32(t32 << sbi->cluster_bits);
dp->lcns_follow = cpu_to_le32(t32);
--
2.30.0
[PATCH OLK-5.10 021/107] fs/ntfs3: Use kcalloc/kmalloc_array over kzalloc/kmalloc
by Yin Xiujiang 08 Dec '21
From: Kari Argillander <kari.argillander(a)gmail.com>
mainline inclusion
from mainline-v5.15
commit 345482bc431f6492beb464696341626057f67771
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
Use kcalloc/kmalloc_array over kzalloc/kmalloc when we allocate an
array. Checkpatch found these once we no longer used our own
allocation wrappers.
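The point of the array variants is that the element-count multiplication
is overflow-checked; on overflow they return NULL instead of allocating a
too-small buffer. A userspace analogue of that check (a sketch, not the
kernel implementation, which goes through check_mul_overflow()):

#include <stddef.h>
#include <stdlib.h>

/* Overflow-checked array allocation, analogous to kmalloc_array(). */
static void *malloc_array(size_t n, size_t size)
{
	if (size != 0 && n > (size_t)-1 / size)
		return NULL;	/* n * size would wrap around */
	return malloc(n * size);
}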
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/bitmap.c | 2 +-
fs/ntfs3/file.c | 2 +-
fs/ntfs3/frecord.c | 7 +++----
3 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
index d502bba323d0..2de05062c78b 100644
--- a/fs/ntfs3/bitmap.c
+++ b/fs/ntfs3/bitmap.c
@@ -683,7 +683,7 @@ int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
if (!wnd->bits_last)
wnd->bits_last = wbits;
- wnd->free_bits = kzalloc(wnd->nwnd * sizeof(u16), GFP_NOFS);
+ wnd->free_bits = kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS);
if (!wnd->free_bits)
return -ENOMEM;
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 8d27c520bec5..a959f6197c99 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -900,7 +900,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
return -EOPNOTSUPP;
}
- pages = kmalloc(pages_per_frame * sizeof(struct page *), GFP_NOFS);
+ pages = kmalloc_array(pages_per_frame, sizeof(struct page *), GFP_NOFS);
if (!pages)
return -ENOMEM;
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 2f7d16543530..329bc76dfb09 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -2054,7 +2054,7 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page)
idx = (vbo - frame_vbo) >> PAGE_SHIFT;
pages_per_frame = frame_size >> PAGE_SHIFT;
- pages = kzalloc(pages_per_frame * sizeof(struct page *), GFP_NOFS);
+ pages = kcalloc(pages_per_frame, sizeof(struct page *), GFP_NOFS);
if (!pages) {
err = -ENOMEM;
goto out;
@@ -2137,7 +2137,7 @@ int ni_decompress_file(struct ntfs_inode *ni)
frame_bits = ni_ext_compress_bits(ni);
frame_size = 1u << frame_bits;
pages_per_frame = frame_size >> PAGE_SHIFT;
- pages = kzalloc(pages_per_frame * sizeof(struct page *), GFP_NOFS);
+ pages = kcalloc(pages_per_frame, sizeof(struct page *), GFP_NOFS);
if (!pages) {
err = -ENOMEM;
goto out;
@@ -2709,8 +2709,7 @@ int ni_write_frame(struct ntfs_inode *ni, struct page **pages,
goto out;
}
- pages_disk = kzalloc(pages_per_frame * sizeof(struct page *),
- GFP_NOFS);
+ pages_disk = kcalloc(pages_per_frame, sizeof(struct page *), GFP_NOFS);
if (!pages_disk) {
err = -ENOMEM;
goto out;
--
2.30.0
[PATCH OLK-5.10 017/107] fs/ntfs3: Remove unused including <linux/version.h>
by Yin Xiujiang 08 Dec '21
From: Jiapeng Chong <jiapeng.chong(a)linux.alibaba.com>
mainline inclusion
from mainline-v5.15
commit 1263eddfea9988125a4b9608efecc8aff2c721f9
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
Eliminate the following versioncheck warning:
./fs/ntfs3/inode.c: 16 linux/version.h not needed.
Reported-by: Abaci Robot <abaci(a)linux.alibaba.com>
Fixes: 82cae269cfa9 ("fs/ntfs3: Add initialization of super block")
Signed-off-by: Jiapeng Chong <jiapeng.chong(a)linux.alibaba.com>
Reviewed-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/inode.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index a573c6e98cb8..ed64489edf73 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -13,7 +13,6 @@
#include <linux/namei.h>
#include <linux/nls.h>
#include <linux/uio.h>
-#include <linux/version.h>
#include <linux/writeback.h>
#include "debug.h"
--
2.30.0
08 Dec '21
From: "Gustavo A. R. Silva" <gustavoars(a)kernel.org>
mainline inclusion
from mainline-v5.15
commit abfeb2ee2103f07dd93b9d7b32317e26d1c8ef79
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
Fix the following fallthrough warnings:
fs/ntfs3/inode.c:1792:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/ntfs3/index.c:178:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
This helps with the ongoing efforts to globally enable
-Wimplicit-fallthrough for Clang.
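With -Wimplicit-fallthrough enabled, any case reached by falling off the
previous one must be marked explicitly; an unmarked fall-through is
assumed to be a missing break (the kernel marks the intentional ones with
the fallthrough; pseudo-keyword). A minimal illustration, not from the
patch; compile with -Wimplicit-fallthrough to see the warning:

int classify(int err)
{
	int severity = 0;

	switch (err) {
	case 0:
		severity = 1;
		break;		/* without this break, the compiler warns */
	case -1:
	case -2:
		severity = 2;
		break;
	default:
		break;
	}
	return severity;
}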
Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars(a)kernel.org>
Reviewed-by: Nathan Chancellor <nathan(a)kernel.org>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/index.c | 1 +
fs/ntfs3/inode.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 9386c551e208..189d46e2c38d 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -175,6 +175,7 @@ static inline NTFS_CMP_FUNC get_cmp_func(const struct INDEX_ROOT *root)
default:
break;
}
+ break;
default:
break;
}
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index bf51e294432e..a573c6e98cb8 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -1789,6 +1789,7 @@ int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry)
switch (err) {
case 0:
drop_nlink(inode);
+ break;
case -ENOTEMPTY:
case -ENOSPC:
case -EROFS:
--
2.30.0
08 Dec '21
From: Kari Argillander <kari.argillander(a)gmail.com>
mainline inclusion
from mainline-v5.15
commit be87e821fdb5ec8c6d404f29e118130c7879ce5b
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
In one source file there is, for some reason, a non-UTF-8 char. But hey,
this is fs development, so this kind of thing might happen.
Signed-off-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/frecord.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index c3121bf9c62f..e377d72477df 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -1784,7 +1784,7 @@ enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr,
/*
* WOF - Windows Overlay Filter - used to compress files with lzx/xpress
* Unlike native NTFS file compression, the Windows Overlay Filter supports
- * only read operations. This means that it doesn�t need to sector-align each
+ * only read operations. This means that it doesn't need to sector-align each
* compressed chunk, so the compressed data can be packed more tightly together.
* If you open the file for writing, the Windows Overlay Filter just decompresses
* the entire file, turning it back into a plain file.
--
2.30.0
[PATCH OLK-5.10 014/107] fs/ntfs3: Remove unused variable cnt in ntfs_security_init()
by Yin Xiujiang 08 Dec '21
From: Nathan Chancellor <nathan(a)kernel.org>
mainline inclusion
from mainline-v5.15
commit 8c01308b6d6b2bc8e9163c6a3400856fb782dee6
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
Clang warns:
fs/ntfs3/fsntfs.c:1874:9: warning: variable 'cnt' set but not used
[-Wunused-but-set-variable]
size_t cnt, off;
^
1 warning generated.
It is indeed unused, so remove it.
Fixes: 82cae269cfa9 ("fs/ntfs3: Add initialization of super block")
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers(a)google.com>
Reviewed-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/fsntfs.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index 92140050fb6c..c6599c514acf 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -1871,7 +1871,7 @@ int ntfs_security_init(struct ntfs_sb_info *sbi)
struct ATTRIB *attr;
struct ATTR_LIST_ENTRY *le;
u64 sds_size;
- size_t cnt, off;
+ size_t off;
struct NTFS_DE *ne;
struct NTFS_DE_SII *sii_e;
struct ntfs_fnd *fnd_sii = NULL;
@@ -1946,7 +1946,6 @@ int ntfs_security_init(struct ntfs_sb_info *sbi)
sbi->security.next_off =
Quad2Align(sds_size - SecurityDescriptorsBlockSize);
- cnt = 0;
off = 0;
ne = NULL;
@@ -1964,8 +1963,6 @@ int ntfs_security_init(struct ntfs_sb_info *sbi)
next_id = le32_to_cpu(sii_e->sec_id) + 1;
if (next_id >= sbi->security.next_id)
sbi->security.next_id = next_id;
-
- cnt += 1;
}
sbi->security.ni = ni;
--
2.30.0
[PATCH OLK-5.10 013/107] fs/ntfs3: Fix integer overflow in multiplication
by Yin Xiujiang 08 Dec '21
From: Colin Ian King <colin.king(a)canonical.com>
mainline inclusion
from mainline-v5.15
commit 71eeb6ace80be7389d942b9647765417e5b039f7
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
The multiplication of the u32 data_size with an int is performed
using 32-bit arithmetic; however, the result is assigned to the
variable nbits, which is a size_t (64-bit) value. Fix a potential
integer overflow by casting the u32 value to a size_t before the
multiply, so that a size_t-sized multiply operation is used.
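The underlying C rule: both operands of a multiply are converted to a
common type before the operation, so u32 * int is computed in 32 bits
even when the result is stored in a 64-bit variable. A self-contained
demonstration (assumes a 64-bit size_t):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t data_size = 0x80000000u;	/* 2 GiB resident size */
	size_t nbits;

	nbits = data_size * 8;			/* 32-bit multiply: wraps to 0 */
	printf("wrapped: %zu\n", nbits);

	nbits = (size_t)data_size * 8;		/* 64-bit multiply: 0x400000000 */
	printf("correct: %zu\n", nbits);
	return 0;
}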
Addresses-Coverity: ("Unintentional integer overflow")
Fixes: 82cae269cfa9 ("fs/ntfs3: Add initialization of super block")
Signed-off-by: Colin Ian King <colin.king(a)canonical.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/index.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 6aa9540ece47..9386c551e208 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -2012,7 +2012,7 @@ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni,
unsigned long pos;
const unsigned long *bm = resident_data(b);
- nbits = le32_to_cpu(b->res.data_size) * 8;
+ nbits = (size_t)le32_to_cpu(b->res.data_size) * 8;
if (bit >= nbits)
return 0;
--
2.30.0
[PATCH OLK-5.10 012/107] fs/ntfs3: Add ifndef + define to all header files
by Yin Xiujiang 08 Dec '21
From: Kari Argillander <kari.argillander(a)gmail.com>
mainline inclusion
from mainline-v5.15
commit 87790b65343932411af43bc9b218f086ecebd6a5
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
Add guards so that the compiler will only include each header file once.
Signed-off-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/debug.h | 5 +++++
fs/ntfs3/ntfs.h | 3 +++
fs/ntfs3/ntfs_fs.h | 5 +++++
3 files changed, 13 insertions(+)
diff --git a/fs/ntfs3/debug.h b/fs/ntfs3/debug.h
index 15ac42185e5b..357d9f4dfba7 100644
--- a/fs/ntfs3/debug.h
+++ b/fs/ntfs3/debug.h
@@ -7,6 +7,9 @@
*/
// clang-format off
+#ifndef _LINUX_NTFS3_DEBUG_H
+#define _LINUX_NTFS3_DEBUG_H
+
#ifndef Add2Ptr
#define Add2Ptr(P, I) ((void *)((u8 *)(P) + (I)))
#define PtrOffset(B, O) ((size_t)((size_t)(O) - (size_t)(B)))
@@ -61,4 +64,6 @@ void ntfs_inode_printk(struct inode *inode, const char *fmt, ...)
#define ntfs_free(p) kfree(p)
#define ntfs_vfree(p) kvfree(p)
#define ntfs_memdup(src, len) kmemdup(src, len, GFP_NOFS)
+
+#endif /* _LINUX_NTFS3_DEBUG_H */
// clang-format on
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
index 40398e6c39c9..16da514af124 100644
--- a/fs/ntfs3/ntfs.h
+++ b/fs/ntfs3/ntfs.h
@@ -7,6 +7,8 @@
*/
// clang-format off
+#ifndef _LINUX_NTFS3_NTFS_H
+#define _LINUX_NTFS3_NTFS_H
/* TODO:
* - Check 4K mft record and 512 bytes cluster
@@ -1235,4 +1237,5 @@ struct SID {
};
static_assert(offsetof(struct SID, SubAuthority) == 8);
+#endif /* _LINUX_NTFS3_NTFS_H */
// clang-format on
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index c8ea6dd38c21..b5da2f06f7cb 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -6,6 +6,9 @@
*/
// clang-format off
+#ifndef _LINUX_NTFS3_NTFS_FS_H
+#define _LINUX_NTFS3_NTFS_FS_H
+
#define MINUS_ONE_T ((size_t)(-1))
/* Biggest MFT / smallest cluster */
#define MAXIMUM_BYTES_PER_MFT 4096
@@ -1085,3 +1088,5 @@ static inline void le64_sub_cpu(__le64 *var, u64 val)
{
*var = cpu_to_le64(le64_to_cpu(*var) - val);
}
+
+#endif /* _LINUX_NTFS3_NTFS_FS_H */
--
2.30.0
08 Dec '21
From: Kari Argillander <kari.argillander(a)gmail.com>
mainline inclusion
from mainline-v5.15
commit 528c9b3d1edf291685151afecd741d176f527ddf
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
We do not need our own implementation for this function in this
driver. It is much better to use the generic one.
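For reference, the generic helper from <linux/log2.h> is the standard
bit trick, roughly equivalent to:

#include <stdbool.h>

/* A power of two has exactly one bit set, so n & (n - 1) clears it
 * and yields zero (sketch equivalent to the kernel's is_power_of_2()). */
static inline bool is_power_of_2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}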
Signed-off-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/ntfs_fs.h | 5 -----
fs/ntfs3/run.c | 3 ++-
fs/ntfs3/super.c | 9 +++++----
3 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index 0c3ac89c3115..c8ea6dd38c21 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -972,11 +972,6 @@ static inline struct buffer_head *ntfs_bread(struct super_block *sb,
return NULL;
}
-static inline bool is_power_of2(size_t v)
-{
- return v && !(v & (v - 1));
-}
-
static inline struct ntfs_inode *ntfs_i(struct inode *inode)
{
return container_of(inode, struct ntfs_inode, vfs_inode);
diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c
index f9c362ac672e..60c64deab738 100644
--- a/fs/ntfs3/run.c
+++ b/fs/ntfs3/run.c
@@ -9,6 +9,7 @@
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
+#include <linux/log2.h>
#include <linux/nls.h>
#include "debug.h"
@@ -376,7 +377,7 @@ bool run_add_entry(struct runs_tree *run, CLST vcn, CLST lcn, CLST len,
if (!used) {
bytes = 64;
} else if (used <= 16 * PAGE_SIZE) {
- if (is_power_of2(run->allocated))
+ if (is_power_of_2(run->allocated))
bytes = run->allocated << 1;
else
bytes = (size_t)1
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 84d4f389f685..903975b7e832 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -29,6 +29,7 @@
#include <linux/exportfs.h>
#include <linux/fs.h>
#include <linux/iversion.h>
+#include <linux/log2.h>
#include <linux/module.h>
#include <linux/nls.h>
#include <linux/parser.h>
@@ -735,13 +736,13 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
boot_sector_size = (u32)boot->bytes_per_sector[1] << 8;
if (boot->bytes_per_sector[0] || boot_sector_size < SECTOR_SIZE ||
- !is_power_of2(boot_sector_size)) {
+ !is_power_of_2(boot_sector_size)) {
goto out;
}
/* cluster size: 512, 1K, 2K, 4K, ... 2M */
sct_per_clst = true_sectors_per_clst(boot);
- if (!is_power_of2(sct_per_clst))
+ if (!is_power_of_2(sct_per_clst))
goto out;
mlcn = le64_to_cpu(boot->mft_clst);
@@ -757,14 +758,14 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
/* Check MFT record size */
if ((boot->record_size < 0 &&
SECTOR_SIZE > (2U << (-boot->record_size))) ||
- (boot->record_size >= 0 && !is_power_of2(boot->record_size))) {
+ (boot->record_size >= 0 && !is_power_of_2(boot->record_size))) {
goto out;
}
/* Check index record size */
if ((boot->index_size < 0 &&
SECTOR_SIZE > (2U << (-boot->index_size))) ||
- (boot->index_size >= 0 && !is_power_of2(boot->index_size))) {
+ (boot->index_size >= 0 && !is_power_of_2(boot->index_size))) {
goto out;
}
--
2.30.0
08 Dec '21
From: Colin Ian King <colin.king(a)canonical.com>
mainline inclusion
from mainline-v5.15
commit f8d87ed9f0d546ac5b05e8e7d2b148d4b77599fa
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
There is a spelling mistake in an ntfs_err error message. Also
fix various spelling mistakes in comments.
Signed-off-by: Colin Ian King <colin.king(a)canonical.com>
Reviewed-by: Kari Argillander <kari.argillander(a)gmail.com>
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/debug.h | 2 +-
fs/ntfs3/lib/decompress_common.c | 2 +-
fs/ntfs3/run.c | 2 +-
fs/ntfs3/super.c | 4 ++--
fs/ntfs3/upcase.c | 2 +-
5 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/fs/ntfs3/debug.h b/fs/ntfs3/debug.h
index dfaa4c79dc6d..15ac42185e5b 100644
--- a/fs/ntfs3/debug.h
+++ b/fs/ntfs3/debug.h
@@ -3,7 +3,7 @@
*
* Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
*
- * useful functions for debuging
+ * useful functions for debugging
*/
// clang-format off
diff --git a/fs/ntfs3/lib/decompress_common.c b/fs/ntfs3/lib/decompress_common.c
index 83c9e93aea77..850d8e8c8f1f 100644
--- a/fs/ntfs3/lib/decompress_common.c
+++ b/fs/ntfs3/lib/decompress_common.c
@@ -292,7 +292,7 @@ int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
* of as simply the root of the tree. The
* representation of these internal nodes is
* simply the index of the left child combined
- * with the special bits 0xC000 to distingush
+ * with the special bits 0xC000 to distinguish
* the entry from direct mapping and leaf node
* entries.
*/
diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c
index 5cdf6efe67e0..f9c362ac672e 100644
--- a/fs/ntfs3/run.c
+++ b/fs/ntfs3/run.c
@@ -949,7 +949,7 @@ int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
if (next_vcn > 0x100000000ull || (lcn + len) > 0x100000000ull) {
ntfs_err(
sbi->sb,
- "This driver is compiled whitout CONFIG_NTFS3_64BIT_CLUSTER (like windows driver).\n"
+ "This driver is compiled without CONFIG_NTFS3_64BIT_CLUSTER (like windows driver).\n"
"Volume contains 64 bits run: vcn %llx, lcn %llx, len %llx.\n"
"Activate CONFIG_NTFS3_64BIT_CLUSTER to process this case",
vcn64, lcn, len);
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 6be13e256c1a..84d4f389f685 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -124,7 +124,7 @@ void ntfs_inode_printk(struct inode *inode, const char *fmt, ...)
/*
* Shared memory struct.
*
- * on-disk ntfs's upcase table is created by ntfs formater
+ * on-disk ntfs's upcase table is created by ntfs formatter
* 'upcase' table is 128K bytes of memory
* we should read it into memory when mounting
* Several ntfs volumes likely use the same 'upcase' table
@@ -1208,7 +1208,7 @@ static int ntfs_fill_super(struct super_block *sb, void *data, int silent)
sbi->def_entries = 1;
done = sizeof(struct ATTR_DEF_ENTRY);
sbi->reparse.max_size = MAXIMUM_REPARSE_DATA_BUFFER_SIZE;
- sbi->ea_max_size = 0x10000; /* default formater value */
+ sbi->ea_max_size = 0x10000; /* default formatter value */
while (done + sizeof(struct ATTR_DEF_ENTRY) <= bytes) {
u32 t32 = le32_to_cpu(t->type);
diff --git a/fs/ntfs3/upcase.c b/fs/ntfs3/upcase.c
index 9617382aca64..b53943538f9f 100644
--- a/fs/ntfs3/upcase.c
+++ b/fs/ntfs3/upcase.c
@@ -27,7 +27,7 @@ static inline u16 upcase_unicode_char(const u16 *upcase, u16 chr)
/*
* Thanks Kari Argillander <kari.argillander(a)gmail.com> for idea and implementation 'bothcase'
*
- * Straigth way to compare names:
+ * Straight way to compare names:
* - case insensitive
* - if name equals and 'bothcases' then
* - case sensitive
--
2.30.0
[PATCH OLK-5.10 009/107] fs/ntfs3: Add NTFS3 in fs/Kconfig and fs/Makefile
by Yin Xiujiang 08 Dec '21
From: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
mainline inclusion
from mainline-v5.15
commit 6e5be40d32fb1907285277c02e74493ed43d77fe
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
This adds NTFS3 in fs/Kconfig and fs/Makefile
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/Kconfig | 1 +
fs/Makefile | 1 +
2 files changed, 2 insertions(+)
diff --git a/fs/Kconfig b/fs/Kconfig
index 3cc647e00f3c..225088d505f4 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -145,6 +145,7 @@ menu "DOS/FAT/EXFAT/NT Filesystems"
source "fs/fat/Kconfig"
source "fs/exfat/Kconfig"
source "fs/ntfs/Kconfig"
+source "fs/ntfs3/Kconfig"
endmenu
endif # BLOCK
diff --git a/fs/Makefile b/fs/Makefile
index fec76c1b4e06..73acb48ce6bc 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_SYSV_FS) += sysv/
obj-$(CONFIG_CIFS) += cifs/
obj-$(CONFIG_HPFS_FS) += hpfs/
obj-$(CONFIG_NTFS_FS) += ntfs/
+obj-$(CONFIG_NTFS3_FS) += ntfs3/
obj-$(CONFIG_UFS_FS) += ufs/
obj-$(CONFIG_EFS_FS) += efs/
obj-$(CONFIG_JFFS2_FS) += jffs2/
--
2.30.0
08 Dec '21
From: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
mainline inclusion
from mainline-v5.15
commit 12dad495eaab95e0bb784c43869073617c513ea4
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
This adds Kconfig, Makefile and doc
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
Documentation/filesystems/index.rst | 1 +
Documentation/filesystems/ntfs3.rst | 106 ++++++++++++++++++++++++++++
fs/ntfs3/Kconfig | 46 ++++++++++++
fs/ntfs3/Makefile | 36 ++++++++++
4 files changed, 189 insertions(+)
create mode 100644 Documentation/filesystems/ntfs3.rst
create mode 100644 fs/ntfs3/Kconfig
create mode 100644 fs/ntfs3/Makefile
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 98f59a864242..757684537248 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -97,6 +97,7 @@ Documentation for filesystem implementations.
nilfs2
nfs/index
ntfs
+ ntfs3
ocfs2
ocfs2-online-filecheck
omfs
diff --git a/Documentation/filesystems/ntfs3.rst b/Documentation/filesystems/ntfs3.rst
new file mode 100644
index 000000000000..ffe9ea0c1499
--- /dev/null
+++ b/Documentation/filesystems/ntfs3.rst
@@ -0,0 +1,106 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====
+NTFS3
+=====
+
+
+Summary and Features
+====================
+
+NTFS3 is fully functional NTFS Read-Write driver. The driver works with
+NTFS versions up to 3.1, normal/compressed/sparse files
+and journal replaying. File system type to use on mount is 'ntfs3'.
+
+- This driver implements NTFS read/write support for normal, sparse and
+ compressed files.
+- Supports native journal replaying;
+- Supports extended attributes
+ Predefined extended attributes:
+ - 'system.ntfs_security' gets/sets security
+ descriptor (SECURITY_DESCRIPTOR_RELATIVE)
+ - 'system.ntfs_attrib' gets/sets ntfs file/dir attributes.
+ Note: applied to empty files, this allows to switch type between
+ sparse(0x200), compressed(0x800) and normal;
+- Supports NFS export of mounted NTFS volumes.
+
+Mount Options
+=============
+
+The list below describes mount options supported by NTFS3 driver in addition to
+generic ones.
+
+===============================================================================
+
+nls=name This option informs the driver how to interpret path
+ strings and translate them to Unicode and back. If
+ this option is not set, the default codepage will be
+ used (CONFIG_NLS_DEFAULT).
+ Examples:
+ 'nls=utf8'
+
+uid=
+gid=
+umask= Controls the default permissions for files/directories created
+ after the NTFS volume is mounted.
+
+fmask=
+dmask= Instead of specifying umask which applies both to
+ files and directories, fmask applies only to files and
+ dmask only to directories.
+
+nohidden Files with the Windows-specific HIDDEN (FILE_ATTRIBUTE_HIDDEN)
+ attribute will not be shown under Linux.
+
+sys_immutable Files with the Windows-specific SYSTEM
+ (FILE_ATTRIBUTE_SYSTEM) attribute will be marked as system
+ immutable files.
+
+discard Enable support of the TRIM command for improved performance
+ on delete operations, which is recommended for use with the
+ solid-state drives (SSD).
+
+force Forces the driver to mount partitions even if 'dirty' flag
+ (volume dirty) is set. Not recommended for use.
+
+sparse Create new files as "sparse".
+
+showmeta Use this parameter to show all meta-files (System Files) on
+ a mounted NTFS partition.
+ By default, all meta-files are hidden.
+
+prealloc Preallocate space for files excessively when file size is
+ increasing on writes. Decreases fragmentation in case of
+ parallel write operations to different files.
+
+no_acs_rules "No access rules" mount option sets access rights for
+ files/folders to 777 and owner/group to root. This mount
+ option absorbs all other permissions:
+ - permissions change for files/folders will be reported
+ as successful, but they will remain 777;
+ - owner/group change will be reported as successful, but
+ they will stay as root
+
+acl Support POSIX ACLs (Access Control Lists). Effective if
+ supported by Kernel. Not to be confused with NTFS ACLs.
+ The option specified as acl enables support for POSIX ACLs.
+
+noatime All files and directories will not update their last access
+ time attribute if a partition is mounted with this parameter.
+ This option can speed up file system operation.
+
+===============================================================================
+
+ToDo list
+=========
+
+- Full journaling support (currently journal replaying is supported) over JBD.
+
+
+References
+==========
+https://www.paragon-software.com/home/ntfs-linux-professional/
+ - Commercial version of the NTFS driver for Linux.
+
+almaz.alexandrovich(a)paragon-software.com
+ - Direct e-mail address for feedback and requests on the NTFS3 implementation.
diff --git a/fs/ntfs3/Kconfig b/fs/ntfs3/Kconfig
new file mode 100644
index 000000000000..6e4cbc48ab8e
--- /dev/null
+++ b/fs/ntfs3/Kconfig
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config NTFS3_FS
+ tristate "NTFS Read-Write file system support"
+ select NLS
+ help
+ Windows OS native file system (NTFS) support up to NTFS version 3.1.
+
+ Y or M enables the NTFS3 driver with full features enabled (read,
+ write, journal replaying, sparse/compressed files support).
+ File system type to use on mount is "ntfs3". Module name (M option)
+ is also "ntfs3".
+
+ Documentation: <file:Documentation/filesystems/ntfs3.rst>
+
+config NTFS3_64BIT_CLUSTER
+ bool "64 bits per NTFS clusters"
+ depends on NTFS3_FS && 64BIT
+ help
+ Windows implementation of ntfs.sys uses 32 bits per clusters.
+ If activated 64 bits per clusters you will be able to use 4k cluster
+ for 16T+ volumes. Windows will not be able to mount such volumes.
+
+ It is recommended to say N here.
+
+config NTFS3_LZX_XPRESS
+ bool "activate support of external compressions lzx/xpress"
+ depends on NTFS3_FS
+ help
+ In Windows 10 one can use command "compact" to compress any files.
+ 4 possible variants of compression are: xpress4k, xpress8k, xpress16k and lzx.
+ If activated you will be able to read such files correctly.
+
+ It is recommended to say Y here.
+
+config NTFS3_FS_POSIX_ACL
+ bool "NTFS POSIX Access Control Lists"
+ depends on NTFS3_FS
+ select FS_POSIX_ACL
+ help
+ POSIX Access Control Lists (ACLs) support additional access rights
+ for users and groups beyond the standard owner/group/world scheme,
+ and this option selects support for ACLs specifically for ntfs
+ filesystems.
+	  NOTE: this is a Linux-only feature. Windows will ignore these ACLs.
+
+ If you don't know what Access Control Lists are, say N.
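+
+# Illustrative .config fragment (not part of the Kconfig language above,
+# just an example using the options it defines): build the driver as a
+# module with compression and POSIX ACL support enabled:
+#   CONFIG_NTFS3_FS=m
+#   CONFIG_NTFS3_LZX_XPRESS=y
+#   CONFIG_NTFS3_FS_POSIX_ACL=y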
diff --git a/fs/ntfs3/Makefile b/fs/ntfs3/Makefile
new file mode 100644
index 000000000000..279701b62bbe
--- /dev/null
+++ b/fs/ntfs3/Makefile
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the ntfs3 filesystem support.
+#
+
+# to check robot warnings
+ccflags-y += -Wint-to-pointer-cast \
+ $(call cc-option,-Wunused-but-set-variable,-Wunused-const-variable) \
+ $(call cc-option,-Wold-style-declaration,-Wout-of-line-declaration)
+
+obj-$(CONFIG_NTFS3_FS) += ntfs3.o
+
+ntfs3-y := attrib.o \
+ attrlist.o \
+ bitfunc.o \
+ bitmap.o \
+ dir.o \
+ fsntfs.o \
+ frecord.o \
+ file.o \
+ fslog.o \
+ inode.o \
+ index.o \
+ lznt.o \
+ namei.o \
+ record.o \
+ run.o \
+ super.o \
+ upcase.o \
+ xattr.o
+
+ntfs3-$(CONFIG_NTFS3_LZX_XPRESS) += $(addprefix lib/,\
+ decompress_common.o \
+ lzx_decompress.o \
+ xpress_decompress.o \
+ )
\ No newline at end of file
--
2.30.0
From: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
mainline inclusion
from mainline-v5.15
commit b46acd6a6a627d876898e1c84d3f84902264b445
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
This adds NTFS journal
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/fslog.c | 5182 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 5182 insertions(+)
create mode 100644 fs/ntfs3/fslog.c
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
new file mode 100644
index 000000000000..397ba6a956e7
--- /dev/null
+++ b/fs/ntfs3/fslog.c
@@ -0,0 +1,5182 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/hash.h>
+#include <linux/nls.h>
+#include <linux/random.h>
+#include <linux/ratelimit.h>
+#include <linux/slab.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+/*
+ * LOG FILE structs
+ */
+
+// clang-format off
+
+#define MaxLogFileSize 0x100000000ull
+#define DefaultLogPageSize 4096
+#define MinLogRecordPages 0x30
+
+struct RESTART_HDR {
+ struct NTFS_RECORD_HEADER rhdr; // 'RSTR'
+ __le32 sys_page_size; // 0x10: Page size of the system which initialized the log
+ __le32 page_size; // 0x14: Log page size used for this log file
+ __le16 ra_off; // 0x18:
+ __le16 minor_ver; // 0x1A:
+ __le16 major_ver; // 0x1C:
+ __le16 fixups[];
+};
+
+#define LFS_NO_CLIENT 0xffff
+#define LFS_NO_CLIENT_LE cpu_to_le16(0xffff)
+
+struct CLIENT_REC {
+ __le64 oldest_lsn;
+ __le64 restart_lsn; // 0x08:
+ __le16 prev_client; // 0x10:
+ __le16 next_client; // 0x12:
+ __le16 seq_num; // 0x14:
+ u8 align[6]; // 0x16
+ __le32 name_bytes; // 0x1C: in bytes
+ __le16 name[32]; // 0x20: name of client
+};
+
+static_assert(sizeof(struct CLIENT_REC) == 0x60);
+
+/* Two copies of these will exist at the beginning of the log file */
+struct RESTART_AREA {
+ __le64 current_lsn; // 0x00: Current logical end of log file
+ __le16 log_clients; // 0x08: Maximum number of clients
+ __le16 client_idx[2]; // 0x0A: free/use index into the client record arrays
+ __le16 flags; // 0x0E: See RESTART_SINGLE_PAGE_IO
+ __le32 seq_num_bits; // 0x10: the number of bits in sequence number.
+ __le16 ra_len; // 0x14:
+ __le16 client_off; // 0x16:
+ __le64 l_size; // 0x18: Usable log file size.
+ __le32 last_lsn_data_len; // 0x20:
+	__le16 rec_hdr_len; // 0x24: log record header length
+	__le16 data_off; // 0x26: log page data offset
+ __le32 open_log_count; // 0x28:
+ __le32 align[5]; // 0x2C:
+ struct CLIENT_REC clients[]; // 0x40:
+};
+
+struct LOG_REC_HDR {
+ __le16 redo_op; // 0x00: NTFS_LOG_OPERATION
+ __le16 undo_op; // 0x02: NTFS_LOG_OPERATION
+ __le16 redo_off; // 0x04: Offset to Redo record
+ __le16 redo_len; // 0x06: Redo length
+ __le16 undo_off; // 0x08: Offset to Undo record
+ __le16 undo_len; // 0x0A: Undo length
+ __le16 target_attr; // 0x0C:
+ __le16 lcns_follow; // 0x0E:
+ __le16 record_off; // 0x10:
+ __le16 attr_off; // 0x12:
+ __le16 cluster_off; // 0x14:
+ __le16 reserved; // 0x16:
+ __le64 target_vcn; // 0x18:
+ __le64 page_lcns[]; // 0x20:
+};
+
+static_assert(sizeof(struct LOG_REC_HDR) == 0x20);
+
+#define RESTART_ENTRY_ALLOCATED 0xFFFFFFFF
+#define RESTART_ENTRY_ALLOCATED_LE cpu_to_le32(0xFFFFFFFF)
+
+struct RESTART_TABLE {
+ __le16 size; // 0x00: In bytes
+ __le16 used; // 0x02: entries
+ __le16 total; // 0x04: entries
+ __le16 res[3]; // 0x06:
+ __le32 free_goal; // 0x0C:
+ __le32 first_free; // 0x10
+ __le32 last_free; // 0x14
+
+};
+
+static_assert(sizeof(struct RESTART_TABLE) == 0x18);
+
+struct ATTR_NAME_ENTRY {
+ __le16 off; // offset in the Open attribute Table
+ __le16 name_bytes;
+ __le16 name[];
+};
+
+struct OPEN_ATTR_ENRTY {
+ __le32 next; // 0x00: RESTART_ENTRY_ALLOCATED if allocated
+ __le32 bytes_per_index; // 0x04:
+ enum ATTR_TYPE type; // 0x08:
+	u8 is_dirty_pages; // 0x0C:
+	u8 is_attr_name; // 0x0D: Faked field to manage 'ptr'
+	u8 name_len; // 0x0E: Faked field to manage 'ptr'
+ u8 res;
+ struct MFT_REF ref; // 0x10: File Reference of file containing attribute
+ __le64 open_record_lsn; // 0x18:
+ void *ptr; // 0x20:
+};
+
+/* 32 bit version of 'struct OPEN_ATTR_ENRTY' */
+struct OPEN_ATTR_ENRTY_32 {
+ __le32 next; // 0x00: RESTART_ENTRY_ALLOCATED if allocated
+ __le32 ptr; // 0x04:
+ struct MFT_REF ref; // 0x08:
+ __le64 open_record_lsn; // 0x10:
+ u8 is_dirty_pages; // 0x18:
+ u8 is_attr_name; // 0x19
+ u8 res1[2];
+ enum ATTR_TYPE type; // 0x1C:
+ u8 name_len; // 0x20: in wchar
+ u8 res2[3];
+ __le32 AttributeName; // 0x24:
+ __le32 bytes_per_index; // 0x28:
+};
+
+#define SIZEOF_OPENATTRIBUTEENTRY0 0x2c
+// static_assert( 0x2C == sizeof(struct OPEN_ATTR_ENRTY_32) );
+static_assert(sizeof(struct OPEN_ATTR_ENRTY) < SIZEOF_OPENATTRIBUTEENTRY0);
+
+/*
+ * One entry exists in the Dirty Pages Table for each page which is dirty at the
+ * time the Restart Area is written
+ */
+struct DIR_PAGE_ENTRY {
+ __le32 next; // 0x00: RESTART_ENTRY_ALLOCATED if allocated
+ __le32 target_attr; // 0x04: Index into the Open attribute Table
+ __le32 transfer_len; // 0x08:
+ __le32 lcns_follow; // 0x0C:
+ __le64 vcn; // 0x10: Vcn of dirty page
+ __le64 oldest_lsn; // 0x18:
+ __le64 page_lcns[]; // 0x20:
+};
+
+static_assert(sizeof(struct DIR_PAGE_ENTRY) == 0x20);
+
+/* 32 bit version of 'struct DIR_PAGE_ENTRY' */
+struct DIR_PAGE_ENTRY_32 {
+ __le32 next; // 0x00: RESTART_ENTRY_ALLOCATED if allocated
+ __le32 target_attr; // 0x04: Index into the Open attribute Table
+ __le32 transfer_len; // 0x08:
+ __le32 lcns_follow; // 0x0C:
+ __le32 reserved; // 0x10:
+ __le32 vcn_low; // 0x14: Vcn of dirty page
+ __le32 vcn_hi; // 0x18: Vcn of dirty page
+ __le32 oldest_lsn_low; // 0x1C:
+	__le32 oldest_lsn_hi; // 0x20:
+	__le32 page_lcns_low; // 0x24:
+	__le32 page_lcns_hi; // 0x28:
+};
+
+static_assert(offsetof(struct DIR_PAGE_ENTRY_32, vcn_low) == 0x14);
+static_assert(sizeof(struct DIR_PAGE_ENTRY_32) == 0x2c);
+
+enum transact_state {
+ TransactionUninitialized = 0,
+ TransactionActive,
+ TransactionPrepared,
+ TransactionCommitted
+};
+
+struct TRANSACTION_ENTRY {
+ __le32 next; // 0x00: RESTART_ENTRY_ALLOCATED if allocated
+ u8 transact_state; // 0x04:
+ u8 reserved[3]; // 0x05:
+ __le64 first_lsn; // 0x08:
+ __le64 prev_lsn; // 0x10:
+ __le64 undo_next_lsn; // 0x18:
+ __le32 undo_records; // 0x20: Number of undo log records pending abort
+ __le32 undo_len; // 0x24: Total undo size
+};
+
+static_assert(sizeof(struct TRANSACTION_ENTRY) == 0x28);
+
+struct NTFS_RESTART {
+ __le32 major_ver; // 0x00:
+ __le32 minor_ver; // 0x04:
+ __le64 check_point_start; // 0x08:
+ __le64 open_attr_table_lsn; // 0x10:
+ __le64 attr_names_lsn; // 0x18:
+ __le64 dirty_pages_table_lsn; // 0x20:
+ __le64 transact_table_lsn; // 0x28:
+ __le32 open_attr_len; // 0x30: In bytes
+ __le32 attr_names_len; // 0x34: In bytes
+ __le32 dirty_pages_len; // 0x38: In bytes
+ __le32 transact_table_len; // 0x3C: In bytes
+};
+
+static_assert(sizeof(struct NTFS_RESTART) == 0x40);
+
+struct NEW_ATTRIBUTE_SIZES {
+ __le64 alloc_size;
+ __le64 valid_size;
+ __le64 data_size;
+ __le64 total_size;
+};
+
+struct BITMAP_RANGE {
+ __le32 bitmap_off;
+ __le32 bits;
+};
+
+struct LCN_RANGE {
+ __le64 lcn;
+ __le64 len;
+};
+
+/* The following type defines the different log record types */
+#define LfsClientRecord cpu_to_le32(1)
+#define LfsClientRestart cpu_to_le32(2)
+
+/* This is used to uniquely identify a client for a particular log file */
+struct CLIENT_ID {
+ __le16 seq_num;
+ __le16 client_idx;
+};
+
+/* This is the header that begins every Log Record in the log file */
+struct LFS_RECORD_HDR {
+ __le64 this_lsn; // 0x00:
+ __le64 client_prev_lsn; // 0x08:
+ __le64 client_undo_next_lsn; // 0x10:
+ __le32 client_data_len; // 0x18:
+ struct CLIENT_ID client; // 0x1C: Owner of this log record
+ __le32 record_type; // 0x20: LfsClientRecord or LfsClientRestart
+ __le32 transact_id; // 0x24:
+ __le16 flags; // 0x28: LOG_RECORD_MULTI_PAGE
+ u8 align[6]; // 0x2A:
+};
+
+#define LOG_RECORD_MULTI_PAGE cpu_to_le16(1)
+
+static_assert(sizeof(struct LFS_RECORD_HDR) == 0x30);
+
+struct LFS_RECORD {
+ __le16 next_record_off; // 0x00: Offset of the free space in the page
+ u8 align[6]; // 0x02:
+ __le64 last_end_lsn; // 0x08: lsn for the last log record which ends on the page
+};
+
+static_assert(sizeof(struct LFS_RECORD) == 0x10);
+
+struct RECORD_PAGE_HDR {
+ struct NTFS_RECORD_HEADER rhdr; // 'RCRD'
+ __le32 rflags; // 0x10: See LOG_PAGE_LOG_RECORD_END
+ __le16 page_count; // 0x14:
+ __le16 page_pos; // 0x16:
+ struct LFS_RECORD record_hdr; // 0x18
+ __le16 fixups[10]; // 0x28
+ __le32 file_off; // 0x3c: used when major version >= 2
+};
+
+// clang-format on
+
+// Page contains the end of a log record
+#define LOG_PAGE_LOG_RECORD_END cpu_to_le32(0x00000001)
+
+static inline bool is_log_record_end(const struct RECORD_PAGE_HDR *hdr)
+{
+ return hdr->rflags & LOG_PAGE_LOG_RECORD_END;
+}
+
+static_assert(offsetof(struct RECORD_PAGE_HDR, file_off) == 0x3c);
+
+/*
+ * END of NTFS LOG structures
+ */
+
+/* Define some tuning parameters to keep the restart tables a reasonable size */
+#define INITIAL_NUMBER_TRANSACTIONS 5
+
+enum NTFS_LOG_OPERATION {
+
+ Noop = 0x00,
+ CompensationLogRecord = 0x01,
+ InitializeFileRecordSegment = 0x02,
+ DeallocateFileRecordSegment = 0x03,
+ WriteEndOfFileRecordSegment = 0x04,
+ CreateAttribute = 0x05,
+ DeleteAttribute = 0x06,
+ UpdateResidentValue = 0x07,
+ UpdateNonresidentValue = 0x08,
+ UpdateMappingPairs = 0x09,
+ DeleteDirtyClusters = 0x0A,
+ SetNewAttributeSizes = 0x0B,
+ AddIndexEntryRoot = 0x0C,
+ DeleteIndexEntryRoot = 0x0D,
+ AddIndexEntryAllocation = 0x0E,
+ DeleteIndexEntryAllocation = 0x0F,
+ WriteEndOfIndexBuffer = 0x10,
+ SetIndexEntryVcnRoot = 0x11,
+ SetIndexEntryVcnAllocation = 0x12,
+ UpdateFileNameRoot = 0x13,
+ UpdateFileNameAllocation = 0x14,
+ SetBitsInNonresidentBitMap = 0x15,
+ ClearBitsInNonresidentBitMap = 0x16,
+ HotFix = 0x17,
+ EndTopLevelAction = 0x18,
+ PrepareTransaction = 0x19,
+ CommitTransaction = 0x1A,
+ ForgetTransaction = 0x1B,
+ OpenNonresidentAttribute = 0x1C,
+ OpenAttributeTableDump = 0x1D,
+ AttributeNamesDump = 0x1E,
+ DirtyPageTableDump = 0x1F,
+ TransactionTableDump = 0x20,
+ UpdateRecordDataRoot = 0x21,
+ UpdateRecordDataAllocation = 0x22,
+
+ UpdateRelativeDataInIndex =
+ 0x23, // NtOfsRestartUpdateRelativeDataInIndex
+ UpdateRelativeDataInIndex2 = 0x24,
+ ZeroEndOfFileRecord = 0x25,
+};
+
+/*
+ * Array for log records which require a target attribute
+ * A true indicates that the corresponding restart operation requires a target attribute
+ */
+static const u8 AttributeRequired[] = {
+ 0xFC, 0xFB, 0xFF, 0x10, 0x06,
+};
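+
+/*
+ * Worked example of the bitmap lookup (illustration only):
+ * op == InitializeFileRecordSegment (0x02) selects byte 0x02 >> 3 == 0
+ * and bit 0x02 & 7 == 2; AttributeRequired[0] == 0xFC == 0b11111100,
+ * bit 2 is set, so this operation requires a target attribute, while
+ * op == Noop (0x00) selects bit 0 of the same byte, which is clear.
+ */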
+
+static inline bool is_target_required(u16 op)
+{
+ bool ret = op <= UpdateRecordDataAllocation &&
+ (AttributeRequired[op >> 3] >> (op & 7) & 1);
+ return ret;
+}
+
+static inline bool can_skip_action(enum NTFS_LOG_OPERATION op)
+{
+ switch (op) {
+ case Noop:
+ case DeleteDirtyClusters:
+ case HotFix:
+ case EndTopLevelAction:
+ case PrepareTransaction:
+ case CommitTransaction:
+ case ForgetTransaction:
+ case CompensationLogRecord:
+ case OpenNonresidentAttribute:
+ case OpenAttributeTableDump:
+ case AttributeNamesDump:
+ case DirtyPageTableDump:
+ case TransactionTableDump:
+ return true;
+ default:
+ return false;
+ }
+}
+
+enum { lcb_ctx_undo_next, lcb_ctx_prev, lcb_ctx_next };
+
+/* bytes per restart table */
+static inline u32 bytes_per_rt(const struct RESTART_TABLE *rt)
+{
+ return le16_to_cpu(rt->used) * le16_to_cpu(rt->size) +
+ sizeof(struct RESTART_TABLE);
+}
+
+/* log record length */
+static inline u32 lrh_length(const struct LOG_REC_HDR *lr)
+{
+ u16 t16 = le16_to_cpu(lr->lcns_follow);
+
+ return struct_size(lr, page_lcns, max_t(u16, 1, t16));
+}
+
+struct lcb {
+ struct LFS_RECORD_HDR *lrh; // Log record header of the current lsn
+ struct LOG_REC_HDR *log_rec;
+ u32 ctx_mode; // lcb_ctx_undo_next/lcb_ctx_prev/lcb_ctx_next
+ struct CLIENT_ID client;
+	bool alloc; // if true then we should deallocate 'log_rec'
+};
+
+static void lcb_put(struct lcb *lcb)
+{
+ if (lcb->alloc)
+ ntfs_free(lcb->log_rec);
+ ntfs_free(lcb->lrh);
+ ntfs_free(lcb);
+}
+
+/*
+ * oldest_client_lsn
+ *
+ * find the oldest lsn from active clients.
+ */
+static inline void oldest_client_lsn(const struct CLIENT_REC *ca,
+ __le16 next_client, u64 *oldest_lsn)
+{
+ while (next_client != LFS_NO_CLIENT_LE) {
+ const struct CLIENT_REC *cr = ca + le16_to_cpu(next_client);
+ u64 lsn = le64_to_cpu(cr->oldest_lsn);
+
+		/* ignore this block if its oldest lsn is 0 */
+ if (lsn && lsn < *oldest_lsn)
+ *oldest_lsn = lsn;
+
+ next_client = cr->next_client;
+ }
+}
+
+static inline bool is_rst_page_hdr_valid(u32 file_off,
+ const struct RESTART_HDR *rhdr)
+{
+ u32 sys_page = le32_to_cpu(rhdr->sys_page_size);
+ u32 page_size = le32_to_cpu(rhdr->page_size);
+ u32 end_usa;
+ u16 ro;
+
+ if (sys_page < SECTOR_SIZE || page_size < SECTOR_SIZE ||
+ sys_page & (sys_page - 1) || page_size & (page_size - 1)) {
+ return false;
+ }
+
+ /* Check that if the file offset isn't 0, it is the system page size */
+ if (file_off && file_off != sys_page)
+ return false;
+
+ /* Check support version 1.1+ */
+ if (le16_to_cpu(rhdr->major_ver) <= 1 && !rhdr->minor_ver)
+ return false;
+
+ if (le16_to_cpu(rhdr->major_ver) > 2)
+ return false;
+
+ ro = le16_to_cpu(rhdr->ra_off);
+ if (!IsQuadAligned(ro) || ro > sys_page)
+ return false;
+
+ end_usa = ((sys_page >> SECTOR_SHIFT) + 1) * sizeof(short);
+ end_usa += le16_to_cpu(rhdr->rhdr.fix_off);
+
+ if (ro < end_usa)
+ return false;
+
+ return true;
+}
+
+static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr)
+{
+ const struct RESTART_AREA *ra;
+ u16 cl, fl, ul;
+ u32 off, l_size, file_dat_bits, file_size_round;
+ u16 ro = le16_to_cpu(rhdr->ra_off);
+ u32 sys_page = le32_to_cpu(rhdr->sys_page_size);
+
+ if (ro + offsetof(struct RESTART_AREA, l_size) >
+ SECTOR_SIZE - sizeof(short))
+ return false;
+
+ ra = Add2Ptr(rhdr, ro);
+ cl = le16_to_cpu(ra->log_clients);
+
+ if (cl > 1)
+ return false;
+
+ off = le16_to_cpu(ra->client_off);
+
+ if (!IsQuadAligned(off) || ro + off > SECTOR_SIZE - sizeof(short))
+ return false;
+
+ off += cl * sizeof(struct CLIENT_REC);
+
+ if (off > sys_page)
+ return false;
+
+ /*
+ * Check the restart length field and whether the entire
+	 * restart area is contained in that length
+ */
+ if (le16_to_cpu(rhdr->ra_off) + le16_to_cpu(ra->ra_len) > sys_page ||
+ off > le16_to_cpu(ra->ra_len)) {
+ return false;
+ }
+
+ /*
+ * As a final check make sure that the use list and the free list
+ * are either empty or point to a valid client
+ */
+ fl = le16_to_cpu(ra->client_idx[0]);
+ ul = le16_to_cpu(ra->client_idx[1]);
+ if ((fl != LFS_NO_CLIENT && fl >= cl) ||
+ (ul != LFS_NO_CLIENT && ul >= cl))
+ return false;
+
+ /* Make sure the sequence number bits match the log file size */
+ l_size = le64_to_cpu(ra->l_size);
+
+ file_dat_bits = sizeof(u64) * 8 - le32_to_cpu(ra->seq_num_bits);
+ file_size_round = 1u << (file_dat_bits + 3);
+ if (file_size_round != l_size &&
+ (file_size_round < l_size || (file_size_round / 2) > l_size)) {
+ return false;
+ }
+
+ /* The log page data offset and record header length must be quad-aligned */
+ if (!IsQuadAligned(le16_to_cpu(ra->data_off)) ||
+ !IsQuadAligned(le16_to_cpu(ra->rec_hdr_len)))
+ return false;
+
+ return true;
+}
+
+static inline bool is_client_area_valid(const struct RESTART_HDR *rhdr,
+ bool usa_error)
+{
+ u16 ro = le16_to_cpu(rhdr->ra_off);
+ const struct RESTART_AREA *ra = Add2Ptr(rhdr, ro);
+ u16 ra_len = le16_to_cpu(ra->ra_len);
+ const struct CLIENT_REC *ca;
+ u32 i;
+
+ if (usa_error && ra_len + ro > SECTOR_SIZE - sizeof(short))
+ return false;
+
+ /* Find the start of the client array */
+ ca = Add2Ptr(ra, le16_to_cpu(ra->client_off));
+
+ /*
+ * Start with the free list
+ * Check that all the clients are valid and that there isn't a cycle
+ * Do the in-use list on the second pass
+ */
+ for (i = 0; i < 2; i++) {
+ u16 client_idx = le16_to_cpu(ra->client_idx[i]);
+ bool first_client = true;
+ u16 clients = le16_to_cpu(ra->log_clients);
+
+ while (client_idx != LFS_NO_CLIENT) {
+ const struct CLIENT_REC *cr;
+
+ if (!clients ||
+ client_idx >= le16_to_cpu(ra->log_clients))
+ return false;
+
+ clients -= 1;
+ cr = ca + client_idx;
+
+ client_idx = le16_to_cpu(cr->next_client);
+
+ if (first_client) {
+ first_client = false;
+ if (cr->prev_client != LFS_NO_CLIENT_LE)
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+/*
+ * remove_client
+ *
+ * remove a client record from a client record list in a restart area
+ */
+static inline void remove_client(struct CLIENT_REC *ca,
+ const struct CLIENT_REC *cr, __le16 *head)
+{
+ if (cr->prev_client == LFS_NO_CLIENT_LE)
+ *head = cr->next_client;
+ else
+ ca[le16_to_cpu(cr->prev_client)].next_client = cr->next_client;
+
+ if (cr->next_client != LFS_NO_CLIENT_LE)
+ ca[le16_to_cpu(cr->next_client)].prev_client = cr->prev_client;
+}
+
+/*
+ * add_client
+ *
+ * add a client record to the start of a list
+ */
+static inline void add_client(struct CLIENT_REC *ca, u16 index, __le16 *head)
+{
+ struct CLIENT_REC *cr = ca + index;
+
+ cr->prev_client = LFS_NO_CLIENT_LE;
+ cr->next_client = *head;
+
+ if (*head != LFS_NO_CLIENT_LE)
+ ca[le16_to_cpu(*head)].prev_client = cpu_to_le16(index);
+
+ *head = cpu_to_le16(index);
+}
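+
+/*
+ * Illustration: with an empty list (*head == LFS_NO_CLIENT_LE),
+ * add_client(ca, 0, head) sets both links of record 0 to
+ * LFS_NO_CLIENT_LE and points *head at index 0; the lists are thus
+ * threaded through le16 indices into 'ca' rather than pointers.
+ */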
+
+/*
+ * enum_rstbl
+ *
+ * walks a restart table, returning the next allocated entry after 'c',
+ * or the first one when 'c' is NULL; returns NULL when none remain
+ */
+static inline void *enum_rstbl(struct RESTART_TABLE *t, void *c)
+{
+ __le32 *e;
+ u32 bprt;
+ u16 rsize = t ? le16_to_cpu(t->size) : 0;
+
+ if (!c) {
+ if (!t || !t->total)
+ return NULL;
+ e = Add2Ptr(t, sizeof(struct RESTART_TABLE));
+ } else {
+ e = Add2Ptr(c, rsize);
+ }
+
+ /* Loop until we hit the first one allocated, or the end of the list */
+ for (bprt = bytes_per_rt(t); PtrOffset(t, e) < bprt;
+ e = Add2Ptr(e, rsize)) {
+ if (*e == RESTART_ENTRY_ALLOCATED_LE)
+ return e;
+ }
+ return NULL;
+}
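+
+/*
+ * Typical usage (sketch): pass NULL to start and iterate until NULL,
+ * e.g. while ((e = enum_rstbl(tbl, e))) { ... }, as find_dp() below does.
+ */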
+
+/*
+ * find_dp
+ *
+ * searches for a 'vcn' in the Dirty Page Table
+ */
+static inline struct DIR_PAGE_ENTRY *find_dp(struct RESTART_TABLE *dptbl,
+ u32 target_attr, u64 vcn)
+{
+ __le32 ta = cpu_to_le32(target_attr);
+ struct DIR_PAGE_ENTRY *dp = NULL;
+
+ while ((dp = enum_rstbl(dptbl, dp))) {
+ u64 dp_vcn = le64_to_cpu(dp->vcn);
+
+ if (dp->target_attr == ta && vcn >= dp_vcn &&
+ vcn < dp_vcn + le32_to_cpu(dp->lcns_follow)) {
+ return dp;
+ }
+ }
+ return NULL;
+}
+
+static inline u32 norm_file_page(u32 page_size, u32 *l_size, bool use_default)
+{
+ if (use_default)
+ page_size = DefaultLogPageSize;
+
+ /* Round the file size down to a system page boundary */
+ *l_size &= ~(page_size - 1);
+
+ /* File should contain at least 2 restart pages and MinLogRecordPages pages */
+ if (*l_size < (MinLogRecordPages + 2) * page_size)
+ return 0;
+
+ return page_size;
+}
+
+static bool check_log_rec(const struct LOG_REC_HDR *lr, u32 bytes, u32 tr,
+ u32 bytes_per_attr_entry)
+{
+ u16 t16;
+
+ if (bytes < sizeof(struct LOG_REC_HDR))
+ return false;
+ if (!tr)
+ return false;
+
+ if ((tr - sizeof(struct RESTART_TABLE)) %
+ sizeof(struct TRANSACTION_ENTRY))
+ return false;
+
+ if (le16_to_cpu(lr->redo_off) & 7)
+ return false;
+
+ if (le16_to_cpu(lr->undo_off) & 7)
+ return false;
+
+ if (lr->target_attr)
+ goto check_lcns;
+
+ if (is_target_required(le16_to_cpu(lr->redo_op)))
+ return false;
+
+ if (is_target_required(le16_to_cpu(lr->undo_op)))
+ return false;
+
+check_lcns:
+ if (!lr->lcns_follow)
+ goto check_length;
+
+ t16 = le16_to_cpu(lr->target_attr);
+ if ((t16 - sizeof(struct RESTART_TABLE)) % bytes_per_attr_entry)
+ return false;
+
+check_length:
+ if (bytes < lrh_length(lr))
+ return false;
+
+ return true;
+}
+
+static bool check_rstbl(const struct RESTART_TABLE *rt, size_t bytes)
+{
+ u32 ts;
+ u32 i, off;
+ u16 rsize = le16_to_cpu(rt->size);
+ u16 ne = le16_to_cpu(rt->used);
+ u32 ff = le32_to_cpu(rt->first_free);
+ u32 lf = le32_to_cpu(rt->last_free);
+
+ ts = rsize * ne + sizeof(struct RESTART_TABLE);
+
+ if (!rsize || rsize > bytes ||
+ rsize + sizeof(struct RESTART_TABLE) > bytes || bytes < ts ||
+ le16_to_cpu(rt->total) > ne || ff > ts || lf > ts ||
+ (ff && ff < sizeof(struct RESTART_TABLE)) ||
+ (lf && lf < sizeof(struct RESTART_TABLE))) {
+ return false;
+ }
+
+ /* Verify each entry is either allocated or points
+	 * to a valid offset in the table
+ */
+ for (i = 0; i < ne; i++) {
+ off = le32_to_cpu(*(__le32 *)Add2Ptr(
+ rt, i * rsize + sizeof(struct RESTART_TABLE)));
+
+ if (off != RESTART_ENTRY_ALLOCATED && off &&
+ (off < sizeof(struct RESTART_TABLE) ||
+ ((off - sizeof(struct RESTART_TABLE)) % rsize))) {
+ return false;
+ }
+ }
+
+ /* Walk through the list headed by the first entry to make
+ * sure none of the entries are currently being used
+ */
+ for (off = ff; off;) {
+ if (off == RESTART_ENTRY_ALLOCATED)
+ return false;
+
+ off = le32_to_cpu(*(__le32 *)Add2Ptr(rt, off));
+ }
+
+ return true;
+}
+
+/*
+ * free_rsttbl_idx
+ *
+ * frees a previously allocated index in a Restart Table.
+ */
+static inline void free_rsttbl_idx(struct RESTART_TABLE *rt, u32 off)
+{
+ __le32 *e;
+ u32 lf = le32_to_cpu(rt->last_free);
+ __le32 off_le = cpu_to_le32(off);
+
+ e = Add2Ptr(rt, off);
+
+ if (off < le32_to_cpu(rt->free_goal)) {
+ *e = rt->first_free;
+ rt->first_free = off_le;
+ if (!lf)
+ rt->last_free = off_le;
+ } else {
+ if (lf)
+ *(__le32 *)Add2Ptr(rt, lf) = off_le;
+ else
+ rt->first_free = off_le;
+
+ rt->last_free = off_le;
+ *e = 0;
+ }
+
+ le16_sub_cpu(&rt->total, 1);
+}
+
+static inline struct RESTART_TABLE *init_rsttbl(u16 esize, u16 used)
+{
+ __le32 *e, *last_free;
+ u32 off;
+ u32 bytes = esize * used + sizeof(struct RESTART_TABLE);
+ u32 lf = sizeof(struct RESTART_TABLE) + (used - 1) * esize;
+ struct RESTART_TABLE *t = ntfs_zalloc(bytes);
+
+ t->size = cpu_to_le16(esize);
+ t->used = cpu_to_le16(used);
+ t->free_goal = cpu_to_le32(~0u);
+ t->first_free = cpu_to_le32(sizeof(struct RESTART_TABLE));
+ t->last_free = cpu_to_le32(lf);
+
+ e = (__le32 *)(t + 1);
+ last_free = Add2Ptr(t, lf);
+
+ for (off = sizeof(struct RESTART_TABLE) + esize; e < last_free;
+ e = Add2Ptr(e, esize), off += esize) {
+ *e = cpu_to_le32(off);
+ }
+ return t;
+}
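+
+/*
+ * Layout illustration for init_rsttbl (example values): with esize == 0x28
+ * and used == 5 the table is a 0x18 byte header plus five 0x28 byte slots;
+ * each free slot stores the le32 offset of the next one, the last slot is
+ * left zero, so first_free == 0x18 and last_free == 0x18 + 4 * 0x28 == 0xb8.
+ */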
+
+static inline struct RESTART_TABLE *extend_rsttbl(struct RESTART_TABLE *tbl,
+ u32 add, u32 free_goal)
+{
+ u16 esize = le16_to_cpu(tbl->size);
+ __le32 osize = cpu_to_le32(bytes_per_rt(tbl));
+ u32 used = le16_to_cpu(tbl->used);
+ struct RESTART_TABLE *rt = init_rsttbl(esize, used + add);
+
+ memcpy(rt + 1, tbl + 1, esize * used);
+
+ rt->free_goal = free_goal == ~0u
+ ? cpu_to_le32(~0u)
+ : cpu_to_le32(sizeof(struct RESTART_TABLE) +
+ free_goal * esize);
+
+ if (tbl->first_free) {
+ rt->first_free = tbl->first_free;
+ *(__le32 *)Add2Ptr(rt, le32_to_cpu(tbl->last_free)) = osize;
+ } else {
+ rt->first_free = osize;
+ }
+
+ rt->total = tbl->total;
+
+ ntfs_free(tbl);
+ return rt;
+}
+
+/*
+ * alloc_rsttbl_idx
+ *
+ * allocates an index from within a previously initialized Restart Table
+ */
+static inline void *alloc_rsttbl_idx(struct RESTART_TABLE **tbl)
+{
+ u32 off;
+ __le32 *e;
+ struct RESTART_TABLE *t = *tbl;
+
+ if (!t->first_free)
+ *tbl = t = extend_rsttbl(t, 16, ~0u);
+
+ off = le32_to_cpu(t->first_free);
+
+ /* Dequeue this entry and zero it. */
+ e = Add2Ptr(t, off);
+
+ t->first_free = *e;
+
+ memset(e, 0, le16_to_cpu(t->size));
+
+ *e = RESTART_ENTRY_ALLOCATED_LE;
+
+ /* If list is going empty, then we fix the last_free as well. */
+ if (!t->first_free)
+ t->last_free = 0;
+
+ le16_add_cpu(&t->total, 1);
+
+ return Add2Ptr(t, off);
+}
+
+/*
+ * alloc_rsttbl_from_idx
+ *
+ * allocates a specific index from within a previously initialized Restart Table
+ */
+static inline void *alloc_rsttbl_from_idx(struct RESTART_TABLE **tbl, u32 vbo)
+{
+ u32 off;
+ __le32 *e;
+ struct RESTART_TABLE *rt = *tbl;
+ u32 bytes = bytes_per_rt(rt);
+ u16 esize = le16_to_cpu(rt->size);
+
+	/* If the entry is not in the table, we will have to extend the table */
+ if (vbo >= bytes) {
+ /*
+ * extend the size by computing the number of entries between
+ * the existing size and the desired index and adding
+ * 1 to that
+ */
+ u32 bytes2idx = vbo - bytes;
+
+ /* There should always be an integral number of entries being added */
+ /* Now extend the table */
+ *tbl = rt = extend_rsttbl(rt, bytes2idx / esize + 1, bytes);
+ if (!rt)
+ return NULL;
+ }
+
+ /* see if the entry is already allocated, and just return if it is. */
+ e = Add2Ptr(rt, vbo);
+
+ if (*e == RESTART_ENTRY_ALLOCATED_LE)
+ return e;
+
+ /*
+ * Walk through the table, looking for the entry we're
+	 * interested in and the previous entry
+ */
+ off = le32_to_cpu(rt->first_free);
+ e = Add2Ptr(rt, off);
+
+ if (off == vbo) {
+ /* this is a match */
+ rt->first_free = *e;
+ goto skip_looking;
+ }
+
+ /*
+ * need to walk through the list looking for the predecessor of our entry
+ */
+ for (;;) {
+ /* Remember the entry just found */
+ u32 last_off = off;
+ __le32 *last_e = e;
+
+		/* We should never run out of entries. */
+
+		/* Look up the next entry in the list */
+ off = le32_to_cpu(*last_e);
+ e = Add2Ptr(rt, off);
+
+ /* If this is our match we are done */
+ if (off == vbo) {
+ *last_e = *e;
+
+			/* If this was the last entry, we update the table as well */
+ if (le32_to_cpu(rt->last_free) == off)
+ rt->last_free = cpu_to_le32(last_off);
+ break;
+ }
+ }
+
+skip_looking:
+ /* If the list is now empty, we fix the last_free as well */
+ if (!rt->first_free)
+ rt->last_free = 0;
+
+ /* Zero this entry */
+ memset(e, 0, esize);
+ *e = RESTART_ENTRY_ALLOCATED_LE;
+
+ le16_add_cpu(&rt->total, 1);
+
+ return e;
+}
+
+#define RESTART_SINGLE_PAGE_IO cpu_to_le16(0x0001)
+
+#define NTFSLOG_WRAPPED 0x00000001
+#define NTFSLOG_MULTIPLE_PAGE_IO 0x00000002
+#define NTFSLOG_NO_LAST_LSN 0x00000004
+#define NTFSLOG_REUSE_TAIL 0x00000010
+#define NTFSLOG_NO_OLDEST_LSN 0x00000020
+
+/*
+ * Helper struct to work with NTFS LogFile
+ */
+struct ntfs_log {
+ struct ntfs_inode *ni;
+
+ u32 l_size;
+ u32 sys_page_size;
+ u32 sys_page_mask;
+ u32 page_size;
+ u32 page_mask; // page_size - 1
+ u8 page_bits;
+ struct RECORD_PAGE_HDR *one_page_buf;
+
+ struct RESTART_TABLE *open_attr_tbl;
+ u32 transaction_id;
+ u32 clst_per_page;
+
+ u32 first_page;
+ u32 next_page;
+ u32 ra_off;
+ u32 data_off;
+ u32 restart_size;
+ u32 data_size;
+ u16 record_header_len;
+ u64 seq_num;
+ u32 seq_num_bits;
+ u32 file_data_bits;
+ u32 seq_num_mask; /* (1 << file_data_bits) - 1 */
+
+ struct RESTART_AREA *ra; /* in-memory image of the next restart area */
+ u32 ra_size; /* the usable size of the restart area */
+
+ /*
+ * If true, then the in-memory restart area is to be written
+ * to the first position on the disk
+ */
+ bool init_ra;
+ bool set_dirty; /* true if we need to set dirty flag */
+
+ u64 oldest_lsn;
+
+ u32 oldest_lsn_off;
+ u64 last_lsn;
+
+ u32 total_avail;
+ u32 total_avail_pages;
+ u32 total_undo_commit;
+ u32 max_current_avail;
+ u32 current_avail;
+ u32 reserved;
+
+ short major_ver;
+ short minor_ver;
+
+ u32 l_flags; /* See NTFSLOG_XXX */
+ u32 current_openlog_count; /* On-disk value for open_log_count */
+
+ struct CLIENT_ID client_id;
+ u32 client_undo_commit;
+};
+
+static inline u32 lsn_to_vbo(struct ntfs_log *log, const u64 lsn)
+{
+ u32 vbo = (lsn << log->seq_num_bits) >> (log->seq_num_bits - 3);
+
+ return vbo;
+}
+
+/* compute the offset in the log file of the next log page */
+static inline u32 next_page_off(struct ntfs_log *log, u32 off)
+{
+ off = (off & ~log->sys_page_mask) + log->page_size;
+ return off >= log->l_size ? log->first_page : off;
+}
+
+static inline u32 lsn_to_page_off(struct ntfs_log *log, u64 lsn)
+{
+ return (((u32)lsn) << 3) & log->page_mask;
+}
+
+static inline u64 vbo_to_lsn(struct ntfs_log *log, u32 off, u64 Seq)
+{
+ return (off >> 3) + (Seq << log->file_data_bits);
+}
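+
+/*
+ * Worked example of the lsn <-> vbo mapping (illustrative values): with
+ * file_data_bits == 26 an lsn keeps (file offset >> 3) in bits 0..25 and
+ * the sequence number above them, so vbo_to_lsn(log, 0x2038, 5) returns
+ * (0x2038 >> 3) + (5 << 26) and lsn_to_vbo() recovers 0x2038 by shifting
+ * the sequence bits out and multiplying the remainder by 8.
+ */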
+
+static inline bool is_lsn_in_file(struct ntfs_log *log, u64 lsn)
+{
+ return lsn >= log->oldest_lsn &&
+ lsn <= le64_to_cpu(log->ra->current_lsn);
+}
+
+static inline u32 hdr_file_off(struct ntfs_log *log,
+ struct RECORD_PAGE_HDR *hdr)
+{
+ if (log->major_ver < 2)
+ return le64_to_cpu(hdr->rhdr.lsn);
+
+ return le32_to_cpu(hdr->file_off);
+}
+
+static inline u64 base_lsn(struct ntfs_log *log,
+ const struct RECORD_PAGE_HDR *hdr, u64 lsn)
+{
+ u64 h_lsn = le64_to_cpu(hdr->rhdr.lsn);
+ u64 ret = (((h_lsn >> log->file_data_bits) +
+ (lsn < (lsn_to_vbo(log, h_lsn) & ~log->page_mask) ? 1 : 0))
+ << log->file_data_bits) +
+ ((((is_log_record_end(hdr) &&
+ h_lsn <= le64_to_cpu(hdr->record_hdr.last_end_lsn))
+ ? le16_to_cpu(hdr->record_hdr.next_record_off)
+ : log->page_size) +
+ lsn) >>
+ 3);
+
+ return ret;
+}
+
+static inline bool verify_client_lsn(struct ntfs_log *log,
+ const struct CLIENT_REC *client, u64 lsn)
+{
+ return lsn >= le64_to_cpu(client->oldest_lsn) &&
+ lsn <= le64_to_cpu(log->ra->current_lsn) && lsn;
+}
+
+struct restart_info {
+ u64 last_lsn;
+ struct RESTART_HDR *r_page;
+ u32 vbo;
+ bool chkdsk_was_run;
+ bool valid_page;
+ bool initialized;
+ bool restart;
+};
+
+static int read_log_page(struct ntfs_log *log, u32 vbo,
+ struct RECORD_PAGE_HDR **buffer, bool *usa_error)
+{
+ int err = 0;
+ u32 page_idx = vbo >> log->page_bits;
+ u32 page_off = vbo & log->page_mask;
+ u32 bytes = log->page_size - page_off;
+ void *to_free = NULL;
+ u32 page_vbo = page_idx << log->page_bits;
+ struct RECORD_PAGE_HDR *page_buf;
+ struct ntfs_inode *ni = log->ni;
+ bool bBAAD;
+
+ if (vbo >= log->l_size)
+ return -EINVAL;
+
+ if (!*buffer) {
+ to_free = ntfs_malloc(bytes);
+ if (!to_free)
+ return -ENOMEM;
+ *buffer = to_free;
+ }
+
+ page_buf = page_off ? log->one_page_buf : *buffer;
+
+ err = ntfs_read_run_nb(ni->mi.sbi, &ni->file.run, page_vbo, page_buf,
+ log->page_size, NULL);
+ if (err)
+ goto out;
+
+ if (page_buf->rhdr.sign != NTFS_FFFF_SIGNATURE)
+ ntfs_fix_post_read(&page_buf->rhdr, PAGE_SIZE, false);
+
+ if (page_buf != *buffer)
+ memcpy(*buffer, Add2Ptr(page_buf, page_off), bytes);
+
+ bBAAD = page_buf->rhdr.sign == NTFS_BAAD_SIGNATURE;
+
+ if (usa_error)
+ *usa_error = bBAAD;
+ /* Check that the update sequence array for this page is valid */
+ /* If we don't allow errors, raise an error status */
+ else if (bBAAD)
+ err = -EINVAL;
+
+out:
+ if (err && to_free) {
+ ntfs_free(to_free);
+ *buffer = NULL;
+ }
+
+ return err;
+}
+
+/*
+ * log_read_rst
+ *
+ * It walks through 512 blocks of the file looking for a valid restart page header.
+ * It stops the first time we find a valid page header.
+ */
+static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first,
+ struct restart_info *info)
+{
+ u32 skip, vbo;
+ struct RESTART_HDR *r_page = ntfs_malloc(DefaultLogPageSize);
+
+ if (!r_page)
+ return -ENOMEM;
+
+ memset(info, 0, sizeof(struct restart_info));
+
+ /* Determine which restart area we are looking for */
+ if (first) {
+ vbo = 0;
+ skip = 512;
+ } else {
+ vbo = 512;
+ skip = 0;
+ }
+
+ /* loop continuously until we succeed */
+ for (; vbo < l_size; vbo = 2 * vbo + skip, skip = 0) {
+ bool usa_error;
+ u32 sys_page_size;
+ bool brst, bchk;
+ struct RESTART_AREA *ra;
+
+ /* Read a page header at the current offset */
+ if (read_log_page(log, vbo, (struct RECORD_PAGE_HDR **)&r_page,
+ &usa_error)) {
+ /* ignore any errors */
+ continue;
+ }
+
+ /* exit if the signature is a log record page */
+ if (r_page->rhdr.sign == NTFS_RCRD_SIGNATURE) {
+ info->initialized = true;
+ break;
+ }
+
+ brst = r_page->rhdr.sign == NTFS_RSTR_SIGNATURE;
+ bchk = r_page->rhdr.sign == NTFS_CHKD_SIGNATURE;
+
+ if (!bchk && !brst) {
+ if (r_page->rhdr.sign != NTFS_FFFF_SIGNATURE) {
+ /*
+ * Remember if the signature does not
+ * indicate uninitialized file
+ */
+ info->initialized = true;
+ }
+ continue;
+ }
+
+ ra = NULL;
+ info->valid_page = false;
+ info->initialized = true;
+ info->vbo = vbo;
+
+ /* Let's check the restart area if this is a valid page */
+ if (!is_rst_page_hdr_valid(vbo, r_page))
+ goto check_result;
+ ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off));
+
+ if (!is_rst_area_valid(r_page))
+ goto check_result;
+
+ /*
+ * We have a valid restart page header and restart area.
+ * If chkdsk was run or we have no clients then we have
+ * no more checking to do
+ */
+ if (bchk || ra->client_idx[1] == LFS_NO_CLIENT_LE) {
+ info->valid_page = true;
+ goto check_result;
+ }
+
+ /* Read the entire restart area */
+ sys_page_size = le32_to_cpu(r_page->sys_page_size);
+ if (DefaultLogPageSize != sys_page_size) {
+ ntfs_free(r_page);
+ r_page = ntfs_zalloc(sys_page_size);
+ if (!r_page)
+ return -ENOMEM;
+
+ if (read_log_page(log, vbo,
+ (struct RECORD_PAGE_HDR **)&r_page,
+ &usa_error)) {
+ /* ignore any errors */
+ ntfs_free(r_page);
+ r_page = NULL;
+ continue;
+ }
+ }
+
+ if (is_client_area_valid(r_page, usa_error)) {
+ info->valid_page = true;
+ ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off));
+ }
+
+check_result:
+ /* If chkdsk was run then update the caller's values and return */
+ if (r_page->rhdr.sign == NTFS_CHKD_SIGNATURE) {
+ info->chkdsk_was_run = true;
+ info->last_lsn = le64_to_cpu(r_page->rhdr.lsn);
+ info->restart = true;
+ info->r_page = r_page;
+ return 0;
+ }
+
+ /* If we have a valid page then copy the values we need from it */
+ if (info->valid_page) {
+ info->last_lsn = le64_to_cpu(ra->current_lsn);
+ info->restart = true;
+ info->r_page = r_page;
+ return 0;
+ }
+ }
+
+ ntfs_free(r_page);
+
+ return 0;
+}
+
+/*
+ * log_init_pg_hdr
+ *
+ * init "log" from the restart page header
+ */
+static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size,
+ u32 page_size, u16 major_ver, u16 minor_ver)
+{
+ log->sys_page_size = sys_page_size;
+ log->sys_page_mask = sys_page_size - 1;
+ log->page_size = page_size;
+ log->page_mask = page_size - 1;
+ log->page_bits = blksize_bits(page_size);
+
+ log->clst_per_page = log->page_size >> log->ni->mi.sbi->cluster_bits;
+ if (!log->clst_per_page)
+ log->clst_per_page = 1;
+
+ log->first_page = major_ver >= 2
+ ? 0x22 * page_size
+ : ((sys_page_size << 1) + (page_size << 1));
+ log->major_ver = major_ver;
+ log->minor_ver = minor_ver;
+}
+
+/*
+ * log_create
+ *
+ * init "log" in cases when we don't have a restart area to use
+ */
+static void log_create(struct ntfs_log *log, u32 l_size, const u64 last_lsn,
+ u32 open_log_count, bool wrapped, bool use_multi_page)
+{
+ log->l_size = l_size;
+ /* All file offsets must be quadword aligned */
+ log->file_data_bits = blksize_bits(l_size) - 3;
+ log->seq_num_mask = (8 << log->file_data_bits) - 1;
+ log->seq_num_bits = sizeof(u64) * 8 - log->file_data_bits;
+ log->seq_num = (last_lsn >> log->file_data_bits) + 2;
+ log->next_page = log->first_page;
+ log->oldest_lsn = log->seq_num << log->file_data_bits;
+ log->oldest_lsn_off = 0;
+ log->last_lsn = log->oldest_lsn;
+
+ log->l_flags |= NTFSLOG_NO_LAST_LSN | NTFSLOG_NO_OLDEST_LSN;
+
+ /* Set the correct flags for the I/O and indicate if we have wrapped */
+ if (wrapped)
+ log->l_flags |= NTFSLOG_WRAPPED;
+
+ if (use_multi_page)
+ log->l_flags |= NTFSLOG_MULTIPLE_PAGE_IO;
+
+ /* Compute the log page values */
+ log->data_off = QuadAlign(
+ offsetof(struct RECORD_PAGE_HDR, fixups) +
+ sizeof(short) * ((log->page_size >> SECTOR_SHIFT) + 1));
+ log->data_size = log->page_size - log->data_off;
+ log->record_header_len = sizeof(struct LFS_RECORD_HDR);
+
+ /* Remember the different page sizes for reservation */
+ log->reserved = log->data_size - log->record_header_len;
+
+ /* Compute the restart page values. */
+ log->ra_off = QuadAlign(
+ offsetof(struct RESTART_HDR, fixups) +
+ sizeof(short) * ((log->sys_page_size >> SECTOR_SHIFT) + 1));
+ log->restart_size = log->sys_page_size - log->ra_off;
+ log->ra_size = struct_size(log->ra, clients, 1);
+ log->current_openlog_count = open_log_count;
+
+ /*
+ * The total available log file space is the number of
+ * log file pages times the space available on each page
+ */
+ log->total_avail_pages = log->l_size - log->first_page;
+ log->total_avail = log->total_avail_pages >> log->page_bits;
+
+ /*
+ * We assume that we can't use the end of the page less than
+ * the file record size
+ * Then we won't need to reserve more than the caller asks for
+ */
+ log->max_current_avail = log->total_avail * log->reserved;
+ log->total_avail = log->total_avail * log->data_size;
+ log->current_avail = log->max_current_avail;
+}
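+
+/*
+ * Worked example (illustrative): for a 4K log page, data_off above is
+ * QuadAlign(offsetof(struct RECORD_PAGE_HDR, fixups) +
+ * sizeof(short) * ((4096 >> SECTOR_SHIFT) + 1)) == QuadAlign(0x3a) == 0x40,
+ * leaving data_size == 4096 - 0x40 == 0xfc0 bytes per page for record data.
+ */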
+
+/*
+ * log_create_ra
+ *
+ * This routine is called to fill a restart area from the values stored in 'log'
+ */
+static struct RESTART_AREA *log_create_ra(struct ntfs_log *log)
+{
+ struct CLIENT_REC *cr;
+ struct RESTART_AREA *ra = ntfs_zalloc(log->restart_size);
+
+ if (!ra)
+ return NULL;
+
+ ra->current_lsn = cpu_to_le64(log->last_lsn);
+ ra->log_clients = cpu_to_le16(1);
+ ra->client_idx[1] = LFS_NO_CLIENT_LE;
+ if (log->l_flags & NTFSLOG_MULTIPLE_PAGE_IO)
+ ra->flags = RESTART_SINGLE_PAGE_IO;
+ ra->seq_num_bits = cpu_to_le32(log->seq_num_bits);
+ ra->ra_len = cpu_to_le16(log->ra_size);
+ ra->client_off = cpu_to_le16(offsetof(struct RESTART_AREA, clients));
+ ra->l_size = cpu_to_le64(log->l_size);
+ ra->rec_hdr_len = cpu_to_le16(log->record_header_len);
+ ra->data_off = cpu_to_le16(log->data_off);
+ ra->open_log_count = cpu_to_le32(log->current_openlog_count + 1);
+
+ cr = ra->clients;
+
+ cr->prev_client = LFS_NO_CLIENT_LE;
+ cr->next_client = LFS_NO_CLIENT_LE;
+
+ return ra;
+}
+
+static u32 final_log_off(struct ntfs_log *log, u64 lsn, u32 data_len)
+{
+ u32 base_vbo = lsn << 3;
+ u32 final_log_off = (base_vbo & log->seq_num_mask) & ~log->page_mask;
+ u32 page_off = base_vbo & log->page_mask;
+ u32 tail = log->page_size - page_off;
+
+ page_off -= 1;
+
+ /* Add the length of the header */
+ data_len += log->record_header_len;
+
+ /*
+	 * If this lsn is contained in this log page we are done
+ * Otherwise we need to walk through several log pages
+ */
+ if (data_len > tail) {
+ data_len -= tail;
+ tail = log->data_size;
+ page_off = log->data_off - 1;
+
+ for (;;) {
+ final_log_off = next_page_off(log, final_log_off);
+
+ /* We are done if the remaining bytes fit on this page */
+ if (data_len <= tail)
+ break;
+ data_len -= tail;
+ }
+ }
+
+ /*
+ * We add the remaining bytes to our starting position on this page
+ * and then add that value to the file offset of this log page
+ */
+ return final_log_off + data_len + page_off;
+}
+
+static int next_log_lsn(struct ntfs_log *log, const struct LFS_RECORD_HDR *rh,
+ u64 *lsn)
+{
+ int err;
+ u64 this_lsn = le64_to_cpu(rh->this_lsn);
+ u32 vbo = lsn_to_vbo(log, this_lsn);
+ u32 end =
+ final_log_off(log, this_lsn, le32_to_cpu(rh->client_data_len));
+ u32 hdr_off = end & ~log->sys_page_mask;
+ u64 seq = this_lsn >> log->file_data_bits;
+ struct RECORD_PAGE_HDR *page = NULL;
+
+ /* Remember if we wrapped */
+ if (end <= vbo)
+ seq += 1;
+
+ /* log page header for this page */
+ err = read_log_page(log, hdr_off, &page, NULL);
+ if (err)
+ return err;
+
+ /*
+ * If the lsn we were given was not the last lsn on this page,
+ * then the starting offset for the next lsn is on a quad word
+ * boundary following the last file offset for the current lsn
+ * Otherwise the file offset is the start of the data on the next page
+ */
+ if (this_lsn == le64_to_cpu(page->rhdr.lsn)) {
+ /* If we wrapped, we need to increment the sequence number */
+ hdr_off = next_page_off(log, hdr_off);
+ if (hdr_off == log->first_page)
+ seq += 1;
+
+ vbo = hdr_off + log->data_off;
+ } else {
+ vbo = QuadAlign(end);
+ }
+
+ /* Compute the lsn based on the file offset and the sequence count */
+ *lsn = vbo_to_lsn(log, vbo, seq);
+
+ /*
+	 * If this lsn is within the legal range for the file, we keep it
+	 * Otherwise *lsn is reset to 0 to indicate there are no more lsn's
+ */
+ if (!is_lsn_in_file(log, *lsn))
+ *lsn = 0;
+
+ ntfs_free(page);
+
+ return 0;
+}
+
+/*
+ * current_log_avail
+ *
+ * calculate the number of bytes available for log records
+ */
+static u32 current_log_avail(struct ntfs_log *log)
+{
+ u32 oldest_off, next_free_off, free_bytes;
+
+ if (log->l_flags & NTFSLOG_NO_LAST_LSN) {
+ /* The entire file is available */
+ return log->max_current_avail;
+ }
+
+ /*
+	 * If there is a last lsn in the restart area then we know that we will
+ * have to compute the free range
+ * If there is no oldest lsn then start at the first page of the file
+ */
+ oldest_off = (log->l_flags & NTFSLOG_NO_OLDEST_LSN)
+ ? log->first_page
+ : (log->oldest_lsn_off & ~log->sys_page_mask);
+
+ /*
+	 * We will use the next log page offset to compute the next free page.
+ * If we are going to reuse this page go to the next page
+ * If we are at the first page then use the end of the file
+ */
+ next_free_off = (log->l_flags & NTFSLOG_REUSE_TAIL)
+ ? log->next_page + log->page_size
+ : log->next_page == log->first_page
+ ? log->l_size
+ : log->next_page;
+
+ /* If the two offsets are the same then there is no available space */
+ if (oldest_off == next_free_off)
+ return 0;
+ /*
+ * If the free offset follows the oldest offset then subtract
+ * this range from the total available pages
+ */
+ free_bytes =
+ oldest_off < next_free_off
+ ? log->total_avail_pages - (next_free_off - oldest_off)
+ : oldest_off - next_free_off;
+
+ free_bytes >>= log->page_bits;
+ return free_bytes * log->reserved;
+}
+
+static bool check_subseq_log_page(struct ntfs_log *log,
+ const struct RECORD_PAGE_HDR *rp, u32 vbo,
+ u64 seq)
+{
+ u64 lsn_seq;
+ const struct NTFS_RECORD_HEADER *rhdr = &rp->rhdr;
+ u64 lsn = le64_to_cpu(rhdr->lsn);
+
+ if (rhdr->sign == NTFS_FFFF_SIGNATURE || !rhdr->sign)
+ return false;
+
+ /*
+	 * If the last lsn on the page was written after the page
+ * that caused the original error then we have a fatal error
+ */
+ lsn_seq = lsn >> log->file_data_bits;
+
+ /*
+	 * If the sequence number for the lsn on the page is equal to or greater
+	 * than the lsn we expect, then this is a subsequent write
+ */
+ return lsn_seq >= seq ||
+ (lsn_seq == seq - 1 && log->first_page == vbo &&
+ vbo != (lsn_to_vbo(log, lsn) & ~log->page_mask));
+}
+
+/*
+ * last_log_lsn
+ *
+ * This routine walks through the log pages for a file, searching for the
+ * last log page written to the file
+ */
+static int last_log_lsn(struct ntfs_log *log)
+{
+ int err;
+ bool usa_error = false;
+ bool replace_page = false;
+ bool reuse_page = log->l_flags & NTFSLOG_REUSE_TAIL;
+ bool wrapped_file, wrapped;
+
+ u32 page_cnt = 1, page_pos = 1;
+ u32 page_off = 0, page_off1 = 0, saved_off = 0;
+ u32 final_off, second_off, final_off_prev = 0, second_off_prev = 0;
+ u32 first_file_off = 0, second_file_off = 0;
+ u32 part_io_count = 0;
+ u32 tails = 0;
+ u32 this_off, curpage_off, nextpage_off, remain_pages;
+
+ u64 expected_seq, seq_base = 0, lsn_base = 0;
+ u64 best_lsn, best_lsn1, best_lsn2;
+ u64 lsn_cur, lsn1, lsn2;
+ u64 last_ok_lsn = reuse_page ? log->last_lsn : 0;
+
+ u16 cur_pos, best_page_pos;
+
+ struct RECORD_PAGE_HDR *page = NULL;
+ struct RECORD_PAGE_HDR *tst_page = NULL;
+ struct RECORD_PAGE_HDR *first_tail = NULL;
+ struct RECORD_PAGE_HDR *second_tail = NULL;
+ struct RECORD_PAGE_HDR *tail_page = NULL;
+ struct RECORD_PAGE_HDR *second_tail_prev = NULL;
+ struct RECORD_PAGE_HDR *first_tail_prev = NULL;
+ struct RECORD_PAGE_HDR *page_bufs = NULL;
+ struct RECORD_PAGE_HDR *best_page;
+
+ if (log->major_ver >= 2) {
+ final_off = 0x02 * log->page_size;
+ second_off = 0x12 * log->page_size;
+
+ // 0x10 == 0x12 - 0x2
+ page_bufs = ntfs_malloc(log->page_size * 0x10);
+ if (!page_bufs)
+ return -ENOMEM;
+ } else {
+ second_off = log->first_page - log->page_size;
+ final_off = second_off - log->page_size;
+ }
+
+next_tail:
+ /* Read second tail page (at pos 3/0x12000) */
+ if (read_log_page(log, second_off, &second_tail, &usa_error) ||
+ usa_error || second_tail->rhdr.sign != NTFS_RCRD_SIGNATURE) {
+ ntfs_free(second_tail);
+ second_tail = NULL;
+ second_file_off = 0;
+ lsn2 = 0;
+ } else {
+ second_file_off = hdr_file_off(log, second_tail);
+ lsn2 = le64_to_cpu(second_tail->record_hdr.last_end_lsn);
+ }
+
+	/* Read first tail page (at pos 2/0x2000) */
+ if (read_log_page(log, final_off, &first_tail, &usa_error) ||
+ usa_error || first_tail->rhdr.sign != NTFS_RCRD_SIGNATURE) {
+ ntfs_free(first_tail);
+ first_tail = NULL;
+ first_file_off = 0;
+ lsn1 = 0;
+ } else {
+ first_file_off = hdr_file_off(log, first_tail);
+ lsn1 = le64_to_cpu(first_tail->record_hdr.last_end_lsn);
+ }
+
+ if (log->major_ver < 2) {
+ int best_page;
+
+ first_tail_prev = first_tail;
+ final_off_prev = first_file_off;
+ second_tail_prev = second_tail;
+ second_off_prev = second_file_off;
+ tails = 1;
+
+ if (!first_tail && !second_tail)
+ goto tail_read;
+
+ if (first_tail && second_tail)
+ best_page = lsn1 < lsn2 ? 1 : 0;
+ else if (first_tail)
+ best_page = 0;
+ else
+ best_page = 1;
+
+ page_off = best_page ? second_file_off : first_file_off;
+ seq_base = (best_page ? lsn2 : lsn1) >> log->file_data_bits;
+ goto tail_read;
+ }
+
+ best_lsn1 = first_tail ? base_lsn(log, first_tail, first_file_off) : 0;
+ best_lsn2 =
+ second_tail ? base_lsn(log, second_tail, second_file_off) : 0;
+
+ if (first_tail && second_tail) {
+ if (best_lsn1 > best_lsn2) {
+ best_lsn = best_lsn1;
+ best_page = first_tail;
+ this_off = first_file_off;
+ } else {
+ best_lsn = best_lsn2;
+ best_page = second_tail;
+ this_off = second_file_off;
+ }
+ } else if (first_tail) {
+ best_lsn = best_lsn1;
+ best_page = first_tail;
+ this_off = first_file_off;
+ } else if (second_tail) {
+ best_lsn = best_lsn2;
+ best_page = second_tail;
+ this_off = second_file_off;
+ } else {
+ goto tail_read;
+ }
+
+ best_page_pos = le16_to_cpu(best_page->page_pos);
+
+ if (!tails) {
+ if (best_page_pos == page_pos) {
+ seq_base = best_lsn >> log->file_data_bits;
+ saved_off = page_off = le32_to_cpu(best_page->file_off);
+ lsn_base = best_lsn;
+
+ memmove(page_bufs, best_page, log->page_size);
+
+ page_cnt = le16_to_cpu(best_page->page_count);
+ if (page_cnt > 1)
+ page_pos += 1;
+
+ tails = 1;
+ }
+ } else if (seq_base == (best_lsn >> log->file_data_bits) &&
+ saved_off + log->page_size == this_off &&
+ lsn_base < best_lsn &&
+ (page_pos != page_cnt || best_page_pos == page_pos ||
+ best_page_pos == 1) &&
+ (page_pos >= page_cnt || best_page_pos == page_pos)) {
+ u16 bppc = le16_to_cpu(best_page->page_count);
+
+ saved_off += log->page_size;
+ lsn_base = best_lsn;
+
+ memmove(Add2Ptr(page_bufs, tails * log->page_size), best_page,
+ log->page_size);
+
+ tails += 1;
+
+ if (best_page_pos != bppc) {
+ page_cnt = bppc;
+ page_pos = best_page_pos;
+
+ if (page_cnt > 1)
+ page_pos += 1;
+ } else {
+ page_pos = page_cnt = 1;
+ }
+ } else {
+ ntfs_free(first_tail);
+ ntfs_free(second_tail);
+ goto tail_read;
+ }
+
+ ntfs_free(first_tail_prev);
+ first_tail_prev = first_tail;
+ final_off_prev = first_file_off;
+ first_tail = NULL;
+
+ ntfs_free(second_tail_prev);
+ second_tail_prev = second_tail;
+ second_off_prev = second_file_off;
+ second_tail = NULL;
+
+ final_off += log->page_size;
+ second_off += log->page_size;
+
+ if (tails < 0x10)
+ goto next_tail;
+tail_read:
+ first_tail = first_tail_prev;
+ final_off = final_off_prev;
+
+ second_tail = second_tail_prev;
+ second_off = second_off_prev;
+
+ page_cnt = page_pos = 1;
+
+ curpage_off = seq_base == log->seq_num ? min(log->next_page, page_off)
+ : log->next_page;
+
+ wrapped_file =
+ curpage_off == log->first_page &&
+ !(log->l_flags & (NTFSLOG_NO_LAST_LSN | NTFSLOG_REUSE_TAIL));
+
+ expected_seq = wrapped_file ? (log->seq_num + 1) : log->seq_num;
+
+ nextpage_off = curpage_off;
+
+next_page:
+ tail_page = NULL;
+ /* Read the next log page */
+ err = read_log_page(log, curpage_off, &page, &usa_error);
+
+	/* Compute the next log page offset in the file */
+ nextpage_off = next_page_off(log, curpage_off);
+ wrapped = nextpage_off == log->first_page;
+
+ if (tails > 1) {
+ struct RECORD_PAGE_HDR *cur_page =
+ Add2Ptr(page_bufs, curpage_off - page_off);
+
+ if (curpage_off == saved_off) {
+ tail_page = cur_page;
+ goto use_tail_page;
+ }
+
+ if (page_off > curpage_off || curpage_off >= saved_off)
+ goto use_tail_page;
+
+ if (page_off1)
+ goto use_cur_page;
+
+ if (!err && !usa_error &&
+ page->rhdr.sign == NTFS_RCRD_SIGNATURE &&
+ cur_page->rhdr.lsn == page->rhdr.lsn &&
+ cur_page->record_hdr.next_record_off ==
+ page->record_hdr.next_record_off &&
+ ((page_pos == page_cnt &&
+ le16_to_cpu(page->page_pos) == 1) ||
+ (page_pos != page_cnt &&
+ le16_to_cpu(page->page_pos) == page_pos + 1 &&
+ le16_to_cpu(page->page_count) == page_cnt))) {
+ cur_page = NULL;
+ goto use_tail_page;
+ }
+
+ page_off1 = page_off;
+
+use_cur_page:
+
+ lsn_cur = le64_to_cpu(cur_page->rhdr.lsn);
+
+ if (last_ok_lsn !=
+ le64_to_cpu(cur_page->record_hdr.last_end_lsn) &&
+ ((lsn_cur >> log->file_data_bits) +
+ ((curpage_off <
+ (lsn_to_vbo(log, lsn_cur) & ~log->page_mask))
+ ? 1
+ : 0)) != expected_seq) {
+ goto check_tail;
+ }
+
+ if (!is_log_record_end(cur_page)) {
+ tail_page = NULL;
+ last_ok_lsn = lsn_cur;
+ goto next_page_1;
+ }
+
+ log->seq_num = expected_seq;
+ log->l_flags &= ~NTFSLOG_NO_LAST_LSN;
+ log->last_lsn = le64_to_cpu(cur_page->record_hdr.last_end_lsn);
+ log->ra->current_lsn = cur_page->record_hdr.last_end_lsn;
+
+ if (log->record_header_len <=
+ log->page_size -
+ le16_to_cpu(cur_page->record_hdr.next_record_off)) {
+ log->l_flags |= NTFSLOG_REUSE_TAIL;
+ log->next_page = curpage_off;
+ } else {
+ log->l_flags &= ~NTFSLOG_REUSE_TAIL;
+ log->next_page = nextpage_off;
+ }
+
+ if (wrapped_file)
+ log->l_flags |= NTFSLOG_WRAPPED;
+
+ last_ok_lsn = le64_to_cpu(cur_page->record_hdr.last_end_lsn);
+ goto next_page_1;
+ }
+
+ /*
+ * If we are at the expected first page of a transfer check to see
+ * if either tail copy is at this offset
+ * If this page is the last page of a transfer, check if we wrote
+ * a subsequent tail copy
+ */
+ if (page_cnt == page_pos || page_cnt == page_pos + 1) {
+ /*
+ * Check if the offset matches either the first or second
+ * tail copy. It is possible it will match both
+ */
+ if (curpage_off == final_off)
+ tail_page = first_tail;
+
+ /*
+ * If we already matched on the first page then
+ * check the ending lsn's.
+ */
+ if (curpage_off == second_off) {
+ if (!tail_page ||
+ (second_tail &&
+ le64_to_cpu(second_tail->record_hdr.last_end_lsn) >
+ le64_to_cpu(first_tail->record_hdr
+ .last_end_lsn))) {
+ tail_page = second_tail;
+ }
+ }
+ }
+
+use_tail_page:
+ if (tail_page) {
+ /* we have a candidate for a tail copy */
+ lsn_cur = le64_to_cpu(tail_page->record_hdr.last_end_lsn);
+
+ if (last_ok_lsn < lsn_cur) {
+ /*
+ * If the sequence number is not expected,
+ * then don't use the tail copy
+ */
+ if (expected_seq != (lsn_cur >> log->file_data_bits))
+ tail_page = NULL;
+ } else if (last_ok_lsn > lsn_cur) {
+ /*
+ * If the last lsn is greater than the one on
+ * this page then forget this tail
+ */
+ tail_page = NULL;
+ }
+ }
+
+	/* If we have an error on the current page, we will break out of this loop */
+ if (err || usa_error)
+ goto check_tail;
+
+ /*
+ * Done if the last lsn on this page doesn't match the previous known
+ * last lsn or the sequence number is not expected
+ */
+ lsn_cur = le64_to_cpu(page->rhdr.lsn);
+ if (last_ok_lsn != lsn_cur &&
+ expected_seq != (lsn_cur >> log->file_data_bits)) {
+ goto check_tail;
+ }
+
+ /*
+ * Check that the page position and page count values are correct
+ * If this is the first page of a transfer the position must be 1
+ * and the count will be unknown
+ */
+ if (page_cnt == page_pos) {
+ if (page->page_pos != cpu_to_le16(1) &&
+ (!reuse_page || page->page_pos != page->page_count)) {
+ /*
+ * If the current page is the first page we are
+ * looking at and we are reusing this page then
+ * it can be either the first or last page of a
+ * transfer. Otherwise it can only be the first.
+ */
+ goto check_tail;
+ }
+ } else if (le16_to_cpu(page->page_count) != page_cnt ||
+ le16_to_cpu(page->page_pos) != page_pos + 1) {
+ /*
+ * The page position better be 1 more than the last page
+ * position and the page count better match
+ */
+ goto check_tail;
+ }
+
+ /*
+	 * We have a valid page in the file and may have a valid page
+	 * in the tail copy area
+	 * If the tail page was written after the page in the file then
+	 * break out of the loop
+ */
+ if (tail_page &&
+ le64_to_cpu(tail_page->record_hdr.last_end_lsn) > lsn_cur) {
+ /* Remember if we will replace the page */
+ replace_page = true;
+ goto check_tail;
+ }
+
+ tail_page = NULL;
+
+ if (is_log_record_end(page)) {
+ /*
+ * Since we have read this page we know the sequence number
+ * is the same as our expected value
+ */
+ log->seq_num = expected_seq;
+ log->last_lsn = le64_to_cpu(page->record_hdr.last_end_lsn);
+ log->ra->current_lsn = page->record_hdr.last_end_lsn;
+ log->l_flags &= ~NTFSLOG_NO_LAST_LSN;
+
+ /*
+ * If there is room on this page for another header then
+ * remember we want to reuse the page
+ */
+ if (log->record_header_len <=
+ log->page_size -
+ le16_to_cpu(page->record_hdr.next_record_off)) {
+ log->l_flags |= NTFSLOG_REUSE_TAIL;
+ log->next_page = curpage_off;
+ } else {
+ log->l_flags &= ~NTFSLOG_REUSE_TAIL;
+ log->next_page = nextpage_off;
+ }
+
+ /* Remember if we wrapped the log file */
+ if (wrapped_file)
+ log->l_flags |= NTFSLOG_WRAPPED;
+ }
+
+ /*
+ * Remember the last page count and position.
+ * Also remember the last known lsn
+ */
+ page_cnt = le16_to_cpu(page->page_count);
+ page_pos = le16_to_cpu(page->page_pos);
+ last_ok_lsn = le64_to_cpu(page->rhdr.lsn);
+
+next_page_1:
+
+ if (wrapped) {
+ expected_seq += 1;
+ wrapped_file = 1;
+ }
+
+ curpage_off = nextpage_off;
+ ntfs_free(page);
+ page = NULL;
+ reuse_page = 0;
+ goto next_page;
+
+check_tail:
+ if (tail_page) {
+ log->seq_num = expected_seq;
+ log->last_lsn = le64_to_cpu(tail_page->record_hdr.last_end_lsn);
+ log->ra->current_lsn = tail_page->record_hdr.last_end_lsn;
+ log->l_flags &= ~NTFSLOG_NO_LAST_LSN;
+
+ if (log->page_size -
+ le16_to_cpu(
+ tail_page->record_hdr.next_record_off) >=
+ log->record_header_len) {
+ log->l_flags |= NTFSLOG_REUSE_TAIL;
+ log->next_page = curpage_off;
+ } else {
+ log->l_flags &= ~NTFSLOG_REUSE_TAIL;
+ log->next_page = nextpage_off;
+ }
+
+ if (wrapped)
+ log->l_flags |= NTFSLOG_WRAPPED;
+ }
+
+ /* Remember that the partial IO will start at the next page */
+ second_off = nextpage_off;
+
+ /*
+ * If the next page is the first page of the file then update
+	 * the sequence number for log records which begin on the next page
+ */
+ if (wrapped)
+ expected_seq += 1;
+
+ /*
+ * If we have a tail copy or are performing single page I/O we can
+ * immediately look at the next page
+ */
+ if (replace_page || (log->ra->flags & RESTART_SINGLE_PAGE_IO)) {
+ page_cnt = 2;
+ page_pos = 1;
+ goto check_valid;
+ }
+
+ if (page_pos != page_cnt)
+ goto check_valid;
+ /*
+ * If the next page causes us to wrap to the beginning of the log
+ * file then we know which page to check next.
+ */
+ if (wrapped) {
+ page_cnt = 2;
+ page_pos = 1;
+ goto check_valid;
+ }
+
+ cur_pos = 2;
+
+next_test_page:
+ ntfs_free(tst_page);
+ tst_page = NULL;
+
+ /* Walk through the file, reading log pages */
+ err = read_log_page(log, nextpage_off, &tst_page, &usa_error);
+
+ /*
+ * If we get a USA error then assume that we correctly found
+ * the end of the original transfer
+ */
+ if (usa_error)
+ goto file_is_valid;
+
+ /*
+ * If we were able to read the page, we examine it to see if it
+	 * is the same or a different IO block
+ */
+ if (err)
+ goto next_test_page_1;
+
+ if (le16_to_cpu(tst_page->page_pos) == cur_pos &&
+ check_subseq_log_page(log, tst_page, nextpage_off, expected_seq)) {
+ page_cnt = le16_to_cpu(tst_page->page_count) + 1;
+ page_pos = le16_to_cpu(tst_page->page_pos);
+ goto check_valid;
+ } else {
+ goto file_is_valid;
+ }
+
+next_test_page_1:
+
+ nextpage_off = next_page_off(log, curpage_off);
+ wrapped = nextpage_off == log->first_page;
+
+ if (wrapped) {
+ expected_seq += 1;
+ page_cnt = 2;
+ page_pos = 1;
+ }
+
+ cur_pos += 1;
+ part_io_count += 1;
+ if (!wrapped)
+ goto next_test_page;
+
+check_valid:
+ /* Skip over the remaining pages in this transfer */
+ remain_pages = page_cnt - page_pos - 1;
+ part_io_count += remain_pages;
+
+ while (remain_pages--) {
+ nextpage_off = next_page_off(log, curpage_off);
+ wrapped = nextpage_off == log->first_page;
+
+ if (wrapped)
+ expected_seq += 1;
+ }
+
+ /* Call our routine to check this log page */
+ ntfs_free(tst_page);
+ tst_page = NULL;
+
+ err = read_log_page(log, nextpage_off, &tst_page, &usa_error);
+ if (!err && !usa_error &&
+ check_subseq_log_page(log, tst_page, nextpage_off, expected_seq)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+file_is_valid:
+
+ /* We have a valid file */
+ if (page_off1 || tail_page) {
+ struct RECORD_PAGE_HDR *tmp_page;
+
+ if (sb_rdonly(log->ni->mi.sbi->sb)) {
+ err = -EROFS;
+ goto out;
+ }
+
+ if (page_off1) {
+ tmp_page = Add2Ptr(page_bufs, page_off1 - page_off);
+ tails -= (page_off1 - page_off) / log->page_size;
+ if (!tail_page)
+ tails -= 1;
+ } else {
+ tmp_page = tail_page;
+ tails = 1;
+ }
+
+ while (tails--) {
+ u64 off = hdr_file_off(log, tmp_page);
+
+ if (!page) {
+ page = ntfs_malloc(log->page_size);
+ if (!page) {
+ err = -ENOMEM;
+ /* Use the common exit so the other buffers are freed, not leaked. */
+ goto out;
+ }
+ }
+
+ /*
+ * Build a corrected copy of this page and
+ * flush it to disk.
+ */
+ memcpy(page, tmp_page, log->page_size);
+
+ /* Fill in the last flushed lsn value and flush the page */
+ if (log->major_ver < 2)
+ page->rhdr.lsn = page->record_hdr.last_end_lsn;
+ else
+ page->file_off = 0;
+
+ page->page_pos = page->page_count = cpu_to_le16(1);
+
+ ntfs_fix_pre_write(&page->rhdr, log->page_size);
+
+ err = ntfs_sb_write_run(log->ni->mi.sbi,
+ &log->ni->file.run, off, page,
+ log->page_size);
+
+ if (err)
+ goto out;
+
+ if (part_io_count && second_off == off) {
+ second_off += log->page_size;
+ part_io_count -= 1;
+ }
+
+ tmp_page = Add2Ptr(tmp_page, log->page_size);
+ }
+ }
+
+ if (part_io_count) {
+ if (sb_rdonly(log->ni->mi.sbi->sb)) {
+ err = -EROFS;
+ goto out;
+ }
+ }
+
+out:
+ ntfs_free(second_tail);
+ ntfs_free(first_tail);
+ ntfs_free(page);
+ ntfs_free(tst_page);
+ ntfs_free(page_bufs);
+
+ return err;
+}
+
+/*
+ * read_log_rec_buf
+ *
+ * Copies a log record from the file to a buffer.
+ * The log record may span several log pages and may even wrap the file.
+ */
+static int read_log_rec_buf(struct ntfs_log *log,
+ const struct LFS_RECORD_HDR *rh, void *buffer)
+{
+ int err;
+ struct RECORD_PAGE_HDR *ph = NULL;
+ u64 lsn = le64_to_cpu(rh->this_lsn);
+ u32 vbo = lsn_to_vbo(log, lsn) & ~log->page_mask;
+ u32 off = lsn_to_page_off(log, lsn) + log->record_header_len;
+ u32 data_len = le32_to_cpu(rh->client_data_len);
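+
+ /*
+ * vbo addresses the log page containing the lsn; off points just past
+ * the record header, where the client data begins.
+ */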
+
+ /*
+ * While there are more bytes to transfer,
+ * we continue to attempt to perform the read
+ */
+ for (;;) {
+ bool usa_error;
+ u32 tail = log->page_size - off;
+
+ if (tail >= data_len)
+ tail = data_len;
+
+ data_len -= tail;
+
+ err = read_log_page(log, vbo, &ph, &usa_error);
+ if (err)
+ goto out;
+
+ /*
+ * The last lsn on this page had better be greater than or equal
+ * to the lsn we are copying
+ */
+ if (lsn > le64_to_cpu(ph->rhdr.lsn)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(buffer, Add2Ptr(ph, off), tail);
+
+ /* If there are no more bytes to transfer, we exit the loop */
+ if (!data_len) {
+ if (!is_log_record_end(ph) ||
+ lsn > le64_to_cpu(ph->record_hdr.last_end_lsn)) {
+ err = -EINVAL;
+ goto out;
+ }
+ break;
+ }
+
+ if (ph->rhdr.lsn == ph->record_hdr.last_end_lsn ||
+ lsn > le64_to_cpu(ph->rhdr.lsn)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ vbo = next_page_off(log, vbo);
+ off = log->data_off;
+
+ /*
+ * Advance our pointer into the user's buffer to where
+ * the next block should be transferred
+ */
+ buffer = Add2Ptr(buffer, tail);
+ }
+
+out:
+ ntfs_free(ph);
+ return err;
+}
+
+static int read_rst_area(struct ntfs_log *log, struct NTFS_RESTART **rst_,
+ u64 *lsn)
+{
+ int err;
+ struct LFS_RECORD_HDR *rh = NULL;
+ const struct CLIENT_REC *cr =
+ Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off));
+ u64 lsnr, lsnc = le64_to_cpu(cr->restart_lsn);
+ u32 len;
+ struct NTFS_RESTART *rst;
+
+ *lsn = 0;
+ *rst_ = NULL;
+
+ /* If the client doesn't have a restart area, go ahead and exit now */
+ if (!lsnc)
+ return 0;
+
+ err = read_log_page(log, lsn_to_vbo(log, lsnc),
+ (struct RECORD_PAGE_HDR **)&rh, NULL);
+ if (err)
+ return err;
+
+ rst = NULL;
+ lsnr = le64_to_cpu(rh->this_lsn);
+
+ if (lsnc != lsnr) {
+ /* If the lsn values don't match, then the disk is corrupt */
+ err = -EINVAL;
+ goto out;
+ }
+
+ *lsn = lsnr;
+ len = le32_to_cpu(rh->client_data_len);
+
+ if (!len) {
+ err = 0;
+ goto out;
+ }
+
+ if (len < sizeof(struct NTFS_RESTART)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ rst = ntfs_malloc(len);
+ if (!rst) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* Copy the data into the 'rst' buffer */
+ err = read_log_rec_buf(log, rh, rst);
+ if (err)
+ goto out;
+
+ *rst_ = rst;
+ rst = NULL;
+
+out:
+ ntfs_free(rh);
+ ntfs_free(rst);
+
+ return err;
+}
+
+static int find_log_rec(struct ntfs_log *log, u64 lsn, struct lcb *lcb)
+{
+ int err;
+ struct LFS_RECORD_HDR *rh = lcb->lrh;
+ u32 rec_len, len;
+
+ /* Read the record header for this lsn */
+ if (!rh) {
+ err = read_log_page(log, lsn_to_vbo(log, lsn),
+ (struct RECORD_PAGE_HDR **)&rh, NULL);
+
+ lcb->lrh = rh;
+ if (err)
+ return err;
+ }
+
+ /*
+ * If the lsn in the log record doesn't match the desired
+ * lsn then the disk is corrupt
+ */
+ if (lsn != le64_to_cpu(rh->this_lsn))
+ return -EINVAL;
+
+ len = le32_to_cpu(rh->client_data_len);
+
+ /*
+ * Check that the length field isn't greater than the total
+ * available space in the log file
+ */
+ rec_len = len + log->record_header_len;
+ if (rec_len >= log->total_avail)
+ return -EINVAL;
+
+ /*
+ * If the log record spans several pages, copy it into an
+ * allocated buffer; otherwise point into the page we hold
+ */
+ if (rh->flags & LOG_RECORD_MULTI_PAGE) {
+ void *lr = ntfs_malloc(len);
+
+ if (!lr)
+ return -ENOMEM;
+
+ lcb->log_rec = lr;
+ lcb->alloc = true;
+
+ /* Copy the data into the buffer returned */
+ err = read_log_rec_buf(log, rh, lr);
+ if (err)
+ return err;
+ } else {
+ /* If beyond the end of the current page -> an error */
+ u32 page_off = lsn_to_page_off(log, lsn);
+
+ if (page_off + len + log->record_header_len > log->page_size)
+ return -EINVAL;
+
+ lcb->log_rec = Add2Ptr(rh, sizeof(struct LFS_RECORD_HDR));
+ lcb->alloc = false;
+ }
+
+ return 0;
+}
+
+/*
+ * read_log_rec_lcb
+ *
+ * Initiates the query operation.
+ */
+static int read_log_rec_lcb(struct ntfs_log *log, u64 lsn, u32 ctx_mode,
+ struct lcb **lcb_)
+{
+ int err;
+ const struct CLIENT_REC *cr;
+ struct lcb *lcb;
+
+ switch (ctx_mode) {
+ case lcb_ctx_undo_next:
+ case lcb_ctx_prev:
+ case lcb_ctx_next:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Check that the given lsn is in the legal range for this client */
+ cr = Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off));
+
+ if (!verify_client_lsn(log, cr, lsn))
+ return -EINVAL;
+
+ lcb = ntfs_zalloc(sizeof(struct lcb));
+ if (!lcb)
+ return -ENOMEM;
+ lcb->client = log->client_id;
+ lcb->ctx_mode = ctx_mode;
+
+ /* Find the log record indicated by the given lsn */
+ err = find_log_rec(log, lsn, lcb);
+ if (err)
+ goto out;
+
+ *lcb_ = lcb;
+ return 0;
+
+out:
+ lcb_put(lcb);
+ *lcb_ = NULL;
+ return err;
+}
+
+/*
+ * find_client_next_lsn
+ *
+ * Attempt to find the next lsn to return to a client based on the context mode.
+ */
+static int find_client_next_lsn(struct ntfs_log *log, struct lcb *lcb, u64 *lsn)
+{
+ int err;
+ u64 next_lsn;
+ struct LFS_RECORD_HDR *hdr;
+
+ hdr = lcb->lrh;
+ *lsn = 0;
+
+ if (lcb_ctx_next != lcb->ctx_mode)
+ goto check_undo_next;
+
+ /* Loop as long as another lsn can be found */
+ for (;;) {
+ u64 current_lsn;
+
+ err = next_log_lsn(log, hdr, &current_lsn);
+ if (err)
+ goto out;
+
+ if (!current_lsn)
+ break;
+
+ if (hdr != lcb->lrh)
+ ntfs_free(hdr);
+
+ hdr = NULL;
+ err = read_log_page(log, lsn_to_vbo(log, current_lsn),
+ (struct RECORD_PAGE_HDR **)&hdr, NULL);
+ if (err)
+ goto out;
+
+ if (memcmp(&hdr->client, &lcb->client,
+ sizeof(struct CLIENT_ID))) {
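+ /* Record belongs to another client: skip it rather than fail. */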
+ /*err = -EINVAL; */
+ } else if (LfsClientRecord == hdr->record_type) {
+ ntfs_free(lcb->lrh);
+ lcb->lrh = hdr;
+ *lsn = current_lsn;
+ return 0;
+ }
+ }
+
+out:
+ if (hdr != lcb->lrh)
+ ntfs_free(hdr);
+ return err;
+
+check_undo_next:
+ if (lcb_ctx_undo_next == lcb->ctx_mode)
+ next_lsn = le64_to_cpu(hdr->client_undo_next_lsn);
+ else if (lcb_ctx_prev == lcb->ctx_mode)
+ next_lsn = le64_to_cpu(hdr->client_prev_lsn);
+ else
+ return 0;
+
+ if (!next_lsn)
+ return 0;
+
+ if (!verify_client_lsn(
+ log, Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off)),
+ next_lsn))
+ return 0;
+
+ hdr = NULL;
+ err = read_log_page(log, lsn_to_vbo(log, next_lsn),
+ (struct RECORD_PAGE_HDR **)&hdr, NULL);
+ if (err)
+ return err;
+ ntfs_free(lcb->lrh);
+ lcb->lrh = hdr;
+
+ *lsn = next_lsn;
+
+ return 0;
+}
+
+static int read_next_log_rec(struct ntfs_log *log, struct lcb *lcb, u64 *lsn)
+{
+ int err;
+
+ err = find_client_next_lsn(log, lcb, lsn);
+ if (err)
+ return err;
+
+ if (!*lsn)
+ return 0;
+
+ if (lcb->alloc)
+ ntfs_free(lcb->log_rec);
+
+ lcb->log_rec = NULL;
+ lcb->alloc = false;
+ ntfs_free(lcb->lrh);
+ lcb->lrh = NULL;
+
+ return find_log_rec(log, *lsn, lcb);
+}
+
+static inline bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes)
+{
+ __le16 mask;
+ u32 min_de, de_off, used, total;
+ const struct NTFS_DE *e;
+
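+ /* Entries with subnodes carry a trailing u64 sub-block number, hence the larger minimum size. */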
+ if (hdr_has_subnode(hdr)) {
+ min_de = sizeof(struct NTFS_DE) + sizeof(u64);
+ mask = NTFS_IE_HAS_SUBNODES;
+ } else {
+ min_de = sizeof(struct NTFS_DE);
+ mask = 0;
+ }
+
+ de_off = le32_to_cpu(hdr->de_off);
+ used = le32_to_cpu(hdr->used);
+ total = le32_to_cpu(hdr->total);
+
+ if (de_off > bytes - min_de || used > bytes || total > bytes ||
+ de_off + min_de > used || used > total) {
+ return false;
+ }
+
+ e = Add2Ptr(hdr, de_off);
+ for (;;) {
+ u16 esize = le16_to_cpu(e->size);
+ struct NTFS_DE *next = Add2Ptr(e, esize);
+
+ if (esize < min_de || PtrOffset(hdr, next) > used ||
+ (e->flags & NTFS_IE_HAS_SUBNODES) != mask) {
+ return false;
+ }
+
+ if (de_is_last(e))
+ break;
+
+ e = next;
+ }
+
+ return true;
+}
+
+static inline bool check_index_buffer(const struct INDEX_BUFFER *ib, u32 bytes)
+{
+ u16 fo;
+ const struct NTFS_RECORD_HEADER *r = &ib->rhdr;
+
+ if (r->sign != NTFS_INDX_SIGNATURE)
+ return false;
+
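+ /* Highest offset at which the fixup array could start and still fit in the first sector. */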
+ fo = (SECTOR_SIZE - ((bytes >> SECTOR_SHIFT) + 1) * sizeof(short));
+
+ if (le16_to_cpu(r->fix_off) > fo)
+ return false;
+
+ if ((le16_to_cpu(r->fix_num) - 1) * SECTOR_SIZE != bytes)
+ return false;
+
+ return check_index_header(&ib->ihdr,
+ bytes - offsetof(struct INDEX_BUFFER, ihdr));
+}
+
+static inline bool check_index_root(const struct ATTRIB *attr,
+ struct ntfs_sb_info *sbi)
+{
+ bool ret;
+ const struct INDEX_ROOT *root = resident_data(attr);
+ u8 index_bits = le32_to_cpu(root->index_block_size) >= sbi->cluster_size
+ ? sbi->cluster_bits
+ : SECTOR_SHIFT;
+ u8 block_clst = root->index_block_clst;
+
+ if (le32_to_cpu(attr->res.data_size) < sizeof(struct INDEX_ROOT) ||
+ (root->type != ATTR_NAME && root->type != ATTR_ZERO) ||
+ (root->type == ATTR_NAME &&
+ root->rule != NTFS_COLLATION_TYPE_FILENAME) ||
+ (le32_to_cpu(root->index_block_size) !=
+ (block_clst << index_bits)) ||
+ (block_clst != 1 && block_clst != 2 && block_clst != 4 &&
+ block_clst != 8 && block_clst != 0x10 && block_clst != 0x20 &&
+ block_clst != 0x40 && block_clst != 0x80)) {
+ return false;
+ }
+
+ ret = check_index_header(&root->ihdr,
+ le32_to_cpu(attr->res.data_size) -
+ offsetof(struct INDEX_ROOT, ihdr));
+ return ret;
+}
+
+static inline bool check_attr(const struct MFT_REC *rec,
+ const struct ATTRIB *attr,
+ struct ntfs_sb_info *sbi)
+{
+ u32 asize = le32_to_cpu(attr->size);
+ u32 rsize = 0;
+ u64 dsize, svcn, evcn;
+ u16 run_off;
+
+ /* Check the fixed part of the attribute record header */
+ if (asize >= sbi->record_size ||
+ asize + PtrOffset(rec, attr) >= sbi->record_size ||
+ (attr->name_len &&
+ le16_to_cpu(attr->name_off) + attr->name_len * sizeof(short) >
+ asize)) {
+ return false;
+ }
+
+ /* Check the attribute fields */
+ switch (attr->non_res) {
+ case 0:
+ rsize = le32_to_cpu(attr->res.data_size);
+ if (rsize >= asize ||
+ le16_to_cpu(attr->res.data_off) + rsize > asize) {
+ return false;
+ }
+ break;
+
+ case 1:
+ dsize = le64_to_cpu(attr->nres.data_size);
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn = le64_to_cpu(attr->nres.evcn);
+ run_off = le16_to_cpu(attr->nres.run_off);
+
+ if (svcn > evcn + 1 || run_off >= asize ||
+ le64_to_cpu(attr->nres.valid_size) > dsize ||
+ dsize > le64_to_cpu(attr->nres.alloc_size)) {
+ return false;
+ }
+
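+ /* Validate the mapping pairs without storing the runs (run == NULL). */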
+ if (run_unpack(NULL, sbi, 0, svcn, evcn, svcn,
+ Add2Ptr(attr, run_off), asize - run_off) < 0) {
+ return false;
+ }
+
+ return true;
+
+ default:
+ return false;
+ }
+
+ switch (attr->type) {
+ case ATTR_NAME:
+ if (fname_full_size(Add2Ptr(
+ attr, le16_to_cpu(attr->res.data_off))) > asize) {
+ return false;
+ }
+ break;
+
+ case ATTR_ROOT:
+ return check_index_root(attr, sbi);
+
+ case ATTR_STD:
+ if (rsize < sizeof(struct ATTR_STD_INFO5) &&
+ rsize != sizeof(struct ATTR_STD_INFO)) {
+ return false;
+ }
+ break;
+
+ case ATTR_LIST:
+ case ATTR_ID:
+ case ATTR_SECURE:
+ case ATTR_LABEL:
+ case ATTR_VOL_INFO:
+ case ATTR_DATA:
+ case ATTR_ALLOC:
+ case ATTR_BITMAP:
+ case ATTR_REPARSE:
+ case ATTR_EA_INFO:
+ case ATTR_EA:
+ case ATTR_PROPERTYSET:
+ case ATTR_LOGGED_UTILITY_STREAM:
+ break;
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+static inline bool check_file_record(const struct MFT_REC *rec,
+ const struct MFT_REC *rec2,
+ struct ntfs_sb_info *sbi)
+{
+ const struct ATTRIB *attr;
+ u16 fo = le16_to_cpu(rec->rhdr.fix_off);
+ u16 fn = le16_to_cpu(rec->rhdr.fix_num);
+ u16 ao = le16_to_cpu(rec->attr_off);
+ u32 rs = sbi->record_size;
+
+ /* check the file record header for consistency */
+ if (rec->rhdr.sign != NTFS_FILE_SIGNATURE ||
+ fo > (SECTOR_SIZE - ((rs >> SECTOR_SHIFT) + 1) * sizeof(short)) ||
+ (fn - 1) * SECTOR_SIZE != rs || ao < MFTRECORD_FIXUP_OFFSET_1 ||
+ ao > sbi->record_size - SIZEOF_RESIDENT || !is_rec_inuse(rec) ||
+ le32_to_cpu(rec->total) != rs) {
+ return false;
+ }
+
+ /* Loop to check all of the attributes */
+ for (attr = Add2Ptr(rec, ao); attr->type != ATTR_END;
+ attr = Add2Ptr(attr, le32_to_cpu(attr->size))) {
+ if (check_attr(rec, attr, sbi))
+ continue;
+ return false;
+ }
+
+ return true;
+}
+
+static inline int check_lsn(const struct NTFS_RECORD_HEADER *hdr,
+ const u64 *rlsn)
+{
+ u64 lsn;
+
+ if (!rlsn)
+ return true;
+
+ lsn = le64_to_cpu(hdr->lsn);
+
+ if (hdr->sign == NTFS_HOLE_SIGNATURE)
+ return false;
+
+ if (*rlsn > lsn)
+ return true;
+
+ return false;
+}
+
+static inline bool check_if_attr(const struct MFT_REC *rec,
+ const struct LOG_REC_HDR *lrh)
+{
+ u16 ro = le16_to_cpu(lrh->record_off);
+ u16 o = le16_to_cpu(rec->attr_off);
+ const struct ATTRIB *attr = Add2Ptr(rec, o);
+
+ while (o < ro) {
+ u32 asize;
+
+ if (attr->type == ATTR_END)
+ break;
+
+ asize = le32_to_cpu(attr->size);
+ if (!asize)
+ break;
+
+ o += asize;
+ attr = Add2Ptr(attr, asize);
+ }
+
+ return o == ro;
+}
+
+static inline bool check_if_index_root(const struct MFT_REC *rec,
+ const struct LOG_REC_HDR *lrh)
+{
+ u16 ro = le16_to_cpu(lrh->record_off);
+ u16 o = le16_to_cpu(rec->attr_off);
+ const struct ATTRIB *attr = Add2Ptr(rec, o);
+
+ while (o < ro) {
+ u32 asize;
+
+ if (attr->type == ATTR_END)
+ break;
+
+ asize = le32_to_cpu(attr->size);
+ if (!asize)
+ break;
+
+ o += asize;
+ attr = Add2Ptr(attr, asize);
+ }
+
+ return o == ro && attr->type == ATTR_ROOT;
+}
+
+static inline bool check_if_root_index(const struct ATTRIB *attr,
+ const struct INDEX_HDR *hdr,
+ const struct LOG_REC_HDR *lrh)
+{
+ u16 ao = le16_to_cpu(lrh->attr_off);
+ u32 de_off = le32_to_cpu(hdr->de_off);
+ u32 o = PtrOffset(attr, hdr) + de_off;
+ const struct NTFS_DE *e = Add2Ptr(hdr, de_off);
+ u32 asize = le32_to_cpu(attr->size);
+
+ while (o < ao) {
+ u16 esize;
+
+ if (o >= asize)
+ break;
+
+ esize = le16_to_cpu(e->size);
+ if (!esize)
+ break;
+
+ o += esize;
+ e = Add2Ptr(e, esize);
+ }
+
+ return o == ao;
+}
+
+static inline bool check_if_alloc_index(const struct INDEX_HDR *hdr,
+ u32 attr_off)
+{
+ u32 de_off = le32_to_cpu(hdr->de_off);
+ u32 o = offsetof(struct INDEX_BUFFER, ihdr) + de_off;
+ const struct NTFS_DE *e = Add2Ptr(hdr, de_off);
+ u32 used = le32_to_cpu(hdr->used);
+
+ while (o < attr_off) {
+ u16 esize;
+
+ if (de_off >= used)
+ break;
+
+ esize = le16_to_cpu(e->size);
+ if (!esize)
+ break;
+
+ o += esize;
+ de_off += esize;
+ e = Add2Ptr(e, esize);
+ }
+
+ return o == attr_off;
+}
+
+static inline void change_attr_size(struct MFT_REC *rec, struct ATTRIB *attr,
+ u32 nsize)
+{
+ u32 asize = le32_to_cpu(attr->size);
+ int dsize = nsize - asize;
+ u8 *next = Add2Ptr(attr, asize);
+ u32 used = le32_to_cpu(rec->used);
+
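+ /* Shift the tail of the record so the attribute can grow or shrink in place. */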
+ memmove(Add2Ptr(attr, nsize), next, used - PtrOffset(rec, next));
+
+ rec->used = cpu_to_le32(used + dsize);
+ attr->size = cpu_to_le32(nsize);
+}
+
+struct OpenAttr {
+ struct ATTRIB *attr;
+ struct runs_tree *run1;
+ struct runs_tree run0;
+ struct ntfs_inode *ni;
+ // CLST rno;
+};
+
+/* Returns 0 if 'attr' has the same type and name */
+static inline int cmp_type_and_name(const struct ATTRIB *a1,
+ const struct ATTRIB *a2)
+{
+ return a1->type != a2->type || a1->name_len != a2->name_len ||
+ (a1->name_len && memcmp(attr_name(a1), attr_name(a2),
+ a1->name_len * sizeof(short)));
+}
+
+static struct OpenAttr *find_loaded_attr(struct ntfs_log *log,
+ const struct ATTRIB *attr, CLST rno)
+{
+ struct OPEN_ATTR_ENRTY *oe = NULL;
+
+ while ((oe = enum_rstbl(log->open_attr_tbl, oe))) {
+ struct OpenAttr *op_attr;
+
+ if (ino_get(&oe->ref) != rno)
+ continue;
+
+ op_attr = (struct OpenAttr *)oe->ptr;
+ if (!cmp_type_and_name(op_attr->attr, attr))
+ return op_attr;
+ }
+ return NULL;
+}
+
+static struct ATTRIB *attr_create_nonres_log(struct ntfs_sb_info *sbi,
+ enum ATTR_TYPE type, u64 size,
+ const u16 *name, size_t name_len,
+ __le16 flags)
+{
+ struct ATTRIB *attr;
+ u32 name_size = QuadAlign(name_len * sizeof(short));
+ bool is_ext = flags & (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED);
+ u32 asize = name_size +
+ (is_ext ? SIZEOF_NONRESIDENT_EX : SIZEOF_NONRESIDENT);
+
+ attr = ntfs_zalloc(asize);
+ if (!attr)
+ return NULL;
+
+ attr->type = type;
+ attr->size = cpu_to_le32(asize);
+ attr->flags = flags;
+ attr->non_res = 1;
+ attr->name_len = name_len;
+
+ attr->nres.evcn = cpu_to_le64((u64)bytes_to_cluster(sbi, size) - 1);
+ attr->nres.alloc_size = cpu_to_le64(ntfs_up_cluster(sbi, size));
+ attr->nres.data_size = cpu_to_le64(size);
+ attr->nres.valid_size = attr->nres.data_size;
+ if (is_ext) {
+ attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
+ if (is_attr_compressed(attr))
+ attr->nres.c_unit = COMPRESSION_UNIT;
+
+ attr->nres.run_off =
+ cpu_to_le16(SIZEOF_NONRESIDENT_EX + name_size);
+ memcpy(Add2Ptr(attr, SIZEOF_NONRESIDENT_EX), name,
+ name_len * sizeof(short));
+ } else {
+ attr->name_off = SIZEOF_NONRESIDENT_LE;
+ attr->nres.run_off =
+ cpu_to_le16(SIZEOF_NONRESIDENT + name_size);
+ memcpy(Add2Ptr(attr, SIZEOF_NONRESIDENT), name,
+ name_len * sizeof(short));
+ }
+
+ return attr;
+}
+
+/*
+ * do_action
+ *
+ * Common routine for the Redo and Undo Passes.
+ * If rlsn is NULL then undo.
+ */
+static int do_action(struct ntfs_log *log, struct OPEN_ATTR_ENRTY *oe,
+ const struct LOG_REC_HDR *lrh, u32 op, void *data,
+ u32 dlen, u32 rec_len, const u64 *rlsn)
+{
+ int err = 0;
+ struct ntfs_sb_info *sbi = log->ni->mi.sbi;
+ struct inode *inode = NULL, *inode_parent;
+ struct mft_inode *mi = NULL, *mi2_child = NULL;
+ CLST rno = 0, rno_base = 0;
+ struct INDEX_BUFFER *ib = NULL;
+ struct MFT_REC *rec = NULL;
+ struct ATTRIB *attr = NULL, *attr2;
+ struct INDEX_HDR *hdr;
+ struct INDEX_ROOT *root;
+ struct NTFS_DE *e, *e1, *e2;
+ struct NEW_ATTRIBUTE_SIZES *new_sz;
+ struct ATTR_FILE_NAME *fname;
+ struct OpenAttr *oa, *oa2;
+ u32 nsize, t32, asize, used, esize, bmp_off, bmp_bits;
+ u16 id, id2;
+ u32 record_size = sbi->record_size;
+ u64 t64;
+ u16 roff = le16_to_cpu(lrh->record_off);
+ u16 aoff = le16_to_cpu(lrh->attr_off);
+ u64 lco = 0;
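+ /*
+ * cbo: byte offset of the target sector within the cluster;
+ * tvo: byte offset of the target vcn; vbo addresses the target byte.
+ */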
+ u64 cbo = (u64)le16_to_cpu(lrh->cluster_off) << SECTOR_SHIFT;
+ u64 tvo = le64_to_cpu(lrh->target_vcn) << sbi->cluster_bits;
+ u64 vbo = cbo + tvo;
+ void *buffer_le = NULL;
+ u32 bytes = 0;
+ bool a_dirty = false;
+ u16 data_off;
+
+ oa = oe->ptr;
+
+ /* Big switch to prepare */
+ switch (op) {
+ /* ============================================================
+ * Process MFT records, as described by the current log record
+ * ============================================================
+ */
+ case InitializeFileRecordSegment:
+ case DeallocateFileRecordSegment:
+ case WriteEndOfFileRecordSegment:
+ case CreateAttribute:
+ case DeleteAttribute:
+ case UpdateResidentValue:
+ case UpdateMappingPairs:
+ case SetNewAttributeSizes:
+ case AddIndexEntryRoot:
+ case DeleteIndexEntryRoot:
+ case SetIndexEntryVcnRoot:
+ case UpdateFileNameRoot:
+ case UpdateRecordDataRoot:
+ case ZeroEndOfFileRecord:
+ rno = vbo >> sbi->record_bits;
+ inode = ilookup(sbi->sb, rno);
+ if (inode) {
+ mi = &ntfs_i(inode)->mi;
+ } else if (op == InitializeFileRecordSegment) {
+ mi = ntfs_zalloc(sizeof(struct mft_inode));
+ if (!mi)
+ return -ENOMEM;
+ err = mi_format_new(mi, sbi, rno, 0, false);
+ if (err)
+ goto out;
+ } else {
+ /* read from disk */
+ err = mi_get(sbi, rno, &mi);
+ if (err)
+ return err;
+ }
+ rec = mi->mrec;
+
+ if (op == DeallocateFileRecordSegment)
+ goto skip_load_parent;
+
+ if (InitializeFileRecordSegment != op) {
+ if (rec->rhdr.sign == NTFS_BAAD_SIGNATURE)
+ goto dirty_vol;
+ if (!check_lsn(&rec->rhdr, rlsn))
+ goto out;
+ if (!check_file_record(rec, NULL, sbi))
+ goto dirty_vol;
+ attr = Add2Ptr(rec, roff);
+ }
+
+ if (is_rec_base(rec) || InitializeFileRecordSegment == op) {
+ rno_base = rno;
+ goto skip_load_parent;
+ }
+
+ rno_base = ino_get(&rec->parent_ref);
+ inode_parent = ntfs_iget5(sbi->sb, &rec->parent_ref, NULL);
+ if (IS_ERR(inode_parent))
+ goto skip_load_parent;
+
+ if (is_bad_inode(inode_parent)) {
+ iput(inode_parent);
+ goto skip_load_parent;
+ }
+
+ if (ni_load_mi_ex(ntfs_i(inode_parent), rno, &mi2_child)) {
+ iput(inode_parent);
+ } else {
+ if (mi2_child->mrec != mi->mrec)
+ memcpy(mi2_child->mrec, mi->mrec,
+ sbi->record_size);
+
+ if (inode)
+ iput(inode);
+ else if (mi)
+ mi_put(mi);
+
+ inode = inode_parent;
+ mi = mi2_child;
+ rec = mi2_child->mrec;
+ attr = Add2Ptr(rec, roff);
+ }
+
+skip_load_parent:
+ inode_parent = NULL;
+ break;
+
+ /* ============================================================
+ * Process attributes, as described by the current log record
+ * ============================================================
+ */
+ case UpdateNonresidentValue:
+ case AddIndexEntryAllocation:
+ case DeleteIndexEntryAllocation:
+ case WriteEndOfIndexBuffer:
+ case SetIndexEntryVcnAllocation:
+ case UpdateFileNameAllocation:
+ case SetBitsInNonresidentBitMap:
+ case ClearBitsInNonresidentBitMap:
+ case UpdateRecordDataAllocation:
+ attr = oa->attr;
+ bytes = UpdateNonresidentValue == op ? dlen : 0;
+ lco = (u64)le16_to_cpu(lrh->lcns_follow) << sbi->cluster_bits;
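+ /* lco: total bytes covered by the lcns that follow this log record. */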
+
+ if (attr->type == ATTR_ALLOC) {
+ t32 = le32_to_cpu(oe->bytes_per_index);
+ if (bytes < t32)
+ bytes = t32;
+ }
+
+ if (!bytes)
+ bytes = lco - cbo;
+
+ bytes += roff;
+ if (attr->type == ATTR_ALLOC)
+ bytes = (bytes + 511) & ~511; // align
+
+ buffer_le = ntfs_malloc(bytes);
+ if (!buffer_le)
+ return -ENOMEM;
+
+ err = ntfs_read_run_nb(sbi, oa->run1, vbo, buffer_le, bytes,
+ NULL);
+ if (err)
+ goto out;
+
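+ /* Undo the fixups only if the buffer holds data (non-zero signature). */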
+ if (attr->type == ATTR_ALLOC && *(int *)buffer_le)
+ ntfs_fix_post_read(buffer_le, bytes, false);
+ break;
+
+ default:
+ WARN_ON(1);
+ }
+
+ /* Big switch to do operation */
+ switch (op) {
+ case InitializeFileRecordSegment:
+ if (roff + dlen > record_size)
+ goto dirty_vol;
+
+ memcpy(Add2Ptr(rec, roff), data, dlen);
+ mi->dirty = true;
+ break;
+
+ case DeallocateFileRecordSegment:
+ clear_rec_inuse(rec);
+ le16_add_cpu(&rec->seq, 1);
+ mi->dirty = true;
+ break;
+
+ case WriteEndOfFileRecordSegment:
+ attr2 = (struct ATTRIB *)data;
+ if (!check_if_attr(rec, lrh) || roff + dlen > record_size)
+ goto dirty_vol;
+
+ memmove(attr, attr2, dlen);
+ rec->used = cpu_to_le32(QuadAlign(roff + dlen));
+
+ mi->dirty = true;
+ break;
+
+ case CreateAttribute:
+ attr2 = (struct ATTRIB *)data;
+ asize = le32_to_cpu(attr2->size);
+ used = le32_to_cpu(rec->used);
+
+ if (!check_if_attr(rec, lrh) || dlen < SIZEOF_RESIDENT ||
+ !IsQuadAligned(asize) ||
+ Add2Ptr(attr2, asize) > Add2Ptr(lrh, rec_len) ||
+ dlen > record_size - used) {
+ goto dirty_vol;
+ }
+
+ memmove(Add2Ptr(attr, asize), attr, used - roff);
+ memcpy(attr, attr2, asize);
+
+ rec->used = cpu_to_le32(used + asize);
+ id = le16_to_cpu(rec->next_attr_id);
+ id2 = le16_to_cpu(attr2->id);
+ if (id <= id2)
+ rec->next_attr_id = cpu_to_le16(id2 + 1);
+ if (is_attr_indexed(attr))
+ le16_add_cpu(&rec->hard_links, 1);
+
+ oa2 = find_loaded_attr(log, attr, rno_base);
+ if (oa2) {
+ void *p2 = ntfs_memdup(attr, le32_to_cpu(attr->size));
+
+ if (p2) {
+ // run_close(oa2->run1);
+ ntfs_free(oa2->attr);
+ oa2->attr = p2;
+ }
+ }
+
+ mi->dirty = true;
+ break;
+
+ case DeleteAttribute:
+ asize = le32_to_cpu(attr->size);
+ used = le32_to_cpu(rec->used);
+
+ if (!check_if_attr(rec, lrh))
+ goto dirty_vol;
+
+ rec->used = cpu_to_le32(used - asize);
+ if (is_attr_indexed(attr))
+ le16_add_cpu(&rec->hard_links, -1);
+
+ memmove(attr, Add2Ptr(attr, asize), used - asize - roff);
+
+ mi->dirty = true;
+ break;
+
+ case UpdateResidentValue:
+ nsize = aoff + dlen;
+
+ if (!check_if_attr(rec, lrh))
+ goto dirty_vol;
+
+ asize = le32_to_cpu(attr->size);
+ used = le32_to_cpu(rec->used);
+
+ if (lrh->redo_len == lrh->undo_len) {
+ if (nsize > asize)
+ goto dirty_vol;
+ goto move_data;
+ }
+
+ if (nsize > asize && nsize - asize > record_size - used)
+ goto dirty_vol;
+
+ nsize = QuadAlign(nsize);
+ data_off = le16_to_cpu(attr->res.data_off);
+
+ if (nsize < asize) {
+ memmove(Add2Ptr(attr, aoff), data, dlen);
+ data = NULL; // To skip below memmove
+ }
+
+ memmove(Add2Ptr(attr, nsize), Add2Ptr(attr, asize),
+ used - le16_to_cpu(lrh->record_off) - asize);
+
+ rec->used = cpu_to_le32(used + nsize - asize);
+ attr->size = cpu_to_le32(nsize);
+ attr->res.data_size = cpu_to_le32(aoff + dlen - data_off);
+
+move_data:
+ if (data)
+ memmove(Add2Ptr(attr, aoff), data, dlen);
+
+ oa2 = find_loaded_attr(log, attr, rno_base);
+ if (oa2) {
+ void *p2 = ntfs_memdup(attr, le32_to_cpu(attr->size));
+
+ if (p2) {
+ // run_close(&oa2->run0);
+ oa2->run1 = &oa2->run0;
+ ntfs_free(oa2->attr);
+ oa2->attr = p2;
+ }
+ }
+
+ mi->dirty = true;
+ break;
+
+ case UpdateMappingPairs:
+ nsize = aoff + dlen;
+ asize = le32_to_cpu(attr->size);
+ used = le32_to_cpu(rec->used);
+
+ if (!check_if_attr(rec, lrh) || !attr->non_res ||
+ aoff < le16_to_cpu(attr->nres.run_off) || aoff > asize ||
+ (nsize > asize && nsize - asize > record_size - used)) {
+ goto dirty_vol;
+ }
+
+ nsize = QuadAlign(nsize);
+
+ memmove(Add2Ptr(attr, nsize), Add2Ptr(attr, asize),
+ used - le16_to_cpu(lrh->record_off) - asize);
+ rec->used = cpu_to_le32(used + nsize - asize);
+ attr->size = cpu_to_le32(nsize);
+ memmove(Add2Ptr(attr, aoff), data, dlen);
+
+ if (run_get_highest_vcn(le64_to_cpu(attr->nres.svcn),
+ attr_run(attr), &t64)) {
+ goto dirty_vol;
+ }
+
+ attr->nres.evcn = cpu_to_le64(t64);
+ oa2 = find_loaded_attr(log, attr, rno_base);
+ if (oa2 && oa2->attr->non_res)
+ oa2->attr->nres.evcn = attr->nres.evcn;
+
+ mi->dirty = true;
+ break;
+
+ case SetNewAttributeSizes:
+ new_sz = data;
+ if (!check_if_attr(rec, lrh) || !attr->non_res)
+ goto dirty_vol;
+
+ attr->nres.alloc_size = new_sz->alloc_size;
+ attr->nres.data_size = new_sz->data_size;
+ attr->nres.valid_size = new_sz->valid_size;
+
+ if (dlen >= sizeof(struct NEW_ATTRIBUTE_SIZES))
+ attr->nres.total_size = new_sz->total_size;
+
+ oa2 = find_loaded_attr(log, attr, rno_base);
+ if (oa2) {
+ void *p2 = ntfs_memdup(attr, le32_to_cpu(attr->size));
+
+ if (p2) {
+ ntfs_free(oa2->attr);
+ oa2->attr = p2;
+ }
+ }
+ mi->dirty = true;
+ break;
+
+ case AddIndexEntryRoot:
+ e = (struct NTFS_DE *)data;
+ esize = le16_to_cpu(e->size);
+ root = resident_data(attr);
+ hdr = &root->ihdr;
+ used = le32_to_cpu(hdr->used);
+
+ if (!check_if_index_root(rec, lrh) ||
+ !check_if_root_index(attr, hdr, lrh) ||
+ Add2Ptr(data, esize) > Add2Ptr(lrh, rec_len) ||
+ esize > le32_to_cpu(rec->total) - le32_to_cpu(rec->used)) {
+ goto dirty_vol;
+ }
+
+ e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
+
+ change_attr_size(rec, attr, le32_to_cpu(attr->size) + esize);
+
+ memmove(Add2Ptr(e1, esize), e1,
+ PtrOffset(e1, Add2Ptr(hdr, used)));
+ memmove(e1, e, esize);
+
+ le32_add_cpu(&attr->res.data_size, esize);
+ hdr->used = cpu_to_le32(used + esize);
+ le32_add_cpu(&hdr->total, esize);
+
+ mi->dirty = true;
+ break;
+
+ case DeleteIndexEntryRoot:
+ root = resident_data(attr);
+ hdr = &root->ihdr;
+ used = le32_to_cpu(hdr->used);
+
+ if (!check_if_index_root(rec, lrh) ||
+ !check_if_root_index(attr, hdr, lrh)) {
+ goto dirty_vol;
+ }
+
+ e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
+ esize = le16_to_cpu(e1->size);
+ e2 = Add2Ptr(e1, esize);
+
+ memmove(e1, e2, PtrOffset(e2, Add2Ptr(hdr, used)));
+
+ le32_sub_cpu(&attr->res.data_size, esize);
+ hdr->used = cpu_to_le32(used - esize);
+ le32_sub_cpu(&hdr->total, esize);
+
+ change_attr_size(rec, attr, le32_to_cpu(attr->size) - esize);
+
+ mi->dirty = true;
+ break;
+
+ case SetIndexEntryVcnRoot:
+ root = resident_data(attr);
+ hdr = &root->ihdr;
+
+ if (!check_if_index_root(rec, lrh) ||
+ !check_if_root_index(attr, hdr, lrh)) {
+ goto dirty_vol;
+ }
+
+ e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
+
+ de_set_vbn_le(e, *(__le64 *)data);
+ mi->dirty = true;
+ break;
+
+ case UpdateFileNameRoot:
+ root = resident_data(attr);
+ hdr = &root->ihdr;
+
+ if (!check_if_index_root(rec, lrh) ||
+ !check_if_root_index(attr, hdr, lrh)) {
+ goto dirty_vol;
+ }
+
+ e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
+ fname = (struct ATTR_FILE_NAME *)(e + 1);
+ memmove(&fname->dup, data, sizeof(fname->dup));
+ mi->dirty = true;
+ break;
+
+ case UpdateRecordDataRoot:
+ root = resident_data(attr);
+ hdr = &root->ihdr;
+
+ if (!check_if_index_root(rec, lrh) ||
+ !check_if_root_index(attr, hdr, lrh)) {
+ goto dirty_vol;
+ }
+
+ e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
+
+ memmove(Add2Ptr(e, le16_to_cpu(e->view.data_off)), data, dlen);
+
+ mi->dirty = true;
+ break;
+
+ case ZeroEndOfFileRecord:
+ if (roff + dlen > record_size)
+ goto dirty_vol;
+
+ memset(attr, 0, dlen);
+ mi->dirty = true;
+ break;
+
+ case UpdateNonresidentValue:
+ if (lco < cbo + roff + dlen)
+ goto dirty_vol;
+
+ memcpy(Add2Ptr(buffer_le, roff), data, dlen);
+
+ a_dirty = true;
+ if (attr->type == ATTR_ALLOC)
+ ntfs_fix_pre_write(buffer_le, bytes);
+ break;
+
+ case AddIndexEntryAllocation:
+ ib = Add2Ptr(buffer_le, roff);
+ hdr = &ib->ihdr;
+ e = data;
+ esize = le16_to_cpu(e->size);
+ e1 = Add2Ptr(ib, aoff);
+
+ if (is_baad(&ib->rhdr))
+ goto dirty_vol;
+ if (!check_lsn(&ib->rhdr, rlsn))
+ goto out;
+
+ used = le32_to_cpu(hdr->used);
+
+ if (!check_index_buffer(ib, bytes) ||
+ !check_if_alloc_index(hdr, aoff) ||
+ Add2Ptr(e, esize) > Add2Ptr(lrh, rec_len) ||
+ used + esize > le32_to_cpu(hdr->total)) {
+ goto dirty_vol;
+ }
+
+ memmove(Add2Ptr(e1, esize), e1,
+ PtrOffset(e1, Add2Ptr(hdr, used)));
+ memcpy(e1, e, esize);
+
+ hdr->used = cpu_to_le32(used + esize);
+
+ a_dirty = true;
+
+ ntfs_fix_pre_write(&ib->rhdr, bytes);
+ break;
+
+ case DeleteIndexEntryAllocation:
+ ib = Add2Ptr(buffer_le, roff);
+ hdr = &ib->ihdr;
+ e = Add2Ptr(ib, aoff);
+ esize = le16_to_cpu(e->size);
+
+ if (is_baad(&ib->rhdr))
+ goto dirty_vol;
+ if (!check_lsn(&ib->rhdr, rlsn))
+ goto out;
+
+ if (!check_index_buffer(ib, bytes) ||
+ !check_if_alloc_index(hdr, aoff)) {
+ goto dirty_vol;
+ }
+
+ e1 = Add2Ptr(e, esize);
+ nsize = esize;
+ used = le32_to_cpu(hdr->used);
+
+ memmove(e, e1, PtrOffset(e1, Add2Ptr(hdr, used)));
+
+ hdr->used = cpu_to_le32(used - nsize);
+
+ a_dirty = true;
+
+ ntfs_fix_pre_write(&ib->rhdr, bytes);
+ break;
+
+ case WriteEndOfIndexBuffer:
+ ib = Add2Ptr(buffer_le, roff);
+ hdr = &ib->ihdr;
+ e = Add2Ptr(ib, aoff);
+
+ if (is_baad(&ib->rhdr))
+ goto dirty_vol;
+ if (!check_lsn(&ib->rhdr, rlsn))
+ goto out;
+ if (!check_index_buffer(ib, bytes) ||
+ !check_if_alloc_index(hdr, aoff) ||
+ aoff + dlen > offsetof(struct INDEX_BUFFER, ihdr) +
+ le32_to_cpu(hdr->total)) {
+ goto dirty_vol;
+ }
+
+ hdr->used = cpu_to_le32(dlen + PtrOffset(hdr, e));
+ memmove(e, data, dlen);
+
+ a_dirty = true;
+ ntfs_fix_pre_write(&ib->rhdr, bytes);
+ break;
+
+ case SetIndexEntryVcnAllocation:
+ ib = Add2Ptr(buffer_le, roff);
+ hdr = &ib->ihdr;
+ e = Add2Ptr(ib, aoff);
+
+ if (is_baad(&ib->rhdr))
+ goto dirty_vol;
+
+ if (!check_lsn(&ib->rhdr, rlsn))
+ goto out;
+ if (!check_index_buffer(ib, bytes) ||
+ !check_if_alloc_index(hdr, aoff)) {
+ goto dirty_vol;
+ }
+
+ de_set_vbn_le(e, *(__le64 *)data);
+
+ a_dirty = true;
+ ntfs_fix_pre_write(&ib->rhdr, bytes);
+ break;
+
+ case UpdateFileNameAllocation:
+ ib = Add2Ptr(buffer_le, roff);
+ hdr = &ib->ihdr;
+ e = Add2Ptr(ib, aoff);
+
+ if (is_baad(&ib->rhdr))
+ goto dirty_vol;
+
+ if (!check_lsn(&ib->rhdr, rlsn))
+ goto out;
+ if (!check_index_buffer(ib, bytes) ||
+ !check_if_alloc_index(hdr, aoff)) {
+ goto dirty_vol;
+ }
+
+ fname = (struct ATTR_FILE_NAME *)(e + 1);
+ memmove(&fname->dup, data, sizeof(fname->dup));
+
+ a_dirty = true;
+ ntfs_fix_pre_write(&ib->rhdr, bytes);
+ break;
+
+ case SetBitsInNonresidentBitMap:
+ bmp_off =
+ le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off);
+ bmp_bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits);
+
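+ /* Make sure the whole bit range lies within the clusters read for this record. */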
+ if (cbo + (bmp_off + 7) / 8 > lco ||
+ cbo + ((bmp_off + bmp_bits + 7) / 8) > lco) {
+ goto dirty_vol;
+ }
+
+ __bitmap_set(Add2Ptr(buffer_le, roff), bmp_off, bmp_bits);
+ a_dirty = true;
+ break;
+
+ case ClearBitsInNonresidentBitMap:
+ bmp_off =
+ le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off);
+ bmp_bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits);
+
+ if (cbo + (bmp_off + 7) / 8 > lco ||
+ cbo + ((bmp_off + bmp_bits + 7) / 8) > lco) {
+ goto dirty_vol;
+ }
+
+ __bitmap_clear(Add2Ptr(buffer_le, roff), bmp_off, bmp_bits);
+ a_dirty = true;
+ break;
+
+ case UpdateRecordDataAllocation:
+ ib = Add2Ptr(buffer_le, roff);
+ hdr = &ib->ihdr;
+ e = Add2Ptr(ib, aoff);
+
+ if (is_baad(&ib->rhdr))
+ goto dirty_vol;
+
+ if (!check_lsn(&ib->rhdr, rlsn))
+ goto out;
+ if (!check_index_buffer(ib, bytes) ||
+ !check_if_alloc_index(hdr, aoff)) {
+ goto dirty_vol;
+ }
+
+ memmove(Add2Ptr(e, le16_to_cpu(e->view.data_off)), data, dlen);
+
+ a_dirty = true;
+ ntfs_fix_pre_write(&ib->rhdr, bytes);
+ break;
+
+ default:
+ WARN_ON(1);
+ }
+
+ if (rlsn) {
+ __le64 t64 = cpu_to_le64(*rlsn);
+
+ if (rec)
+ rec->rhdr.lsn = t64;
+ if (ib)
+ ib->rhdr.lsn = t64;
+ }
+
+ if (mi && mi->dirty) {
+ err = mi_write(mi, 0);
+ if (err)
+ goto out;
+ }
+
+ if (a_dirty) {
+ attr = oa->attr;
+ err = ntfs_sb_write_run(sbi, oa->run1, vbo, buffer_le, bytes);
+ if (err)
+ goto out;
+ }
+
+out:
+
+ if (inode)
+ iput(inode);
+ else if (mi != mi2_child)
+ mi_put(mi);
+
+ ntfs_free(buffer_le);
+
+ return err;
+
+dirty_vol:
+ log->set_dirty = true;
+ goto out;
+}
+
+/*
+ * log_replay
+ *
+ * This function is called during the mount operation.
+ * It replays the log and empties it.
+ * 'initialized' is set to false if the logfile contains '-1'.
+ */
+int log_replay(struct ntfs_inode *ni, bool *initialized)
+{
+ int err;
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ struct ntfs_log *log;
+
+ struct restart_info rst_info, rst_info2;
+ u64 rec_lsn, ra_lsn, checkpt_lsn = 0, rlsn = 0;
+ struct ATTR_NAME_ENTRY *attr_names = NULL;
+ struct ATTR_NAME_ENTRY *ane;
+ struct RESTART_TABLE *dptbl = NULL;
+ struct RESTART_TABLE *trtbl = NULL;
+ const struct RESTART_TABLE *rt;
+ struct RESTART_TABLE *oatbl = NULL;
+ struct inode *inode;
+ struct OpenAttr *oa;
+ struct ntfs_inode *ni_oe;
+ struct ATTRIB *attr = NULL;
+ u64 size, vcn, undo_next_lsn;
+ CLST rno, lcn, lcn0, len0, clen;
+ void *data;
+ struct NTFS_RESTART *rst = NULL;
+ struct lcb *lcb = NULL;
+ struct OPEN_ATTR_ENRTY *oe;
+ struct TRANSACTION_ENTRY *tr;
+ struct DIR_PAGE_ENTRY *dp;
+ u32 i, bytes_per_attr_entry;
+ u32 l_size = ni->vfs_inode.i_size;
+ u32 orig_file_size = l_size;
+ u32 page_size, vbo, tail, off, dlen;
+ u32 saved_len, rec_len, transact_id;
+ bool use_second_page;
+ struct RESTART_AREA *ra2, *ra = NULL;
+ struct CLIENT_REC *ca, *cr;
+ __le16 client;
+ struct RESTART_HDR *rh;
+ const struct LFS_RECORD_HDR *frh;
+ const struct LOG_REC_HDR *lrh;
+ bool is_mapped;
+ bool is_ro = sb_rdonly(sbi->sb);
+ u64 t64;
+ u16 t16;
+ u32 t32;
+
+ /* Get the page size. NOTE: To replay we can use the default page */
+#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2
+ page_size = norm_file_page(PAGE_SIZE, &l_size, true);
+#else
+ page_size = norm_file_page(PAGE_SIZE, &l_size, false);
+#endif
+ if (!page_size)
+ return -EINVAL;
+
+ log = ntfs_zalloc(sizeof(struct ntfs_log));
+ if (!log)
+ return -ENOMEM;
+
+ log->ni = ni;
+ log->l_size = l_size;
+ log->one_page_buf = ntfs_malloc(page_size);
+
+ if (!log->one_page_buf) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ log->page_size = page_size;
+ log->page_mask = page_size - 1;
+ log->page_bits = blksize_bits(page_size);
+
+ /* Look for a restart area on the disk */
+ err = log_read_rst(log, l_size, true, &rst_info);
+ if (err)
+ goto out;
+
+ /* remember 'initialized' */
+ *initialized = rst_info.initialized;
+
+ if (!rst_info.restart) {
+ if (rst_info.initialized) {
+ /* No restart area but the file is initialized */
+ err = -EINVAL;
+ goto out;
+ }
+
+ log_init_pg_hdr(log, page_size, page_size, 1, 1);
+ log_create(log, l_size, 0, get_random_int(), false, false);
+
+ ra = log_create_ra(log);
+ if (!ra) {
+ err = -ENOMEM;
+ goto out;
+ }
+ log->ra = ra;
+ log->init_ra = true;
+
+ goto process_log;
+ }
+
+ /*
+ * If the restart offset above wasn't zero then we won't
+ * look for a second restart
+ */
+ if (rst_info.vbo)
+ goto check_restart_area;
+
+ err = log_read_rst(log, l_size, false, &rst_info2);
+
+ /* Determine which restart area to use */
+ if (!rst_info2.restart || rst_info2.last_lsn <= rst_info.last_lsn)
+ goto use_first_page;
+
+ use_second_page = true;
+
+ if (rst_info.chkdsk_was_run && page_size != rst_info.vbo) {
+ struct RECORD_PAGE_HDR *sp = NULL;
+ bool usa_error;
+
+ if (!read_log_page(log, page_size, &sp, &usa_error) &&
+ sp->rhdr.sign == NTFS_CHKD_SIGNATURE) {
+ use_second_page = false;
+ }
+ ntfs_free(sp);
+ }
+
+ if (use_second_page) {
+ ntfs_free(rst_info.r_page);
+ memcpy(&rst_info, &rst_info2, sizeof(struct restart_info));
+ rst_info2.r_page = NULL;
+ }
+
+use_first_page:
+ ntfs_free(rst_info2.r_page);
+
+check_restart_area:
+ /* If the restart area is at offset 0, we want to write the second restart area first */
+ log->init_ra = !!rst_info.vbo;
+
+ /* If we have a valid page then grab a pointer to the restart area */
+ ra2 = rst_info.valid_page
+ ? Add2Ptr(rst_info.r_page,
+ le16_to_cpu(rst_info.r_page->ra_off))
+ : NULL;
+
+ if (rst_info.chkdsk_was_run ||
+ (ra2 && ra2->client_idx[1] == LFS_NO_CLIENT_LE)) {
+ bool wrapped = false;
+ bool use_multi_page = false;
+ u32 open_log_count;
+
+ /* Do some checks based on whether we have a valid log page */
+ if (!rst_info.valid_page) {
+ open_log_count = get_random_int();
+ goto init_log_instance;
+ }
+ open_log_count = le32_to_cpu(ra2->open_log_count);
+
+ /*
+ * If the restart page size isn't changing then we want to
+ * check how much work we need to do
+ */
+ if (page_size != le32_to_cpu(rst_info.r_page->sys_page_size))
+ goto init_log_instance;
+
+init_log_instance:
+ log_init_pg_hdr(log, page_size, page_size, 1, 1);
+
+ log_create(log, l_size, rst_info.last_lsn, open_log_count,
+ wrapped, use_multi_page);
+
+ ra = log_create_ra(log);
+ if (!ra) {
+ err = -ENOMEM;
+ goto out;
+ }
+ log->ra = ra;
+
+ /* Put the restart areas and initialize the log file as required */
+ goto process_log;
+ }
+
+ if (!ra2) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * If the log page or the system page sizes have changed, we can't
+ * use the log file. We must use the system page size instead of the
+ * default size if there was not a clean shutdown.
+ */
+ t32 = le32_to_cpu(rst_info.r_page->sys_page_size);
+ if (page_size != t32) {
+ l_size = orig_file_size;
+ page_size =
+ norm_file_page(t32, &l_size, t32 == DefaultLogPageSize);
+ }
+
+ if (page_size != t32 ||
+ page_size != le32_to_cpu(rst_info.r_page->page_size)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* If the file size has shrunk then we won't mount it */
+ if (l_size < le64_to_cpu(ra2->l_size)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ log_init_pg_hdr(log, page_size, page_size,
+ le16_to_cpu(rst_info.r_page->major_ver),
+ le16_to_cpu(rst_info.r_page->minor_ver));
+
+ log->l_size = le64_to_cpu(ra2->l_size);
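+ /* An lsn is split into a sequence number (high bits) and a file data offset (low bits). */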
+ log->seq_num_bits = le32_to_cpu(ra2->seq_num_bits);
+ log->file_data_bits = sizeof(u64) * 8 - log->seq_num_bits;
+ log->seq_num_mask = (8 << log->file_data_bits) - 1;
+ log->last_lsn = le64_to_cpu(ra2->current_lsn);
+ log->seq_num = log->last_lsn >> log->file_data_bits;
+ log->ra_off = le16_to_cpu(rst_info.r_page->ra_off);
+ log->restart_size = log->sys_page_size - log->ra_off;
+ log->record_header_len = le16_to_cpu(ra2->rec_hdr_len);
+ log->ra_size = le16_to_cpu(ra2->ra_len);
+ log->data_off = le16_to_cpu(ra2->data_off);
+ log->data_size = log->page_size - log->data_off;
+ log->reserved = log->data_size - log->record_header_len;
+
+ vbo = lsn_to_vbo(log, log->last_lsn);
+
+ if (vbo < log->first_page) {
+ /* This is a pseudo lsn */
+ log->l_flags |= NTFSLOG_NO_LAST_LSN;
+ log->next_page = log->first_page;
+ goto find_oldest;
+ }
+
+ /* Find the end of this log record */
+ off = final_log_off(log, log->last_lsn,
+ le32_to_cpu(ra2->last_lsn_data_len));
+
+ /* If we wrapped the file then increment the sequence number */
+ if (off <= vbo) {
+ log->seq_num += 1;
+ log->l_flags |= NTFSLOG_WRAPPED;
+ }
+
+ /* Now compute the next log page to use */
+ vbo &= ~log->sys_page_mask;
+ tail = log->page_size - (off & log->page_mask) - 1;
+
+ /* If we can fit another log record on the page, move back a page in the log file */
+ if (tail >= log->record_header_len) {
+ log->l_flags |= NTFSLOG_REUSE_TAIL;
+ log->next_page = vbo;
+ } else {
+ log->next_page = next_page_off(log, vbo);
+ }
+
+find_oldest:
+ /* Find the oldest client lsn. Use the last flushed lsn as a starting point */
+ log->oldest_lsn = log->last_lsn;
+ oldest_client_lsn(Add2Ptr(ra2, le16_to_cpu(ra2->client_off)),
+ ra2->client_idx[1], &log->oldest_lsn);
+ log->oldest_lsn_off = lsn_to_vbo(log, log->oldest_lsn);
+
+ if (log->oldest_lsn_off < log->first_page)
+ log->l_flags |= NTFSLOG_NO_OLDEST_LSN;
+
+ if (!(ra2->flags & RESTART_SINGLE_PAGE_IO))
+ log->l_flags |= NTFSLOG_WRAPPED | NTFSLOG_MULTIPLE_PAGE_IO;
+
+ log->current_openlog_count = le32_to_cpu(ra2->open_log_count);
+ log->total_avail_pages = log->l_size - log->first_page;
+ log->total_avail = log->total_avail_pages >> log->page_bits;
+ log->max_current_avail = log->total_avail * log->reserved;
+ log->total_avail = log->total_avail * log->data_size;
+
+ log->current_avail = current_log_avail(log);
+
+ ra = ntfs_zalloc(log->restart_size);
+ if (!ra) {
+ err = -ENOMEM;
+ goto out;
+ }
+ log->ra = ra;
+
+ t16 = le16_to_cpu(ra2->client_off);
+ if (t16 == offsetof(struct RESTART_AREA, clients)) {
+ memcpy(ra, ra2, log->ra_size);
+ } else {
+ memcpy(ra, ra2, offsetof(struct RESTART_AREA, clients));
+ memcpy(ra->clients, Add2Ptr(ra2, t16),
+ le16_to_cpu(ra2->ra_len) - t16);
+
+ log->current_openlog_count = get_random_int();
+ ra->open_log_count = cpu_to_le32(log->current_openlog_count);
+ log->ra_size = offsetof(struct RESTART_AREA, clients) +
+ sizeof(struct CLIENT_REC);
+ ra->client_off =
+ cpu_to_le16(offsetof(struct RESTART_AREA, clients));
+ ra->ra_len = cpu_to_le16(log->ra_size);
+ }
+
+ le32_add_cpu(&ra->open_log_count, 1);
+
+ /* Now we need to walk through looking for the last lsn */
+ err = last_log_lsn(log);
+ if (err)
+ goto out;
+
+ log->current_avail = current_log_avail(log);
+
+ /* Remember which restart area to write first */
+ log->init_ra = rst_info.vbo;
+
+process_log:
+ /* 1.0, 1.1, 2.0 log->major_ver/minor_ver - short values */
+ switch ((log->major_ver << 16) + log->minor_ver) {
+ case 0x10000:
+ case 0x10001:
+ case 0x20000:
+ break;
+ default:
+ ntfs_warn(sbi->sb, "\x24LogFile version %d.%d is not supported",
+ log->major_ver, log->minor_ver);
+ err = -EOPNOTSUPP;
+ log->set_dirty = true;
+ goto out;
+ }
+
+ /* One client "NTFS" per logfile */
+ ca = Add2Ptr(ra, le16_to_cpu(ra->client_off));
+
+ for (client = ra->client_idx[1];; client = cr->next_client) {
+ if (client == LFS_NO_CLIENT_LE) {
+ /* Insert "NTFS" client LogFile */
+ client = ra->client_idx[0];
+ if (client == LFS_NO_CLIENT_LE)
+ return -EINVAL;
+
+ t16 = le16_to_cpu(client);
+ cr = ca + t16;
+
+ remove_client(ca, cr, &ra->client_idx[0]);
+
+ cr->restart_lsn = 0;
+ cr->oldest_lsn = cpu_to_le64(log->oldest_lsn);
+ cr->name_bytes = cpu_to_le32(8);
+ cr->name[0] = cpu_to_le16('N');
+ cr->name[1] = cpu_to_le16('T');
+ cr->name[2] = cpu_to_le16('F');
+ cr->name[3] = cpu_to_le16('S');
+
+ add_client(ca, t16, &ra->client_idx[1]);
+ break;
+ }
+
+ cr = ca + le16_to_cpu(client);
+
+ if (cpu_to_le32(8) == cr->name_bytes &&
+ cpu_to_le16('N') == cr->name[0] &&
+ cpu_to_le16('T') == cr->name[1] &&
+ cpu_to_le16('F') == cr->name[2] &&
+ cpu_to_le16('S') == cr->name[3])
+ break;
+ }
+
+ /* Update the client handle with the client block information */
+ log->client_id.seq_num = cr->seq_num;
+ log->client_id.client_idx = client;
+
+ err = read_rst_area(log, &rst, &ra_lsn);
+ if (err)
+ goto out;
+
+ if (!rst)
+ goto out;
+
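+ /* Version 0 open attribute entries are 0x2C bytes, version 1 entries are 0x28 bytes. */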
+ bytes_per_attr_entry = !rst->major_ver ? 0x2C : 0x28;
+
+ checkpt_lsn = le64_to_cpu(rst->check_point_start);
+ if (!checkpt_lsn)
+ checkpt_lsn = ra_lsn;
+
+ /* Allocate and Read the Transaction Table */
+ if (!rst->transact_table_len)
+ goto check_dirty_page_table;
+
+ t64 = le64_to_cpu(rst->transact_table_lsn);
+ err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
+ if (err)
+ goto out;
+
+ lrh = lcb->log_rec;
+ frh = lcb->lrh;
+ rec_len = le32_to_cpu(frh->client_data_len);
+
+ if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
+ bytes_per_attr_entry)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ t16 = le16_to_cpu(lrh->redo_off);
+
+ rt = Add2Ptr(lrh, t16);
+ t32 = rec_len - t16;
+
+ /* Now check that this is a valid restart table */
+ if (!check_rstbl(rt, t32)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ trtbl = ntfs_memdup(rt, t32);
+ if (!trtbl) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ lcb_put(lcb);
+ lcb = NULL;
+
+check_dirty_page_table:
+ /* The next record back should be the Dirty Pages Table */
+ if (!rst->dirty_pages_len)
+ goto check_attribute_names;
+
+ t64 = le64_to_cpu(rst->dirty_pages_table_lsn);
+ err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
+ if (err)
+ goto out;
+
+ lrh = lcb->log_rec;
+ frh = lcb->lrh;
+ rec_len = le32_to_cpu(frh->client_data_len);
+
+ if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
+ bytes_per_attr_entry)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ t16 = le16_to_cpu(lrh->redo_off);
+
+ rt = Add2Ptr(lrh, t16);
+ t32 = rec_len - t16;
+
+ /* Now check that this is a valid restart table */
+ if (!check_rstbl(rt, t32)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ dptbl = ntfs_memdup(rt, t32);
+ if (!dptbl) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* Convert Ra version '0' into version '1' */
+ if (rst->major_ver)
+ goto end_conv_1;
+
+ dp = NULL;
+ while ((dp = enum_rstbl(dptbl, dp))) {
+ struct DIR_PAGE_ENTRY_32 *dp0 = (struct DIR_PAGE_ENTRY_32 *)dp;
+ // NOTE: Danger. Check for out-of-boundary access.
+ memmove(&dp->vcn, &dp0->vcn_low,
+ 2 * sizeof(u64) +
+ le32_to_cpu(dp->lcns_follow) * sizeof(u64));
+ }
+
+end_conv_1:
+ lcb_put(lcb);
+ lcb = NULL;
+
+ /* Go through the table and remove the duplicates, remembering the oldest lsn values */
+ if (sbi->cluster_size <= log->page_size)
+ goto trace_dp_table;
+
+ dp = NULL;
+ while ((dp = enum_rstbl(dptbl, dp))) {
+ struct DIR_PAGE_ENTRY *next = dp;
+
+ while ((next = enum_rstbl(dptbl, next))) {
+ if (next->target_attr == dp->target_attr &&
+ next->vcn == dp->vcn) {
+ if (le64_to_cpu(next->oldest_lsn) <
+ le64_to_cpu(dp->oldest_lsn)) {
+ dp->oldest_lsn = next->oldest_lsn;
+ }
+
+ free_rsttbl_idx(dptbl, PtrOffset(dptbl, next));
+ }
+ }
+ }
+trace_dp_table:
+check_attribute_names:
+ /* The next record should be the Attribute Names */
+ if (!rst->attr_names_len)
+ goto check_attr_table;
+
+ t64 = le64_to_cpu(rst->attr_names_lsn);
+ err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
+ if (err)
+ goto out;
+
+ lrh = lcb->log_rec;
+ frh = lcb->lrh;
+ rec_len = le32_to_cpu(frh->client_data_len);
+
+ if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
+ bytes_per_attr_entry)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ t32 = lrh_length(lrh);
+ rec_len -= t32;
+
+ attr_names = ntfs_memdup(Add2Ptr(lrh, t32), rec_len);
+
+ lcb_put(lcb);
+ lcb = NULL;
+
+check_attr_table:
+ /* The next record should be the Attribute Table */
+ if (!rst->open_attr_len)
+ goto check_attribute_names2;
+
+ t64 = le64_to_cpu(rst->open_attr_table_lsn);
+ err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
+ if (err)
+ goto out;
+
+ lrh = lcb->log_rec;
+ frh = lcb->lrh;
+ rec_len = le32_to_cpu(frh->client_data_len);
+
+ if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
+ bytes_per_attr_entry)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ t16 = le16_to_cpu(lrh->redo_off);
+
+ rt = Add2Ptr(lrh, t16);
+ t32 = rec_len - t16;
+
+ if (!check_rstbl(rt, t32)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ oatbl = ntfs_memdup(rt, t32);
+ if (!oatbl) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ log->open_attr_tbl = oatbl;
+
+ /* Clear all of the Attr pointers */
+ oe = NULL;
+ while ((oe = enum_rstbl(oatbl, oe))) {
+ if (!rst->major_ver) {
+ struct OPEN_ATTR_ENRTY_32 oe0;
+
+ /* Really 'oe' points to OPEN_ATTR_ENRTY_32 */
+ memcpy(&oe0, oe, SIZEOF_OPENATTRIBUTEENTRY0);
+
+ oe->bytes_per_index = oe0.bytes_per_index;
+ oe->type = oe0.type;
+ oe->is_dirty_pages = oe0.is_dirty_pages;
+ oe->name_len = 0;
+ oe->ref = oe0.ref;
+ oe->open_record_lsn = oe0.open_record_lsn;
+ }
+
+ oe->is_attr_name = 0;
+ oe->ptr = NULL;
+ }
+
+ lcb_put(lcb);
+ lcb = NULL;
+
+check_attribute_names2:
+ if (!rst->attr_names_len)
+ goto trace_attribute_table;
+
+ ane = attr_names;
+ if (!oatbl)
+ goto trace_attribute_table;
+ while (ane->off) {
+ /* TODO: Clear table on exit! */
+ oe = Add2Ptr(oatbl, le16_to_cpu(ane->off));
+ t16 = le16_to_cpu(ane->name_bytes);
+ oe->name_len = t16 / sizeof(short);
+ oe->ptr = ane->name;
+ oe->is_attr_name = 2;
+ ane = Add2Ptr(ane, sizeof(struct ATTR_NAME_ENTRY) + t16);
+ }
+
+trace_attribute_table:
+ /*
+ * If the checkpt_lsn is zero, then this is a freshly
+ * formatted disk and we have no work to do
+ */
+ if (!checkpt_lsn) {
+ err = 0;
+ goto out;
+ }
+
+ if (!oatbl) {
+ oatbl = init_rsttbl(bytes_per_attr_entry, 8);
+ if (!oatbl) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
+ log->open_attr_tbl = oatbl;
+
+ /* Start the analysis pass from the Checkpoint lsn. */
+ rec_lsn = checkpt_lsn;
+
+ /* Read the first lsn */
+ err = read_log_rec_lcb(log, checkpt_lsn, lcb_ctx_next, &lcb);
+ if (err)
+ goto out;
+
+ /* Loop to read all subsequent records to the end of the log file */
+next_log_record_analyze:
+ err = read_next_log_rec(log, lcb, &rec_lsn);
+ if (err)
+ goto out;
+
+ if (!rec_lsn)
+ goto end_log_records_enumerate;
+
+ frh = lcb->lrh;
+ transact_id = le32_to_cpu(frh->transact_id);
+ rec_len = le32_to_cpu(frh->client_data_len);
+ lrh = lcb->log_rec;
+
+ if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * The first lsn after the previous lsn remembered in
+ * the checkpoint is the first candidate for the rlsn
+ */
+ if (!rlsn)
+ rlsn = rec_lsn;
+
+ if (LfsClientRecord != frh->record_type)
+ goto next_log_record_analyze;
+
+ /*
+ * Now update the Transaction Table for this transaction.
+ * If there is no entry present or it is unallocated, we allocate the entry.
+ */
+ if (!trtbl) {
+ trtbl = init_rsttbl(sizeof(struct TRANSACTION_ENTRY),
+ INITIAL_NUMBER_TRANSACTIONS);
+ if (!trtbl) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
+ tr = Add2Ptr(trtbl, transact_id);
+
+ if (transact_id >= bytes_per_rt(trtbl) ||
+ tr->next != RESTART_ENTRY_ALLOCATED_LE) {
+ tr = alloc_rsttbl_from_idx(&trtbl, transact_id);
+ if (!tr) {
+ err = -ENOMEM;
+ goto out;
+ }
+ tr->transact_state = TransactionActive;
+ tr->first_lsn = cpu_to_le64(rec_lsn);
+ }
+
+ tr->prev_lsn = tr->undo_next_lsn = cpu_to_le64(rec_lsn);
+
+ /*
+ * If this is a compensation log record, then change
+ * the undo_next_lsn to be the undo_next_lsn of this record
+ */
+ if (lrh->undo_op == cpu_to_le16(CompensationLogRecord))
+ tr->undo_next_lsn = frh->client_undo_next_lsn;
+
+ /* Dispatch to handle log record depending on type */
+ switch (le16_to_cpu(lrh->redo_op)) {
+ case InitializeFileRecordSegment:
+ case DeallocateFileRecordSegment:
+ case WriteEndOfFileRecordSegment:
+ case CreateAttribute:
+ case DeleteAttribute:
+ case UpdateResidentValue:
+ case UpdateNonresidentValue:
+ case UpdateMappingPairs:
+ case SetNewAttributeSizes:
+ case AddIndexEntryRoot:
+ case DeleteIndexEntryRoot:
+ case AddIndexEntryAllocation:
+ case DeleteIndexEntryAllocation:
+ case WriteEndOfIndexBuffer:
+ case SetIndexEntryVcnRoot:
+ case SetIndexEntryVcnAllocation:
+ case UpdateFileNameRoot:
+ case UpdateFileNameAllocation:
+ case SetBitsInNonresidentBitMap:
+ case ClearBitsInNonresidentBitMap:
+ case UpdateRecordDataRoot:
+ case UpdateRecordDataAllocation:
+ case ZeroEndOfFileRecord:
+ t16 = le16_to_cpu(lrh->target_attr);
+ t64 = le64_to_cpu(lrh->target_vcn);
+ dp = find_dp(dptbl, t16, t64);
+
+ if (dp)
+ goto copy_lcns;
+
+ /*
+ * Calculate the number of clusters per page for the system
+ * which wrote the checkpoint, possibly creating the table
+ */
+ if (dptbl) {
+ t32 = (le16_to_cpu(dptbl->size) -
+ sizeof(struct DIR_PAGE_ENTRY)) /
+ sizeof(u64);
+ } else {
+ t32 = log->clst_per_page;
+ ntfs_free(dptbl);
+ dptbl = init_rsttbl(struct_size(dp, page_lcns, t32),
+ 32);
+ if (!dptbl) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
+ dp = alloc_rsttbl_idx(&dptbl);
+ if (!dp) {
+ /* The table could not be extended: bail out as the other callers do. */
+ err = -ENOMEM;
+ goto out;
+ }
+ dp->target_attr = cpu_to_le32(t16);
+ dp->transfer_len = cpu_to_le32(t32 << sbi->cluster_bits);
+ dp->lcns_follow = cpu_to_le32(t32);
+ dp->vcn = cpu_to_le64(t64 & ~((u64)t32 - 1));
+ dp->oldest_lsn = cpu_to_le64(rec_lsn);
+
+copy_lcns:
+ /*
+ * Copy the Lcns from the log record into the Dirty Page Entry.
+ * TODO: for different page size support, must somehow make
+ * whole routine a loop, in case the Lcns do not fit below.
+ */
+ t16 = le16_to_cpu(lrh->lcns_follow);
+ for (i = 0; i < t16; i++) {
+ size_t j = (size_t)(le64_to_cpu(lrh->target_vcn) -
+ le64_to_cpu(dp->vcn));
+ dp->page_lcns[j + i] = lrh->page_lcns[i];
+ }
+
+ goto next_log_record_analyze;
+
+ case DeleteDirtyClusters: {
+ u32 range_count =
+ le16_to_cpu(lrh->redo_len) / sizeof(struct LCN_RANGE);
+ const struct LCN_RANGE *r =
+ Add2Ptr(lrh, le16_to_cpu(lrh->redo_off));
+
+ /* Loop through all of the Lcn ranges in this log record */
+ for (i = 0; i < range_count; i++, r++) {
+ u64 lcn0 = le64_to_cpu(r->lcn);
+ u64 lcn_e = lcn0 + le64_to_cpu(r->len) - 1;
+
+ dp = NULL;
+ while ((dp = enum_rstbl(dptbl, dp))) {
+ u32 j;
+
+ t32 = le32_to_cpu(dp->lcns_follow);
+ for (j = 0; j < t32; j++) {
+ t64 = le64_to_cpu(dp->page_lcns[j]);
+ if (t64 >= lcn0 && t64 <= lcn_e)
+ dp->page_lcns[j] = 0;
+ }
+ }
+ }
+ goto next_log_record_analyze;
+ }
+
+ case OpenNonresidentAttribute:
+ t16 = le16_to_cpu(lrh->target_attr);
+ if (t16 >= bytes_per_rt(oatbl)) {
+ /*
+ * Compute how big the table needs to be.
+ * Add 10 extra entries for some cushion
+ */
+ u32 new_e = t16 / le16_to_cpu(oatbl->size);
+
+ new_e += 10 - le16_to_cpu(oatbl->used);
+
+ oatbl = extend_rsttbl(oatbl, new_e, ~0u);
+ log->open_attr_tbl = oatbl;
+ if (!oatbl) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
+ /* Point to the entry being opened */
+ oe = alloc_rsttbl_from_idx(&oatbl, t16);
+ log->open_attr_tbl = oatbl;
+ if (!oe) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* Initialize this entry from the log record */
+ t16 = le16_to_cpu(lrh->redo_off);
+ if (!rst->major_ver) {
+ /* Convert version '0' into version '1' */
+ struct OPEN_ATTR_ENRTY_32 *oe0 = Add2Ptr(lrh, t16);
+
+ oe->bytes_per_index = oe0->bytes_per_index;
+ oe->type = oe0->type;
+ oe->is_dirty_pages = oe0->is_dirty_pages;
+ oe->name_len = 0; //oe0.name_len;
+ oe->ref = oe0->ref;
+ oe->open_record_lsn = oe0->open_record_lsn;
+ } else {
+ memcpy(oe, Add2Ptr(lrh, t16), bytes_per_attr_entry);
+ }
+
+ t16 = le16_to_cpu(lrh->undo_len);
+ if (t16) {
+ oe->ptr = ntfs_malloc(t16);
+ if (!oe->ptr) {
+ err = -ENOMEM;
+ goto out;
+ }
+ oe->name_len = t16 / sizeof(short);
+ memcpy(oe->ptr,
+ Add2Ptr(lrh, le16_to_cpu(lrh->undo_off)), t16);
+ oe->is_attr_name = 1;
+ } else {
+ oe->ptr = NULL;
+ oe->is_attr_name = 0;
+ }
+
+ goto next_log_record_analyze;
+
+ case HotFix:
+ t16 = le16_to_cpu(lrh->target_attr);
+ t64 = le64_to_cpu(lrh->target_vcn);
+ dp = find_dp(dptbl, t16, t64);
+ if (dp) {
+ size_t j = le64_to_cpu(lrh->target_vcn) -
+ le64_to_cpu(dp->vcn);
+ if (dp->page_lcns[j])
+ dp->page_lcns[j] = lrh->page_lcns[0];
+ }
+ goto next_log_record_analyze;
+
+ case EndTopLevelAction:
+ tr = Add2Ptr(trtbl, transact_id);
+ tr->prev_lsn = cpu_to_le64(rec_lsn);
+ tr->undo_next_lsn = frh->client_undo_next_lsn;
+ goto next_log_record_analyze;
+
+ case PrepareTransaction:
+ tr = Add2Ptr(trtbl, transact_id);
+ tr->transact_state = TransactionPrepared;
+ goto next_log_record_analyze;
+
+ case CommitTransaction:
+ tr = Add2Ptr(trtbl, transact_id);
+ tr->transact_state = TransactionCommitted;
+ goto next_log_record_analyze;
+
+ case ForgetTransaction:
+ free_rsttbl_idx(trtbl, transact_id);
+ goto next_log_record_analyze;
+
+ case Noop:
+ case OpenAttributeTableDump:
+ case AttributeNamesDump:
+ case DirtyPageTableDump:
+ case TransactionTableDump:
+ /* The following cases require no action in the Analysis Pass */
+ goto next_log_record_analyze;
+
+ default:
+ /*
+ * All codes will be explicitly handled.
+ * If we see a code we do not expect, then we are in trouble
+ */
+ goto next_log_record_analyze;
+ }
+
+end_log_records_enumerate:
+ lcb_put(lcb);
+ lcb = NULL;
+
+ /*
+ * Scan the Dirty Page Table and Transaction Table for
+ * the lowest lsn, and return it as the Redo lsn
+ */
+ dp = NULL;
+ while ((dp = enum_rstbl(dptbl, dp))) {
+ t64 = le64_to_cpu(dp->oldest_lsn);
+ if (t64 && t64 < rlsn)
+ rlsn = t64;
+ }
+
+ tr = NULL;
+ while ((tr = enum_rstbl(trtbl, tr))) {
+ t64 = le64_to_cpu(tr->first_lsn);
+ if (t64 && t64 < rlsn)
+ rlsn = t64;
+ }
+
+ /* Only proceed if the Dirty Page Table or Transaction Table is not empty */
+ if ((!dptbl || !dptbl->total) && (!trtbl || !trtbl->total))
+ goto end_reply;
+
+ sbi->flags |= NTFS_FLAGS_NEED_REPLAY;
+ if (is_ro)
+ goto out;
+
+ /* Reopen all of the attributes with dirty pages */
+ oe = NULL;
+next_open_attribute:
+
+ oe = enum_rstbl(oatbl, oe);
+ if (!oe) {
+ err = 0;
+ dp = NULL;
+ goto next_dirty_page;
+ }
+
+ oa = ntfs_zalloc(sizeof(struct OpenAttr));
+ if (!oa) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ inode = ntfs_iget5(sbi->sb, &oe->ref, NULL);
+ if (IS_ERR(inode))
+ goto fake_attr;
+
+ if (is_bad_inode(inode)) {
+ iput(inode);
+fake_attr:
+ if (oa->ni) {
+ iput(&oa->ni->vfs_inode);
+ oa->ni = NULL;
+ }
+
+ attr = attr_create_nonres_log(sbi, oe->type, 0, oe->ptr,
+ oe->name_len, 0);
+ if (!attr) {
+ ntfs_free(oa);
+ err = -ENOMEM;
+ goto out;
+ }
+ oa->attr = attr;
+ oa->run1 = &oa->run0;
+ goto final_oe;
+ }
+
+ ni_oe = ntfs_i(inode);
+ oa->ni = ni_oe;
+
+ attr = ni_find_attr(ni_oe, NULL, NULL, oe->type, oe->ptr, oe->name_len,
+ NULL, NULL);
+
+ if (!attr)
+ goto fake_attr;
+
+ t32 = le32_to_cpu(attr->size);
+ oa->attr = ntfs_memdup(attr, t32);
+ if (!oa->attr)
+ goto fake_attr;
+
+ if (!S_ISDIR(inode->i_mode)) {
+ if (attr->type == ATTR_DATA && !attr->name_len) {
+ oa->run1 = &ni_oe->file.run;
+ goto final_oe;
+ }
+ } else {
+ if (attr->type == ATTR_ALLOC &&
+ attr->name_len == ARRAY_SIZE(I30_NAME) &&
+ !memcmp(attr_name(attr), I30_NAME, sizeof(I30_NAME))) {
+ oa->run1 = &ni_oe->dir.alloc_run;
+ goto final_oe;
+ }
+ }
+
+ if (attr->non_res) {
+ u16 roff = le16_to_cpu(attr->nres.run_off);
+ CLST svcn = le64_to_cpu(attr->nres.svcn);
+
+ err = run_unpack(&oa->run0, sbi, inode->i_ino, svcn,
+ le64_to_cpu(attr->nres.evcn), svcn,
+ Add2Ptr(attr, roff), t32 - roff);
+ if (err < 0) {
+ ntfs_free(oa->attr);
+ oa->attr = NULL;
+ goto fake_attr;
+ }
+ err = 0;
+ }
+ oa->run1 = &oa->run0;
+ attr = oa->attr;
+
+final_oe:
+ if (oe->is_attr_name == 1)
+ ntfs_free(oe->ptr);
+ oe->is_attr_name = 0;
+ oe->ptr = oa;
+ oe->name_len = attr->name_len;
+
+ goto next_open_attribute;
+
+ /*
+ * Now loop through the dirty page table to extract all of the Vcn/Lcn
+ * mappings that we have, and insert them into the appropriate runs
+ */
+next_dirty_page:
+ dp = enum_rstbl(dptbl, dp);
+ if (!dp)
+ goto do_redo_1;
+
+ oe = Add2Ptr(oatbl, le32_to_cpu(dp->target_attr));
+
+ if (oe->next != RESTART_ENTRY_ALLOCATED_LE)
+ goto next_dirty_page;
+
+ oa = oe->ptr;
+ if (!oa)
+ goto next_dirty_page;
+
+ i = -1;
+next_dirty_page_vcn:
+ i += 1;
+ if (i >= le32_to_cpu(dp->lcns_follow))
+ goto next_dirty_page;
+
+ vcn = le64_to_cpu(dp->vcn) + i;
+ size = (vcn + 1) << sbi->cluster_bits;
+
+ if (!dp->page_lcns[i])
+ goto next_dirty_page_vcn;
+
+ rno = ino_get(&oe->ref);
+ if (rno <= MFT_REC_MIRR &&
+ size < (MFT_REC_VOL + 1) * sbi->record_size &&
+ oe->type == ATTR_DATA) {
+ goto next_dirty_page_vcn;
+ }
+
+ lcn = le64_to_cpu(dp->page_lcns[i]);
+
+ if ((!run_lookup_entry(oa->run1, vcn, &lcn0, &len0, NULL) ||
+ lcn0 != lcn) &&
+ !run_add_entry(oa->run1, vcn, lcn, 1, false)) {
+ err = -ENOMEM;
+ goto out;
+ }
+ attr = oa->attr;
+ t64 = le64_to_cpu(attr->nres.alloc_size);
+ if (size > t64) {
+ attr->nres.valid_size = attr->nres.data_size =
+ attr->nres.alloc_size = cpu_to_le64(size);
+ }
+ goto next_dirty_page_vcn;
+
+do_redo_1:
+ /*
+ * Perform the Redo Pass, to restore all of the dirty pages to the same
+ * contents that they had immediately before the crash.
+ * If the dirty page table is empty, then we can skip the entire Redo Pass
+ */
+ if (!dptbl || !dptbl->total)
+ goto do_undo_action;
+
+ rec_lsn = rlsn;
+
+ /*
+ * Read the record at the Redo lsn, before falling
+ * into common code to handle each record
+ */
+ err = read_log_rec_lcb(log, rlsn, lcb_ctx_next, &lcb);
+ if (err)
+ goto out;
+
+ /*
+ * Now loop to read all of our log records forwards,
+ * until we hit the end of the file, cleaning up at the end
+ */
+do_action_next:
+ frh = lcb->lrh;
+
+ if (LfsClientRecord != frh->record_type)
+ goto read_next_log_do_action;
+
+ transact_id = le32_to_cpu(frh->transact_id);
+ rec_len = le32_to_cpu(frh->client_data_len);
+ lrh = lcb->log_rec;
+
+ if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Ignore log records that do not update pages */
+ if (lrh->lcns_follow)
+ goto find_dirty_page;
+
+ goto read_next_log_do_action;
+
+find_dirty_page:
+ t16 = le16_to_cpu(lrh->target_attr);
+ t64 = le64_to_cpu(lrh->target_vcn);
+ dp = find_dp(dptbl, t16, t64);
+
+ if (!dp)
+ goto read_next_log_do_action;
+
+ if (rec_lsn < le64_to_cpu(dp->oldest_lsn))
+ goto read_next_log_do_action;
+
+ t16 = le16_to_cpu(lrh->target_attr);
+ if (t16 >= bytes_per_rt(oatbl)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ oe = Add2Ptr(oatbl, t16);
+
+ if (oe->next != RESTART_ENTRY_ALLOCATED_LE) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ oa = oe->ptr;
+
+ if (!oa) {
+ err = -EINVAL;
+ goto out;
+ }
+ attr = oa->attr;
+
+ vcn = le64_to_cpu(lrh->target_vcn);
+
+ if (!run_lookup_entry(oa->run1, vcn, &lcn, NULL, NULL) ||
+ lcn == SPARSE_LCN) {
+ goto read_next_log_do_action;
+ }
+
+ /* Point to the Redo data and get its length */
+ data = Add2Ptr(lrh, le16_to_cpu(lrh->redo_off));
+ dlen = le16_to_cpu(lrh->redo_len);
+
+ /* Shorten length by any Lcns which were deleted */
+ saved_len = dlen;
+
+ for (i = le16_to_cpu(lrh->lcns_follow); i; i--) {
+ size_t j;
+ u32 alen, voff;
+
+ voff = le16_to_cpu(lrh->record_off) +
+ le16_to_cpu(lrh->attr_off);
+ voff += le16_to_cpu(lrh->cluster_off) << SECTOR_SHIFT;
+
+ /* If the Vcn in question is allocated, we can just get out. */
+ j = le64_to_cpu(lrh->target_vcn) - le64_to_cpu(dp->vcn);
+ if (dp->page_lcns[j + i - 1])
+ break;
+
+ if (!saved_len)
+ saved_len = 1;
+
+ /*
+ * Calculate the allocated space left relative to the
+ * log record Vcn, after removing this unallocated Vcn
+ */
+ alen = (i - 1) << sbi->cluster_bits;
+
+ /*
+ * If the update described by this log record goes beyond
+ * the allocated space, then we will have to reduce the length
+ */
+ if (voff >= alen)
+ dlen = 0;
+ else if (voff + dlen > alen)
+ dlen = alen - voff;
+ }
+
+ /* If the resulting dlen from above is now zero, we can skip this log record */
+ if (!dlen && saved_len)
+ goto read_next_log_do_action;
+
+ t16 = le16_to_cpu(lrh->redo_op);
+ if (can_skip_action(t16))
+ goto read_next_log_do_action;
+
+ /* Apply the Redo operation in a common routine */
+ err = do_action(log, oe, lrh, t16, data, dlen, rec_len, &rec_lsn);
+ if (err)
+ goto out;
+
+ /* Keep reading and looping back until end of file */
+read_next_log_do_action:
+ err = read_next_log_rec(log, lcb, &rec_lsn);
+ if (!err && rec_lsn)
+ goto do_action_next;
+
+ lcb_put(lcb);
+ lcb = NULL;
+
+do_undo_action:
+ /* Scan Transaction Table */
+ tr = NULL;
+transaction_table_next:
+ tr = enum_rstbl(trtbl, tr);
+ if (!tr)
+ goto undo_action_done;
+
+ if (TransactionActive != tr->transact_state || !tr->undo_next_lsn) {
+ free_rsttbl_idx(trtbl, PtrOffset(trtbl, tr));
+ goto transaction_table_next;
+ }
+
+ log->transaction_id = PtrOffset(trtbl, tr);
+ undo_next_lsn = le64_to_cpu(tr->undo_next_lsn);
+
+ /*
+ * We only have to do anything if the transaction has
+ * something in its undo_next_lsn field
+ */
+ if (!undo_next_lsn)
+ goto commit_undo;
+
+ /* Read the first record to be undone by this transaction */
+ err = read_log_rec_lcb(log, undo_next_lsn, lcb_ctx_undo_next, &lcb);
+ if (err)
+ goto out;
+
+ /*
+ * Now loop to read all of our log records forwards,
+ * until we hit the end of the file, cleaning up at the end
+ */
+undo_action_next:
+
+ lrh = lcb->log_rec;
+ frh = lcb->lrh;
+ transact_id = le32_to_cpu(frh->transact_id);
+ rec_len = le32_to_cpu(frh->client_data_len);
+
+ if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (lrh->undo_op == cpu_to_le16(Noop))
+ goto read_next_log_undo_action;
+
+ oe = Add2Ptr(oatbl, le16_to_cpu(lrh->target_attr));
+ oa = oe->ptr;
+
+ t16 = le16_to_cpu(lrh->lcns_follow);
+ if (!t16)
+ goto add_allocated_vcns;
+
+ is_mapped = run_lookup_entry(oa->run1, le64_to_cpu(lrh->target_vcn),
+ &lcn, &clen, NULL);
+
+ /*
+ * If the mapping isn't already in the table, or the mapping
+ * corresponds to a hole, we need to make sure there is no
+ * partial page already in memory
+ */
+ if (is_mapped && lcn != SPARSE_LCN && clen >= t16)
+ goto add_allocated_vcns;
+
+ vcn = le64_to_cpu(lrh->target_vcn);
+ vcn &= ~(log->clst_per_page - 1);
+
+add_allocated_vcns:
+ for (i = 0, vcn = le64_to_cpu(lrh->target_vcn),
+ size = (vcn + 1) << sbi->cluster_bits;
+ i < t16; i++, vcn += 1, size += sbi->cluster_size) {
+ attr = oa->attr;
+ if (!attr->non_res) {
+ if (size > le32_to_cpu(attr->res.data_size))
+ attr->res.data_size = cpu_to_le32(size);
+ } else {
+ if (size > le64_to_cpu(attr->nres.data_size))
+ attr->nres.valid_size = attr->nres.data_size =
+ attr->nres.alloc_size =
+ cpu_to_le64(size);
+ }
+ }
+
+ t16 = le16_to_cpu(lrh->undo_op);
+ if (can_skip_action(t16))
+ goto read_next_log_undo_action;
+
+ /* Point to the Undo data and get its length */
+ data = Add2Ptr(lrh, le16_to_cpu(lrh->undo_off));
+ dlen = le16_to_cpu(lrh->undo_len);
+
+ /* It is time to apply the undo action */
+ err = do_action(log, oe, lrh, t16, data, dlen, rec_len, NULL);
+
+read_next_log_undo_action:
+ /*
+ * Keep reading and looping back until we have read the
+ * last record for this transaction
+ */
+ err = read_next_log_rec(log, lcb, &rec_lsn);
+ if (err)
+ goto out;
+
+ if (rec_lsn)
+ goto undo_action_next;
+
+ lcb_put(lcb);
+ lcb = NULL;
+
+commit_undo:
+ free_rsttbl_idx(trtbl, log->transaction_id);
+
+ log->transaction_id = 0;
+
+ goto transaction_table_next;
+
+undo_action_done:
+
+ ntfs_update_mftmirr(sbi, 0);
+
+ sbi->flags &= ~NTFS_FLAGS_NEED_REPLAY;
+
+end_reply:
+
+ err = 0;
+ if (is_ro)
+ goto out;
+
+ rh = ntfs_zalloc(log->page_size);
+ if (!rh) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rh->rhdr.sign = NTFS_RSTR_SIGNATURE;
+ rh->rhdr.fix_off = cpu_to_le16(offsetof(struct RESTART_HDR, fixups));
+ t16 = (log->page_size >> SECTOR_SHIFT) + 1;
+ rh->rhdr.fix_num = cpu_to_le16(t16);
+ rh->sys_page_size = cpu_to_le32(log->page_size);
+ rh->page_size = cpu_to_le32(log->page_size);
+
+ t16 = QuadAlign(offsetof(struct RESTART_HDR, fixups) +
+ sizeof(short) * t16);
+ rh->ra_off = cpu_to_le16(t16);
+ rh->minor_ver = cpu_to_le16(1); // 0x1A:
+ rh->major_ver = cpu_to_le16(1); // 0x1C:
+
+ ra2 = Add2Ptr(rh, t16);
+ memcpy(ra2, ra, sizeof(struct RESTART_AREA));
+
+ ra2->client_idx[0] = 0;
+ ra2->client_idx[1] = LFS_NO_CLIENT_LE;
+ ra2->flags = cpu_to_le16(2);
+
+ le32_add_cpu(&ra2->open_log_count, 1);
+
+ ntfs_fix_pre_write(&rh->rhdr, log->page_size);
+
+ err = ntfs_sb_write_run(sbi, &ni->file.run, 0, rh, log->page_size);
+ if (!err)
+ err = ntfs_sb_write_run(sbi, &log->ni->file.run, log->page_size,
+ rh, log->page_size);
+
+ ntfs_free(rh);
+ if (err)
+ goto out;
+
+out:
+ ntfs_free(rst);
+ if (lcb)
+ lcb_put(lcb);
+
+ /* Scan the Open Attribute Table to close all of the open attributes */
+ oe = NULL;
+ while ((oe = enum_rstbl(oatbl, oe))) {
+ rno = ino_get(&oe->ref);
+
+ if (oe->is_attr_name == 1) {
+ ntfs_free(oe->ptr);
+ oe->ptr = NULL;
+ continue;
+ }
+
+ if (oe->is_attr_name)
+ continue;
+
+ oa = oe->ptr;
+ if (!oa)
+ continue;
+
+ run_close(&oa->run0);
+ ntfs_free(oa->attr);
+ if (oa->ni)
+ iput(&oa->ni->vfs_inode);
+ ntfs_free(oa);
+ }
+
+ ntfs_free(trtbl);
+ ntfs_free(oatbl);
+ ntfs_free(dptbl);
+ ntfs_free(attr_names);
+ ntfs_free(rst_info.r_page);
+
+ ntfs_free(ra);
+ ntfs_free(log->one_page_buf);
+
+ if (err)
+ sbi->flags |= NTFS_FLAGS_NEED_REPLAY;
+
+ if (err == -EROFS)
+ err = 0;
+ else if (log->set_dirty)
+ ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+
+ ntfs_free(log);
+
+ return err;
+}
--
2.30.0
From: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
mainline inclusion
from mainline-v5.15-rc1
commit 522e010b58379fbe19b38fdef5016bca0c3cf405
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
This patch adds different types of NTFS-applicable compressions:
- lznt
- lzx
- xpress
The latter two (lzx, xpress) implement the Windows Compact OS feature and
were taken from the ntfs-3g system compression plugin authored by Eric Biggers
(https://github.com/ebiggers/ntfs-3g-system-compression),
which was ported to ntfs3 and adapted to the Linux kernel environment.
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/lib/decompress_common.c | 332 +++++++++++++++
fs/ntfs3/lib/decompress_common.h | 352 ++++++++++++++++
fs/ntfs3/lib/lib.h | 26 ++
fs/ntfs3/lib/lzx_decompress.c | 683 +++++++++++++++++++++++++++++++
fs/ntfs3/lib/xpress_decompress.c | 155 +++++++
fs/ntfs3/lznt.c | 452 ++++++++++++++++++++
6 files changed, 2000 insertions(+)
create mode 100644 fs/ntfs3/lib/decompress_common.c
create mode 100644 fs/ntfs3/lib/decompress_common.h
create mode 100644 fs/ntfs3/lib/lib.h
create mode 100644 fs/ntfs3/lib/lzx_decompress.c
create mode 100644 fs/ntfs3/lib/xpress_decompress.c
create mode 100644 fs/ntfs3/lznt.c
diff --git a/fs/ntfs3/lib/decompress_common.c b/fs/ntfs3/lib/decompress_common.c
new file mode 100644
index 000000000000..83c9e93aea77
--- /dev/null
+++ b/fs/ntfs3/lib/decompress_common.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * decompress_common.c - Code shared by the XPRESS and LZX decompressors
+ *
+ * Copyright (C) 2015 Eric Biggers
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "decompress_common.h"
+
+/*
+ * make_huffman_decode_table() -
+ *
+ * Build a decoding table for a canonical prefix code, or "Huffman code".
+ *
+ * This is an internal function, not part of the library API!
+ *
+ * This takes as input the length of the codeword for each symbol in the
+ * alphabet and produces as output a table that can be used for fast
+ * decoding of prefix-encoded symbols using read_huffsym().
+ *
+ * Strictly speaking, a canonical prefix code might not be a Huffman
+ * code. But this algorithm will work either way; and in fact, since
+ * Huffman codes are defined in terms of symbol frequencies, there is no
+ * way for the decompressor to know whether the code is a true Huffman
+ * code or not until all symbols have been decoded.
+ *
+ * Because the prefix code is assumed to be "canonical", it can be
+ * reconstructed directly from the codeword lengths. A prefix code is
+ * canonical if and only if a longer codeword never lexicographically
+ * precedes a shorter codeword, and the lexicographic ordering of
+ * codewords of the same length is the same as the lexicographic ordering
+ * of the corresponding symbols. Consequently, we can sort the symbols
+ * primarily by codeword length and secondarily by symbol value, then
+ * reconstruct the prefix code by generating codewords lexicographically
+ * in that order.
+ *
+ * This function does not, however, generate the prefix code explicitly.
+ * Instead, it directly builds a table for decoding symbols using the
+ * code. The basic idea is this: given the next 'max_codeword_len' bits
+ * in the input, we can look up the decoded symbol by indexing a table
+ * containing 2**max_codeword_len entries. A codeword with length
+ * 'max_codeword_len' will have exactly one entry in this table, whereas
+ * a codeword shorter than 'max_codeword_len' will have multiple entries
+ * in this table. Precisely, a codeword of length n will be represented
+ * by 2**(max_codeword_len - n) entries in this table. The 0-based index
+ * of each such entry will contain the corresponding codeword as a prefix
+ * when zero-padded on the left to 'max_codeword_len' binary digits.
+ *
+ * That's the basic idea, but we implement two optimizations regarding
+ * the format of the decode table itself:
+ *
+ * - For many compression formats, the maximum codeword length is too
+ * long for it to be efficient to build the full decoding table
+ * whenever a new prefix code is used. Instead, we can build the table
+ * using only 2**table_bits entries, where 'table_bits' is some number
+ * less than or equal to 'max_codeword_len'. Then, only codewords of
+ * length 'table_bits' and shorter can be directly looked up. For
+ * longer codewords, the direct lookup instead produces the root of a
+ * binary tree. Using this tree, the decoder can do traditional
+ * bit-by-bit decoding of the remainder of the codeword. Child nodes
+ * are allocated in extra entries at the end of the table; leaf nodes
+ * contain symbols. Note that the long-codeword case is, in general,
+ * not performance critical, since in Huffman codes the most frequently
+ * used symbols are assigned the shortest codeword lengths.
+ *
+ * - When we decode a symbol using a direct lookup of the table, we still
+ * need to know its length so that the bitstream can be advanced by the
+ * appropriate number of bits. The simple solution is to simply retain
+ * the 'lens' array and use the decoded symbol as an index into it.
+ * However, this requires two separate array accesses in the fast path.
+ * The optimization is to store the length directly in the decode
+ * table. We use the bottom 11 bits for the symbol and the top 5 bits
+ * for the length. In addition, to combine this optimization with the
+ * previous one, we introduce a special case where the top 2 bits of
+ * the length are both set if the entry is actually the root of a
+ * binary tree.
+ *
+ * @decode_table:
+ * The array in which to create the decoding table. This must have
+ * a length of at least ((2**table_bits) + 2 * num_syms) entries.
+ *
+ * @num_syms:
+ * The number of symbols in the alphabet; also, the length of the
+ * 'lens' array. Must be less than or equal to 2048.
+ *
+ * @table_bits:
+ * The order of the decode table size, as explained above. Must be
+ * less than or equal to 13.
+ *
+ * @lens:
+ * An array of length @num_syms, indexable by symbol, that gives the
+ * length of the codeword, in bits, for that symbol. The length can
+ * be 0, which means that the symbol does not have a codeword
+ * assigned.
+ *
+ * @max_codeword_len:
+ * The longest codeword length allowed in the compression format.
+ * All entries in 'lens' must be less than or equal to this value.
+ * This must be less than or equal to 23.
+ *
+ * @working_space:
+ * A temporary array of length '2 * (max_codeword_len + 1) +
+ * num_syms'.
+ *
+ * Returns 0 on success, or -1 if the lengths do not form a valid prefix
+ * code.
+ */
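+/*
+ * Illustrative example (not from the original code): for num_syms = 3,
+ * lens = {1, 2, 2} and table_bits = 2, the canonical codewords are
+ * 0, 10 and 11, and the direct-lookup part of the table is filled as:
+ *
+ *	index 00 -> (1 << 11) | 0	index 10 -> (2 << 11) | 1
+ *	index 01 -> (1 << 11) | 0	index 11 -> (2 << 11) | 2
+ *
+ * i.e. a codeword of length n occupies 2**(table_bits - n) entries,
+ * each storing the codeword length in the top 5 bits and the symbol
+ * in the bottom 11 bits.
+ */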
+int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
+ const u32 table_bits, const u8 lens[],
+ const u32 max_codeword_len,
+ u16 working_space[])
+{
+ const u32 table_num_entries = 1 << table_bits;
+ u16 * const len_counts = &working_space[0];
+ u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
+ u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
+ int left;
+ void *decode_table_ptr;
+ u32 sym_idx;
+ u32 codeword_len;
+ u32 stores_per_loop;
+ u32 decode_table_pos;
+ u32 len;
+ u32 sym;
+
+ /* Count how many symbols have each possible codeword length.
+ * Note that a length of 0 indicates the corresponding symbol is not
+ * used in the code and therefore does not have a codeword.
+ */
+ for (len = 0; len <= max_codeword_len; len++)
+ len_counts[len] = 0;
+ for (sym = 0; sym < num_syms; sym++)
+ len_counts[lens[sym]]++;
+
+ /* We can assume all lengths are <= max_codeword_len, but we
+ * cannot assume they form a valid prefix code. A codeword of
+ * length n should require a proportion of the codespace equaling
+ * (1/2)^n. The code is valid if and only if the codespace is
+ * exactly filled by the lengths, by this measure.
+ */
+ left = 1;
+ for (len = 1; len <= max_codeword_len; len++) {
+ left <<= 1;
+ left -= len_counts[len];
+ if (left < 0) {
+ /* The lengths overflow the codespace; that is, the code
+ * is over-subscribed.
+ */
+ return -1;
+ }
+ }
+
+ if (left) {
+ /* The lengths do not fill the codespace; that is, they form an
+ * incomplete set.
+ */
+ if (left == (1 << max_codeword_len)) {
+ /* The code is completely empty. This is arguably
+ * invalid, but in fact it is valid in LZX and XPRESS,
+ * so we must allow it. By definition, no symbols can
+ * be decoded with an empty code. Consequently, we
+ * technically don't even need to fill in the decode
+ * table. However, to avoid accessing uninitialized
+ * memory if the algorithm nevertheless attempts to
+ * decode symbols using such a code, we zero out the
+ * decode table.
+ */
+ memset(decode_table, 0,
+ table_num_entries * sizeof(decode_table[0]));
+ return 0;
+ }
+ return -1;
+ }
+
+ /* Sort the symbols primarily by length and secondarily by symbol order.
+ */
+
+ /* Initialize 'offsets' so that offsets[len] for 1 <= len <=
+ * max_codeword_len is the number of codewords shorter than 'len' bits.
+ */
+ offsets[1] = 0;
+ for (len = 1; len < max_codeword_len; len++)
+ offsets[len + 1] = offsets[len] + len_counts[len];
+
+ /* Use the 'offsets' array to sort the symbols. Note that we do not
+ * include symbols that are not used in the code. Consequently, fewer
+ * than 'num_syms' entries in 'sorted_syms' may be filled.
+ */
+ for (sym = 0; sym < num_syms; sym++)
+ if (lens[sym])
+ sorted_syms[offsets[lens[sym]]++] = sym;
+
+ /* Fill entries for codewords with length <= table_bits
+ * --- that is, those short enough for a direct mapping.
+ *
+ * The table will start with entries for the shortest codeword(s), which
+ * have the most entries. From there, the number of entries per
+ * codeword will decrease.
+ */
+ decode_table_ptr = decode_table;
+ sym_idx = 0;
+ codeword_len = 1;
+ stores_per_loop = (1 << (table_bits - codeword_len));
+ for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) {
+ u32 end_sym_idx = sym_idx + len_counts[codeword_len];
+
+ for (; sym_idx < end_sym_idx; sym_idx++) {
+ u16 entry;
+ u16 *p;
+ u32 n;
+
+ entry = ((u32)codeword_len << 11) | sorted_syms[sym_idx];
+ p = (u16 *)decode_table_ptr;
+ n = stores_per_loop;
+
+ do {
+ *p++ = entry;
+ } while (--n);
+
+ decode_table_ptr = p;
+ }
+ }
+
+ /* If we've filled in the entire table, we are done. Otherwise,
+ * there are codewords longer than table_bits for which we must
+ * generate binary trees.
+ */
+ decode_table_pos = (u16 *)decode_table_ptr - decode_table;
+ if (decode_table_pos != table_num_entries) {
+ u32 j;
+ u32 next_free_tree_slot;
+ u32 cur_codeword;
+
+ /* First, zero out the remaining entries. This is
+ * necessary so that these entries appear as
+ * "unallocated" in the next part. Each of these entries
+ * will eventually be filled with the representation of
+ * the root node of a binary tree.
+ */
+ j = decode_table_pos;
+ do {
+ decode_table[j] = 0;
+ } while (++j != table_num_entries);
+
+ /* We allocate child nodes starting at the end of the
+ * direct lookup table. Note that there should be
+ * 2*num_syms extra entries for this purpose, although
+ * fewer than this may actually be needed.
+ */
+ next_free_tree_slot = table_num_entries;
+
+ /* Iterate through each codeword with length greater than
+ * 'table_bits', primarily in order of codeword length
+ * and secondarily in order of symbol.
+ */
+ for (cur_codeword = decode_table_pos << 1;
+ codeword_len <= max_codeword_len;
+ codeword_len++, cur_codeword <<= 1) {
+ u32 end_sym_idx = sym_idx + len_counts[codeword_len];
+
+ for (; sym_idx < end_sym_idx; sym_idx++, cur_codeword++) {
+ /* 'sorted_sym' is the symbol represented by the
+ * codeword.
+ */
+ u32 sorted_sym = sorted_syms[sym_idx];
+ u32 extra_bits = codeword_len - table_bits;
+ u32 node_idx = cur_codeword >> extra_bits;
+
+ /* Go through each bit of the current codeword
+ * beyond the prefix of length @table_bits and
+ * walk the appropriate binary tree, allocating
+ * any slots that have not yet been allocated.
+ *
+ * Note that the 'pointer' entry to the binary
+ * tree, which is stored in the direct lookup
+ * portion of the table, is represented
+ * identically to other internal (non-leaf)
+ * nodes of the binary tree; it can be thought
+ * of as simply the root of the tree. The
+ * representation of these internal nodes is
+ * simply the index of the left child combined
+ * with the special bits 0xC000 to distinguish
+ * the entry from direct mapping and leaf node
+ * entries.
+ */
+ do {
+ /* At least one bit remains in the
+ * codeword, but the current node is an
+ * unallocated leaf. Change it to an
+ * internal node.
+ */
+ if (decode_table[node_idx] == 0) {
+ decode_table[node_idx] =
+ next_free_tree_slot | 0xC000;
+ decode_table[next_free_tree_slot++] = 0;
+ decode_table[next_free_tree_slot++] = 0;
+ }
+
+ /* Go to the left child if the next bit
+ * in the codeword is 0; otherwise go to
+ * the right child.
+ */
+ node_idx = decode_table[node_idx] & 0x3FFF;
+ --extra_bits;
+ node_idx += (cur_codeword >> extra_bits) & 1;
+ } while (extra_bits != 0);
+
+ /* We've traversed the tree using the entire
+ * codeword, and we're now at the entry where
+ * the actual symbol will be stored. This is
+ * distinguished from internal nodes by not
+ * having its high two bits set.
+ */
+ decode_table[node_idx] = sorted_sym;
+ }
+ }
+ }
+ return 0;
+}
diff --git a/fs/ntfs3/lib/decompress_common.h b/fs/ntfs3/lib/decompress_common.h
new file mode 100644
index 000000000000..66297f398403
--- /dev/null
+++ b/fs/ntfs3/lib/decompress_common.h
@@ -0,0 +1,352 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/*
+ * decompress_common.h - Code shared by the XPRESS and LZX decompressors
+ *
+ * Copyright (C) 2015 Eric Biggers
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/string.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/unaligned.h>
+
+
+/* "Force inline" macro (not required, but helpful for performance) */
+#define forceinline __always_inline
+
+/* Enable whole-word match copying on selected architectures */
+#if defined(__i386__) || defined(__x86_64__) || defined(__ARM_FEATURE_UNALIGNED)
+# define FAST_UNALIGNED_ACCESS
+#endif
+
+/* Size of a machine word */
+#define WORDBYTES (sizeof(size_t))
+
+static forceinline void
+copy_unaligned_word(const void *src, void *dst)
+{
+ put_unaligned(get_unaligned((const size_t *)src), (size_t *)dst);
+}
+
+
+/* Generate a "word" with platform-dependent size whose bytes all contain the
+ * value 'b'.
+ */
+static forceinline size_t repeat_byte(u8 b)
+{
+ size_t v;
+
+ v = b;
+ v |= v << 8;
+ v |= v << 16;
+ v |= v << ((WORDBYTES == 8) ? 32 : 0);
+ return v;
+}
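+/*
+ * Illustrative example (not from the original code): on a 64-bit
+ * machine repeat_byte(0xE8) == 0xE8E8E8E8E8E8E8E8; lz_copy() uses this
+ * to expand offset-1 matches one word at a time.
+ */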
+
+/* Structure that encapsulates a block of in-memory data being interpreted as a
+ * stream of bits, optionally with interwoven literal bytes. Bits are assumed
+ * to be stored in little endian 16-bit coding units, with the bits ordered high
+ * to low.
+ */
+struct input_bitstream {
+
+ /* Bits that have been read from the input buffer. The bits are
+ * left-justified; the next bit is always bit 31.
+ */
+ u32 bitbuf;
+
+ /* Number of bits currently held in @bitbuf. */
+ u32 bitsleft;
+
+ /* Pointer to the next byte to be retrieved from the input buffer. */
+ const u8 *next;
+
+ /* Pointer to just past the end of the input buffer. */
+ const u8 *end;
+};
+
+/* Initialize a bitstream to read from the specified input buffer. */
+static forceinline void init_input_bitstream(struct input_bitstream *is,
+ const void *buffer, u32 size)
+{
+ is->bitbuf = 0;
+ is->bitsleft = 0;
+ is->next = buffer;
+ is->end = is->next + size;
+}
+
+/* Ensure the bit buffer variable for the bitstream contains at least @num_bits
+ * bits. Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
+ * may be called on the bitstream to peek or remove up to @num_bits bits. Note
+ * that @num_bits must be <= 16.
+ */
+static forceinline void bitstream_ensure_bits(struct input_bitstream *is,
+ u32 num_bits)
+{
+ if (is->bitsleft < num_bits) {
+ if (is->end - is->next >= 2) {
+ is->bitbuf |= (u32)get_unaligned_le16(is->next)
+ << (16 - is->bitsleft);
+ is->next += 2;
+ }
+ is->bitsleft += 16;
+ }
+}
+
+/* Return the next @num_bits bits from the bitstream, without removing them.
+ * There must be at least @num_bits remaining in the buffer variable, from a
+ * previous call to bitstream_ensure_bits().
+ */
+static forceinline u32
+bitstream_peek_bits(const struct input_bitstream *is, const u32 num_bits)
+{
+ return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
+}
+
+/* Remove @num_bits from the bitstream. There must be at least @num_bits
+ * remaining in the buffer variable, from a previous call to
+ * bitstream_ensure_bits().
+ */
+static forceinline void
+bitstream_remove_bits(struct input_bitstream *is, u32 num_bits)
+{
+ is->bitbuf <<= num_bits;
+ is->bitsleft -= num_bits;
+}
+
+/* Remove and return @num_bits bits from the bitstream. There must be at least
+ * @num_bits remaining in the buffer variable, from a previous call to
+ * bitstream_ensure_bits().
+ */
+static forceinline u32
+bitstream_pop_bits(struct input_bitstream *is, u32 num_bits)
+{
+ u32 bits = bitstream_peek_bits(is, num_bits);
+
+ bitstream_remove_bits(is, num_bits);
+ return bits;
+}
+
+/* Read and return the next @num_bits bits from the bitstream. */
+static forceinline u32
+bitstream_read_bits(struct input_bitstream *is, u32 num_bits)
+{
+ bitstream_ensure_bits(is, num_bits);
+ return bitstream_pop_bits(is, num_bits);
+}
+
+/* Read and return the next literal byte embedded in the bitstream. */
+static forceinline u8
+bitstream_read_byte(struct input_bitstream *is)
+{
+ if (unlikely(is->end == is->next))
+ return 0;
+ return *is->next++;
+}
+
+/* Read and return the next 16-bit integer embedded in the bitstream. */
+static forceinline u16
+bitstream_read_u16(struct input_bitstream *is)
+{
+ u16 v;
+
+ if (unlikely(is->end - is->next < 2))
+ return 0;
+ v = get_unaligned_le16(is->next);
+ is->next += 2;
+ return v;
+}
+
+/* Read and return the next 32-bit integer embedded in the bitstream. */
+static forceinline u32
+bitstream_read_u32(struct input_bitstream *is)
+{
+ u32 v;
+
+ if (unlikely(is->end - is->next < 4))
+ return 0;
+ v = get_unaligned_le32(is->next);
+ is->next += 4;
+ return v;
+}
+
+/* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
+ * Return either a pointer to the byte past the last one written, or NULL if the
+ * read overflows the input buffer.
+ */
+static forceinline void *bitstream_read_bytes(struct input_bitstream *is,
+ void *dst_buffer, size_t count)
+{
+ if ((size_t)(is->end - is->next) < count)
+ return NULL;
+ memcpy(dst_buffer, is->next, count);
+ is->next += count;
+ return (u8 *)dst_buffer + count;
+}
+
+/* Align the input bitstream on a coding-unit boundary. */
+static forceinline void bitstream_align(struct input_bitstream *is)
+{
+ is->bitsleft = 0;
+ is->bitbuf = 0;
+}
+
+extern int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
+ const u32 num_bits, const u8 lens[],
+ const u32 max_codeword_len,
+ u16 working_space[]);
+
+
+/* Reads and returns the next Huffman-encoded symbol from a bitstream. If the
+ * input data is exhausted, the Huffman symbol is decoded as if the missing bits
+ * are all zeroes.
+ */
+static forceinline u32 read_huffsym(struct input_bitstream *istream,
+ const u16 decode_table[],
+ u32 table_bits,
+ u32 max_codeword_len)
+{
+ u32 entry;
+ u32 key_bits;
+
+ bitstream_ensure_bits(istream, max_codeword_len);
+
+ /* Index the decode table by the next table_bits bits of the input. */
+ key_bits = bitstream_peek_bits(istream, table_bits);
+ entry = decode_table[key_bits];
+ if (entry < 0xC000) {
+ /* Fast case: The decode table directly provided the
+ * symbol and codeword length. The low 11 bits are the
+ * symbol, and the high 5 bits are the codeword length.
+ */
+ bitstream_remove_bits(istream, entry >> 11);
+ return entry & 0x7FF;
+ }
+ /* Slow case: The codeword for the symbol is longer than
+ * table_bits, so the symbol does not have an entry
+ * directly in the first (1 << table_bits) entries of the
+ * decode table. Traverse the appropriate binary tree
+ * bit-by-bit to decode the symbol.
+ */
+ bitstream_remove_bits(istream, table_bits);
+ do {
+ key_bits = (entry & 0x3FFF) + bitstream_pop_bits(istream, 1);
+ } while ((entry = decode_table[key_bits]) >= 0xC000);
+ return entry;
+}
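+/*
+ * Illustrative example (not from the original code): a direct-lookup
+ * entry of 0x1805 decodes as codeword length 0x1805 >> 11 == 3 and
+ * symbol 0x1805 & 0x7FF == 5; any entry >= 0xC000 is instead a tree
+ * node whose low 14 bits index the left child.
+ */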
+
+/*
+ * Copy an LZ77 match at (dst - offset) to dst.
+ *
+ * The length and offset must be already validated --- that is, (dst - offset)
+ * can't underrun the output buffer, and (dst + length) can't overrun the output
+ * buffer. Also, the length cannot be 0.
+ *
+ * @bufend points to the byte past the end of the output buffer. This function
+ * won't write any data beyond this position.
+ *
+ * Returns dst + length.
+ */
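+/*
+ * Illustrative example (not from the original code):
+ * lz_copy(dst, 5, 1, bufend, 2) replicates the byte at dst[-1] five
+ * times, i.e. a run-length expansion of the previous byte.
+ */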
+static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend,
+ u32 min_length)
+{
+ const u8 *src = dst - offset;
+
+ /*
+ * Try to copy one machine word at a time. On i386 and x86_64 this is
+ * faster than copying one byte at a time, unless the data is
+ * near-random and all the matches have very short lengths. Note that
+ * since this requires unaligned memory accesses, it won't necessarily
+ * be faster on every architecture.
+ *
+ * Also note that we might copy more than the length of the match. For
+ * example, if a word is 8 bytes and the match is of length 5, then
+ * we'll simply copy 8 bytes. This is okay as long as we don't write
+ * beyond the end of the output buffer, hence the check for (bufend -
+ * end >= WORDBYTES - 1).
+ */
+#ifdef FAST_UNALIGNED_ACCESS
+ u8 * const end = dst + length;
+
+ if (bufend - end >= (ptrdiff_t)(WORDBYTES - 1)) {
+
+ if (offset >= WORDBYTES) {
+ /* The source and destination words don't overlap. */
+
+ /* To improve branch prediction, one iteration of this
+ * loop is unrolled. Most matches are short and will
+ * fail the first check. But if that check passes, then
+ * it becomes increasing likely that the match is long
+ * and we'll need to continue copying.
+ */
+
+ copy_unaligned_word(src, dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+
+ if (dst < end) {
+ do {
+ copy_unaligned_word(src, dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ } while (dst < end);
+ }
+ return end;
+ } else if (offset == 1) {
+
+ /* Offset 1 matches are equivalent to run-length
+ * encoding of the previous byte. This case is common
+ * if the data contains many repeated bytes.
+ */
+ size_t v = repeat_byte(*(dst - 1));
+
+ do {
+ put_unaligned(v, (size_t *)dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ } while (dst < end);
+ return end;
+ }
+ /*
+ * We don't bother with special cases for other 'offset <
+ * WORDBYTES', which are usually rarer than 'offset == 1'. Extra
+ * checks will just slow things down. Actually, it's possible
+ * to handle all the 'offset < WORDBYTES' cases using the same
+ * code, but it becomes more complicated and doesn't seem any
+ * faster overall; it definitely slows down the more common
+ * 'offset == 1' case.
+ */
+ }
+#endif /* FAST_UNALIGNED_ACCESS */
+
+ /* Fall back to a bytewise copy. */
+
+ if (min_length >= 2) {
+ *dst++ = *src++;
+ length--;
+ }
+ if (min_length >= 3) {
+ *dst++ = *src++;
+ length--;
+ }
+ do {
+ *dst++ = *src++;
+ } while (--length);
+
+ return dst;
+}
diff --git a/fs/ntfs3/lib/lib.h b/fs/ntfs3/lib/lib.h
new file mode 100644
index 000000000000..f508fbad2e71
--- /dev/null
+++ b/fs/ntfs3/lib/lib.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Adapted for linux kernel by Alexander Mamaev:
+ * - remove implementations of get_unaligned_
+ * - assume GCC is always defined
+ * - ISO C90
+ * - linux kernel code style
+ */
+
+
+/* globals from xpress_decompress.c */
+struct xpress_decompressor *xpress_allocate_decompressor(void);
+void xpress_free_decompressor(struct xpress_decompressor *d);
+int xpress_decompress(struct xpress_decompressor *__restrict d,
+ const void *__restrict compressed_data,
+ size_t compressed_size,
+ void *__restrict uncompressed_data,
+ size_t uncompressed_size);
+
+/* globals from lzx_decompress.c */
+struct lzx_decompressor *lzx_allocate_decompressor(void);
+void lzx_free_decompressor(struct lzx_decompressor *d);
+int lzx_decompress(struct lzx_decompressor *__restrict d,
+ const void *__restrict compressed_data,
+ size_t compressed_size, void *__restrict uncompressed_data,
+ size_t uncompressed_size);
diff --git a/fs/ntfs3/lib/lzx_decompress.c b/fs/ntfs3/lib/lzx_decompress.c
new file mode 100644
index 000000000000..77a381a693d1
--- /dev/null
+++ b/fs/ntfs3/lib/lzx_decompress.c
@@ -0,0 +1,683 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * lzx_decompress.c - A decompressor for the LZX compression format, which can
+ * be used in "System Compressed" files. This is based on the code from wimlib.
+ * This code only supports a window size (dictionary size) of 32768 bytes, since
+ * this is the only size used in System Compression.
+ *
+ * Copyright (C) 2015 Eric Biggers
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "decompress_common.h"
+#include "lib.h"
+
+/* Number of literal byte values */
+#define LZX_NUM_CHARS 256
+
+/* The smallest and largest allowed match lengths */
+#define LZX_MIN_MATCH_LEN 2
+#define LZX_MAX_MATCH_LEN 257
+
+/* Number of distinct match lengths that can be represented */
+#define LZX_NUM_LENS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)
+
+/* Number of match lengths for which no length symbol is required */
+#define LZX_NUM_PRIMARY_LENS 7
+#define LZX_NUM_LEN_HEADERS (LZX_NUM_PRIMARY_LENS + 1)
+
+/* Valid values of the 3-bit block type field */
+#define LZX_BLOCKTYPE_VERBATIM 1
+#define LZX_BLOCKTYPE_ALIGNED 2
+#define LZX_BLOCKTYPE_UNCOMPRESSED 3
+
+/* Number of offset slots for a window size of 32768 */
+#define LZX_NUM_OFFSET_SLOTS 30
+
+/* Number of symbols in the main code for a window size of 32768 */
+#define LZX_MAINCODE_NUM_SYMBOLS \
+ (LZX_NUM_CHARS + (LZX_NUM_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))
+
+/* Number of symbols in the length code */
+#define LZX_LENCODE_NUM_SYMBOLS (LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)
+
+/* Number of symbols in the precode */
+#define LZX_PRECODE_NUM_SYMBOLS 20
+
+/* Number of bits in which each precode codeword length is represented */
+#define LZX_PRECODE_ELEMENT_SIZE 4
+
+/* Number of low-order bits of each match offset that are entropy-encoded in
+ * aligned offset blocks
+ */
+#define LZX_NUM_ALIGNED_OFFSET_BITS 3
+
+/* Number of symbols in the aligned offset code */
+#define LZX_ALIGNEDCODE_NUM_SYMBOLS (1 << LZX_NUM_ALIGNED_OFFSET_BITS)
+
+/* Mask for the match offset bits that are entropy-encoded in aligned offset
+ * blocks
+ */
+#define LZX_ALIGNED_OFFSET_BITMASK ((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)
+
+/* Number of bits in which each aligned offset codeword length is represented */
+#define LZX_ALIGNEDCODE_ELEMENT_SIZE 3
+
+/* Maximum lengths (in bits) of the codewords in each Huffman code */
+#define LZX_MAX_MAIN_CODEWORD_LEN 16
+#define LZX_MAX_LEN_CODEWORD_LEN 16
+#define LZX_MAX_PRE_CODEWORD_LEN ((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
+#define LZX_MAX_ALIGNED_CODEWORD_LEN ((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)
+
+/* The default "filesize" value used in pre/post-processing. In the LZX format
+ * used in cabinet files this value must be given to the decompressor, whereas
+ * in the LZX format used in WIM files and system-compressed files this value is
+ * fixed at 12000000.
+ */
+#define LZX_DEFAULT_FILESIZE 12000000
+
+/* Assumed block size when the encoded block size begins with a 0 bit. */
+#define LZX_DEFAULT_BLOCK_SIZE 32768
+
+/* Number of offsets in the recent (or "repeat") offsets queue. */
+#define LZX_NUM_RECENT_OFFSETS 3
+
+/* These values are chosen for fast decompression. */
+#define LZX_MAINCODE_TABLEBITS 11
+#define LZX_LENCODE_TABLEBITS 10
+#define LZX_PRECODE_TABLEBITS 6
+#define LZX_ALIGNEDCODE_TABLEBITS 7
+
+#define LZX_READ_LENS_MAX_OVERRUN 50
+
+/* Mapping: offset slot => first match offset that uses that offset slot.
+ */
+static const u32 lzx_offset_slot_base[LZX_NUM_OFFSET_SLOTS + 1] = {
+ 0, 1, 2, 3, 4, /* 0 --- 4 */
+ 6, 8, 12, 16, 24, /* 5 --- 9 */
+ 32, 48, 64, 96, 128, /* 10 --- 14 */
+ 192, 256, 384, 512, 768, /* 15 --- 19 */
+ 1024, 1536, 2048, 3072, 4096, /* 20 --- 24 */
+ 6144, 8192, 12288, 16384, 24576, /* 25 --- 29 */
+ 32768, /* extra */
+};
+
+/* Mapping: offset slot => how many extra bits must be read and added to the
+ * corresponding offset slot base to decode the match offset.
+ */
+static const u8 lzx_extra_offset_bits[LZX_NUM_OFFSET_SLOTS] = {
+ 0, 0, 0, 0, 1,
+ 1, 2, 2, 3, 3,
+ 4, 4, 5, 5, 6,
+ 6, 7, 7, 8, 8,
+ 9, 9, 10, 10, 11,
+ 11, 12, 12, 13, 13,
+};
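+/*
+ * Illustrative example (not from the original code): offset slot 9 has
+ * base 24 and 3 extra bits, so it encodes match offsets 24..31; slot 10
+ * then continues at base 32.
+ */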
+
+/* Reusable heap-allocated memory for LZX decompression */
+struct lzx_decompressor {
+
+ /* Huffman decoding tables, and arrays that map symbols to codeword
+ * lengths
+ */
+
+ u16 maincode_decode_table[(1 << LZX_MAINCODE_TABLEBITS) +
+ (LZX_MAINCODE_NUM_SYMBOLS * 2)];
+ u8 maincode_lens[LZX_MAINCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
+
+
+ u16 lencode_decode_table[(1 << LZX_LENCODE_TABLEBITS) +
+ (LZX_LENCODE_NUM_SYMBOLS * 2)];
+ u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
+
+
+ u16 alignedcode_decode_table[(1 << LZX_ALIGNEDCODE_TABLEBITS) +
+ (LZX_ALIGNEDCODE_NUM_SYMBOLS * 2)];
+ u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
+
+ u16 precode_decode_table[(1 << LZX_PRECODE_TABLEBITS) +
+ (LZX_PRECODE_NUM_SYMBOLS * 2)];
+ u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
+
+ /* Temporary space for make_huffman_decode_table() */
+ u16 working_space[2 * (1 + LZX_MAX_MAIN_CODEWORD_LEN) +
+ LZX_MAINCODE_NUM_SYMBOLS];
+};
+
+static void undo_e8_translation(void *target, s32 input_pos)
+{
+ s32 abs_offset, rel_offset;
+
+ abs_offset = get_unaligned_le32(target);
+ if (abs_offset >= 0) {
+ if (abs_offset < LZX_DEFAULT_FILESIZE) {
+ /* "good translation" */
+ rel_offset = abs_offset - input_pos;
+ put_unaligned_le32(rel_offset, target);
+ }
+ } else {
+ if (abs_offset >= -input_pos) {
+ /* "compensating translation" */
+ rel_offset = abs_offset + LZX_DEFAULT_FILESIZE;
+ put_unaligned_le32(rel_offset, target);
+ }
+ }
+}
+
+/*
+ * Undo the 'E8' preprocessing used in LZX. Before compression, the
+ * uncompressed data was preprocessed by changing the targets of suspected x86
+ * CALL instructions from relative offsets to absolute offsets. After
+ * match/literal decoding, the decompressor must undo the translation.
+ */
+static void lzx_postprocess(u8 *data, u32 size)
+{
+ /*
+ * A worthwhile optimization is to push the end-of-buffer check into the
+ * relatively rare E8 case. This is possible if we replace the last six
+ * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
+ * before reaching end-of-buffer. In addition, this scheme guarantees
+ * that no translation can begin following an E8 byte in the last 10
+ * bytes because a 4-byte offset containing E8 as its high byte is a
+ * large negative number that is not valid for translation. That is
+ * exactly what we need.
+ */
+ u8 *tail;
+ u8 saved_bytes[6];
+ u8 *p;
+
+ if (size <= 10)
+ return;
+
+ tail = &data[size - 6];
+ memcpy(saved_bytes, tail, 6);
+ memset(tail, 0xE8, 6);
+ p = data;
+ for (;;) {
+ while (*p != 0xE8)
+ p++;
+ if (p >= tail)
+ break;
+ undo_e8_translation(p + 1, p - data);
+ p += 5;
+ }
+ memcpy(tail, saved_bytes, 6);
+}
+
+/* Read a Huffman-encoded symbol using the precode. */
+static forceinline u32 read_presym(const struct lzx_decompressor *d,
+ struct input_bitstream *is)
+{
+ return read_huffsym(is, d->precode_decode_table,
+ LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
+}
+
+/* Read a Huffman-encoded symbol using the main code. */
+static forceinline u32 read_mainsym(const struct lzx_decompressor *d,
+ struct input_bitstream *is)
+{
+ return read_huffsym(is, d->maincode_decode_table,
+ LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
+}
+
+/* Read a Huffman-encoded symbol using the length code. */
+static forceinline u32 read_lensym(const struct lzx_decompressor *d,
+ struct input_bitstream *is)
+{
+ return read_huffsym(is, d->lencode_decode_table,
+ LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
+}
+
+/* Read a Huffman-encoded symbol using the aligned offset code. */
+static forceinline u32 read_alignedsym(const struct lzx_decompressor *d,
+ struct input_bitstream *is)
+{
+ return read_huffsym(is, d->alignedcode_decode_table,
+ LZX_ALIGNEDCODE_TABLEBITS,
+ LZX_MAX_ALIGNED_CODEWORD_LEN);
+}
+
+/*
+ * Read the precode from the compressed input bitstream, then use it to decode
+ * @num_lens codeword length values.
+ *
+ * @is: The input bitstream.
+ *
+ * @lens: An array that contains the length values from the previous time
+ * the codeword lengths for this Huffman code were read, or all 0's
+ * if this is the first time. This array must have at least
+ * (@num_lens + LZX_READ_LENS_MAX_OVERRUN) entries.
+ *
+ * @num_lens: Number of length values to decode.
+ *
+ * Returns 0 on success, or -1 if the data was invalid.
+ */
+static int lzx_read_codeword_lens(struct lzx_decompressor *d,
+ struct input_bitstream *is,
+ u8 *lens, u32 num_lens)
+{
+ u8 *len_ptr = lens;
+ u8 *lens_end = lens + num_lens;
+ int i;
+
+ /* Read the lengths of the precode codewords. These are given
+ * explicitly.
+ */
+ for (i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
+ d->precode_lens[i] =
+ bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
+ }
+
+ /* Make the decoding table for the precode. */
+ if (make_huffman_decode_table(d->precode_decode_table,
+ LZX_PRECODE_NUM_SYMBOLS,
+ LZX_PRECODE_TABLEBITS,
+ d->precode_lens,
+ LZX_MAX_PRE_CODEWORD_LEN,
+ d->working_space))
+ return -1;
+
+ /* Decode the codeword lengths. */
+ do {
+ u32 presym;
+ u8 len;
+
+ /* Read the next precode symbol. */
+ presym = read_presym(d, is);
+ if (presym < 17) {
+ /* Difference from old length */
+ len = *len_ptr - presym;
+ if ((s8)len < 0)
+ len += 17;
+ *len_ptr++ = len;
+ } else {
+ /* Special RLE values */
+
+ u32 run_len;
+
+ if (presym == 17) {
+ /* Run of 0's */
+ run_len = 4 + bitstream_read_bits(is, 4);
+ len = 0;
+ } else if (presym == 18) {
+ /* Longer run of 0's */
+ run_len = 20 + bitstream_read_bits(is, 5);
+ len = 0;
+ } else {
+ /* Run of identical lengths */
+ run_len = 4 + bitstream_read_bits(is, 1);
+ presym = read_presym(d, is);
+ if (presym > 17)
+ return -1;
+ len = *len_ptr - presym;
+ if ((s8)len < 0)
+ len += 17;
+ }
+
+ do {
+ *len_ptr++ = len;
+ } while (--run_len);
+ /* Worst case overrun is when presym == 18,
+ * run_len == 20 + 31, and only 1 length was remaining.
+ * So LZX_READ_LENS_MAX_OVERRUN == 50.
+ *
+ * Overrun while reading the first half of maincode_lens
+ * can corrupt the previous values in the second half.
+ * This doesn't really matter because the resulting
+ * lengths will still be in range, and data that
+ * generates overruns is invalid anyway.
+ */
+ }
+ } while (len_ptr < lens_end);
+
+ return 0;
+}
+
+/*
+ * Read the header of an LZX block and save the block type and (uncompressed)
+ * size in *block_type_ret and *block_size_ret, respectively.
+ *
+ * If the block is compressed, also update the Huffman decode @tables with the
+ * new Huffman codes. If the block is uncompressed, also update the match
+ * offset queue @recent_offsets with the new match offsets.
+ *
+ * Return 0 on success, or -1 if the data was invalid.
+ */
+static int lzx_read_block_header(struct lzx_decompressor *d,
+ struct input_bitstream *is,
+ int *block_type_ret,
+ u32 *block_size_ret,
+ u32 recent_offsets[])
+{
+ int block_type;
+ u32 block_size;
+ int i;
+
+ bitstream_ensure_bits(is, 4);
+
+ /* The first three bits tell us what kind of block it is, and should be
+ * one of the LZX_BLOCKTYPE_* values.
+ */
+ block_type = bitstream_pop_bits(is, 3);
+
+ /* Read the block size. */
+ if (bitstream_pop_bits(is, 1)) {
+ block_size = LZX_DEFAULT_BLOCK_SIZE;
+ } else {
+ block_size = 0;
+ block_size |= bitstream_read_bits(is, 8);
+ block_size <<= 8;
+ block_size |= bitstream_read_bits(is, 8);
+ }
+
+ switch (block_type) {
+
+ case LZX_BLOCKTYPE_ALIGNED:
+
+ /* Read the aligned offset code and prepare its decode table.
+ */
+
+ for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
+ d->alignedcode_lens[i] =
+ bitstream_read_bits(is,
+ LZX_ALIGNEDCODE_ELEMENT_SIZE);
+ }
+
+ if (make_huffman_decode_table(d->alignedcode_decode_table,
+ LZX_ALIGNEDCODE_NUM_SYMBOLS,
+ LZX_ALIGNEDCODE_TABLEBITS,
+ d->alignedcode_lens,
+ LZX_MAX_ALIGNED_CODEWORD_LEN,
+ d->working_space))
+ return -1;
+
+ /* Fall through, since the rest of the header for aligned offset
+ * blocks is the same as that for verbatim blocks.
+ */
+ fallthrough;
+
+ case LZX_BLOCKTYPE_VERBATIM:
+
+ /* Read the main code and prepare its decode table.
+ *
+ * Note that the codeword lengths in the main code are encoded
+ * in two parts: one part for literal symbols, and one part for
+ * match symbols.
+ */
+
+ if (lzx_read_codeword_lens(d, is, d->maincode_lens,
+ LZX_NUM_CHARS))
+ return -1;
+
+ if (lzx_read_codeword_lens(d, is,
+ d->maincode_lens + LZX_NUM_CHARS,
+ LZX_MAINCODE_NUM_SYMBOLS - LZX_NUM_CHARS))
+ return -1;
+
+ if (make_huffman_decode_table(d->maincode_decode_table,
+ LZX_MAINCODE_NUM_SYMBOLS,
+ LZX_MAINCODE_TABLEBITS,
+ d->maincode_lens,
+ LZX_MAX_MAIN_CODEWORD_LEN,
+ d->working_space))
+ return -1;
+
+ /* Read the length code and prepare its decode table. */
+
+ if (lzx_read_codeword_lens(d, is, d->lencode_lens,
+ LZX_LENCODE_NUM_SYMBOLS))
+ return -1;
+
+ if (make_huffman_decode_table(d->lencode_decode_table,
+ LZX_LENCODE_NUM_SYMBOLS,
+ LZX_LENCODE_TABLEBITS,
+ d->lencode_lens,
+ LZX_MAX_LEN_CODEWORD_LEN,
+ d->working_space))
+ return -1;
+
+ break;
+
+ case LZX_BLOCKTYPE_UNCOMPRESSED:
+
+ /* Before reading the three recent offsets from the uncompressed
+ * block header, the stream must be aligned on a 16-bit
+ * boundary. But if the stream is *already* aligned, then the
+ * next 16 bits must be discarded.
+ */
+ bitstream_ensure_bits(is, 1);
+ bitstream_align(is);
+
+ recent_offsets[0] = bitstream_read_u32(is);
+ recent_offsets[1] = bitstream_read_u32(is);
+ recent_offsets[2] = bitstream_read_u32(is);
+
+ /* Offsets of 0 are invalid. */
+ if (recent_offsets[0] == 0 || recent_offsets[1] == 0 ||
+ recent_offsets[2] == 0)
+ return -1;
+ break;
+
+ default:
+ /* Unrecognized block type. */
+ return -1;
+ }
+
+ *block_type_ret = block_type;
+ *block_size_ret = block_size;
+ return 0;
+}
+
+/* Decompress a block of LZX-compressed data. */
+static int lzx_decompress_block(const struct lzx_decompressor *d,
+ struct input_bitstream *is,
+ int block_type, u32 block_size,
+ u8 * const out_begin, u8 *out_next,
+ u32 recent_offsets[])
+{
+ u8 * const block_end = out_next + block_size;
+ u32 ones_if_aligned = 0U - (block_type == LZX_BLOCKTYPE_ALIGNED);
+
+ do {
+ u32 mainsym;
+ u32 match_len;
+ u32 match_offset;
+ u32 offset_slot;
+ u32 num_extra_bits;
+
+ mainsym = read_mainsym(d, is);
+ if (mainsym < LZX_NUM_CHARS) {
+ /* Literal */
+ *out_next++ = mainsym;
+ continue;
+ }
+
+ /* Match */
+
+ /* Decode the length header and offset slot. */
+ mainsym -= LZX_NUM_CHARS;
+ match_len = mainsym % LZX_NUM_LEN_HEADERS;
+ offset_slot = mainsym / LZX_NUM_LEN_HEADERS;
+
+ /* If needed, read a length symbol to decode the full length. */
+ if (match_len == LZX_NUM_PRIMARY_LENS)
+ match_len += read_lensym(d, is);
+ match_len += LZX_MIN_MATCH_LEN;
+
+ if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
+ /* Repeat offset */
+
+ /* Note: This isn't a real LRU queue, since using the R2
+ * offset doesn't bump the R1 offset down to R2. This
+ * quirk allows all 3 recent offsets to be handled by
+ * the same code. (For R0, the swap is a no-op.)
+ */
+ match_offset = recent_offsets[offset_slot];
+ recent_offsets[offset_slot] = recent_offsets[0];
+ recent_offsets[0] = match_offset;
+ } else {
+ /* Explicit offset */
+
+ /* Look up the number of extra bits that need to be read
+ * to decode offsets with this offset slot.
+ */
+ num_extra_bits = lzx_extra_offset_bits[offset_slot];
+
+ /* Start with the offset slot base value. */
+ match_offset = lzx_offset_slot_base[offset_slot];
+
+ /* In aligned offset blocks, the low-order 3 bits of
+ * each offset are encoded using the aligned offset
+ * code. Otherwise, all the extra bits are literal.
+ */
+
+ if ((num_extra_bits & ones_if_aligned) >= LZX_NUM_ALIGNED_OFFSET_BITS) {
+ match_offset +=
+ bitstream_read_bits(is, num_extra_bits -
+ LZX_NUM_ALIGNED_OFFSET_BITS)
+ << LZX_NUM_ALIGNED_OFFSET_BITS;
+ match_offset += read_alignedsym(d, is);
+ } else {
+ match_offset += bitstream_read_bits(is, num_extra_bits);
+ }
+
+ /* Adjust the offset. */
+ match_offset -= (LZX_NUM_RECENT_OFFSETS - 1);
+
+ /* Update the recent offsets. */
+ recent_offsets[2] = recent_offsets[1];
+ recent_offsets[1] = recent_offsets[0];
+ recent_offsets[0] = match_offset;
+ }
+
+ /* Validate the match, then copy it to the current position. */
+
+ if (match_len > (size_t)(block_end - out_next))
+ return -1;
+
+ if (match_offset > (size_t)(out_next - out_begin))
+ return -1;
+
+ out_next = lz_copy(out_next, match_len, match_offset,
+ block_end, LZX_MIN_MATCH_LEN);
+
+ } while (out_next != block_end);
+
+ return 0;
+}
+
+/*
+ * lzx_allocate_decompressor - Allocate an LZX decompressor
+ *
+ * Return a pointer to the decompressor on success, or NULL on allocation
+ * failure.
+ */
+struct lzx_decompressor *lzx_allocate_decompressor(void)
+{
+ return kmalloc(sizeof(struct lzx_decompressor), GFP_NOFS);
+}
+
+/*
+ * lzx_decompress - Decompress a buffer of LZX-compressed data
+ *
+ * @decompressor: A decompressor allocated with lzx_allocate_decompressor()
+ * @compressed_data: The buffer of data to decompress
+ * @compressed_size: Number of bytes of compressed data
+ * @uncompressed_data: The buffer in which to store the decompressed data
+ * @uncompressed_size: The number of bytes the data decompresses into
+ *
+ * Return 0 on success, or -1 on failure.
+ */
+int lzx_decompress(struct lzx_decompressor *decompressor,
+ const void *compressed_data, size_t compressed_size,
+ void *uncompressed_data, size_t uncompressed_size)
+{
+ struct lzx_decompressor *d = decompressor;
+ u8 * const out_begin = uncompressed_data;
+ u8 *out_next = out_begin;
+ u8 * const out_end = out_begin + uncompressed_size;
+ struct input_bitstream is;
+ u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1};
+ int e8_status = 0;
+
+ init_input_bitstream(&is, compressed_data, compressed_size);
+
+ /* Codeword lengths begin as all 0's for delta encoding purposes. */
+ memset(d->maincode_lens, 0, LZX_MAINCODE_NUM_SYMBOLS);
+ memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
+
+ /* Decompress blocks until we have all the uncompressed data. */
+
+ while (out_next != out_end) {
+ int block_type;
+ u32 block_size;
+
+ if (lzx_read_block_header(d, &is, &block_type, &block_size,
+ recent_offsets))
+ goto invalid;
+
+ if (block_size < 1 || block_size > (size_t)(out_end - out_next))
+ goto invalid;
+
+ if (block_type != LZX_BLOCKTYPE_UNCOMPRESSED) {
+
+ /* Compressed block */
+
+ if (lzx_decompress_block(d,
+ &is,
+ block_type,
+ block_size,
+ out_begin,
+ out_next,
+ recent_offsets))
+ goto invalid;
+
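+ /* A nonzero codeword length for symbol 0xe8 means this
+ * block's main code can emit 0xe8 literals; matches only
+ * copy bytes that were already output.
+ */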
+ e8_status |= d->maincode_lens[0xe8];
+ out_next += block_size;
+ } else {
+ /* Uncompressed block */
+
+ out_next = bitstream_read_bytes(&is, out_next,
+ block_size);
+ if (!out_next)
+ goto invalid;
+
+ if (block_size & 1)
+ bitstream_read_byte(&is);
+
+ e8_status = 1;
+ }
+ }
+
+ /* Postprocess the data unless it cannot possibly contain 0xe8 bytes. */
+ if (e8_status)
+ lzx_postprocess(uncompressed_data, uncompressed_size);
+
+ return 0;
+
+invalid:
+ return -1;
+}
+
+/*
+ * lzx_free_decompressor - Free an LZX decompressor
+ *
+ * @decompressor: A decompressor that was allocated with
+ * lzx_allocate_decompressor(), or NULL.
+ */
+void lzx_free_decompressor(struct lzx_decompressor *decompressor)
+{
+ kfree(decompressor);
+}
diff --git a/fs/ntfs3/lib/xpress_decompress.c b/fs/ntfs3/lib/xpress_decompress.c
new file mode 100644
index 000000000000..3d98f36a981e
--- /dev/null
+++ b/fs/ntfs3/lib/xpress_decompress.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * xpress_decompress.c - A decompressor for the XPRESS compression format
+ * (Huffman variant), which can be used in "System Compressed" files. This is
+ * based on the code from wimlib.
+ *
+ * Copyright (C) 2015 Eric Biggers
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "decompress_common.h"
+#include "lib.h"
+
+#define XPRESS_NUM_SYMBOLS 512
+#define XPRESS_MAX_CODEWORD_LEN 15
+#define XPRESS_MIN_MATCH_LEN 3
+
+/* This value is chosen for fast decompression. */
+#define XPRESS_TABLEBITS 12
+
+/* Reusable heap-allocated memory for XPRESS decompression */
+struct xpress_decompressor {
+
+ /* The Huffman decoding table */
+ u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS];
+
+ /* An array that maps symbols to codeword lengths */
+ u8 lens[XPRESS_NUM_SYMBOLS];
+
+ /* Temporary space for make_huffman_decode_table() */
+ u16 working_space[2 * (1 + XPRESS_MAX_CODEWORD_LEN) +
+ XPRESS_NUM_SYMBOLS];
+};
+
+/*
+ * xpress_allocate_decompressor - Allocate an XPRESS decompressor
+ *
+ * Return a pointer to the decompressor on success, or NULL on allocation
+ * failure.
+ */
+struct xpress_decompressor *xpress_allocate_decompressor(void)
+{
+ return kmalloc(sizeof(struct xpress_decompressor), GFP_NOFS);
+}
+
+/*
+ * xpress_decompress - Decompress a buffer of XPRESS-compressed data
+ *
+ * @decompressor: A decompressor that was allocated with
+ * xpress_allocate_decompressor()
+ * @compressed_data: The buffer of data to decompress
+ * @compressed_size: Number of bytes of compressed data
+ * @uncompressed_data: The buffer in which to store the decompressed data
+ * @uncompressed_size: The number of bytes the data decompresses into
+ *
+ * Return 0 on success, or -1 on failure.
+ */
+int xpress_decompress(struct xpress_decompressor *decompressor,
+ const void *compressed_data, size_t compressed_size,
+ void *uncompressed_data, size_t uncompressed_size)
+{
+ struct xpress_decompressor *d = decompressor;
+ const u8 * const in_begin = compressed_data;
+ u8 * const out_begin = uncompressed_data;
+ u8 *out_next = out_begin;
+ u8 * const out_end = out_begin + uncompressed_size;
+ struct input_bitstream is;
+ u32 i;
+
+ /* Read the Huffman codeword lengths. */
+ if (compressed_size < XPRESS_NUM_SYMBOLS / 2)
+ goto invalid;
+ for (i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
+ d->lens[i*2 + 0] = in_begin[i] & 0xF;
+ d->lens[i*2 + 1] = in_begin[i] >> 4;
+ }
+
+ /* Build a decoding table for the Huffman code. */
+ if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS,
+ XPRESS_TABLEBITS, d->lens,
+ XPRESS_MAX_CODEWORD_LEN,
+ d->working_space))
+ goto invalid;
+
+ /* Decode the matches and literals. */
+
+ init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2,
+ compressed_size - XPRESS_NUM_SYMBOLS / 2);
+
+ while (out_next != out_end) {
+ u32 sym;
+ u32 log2_offset;
+ u32 length;
+ u32 offset;
+
+ sym = read_huffsym(&is, d->decode_table,
+ XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
+ if (sym < 256) {
+ /* Literal */
+ *out_next++ = sym;
+ } else {
+ /* Match */
+ length = sym & 0xf;
+ log2_offset = (sym >> 4) & 0xf;
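+ /* Symbols >= 256 encode a match: the low 4 bits hold the
+ * length header and the next 4 bits hold log2 of the offset.
+ * E.g. (illustrative) log2_offset == 3 with extra bits 0b101
+ * gives offset = (1 << 3) | 5 == 13.
+ */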
+
+ bitstream_ensure_bits(&is, 16);
+
+ offset = ((u32)1 << log2_offset) |
+ bitstream_pop_bits(&is, log2_offset);
+
+ if (length == 0xf) {
+ length += bitstream_read_byte(&is);
+ if (length == 0xf + 0xff)
+ length = bitstream_read_u16(&is);
+ }
+ length += XPRESS_MIN_MATCH_LEN;
+
+ if (offset > (size_t)(out_next - out_begin))
+ goto invalid;
+
+ if (length > (size_t)(out_end - out_next))
+ goto invalid;
+
+ out_next = lz_copy(out_next, length, offset, out_end,
+ XPRESS_MIN_MATCH_LEN);
+ }
+ }
+ return 0;
+
+invalid:
+ return -1;
+}
+
+/*
+ * xpress_free_decompressor - Free an XPRESS decompressor
+ *
+ * @decompressor: A decompressor that was allocated with
+ * xpress_allocate_decompressor(), or NULL.
+ */
+void xpress_free_decompressor(struct xpress_decompressor *decompressor)
+{
+ kfree(decompressor);
+}
diff --git a/fs/ntfs3/lznt.c b/fs/ntfs3/lznt.c
new file mode 100644
index 000000000000..ead9ab7d69b3
--- /dev/null
+++ b/fs/ntfs3/lznt.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+// clang-format off
+/* The source buffer is all zeros. */
+#define LZNT_ERROR_ALL_ZEROS 1
+#define LZNT_CHUNK_SIZE 0x1000
+// clang-format on
+
+struct lznt_hash {
+ const u8 *p1;
+ const u8 *p2;
+};
+
+struct lznt {
+ const u8 *unc;
+ const u8 *unc_end;
+ const u8 *best_match;
+ size_t max_len;
+ bool std;
+
+ struct lznt_hash hash[LZNT_CHUNK_SIZE];
+};
+
+static inline size_t get_match_len(const u8 *ptr, const u8 *end, const u8 *prev,
+ size_t max_len)
+{
+ size_t len = 0;
+
+ while (ptr + len < end && ptr[len] == prev[len] && ++len < max_len)
+ ;
+ return len;
+}
+
+static size_t longest_match_std(const u8 *src, struct lznt *ctx)
+{
+ size_t hash_index;
+ size_t len1 = 0, len2 = 0;
+ const u8 **hash;
+
+ hash_index =
+ ((40543U * ((((src[0] << 4) ^ src[1]) << 4) ^ src[2])) >> 4) &
+ (LZNT_CHUNK_SIZE - 1);
+
+ hash = &(ctx->hash[hash_index].p1);
+
+ if (hash[0] >= ctx->unc && hash[0] < src && hash[0][0] == src[0] &&
+ hash[0][1] == src[1] && hash[0][2] == src[2]) {
+ len1 = 3;
+ if (ctx->max_len > 3)
+ len1 += get_match_len(src + 3, ctx->unc_end,
+ hash[0] + 3, ctx->max_len - 3);
+ }
+
+ if (hash[1] >= ctx->unc && hash[1] < src && hash[1][0] == src[0] &&
+ hash[1][1] == src[1] && hash[1][2] == src[2]) {
+ len2 = 3;
+ if (ctx->max_len > 3)
+ len2 += get_match_len(src + 3, ctx->unc_end,
+ hash[1] + 3, ctx->max_len - 3);
+ }
+
+ /* Compare two matches and select the best one */
+ if (len1 < len2) {
+ ctx->best_match = hash[1];
+ len1 = len2;
+ } else {
+ ctx->best_match = hash[0];
+ }
+
+ hash[1] = hash[0];
+ hash[0] = src;
+ return len1;
+}
+
+static size_t longest_match_best(const u8 *src, struct lznt *ctx)
+{
+ size_t max_len;
+ const u8 *ptr;
+
+ if (ctx->unc >= src || !ctx->max_len)
+ return 0;
+
+ max_len = 0;
+ for (ptr = ctx->unc; ptr < src; ++ptr) {
+ size_t len =
+ get_match_len(src, ctx->unc_end, ptr, ctx->max_len);
+ if (len >= max_len) {
+ max_len = len;
+ ctx->best_match = ptr;
+ }
+ }
+
+ return max_len >= 3 ? max_len : 0;
+}
+
+static const size_t s_max_len[] = {
+ 0x1002, 0x802, 0x402, 0x202, 0x102, 0x82, 0x42, 0x22, 0x12,
+};
+
+static const size_t s_max_off[] = {
+ 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+};
+
+static inline u16 make_pair(size_t offset, size_t len, size_t index)
+{
+ return ((offset - 1) << (12 - index)) |
+ ((len - 3) & (((1 << (12 - index)) - 1)));
+}
+
+static inline size_t parse_pair(u16 pair, size_t *offset, size_t index)
+{
+ *offset = 1 + (pair >> (12 - index));
+ return 3 + (pair & ((1 << (12 - index)) - 1));
+}
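+
+/*
+ * Example (illustrative): with index 0 the pair splits into a 4-bit offset
+ * field and a 12-bit length field, so make_pair(4, 10, 0) yields
+ * (3 << 12) | 7 == 0x3007, and parse_pair(0x3007, &offset, 0) restores
+ * offset == 4 and returns length 10. As the position in the chunk
+ * advances, 'index' grows and bits move from the length field to the
+ * offset field.
+ */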
+
+/*
+ * compress_chunk
+ *
+ * Returns one of three values:
+ * 0 - ok, 'cmpr' contains 'cmpr_chunk_size' bytes of compressed data
+ * 1 - the input buffer is all zeros
+ * -2 - the compressed buffer is too small to hold the compressed data
+ */
+static inline int compress_chunk(size_t (*match)(const u8 *, struct lznt *),
+ const u8 *unc, const u8 *unc_end, u8 *cmpr,
+ u8 *cmpr_end, size_t *cmpr_chunk_size,
+ struct lznt *ctx)
+{
+ size_t cnt = 0;
+ size_t idx = 0;
+ const u8 *up = unc;
+ u8 *cp = cmpr + 3;
+ u8 *cp2 = cmpr + 2;
+ u8 not_zero = 0;
+ /* Control byte of 8 flag bits: 0 - copy the byte as is, 1 - a length/offset pair follows. */
+ u8 ohdr = 0;
+ u8 *last;
+ u16 t16;
+
+ if (unc + LZNT_CHUNK_SIZE < unc_end)
+ unc_end = unc + LZNT_CHUNK_SIZE;
+
+ last = min(cmpr + LZNT_CHUNK_SIZE + sizeof(short), cmpr_end);
+
+ ctx->unc = unc;
+ ctx->unc_end = unc_end;
+ ctx->max_len = s_max_len[0];
+
+ while (up < unc_end) {
+ size_t max_len;
+
+ while (unc + s_max_off[idx] < up)
+ ctx->max_len = s_max_len[++idx];
+
+ // Find match
+ max_len = up + 3 <= unc_end ? (*match)(up, ctx) : 0;
+
+ if (!max_len) {
+ if (cp >= last)
+ goto NotCompressed;
+ not_zero |= *cp++ = *up++;
+ } else if (cp + 1 >= last) {
+ goto NotCompressed;
+ } else {
+ t16 = make_pair(up - ctx->best_match, max_len, idx);
+ *cp++ = t16;
+ *cp++ = t16 >> 8;
+
+ ohdr |= 1 << cnt;
+ up += max_len;
+ }
+
+ cnt = (cnt + 1) & 7;
+ if (!cnt) {
+ *cp2 = ohdr;
+ ohdr = 0;
+ cp2 = cp;
+ cp += 1;
+ }
+ }
+
+ if (cp2 < last)
+ *cp2 = ohdr;
+ else
+ cp -= 1;
+
+ *cmpr_chunk_size = cp - cmpr;
+
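+ /*
+ * Chunk header layout: bit 15 is the "compressed" flag, bits 12-14
+ * are the signature (3) and bits 0-11 hold the total chunk size
+ * minus 3.
+ */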
+ t16 = (*cmpr_chunk_size - 3) | 0xB000;
+ cmpr[0] = t16;
+ cmpr[1] = t16 >> 8;
+
+ return not_zero ? 0 : LZNT_ERROR_ALL_ZEROS;
+
+NotCompressed:
+
+ if ((cmpr + LZNT_CHUNK_SIZE + sizeof(short)) > last)
+ return -2;
+
+ /*
+ * Store the chunk uncompressed.
+ * 0x3FFF == ((LZNT_CHUNK_SIZE + 2 - 3) | 0x3000)
+ */
+ cmpr[0] = 0xff;
+ cmpr[1] = 0x3f;
+
+ memcpy(cmpr + sizeof(short), unc, LZNT_CHUNK_SIZE);
+ *cmpr_chunk_size = LZNT_CHUNK_SIZE + sizeof(short);
+
+ return 0;
+}
+
+static inline ssize_t decompress_chunk(u8 *unc, u8 *unc_end, const u8 *cmpr,
+ const u8 *cmpr_end)
+{
+ u8 *up = unc;
+ u8 ch = *cmpr++;
+ size_t bit = 0;
+ size_t index = 0;
+ u16 pair;
+ size_t offset, length;
+
+ /* Decompress while both pointers are inside their ranges. */
+ while (up < unc_end && cmpr < cmpr_end) {
+ /* Correct index */
+ while (unc + s_max_off[index] < up)
+ index += 1;
+
+ /* Check the current flag for zero */
+ if (!(ch & (1 << bit))) {
+ /* Just copy byte */
+ *up++ = *cmpr++;
+ goto next;
+ }
+
+ /* Check for boundary */
+ if (cmpr + 1 >= cmpr_end)
+ return -EINVAL;
+
+ /* Read a short from little endian stream */
+ pair = cmpr[1];
+ pair <<= 8;
+ pair |= cmpr[0];
+
+ cmpr += 2;
+
+ /* Translate packed information into offset and length */
+ length = parse_pair(pair, &offset, index);
+
+ /* Check offset for boundary */
+ if (unc + offset > up)
+ return -EINVAL;
+
+ /* Truncate the length if necessary */
+ if (up + length >= unc_end)
+ length = unc_end - up;
+
+ /* Now we copy bytes. This is the heart of LZ algorithm. */
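+ /* The copy must be byte-by-byte: 'offset' may be smaller than
+ * 'length', e.g. offset == 1 replicates the previous byte.
+ */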
+ for (; length > 0; length--, up++)
+ *up = *(up - offset);
+
+next:
+ /* Advance flag bit value */
+ bit = (bit + 1) & 7;
+
+ if (!bit) {
+ if (cmpr >= cmpr_end)
+ break;
+
+ ch = *cmpr++;
+ }
+ }
+
+ /* return the size of uncompressed data */
+ return up - unc;
+}
+
+/*
+ * get_lznt_ctx
+ *
+ * level 0 - standard compression
+ * any other level - best compression, requires a lot of CPU
+ */
+struct lznt *get_lznt_ctx(int level)
+{
+ struct lznt *r = ntfs_zalloc(level ? offsetof(struct lznt, hash)
+ : sizeof(struct lznt));
+
+ if (r)
+ r->std = !level;
+ return r;
+}
+
+/*
+ * compress_lznt
+ *
+ * Compresses "unc" into "cmpr"
+ * +x - ok, 'cmpr' contains 'final_compressed_size' bytes of compressed data
+ * 0 - input buffer is full zero
+ */
+size_t compress_lznt(const void *unc, size_t unc_size, void *cmpr,
+ size_t cmpr_size, struct lznt *ctx)
+{
+ int err;
+ size_t (*match)(const u8 *src, struct lznt *ctx);
+ u8 *p = cmpr;
+ u8 *end = p + cmpr_size;
+ const u8 *unc_chunk = unc;
+ const u8 *unc_end = unc_chunk + unc_size;
+ bool is_zero = true;
+
+ if (ctx->std) {
+ match = &longest_match_std;
+ memset(ctx->hash, 0, sizeof(ctx->hash));
+ } else {
+ match = &longest_match_best;
+ }
+
+ /* compression cycle */
+ for (; unc_chunk < unc_end; unc_chunk += LZNT_CHUNK_SIZE) {
+ cmpr_size = 0;
+ err = compress_chunk(match, unc_chunk, unc_end, p, end,
+ &cmpr_size, ctx);
+ if (err < 0)
+ return unc_size;
+
+ if (is_zero && err != LZNT_ERROR_ALL_ZEROS)
+ is_zero = false;
+
+ p += cmpr_size;
+ }
+
+ if (p <= end - 2)
+ p[0] = p[1] = 0;
+
+ return is_zero ? 0 : PtrOffset(cmpr, p);
+}
+
+/*
+ * decompress_lznt
+ *
+ * decompresses "cmpr" into "unc"
+ */
+ssize_t decompress_lznt(const void *cmpr, size_t cmpr_size, void *unc,
+ size_t unc_size)
+{
+ const u8 *cmpr_chunk = cmpr;
+ const u8 *cmpr_end = cmpr_chunk + cmpr_size;
+ u8 *unc_chunk = unc;
+ u8 *unc_end = unc_chunk + unc_size;
+ u16 chunk_hdr;
+
+ if (cmpr_size < sizeof(short))
+ return -EINVAL;
+
+ /* read chunk header */
+ chunk_hdr = cmpr_chunk[1];
+ chunk_hdr <<= 8;
+ chunk_hdr |= cmpr_chunk[0];
+
+ /* loop through decompressing chunks */
+ for (;;) {
+ size_t chunk_size_saved;
+ size_t unc_use;
+ size_t cmpr_use = 3 + (chunk_hdr & (LZNT_CHUNK_SIZE - 1));
+
+ /* Check that the chunk actually fits the supplied buffer */
+ if (cmpr_chunk + cmpr_use > cmpr_end)
+ return -EINVAL;
+
+ /* First make sure the chunk contains compressed data */
+ if (chunk_hdr & 0x8000) {
+ /* Decompress a chunk and return if we get an error */
+ ssize_t err =
+ decompress_chunk(unc_chunk, unc_end,
+ cmpr_chunk + sizeof(chunk_hdr),
+ cmpr_chunk + cmpr_use);
+ if (err < 0)
+ return err;
+ unc_use = err;
+ } else {
+ /* This chunk does not contain compressed data */
+ unc_use = unc_chunk + LZNT_CHUNK_SIZE > unc_end
+ ? unc_end - unc_chunk
+ : LZNT_CHUNK_SIZE;
+
+ if (cmpr_chunk + sizeof(chunk_hdr) + unc_use >
+ cmpr_end) {
+ return -EINVAL;
+ }
+
+ memcpy(unc_chunk, cmpr_chunk + sizeof(chunk_hdr),
+ unc_use);
+ }
+
+ /* Advance pointers */
+ cmpr_chunk += cmpr_use;
+ unc_chunk += unc_use;
+
+ /* Check for the end of unc buffer */
+ if (unc_chunk >= unc_end)
+ break;
+
+ /* Proceed to the next chunk */
+ if (cmpr_chunk > cmpr_end - 2)
+ break;
+
+ chunk_size_saved = LZNT_CHUNK_SIZE;
+
+ /* read chunk header */
+ chunk_hdr = cmpr_chunk[1];
+ chunk_hdr <<= 8;
+ chunk_hdr |= cmpr_chunk[0];
+
+ if (!chunk_hdr)
+ break;
+
+ /* Check the size of unc buffer */
+ if (unc_use < chunk_size_saved) {
+ size_t t1 = chunk_size_saved - unc_use;
+ u8 *t2 = unc_chunk + t1;
+
+ /* 'Zero' memory */
+ if (t2 >= unc_end)
+ break;
+
+ memset(unc_chunk, 0, t1);
+ unc_chunk = t2;
+ }
+ }
+
+ /* Check compression boundary */
+ if (cmpr_chunk > cmpr_end)
+ return -EINVAL;
+
+ /*
+ * The uncompressed size is the difference between the
+ * current pointer and the original one.
+ */
+ return PtrOffset(unc, unc_chunk);
+}
--
2.30.0
From: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
mainline inclusion
from mainline-v5.15-rc1
commit be71b5cba2e6485e8959da7a9f9a44461a1bb074
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
This adds attrib operations
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/attrib.c | 2096 +++++++++++++++++++++++++++++++++++++++++++
fs/ntfs3/attrlist.c | 456 ++++++++++
fs/ntfs3/xattr.c | 1128 +++++++++++++++++++++++
3 files changed, 3680 insertions(+)
create mode 100644 fs/ntfs3/attrib.c
create mode 100644 fs/ntfs3/attrlist.c
create mode 100644 fs/ntfs3/xattr.c
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c
new file mode 100644
index 000000000000..046dc57f75f2
--- /dev/null
+++ b/fs/ntfs3/attrib.c
@@ -0,0 +1,2096 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ * TODO: merge attr_set_size/attr_data_get_block/attr_allocate_frame?
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/hash.h>
+#include <linux/nls.h>
+#include <linux/ratelimit.h>
+#include <linux/slab.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+/*
+ * NTFS_MIN_LOG2_OF_CLUMP and NTFS_MAX_LOG2_OF_CLUMP may be defined
+ * externally to tune the preallocation algorithm.
+ */
+#ifndef NTFS_MIN_LOG2_OF_CLUMP
+#define NTFS_MIN_LOG2_OF_CLUMP 16
+#endif
+
+#ifndef NTFS_MAX_LOG2_OF_CLUMP
+#define NTFS_MAX_LOG2_OF_CLUMP 26
+#endif
+
+// 16M
+#define NTFS_CLUMP_MIN (1 << (NTFS_MIN_LOG2_OF_CLUMP + 8))
+// 16G
+#define NTFS_CLUMP_MAX (1ull << (NTFS_MAX_LOG2_OF_CLUMP + 8))
+
+/*
+ * get_pre_allocated
+ *
+ * Return 'size' rounded up to a clump boundary that grows with the
+ * file size.
+ */
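+/*
+ * Example (illustrative): for size == 1 GiB, size >> 24 == 64, so
+ * align_shift == 15 + __ffs(64) == 21 and the size is rounded up to
+ * a 2 MiB boundary; small files round to 64 KiB, huge ones to 64 MiB.
+ */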
+static inline u64 get_pre_allocated(u64 size)
+{
+ u32 clump;
+ u8 align_shift;
+ u64 ret;
+
+ if (size <= NTFS_CLUMP_MIN) {
+ clump = 1 << NTFS_MIN_LOG2_OF_CLUMP;
+ align_shift = NTFS_MIN_LOG2_OF_CLUMP;
+ } else if (size >= NTFS_CLUMP_MAX) {
+ clump = 1 << NTFS_MAX_LOG2_OF_CLUMP;
+ align_shift = NTFS_MAX_LOG2_OF_CLUMP;
+ } else {
+ align_shift = NTFS_MIN_LOG2_OF_CLUMP - 1 +
+ __ffs(size >> (8 + NTFS_MIN_LOG2_OF_CLUMP));
+ clump = 1u << align_shift;
+ }
+
+ ret = (((size + clump - 1) >> align_shift)) << align_shift;
+
+ return ret;
+}
+
+/*
+ * attr_must_be_resident
+ *
+ * returns true if attribute must be resident
+ */
+static inline bool attr_must_be_resident(struct ntfs_sb_info *sbi,
+ enum ATTR_TYPE type)
+{
+ const struct ATTR_DEF_ENTRY *de;
+
+ switch (type) {
+ case ATTR_STD:
+ case ATTR_NAME:
+ case ATTR_ID:
+ case ATTR_LABEL:
+ case ATTR_VOL_INFO:
+ case ATTR_ROOT:
+ case ATTR_EA_INFO:
+ return true;
+ default:
+ de = ntfs_query_def(sbi, type);
+ if (de && (de->flags & NTFS_ATTR_MUST_BE_RESIDENT))
+ return true;
+ return false;
+ }
+}
+
+/*
+ * attr_load_runs
+ *
+ * load all runs stored in 'attr'
+ */
+int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni,
+ struct runs_tree *run, const CLST *vcn)
+{
+ int err;
+ CLST svcn = le64_to_cpu(attr->nres.svcn);
+ CLST evcn = le64_to_cpu(attr->nres.evcn);
+ u32 asize;
+ u16 run_off;
+
+ if (svcn >= evcn + 1 || run_is_mapped_full(run, svcn, evcn))
+ return 0;
+
+ if (vcn && (evcn < *vcn || *vcn < svcn))
+ return -EINVAL;
+
+ asize = le32_to_cpu(attr->size);
+ run_off = le16_to_cpu(attr->nres.run_off);
+ err = run_unpack_ex(run, ni->mi.sbi, ni->mi.rno, svcn, evcn,
+ vcn ? *vcn : svcn, Add2Ptr(attr, run_off),
+ asize - run_off);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+/*
+ * run_deallocate_ex
+ *
+ * Deallocate clusters
+ */
+static int run_deallocate_ex(struct ntfs_sb_info *sbi, struct runs_tree *run,
+ CLST vcn, CLST len, CLST *done, bool trim)
+{
+ int err = 0;
+ CLST vcn_next, vcn0 = vcn, lcn, clen, dn = 0;
+ size_t idx;
+
+ if (!len)
+ goto out;
+
+ if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) {
+failed:
+ run_truncate(run, vcn0);
+ err = -EINVAL;
+ goto out;
+ }
+
+ for (;;) {
+ if (clen > len)
+ clen = len;
+
+ if (!clen) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (lcn != SPARSE_LCN) {
+ mark_as_free_ex(sbi, lcn, clen, trim);
+ dn += clen;
+ }
+
+ len -= clen;
+ if (!len)
+ break;
+
+ vcn_next = vcn + clen;
+ if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
+ vcn != vcn_next) {
+ // save memory - don't load entire run
+ goto failed;
+ }
+ }
+
+out:
+ if (done)
+ *done += dn;
+
+ return err;
+}
+
+/*
+ * attr_allocate_clusters
+ *
+ * find free space, mark it as used and store in 'run'
+ */
+int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run,
+ CLST vcn, CLST lcn, CLST len, CLST *pre_alloc,
+ enum ALLOCATE_OPT opt, CLST *alen, const size_t fr,
+ CLST *new_lcn)
+{
+ int err;
+ CLST flen, vcn0 = vcn, pre = pre_alloc ? *pre_alloc : 0;
+ struct wnd_bitmap *wnd = &sbi->used.bitmap;
+ size_t cnt = run->count;
+
+ for (;;) {
+ err = ntfs_look_for_free_space(sbi, lcn, len + pre, &lcn, &flen,
+ opt);
+
+ if (err == -ENOSPC && pre) {
+ pre = 0;
+ if (*pre_alloc)
+ *pre_alloc = 0;
+ continue;
+ }
+
+ if (err)
+ goto out;
+
+ if (new_lcn && vcn == vcn0)
+ *new_lcn = lcn;
+
+ /* Add new fragment into run storage */
+ if (!run_add_entry(run, vcn, lcn, flen, opt == ALLOCATE_MFT)) {
+ down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
+ wnd_set_free(wnd, lcn, flen);
+ up_write(&wnd->rw_lock);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ vcn += flen;
+
+ if (flen >= len || opt == ALLOCATE_MFT ||
+ (fr && run->count - cnt >= fr)) {
+ *alen = vcn - vcn0;
+ return 0;
+ }
+
+ len -= flen;
+ }
+
+out:
+ /* undo */
+ run_deallocate_ex(sbi, run, vcn0, vcn - vcn0, NULL, false);
+ run_truncate(run, vcn0);
+
+ return err;
+}
+
+/*
+ * If 'page' is not NULL, it already contains the resident data
+ * and is locked (called from ni_write_frame()).
+ */
+int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr,
+ struct ATTR_LIST_ENTRY *le, struct mft_inode *mi,
+ u64 new_size, struct runs_tree *run,
+ struct ATTRIB **ins_attr, struct page *page)
+{
+ struct ntfs_sb_info *sbi;
+ struct ATTRIB *attr_s;
+ struct MFT_REC *rec;
+ u32 used, asize, rsize, aoff, align;
+ bool is_data;
+ CLST len, alen;
+ char *next;
+ int err;
+
+ if (attr->non_res) {
+ *ins_attr = attr;
+ return 0;
+ }
+
+ sbi = mi->sbi;
+ rec = mi->mrec;
+ attr_s = NULL;
+ used = le32_to_cpu(rec->used);
+ asize = le32_to_cpu(attr->size);
+ next = Add2Ptr(attr, asize);
+ aoff = PtrOffset(rec, attr);
+ rsize = le32_to_cpu(attr->res.data_size);
+ is_data = attr->type == ATTR_DATA && !attr->name_len;
+
+ align = sbi->cluster_size;
+ if (is_attr_compressed(attr))
+ align <<= COMPRESSION_UNIT;
+ len = (rsize + align - 1) >> sbi->cluster_bits;
+
+ run_init(run);
+
+ /* make a copy of original attribute */
+ attr_s = ntfs_memdup(attr, asize);
+ if (!attr_s) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (!len) {
+ /* empty resident -> empty nonresident */
+ alen = 0;
+ } else {
+ const char *data = resident_data(attr);
+
+ err = attr_allocate_clusters(sbi, run, 0, 0, len, NULL,
+ ALLOCATE_DEF, &alen, 0, NULL);
+ if (err)
+ goto out1;
+
+ if (!rsize) {
+ /* empty resident -> non-empty nonresident */
+ } else if (!is_data) {
+ err = ntfs_sb_write_run(sbi, run, 0, data, rsize);
+ if (err)
+ goto out2;
+ } else if (!page) {
+ char *kaddr;
+
+ page = grab_cache_page(ni->vfs_inode.i_mapping, 0);
+ if (!page) {
+ err = -ENOMEM;
+ goto out2;
+ }
+ kaddr = kmap_atomic(page);
+ memcpy(kaddr, data, rsize);
+ memset(kaddr + rsize, 0, PAGE_SIZE - rsize);
+ kunmap_atomic(kaddr);
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ put_page(page);
+ }
+ }
+
+ /* remove original attribute */
+ used -= asize;
+ memmove(attr, Add2Ptr(attr, asize), used - aoff);
+ rec->used = cpu_to_le32(used);
+ mi->dirty = true;
+ if (le)
+ al_remove_le(ni, le);
+
+ err = ni_insert_nonresident(ni, attr_s->type, attr_name(attr_s),
+ attr_s->name_len, run, 0, alen,
+ attr_s->flags, &attr, NULL);
+ if (err)
+ goto out3;
+
+ ntfs_free(attr_s);
+ attr->nres.data_size = cpu_to_le64(rsize);
+ attr->nres.valid_size = attr->nres.data_size;
+
+ *ins_attr = attr;
+
+ if (is_data)
+ ni->ni_flags &= ~NI_FLAG_RESIDENT;
+
+ /* The resident attribute has become nonresident. */
+ return 0;
+
+out3:
+ attr = Add2Ptr(rec, aoff);
+ memmove(next, attr, used - aoff);
+ memcpy(attr, attr_s, asize);
+ rec->used = cpu_to_le32(used + asize);
+ mi->dirty = true;
+out2:
+ /* undo: do not trim new allocated clusters */
+ run_deallocate(sbi, run, false);
+ run_close(run);
+out1:
+ ntfs_free(attr_s);
+ /*reinsert le*/
+out:
+ return err;
+}
+
+/*
+ * attr_set_size_res
+ *
+ * helper for attr_set_size
+ */
+static int attr_set_size_res(struct ntfs_inode *ni, struct ATTRIB *attr,
+ struct ATTR_LIST_ENTRY *le, struct mft_inode *mi,
+ u64 new_size, struct runs_tree *run,
+ struct ATTRIB **ins_attr)
+{
+ struct ntfs_sb_info *sbi = mi->sbi;
+ struct MFT_REC *rec = mi->mrec;
+ u32 used = le32_to_cpu(rec->used);
+ u32 asize = le32_to_cpu(attr->size);
+ u32 aoff = PtrOffset(rec, attr);
+ u32 rsize = le32_to_cpu(attr->res.data_size);
+ u32 tail = used - aoff - asize;
+ char *next = Add2Ptr(attr, asize);
+ s64 dsize = QuadAlign(new_size) - QuadAlign(rsize);
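+ /* dsize is the change in the 8-byte-aligned payload: e.g. growing
+ * the resident data from 5 to 13 bytes gives 16 - 8 == 8.
+ */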
+
+ if (dsize < 0) {
+ memmove(next + dsize, next, tail);
+ } else if (dsize > 0) {
+ if (used + dsize > sbi->max_bytes_per_attr)
+ return attr_make_nonresident(ni, attr, le, mi, new_size,
+ run, ins_attr, NULL);
+
+ memmove(next + dsize, next, tail);
+ memset(next, 0, dsize);
+ }
+
+ if (new_size > rsize)
+ memset(Add2Ptr(resident_data(attr), rsize), 0,
+ new_size - rsize);
+
+ rec->used = cpu_to_le32(used + dsize);
+ attr->size = cpu_to_le32(asize + dsize);
+ attr->res.data_size = cpu_to_le32(new_size);
+ mi->dirty = true;
+ *ins_attr = attr;
+
+ return 0;
+}
+
+/*
+ * attr_set_size
+ *
+ * Change the size of an attribute.
+ * Extend:
+ * - sparse/compressed: no clusters are allocated
+ * - normal: append newly allocated and preallocated clusters
+ * Shrink:
+ * - do not deallocate if 'keep_prealloc' is set
+ */
+int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type,
+ const __le16 *name, u8 name_len, struct runs_tree *run,
+ u64 new_size, const u64 *new_valid, bool keep_prealloc,
+ struct ATTRIB **ret)
+{
+ int err = 0;
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ u8 cluster_bits = sbi->cluster_bits;
+ bool is_mft =
+ ni->mi.rno == MFT_REC_MFT && type == ATTR_DATA && !name_len;
+ u64 old_valid, old_size, old_alloc, new_alloc, new_alloc_tmp;
+ struct ATTRIB *attr = NULL, *attr_b;
+ struct ATTR_LIST_ENTRY *le, *le_b;
+ struct mft_inode *mi, *mi_b;
+ CLST alen, vcn, lcn, new_alen, old_alen, svcn, evcn;
+ CLST next_svcn, pre_alloc = -1, done = 0;
+ bool is_ext;
+ u32 align;
+ struct MFT_REC *rec;
+
+again:
+ le_b = NULL;
+ attr_b = ni_find_attr(ni, NULL, &le_b, type, name, name_len, NULL,
+ &mi_b);
+ if (!attr_b) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ if (!attr_b->non_res) {
+ err = attr_set_size_res(ni, attr_b, le_b, mi_b, new_size, run,
+ &attr_b);
+ if (err || !attr_b->non_res)
+ goto out;
+
+ /* layout of records may be changed, so do a full search */
+ goto again;
+ }
+
+ is_ext = is_attr_ext(attr_b);
+
+again_1:
+ align = sbi->cluster_size;
+
+ if (is_ext) {
+ align <<= attr_b->nres.c_unit;
+ if (is_attr_sparsed(attr_b))
+ keep_prealloc = false;
+ }
+
+ old_valid = le64_to_cpu(attr_b->nres.valid_size);
+ old_size = le64_to_cpu(attr_b->nres.data_size);
+ old_alloc = le64_to_cpu(attr_b->nres.alloc_size);
+ old_alen = old_alloc >> cluster_bits;
+
+ new_alloc = (new_size + align - 1) & ~(u64)(align - 1);
+ new_alen = new_alloc >> cluster_bits;
+
+ if (keep_prealloc && is_ext)
+ keep_prealloc = false;
+
+ if (keep_prealloc && new_size < old_size) {
+ attr_b->nres.data_size = cpu_to_le64(new_size);
+ mi_b->dirty = true;
+ goto ok;
+ }
+
+ vcn = old_alen - 1;
+
+ svcn = le64_to_cpu(attr_b->nres.svcn);
+ evcn = le64_to_cpu(attr_b->nres.evcn);
+
+ if (svcn <= vcn && vcn <= evcn) {
+ attr = attr_b;
+ le = le_b;
+ mi = mi_b;
+ } else if (!le_b) {
+ err = -EINVAL;
+ goto out;
+ } else {
+ le = le_b;
+ attr = ni_find_attr(ni, attr_b, &le, type, name, name_len, &vcn,
+ &mi);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+next_le_1:
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn = le64_to_cpu(attr->nres.evcn);
+ }
+
+next_le:
+ rec = mi->mrec;
+
+ err = attr_load_runs(attr, ni, run, NULL);
+ if (err)
+ goto out;
+
+ if (new_size > old_size) {
+ CLST to_allocate;
+ size_t free;
+
+ if (new_alloc <= old_alloc) {
+ attr_b->nres.data_size = cpu_to_le64(new_size);
+ mi_b->dirty = true;
+ goto ok;
+ }
+
+ to_allocate = new_alen - old_alen;
+add_alloc_in_same_attr_seg:
+ lcn = 0;
+ if (is_mft) {
+ /* mft allocates clusters from mftzone */
+ pre_alloc = 0;
+ } else if (is_ext) {
+ /* No preallocation for sparse/compressed attributes. */
+ pre_alloc = 0;
+ } else if (pre_alloc == -1) {
+ pre_alloc = 0;
+ if (type == ATTR_DATA && !name_len &&
+ sbi->options.prealloc) {
+ CLST new_alen2 = bytes_to_cluster(
+ sbi, get_pre_allocated(new_size));
+ pre_alloc = new_alen2 - new_alen;
+ }
+
+ /* Get the last lcn to allocate from */
+ if (old_alen &&
+ !run_lookup_entry(run, vcn, &lcn, NULL, NULL)) {
+ lcn = SPARSE_LCN;
+ }
+
+ if (lcn == SPARSE_LCN)
+ lcn = 0;
+ else if (lcn)
+ lcn += 1;
+
+ free = wnd_zeroes(&sbi->used.bitmap);
+ if (to_allocate > free) {
+ err = -ENOSPC;
+ goto out;
+ }
+
+ if (pre_alloc && to_allocate + pre_alloc > free)
+ pre_alloc = 0;
+ }
+
+ vcn = old_alen;
+
+ if (is_ext) {
+ if (!run_add_entry(run, vcn, SPARSE_LCN, to_allocate,
+ false)) {
+ err = -ENOMEM;
+ goto out;
+ }
+ alen = to_allocate;
+ } else {
+ /* ~3 bytes per fragment */
+ err = attr_allocate_clusters(
+ sbi, run, vcn, lcn, to_allocate, &pre_alloc,
+ is_mft ? ALLOCATE_MFT : 0, &alen,
+ is_mft ? 0
+ : (sbi->record_size -
+ le32_to_cpu(rec->used) + 8) /
+ 3 +
+ 1,
+ NULL);
+ if (err)
+ goto out;
+ }
+
+ done += alen;
+ vcn += alen;
+ if (to_allocate > alen)
+ to_allocate -= alen;
+ else
+ to_allocate = 0;
+
+pack_runs:
+ err = mi_pack_runs(mi, attr, run, vcn - svcn);
+ if (err)
+ goto out;
+
+ next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
+ new_alloc_tmp = (u64)next_svcn << cluster_bits;
+ attr_b->nres.alloc_size = cpu_to_le64(new_alloc_tmp);
+ mi_b->dirty = true;
+
+ if (next_svcn >= vcn && !to_allocate) {
+ /* Normal case: update the attribute and exit. */
+ attr_b->nres.data_size = cpu_to_le64(new_size);
+ goto ok;
+ }
+
+ /* Keep at least two MFT records to avoid a recursive loop. */
+ if (is_mft && next_svcn == vcn &&
+ ((u64)done << sbi->cluster_bits) >= 2 * sbi->record_size) {
+ new_size = new_alloc_tmp;
+ attr_b->nres.data_size = attr_b->nres.alloc_size;
+ goto ok;
+ }
+
+ if (le32_to_cpu(rec->used) < sbi->record_size) {
+ old_alen = next_svcn;
+ evcn = old_alen - 1;
+ goto add_alloc_in_same_attr_seg;
+ }
+
+ attr_b->nres.data_size = attr_b->nres.alloc_size;
+ if (new_alloc_tmp < old_valid)
+ attr_b->nres.valid_size = attr_b->nres.data_size;
+
+ if (type == ATTR_LIST) {
+ err = ni_expand_list(ni);
+ if (err)
+ goto out;
+ if (next_svcn < vcn)
+ goto pack_runs;
+
+ /* layout of records is changed */
+ goto again;
+ }
+
+ if (!ni->attr_list.size) {
+ err = ni_create_attr_list(ni);
+ if (err)
+ goto out;
+ /* layout of records is changed */
+ }
+
+ if (next_svcn >= vcn) {
+ /* this is mft data, repeat */
+ goto again;
+ }
+
+ /* insert new attribute segment */
+ err = ni_insert_nonresident(ni, type, name, name_len, run,
+ next_svcn, vcn - next_svcn,
+ attr_b->flags, &attr, &mi);
+ if (err)
+ goto out;
+
+ if (!is_mft)
+ run_truncate_head(run, evcn + 1);
+
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn = le64_to_cpu(attr->nres.evcn);
+
+ le_b = NULL;
+ /* The layout of records may have changed, */
+ /* so find the base attribute to update. */
+ attr_b = ni_find_attr(ni, NULL, &le_b, type, name, name_len,
+ NULL, &mi_b);
+ if (!attr_b) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ attr_b->nres.alloc_size = cpu_to_le64((u64)vcn << cluster_bits);
+ attr_b->nres.data_size = attr_b->nres.alloc_size;
+ attr_b->nres.valid_size = attr_b->nres.alloc_size;
+ mi_b->dirty = true;
+ goto again_1;
+ }
+
+ if (new_size != old_size ||
+ (new_alloc != old_alloc && !keep_prealloc)) {
+ vcn = max(svcn, new_alen);
+ new_alloc_tmp = (u64)vcn << cluster_bits;
+
+ alen = 0;
+ err = run_deallocate_ex(sbi, run, vcn, evcn - vcn + 1, &alen,
+ true);
+ if (err)
+ goto out;
+
+ run_truncate(run, vcn);
+
+ if (vcn > svcn) {
+ err = mi_pack_runs(mi, attr, run, vcn - svcn);
+ if (err)
+ goto out;
+ } else if (le && le->vcn) {
+ u16 le_sz = le16_to_cpu(le->size);
+
+ /*
+ * NOTE: List entries for one attribute are always
+ * the same size. We deal with the last entry (vcn==0)
+ * and it is not the first in the entries array
+ * (the list entry for the std attribute is always first),
+ * so it is safe to step back.
+ */
+ mi_remove_attr(mi, attr);
+
+ if (!al_remove_le(ni, le)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ le = (struct ATTR_LIST_ENTRY *)((u8 *)le - le_sz);
+ } else {
+ attr->nres.evcn = cpu_to_le64((u64)vcn - 1);
+ mi->dirty = true;
+ }
+
+ attr_b->nres.alloc_size = cpu_to_le64(new_alloc_tmp);
+
+ if (vcn == new_alen) {
+ attr_b->nres.data_size = cpu_to_le64(new_size);
+ if (new_size < old_valid)
+ attr_b->nres.valid_size =
+ attr_b->nres.data_size;
+ } else {
+ if (new_alloc_tmp <=
+ le64_to_cpu(attr_b->nres.data_size))
+ attr_b->nres.data_size =
+ attr_b->nres.alloc_size;
+ if (new_alloc_tmp <
+ le64_to_cpu(attr_b->nres.valid_size))
+ attr_b->nres.valid_size =
+ attr_b->nres.alloc_size;
+ }
+
+ if (is_ext)
+ le64_sub_cpu(&attr_b->nres.total_size,
+ ((u64)alen << cluster_bits));
+
+ mi_b->dirty = true;
+
+ if (new_alloc_tmp <= new_alloc)
+ goto ok;
+
+ old_size = new_alloc_tmp;
+ vcn = svcn - 1;
+
+ if (le == le_b) {
+ attr = attr_b;
+ mi = mi_b;
+ evcn = svcn - 1;
+ svcn = 0;
+ goto next_le;
+ }
+
+ if (le->type != type || le->name_len != name_len ||
+ memcmp(le_name(le), name, name_len * sizeof(short))) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = ni_load_mi(ni, le, &mi);
+ if (err)
+ goto out;
+
+ attr = mi_find_attr(mi, NULL, type, name, name_len, &le->id);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+ goto next_le_1;
+ }
+
+ok:
+ if (new_valid) {
+ __le64 valid = cpu_to_le64(min(*new_valid, new_size));
+
+ if (attr_b->nres.valid_size != valid) {
+ attr_b->nres.valid_size = valid;
+ mi_b->dirty = true;
+ }
+ }
+
+out:
+ if (!err && attr_b && ret)
+ *ret = attr_b;
+
+ /* update inode_set_bytes*/
+ if (!err && ((type == ATTR_DATA && !name_len) ||
+ (type == ATTR_ALLOC && name == I30_NAME))) {
+ bool dirty = false;
+
+ if (ni->vfs_inode.i_size != new_size) {
+ ni->vfs_inode.i_size = new_size;
+ dirty = true;
+ }
+
+ if (attr_b && attr_b->non_res) {
+ new_alloc = le64_to_cpu(attr_b->nres.alloc_size);
+ if (inode_get_bytes(&ni->vfs_inode) != new_alloc) {
+ inode_set_bytes(&ni->vfs_inode, new_alloc);
+ dirty = true;
+ }
+ }
+
+ if (dirty) {
+ ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
+ mark_inode_dirty(&ni->vfs_inode);
+ }
+ }
+
+ return err;
+}
+
+int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
+ CLST *len, bool *new)
+{
+ int err = 0;
+ struct runs_tree *run = &ni->file.run;
+ struct ntfs_sb_info *sbi;
+ u8 cluster_bits;
+ struct ATTRIB *attr = NULL, *attr_b;
+ struct ATTR_LIST_ENTRY *le, *le_b;
+ struct mft_inode *mi, *mi_b;
+ CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end;
+ u64 total_size;
+ u32 clst_per_frame;
+ bool ok;
+
+ if (new)
+ *new = false;
+
+ down_read(&ni->file.run_lock);
+ ok = run_lookup_entry(run, vcn, lcn, len, NULL);
+ up_read(&ni->file.run_lock);
+
+ if (ok && (*lcn != SPARSE_LCN || !new)) {
+ /* normal way */
+ return 0;
+ }
+
+ if (!clen)
+ clen = 1;
+
+ if (ok && clen > *len)
+ clen = *len;
+
+ sbi = ni->mi.sbi;
+ cluster_bits = sbi->cluster_bits;
+
+ ni_lock(ni);
+ down_write(&ni->file.run_lock);
+
+ le_b = NULL;
+ attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
+ if (!attr_b) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ if (!attr_b->non_res) {
+ *lcn = RESIDENT_LCN;
+ *len = 1;
+ goto out;
+ }
+
+ asize = le64_to_cpu(attr_b->nres.alloc_size) >> sbi->cluster_bits;
+ if (vcn >= asize) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ clst_per_frame = 1u << attr_b->nres.c_unit;
+ to_alloc = (clen + clst_per_frame - 1) & ~(clst_per_frame - 1);
+
+ if (vcn + to_alloc > asize)
+ to_alloc = asize - vcn;
+
+ svcn = le64_to_cpu(attr_b->nres.svcn);
+ evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1;
+
+ attr = attr_b;
+ le = le_b;
+ mi = mi_b;
+
+ if (le_b && (vcn < svcn || evcn1 <= vcn)) {
+ attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn,
+ &mi);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
+ }
+
+ err = attr_load_runs(attr, ni, run, NULL);
+ if (err)
+ goto out;
+
+ if (!ok) {
+ ok = run_lookup_entry(run, vcn, lcn, len, NULL);
+ if (ok && (*lcn != SPARSE_LCN || !new)) {
+ /* normal way */
+ err = 0;
+ goto ok;
+ }
+
+ if (!ok && !new) {
+ *len = 0;
+ err = 0;
+ goto ok;
+ }
+
+ if (ok && clen > *len) {
+ clen = *len;
+ to_alloc = (clen + clst_per_frame - 1) &
+ ~(clst_per_frame - 1);
+ }
+ }
+
+ if (!is_attr_ext(attr_b)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Get the last lcn to allocate from */
+ hint = 0;
+
+ if (vcn > evcn1) {
+ if (!run_add_entry(run, evcn1, SPARSE_LCN, vcn - evcn1,
+ false)) {
+ err = -ENOMEM;
+ goto out;
+ }
+ } else if (vcn && !run_lookup_entry(run, vcn - 1, &hint, NULL, NULL)) {
+ hint = -1;
+ }
+
+ err = attr_allocate_clusters(
+ sbi, run, vcn, hint + 1, to_alloc, NULL, 0, len,
+ (sbi->record_size - le32_to_cpu(mi->mrec->used) + 8) / 3 + 1,
+ lcn);
+ if (err)
+ goto out;
+ *new = true;
+
+ end = vcn + *len;
+
+ total_size = le64_to_cpu(attr_b->nres.total_size) +
+ ((u64)*len << cluster_bits);
+
+repack:
+ err = mi_pack_runs(mi, attr, run, max(end, evcn1) - svcn);
+ if (err)
+ goto out;
+
+ attr_b->nres.total_size = cpu_to_le64(total_size);
+ inode_set_bytes(&ni->vfs_inode, total_size);
+ ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
+
+ mi_b->dirty = true;
+ mark_inode_dirty(&ni->vfs_inode);
+
+ /* stored [vcn : next_svcn) from [vcn : end) */
+ next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
+
+ if (end <= evcn1) {
+ if (next_svcn == evcn1) {
+ /* Normal case: update the attribute and exit. */
+ goto ok;
+ }
+ /* Add a new segment [next_svcn : evcn1). */
+ if (!ni->attr_list.size) {
+ err = ni_create_attr_list(ni);
+ if (err)
+ goto out;
+ /* layout of records is changed */
+ le_b = NULL;
+ attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL,
+ 0, NULL, &mi_b);
+ if (!attr_b) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ attr = attr_b;
+ le = le_b;
+ mi = mi_b;
+ goto repack;
+ }
+ }
+
+ svcn = evcn1;
+
+ /* Estimate next attribute */
+ attr = ni_find_attr(ni, attr, &le, ATTR_DATA, NULL, 0, &svcn, &mi);
+
+ if (attr) {
+ CLST alloc = bytes_to_cluster(
+ sbi, le64_to_cpu(attr_b->nres.alloc_size));
+ CLST evcn = le64_to_cpu(attr->nres.evcn);
+
+ if (end < next_svcn)
+ end = next_svcn;
+ while (end > evcn) {
+ /* remove segment [svcn : evcn)*/
+ mi_remove_attr(mi, attr);
+
+ if (!al_remove_le(ni, le)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (evcn + 1 >= alloc) {
+ /* last attribute segment */
+ evcn1 = evcn + 1;
+ goto ins_ext;
+ }
+
+ if (ni_load_mi(ni, le, &mi)) {
+ attr = NULL;
+ goto out;
+ }
+
+ attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0,
+ &le->id);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn = le64_to_cpu(attr->nres.evcn);
+ }
+
+ if (end < svcn)
+ end = svcn;
+
+ err = attr_load_runs(attr, ni, run, &end);
+ if (err)
+ goto out;
+
+ evcn1 = evcn + 1;
+ attr->nres.svcn = cpu_to_le64(next_svcn);
+ err = mi_pack_runs(mi, attr, run, evcn1 - next_svcn);
+ if (err)
+ goto out;
+
+ le->vcn = cpu_to_le64(next_svcn);
+ ni->attr_list.dirty = true;
+ mi->dirty = true;
+
+ next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
+ }
+ins_ext:
+ if (evcn1 > next_svcn) {
+ err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run,
+ next_svcn, evcn1 - next_svcn,
+ attr_b->flags, &attr, &mi);
+ if (err)
+ goto out;
+ }
+ok:
+ run_truncate_around(run, vcn);
+out:
+ up_write(&ni->file.run_lock);
+ ni_unlock(ni);
+
+ return err;
+}
+
+int attr_data_read_resident(struct ntfs_inode *ni, struct page *page)
+{
+ u64 vbo;
+ struct ATTRIB *attr;
+ u32 data_size;
+
+ attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, NULL);
+ if (!attr)
+ return -EINVAL;
+
+ if (attr->non_res)
+ return E_NTFS_NONRESIDENT;
+
+ vbo = page->index << PAGE_SHIFT;
+ data_size = le32_to_cpu(attr->res.data_size);
+ if (vbo < data_size) {
+ const char *data = resident_data(attr);
+ char *kaddr = kmap_atomic(page);
+ u32 use = data_size - vbo;
+
+ if (use > PAGE_SIZE)
+ use = PAGE_SIZE;
+
+ memcpy(kaddr, data + vbo, use);
+ memset(kaddr + use, 0, PAGE_SIZE - use);
+ kunmap_atomic(kaddr);
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ } else if (!PageUptodate(page)) {
+ zero_user_segment(page, 0, PAGE_SIZE);
+ SetPageUptodate(page);
+ }
+
+ return 0;
+}
+
+int attr_data_write_resident(struct ntfs_inode *ni, struct page *page)
+{
+ u64 vbo;
+ struct mft_inode *mi;
+ struct ATTRIB *attr;
+ u32 data_size;
+
+ attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, &mi);
+ if (!attr)
+ return -EINVAL;
+
+ if (attr->non_res) {
+ /* Return a special error code so the caller can check this case. */
+ return E_NTFS_NONRESIDENT;
+ }
+
+ vbo = page->index << PAGE_SHIFT;
+ data_size = le32_to_cpu(attr->res.data_size);
+ if (vbo < data_size) {
+ char *data = resident_data(attr);
+ char *kaddr = kmap_atomic(page);
+ u32 use = data_size - vbo;
+
+ if (use > PAGE_SIZE)
+ use = PAGE_SIZE;
+ memcpy(data + vbo, kaddr, use);
+ kunmap_atomic(kaddr);
+ mi->dirty = true;
+ }
+ ni->i_valid = data_size;
+
+ return 0;
+}
+
+/*
+ * attr_load_runs_vcn
+ *
+ * load runs with vcn
+ */
+int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type,
+ const __le16 *name, u8 name_len, struct runs_tree *run,
+ CLST vcn)
+{
+ struct ATTRIB *attr;
+ int err;
+ CLST svcn, evcn;
+ u16 ro;
+
+ attr = ni_find_attr(ni, NULL, NULL, type, name, name_len, &vcn, NULL);
+ if (!attr)
+ return -ENOENT;
+
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn = le64_to_cpu(attr->nres.evcn);
+
+ if (evcn < vcn || vcn < svcn)
+ return -EINVAL;
+
+ ro = le16_to_cpu(attr->nres.run_off);
+ err = run_unpack_ex(run, ni->mi.sbi, ni->mi.rno, svcn, evcn, svcn,
+ Add2Ptr(attr, ro), le32_to_cpu(attr->size) - ro);
+ if (err < 0)
+ return err;
+ return 0;
+}
+
+/*
+ * load runs for given range [from to)
+ */
+int attr_load_runs_range(struct ntfs_inode *ni, enum ATTR_TYPE type,
+ const __le16 *name, u8 name_len, struct runs_tree *run,
+ u64 from, u64 to)
+{
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ u8 cluster_bits = sbi->cluster_bits;
+ CLST vcn = from >> cluster_bits;
+ CLST vcn_last = (to - 1) >> cluster_bits;
+ CLST lcn, clen;
+ int err;
+
+ for (vcn = from >> cluster_bits; vcn <= vcn_last; vcn += clen) {
+ if (!run_lookup_entry(run, vcn, &lcn, &clen, NULL)) {
+ err = attr_load_runs_vcn(ni, type, name, name_len, run,
+ vcn);
+ if (err)
+ return err;
+ clen = 0; /* The next run_lookup_entry(vcn) must succeed. */
+ }
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+/*
+ * attr_wof_frame_info
+ *
+ * read header of xpress/lzx file to get info about frame
+ */
+int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr,
+ struct runs_tree *run, u64 frame, u64 frames,
+ u8 frame_bits, u32 *ondisk_size, u64 *vbo_data)
+{
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ u64 vbo[2], off[2], wof_size;
+ u32 voff;
+ u8 bytes_per_off;
+ char *addr;
+ struct page *page;
+ int i, err;
+ __le32 *off32;
+ __le64 *off64;
+
+ if (ni->vfs_inode.i_size < 0x100000000ull) {
+ /* file starts with array of 32 bit offsets */
+ bytes_per_off = sizeof(__le32);
+ vbo[1] = frame << 2;
+ *vbo_data = frames << 2;
+ } else {
+ /* file starts with array of 64 bit offsets */
+ bytes_per_off = sizeof(__le64);
+ vbo[1] = frame << 3;
+ *vbo_data = frames << 3;
+ }
+
+ /*
+ * read 4/8 bytes at [vbo - 4(8)] == offset where compressed frame starts
+ * read 4/8 bytes at [vbo] == offset where compressed frame ends
+ */
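+ /* I.e. the file begins with a table of frame end offsets; a
+ * frame's start offset is the previous entry, or 0 for frame 0.
+ */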
+ if (!attr->non_res) {
+ if (vbo[1] + bytes_per_off > le32_to_cpu(attr->res.data_size)) {
+ ntfs_inode_err(&ni->vfs_inode, "is corrupted");
+ return -EINVAL;
+ }
+ addr = resident_data(attr);
+
+ if (bytes_per_off == sizeof(__le32)) {
+ off32 = Add2Ptr(addr, vbo[1]);
+ off[0] = vbo[1] ? le32_to_cpu(off32[-1]) : 0;
+ off[1] = le32_to_cpu(off32[0]);
+ } else {
+ off64 = Add2Ptr(addr, vbo[1]);
+ off[0] = vbo[1] ? le64_to_cpu(off64[-1]) : 0;
+ off[1] = le64_to_cpu(off64[0]);
+ }
+
+ *vbo_data += off[0];
+ *ondisk_size = off[1] - off[0];
+ return 0;
+ }
+
+ wof_size = le64_to_cpu(attr->nres.data_size);
+ down_write(&ni->file.run_lock);
+ page = ni->file.offs_page;
+ if (!page) {
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ err = -ENOMEM;
+ goto out;
+ }
+ page->index = -1;
+ ni->file.offs_page = page;
+ }
+ lock_page(page);
+ addr = page_address(page);
+
+ if (vbo[1]) {
+ voff = vbo[1] & (PAGE_SIZE - 1);
+ vbo[0] = vbo[1] - bytes_per_off;
+ i = 0;
+ } else {
+ voff = 0;
+ vbo[0] = 0;
+ off[0] = 0;
+ i = 1;
+ }
+
+ do {
+ pgoff_t index = vbo[i] >> PAGE_SHIFT;
+
+ if (index != page->index) {
+ u64 from = vbo[i] & ~(u64)(PAGE_SIZE - 1);
+ u64 to = min(from + PAGE_SIZE, wof_size);
+
+ err = attr_load_runs_range(ni, ATTR_DATA, WOF_NAME,
+ ARRAY_SIZE(WOF_NAME), run,
+ from, to);
+ if (err)
+ goto out1;
+
+ err = ntfs_bio_pages(sbi, run, &page, 1, from,
+ to - from, REQ_OP_READ);
+ if (err) {
+ page->index = -1;
+ goto out1;
+ }
+ page->index = index;
+ }
+
+ if (i) {
+ if (bytes_per_off == sizeof(__le32)) {
+ off32 = Add2Ptr(addr, voff);
+ off[1] = le32_to_cpu(*off32);
+ } else {
+ off64 = Add2Ptr(addr, voff);
+ off[1] = le64_to_cpu(*off64);
+ }
+ } else if (!voff) {
+ if (bytes_per_off == sizeof(__le32)) {
+ off32 = Add2Ptr(addr, PAGE_SIZE - sizeof(u32));
+ off[0] = le32_to_cpu(*off32);
+ } else {
+ off64 = Add2Ptr(addr, PAGE_SIZE - sizeof(u64));
+ off[0] = le64_to_cpu(*off64);
+ }
+ } else {
+ /* two values in one page*/
+ if (bytes_per_off == sizeof(__le32)) {
+ off32 = Add2Ptr(addr, voff);
+ off[0] = le32_to_cpu(off32[-1]);
+ off[1] = le32_to_cpu(off32[0]);
+ } else {
+ off64 = Add2Ptr(addr, voff);
+ off[0] = le64_to_cpu(off64[-1]);
+ off[1] = le64_to_cpu(off64[0]);
+ }
+ break;
+ }
+ } while (++i < 2);
+
+ *vbo_data += off[0];
+ *ondisk_size = off[1] - off[0];
+
+out1:
+ unlock_page(page);
+out:
+ up_write(&ni->file.run_lock);
+ return err;
+}
+#endif
+
+/*
+ * attr_is_frame_compressed
+ *
+ * Detect whether a frame is compressed.
+ */
+int attr_is_frame_compressed(struct ntfs_inode *ni, struct ATTRIB *attr,
+ CLST frame, CLST *clst_data)
+{
+ int err;
+ u32 clst_frame;
+ CLST clen, lcn, vcn, alen, slen, vcn_next;
+ size_t idx;
+ struct runs_tree *run;
+
+ *clst_data = 0;
+
+ if (!is_attr_compressed(attr))
+ return 0;
+
+ if (!attr->non_res)
+ return 0;
+
+ clst_frame = 1u << attr->nres.c_unit;
+ vcn = frame * clst_frame;
+ run = &ni->file.run;
+
+ if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) {
+ err = attr_load_runs_vcn(ni, attr->type, attr_name(attr),
+ attr->name_len, run, vcn);
+ if (err)
+ return err;
+
+ if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx))
+ return -EINVAL;
+ }
+
+ if (lcn == SPARSE_LCN) {
+ /* sparse frame */
+ return 0;
+ }
+
+ if (clen >= clst_frame) {
+ /*
+ * The frame is not compressed because
+ * it does not contain any sparse clusters.
+ */
+ *clst_data = clst_frame;
+ return 0;
+ }
+
+ alen = bytes_to_cluster(ni->mi.sbi, le64_to_cpu(attr->nres.alloc_size));
+ slen = 0;
+ *clst_data = clen;
+
+ /*
+ * The frame is compressed if *clst_data + slen >= clst_frame
+ * Check next fragments
+ */
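+ /* Example (illustrative): with clst_frame == 16, a frame stored
+ * as 10 data clusters followed by 6 sparse clusters is compressed
+ * (clen == 10 < 16 and 10 + 6 >= 16).
+ */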
+ while ((vcn += clen) < alen) {
+ vcn_next = vcn;
+
+ if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
+ vcn_next != vcn) {
+ err = attr_load_runs_vcn(ni, attr->type,
+ attr_name(attr),
+ attr->name_len, run, vcn_next);
+ if (err)
+ return err;
+ vcn = vcn_next;
+
+ if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx))
+ return -EINVAL;
+ }
+
+ if (lcn == SPARSE_LCN) {
+ slen += clen;
+ } else {
+ if (slen) {
+ /*
+ * Data clusters follow sparse clusters but the
+ * frame is still not full: invalid layout.
+ */
+ return -EINVAL;
+ }
+ *clst_data += clen;
+ }
+
+ if (*clst_data + slen >= clst_frame) {
+ if (!slen) {
+ /*
+ * There are no sparse clusters in this frame,
+ * so it is not compressed.
+ */
+ *clst_data = clst_frame;
+ } else {
+ /*frame is compressed*/
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * attr_allocate_frame
+ *
+ * allocate/free clusters for 'frame'
+ * assumed: down_write(&ni->file.run_lock);
+ */
+int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size,
+ u64 new_valid)
+{
+ int err = 0;
+ struct runs_tree *run = &ni->file.run;
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ struct ATTRIB *attr = NULL, *attr_b;
+ struct ATTR_LIST_ENTRY *le, *le_b;
+ struct mft_inode *mi, *mi_b;
+ CLST svcn, evcn1, next_svcn, lcn, len;
+ CLST vcn, end, clst_data;
+ u64 total_size, valid_size, data_size;
+
+ le_b = NULL;
+ attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
+ if (!attr_b)
+ return -ENOENT;
+
+ if (!is_attr_ext(attr_b))
+ return -EINVAL;
+
+ vcn = frame << NTFS_LZNT_CUNIT;
+ total_size = le64_to_cpu(attr_b->nres.total_size);
+
+ svcn = le64_to_cpu(attr_b->nres.svcn);
+ evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1;
+ data_size = le64_to_cpu(attr_b->nres.data_size);
+
+ if (svcn <= vcn && vcn < evcn1) {
+ attr = attr_b;
+ le = le_b;
+ mi = mi_b;
+ } else if (!le_b) {
+ err = -EINVAL;
+ goto out;
+ } else {
+ le = le_b;
+ attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn,
+ &mi);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
+ }
+
+ err = attr_load_runs(attr, ni, run, NULL);
+ if (err)
+ goto out;
+
+ err = attr_is_frame_compressed(ni, attr_b, frame, &clst_data);
+ if (err)
+ goto out;
+
+ total_size -= (u64)clst_data << sbi->cluster_bits;
+
+ len = bytes_to_cluster(sbi, compr_size);
+
+ if (len == clst_data)
+ goto out;
+
+ if (len < clst_data) {
+ err = run_deallocate_ex(sbi, run, vcn + len, clst_data - len,
+ NULL, true);
+ if (err)
+ goto out;
+
+ if (!run_add_entry(run, vcn + len, SPARSE_LCN, clst_data - len,
+ false)) {
+ err = -ENOMEM;
+ goto out;
+ }
+ end = vcn + clst_data;
+ /* run contains updated range [vcn + len : end) */
+ } else {
+ CLST alen, hint = 0;
+ /* Get the last lcn to allocate from */
+ if (vcn + clst_data &&
+ !run_lookup_entry(run, vcn + clst_data - 1, &hint, NULL,
+ NULL)) {
+ hint = -1;
+ }
+
+ err = attr_allocate_clusters(sbi, run, vcn + clst_data,
+ hint + 1, len - clst_data, NULL, 0,
+ &alen, 0, &lcn);
+ if (err)
+ goto out;
+
+ end = vcn + len;
+ /* run contains updated range [vcn + clst_data : end) */
+ }
+
+ total_size += (u64)len << sbi->cluster_bits;
+
+repack:
+ err = mi_pack_runs(mi, attr, run, max(end, evcn1) - svcn);
+ if (err)
+ goto out;
+
+ attr_b->nres.total_size = cpu_to_le64(total_size);
+ inode_set_bytes(&ni->vfs_inode, total_size);
+
+ mi_b->dirty = true;
+ mark_inode_dirty(&ni->vfs_inode);
+
+ /* stored [vcn : next_svcn) from [vcn : end) */
+ next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
+
+ if (end <= evcn1) {
+ if (next_svcn == evcn1) {
+ /* Normal case: update the attribute and exit. */
+ goto ok;
+ }
+ /* Add a new segment [next_svcn : evcn1). */
+ if (!ni->attr_list.size) {
+ err = ni_create_attr_list(ni);
+ if (err)
+ goto out;
+ /* layout of records is changed */
+ le_b = NULL;
+ attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL,
+ 0, NULL, &mi_b);
+ if (!attr_b) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ attr = attr_b;
+ le = le_b;
+ mi = mi_b;
+ goto repack;
+ }
+ }
+
+ svcn = evcn1;
+
+ /* Estimate next attribute */
+ attr = ni_find_attr(ni, attr, &le, ATTR_DATA, NULL, 0, &svcn, &mi);
+
+ if (attr) {
+ CLST alloc = bytes_to_cluster(
+ sbi, le64_to_cpu(attr_b->nres.alloc_size));
+ CLST evcn = le64_to_cpu(attr->nres.evcn);
+
+ if (end < next_svcn)
+ end = next_svcn;
+ while (end > evcn) {
+ /* remove segment [svcn : evcn)*/
+ mi_remove_attr(mi, attr);
+
+ if (!al_remove_le(ni, le)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (evcn + 1 >= alloc) {
+ /* last attribute segment */
+ evcn1 = evcn + 1;
+ goto ins_ext;
+ }
+
+ if (ni_load_mi(ni, le, &mi)) {
+ attr = NULL;
+ goto out;
+ }
+
+ attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0,
+ &le->id);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn = le64_to_cpu(attr->nres.evcn);
+ }
+
+ if (end < svcn)
+ end = svcn;
+
+ err = attr_load_runs(attr, ni, run, &end);
+ if (err)
+ goto out;
+
+ evcn1 = evcn + 1;
+ attr->nres.svcn = cpu_to_le64(next_svcn);
+ err = mi_pack_runs(mi, attr, run, evcn1 - next_svcn);
+ if (err)
+ goto out;
+
+ le->vcn = cpu_to_le64(next_svcn);
+ ni->attr_list.dirty = true;
+ mi->dirty = true;
+
+ next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
+ }
+ins_ext:
+ if (evcn1 > next_svcn) {
+ err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run,
+ next_svcn, evcn1 - next_svcn,
+ attr_b->flags, &attr, &mi);
+ if (err)
+ goto out;
+ }
+ok:
+ run_truncate_around(run, vcn);
+out:
+ if (new_valid > data_size)
+ new_valid = data_size;
+
+ valid_size = le64_to_cpu(attr_b->nres.valid_size);
+ if (new_valid != valid_size) {
+ attr_b->nres.valid_size = cpu_to_le64(new_valid);
+ mi_b->dirty = true;
+ }
+
+ return err;
+}
+
+/* Collapse range in file */
+int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
+{
+ int err = 0;
+ struct runs_tree *run = &ni->file.run;
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ struct ATTRIB *attr = NULL, *attr_b;
+ struct ATTR_LIST_ENTRY *le, *le_b;
+ struct mft_inode *mi, *mi_b;
+ CLST svcn, evcn1, len, dealloc, alen;
+ CLST vcn, end;
+ u64 valid_size, data_size, alloc_size, total_size;
+ u32 mask;
+ __le16 a_flags;
+
+ if (!bytes)
+ return 0;
+
+ le_b = NULL;
+ attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
+ if (!attr_b)
+ return -ENOENT;
+
+ if (!attr_b->non_res) {
+ /* Attribute is resident. Nothing to do? */
+ return 0;
+ }
+
+ data_size = le64_to_cpu(attr_b->nres.data_size);
+ alloc_size = le64_to_cpu(attr_b->nres.alloc_size);
+ a_flags = attr_b->flags;
+
+ if (is_attr_ext(attr_b)) {
+ total_size = le64_to_cpu(attr_b->nres.total_size);
+ mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1;
+ } else {
+ total_size = alloc_size;
+ mask = sbi->cluster_mask;
+ }
+
+ if ((vbo & mask) || (bytes & mask)) {
+ /* Only cluster-aligned ranges can be collapsed. */
+ return -EINVAL;
+ }
+
+ if (vbo > data_size)
+ return -EINVAL;
+
+ down_write(&ni->file.run_lock);
+
+ if (vbo + bytes >= data_size) {
+ u64 new_valid = min(ni->i_valid, vbo);
+
+ /* Simple truncate file at 'vbo' */
+ truncate_setsize(&ni->vfs_inode, vbo);
+ err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, vbo,
+ &new_valid, true, NULL);
+
+ if (!err && new_valid < ni->i_valid)
+ ni->i_valid = new_valid;
+
+ goto out;
+ }
+
+ /*
+ * Enumerate all attribute segments and collapse
+ */
+ alen = alloc_size >> sbi->cluster_bits;
+ vcn = vbo >> sbi->cluster_bits;
+ len = bytes >> sbi->cluster_bits;
+ end = vcn + len;
+ dealloc = 0;
+
+ svcn = le64_to_cpu(attr_b->nres.svcn);
+ evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1;
+
+ if (svcn <= vcn && vcn < evcn1) {
+ attr = attr_b;
+ le = le_b;
+ mi = mi_b;
+ } else if (!le_b) {
+ err = -EINVAL;
+ goto out;
+ } else {
+ le = le_b;
+ attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn,
+ &mi);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
+ }
+
+ for (;;) {
+ if (svcn >= end) {
+ /* shift vcn */
+ attr->nres.svcn = cpu_to_le64(svcn - len);
+ attr->nres.evcn = cpu_to_le64(evcn1 - 1 - len);
+ if (le) {
+ le->vcn = attr->nres.svcn;
+ ni->attr_list.dirty = true;
+ }
+ mi->dirty = true;
+ } else if (svcn < vcn || end < evcn1) {
+ CLST vcn1, eat, next_svcn;
+
+ /* collapse a part of this attribute segment */
+ err = attr_load_runs(attr, ni, run, &svcn);
+ if (err)
+ goto out;
+ vcn1 = max(vcn, svcn);
+ eat = min(end, evcn1) - vcn1;
+
+ err = run_deallocate_ex(sbi, run, vcn1, eat, &dealloc,
+ true);
+ if (err)
+ goto out;
+
+ if (!run_collapse_range(run, vcn1, eat)) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (svcn >= vcn) {
+ /* shift vcn */
+ attr->nres.svcn = cpu_to_le64(vcn);
+ if (le) {
+ le->vcn = attr->nres.svcn;
+ ni->attr_list.dirty = true;
+ }
+ }
+
+ err = mi_pack_runs(mi, attr, run, evcn1 - svcn - eat);
+ if (err)
+ goto out;
+
+ next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
+ if (next_svcn + eat < evcn1) {
+ err = ni_insert_nonresident(
+ ni, ATTR_DATA, NULL, 0, run, next_svcn,
+ evcn1 - eat - next_svcn, a_flags, &attr,
+ &mi);
+ if (err)
+ goto out;
+
+ /* Layout of records may be changed. */
+ attr_b = NULL;
+ le = al_find_ex(ni, NULL, ATTR_DATA, NULL, 0,
+ &next_svcn);
+ if (!le) {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ /* free all allocated memory */
+ run_truncate(run, 0);
+ } else {
+ u16 le_sz;
+ u16 roff = le16_to_cpu(attr->nres.run_off);
+
+ /* 'run' == 1 (RUN_DEALLOCATE) means unpack and deallocate. */
+ run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn,
+ evcn1 - 1, svcn, Add2Ptr(attr, roff),
+ le32_to_cpu(attr->size) - roff);
+
+ /* delete this attribute segment */
+ mi_remove_attr(mi, attr);
+ if (!le)
+ break;
+
+ le_sz = le16_to_cpu(le->size);
+ if (!al_remove_le(ni, le)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (evcn1 >= alen)
+ break;
+
+ if (!svcn) {
+ /* Load next record that contains this attribute */
+ if (ni_load_mi(ni, le, &mi)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Look for required attribute */
+ attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL,
+ 0, &le->id);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+ goto next_attr;
+ }
+ le = (struct ATTR_LIST_ENTRY *)((u8 *)le - le_sz);
+ }
+
+ if (evcn1 >= alen)
+ break;
+
+ attr = ni_enum_attr_ex(ni, attr, &le, &mi);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+next_attr:
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
+ }
+
+ if (!attr_b) {
+ le_b = NULL;
+ attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL,
+ &mi_b);
+ if (!attr_b) {
+ err = -ENOENT;
+ goto out;
+ }
+ }
+
+ data_size -= bytes;
+ valid_size = ni->i_valid;
+ if (vbo + bytes <= valid_size)
+ valid_size -= bytes;
+ else if (vbo < valid_size)
+ valid_size = vbo;
+
+ attr_b->nres.alloc_size = cpu_to_le64(alloc_size - bytes);
+ attr_b->nres.data_size = cpu_to_le64(data_size);
+ attr_b->nres.valid_size = cpu_to_le64(min(valid_size, data_size));
+ total_size -= (u64)dealloc << sbi->cluster_bits;
+ if (is_attr_ext(attr_b))
+ attr_b->nres.total_size = cpu_to_le64(total_size);
+ mi_b->dirty = true;
+
+ /* Update inode size. */
+ ni->i_valid = valid_size;
+ ni->vfs_inode.i_size = data_size;
+ inode_set_bytes(&ni->vfs_inode, total_size);
+ ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
+ mark_inode_dirty(&ni->vfs_inode);
+
+out:
+ up_write(&ni->file.run_lock);
+ if (err)
+ make_bad_inode(&ni->vfs_inode);
+
+ return err;
+}
+
+/* Not for normal files: the attribute must be sparse or compressed. */
+int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size)
+{
+ int err = 0;
+ struct runs_tree *run = &ni->file.run;
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ struct ATTRIB *attr = NULL, *attr_b;
+ struct ATTR_LIST_ENTRY *le, *le_b;
+ struct mft_inode *mi, *mi_b;
+ CLST svcn, evcn1, vcn, len, end, alen, dealloc;
+ u64 total_size, alloc_size;
+ u32 mask;
+
+ if (!bytes)
+ return 0;
+
+ le_b = NULL;
+ attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
+ if (!attr_b)
+ return -ENOENT;
+
+ if (!attr_b->non_res) {
+ u32 data_size = le32_to_cpu(attr_b->res.data_size);
+ u32 from, to;
+
+ if (vbo > data_size)
+ return 0;
+
+ from = vbo;
+ to = (vbo + bytes) < data_size ? (vbo + bytes) : data_size;
+ memset(Add2Ptr(resident_data(attr_b), from), 0, to - from);
+ return 0;
+ }
+
+ if (!is_attr_ext(attr_b))
+ return -EOPNOTSUPP;
+
+ alloc_size = le64_to_cpu(attr_b->nres.alloc_size);
+ total_size = le64_to_cpu(attr_b->nres.total_size);
+
+ if (vbo >= alloc_size) {
+ // NOTE: it is allowed
+ return 0;
+ }
+
+ mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1;
+
+ bytes += vbo;
+ if (bytes > alloc_size)
+ bytes = alloc_size;
+ bytes -= vbo;
+
+ if ((vbo & mask) || (bytes & mask)) {
+ /* We have to zero one or more ranges. */
+ if (frame_size == NULL) {
+ /* Caller insists range is aligned */
+ return -EINVAL;
+ }
+ *frame_size = mask + 1;
+ return E_NTFS_NOTALIGNED;
+ }
+
+ down_write(&ni->file.run_lock);
+ /*
+ * Enumerate all attribute segments and punch hole where necessary
+ */
+ alen = alloc_size >> sbi->cluster_bits;
+ vcn = vbo >> sbi->cluster_bits;
+ len = bytes >> sbi->cluster_bits;
+ end = vcn + len;
+ dealloc = 0;
+
+ svcn = le64_to_cpu(attr_b->nres.svcn);
+ evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1;
+
+ if (svcn <= vcn && vcn < evcn1) {
+ attr = attr_b;
+ le = le_b;
+ mi = mi_b;
+ } else if (!le_b) {
+ err = -EINVAL;
+ goto out;
+ } else {
+ le = le_b;
+ attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn,
+ &mi);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
+ }
+
+ while (svcn < end) {
+ CLST vcn1, zero, dealloc2;
+
+ err = attr_load_runs(attr, ni, run, &svcn);
+ if (err)
+ goto out;
+ vcn1 = max(vcn, svcn);
+ zero = min(end, evcn1) - vcn1;
+
+ dealloc2 = dealloc;
+ err = run_deallocate_ex(sbi, run, vcn1, zero, &dealloc, true);
+ if (err)
+ goto out;
+
+ if (dealloc2 == dealloc) {
+ /* Looks like the required range is already sparse. */
+ } else {
+ if (!run_add_entry(run, vcn1, SPARSE_LCN, zero,
+ false)) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = mi_pack_runs(mi, attr, run, evcn1 - svcn);
+ if (err)
+ goto out;
+ }
+ /* free all allocated memory */
+ run_truncate(run, 0);
+
+ if (evcn1 >= alen)
+ break;
+
+ attr = ni_enum_attr_ex(ni, attr, &le, &mi);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
+ }
+
+ total_size -= (u64)dealloc << sbi->cluster_bits;
+ attr_b->nres.total_size = cpu_to_le64(total_size);
+ mi_b->dirty = true;
+
+ /* Update inode size. */
+ inode_set_bytes(&ni->vfs_inode, total_size);
+ ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
+ mark_inode_dirty(&ni->vfs_inode);
+
+out:
+ up_write(&ni->file.run_lock);
+ if (err)
+ make_bad_inode(&ni->vfs_inode);
+
+ return err;
+}
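As an aside, the alignment rule that both attr_collapse_range() and attr_punch_hole() apply can be sketched in isolation: for extended (sparse/compressed) attributes the unit is one compression unit, cluster_size << c_unit, otherwise a single cluster. A minimal standalone model follows; the helper name range_is_aligned and the sample sizes are hypothetical, not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical model of the '(vbo & mask) || (bytes & mask)' test above. */
static int range_is_aligned(uint64_t vbo, uint64_t bytes,
			    uint32_t cluster_size, uint8_t c_unit, int is_ext)
{
	uint64_t mask = is_ext ? (((uint64_t)cluster_size << c_unit) - 1)
			       : ((uint64_t)cluster_size - 1);

	return !(vbo & mask) && !(bytes & mask);
}

int main(void)
{
	/* 4K clusters, 16-cluster compression unit (c_unit == 4). */
	printf("%d\n", range_is_aligned(65536, 65536, 4096, 4, 1)); /* 1 */
	printf("%d\n", range_is_aligned(4096, 65536, 4096, 4, 1));  /* 0 */
	return 0;
}

When this test fails, collapse returns -EINVAL, while punch-hole reports E_NTFS_NOTALIGNED and leaves sub-unit zeroing to the caller.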
diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c
new file mode 100644
index 000000000000..ea561361b576
--- /dev/null
+++ b/fs/ntfs3/attrlist.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+/* Returns true if le is valid */
+static inline bool al_is_valid_le(const struct ntfs_inode *ni,
+ struct ATTR_LIST_ENTRY *le)
+{
+ if (!le || !ni->attr_list.le || !ni->attr_list.size)
+ return false;
+
+ return PtrOffset(ni->attr_list.le, le) + le16_to_cpu(le->size) <=
+ ni->attr_list.size;
+}
+
+void al_destroy(struct ntfs_inode *ni)
+{
+ run_close(&ni->attr_list.run);
+ ntfs_free(ni->attr_list.le);
+ ni->attr_list.le = NULL;
+ ni->attr_list.size = 0;
+ ni->attr_list.dirty = false;
+}
+
+/*
+ * ntfs_load_attr_list
+ *
+ * This method makes sure that the ATTRIB list, if present,
+ * has been properly set up.
+ */
+int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr)
+{
+ int err;
+ size_t lsize;
+ void *le = NULL;
+
+ if (ni->attr_list.size)
+ return 0;
+
+ if (!attr->non_res) {
+ lsize = le32_to_cpu(attr->res.data_size);
+ le = ntfs_malloc(al_aligned(lsize));
+ if (!le) {
+ err = -ENOMEM;
+ goto out;
+ }
+ memcpy(le, resident_data(attr), lsize);
+ } else if (attr->nres.svcn) {
+ err = -EINVAL;
+ goto out;
+ } else {
+ u16 run_off = le16_to_cpu(attr->nres.run_off);
+
+ lsize = le64_to_cpu(attr->nres.data_size);
+
+ run_init(&ni->attr_list.run);
+
+ err = run_unpack_ex(&ni->attr_list.run, ni->mi.sbi, ni->mi.rno,
+ 0, le64_to_cpu(attr->nres.evcn), 0,
+ Add2Ptr(attr, run_off),
+ le32_to_cpu(attr->size) - run_off);
+ if (err < 0)
+ goto out;
+
+ le = ntfs_malloc(al_aligned(lsize));
+ if (!le) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = ntfs_read_run_nb(ni->mi.sbi, &ni->attr_list.run, 0, le,
+ lsize, NULL);
+ if (err)
+ goto out;
+ }
+
+ ni->attr_list.size = lsize;
+ ni->attr_list.le = le;
+
+ return 0;
+
+out:
+ ni->attr_list.le = le;
+ al_destroy(ni);
+
+ return err;
+}
+
+/*
+ * al_enumerate
+ *
+ * Returns the next list entry after 'le';
+ * if 'le' is NULL, returns the first entry.
+ */
+struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni,
+ struct ATTR_LIST_ENTRY *le)
+{
+ size_t off;
+ u16 sz;
+
+ if (!le) {
+ le = ni->attr_list.le;
+ } else {
+ sz = le16_to_cpu(le->size);
+ if (sz < sizeof(struct ATTR_LIST_ENTRY)) {
+ /* Impossible, because we should never have returned such a 'le'. */
+ return NULL;
+ }
+ le = Add2Ptr(le, sz);
+ }
+
+ /* Check boundary */
+ off = PtrOffset(ni->attr_list.le, le);
+ if (off + sizeof(struct ATTR_LIST_ENTRY) > ni->attr_list.size) {
+ // The regular end of list
+ return NULL;
+ }
+
+ sz = le16_to_cpu(le->size);
+
+ /* Check 'le' for errors */
+ if (sz < sizeof(struct ATTR_LIST_ENTRY) ||
+ off + sz > ni->attr_list.size ||
+ sz < le->name_off + le->name_len * sizeof(short)) {
+ return NULL;
+ }
+
+ return le;
+}
+
+/*
+ * al_find_le
+ *
+ * finds the first 'le' in the list which matches type, name and vcn
+ * Returns NULL if not found
+ */
+struct ATTR_LIST_ENTRY *al_find_le(struct ntfs_inode *ni,
+ struct ATTR_LIST_ENTRY *le,
+ const struct ATTRIB *attr)
+{
+ CLST svcn = attr_svcn(attr);
+
+ return al_find_ex(ni, le, attr->type, attr_name(attr), attr->name_len,
+ &svcn);
+}
+
+/*
+ * al_find_ex
+ *
+ * finds the first 'le' in the list which matches type, name and vcn
+ * Returns NULL if not found
+ */
+struct ATTR_LIST_ENTRY *al_find_ex(struct ntfs_inode *ni,
+ struct ATTR_LIST_ENTRY *le,
+ enum ATTR_TYPE type, const __le16 *name,
+ u8 name_len, const CLST *vcn)
+{
+ struct ATTR_LIST_ENTRY *ret = NULL;
+ u32 type_in = le32_to_cpu(type);
+
+ while ((le = al_enumerate(ni, le))) {
+ u64 le_vcn;
+ int diff = le32_to_cpu(le->type) - type_in;
+
+ /* List entries are sorted by type, name and vcn */
+ if (diff < 0)
+ continue;
+
+ if (diff > 0)
+ return ret;
+
+ if (le->name_len != name_len)
+ continue;
+
+ le_vcn = le64_to_cpu(le->vcn);
+ if (!le_vcn) {
+ /*
+ * compare entry names only for entry with vcn == 0
+ */
+ diff = ntfs_cmp_names(le_name(le), name_len, name,
+ name_len, ni->mi.sbi->upcase,
+ true);
+ if (diff < 0)
+ continue;
+
+ if (diff > 0)
+ return ret;
+ }
+
+ if (!vcn)
+ return le;
+
+ if (*vcn == le_vcn)
+ return le;
+
+ if (*vcn < le_vcn)
+ return ret;
+
+ ret = le;
+ }
+
+ return ret;
+}
+
+/*
+ * al_find_le_to_insert
+ *
+ * finds the first list entry which matches type, name and vcn
+ */
+static struct ATTR_LIST_ENTRY *al_find_le_to_insert(struct ntfs_inode *ni,
+ enum ATTR_TYPE type,
+ const __le16 *name,
+ u8 name_len, CLST vcn)
+{
+ struct ATTR_LIST_ENTRY *le = NULL, *prev;
+ u32 type_in = le32_to_cpu(type);
+
+ /* List entries are sorted by type, name, vcn */
+ while ((le = al_enumerate(ni, prev = le))) {
+ int diff = le32_to_cpu(le->type) - type_in;
+
+ if (diff < 0)
+ continue;
+
+ if (diff > 0)
+ return le;
+
+ if (!le->vcn) {
+ /*
+ * compare entry names only for entry with vcn == 0
+ */
+ diff = ntfs_cmp_names(le_name(le), le->name_len, name,
+ name_len, ni->mi.sbi->upcase,
+ true);
+ if (diff < 0)
+ continue;
+
+ if (diff > 0)
+ return le;
+ }
+
+ if (le64_to_cpu(le->vcn) >= vcn)
+ return le;
+ }
+
+ return prev ? Add2Ptr(prev, le16_to_cpu(prev->size)) : ni->attr_list.le;
+}
+
+/*
+ * al_add_le
+ *
+ * adds an "attribute list entry" to the list.
+ */
+int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name,
+ u8 name_len, CLST svcn, __le16 id, const struct MFT_REF *ref,
+ struct ATTR_LIST_ENTRY **new_le)
+{
+ int err;
+ struct ATTRIB *attr;
+ struct ATTR_LIST_ENTRY *le;
+ size_t off;
+ u16 sz;
+ size_t asize, new_asize;
+ u64 new_size;
+ typeof(ni->attr_list) *al = &ni->attr_list;
+
+ /*
+ * Compute the size of the new 'le'
+ */
+ sz = le_size(name_len);
+ new_size = al->size + sz;
+ asize = al_aligned(al->size);
+ new_asize = al_aligned(new_size);
+
+ /* Scan forward to the point at which the new 'le' should be inserted. */
+ le = al_find_le_to_insert(ni, type, name, name_len, svcn);
+ off = PtrOffset(al->le, le);
+
+ if (new_size > asize) {
+ void *ptr = ntfs_malloc(new_asize);
+
+ if (!ptr)
+ return -ENOMEM;
+
+ memcpy(ptr, al->le, off);
+ memcpy(Add2Ptr(ptr, off + sz), le, al->size - off);
+ le = Add2Ptr(ptr, off);
+ ntfs_free(al->le);
+ al->le = ptr;
+ } else {
+ memmove(Add2Ptr(le, sz), le, al->size - off);
+ }
+
+ al->size = new_size;
+
+ le->type = type;
+ le->size = cpu_to_le16(sz);
+ le->name_len = name_len;
+ le->name_off = offsetof(struct ATTR_LIST_ENTRY, name);
+ le->vcn = cpu_to_le64(svcn);
+ le->ref = *ref;
+ le->id = id;
+ memcpy(le->name, name, sizeof(short) * name_len);
+
+ al->dirty = true;
+
+ err = attr_set_size(ni, ATTR_LIST, NULL, 0, &al->run, new_size,
+ &new_size, true, &attr);
+ if (err)
+ return err;
+
+ if (attr && attr->non_res) {
+ err = ntfs_sb_write_run(ni->mi.sbi, &al->run, 0, al->le,
+ al->size);
+ if (err)
+ return err;
+ }
+
+ al->dirty = false;
+ *new_le = le;
+
+ return 0;
+}
+
+/*
+ * al_remove_le
+ *
+ * removes 'le' from attribute list
+ */
+bool al_remove_le(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le)
+{
+ u16 size;
+ size_t off;
+ typeof(ni->attr_list) *al = &ni->attr_list;
+
+ if (!al_is_valid_le(ni, le))
+ return false;
+
+ /* Save the size of 'le' on the stack. */
+ size = le16_to_cpu(le->size);
+ off = PtrOffset(al->le, le);
+
+ memmove(le, Add2Ptr(le, size), al->size - (off + size));
+
+ al->size -= size;
+ al->dirty = true;
+
+ return true;
+}
+
+/*
+ * al_delete_le
+ *
+ * deletes from the list the first 'le' which matches its parameters.
+ */
+bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn,
+ const __le16 *name, size_t name_len,
+ const struct MFT_REF *ref)
+{
+ u16 size;
+ struct ATTR_LIST_ENTRY *le;
+ size_t off;
+ typeof(ni->attr_list) *al = &ni->attr_list;
+
+ /* Scan forward to the first 'le' that matches the input */
+ le = al_find_ex(ni, NULL, type, name, name_len, &vcn);
+ if (!le)
+ return false;
+
+ off = PtrOffset(al->le, le);
+
+next:
+ if (off >= al->size)
+ return false;
+ if (le->type != type)
+ return false;
+ if (le->name_len != name_len)
+ return false;
+ if (name_len && ntfs_cmp_names(le_name(le), name_len, name, name_len,
+ ni->mi.sbi->upcase, true))
+ return false;
+ if (le64_to_cpu(le->vcn) != vcn)
+ return false;
+
+ /*
+ * The caller specified a segment reference, so we have to
+ * scan through the matching entries until we find that segment
+ * reference or we run out of matching entries.
+ */
+ if (ref && memcmp(ref, &le->ref, sizeof(*ref))) {
+ off += le16_to_cpu(le->size);
+ le = Add2Ptr(al->le, off);
+ goto next;
+ }
+
+ /* Save the size of 'le' on the stack. */
+ size = le16_to_cpu(le->size);
+ /* Delete 'le'. */
+ memmove(le, Add2Ptr(le, size), al->size - (off + size));
+
+ al->size -= size;
+ al->dirty = true;
+
+ return true;
+}
+
+/*
+ * al_update
+ */
+int al_update(struct ntfs_inode *ni)
+{
+ int err;
+ struct ATTRIB *attr;
+ typeof(ni->attr_list) *al = &ni->attr_list;
+
+ if (!al->dirty || !al->size)
+ return 0;
+
+ /*
+ * The attribute list is grown on demand in al_add_le;
+ * it is shrunk here.
+ */
+ err = attr_set_size(ni, ATTR_LIST, NULL, 0, &al->run, al->size, NULL,
+ false, &attr);
+ if (err)
+ goto out;
+
+ if (!attr->non_res) {
+ memcpy(resident_data(attr), al->le, al->size);
+ } else {
+ err = ntfs_sb_write_run(ni->mi.sbi, &al->run, 0, al->le,
+ al->size);
+ if (err)
+ goto out;
+
+ attr->nres.valid_size = attr->nres.data_size;
+ }
+
+ ni->mi.dirty = true;
+ al->dirty = false;
+
+out:
+ return err;
+}
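The buffer surgery al_add_le() performs is easier to see in a rough userspace sketch: either memmove the tail to open a gap in place, or rebuild a larger buffer from the two halves around the insertion offset. The helper packed_insert and its doubling growth policy below are hypothetical (the kernel sizes the buffer with al_aligned()), shown only to illustrate the memcpy/memmove pattern.

#include <stdlib.h>
#include <string.h>

/*
 * Hypothetical model of al_add_le()'s insertion step: open a gap of
 * ent_sz bytes at 'off' inside a packed buffer of variable-length
 * entries, growing the allocation when needed.
 */
unsigned char *packed_insert(unsigned char *buf, size_t *size, size_t *alloc,
			     size_t off, const void *ent, size_t ent_sz)
{
	if (*size + ent_sz > *alloc) {
		size_t na = (*size + ent_sz) * 2;
		unsigned char *p = malloc(na);

		if (!p)
			return NULL;
		memcpy(p, buf, off);                      /* entries before 'off' */
		memcpy(p + off + ent_sz, buf + off, *size - off); /* entries after */
		free(buf);
		buf = p;
		*alloc = na;
	} else {
		memmove(buf + off + ent_sz, buf + off, *size - off);
	}
	memcpy(buf + off, ent, ent_sz);
	*size += ent_sz;
	return buf;
}

The kernel version additionally keeps the on-disk attribute list in sync, which is what the attr_set_size()/ntfs_sb_write_run() calls after the insertion are for.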
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
new file mode 100644
index 000000000000..98871c895e77
--- /dev/null
+++ b/fs/ntfs3/xattr.c
@@ -0,0 +1,1128 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+// clang-format off
+#define SYSTEM_DOS_ATTRIB "system.dos_attrib"
+#define SYSTEM_NTFS_ATTRIB "system.ntfs_attrib"
+#define SYSTEM_NTFS_SECURITY "system.ntfs_security"
+// clang-format on
+
+static inline size_t unpacked_ea_size(const struct EA_FULL *ea)
+{
+ return ea->size ? le32_to_cpu(ea->size)
+ : DwordAlign(struct_size(
+ ea, name,
+ 1 + ea->name_len + le16_to_cpu(ea->elength)));
+}
+
+static inline size_t packed_ea_size(const struct EA_FULL *ea)
+{
+ return struct_size(ea, name,
+ 1 + ea->name_len + le16_to_cpu(ea->elength)) -
+ offsetof(struct EA_FULL, flags);
+}
+
+/*
+ * find_ea
+ *
+ * assume there is at least one xattr in the list
+ */
+static inline bool find_ea(const struct EA_FULL *ea_all, u32 bytes,
+ const char *name, u8 name_len, u32 *off)
+{
+ *off = 0;
+
+ if (!ea_all || !bytes)
+ return false;
+
+ for (;;) {
+ const struct EA_FULL *ea = Add2Ptr(ea_all, *off);
+ u32 next_off = *off + unpacked_ea_size(ea);
+
+ if (next_off > bytes)
+ return false;
+
+ if (ea->name_len == name_len &&
+ !memcmp(ea->name, name, name_len))
+ return true;
+
+ *off = next_off;
+ if (next_off >= bytes)
+ return false;
+ }
+}
+
+/*
+ * ntfs_read_ea
+ *
+ * reads all extended attributes
+ * ea - newly allocated memory
+ * info - pointer into resident data
+ */
+static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea,
+ size_t add_bytes, const struct EA_INFO **info)
+{
+ int err;
+ struct ATTR_LIST_ENTRY *le = NULL;
+ struct ATTRIB *attr_info, *attr_ea;
+ void *ea_p;
+ u32 size;
+
+ static_assert(le32_to_cpu(ATTR_EA_INFO) < le32_to_cpu(ATTR_EA));
+
+ *ea = NULL;
+ *info = NULL;
+
+ attr_info =
+ ni_find_attr(ni, NULL, &le, ATTR_EA_INFO, NULL, 0, NULL, NULL);
+ attr_ea =
+ ni_find_attr(ni, attr_info, &le, ATTR_EA, NULL, 0, NULL, NULL);
+
+ if (!attr_ea || !attr_info)
+ return 0;
+
+ *info = resident_data_ex(attr_info, sizeof(struct EA_INFO));
+ if (!*info)
+ return -EINVAL;
+
+ /* Check Ea limit */
+ size = le32_to_cpu((*info)->size);
+ if (size > ni->mi.sbi->ea_max_size)
+ return -EFBIG;
+
+ if (attr_size(attr_ea) > ni->mi.sbi->ea_max_size)
+ return -EFBIG;
+
+ /* Allocate memory for packed Ea */
+ ea_p = ntfs_malloc(size + add_bytes);
+ if (!ea_p)
+ return -ENOMEM;
+
+ if (attr_ea->non_res) {
+ struct runs_tree run;
+
+ run_init(&run);
+
+ err = attr_load_runs(attr_ea, ni, &run, NULL);
+ if (!err)
+ err = ntfs_read_run_nb(ni->mi.sbi, &run, 0, ea_p, size,
+ NULL);
+ run_close(&run);
+
+ if (err)
+ goto out;
+ } else {
+ void *p = resident_data_ex(attr_ea, size);
+
+ if (!p) {
+ err = -EINVAL;
+ goto out;
+ }
+ memcpy(ea_p, p, size);
+ }
+
+ memset(Add2Ptr(ea_p, size), 0, add_bytes);
+ *ea = ea_p;
+ return 0;
+
+out:
+ ntfs_free(ea_p);
+ *ea = NULL;
+ return err;
+}
+
+/*
+ * ntfs_list_ea
+ *
+ * copy a list of xattrs names into the buffer
+ * provided, or compute the buffer size required
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
+ size_t bytes_per_buffer)
+{
+ const struct EA_INFO *info;
+ struct EA_FULL *ea_all = NULL;
+ const struct EA_FULL *ea;
+ u32 off, size;
+ int err;
+ size_t ret;
+
+ err = ntfs_read_ea(ni, &ea_all, 0, &info);
+ if (err)
+ return err;
+
+ if (!info || !ea_all)
+ return 0;
+
+ size = le32_to_cpu(info->size);
+
+ /* Enumerate all xattrs */
+ for (ret = 0, off = 0; off < size; off += unpacked_ea_size(ea)) {
+ ea = Add2Ptr(ea_all, off);
+
+ if (buffer) {
+ if (ret + ea->name_len + 1 > bytes_per_buffer) {
+ err = -ERANGE;
+ goto out;
+ }
+
+ memcpy(buffer + ret, ea->name, ea->name_len);
+ buffer[ret + ea->name_len] = 0;
+ }
+
+ ret += ea->name_len + 1;
+ }
+
+out:
+ ntfs_free(ea_all);
+ return err ? err : ret;
+}
+
+static int ntfs_get_ea(struct inode *inode, const char *name, size_t name_len,
+ void *buffer, size_t size, size_t *required)
+{
+ struct ntfs_inode *ni = ntfs_i(inode);
+ const struct EA_INFO *info;
+ struct EA_FULL *ea_all = NULL;
+ const struct EA_FULL *ea;
+ u32 off, len;
+ int err;
+
+ if (!(ni->ni_flags & NI_FLAG_EA))
+ return -ENODATA;
+
+ if (!required)
+ ni_lock(ni);
+
+ len = 0;
+
+ if (name_len > 255) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
+
+ err = ntfs_read_ea(ni, &ea_all, 0, &info);
+ if (err)
+ goto out;
+
+ if (!info)
+ goto out;
+
+ /* Enumerate all xattrs */
+ if (!find_ea(ea_all, le32_to_cpu(info->size), name, name_len, &off)) {
+ err = -ENODATA;
+ goto out;
+ }
+ ea = Add2Ptr(ea_all, off);
+
+ len = le16_to_cpu(ea->elength);
+ if (!buffer) {
+ err = 0;
+ goto out;
+ }
+
+ if (len > size) {
+ err = -ERANGE;
+ if (required)
+ *required = len;
+ goto out;
+ }
+
+ memcpy(buffer, ea->name + ea->name_len + 1, len);
+ err = 0;
+
+out:
+ ntfs_free(ea_all);
+ if (!required)
+ ni_unlock(ni);
+
+ return err ? err : len;
+}
+
+static noinline int ntfs_set_ea(struct inode *inode, const char *name,
+ size_t name_len, const void *value,
+ size_t val_size, int flags, int locked)
+{
+ struct ntfs_inode *ni = ntfs_i(inode);
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ int err;
+ struct EA_INFO ea_info;
+ const struct EA_INFO *info;
+ struct EA_FULL *new_ea;
+ struct EA_FULL *ea_all = NULL;
+ size_t add, new_pack;
+ u32 off, size;
+ __le16 size_pack;
+ struct ATTRIB *attr;
+ struct ATTR_LIST_ENTRY *le;
+ struct mft_inode *mi;
+ struct runs_tree ea_run;
+ u64 new_sz;
+ void *p;
+
+ if (!locked)
+ ni_lock(ni);
+
+ run_init(&ea_run);
+
+ if (name_len > 255) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
+
+ add = DwordAlign(struct_size(ea_all, name, 1 + name_len + val_size));
+
+ err = ntfs_read_ea(ni, &ea_all, add, &info);
+ if (err)
+ goto out;
+
+ if (!info) {
+ memset(&ea_info, 0, sizeof(ea_info));
+ size = 0;
+ size_pack = 0;
+ } else {
+ memcpy(&ea_info, info, sizeof(ea_info));
+ size = le32_to_cpu(ea_info.size);
+ size_pack = ea_info.size_pack;
+ }
+
+ if (info && find_ea(ea_all, size, name, name_len, &off)) {
+ struct EA_FULL *ea;
+ size_t ea_sz;
+
+ if (flags & XATTR_CREATE) {
+ err = -EEXIST;
+ goto out;
+ }
+
+ ea = Add2Ptr(ea_all, off);
+
+ /*
+ * Check the simple case where we try to insert an xattr with the
+ * same value, e.g. from ntfs_save_wsl_perm
+ */
+ if (val_size && le16_to_cpu(ea->elength) == val_size &&
+ !memcmp(ea->name + ea->name_len + 1, value, val_size)) {
+ /* xattr already contains the required value */
+ goto out;
+ }
+
+ /* Remove current xattr */
+ if (ea->flags & FILE_NEED_EA)
+ le16_add_cpu(&ea_info.count, -1);
+
+ ea_sz = unpacked_ea_size(ea);
+
+ le16_add_cpu(&ea_info.size_pack, 0 - packed_ea_size(ea));
+
+ memmove(ea, Add2Ptr(ea, ea_sz), size - off - ea_sz);
+
+ size -= ea_sz;
+ memset(Add2Ptr(ea_all, size), 0, ea_sz);
+
+ ea_info.size = cpu_to_le32(size);
+
+ if ((flags & XATTR_REPLACE) && !val_size) {
+ /* remove xattr */
+ goto update_ea;
+ }
+ } else {
+ if (flags & XATTR_REPLACE) {
+ err = -ENODATA;
+ goto out;
+ }
+
+ if (!ea_all) {
+ ea_all = ntfs_zalloc(add);
+ if (!ea_all) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+ }
+
+ /* append new xattr */
+ new_ea = Add2Ptr(ea_all, size);
+ new_ea->size = cpu_to_le32(add);
+ new_ea->flags = 0;
+ new_ea->name_len = name_len;
+ new_ea->elength = cpu_to_le16(val_size);
+ memcpy(new_ea->name, name, name_len);
+ new_ea->name[name_len] = 0;
+ memcpy(new_ea->name + name_len + 1, value, val_size);
+ new_pack = le16_to_cpu(ea_info.size_pack) + packed_ea_size(new_ea);
+
+ /* should fit into 16 bits */
+ if (new_pack > 0xffff) {
+ err = -EFBIG; // -EINVAL?
+ goto out;
+ }
+ ea_info.size_pack = cpu_to_le16(new_pack);
+
+ /* new size of ATTR_EA */
+ size += add;
+ if (size > sbi->ea_max_size) {
+ err = -EFBIG; // -EINVAL?
+ goto out;
+ }
+ ea_info.size = cpu_to_le32(size);
+
+update_ea:
+
+ if (!info) {
+ /* Create xattr */
+ if (!size) {
+ err = 0;
+ goto out;
+ }
+
+ err = ni_insert_resident(ni, sizeof(struct EA_INFO),
+ ATTR_EA_INFO, NULL, 0, NULL, NULL);
+ if (err)
+ goto out;
+
+ err = ni_insert_resident(ni, 0, ATTR_EA, NULL, 0, NULL, NULL);
+ if (err)
+ goto out;
+ }
+
+ new_sz = size;
+ err = attr_set_size(ni, ATTR_EA, NULL, 0, &ea_run, new_sz, &new_sz,
+ false, NULL);
+ if (err)
+ goto out;
+
+ le = NULL;
+ attr = ni_find_attr(ni, NULL, &le, ATTR_EA_INFO, NULL, 0, NULL, &mi);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (!size) {
+ /* delete xattr, ATTR_EA_INFO */
+ err = ni_remove_attr_le(ni, attr, le);
+ if (err)
+ goto out;
+ } else {
+ p = resident_data_ex(attr, sizeof(struct EA_INFO));
+ if (!p) {
+ err = -EINVAL;
+ goto out;
+ }
+ memcpy(p, &ea_info, sizeof(struct EA_INFO));
+ mi->dirty = true;
+ }
+
+ le = NULL;
+ attr = ni_find_attr(ni, NULL, &le, ATTR_EA, NULL, 0, NULL, &mi);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (!size) {
+ /* delete xattr, ATTR_EA */
+ err = ni_remove_attr_le(ni, attr, le);
+ if (err)
+ goto out;
+ } else if (attr->non_res) {
+ err = ntfs_sb_write_run(sbi, &ea_run, 0, ea_all, size);
+ if (err)
+ goto out;
+ } else {
+ p = resident_data_ex(attr, size);
+ if (!p) {
+ err = -EINVAL;
+ goto out;
+ }
+ memcpy(p, ea_all, size);
+ mi->dirty = true;
+ }
+
+ /* Check if we deleted the last xattr */
+ if (size)
+ ni->ni_flags |= NI_FLAG_EA;
+ else
+ ni->ni_flags &= ~NI_FLAG_EA;
+
+ if (ea_info.size_pack != size_pack)
+ ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
+ mark_inode_dirty(&ni->vfs_inode);
+
+out:
+ if (!locked)
+ ni_unlock(ni);
+
+ run_close(&ea_run);
+ ntfs_free(ea_all);
+
+ return err;
+}
+
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+static inline void ntfs_posix_acl_release(struct posix_acl *acl)
+{
+ if (acl && refcount_dec_and_test(&acl->a_refcount))
+ kfree(acl);
+}
+
+static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns,
+ struct inode *inode, int type,
+ int locked)
+{
+ struct ntfs_inode *ni = ntfs_i(inode);
+ const char *name;
+ size_t name_len;
+ struct posix_acl *acl;
+ size_t req;
+ int err;
+ void *buf;
+
+ /* allocate PATH_MAX bytes */
+ buf = __getname();
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ /* Possible values of 'type' were already checked by the caller */
+ if (type == ACL_TYPE_ACCESS) {
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
+ name_len = sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1;
+ } else {
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
+ name_len = sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1;
+ }
+
+ if (!locked)
+ ni_lock(ni);
+
+ err = ntfs_get_ea(inode, name, name_len, buf, PATH_MAX, &req);
+
+ if (!locked)
+ ni_unlock(ni);
+
+ /* Translate extended attribute to acl */
+ if (err > 0) {
+ acl = posix_acl_from_xattr(mnt_userns, buf, err);
+ if (!IS_ERR(acl))
+ set_cached_acl(inode, type, acl);
+ } else {
+ acl = err == -ENODATA ? NULL : ERR_PTR(err);
+ }
+
+ __putname(buf);
+
+ return acl;
+}
+
+/*
+ * ntfs_get_acl
+ *
+ * inode_operations::get_acl
+ */
+struct posix_acl *ntfs_get_acl(struct inode *inode, int type)
+{
+ /* TODO: init_user_ns? */
+ return ntfs_get_acl_ex(&init_user_ns, inode, type, 0);
+}
+
+static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
+ struct inode *inode, struct posix_acl *acl,
+ int type, int locked)
+{
+ const char *name;
+ size_t size, name_len;
+ void *value = NULL;
+ int err = 0;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ if (acl) {
+ umode_t mode = inode->i_mode;
+
+ err = posix_acl_equiv_mode(acl, &mode);
+ if (err < 0)
+ return err;
+
+ if (inode->i_mode != mode) {
+ inode->i_mode = mode;
+ mark_inode_dirty(inode);
+ }
+
+ if (!err) {
+ /*
+ * acl can be exactly represented in the
+ * traditional file mode permission bits
+ */
+ acl = NULL;
+ }
+ }
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
+ name_len = sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1;
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ if (!S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
+ name_len = sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if (!acl) {
+ size = 0;
+ value = NULL;
+ } else {
+ size = posix_acl_xattr_size(acl->a_count);
+ value = ntfs_malloc(size);
+ if (!value)
+ return -ENOMEM;
+
+ err = posix_acl_to_xattr(mnt_userns, acl, value, size);
+ if (err < 0)
+ goto out;
+ }
+
+ err = ntfs_set_ea(inode, name, name_len, value, size,
+ acl ? 0 : XATTR_REPLACE, locked);
+ if (!err)
+ set_cached_acl(inode, type, acl);
+
+out:
+ ntfs_free(value);
+
+ return err;
+}
+
+/*
+ * ntfs_set_acl
+ *
+ * inode_operations::set_acl
+ */
+int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
+{
+ return ntfs_set_acl_ex(mnt_userns, inode, acl, type, 0);
+}
+
+static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns,
+ struct inode *inode, int type, void *buffer,
+ size_t size)
+{
+ struct posix_acl *acl;
+ int err;
+
+ if (!(inode->i_sb->s_flags & SB_POSIXACL))
+ return -EOPNOTSUPP;
+
+ acl = ntfs_get_acl(inode, type);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+
+ if (!acl)
+ return -ENODATA;
+
+ err = posix_acl_to_xattr(mnt_userns, acl, buffer, size);
+ ntfs_posix_acl_release(acl);
+
+ return err;
+}
+
+static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns,
+ struct inode *inode, int type, const void *value,
+ size_t size)
+{
+ struct posix_acl *acl;
+ int err;
+
+ if (!(inode->i_sb->s_flags & SB_POSIXACL))
+ return -EOPNOTSUPP;
+
+ if (!inode_owner_or_capable(mnt_userns, inode))
+ return -EPERM;
+
+ if (!value) {
+ acl = NULL;
+ } else {
+ acl = posix_acl_from_xattr(mnt_userns, value, size);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+
+ if (acl) {
+ err = posix_acl_valid(mnt_userns, acl);
+ if (err)
+ goto release_and_out;
+ }
+ }
+
+ err = ntfs_set_acl(mnt_userns, inode, acl, type);
+
+release_and_out:
+ ntfs_posix_acl_release(acl);
+ return err;
+}
+
+/*
+ * Initialize the ACLs of a new inode. Called from ntfs_create_inode.
+ */
+int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct inode *dir)
+{
+ struct posix_acl *default_acl, *acl;
+ int err;
+
+ /*
+ * TODO refactoring lock
+ * ni_lock(dir) ... -> posix_acl_create(dir,...) -> ntfs_get_acl -> ni_lock(dir)
+ */
+ inode->i_default_acl = NULL;
+
+ default_acl = ntfs_get_acl_ex(mnt_userns, dir, ACL_TYPE_DEFAULT, 1);
+
+ if (!default_acl || default_acl == ERR_PTR(-EOPNOTSUPP)) {
+ inode->i_mode &= ~current_umask();
+ err = 0;
+ goto out;
+ }
+
+ if (IS_ERR(default_acl)) {
+ err = PTR_ERR(default_acl);
+ goto out;
+ }
+
+ acl = default_acl;
+ err = __posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
+ if (err < 0)
+ goto out1;
+ if (!err) {
+ posix_acl_release(acl);
+ acl = NULL;
+ }
+
+ if (!S_ISDIR(inode->i_mode)) {
+ posix_acl_release(default_acl);
+ default_acl = NULL;
+ }
+
+ if (default_acl)
+ err = ntfs_set_acl_ex(mnt_userns, inode, default_acl,
+ ACL_TYPE_DEFAULT, 1);
+
+ if (!acl)
+ inode->i_acl = NULL;
+ else if (!err)
+ err = ntfs_set_acl_ex(mnt_userns, inode, acl, ACL_TYPE_ACCESS,
+ 1);
+
+ posix_acl_release(acl);
+out1:
+ posix_acl_release(default_acl);
+
+out:
+ return err;
+}
+#endif
+
+/*
+ * ntfs_acl_chmod
+ *
+ * helper for 'ntfs3_setattr'
+ */
+int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+
+ if (!(sb->s_flags & SB_POSIXACL))
+ return 0;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ return posix_acl_chmod(mnt_userns, inode, inode->i_mode);
+}
+
+/*
+ * ntfs_permission
+ *
+ * inode_operations::permission
+ */
+int ntfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask)
+{
+ if (ntfs_sb(inode->i_sb)->options.no_acs_rules) {
+ /* "no access rules" mode - allow all changes */
+ return 0;
+ }
+
+ return generic_permission(mnt_userns, inode, mask);
+}
+
+/*
+ * ntfs_listxattr
+ *
+ * inode_operations::listxattr
+ */
+ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+ struct inode *inode = d_inode(dentry);
+ struct ntfs_inode *ni = ntfs_i(inode);
+ ssize_t ret;
+
+ if (!(ni->ni_flags & NI_FLAG_EA)) {
+ /* no xattr in file */
+ return 0;
+ }
+
+ ni_lock(ni);
+
+ ret = ntfs_list_ea(ni, buffer, size);
+
+ ni_unlock(ni);
+
+ return ret;
+}
+
+static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de,
+ struct inode *inode, const char *name, void *buffer,
+ size_t size)
+{
+ int err;
+ struct ntfs_inode *ni = ntfs_i(inode);
+ size_t name_len = strlen(name);
+
+ /* Dispatch request */
+ if (name_len == sizeof(SYSTEM_DOS_ATTRIB) - 1 &&
+ !memcmp(name, SYSTEM_DOS_ATTRIB, sizeof(SYSTEM_DOS_ATTRIB))) {
+ /* system.dos_attrib */
+ if (!buffer) {
+ err = sizeof(u8);
+ } else if (size < sizeof(u8)) {
+ err = -ENODATA;
+ } else {
+ err = sizeof(u8);
+ *(u8 *)buffer = le32_to_cpu(ni->std_fa);
+ }
+ goto out;
+ }
+
+ if (name_len == sizeof(SYSTEM_NTFS_ATTRIB) - 1 &&
+ !memcmp(name, SYSTEM_NTFS_ATTRIB, sizeof(SYSTEM_NTFS_ATTRIB))) {
+ /* system.ntfs_attrib */
+ if (!buffer) {
+ err = sizeof(u32);
+ } else if (size < sizeof(u32)) {
+ err = -ENODATA;
+ } else {
+ err = sizeof(u32);
+ *(u32 *)buffer = le32_to_cpu(ni->std_fa);
+ }
+ goto out;
+ }
+
+ if (name_len == sizeof(SYSTEM_NTFS_SECURITY) - 1 &&
+ !memcmp(name, SYSTEM_NTFS_SECURITY, sizeof(SYSTEM_NTFS_SECURITY))) {
+ /* system.ntfs_security */
+ struct SECURITY_DESCRIPTOR_RELATIVE *sd = NULL;
+ size_t sd_size = 0;
+
+ if (!is_ntfs3(ni->mi.sbi)) {
+ /* we should get nt4 security */
+ err = -EINVAL;
+ goto out;
+ } else if (le32_to_cpu(ni->std_security_id) <
+ SECURITY_ID_FIRST) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ err = ntfs_get_security_by_id(ni->mi.sbi, ni->std_security_id,
+ &sd, &sd_size);
+ if (err)
+ goto out;
+
+ if (!is_sd_valid(sd, sd_size)) {
+ ntfs_inode_warn(
+ inode,
+ "looks like you get incorrect security descriptor id=%u",
+ ni->std_security_id);
+ }
+
+ if (!buffer) {
+ err = sd_size;
+ } else if (size < sd_size) {
+ err = -ENODATA;
+ } else {
+ err = sd_size;
+ memcpy(buffer, sd, sd_size);
+ }
+ ntfs_free(sd);
+ goto out;
+ }
+
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+ if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 &&
+ !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) ||
+ (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 &&
+ !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
+ sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) {
+ /* TODO: init_user_ns? */
+ err = ntfs_xattr_get_acl(
+ &init_user_ns, inode,
+ name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1
+ ? ACL_TYPE_ACCESS
+ : ACL_TYPE_DEFAULT,
+ buffer, size);
+ goto out;
+ }
+#endif
+ /* deal with ntfs extended attribute */
+ err = ntfs_get_ea(inode, name, name_len, buffer, size, NULL);
+
+out:
+ return err;
+}
+
+/*
+ * ntfs_setxattr
+ *
+ * inode_operations::setxattr
+ */
+static noinline int ntfs_setxattr(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
+ struct dentry *de, struct inode *inode,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ int err = -EINVAL;
+ struct ntfs_inode *ni = ntfs_i(inode);
+ size_t name_len = strlen(name);
+ enum FILE_ATTRIBUTE new_fa;
+
+ /* Dispatch request */
+ if (name_len == sizeof(SYSTEM_DOS_ATTRIB) - 1 &&
+ !memcmp(name, SYSTEM_DOS_ATTRIB, sizeof(SYSTEM_DOS_ATTRIB))) {
+ if (sizeof(u8) != size)
+ goto out;
+ new_fa = cpu_to_le32(*(u8 *)value);
+ goto set_new_fa;
+ }
+
+ if (name_len == sizeof(SYSTEM_NTFS_ATTRIB) - 1 &&
+ !memcmp(name, SYSTEM_NTFS_ATTRIB, sizeof(SYSTEM_NTFS_ATTRIB))) {
+ if (size != sizeof(u32))
+ goto out;
+ new_fa = cpu_to_le32(*(u32 *)value);
+
+ if (S_ISREG(inode->i_mode)) {
+ /* Process compressed/sparse attributes in a special way. */
+ ni_lock(ni);
+ err = ni_new_attr_flags(ni, new_fa);
+ ni_unlock(ni);
+ if (err)
+ goto out;
+ }
+set_new_fa:
+ /*
+ * Thanks Mark Harmstone:
+ * keep directory bit consistency
+ */
+ if (S_ISDIR(inode->i_mode))
+ new_fa |= FILE_ATTRIBUTE_DIRECTORY;
+ else
+ new_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
+
+ if (ni->std_fa != new_fa) {
+ ni->std_fa = new_fa;
+ if (new_fa & FILE_ATTRIBUTE_READONLY)
+ inode->i_mode &= ~0222;
+ else
+ inode->i_mode |= 0222;
+ /* std attribute always in primary record */
+ ni->mi.dirty = true;
+ mark_inode_dirty(inode);
+ }
+ err = 0;
+
+ goto out;
+ }
+
+ if (name_len == sizeof(SYSTEM_NTFS_SECURITY) - 1 &&
+ !memcmp(name, SYSTEM_NTFS_SECURITY, sizeof(SYSTEM_NTFS_SECURITY))) {
+ /* system.ntfs_security */
+ __le32 security_id;
+ bool inserted;
+ struct ATTR_STD_INFO5 *std;
+
+ if (!is_ntfs3(ni->mi.sbi)) {
+ /*
+ * we should replace ATTR_SECURE
+ * Skip this path because it is an NT4 feature
+ */
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (!is_sd_valid(value, size)) {
+ err = -EINVAL;
+ ntfs_inode_warn(
+ inode,
+ "you try to set invalid security descriptor");
+ goto out;
+ }
+
+ err = ntfs_insert_security(ni->mi.sbi, value, size,
+ &security_id, &inserted);
+ if (err)
+ goto out;
+
+ ni_lock(ni);
+ std = ni_std5(ni);
+ if (!std) {
+ err = -EINVAL;
+ } else if (std->security_id != security_id) {
+ std->security_id = ni->std_security_id = security_id;
+ /* std attribute always in primary record */
+ ni->mi.dirty = true;
+ mark_inode_dirty(&ni->vfs_inode);
+ }
+ ni_unlock(ni);
+ goto out;
+ }
+
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+ if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 &&
+ !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) ||
+ (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 &&
+ !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
+ sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) {
+ err = ntfs_xattr_set_acl(
+ mnt_userns, inode,
+ name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1
+ ? ACL_TYPE_ACCESS
+ : ACL_TYPE_DEFAULT,
+ value, size);
+ goto out;
+ }
+#endif
+ /* deal with ntfs extended attribute */
+ err = ntfs_set_ea(inode, name, name_len, value, size, flags, 0);
+
+out:
+ return err;
+}
+
+/*
+ * ntfs_save_wsl_perm
+ *
+ * save uid/gid/mode in xattr
+ */
+int ntfs_save_wsl_perm(struct inode *inode)
+{
+ int err;
+ __le32 value;
+
+ value = cpu_to_le32(i_uid_read(inode));
+ err = ntfs_set_ea(inode, "$LXUID", sizeof("$LXUID") - 1, &value,
+ sizeof(value), 0, 0);
+ if (err)
+ goto out;
+
+ value = cpu_to_le32(i_gid_read(inode));
+ err = ntfs_set_ea(inode, "$LXGID", sizeof("$LXGID") - 1, &value,
+ sizeof(value), 0, 0);
+ if (err)
+ goto out;
+
+ value = cpu_to_le32(inode->i_mode);
+ err = ntfs_set_ea(inode, "$LXMOD", sizeof("$LXMOD") - 1, &value,
+ sizeof(value), 0, 0);
+ if (err)
+ goto out;
+
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+ value = cpu_to_le32(inode->i_rdev);
+ err = ntfs_set_ea(inode, "$LXDEV", sizeof("$LXDEV") - 1, &value,
+ sizeof(value), 0, 0);
+ if (err)
+ goto out;
+ }
+
+out:
+ /* In case of error should we delete all WSL xattr? */
+ return err;
+}
+
+/*
+ * ntfs_get_wsl_perm
+ *
+ * get uid/gid/mode from xattr
+ * it is called from ntfs_iget5->ntfs_read_mft
+ */
+void ntfs_get_wsl_perm(struct inode *inode)
+{
+ size_t sz;
+ __le32 value[3];
+
+ if (ntfs_get_ea(inode, "$LXUID", sizeof("$LXUID") - 1, &value[0],
+ sizeof(value[0]), &sz) == sizeof(value[0]) &&
+ ntfs_get_ea(inode, "$LXGID", sizeof("$LXGID") - 1, &value[1],
+ sizeof(value[1]), &sz) == sizeof(value[1]) &&
+ ntfs_get_ea(inode, "$LXMOD", sizeof("$LXMOD") - 1, &value[2],
+ sizeof(value[2]), &sz) == sizeof(value[2])) {
+ i_uid_write(inode, (uid_t)le32_to_cpu(value[0]));
+ i_gid_write(inode, (gid_t)le32_to_cpu(value[1]));
+ inode->i_mode = le32_to_cpu(value[2]);
+
+ if (ntfs_get_ea(inode, "$LXDEV", sizeof("$$LXDEV") - 1,
+ &value[0], sizeof(value),
+ &sz) == sizeof(value[0])) {
+ inode->i_rdev = le32_to_cpu(value[0]);
+ }
+ }
+}
+
+static bool ntfs_xattr_user_list(struct dentry *dentry)
+{
+ return true;
+}
+
+// clang-format off
+static const struct xattr_handler ntfs_xattr_handler = {
+ .prefix = "",
+ .get = ntfs_getxattr,
+ .set = ntfs_setxattr,
+ .list = ntfs_xattr_user_list,
+};
+
+const struct xattr_handler *ntfs_xattr_handlers[] = {
+ &ntfs_xattr_handler,
+ NULL,
+};
+// clang-format on
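As a usage note: once a volume is mounted with ntfs3, the special names handled above are reachable through the ordinary xattr syscalls. A small userspace sketch (the file path is an assumption) reads system.ntfs_attrib, which ntfs_getxattr() answers with a host-endian u32 of the NTFS attribute flags:

#include <stdio.h>
#include <stdint.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	uint32_t fa;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file on ntfs3 mount>\n", argv[0]);
		return 1;
	}
	/* Dispatched to ntfs_getxattr() -> the SYSTEM_NTFS_ATTRIB branch. */
	if (getxattr(argv[1], "system.ntfs_attrib", &fa, sizeof(fa)) !=
	    (ssize_t)sizeof(fa)) {
		perror("getxattr");
		return 1;
	}
	printf("NTFS attrib: 0x%08x\n", (unsigned int)fa);
	return 0;
}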
--
2.30.0
From: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
mainline inclusion
from mainline-v5.15-rc1
commit 3f3b442b5ad2455507c9bfdacf39a3792eb3a6d0
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
This adds bitmap support (bit-range helpers and the free-cluster bitmap)
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/bitfunc.c | 135 ++++
fs/ntfs3/bitmap.c | 1519 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 1654 insertions(+)
create mode 100644 fs/ntfs3/bitfunc.c
create mode 100644 fs/ntfs3/bitmap.c
diff --git a/fs/ntfs3/bitfunc.c b/fs/ntfs3/bitfunc.c
new file mode 100644
index 000000000000..2de5faef2721
--- /dev/null
+++ b/fs/ntfs3/bitfunc.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+#define BITS_IN_SIZE_T (sizeof(size_t) * 8)
+
+/*
+ * fill_mask[i] - first i bits are '1', i = 0,1,2,3,4,5,6,7,8
+ * fill_mask[i] = 0xFF >> (8-i)
+ */
+static const u8 fill_mask[] = { 0x00, 0x01, 0x03, 0x07, 0x0F,
+ 0x1F, 0x3F, 0x7F, 0xFF };
+
+/*
+ * zero_mask[i] - first i bits are '0', i = 0,1,2,3,4,5,6,7,8
+ * zero_mask[i] = 0xFF << i
+ */
+static const u8 zero_mask[] = { 0xFF, 0xFE, 0xFC, 0xF8, 0xF0,
+ 0xE0, 0xC0, 0x80, 0x00 };
+
+/*
+ * are_bits_clear
+ *
+ * Returns true if all bits [bit, bit+nbits) are zeros "0"
+ */
+bool are_bits_clear(const ulong *lmap, size_t bit, size_t nbits)
+{
+ size_t pos = bit & 7;
+ const u8 *map = (u8 *)lmap + (bit >> 3);
+
+ if (pos) {
+ if (8 - pos >= nbits)
+ return !nbits || !(*map & fill_mask[pos + nbits] &
+ zero_mask[pos]);
+
+ if (*map++ & zero_mask[pos])
+ return false;
+ nbits -= 8 - pos;
+ }
+
+ pos = ((size_t)map) & (sizeof(size_t) - 1);
+ if (pos) {
+ pos = sizeof(size_t) - pos;
+ if (nbits >= pos * 8) {
+ for (nbits -= pos * 8; pos; pos--, map++) {
+ if (*map)
+ return false;
+ }
+ }
+ }
+
+ for (pos = nbits / BITS_IN_SIZE_T; pos; pos--, map += sizeof(size_t)) {
+ if (*((size_t *)map))
+ return false;
+ }
+
+ for (pos = (nbits % BITS_IN_SIZE_T) >> 3; pos; pos--, map++) {
+ if (*map)
+ return false;
+ }
+
+ pos = nbits & 7;
+ if (pos && (*map & fill_mask[pos]))
+ return false;
+
+ // All bits are zero
+ return true;
+}
+
+/*
+ * are_bits_set
+ *
+ * Returns true if all bits [bit, bit+nbits) are ones "1"
+ */
+bool are_bits_set(const ulong *lmap, size_t bit, size_t nbits)
+{
+ u8 mask;
+ size_t pos = bit & 7;
+ const u8 *map = (u8 *)lmap + (bit >> 3);
+
+ if (pos) {
+ if (8 - pos >= nbits) {
+ mask = fill_mask[pos + nbits] & zero_mask[pos];
+ return !nbits || (*map & mask) == mask;
+ }
+
+ mask = zero_mask[pos];
+ if ((*map++ & mask) != mask)
+ return false;
+ nbits -= 8 - pos;
+ }
+
+ pos = ((size_t)map) & (sizeof(size_t) - 1);
+ if (pos) {
+ pos = sizeof(size_t) - pos;
+ if (nbits >= pos * 8) {
+ for (nbits -= pos * 8; pos; pos--, map++) {
+ if (*map != 0xFF)
+ return false;
+ }
+ }
+ }
+
+ for (pos = nbits / BITS_IN_SIZE_T; pos; pos--, map += sizeof(size_t)) {
+ if (*((size_t *)map) != MINUS_ONE_T)
+ return false;
+ }
+
+ for (pos = (nbits % BITS_IN_SIZE_T) >> 3; pos; pos--, map++) {
+ if (*map != 0xFF)
+ return false;
+ }
+
+ pos = nbits & 7;
+ if (pos) {
+ u8 mask = fill_mask[pos];
+
+ if ((*map & mask) != mask)
+ return false;
+ }
+
+ // All bits are ones
+ return true;
+}
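To see how the two mask tables compose, here is a tiny standalone model of only the sub-byte fast path of are_bits_clear(); the helper byte_bits_clear is hypothetical. fill_mask[pos + nbits] keeps the low pos + nbits bits and zero_mask[pos] discards the low pos bits, so their AND selects exactly bits [pos, pos + nbits) of one byte.

#include <stdint.h>
#include <stdio.h>

/* Model of the 'if (8 - pos >= nbits)' branch of are_bits_clear(). */
static int byte_bits_clear(uint8_t map, unsigned int pos, unsigned int nbits)
{
	uint8_t fill = 0xFF >> (8 - (pos + nbits)); /* low pos+nbits bits set */
	uint8_t zero = 0xFF << pos;                 /* low pos bits clear */

	return !nbits || !(map & fill & zero);
}

int main(void)
{
	printf("%d\n", byte_bits_clear(0x18, 0, 3)); /* bits 0..2 clear -> 1 */
	printf("%d\n", byte_bits_clear(0x18, 3, 2)); /* bits 3..4 set   -> 0 */
	return 0;
}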
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
new file mode 100644
index 000000000000..32aab0031221
--- /dev/null
+++ b/fs/ntfs3/bitmap.c
@@ -0,0 +1,1519 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ * This code builds two trees of free clusters extents.
+ * Trees are sorted by start of extent and by length of extent.
+ * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in trees.
+ * In extreme case code reads on-disk bitmap to find free clusters
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+/*
+ * Maximum number of extents in tree.
+ */
+#define NTFS_MAX_WND_EXTENTS (32u * 1024u)
+
+struct rb_node_key {
+ struct rb_node node;
+ size_t key;
+};
+
+/*
+ * Tree is sorted by start (key)
+ */
+struct e_node {
+ struct rb_node_key start; /* Tree sorted by start */
+ struct rb_node_key count; /* Tree sorted by len*/
+};
+
+static int wnd_rescan(struct wnd_bitmap *wnd);
+static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw);
+static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits);
+
+static struct kmem_cache *ntfs_enode_cachep;
+
+int __init ntfs3_init_bitmap(void)
+{
+ ntfs_enode_cachep =
+ kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0,
+ SLAB_RECLAIM_ACCOUNT, NULL);
+ return ntfs_enode_cachep ? 0 : -ENOMEM;
+}
+
+void ntfs3_exit_bitmap(void)
+{
+ kmem_cache_destroy(ntfs_enode_cachep);
+}
+
+static inline u32 wnd_bits(const struct wnd_bitmap *wnd, size_t i)
+{
+ return i + 1 == wnd->nwnd ? wnd->bits_last : wnd->sb->s_blocksize * 8;
+}
+
+/*
+ * b_pos + b_len - biggest free fragment found so far
+ * Scan range [wpos, wend) of window 'buf'
+ * Returns -1 if a run of 'to_alloc' free bits is not found
+ */
+static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend,
+ size_t to_alloc, size_t *prev_tail, size_t *b_pos,
+ size_t *b_len)
+{
+ while (wpos < wend) {
+ size_t free_len;
+ u32 free_bits, end;
+ u32 used = find_next_zero_bit(buf, wend, wpos);
+
+ if (used >= wend) {
+ if (*b_len < *prev_tail) {
+ *b_pos = wbit - *prev_tail;
+ *b_len = *prev_tail;
+ }
+
+ *prev_tail = 0;
+ return -1;
+ }
+
+ if (used > wpos) {
+ wpos = used;
+ if (*b_len < *prev_tail) {
+ *b_pos = wbit - *prev_tail;
+ *b_len = *prev_tail;
+ }
+
+ *prev_tail = 0;
+ }
+
+ /*
+ * Now we have a fragment [wpos, wend) starting with 0
+ */
+ end = wpos + to_alloc - *prev_tail;
+ free_bits = find_next_bit(buf, min(end, wend), wpos);
+
+ free_len = *prev_tail + free_bits - wpos;
+
+ if (*b_len < free_len) {
+ *b_pos = wbit + wpos - *prev_tail;
+ *b_len = free_len;
+ }
+
+ if (free_len >= to_alloc)
+ return wbit + wpos - *prev_tail;
+
+ if (free_bits >= wend) {
+ *prev_tail += free_bits - wpos;
+ return -1;
+ }
+
+ wpos = free_bits + 1;
+
+ *prev_tail = 0;
+ }
+
+ return -1;
+}
+
+/*
+ * wnd_close
+ *
+ * Frees all resources
+ */
+void wnd_close(struct wnd_bitmap *wnd)
+{
+ struct rb_node *node, *next;
+
+ ntfs_free(wnd->free_bits);
+ run_close(&wnd->run);
+
+ node = rb_first(&wnd->start_tree);
+
+ while (node) {
+ next = rb_next(node);
+ rb_erase(node, &wnd->start_tree);
+ kmem_cache_free(ntfs_enode_cachep,
+ rb_entry(node, struct e_node, start.node));
+ node = next;
+ }
+}
+
+static struct rb_node *rb_lookup(struct rb_root *root, size_t v)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *r = NULL;
+
+ while (*p) {
+ struct rb_node_key *k;
+
+ k = rb_entry(*p, struct rb_node_key, node);
+ if (v < k->key) {
+ p = &(*p)->rb_left;
+ } else if (v > k->key) {
+ r = &k->node;
+ p = &(*p)->rb_right;
+ } else {
+ return &k->node;
+ }
+ }
+
+ return r;
+}
+
+/*
+ * rb_insert_count
+ *
+ * Helper function to insert into the special 'count' tree
+ */
+static inline bool rb_insert_count(struct rb_root *root, struct e_node *e)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ size_t e_ckey = e->count.key;
+ size_t e_skey = e->start.key;
+
+ while (*p) {
+ struct e_node *k =
+ rb_entry(parent = *p, struct e_node, count.node);
+
+ if (e_ckey > k->count.key) {
+ p = &(*p)->rb_left;
+ } else if (e_ckey < k->count.key) {
+ p = &(*p)->rb_right;
+ } else if (e_skey < k->start.key) {
+ p = &(*p)->rb_left;
+ } else if (e_skey > k->start.key) {
+ p = &(*p)->rb_right;
+ } else {
+ WARN_ON(1);
+ return false;
+ }
+ }
+
+ rb_link_node(&e->count.node, parent, p);
+ rb_insert_color(&e->count.node, root);
+ return true;
+}
+
+/*
+ * rb_insert_start
+ *
+ * Helper function to insert into the special 'start' tree
+ */
+static inline bool rb_insert_start(struct rb_root *root, struct e_node *e)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ size_t e_skey = e->start.key;
+
+ while (*p) {
+ struct e_node *k;
+
+ parent = *p;
+
+ k = rb_entry(parent, struct e_node, start.node);
+ if (e_skey < k->start.key) {
+ p = &(*p)->rb_left;
+ } else if (e_skey > k->start.key) {
+ p = &(*p)->rb_right;
+ } else {
+ WARN_ON(1);
+ return false;
+ }
+ }
+
+ rb_link_node(&e->start.node, parent, p);
+ rb_insert_color(&e->start.node, root);
+ return true;
+}
+
+/*
+ * wnd_add_free_ext
+ *
+ * adds a new extent of free space
+ * build = 1 when building tree
+ */
+static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len,
+ bool build)
+{
+ struct e_node *e, *e0 = NULL;
+ size_t ib, end_in = bit + len;
+ struct rb_node *n;
+
+ if (build) {
+ /* Use extent_min to filter too short extents */
+ if (wnd->count >= NTFS_MAX_WND_EXTENTS &&
+ len <= wnd->extent_min) {
+ wnd->uptodated = -1;
+ return;
+ }
+ } else {
+ /* Try to find extent before 'bit' */
+ n = rb_lookup(&wnd->start_tree, bit);
+
+ if (!n) {
+ n = rb_first(&wnd->start_tree);
+ } else {
+ e = rb_entry(n, struct e_node, start.node);
+ n = rb_next(n);
+ if (e->start.key + e->count.key == bit) {
+ /* Remove left */
+ bit = e->start.key;
+ len += e->count.key;
+ rb_erase(&e->start.node, &wnd->start_tree);
+ rb_erase(&e->count.node, &wnd->count_tree);
+ wnd->count -= 1;
+ e0 = e;
+ }
+ }
+
+ while (n) {
+ size_t next_end;
+
+ e = rb_entry(n, struct e_node, start.node);
+ next_end = e->start.key + e->count.key;
+ if (e->start.key > end_in)
+ break;
+
+ /* Remove right */
+ n = rb_next(n);
+ len += next_end - end_in;
+ end_in = next_end;
+ rb_erase(&e->start.node, &wnd->start_tree);
+ rb_erase(&e->count.node, &wnd->count_tree);
+ wnd->count -= 1;
+
+ if (!e0)
+ e0 = e;
+ else
+ kmem_cache_free(ntfs_enode_cachep, e);
+ }
+
+ if (wnd->uptodated != 1) {
+ /* Check bits before 'bit' */
+ ib = wnd->zone_bit == wnd->zone_end ||
+ bit < wnd->zone_end
+ ? 0
+ : wnd->zone_end;
+
+ while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) {
+ bit -= 1;
+ len += 1;
+ }
+
+ /* Check bits after 'end_in' */
+ ib = wnd->zone_bit == wnd->zone_end ||
+ end_in > wnd->zone_bit
+ ? wnd->nbits
+ : wnd->zone_bit;
+
+ while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) {
+ end_in += 1;
+ len += 1;
+ }
+ }
+ }
+ /* Insert new fragment */
+ if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
+ if (e0)
+ kmem_cache_free(ntfs_enode_cachep, e0);
+
+ wnd->uptodated = -1;
+
+ /* Compare with smallest fragment */
+ n = rb_last(&wnd->count_tree);
+ e = rb_entry(n, struct e_node, count.node);
+ if (len <= e->count.key)
+ goto out; /* Do not insert small fragments */
+
+ if (build) {
+ struct e_node *e2;
+
+ n = rb_prev(n);
+ e2 = rb_entry(n, struct e_node, count.node);
+ /* smallest fragment will be 'e2->count.key' */
+ wnd->extent_min = e2->count.key;
+ }
+
+ /* Replace smallest fragment by new one */
+ rb_erase(&e->start.node, &wnd->start_tree);
+ rb_erase(&e->count.node, &wnd->count_tree);
+ wnd->count -= 1;
+ } else {
+ e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
+ if (!e) {
+ wnd->uptodated = -1;
+ goto out;
+ }
+
+ if (build && len <= wnd->extent_min)
+ wnd->extent_min = len;
+ }
+ e->start.key = bit;
+ e->count.key = len;
+ if (len > wnd->extent_max)
+ wnd->extent_max = len;
+
+ rb_insert_start(&wnd->start_tree, e);
+ rb_insert_count(&wnd->count_tree, e);
+ wnd->count += 1;
+
+out:;
+}
+
+/*
+ * wnd_remove_free_ext
+ *
+ * removes a run from the cached free space
+ */
+static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len)
+{
+ struct rb_node *n, *n3;
+ struct e_node *e, *e3;
+ size_t end_in = bit + len;
+ size_t end3, end, new_key, new_len, max_new_len;
+
+ /* Try to find extent before 'bit' */
+ n = rb_lookup(&wnd->start_tree, bit);
+
+ if (!n)
+ return;
+
+ e = rb_entry(n, struct e_node, start.node);
+ end = e->start.key + e->count.key;
+
+ new_key = new_len = 0;
+ len = e->count.key;
+
+ /* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n' */
+ if (e->start.key > bit)
+ ;
+ else if (end_in <= end) {
+ /* Range [bit,end_in) inside 'e' */
+ new_key = end_in;
+ new_len = end - end_in;
+ len = bit - e->start.key;
+ } else if (bit > end) {
+ bool bmax = false;
+
+ n3 = rb_next(n);
+
+ while (n3) {
+ e3 = rb_entry(n3, struct e_node, start.node);
+ if (e3->start.key >= end_in)
+ break;
+
+ if (e3->count.key == wnd->extent_max)
+ bmax = true;
+
+ end3 = e3->start.key + e3->count.key;
+ if (end3 > end_in) {
+ e3->start.key = end_in;
+ rb_erase(&e3->count.node, &wnd->count_tree);
+ e3->count.key = end3 - end_in;
+ rb_insert_count(&wnd->count_tree, e3);
+ break;
+ }
+
+ n3 = rb_next(n3);
+ rb_erase(&e3->start.node, &wnd->start_tree);
+ rb_erase(&e3->count.node, &wnd->count_tree);
+ wnd->count -= 1;
+ kmem_cache_free(ntfs_enode_cachep, e3);
+ }
+ if (!bmax)
+ return;
+ n3 = rb_first(&wnd->count_tree);
+ wnd->extent_max =
+ n3 ? rb_entry(n3, struct e_node, count.node)->count.key
+ : 0;
+ return;
+ }
+
+ if (e->count.key != wnd->extent_max) {
+ ;
+ } else if (rb_prev(&e->count.node)) {
+ ;
+ } else {
+ n3 = rb_next(&e->count.node);
+ max_new_len = len > new_len ? len : new_len;
+ if (!n3) {
+ wnd->extent_max = max_new_len;
+ } else {
+ e3 = rb_entry(n3, struct e_node, count.node);
+ wnd->extent_max = max(e3->count.key, max_new_len);
+ }
+ }
+
+ if (!len) {
+ if (new_len) {
+ e->start.key = new_key;
+ rb_erase(&e->count.node, &wnd->count_tree);
+ e->count.key = new_len;
+ rb_insert_count(&wnd->count_tree, e);
+ } else {
+ rb_erase(&e->start.node, &wnd->start_tree);
+ rb_erase(&e->count.node, &wnd->count_tree);
+ wnd->count -= 1;
+ kmem_cache_free(ntfs_enode_cachep, e);
+ }
+ goto out;
+ }
+ rb_erase(&e->count.node, &wnd->count_tree);
+ e->count.key = len;
+ rb_insert_count(&wnd->count_tree, e);
+
+ if (!new_len)
+ goto out;
+
+ if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
+ wnd->uptodated = -1;
+
+ /* Get minimal extent */
+ e = rb_entry(rb_last(&wnd->count_tree), struct e_node,
+ count.node);
+ if (e->count.key > new_len)
+ goto out;
+
+ /* Replace minimum */
+ rb_erase(&e->start.node, &wnd->start_tree);
+ rb_erase(&e->count.node, &wnd->count_tree);
+ wnd->count -= 1;
+ } else {
+ e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
+ if (!e)
+ wnd->uptodated = -1;
+ }
+
+ if (e) {
+ e->start.key = new_key;
+ e->count.key = new_len;
+ rb_insert_start(&wnd->start_tree, e);
+ rb_insert_count(&wnd->count_tree, e);
+ wnd->count += 1;
+ }
+
+out:
+ if (!wnd->count && wnd->uptodated != 1)
+ wnd_rescan(wnd);
+}
+
+/*
+ * wnd_rescan
+ *
+ * Scan the whole bitmap; used during initialization.
+ */
+static int wnd_rescan(struct wnd_bitmap *wnd)
+{
+ int err = 0;
+ size_t prev_tail = 0;
+ struct super_block *sb = wnd->sb;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ u64 lbo, len = 0;
+ u32 blocksize = sb->s_blocksize;
+ u8 cluster_bits = sbi->cluster_bits;
+ u32 wbits = 8 * sb->s_blocksize;
+ u32 used, frb;
+ const ulong *buf;
+ size_t wpos, wbit, iw, vbo;
+ struct buffer_head *bh = NULL;
+ CLST lcn, clen;
+
+ wnd->uptodated = 0;
+ wnd->extent_max = 0;
+ wnd->extent_min = MINUS_ONE_T;
+ wnd->total_zeroes = 0;
+
+ vbo = 0;
+
+ for (iw = 0; iw < wnd->nwnd; iw++) {
+ if (iw + 1 == wnd->nwnd)
+ wbits = wnd->bits_last;
+
+ if (wnd->inited) {
+ if (!wnd->free_bits[iw]) {
+ /* all ones */
+ if (prev_tail) {
+ wnd_add_free_ext(wnd,
+ vbo * 8 - prev_tail,
+ prev_tail, true);
+ prev_tail = 0;
+ }
+ goto next_wnd;
+ }
+ if (wbits == wnd->free_bits[iw]) {
+ /* all zeroes */
+ prev_tail += wbits;
+ wnd->total_zeroes += wbits;
+ goto next_wnd;
+ }
+ }
+
+ if (!len) {
+ u32 off = vbo & sbi->cluster_mask;
+
+ if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits,
+ &lcn, &clen, NULL)) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ lbo = ((u64)lcn << cluster_bits) + off;
+ len = ((u64)clen << cluster_bits) - off;
+ }
+
+ bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
+ if (!bh) {
+ err = -EIO;
+ goto out;
+ }
+
+ buf = (ulong *)bh->b_data;
+
+ used = __bitmap_weight(buf, wbits);
+ if (used < wbits) {
+ frb = wbits - used;
+ wnd->free_bits[iw] = frb;
+ wnd->total_zeroes += frb;
+ }
+
+ wpos = 0;
+ wbit = vbo * 8;
+
+ if (wbit + wbits > wnd->nbits)
+ wbits = wnd->nbits - wbit;
+
+ do {
+ used = find_next_zero_bit(buf, wbits, wpos);
+
+ if (used > wpos && prev_tail) {
+ wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
+ prev_tail, true);
+ prev_tail = 0;
+ }
+
+ wpos = used;
+
+ if (wpos >= wbits) {
+ /* No free blocks */
+ prev_tail = 0;
+ break;
+ }
+
+ frb = find_next_bit(buf, wbits, wpos);
+ if (frb >= wbits) {
+ /* keep last free block */
+ prev_tail += frb - wpos;
+ break;
+ }
+
+ wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
+ frb + prev_tail - wpos, true);
+
+ /* Skip free block and first '1' */
+ wpos = frb + 1;
+ /* Reset previous tail */
+ prev_tail = 0;
+ } while (wpos < wbits);
+
+next_wnd:
+
+ if (bh)
+ put_bh(bh);
+ bh = NULL;
+
+ vbo += blocksize;
+ if (len) {
+ len -= blocksize;
+ lbo += blocksize;
+ }
+ }
+
+ /* Add last block */
+ if (prev_tail)
+ wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true);
+
+ /*
+ * Before the init cycle wnd->uptodated was 0.
+ * If any error occurred or a limit was hit during initialization,
+ * wnd->uptodated is now -1.
+ * If 'uptodated' is still 0, the tree is fully up to date.
+ */
+ if (!wnd->uptodated)
+ wnd->uptodated = 1;
+
+ if (wnd->zone_bit != wnd->zone_end) {
+ size_t zlen = wnd->zone_end - wnd->zone_bit;
+
+ wnd->zone_end = wnd->zone_bit;
+ wnd_zone_set(wnd, wnd->zone_bit, zlen);
+ }
+
+out:
+ return err;
+}
+
+/*
+ * wnd_init
+ */
+int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
+{
+ int err;
+ u32 blocksize = sb->s_blocksize;
+ u32 wbits = blocksize * 8;
+
+ init_rwsem(&wnd->rw_lock);
+
+ wnd->sb = sb;
+ wnd->nbits = nbits;
+ wnd->total_zeroes = nbits;
+ wnd->extent_max = MINUS_ONE_T;
+ wnd->zone_bit = wnd->zone_end = 0;
+ wnd->nwnd = bytes_to_block(sb, bitmap_size(nbits));
+ wnd->bits_last = nbits & (wbits - 1);
+ if (!wnd->bits_last)
+ wnd->bits_last = wbits;
+
+ wnd->free_bits = ntfs_zalloc(wnd->nwnd * sizeof(u16));
+ if (!wnd->free_bits)
+ return -ENOMEM;
+
+ err = wnd_rescan(wnd);
+ if (err)
+ return err;
+
+ wnd->inited = true;
+
+ return 0;
+}
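+
+/*
+ * Illustrative usage sketch (an assumption, not mandated by this code):
+ * a typical caller initializes the bitmap once and then allocates under
+ * rw_lock, e.g.:
+ *
+ *	err = wnd_init(wnd, sb, nbits);
+ *	if (!err) {
+ *		size_t lcn, got;
+ *
+ *		down_write(&wnd->rw_lock);
+ *		got = wnd_find(wnd, 16, 0, BITMAP_FIND_MARK_AS_USED, &lcn);
+ *		up_write(&wnd->rw_lock);
+ *	}
+ */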
+
+/*
+ * wnd_map
+ *
+ * Calls ntfs_bread for the requested window.
+ */
+static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw)
+{
+ size_t vbo;
+ CLST lcn, clen;
+ struct super_block *sb = wnd->sb;
+ struct ntfs_sb_info *sbi;
+ struct buffer_head *bh;
+ u64 lbo;
+
+ sbi = sb->s_fs_info;
+ vbo = (u64)iw << sb->s_blocksize_bits;
+
+ if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen,
+ NULL)) {
+ return ERR_PTR(-ENOENT);
+ }
+
+ lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask);
+
+ bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits);
+ if (!bh)
+ return ERR_PTR(-EIO);
+
+ return bh;
+}
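+
+/*
+ * Worked example for the mapping above (illustrative): with 4K blocks,
+ * window iw = 3 starts at byte offset vbo = 3 << 12 = 0x3000 inside the
+ * bitmap stream; run_lookup_entry() yields the 'lcn' of the containing
+ * cluster, and the on-disk byte offset is
+ *
+ *	lbo = ((u64)lcn << cluster_bits) + (vbo & cluster_mask);
+ */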
+
+/*
+ * wnd_set_free
+ *
+ * Marks the bit range [bit, bit + bits) as free.
+ */
+int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
+{
+ int err = 0;
+ struct super_block *sb = wnd->sb;
+ size_t bits0 = bits;
+ u32 wbits = 8 * sb->s_blocksize;
+ size_t iw = bit >> (sb->s_blocksize_bits + 3);
+ u32 wbit = bit & (wbits - 1);
+ struct buffer_head *bh;
+
+ while (iw < wnd->nwnd && bits) {
+ u32 tail, op;
+ ulong *buf;
+
+ if (iw + 1 == wnd->nwnd)
+ wbits = wnd->bits_last;
+
+ tail = wbits - wbit;
+ op = tail < bits ? tail : bits;
+
+ bh = wnd_map(wnd, iw);
+ if (IS_ERR(bh)) {
+ err = PTR_ERR(bh);
+ break;
+ }
+
+ buf = (ulong *)bh->b_data;
+
+ lock_buffer(bh);
+
+ __bitmap_clear(buf, wbit, op);
+
+ wnd->free_bits[iw] += op;
+
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ unlock_buffer(bh);
+ put_bh(bh);
+
+ wnd->total_zeroes += op;
+ bits -= op;
+ wbit = 0;
+ iw += 1;
+ }
+
+ wnd_add_free_ext(wnd, bit, bits0, false);
+
+ return err;
+}
+
+/*
+ * wnd_set_used
+ *
+ * Marks the bit range [bit, bit + bits) as used.
+ */
+int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
+{
+ int err = 0;
+ struct super_block *sb = wnd->sb;
+ size_t bits0 = bits;
+ size_t iw = bit >> (sb->s_blocksize_bits + 3);
+ u32 wbits = 8 * sb->s_blocksize;
+ u32 wbit = bit & (wbits - 1);
+ struct buffer_head *bh;
+
+ while (iw < wnd->nwnd && bits) {
+ u32 tail, op;
+ ulong *buf;
+
+ if (unlikely(iw + 1 == wnd->nwnd))
+ wbits = wnd->bits_last;
+
+ tail = wbits - wbit;
+ op = tail < bits ? tail : bits;
+
+ bh = wnd_map(wnd, iw);
+ if (IS_ERR(bh)) {
+ err = PTR_ERR(bh);
+ break;
+ }
+ buf = (ulong *)bh->b_data;
+
+ lock_buffer(bh);
+
+ __bitmap_set(buf, wbit, op);
+ wnd->free_bits[iw] -= op;
+
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ unlock_buffer(bh);
+ put_bh(bh);
+
+ wnd->total_zeroes -= op;
+ bits -= op;
+ wbit = 0;
+ iw += 1;
+ }
+
+ if (!RB_EMPTY_ROOT(&wnd->start_tree))
+ wnd_remove_free_ext(wnd, bit, bits0);
+
+ return err;
+}
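+
+/*
+ * Illustrative pairing (an assumption): wnd_set_free() and
+ * wnd_set_used() are inverses on the on-disk bits, so a failed
+ * multi-step allocation can be rolled back; 'later_step_failed' below
+ * is a hypothetical condition:
+ *
+ *	if (!wnd_set_used(wnd, bit, len) && later_step_failed)
+ *		wnd_set_free(wnd, bit, len);
+ *
+ * Only the in-memory extent trees may degrade (wnd->uptodated == -1);
+ * the bitmap itself stays consistent.
+ */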
+
+/*
+ * wnd_is_free_hlp
+ *
+ * Returns true if all clusters [bit, bit+bits) are free (bitmap only)
+ */
+static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits)
+{
+ struct super_block *sb = wnd->sb;
+ size_t iw = bit >> (sb->s_blocksize_bits + 3);
+ u32 wbits = 8 * sb->s_blocksize;
+ u32 wbit = bit & (wbits - 1);
+
+ while (iw < wnd->nwnd && bits) {
+ u32 tail, op;
+
+ if (unlikely(iw + 1 == wnd->nwnd))
+ wbits = wnd->bits_last;
+
+ tail = wbits - wbit;
+ op = tail < bits ? tail : bits;
+
+ if (wbits != wnd->free_bits[iw]) {
+ bool ret;
+ struct buffer_head *bh = wnd_map(wnd, iw);
+
+ if (IS_ERR(bh))
+ return false;
+
+ ret = are_bits_clear((ulong *)bh->b_data, wbit, op);
+
+ put_bh(bh);
+ if (!ret)
+ return false;
+ }
+
+ bits -= op;
+ wbit = 0;
+ iw += 1;
+ }
+
+ return true;
+}
+
+/*
+ * wnd_is_free
+ *
+ * Returns true if all clusters [bit, bit+bits) are free
+ */
+bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
+{
+ bool ret;
+ struct rb_node *n;
+ size_t end;
+ struct e_node *e;
+
+ if (RB_EMPTY_ROOT(&wnd->start_tree))
+ goto use_wnd;
+
+ n = rb_lookup(&wnd->start_tree, bit);
+ if (!n)
+ goto use_wnd;
+
+ e = rb_entry(n, struct e_node, start.node);
+
+ end = e->start.key + e->count.key;
+
+ if (bit < end && bit + bits <= end)
+ return true;
+
+use_wnd:
+ ret = wnd_is_free_hlp(wnd, bit, bits);
+
+ return ret;
+}
+
+/*
+ * wnd_is_used
+ *
+ * Returns true if all clusters [bit, bit+bits) are used
+ */
+bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
+{
+ bool ret = false;
+ struct super_block *sb = wnd->sb;
+ size_t iw = bit >> (sb->s_blocksize_bits + 3);
+ u32 wbits = 8 * sb->s_blocksize;
+ u32 wbit = bit & (wbits - 1);
+ size_t end;
+ struct rb_node *n;
+ struct e_node *e;
+
+ if (RB_EMPTY_ROOT(&wnd->start_tree))
+ goto use_wnd;
+
+ end = bit + bits;
+ n = rb_lookup(&wnd->start_tree, end - 1);
+ if (!n)
+ goto use_wnd;
+
+ e = rb_entry(n, struct e_node, start.node);
+ if (e->start.key + e->count.key > bit)
+ return false;
+
+use_wnd:
+ while (iw < wnd->nwnd && bits) {
+ u32 tail, op;
+
+ if (unlikely(iw + 1 == wnd->nwnd))
+ wbits = wnd->bits_last;
+
+ tail = wbits - wbit;
+ op = tail < bits ? tail : bits;
+
+ if (wnd->free_bits[iw]) {
+ bool ret;
+ struct buffer_head *bh = wnd_map(wnd, iw);
+
+ if (IS_ERR(bh))
+ goto out;
+
+ ret = are_bits_set((ulong *)bh->b_data, wbit, op);
+ put_bh(bh);
+ if (!ret)
+ goto out;
+ }
+
+ bits -= op;
+ wbit = 0;
+ iw += 1;
+ }
+ ret = true;
+
+out:
+ return ret;
+}
+
+/*
+ * wnd_find
+ * - flags - BITMAP_FIND_XXX flags
+ *
+ * Looks for free space.
+ * Returns the number of bits found, or 0 if not found.
+ */
+size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint,
+ size_t flags, size_t *allocated)
+{
+ struct super_block *sb;
+ u32 wbits, wpos, wzbit, wzend;
+ size_t fnd, max_alloc, b_len, b_pos;
+ size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend;
+ size_t to_alloc0 = to_alloc;
+ const ulong *buf;
+ const struct e_node *e;
+ const struct rb_node *pr, *cr;
+ u8 log2_bits;
+ bool fbits_valid;
+ struct buffer_head *bh;
+
+ /* Fast check for available free space */
+ if (flags & BITMAP_FIND_FULL) {
+ size_t zeroes = wnd_zeroes(wnd);
+
+ zeroes -= wnd->zone_end - wnd->zone_bit;
+ if (zeroes < to_alloc0)
+ goto no_space;
+
+ if (to_alloc0 > wnd->extent_max)
+ goto no_space;
+ } else {
+ if (to_alloc > wnd->extent_max)
+ to_alloc = wnd->extent_max;
+ }
+
+ if (wnd->zone_bit <= hint && hint < wnd->zone_end)
+ hint = wnd->zone_end;
+
+ max_alloc = wnd->nbits;
+ b_len = b_pos = 0;
+
+ if (hint >= max_alloc)
+ hint = 0;
+
+ if (RB_EMPTY_ROOT(&wnd->start_tree)) {
+ if (wnd->uptodated == 1) {
+ /* extents tree is updated -> no free space */
+ goto no_space;
+ }
+ goto scan_bitmap;
+ }
+
+ e = NULL;
+ if (!hint)
+ goto allocate_biggest;
+
+ /* Use hint: enumerate extents by start >= hint */
+ pr = NULL;
+ cr = wnd->start_tree.rb_node;
+
+ for (;;) {
+ e = rb_entry(cr, struct e_node, start.node);
+
+ if (e->start.key == hint)
+ break;
+
+ if (e->start.key < hint) {
+ pr = cr;
+ cr = cr->rb_right;
+ if (!cr)
+ break;
+ continue;
+ }
+
+ cr = cr->rb_left;
+ if (!cr) {
+ e = pr ? rb_entry(pr, struct e_node, start.node) : NULL;
+ break;
+ }
+ }
+
+ if (!e)
+ goto allocate_biggest;
+
+ if (e->start.key + e->count.key > hint) {
+ /* We have found an extent that contains 'hint' */
+ size_t len = e->start.key + e->count.key - hint;
+
+ if (len >= to_alloc && hint + to_alloc <= max_alloc) {
+ fnd = hint;
+ goto found;
+ }
+
+ if (!(flags & BITMAP_FIND_FULL)) {
+ if (len > to_alloc)
+ len = to_alloc;
+
+ if (hint + len <= max_alloc) {
+ fnd = hint;
+ to_alloc = len;
+ goto found;
+ }
+ }
+ }
+
+allocate_biggest:
+ /* Allocate from biggest free extent */
+ e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node);
+ if (e->count.key != wnd->extent_max)
+ wnd->extent_max = e->count.key;
+
+ if (e->count.key < max_alloc) {
+ if (e->count.key >= to_alloc) {
+ ;
+ } else if (flags & BITMAP_FIND_FULL) {
+ if (e->count.key < to_alloc0) {
+ /* Biggest free block is less than requested */
+ goto no_space;
+ }
+ to_alloc = e->count.key;
+ } else if (-1 != wnd->uptodated) {
+ to_alloc = e->count.key;
+ } else {
+ /* Check if we can use more bits */
+ size_t op, max_check;
+ struct rb_root start_tree;
+
+ memcpy(&start_tree, &wnd->start_tree,
+ sizeof(struct rb_root));
+ memset(&wnd->start_tree, 0, sizeof(struct rb_root));
+
+ max_check = e->start.key + to_alloc;
+ if (max_check > max_alloc)
+ max_check = max_alloc;
+ for (op = e->start.key + e->count.key; op < max_check;
+ op++) {
+ if (!wnd_is_free(wnd, op, 1))
+ break;
+ }
+ memcpy(&wnd->start_tree, &start_tree,
+ sizeof(struct rb_root));
+ to_alloc = op - e->start.key;
+ }
+
+ /* Prepare to return */
+ fnd = e->start.key;
+ if (e->start.key + to_alloc > max_alloc)
+ to_alloc = max_alloc - e->start.key;
+ goto found;
+ }
+
+ if (wnd->uptodated == 1) {
+ /* extents tree is updated -> no free space */
+ goto no_space;
+ }
+
+ b_len = e->count.key;
+ b_pos = e->start.key;
+
+scan_bitmap:
+ sb = wnd->sb;
+ log2_bits = sb->s_blocksize_bits + 3;
+
+ /* At most two ranges [hint, max_alloc) + [0, hint) */
+Again:
+
+ /* TODO: optimize request for case nbits > wbits */
+ iw = hint >> log2_bits;
+ wbits = sb->s_blocksize * 8;
+ wpos = hint & (wbits - 1);
+ prev_tail = 0;
+ fbits_valid = true;
+
+ if (max_alloc == wnd->nbits) {
+ nwnd = wnd->nwnd;
+ } else {
+ size_t t = max_alloc + wbits - 1;
+
+ nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd;
+ }
+
+ /* Enumerate all windows */
+ for (; iw < nwnd; iw++) {
+ wbit = iw << log2_bits;
+
+ if (!wnd->free_bits[iw]) {
+ if (prev_tail > b_len) {
+ b_pos = wbit - prev_tail;
+ b_len = prev_tail;
+ }
+
+ /* Skip fully used window */
+ prev_tail = 0;
+ wpos = 0;
+ continue;
+ }
+
+ if (unlikely(iw + 1 == nwnd)) {
+ if (max_alloc == wnd->nbits) {
+ wbits = wnd->bits_last;
+ } else {
+ size_t t = max_alloc & (wbits - 1);
+
+ if (t) {
+ wbits = t;
+ fbits_valid = false;
+ }
+ }
+ }
+
+ if (wnd->zone_end > wnd->zone_bit) {
+ ebit = wbit + wbits;
+ zbit = max(wnd->zone_bit, wbit);
+ zend = min(wnd->zone_end, ebit);
+
+ /* Here we have a window [wbit, ebit) and zone [zbit, zend) */
+ if (zend <= zbit) {
+ /* Zone does not overlap window */
+ } else {
+ wzbit = zbit - wbit;
+ wzend = zend - wbit;
+
+ /* Zone overlaps window */
+ if (wnd->free_bits[iw] == wzend - wzbit) {
+ prev_tail = 0;
+ wpos = 0;
+ continue;
+ }
+
+ /* Scan the window in two ranges: [wbit, zbit) and [zend, ebit) */
+ bh = wnd_map(wnd, iw);
+
+ if (IS_ERR(bh)) {
+ /* TODO: error */
+ prev_tail = 0;
+ wpos = 0;
+ continue;
+ }
+
+ buf = (ulong *)bh->b_data;
+
+ /* Scan range [wbit, zbit) */
+ if (wpos < wzbit) {
+ /* Scan range [wpos, zbit) */
+ fnd = wnd_scan(buf, wbit, wpos, wzbit,
+ to_alloc, &prev_tail,
+ &b_pos, &b_len);
+ if (fnd != MINUS_ONE_T) {
+ put_bh(bh);
+ goto found;
+ }
+ }
+
+ prev_tail = 0;
+
+ /* Scan range [zend, ebit) */
+ if (wzend < wbits) {
+ fnd = wnd_scan(buf, wbit,
+ max(wzend, wpos), wbits,
+ to_alloc, &prev_tail,
+ &b_pos, &b_len);
+ if (fnd != MINUS_ONE_T) {
+ put_bh(bh);
+ goto found;
+ }
+ }
+
+ wpos = 0;
+ put_bh(bh);
+ continue;
+ }
+ }
+
+ /* Current window does not overlap zone */
+ if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) {
+ /* window is empty */
+ if (prev_tail + wbits >= to_alloc) {
+ fnd = wbit + wpos - prev_tail;
+ goto found;
+ }
+
+ /* Increase 'prev_tail' and process next window */
+ prev_tail += wbits;
+ wpos = 0;
+ continue;
+ }
+
+ /* read window */
+ bh = wnd_map(wnd, iw);
+ if (IS_ERR(bh)) {
+ // TODO: error
+ prev_tail = 0;
+ wpos = 0;
+ continue;
+ }
+
+ buf = (ulong *)bh->b_data;
+
+ /* Scan range [wpos, wbits) */
+ fnd = wnd_scan(buf, wbit, wpos, wbits, to_alloc, &prev_tail,
+ &b_pos, &b_len);
+ put_bh(bh);
+ if (fnd != MINUS_ONE_T)
+ goto found;
+ }
+
+ if (b_len < prev_tail) {
+ /* The last fragment */
+ b_len = prev_tail;
+ b_pos = max_alloc - prev_tail;
+ }
+
+ if (hint) {
+ /*
+ * We have scanned range [hint, max_alloc).
+ * Prepare to scan range [0, hint + to_alloc).
+ */
+ size_t nextmax = hint + to_alloc;
+
+ if (likely(nextmax >= hint) && nextmax < max_alloc)
+ max_alloc = nextmax;
+ hint = 0;
+ goto Again;
+ }
+
+ if (!b_len)
+ goto no_space;
+
+ wnd->extent_max = b_len;
+
+ if (flags & BITMAP_FIND_FULL)
+ goto no_space;
+
+ fnd = b_pos;
+ to_alloc = b_len;
+
+found:
+ if (flags & BITMAP_FIND_MARK_AS_USED) {
+ /* TODO optimize remove extent (pass 'e'?) */
+ if (wnd_set_used(wnd, fnd, to_alloc))
+ goto no_space;
+ } else if (wnd->extent_max != MINUS_ONE_T &&
+ to_alloc > wnd->extent_max) {
+ wnd->extent_max = to_alloc;
+ }
+
+ *allocated = fnd;
+ return to_alloc;
+
+no_space:
+ return 0;
+}
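+
+/*
+ * Illustrative call (an assumption): ask for up to 8 contiguous free
+ * bits near 'hint' and mark them used in one step; a zero return means
+ * nothing was allocated:
+ *
+ *	size_t lcn;
+ *	size_t got = wnd_find(wnd, 8, hint,
+ *			      BITMAP_FIND_MARK_AS_USED, &lcn);
+ *
+ * Adding BITMAP_FIND_FULL makes the request all-or-nothing instead of
+ * best-effort.
+ */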
+
+/*
+ * wnd_extend
+ *
+ * Extend bitmap ($MFT bitmap)
+ */
+int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
+{
+ int err;
+ struct super_block *sb = wnd->sb;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ u32 blocksize = sb->s_blocksize;
+ u32 wbits = blocksize * 8;
+ u32 b0, new_last;
+ size_t bits, iw, new_wnd;
+ size_t old_bits = wnd->nbits;
+ u16 *new_free;
+
+ if (new_bits <= old_bits)
+ return -EINVAL;
+
+ /* align to 8 byte boundary */
+ new_wnd = bytes_to_block(sb, bitmap_size(new_bits));
+ new_last = new_bits & (wbits - 1);
+ if (!new_last)
+ new_last = wbits;
+
+ if (new_wnd != wnd->nwnd) {
+ new_free = ntfs_malloc(new_wnd * sizeof(u16));
+ if (!new_free)
+ return -ENOMEM;
+
+ if (new_free != wnd->free_bits)
+ memcpy(new_free, wnd->free_bits,
+ wnd->nwnd * sizeof(short));
+ memset(new_free + wnd->nwnd, 0,
+ (new_wnd - wnd->nwnd) * sizeof(short));
+ ntfs_free(wnd->free_bits);
+ wnd->free_bits = new_free;
+ }
+
+ /* Zero bits [old_bits,new_bits) */
+ bits = new_bits - old_bits;
+ b0 = old_bits & (wbits - 1);
+
+ for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) {
+ u32 op;
+ size_t frb;
+ u64 vbo, lbo, bytes;
+ struct buffer_head *bh;
+ ulong *buf;
+
+ if (iw + 1 == new_wnd)
+ wbits = new_last;
+
+ op = b0 + bits > wbits ? wbits - b0 : bits;
+ vbo = (u64)iw * blocksize;
+
+ err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes);
+ if (err)
+ break;
+
+ bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
+ if (!bh)
+ return -EIO;
+
+ lock_buffer(bh);
+ buf = (ulong *)bh->b_data;
+
+ __bitmap_clear(buf, b0, blocksize * 8 - b0);
+ frb = wbits - __bitmap_weight(buf, wbits);
+ wnd->total_zeroes += frb - wnd->free_bits[iw];
+ wnd->free_bits[iw] = frb;
+
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ unlock_buffer(bh);
+ /*err = sync_dirty_buffer(bh);*/
+
+ b0 = 0;
+ bits -= op;
+ }
+
+ wnd->nbits = new_bits;
+ wnd->nwnd = new_wnd;
+ wnd->bits_last = new_last;
+
+ wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false);
+
+ return 0;
+}
+
+/*
+ * wnd_zone_set
+ */
+void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len)
+{
+ size_t zlen;
+
+ zlen = wnd->zone_end - wnd->zone_bit;
+ if (zlen)
+ wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false);
+
+ if (!RB_EMPTY_ROOT(&wnd->start_tree) && len)
+ wnd_remove_free_ext(wnd, lcn, len);
+
+ wnd->zone_bit = lcn;
+ wnd->zone_end = lcn + len;
+}
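+
+/*
+ * Example of the zone protocol (illustrative): reserving a range first
+ * returns any previous zone to the free-extent trees, then withholds
+ * the new range from wnd_find():
+ *
+ *	wnd_zone_set(wnd, zone_lcn, zone_len);	// reserve [lcn, lcn+len)
+ *	...
+ *	wnd_zone_set(wnd, 0, 0);		// drop the zone again
+ */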
+
+int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range)
+{
+ int err = 0;
+ struct super_block *sb = sbi->sb;
+ struct wnd_bitmap *wnd = &sbi->used.bitmap;
+ u32 wbits = 8 * sb->s_blocksize;
+ CLST len = 0, lcn = 0, done = 0;
+ CLST minlen = bytes_to_cluster(sbi, range->minlen);
+ CLST lcn_from = bytes_to_cluster(sbi, range->start);
+ size_t iw = lcn_from >> (sb->s_blocksize_bits + 3);
+ u32 wbit = lcn_from & (wbits - 1);
+ const ulong *buf;
+ CLST lcn_to;
+
+ if (!minlen)
+ minlen = 1;
+
+ if (range->len == (u64)-1)
+ lcn_to = wnd->nbits;
+ else
+ lcn_to = bytes_to_cluster(sbi, range->start + range->len);
+
+ down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
+
+ for (; iw < wnd->nbits; iw++, wbit = 0) {
+ CLST lcn_wnd = iw * wbits;
+ struct buffer_head *bh;
+
+ if (lcn_wnd > lcn_to)
+ break;
+
+ if (!wnd->free_bits[iw])
+ continue;
+
+ if (iw + 1 == wnd->nwnd)
+ wbits = wnd->bits_last;
+
+ if (lcn_wnd + wbits > lcn_to)
+ wbits = lcn_to - lcn_wnd;
+
+ bh = wnd_map(wnd, iw);
+ if (IS_ERR(bh)) {
+ err = PTR_ERR(bh);
+ break;
+ }
+
+ buf = (ulong *)bh->b_data;
+
+ for (; wbit < wbits; wbit++) {
+ if (!test_bit(wbit, buf)) {
+ if (!len)
+ lcn = lcn_wnd + wbit;
+ len += 1;
+ continue;
+ }
+ if (len >= minlen) {
+ err = ntfs_discard(sbi, lcn, len);
+ if (err)
+ goto out;
+ done += len;
+ }
+ len = 0;
+ }
+ put_bh(bh);
+ }
+
+ /* Process the last fragment */
+ if (len >= minlen) {
+ err = ntfs_discard(sbi, lcn, len);
+ if (err)
+ goto out;
+ done += len;
+ }
+
+out:
+ range->len = (u64)done << sbi->cluster_bits;
+
+ up_read(&wnd->rw_lock);
+
+ return err;
+}
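+
+/*
+ * Illustrative sketch (an assumption): this is presumably the backend
+ * of the FITRIM ioctl, so discarding the whole volume with 1 MB
+ * granularity would arrive roughly as:
+ *
+ *	struct fstrim_range range = {
+ *		.start = 0,
+ *		.len = (u64)-1,
+ *		.minlen = 1024 * 1024,
+ *	};
+ *	err = ntfs_trim_fs(sbi, &range);
+ *	// on return range.len holds the number of bytes discarded
+ */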
--
2.30.0
08 Dec '21
From: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
mainline inclusion
from mainline-v5.15-rc1
commit 82cae269cfa953032fbb8980a7d554d60fb00b17
category: feature
bugzilla:
https://gitee.com/openeuler/kernel/issues/I4G67J?from=project-issue
CVE: NA
----------------------------------------------------------------------
This adds initialization of the super block
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Yin Xiujiang <yinxiujiang(a)kylinos.cn>
---
fs/ntfs3/fsntfs.c | 2551 +++++++++++++++++++++++++++++++++++++++++++
fs/ntfs3/index.c | 2647 +++++++++++++++++++++++++++++++++++++++++++++
fs/ntfs3/inode.c | 2029 ++++++++++++++++++++++++++++++++++
fs/ntfs3/super.c | 1504 ++++++++++++++++++++++++++
4 files changed, 8731 insertions(+)
create mode 100644 fs/ntfs3/fsntfs.c
create mode 100644 fs/ntfs3/index.c
create mode 100644 fs/ntfs3/inode.c
create mode 100644 fs/ntfs3/super.c
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
new file mode 100644
index 000000000000..92140050fb6c
--- /dev/null
+++ b/fs/ntfs3/fsntfs.c
@@ -0,0 +1,2551 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+// clang-format off
+const struct cpu_str NAME_MFT = {
+ 4, 0, { '$', 'M', 'F', 'T' },
+};
+const struct cpu_str NAME_MIRROR = {
+ 8, 0, { '$', 'M', 'F', 'T', 'M', 'i', 'r', 'r' },
+};
+const struct cpu_str NAME_LOGFILE = {
+ 8, 0, { '$', 'L', 'o', 'g', 'F', 'i', 'l', 'e' },
+};
+const struct cpu_str NAME_VOLUME = {
+ 7, 0, { '$', 'V', 'o', 'l', 'u', 'm', 'e' },
+};
+const struct cpu_str NAME_ATTRDEF = {
+ 8, 0, { '$', 'A', 't', 't', 'r', 'D', 'e', 'f' },
+};
+const struct cpu_str NAME_ROOT = {
+ 1, 0, { '.' },
+};
+const struct cpu_str NAME_BITMAP = {
+ 7, 0, { '$', 'B', 'i', 't', 'm', 'a', 'p' },
+};
+const struct cpu_str NAME_BOOT = {
+ 5, 0, { '$', 'B', 'o', 'o', 't' },
+};
+const struct cpu_str NAME_BADCLUS = {
+ 8, 0, { '$', 'B', 'a', 'd', 'C', 'l', 'u', 's' },
+};
+const struct cpu_str NAME_QUOTA = {
+ 6, 0, { '$', 'Q', 'u', 'o', 't', 'a' },
+};
+const struct cpu_str NAME_SECURE = {
+ 7, 0, { '$', 'S', 'e', 'c', 'u', 'r', 'e' },
+};
+const struct cpu_str NAME_UPCASE = {
+ 7, 0, { '$', 'U', 'p', 'C', 'a', 's', 'e' },
+};
+const struct cpu_str NAME_EXTEND = {
+ 7, 0, { '$', 'E', 'x', 't', 'e', 'n', 'd' },
+};
+const struct cpu_str NAME_OBJID = {
+ 6, 0, { '$', 'O', 'b', 'j', 'I', 'd' },
+};
+const struct cpu_str NAME_REPARSE = {
+ 8, 0, { '$', 'R', 'e', 'p', 'a', 'r', 's', 'e' },
+};
+const struct cpu_str NAME_USNJRNL = {
+ 8, 0, { '$', 'U', 's', 'n', 'J', 'r', 'n', 'l' },
+};
+const __le16 BAD_NAME[4] = {
+ cpu_to_le16('$'), cpu_to_le16('B'), cpu_to_le16('a'), cpu_to_le16('d'),
+};
+const __le16 I30_NAME[4] = {
+ cpu_to_le16('$'), cpu_to_le16('I'), cpu_to_le16('3'), cpu_to_le16('0'),
+};
+const __le16 SII_NAME[4] = {
+ cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('I'), cpu_to_le16('I'),
+};
+const __le16 SDH_NAME[4] = {
+ cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('D'), cpu_to_le16('H'),
+};
+const __le16 SDS_NAME[4] = {
+ cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('D'), cpu_to_le16('S'),
+};
+const __le16 SO_NAME[2] = {
+ cpu_to_le16('$'), cpu_to_le16('O'),
+};
+const __le16 SQ_NAME[2] = {
+ cpu_to_le16('$'), cpu_to_le16('Q'),
+};
+const __le16 SR_NAME[2] = {
+ cpu_to_le16('$'), cpu_to_le16('R'),
+};
+
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+const __le16 WOF_NAME[17] = {
+ cpu_to_le16('W'), cpu_to_le16('o'), cpu_to_le16('f'), cpu_to_le16('C'),
+ cpu_to_le16('o'), cpu_to_le16('m'), cpu_to_le16('p'), cpu_to_le16('r'),
+ cpu_to_le16('e'), cpu_to_le16('s'), cpu_to_le16('s'), cpu_to_le16('e'),
+ cpu_to_le16('d'), cpu_to_le16('D'), cpu_to_le16('a'), cpu_to_le16('t'),
+ cpu_to_le16('a'),
+};
+#endif
+
+// clang-format on
+
+/*
+ * ntfs_fix_pre_write
+ *
+ * inserts fixups into 'rhdr' before writing to disk
+ */
+bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes)
+{
+ u16 *fixup, *ptr;
+ u16 sample;
+ u16 fo = le16_to_cpu(rhdr->fix_off);
+ u16 fn = le16_to_cpu(rhdr->fix_num);
+
+ if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- ||
+ fn * SECTOR_SIZE > bytes) {
+ return false;
+ }
+
+ /* Get fixup pointer */
+ fixup = Add2Ptr(rhdr, fo);
+
+ if (*fixup >= 0x7FFF)
+ *fixup = 1;
+ else
+ *fixup += 1;
+
+ sample = *fixup;
+
+ ptr = Add2Ptr(rhdr, SECTOR_SIZE - sizeof(short));
+
+ while (fn--) {
+ *++fixup = *ptr;
+ *ptr = sample;
+ ptr += SECTOR_SIZE / sizeof(short);
+ }
+ return true;
+}
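+
+/*
+ * Worked example of the fixup scheme above (illustrative): for a 1K
+ * record (two 512-byte sectors) the fixup array holds the update
+ * sequence number followed by one saved u16 per sector. Before the
+ * write, the last u16 of each sector is saved into the array and
+ * overwritten with the (incremented) sequence number:
+ *
+ *	fixup[0] = seq;			// update sequence number
+ *	fixup[1] = sector0_last_u16;	// saved originals
+ *	fixup[2] = sector1_last_u16;
+ *
+ * A torn write leaves a sector whose tail no longer equals 'seq',
+ * which ntfs_fix_post_read() below reports as -E_NTFS_FIXUP.
+ */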
+
+/*
+ * ntfs_fix_post_read
+ *
+ * remove fixups after reading from disk
+ * Returns < 0 if error, 0 if ok, -E_NTFS_FIXUP if fixups need to be updated
+ */
+int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes,
+ bool simple)
+{
+ int ret;
+ u16 *fixup, *ptr;
+ u16 sample, fo, fn;
+
+ fo = le16_to_cpu(rhdr->fix_off);
+ fn = simple ? ((bytes >> SECTOR_SHIFT) + 1)
+ : le16_to_cpu(rhdr->fix_num);
+
+ /* Check errors */
+ if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- ||
+ fn * SECTOR_SIZE > bytes) {
+ return -EINVAL; /* native chkntfs returns ok! */
+ }
+
+ /* Get fixup pointer */
+ fixup = Add2Ptr(rhdr, fo);
+ sample = *fixup;
+ ptr = Add2Ptr(rhdr, SECTOR_SIZE - sizeof(short));
+ ret = 0;
+
+ while (fn--) {
+ /* Test current word */
+ if (*ptr != sample) {
+ /* Fixup does not match! Is it serious error? */
+ ret = -E_NTFS_FIXUP;
+ }
+
+ /* Replace fixup */
+ *ptr = *++fixup;
+ ptr += SECTOR_SIZE / sizeof(short);
+ }
+
+ return ret;
+}
+
+/*
+ * ntfs_extend_init
+ *
+ * loads $Extend file
+ */
+int ntfs_extend_init(struct ntfs_sb_info *sbi)
+{
+ int err;
+ struct super_block *sb = sbi->sb;
+ struct inode *inode, *inode2;
+ struct MFT_REF ref;
+
+ if (sbi->volume.major_ver < 3) {
+ ntfs_notice(sb, "Skip $Extend 'cause NTFS version");
+ return 0;
+ }
+
+ ref.low = cpu_to_le32(MFT_REC_EXTEND);
+ ref.high = 0;
+ ref.seq = cpu_to_le16(MFT_REC_EXTEND);
+ inode = ntfs_iget5(sb, &ref, &NAME_EXTEND);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ntfs_err(sb, "Failed to load $Extend.");
+ inode = NULL;
+ goto out;
+ }
+
+ /* if ntfs_iget5 reads from disk it never returns bad inode */
+ if (!S_ISDIR(inode->i_mode)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Try to find $ObjId */
+ inode2 = dir_search_u(inode, &NAME_OBJID, NULL);
+ if (inode2 && !IS_ERR(inode2)) {
+ if (is_bad_inode(inode2)) {
+ iput(inode2);
+ } else {
+ sbi->objid.ni = ntfs_i(inode2);
+ sbi->objid_no = inode2->i_ino;
+ }
+ }
+
+ /* Try to find $Quota */
+ inode2 = dir_search_u(inode, &NAME_QUOTA, NULL);
+ if (inode2 && !IS_ERR(inode2)) {
+ sbi->quota_no = inode2->i_ino;
+ iput(inode2);
+ }
+
+ /* Try to find $Reparse */
+ inode2 = dir_search_u(inode, &NAME_REPARSE, NULL);
+ if (inode2 && !IS_ERR(inode2)) {
+ sbi->reparse.ni = ntfs_i(inode2);
+ sbi->reparse_no = inode2->i_ino;
+ }
+
+ /* Try to find $UsnJrnl */
+ inode2 = dir_search_u(inode, &NAME_USNJRNL, NULL);
+ if (inode2 && !IS_ERR(inode2)) {
+ sbi->usn_jrnl_no = inode2->i_ino;
+ iput(inode2);
+ }
+
+ err = 0;
+out:
+ iput(inode);
+ return err;
+}
+
+int ntfs_loadlog_and_replay(struct ntfs_inode *ni, struct ntfs_sb_info *sbi)
+{
+ int err = 0;
+ struct super_block *sb = sbi->sb;
+ bool initialized = false;
+ struct MFT_REF ref;
+ struct inode *inode;
+
+ /* Check for 4GB */
+ if (ni->vfs_inode.i_size >= 0x100000000ull) {
+ ntfs_err(sb, "\x24LogFile is too big");
+ err = -EINVAL;
+ goto out;
+ }
+
+ sbi->flags |= NTFS_FLAGS_LOG_REPLAYING;
+
+ ref.low = cpu_to_le32(MFT_REC_MFT);
+ ref.high = 0;
+ ref.seq = cpu_to_le16(1);
+
+ inode = ntfs_iget5(sb, &ref, NULL);
+
+ if (IS_ERR(inode))
+ inode = NULL;
+
+ if (!inode) {
+ /* Try to use mft copy */
+ u64 t64 = sbi->mft.lbo;
+
+ sbi->mft.lbo = sbi->mft.lbo2;
+ inode = ntfs_iget5(sb, &ref, NULL);
+ sbi->mft.lbo = t64;
+ if (IS_ERR(inode))
+ inode = NULL;
+ }
+
+ if (!inode) {
+ err = -EINVAL;
+ ntfs_err(sb, "Failed to load $MFT.");
+ goto out;
+ }
+
+ sbi->mft.ni = ntfs_i(inode);
+
+ /* LogFile should not contain an attribute list */
+ err = ni_load_all_mi(sbi->mft.ni);
+ if (!err)
+ err = log_replay(ni, &initialized);
+
+ iput(inode);
+ sbi->mft.ni = NULL;
+
+ sync_blockdev(sb->s_bdev);
+ invalidate_bdev(sb->s_bdev);
+
+ if (sbi->flags & NTFS_FLAGS_NEED_REPLAY) {
+ err = 0;
+ goto out;
+ }
+
+ if (sb_rdonly(sb) || !initialized)
+ goto out;
+
+ /* Fill LogFile with -1 if it is initialized */
+ err = ntfs_bio_fill_1(sbi, &ni->file.run);
+
+out:
+ sbi->flags &= ~NTFS_FLAGS_LOG_REPLAYING;
+
+ return err;
+}
+
+/*
+ * ntfs_query_def
+ *
+ * returns current ATTR_DEF_ENTRY for given attribute type
+ */
+const struct ATTR_DEF_ENTRY *ntfs_query_def(struct ntfs_sb_info *sbi,
+ enum ATTR_TYPE type)
+{
+ int type_in = le32_to_cpu(type);
+ size_t min_idx = 0;
+ size_t max_idx = sbi->def_entries - 1;
+
+ while (min_idx <= max_idx) {
+ size_t i = min_idx + ((max_idx - min_idx) >> 1);
+ const struct ATTR_DEF_ENTRY *entry = sbi->def_table + i;
+ int diff = le32_to_cpu(entry->type) - type_in;
+
+ if (!diff)
+ return entry;
+ if (diff < 0)
+ min_idx = i + 1;
+ else if (i)
+ max_idx = i - 1;
+ else
+ return NULL;
+ }
+ return NULL;
+}
+
+/*
+ * ntfs_look_for_free_space
+ *
+ * Looks for free space in the bitmap.
+ */
+int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len,
+ CLST *new_lcn, CLST *new_len,
+ enum ALLOCATE_OPT opt)
+{
+ int err;
+ struct super_block *sb = sbi->sb;
+ size_t a_lcn, zlen, zeroes, zlcn, zlen2, ztrim, new_zlen;
+ struct wnd_bitmap *wnd = &sbi->used.bitmap;
+
+ down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
+ if (opt & ALLOCATE_MFT) {
+ CLST alen;
+
+ zlen = wnd_zone_len(wnd);
+
+ if (!zlen) {
+ err = ntfs_refresh_zone(sbi);
+ if (err)
+ goto out;
+
+ zlen = wnd_zone_len(wnd);
+
+ if (!zlen) {
+ ntfs_err(sbi->sb,
+ "no free space to extend mft");
+ err = -ENOSPC;
+ goto out;
+ }
+ }
+
+ lcn = wnd_zone_bit(wnd);
+ alen = zlen > len ? len : zlen;
+
+ wnd_zone_set(wnd, lcn + alen, zlen - alen);
+
+ err = wnd_set_used(wnd, lcn, alen);
+ if (err)
+ goto out;
+
+ *new_lcn = lcn;
+ *new_len = alen;
+ goto ok;
+ }
+
+ /*
+ * Because cluster 0 is always used, lcn == 0 means that we should use
+ * the cached value of 'next_free_lcn' to improve performance.
+ */
+ if (!lcn)
+ lcn = sbi->used.next_free_lcn;
+
+ if (lcn >= wnd->nbits)
+ lcn = 0;
+
+ *new_len = wnd_find(wnd, len, lcn, BITMAP_FIND_MARK_AS_USED, &a_lcn);
+ if (*new_len) {
+ *new_lcn = a_lcn;
+ goto ok;
+ }
+
+ /* Try to use clusters from MftZone */
+ zlen = wnd_zone_len(wnd);
+ zeroes = wnd_zeroes(wnd);
+
+ /* Reject a request that is too big */
+ if (len > zeroes + zlen)
+ goto no_space;
+
+ if (zlen <= NTFS_MIN_MFT_ZONE)
+ goto no_space;
+
+ /* How many clusters to cut from the zone */
+ zlcn = wnd_zone_bit(wnd);
+ zlen2 = zlen >> 1;
+ ztrim = len > zlen ? zlen : (len > zlen2 ? len : zlen2);
+ new_zlen = zlen - ztrim;
+
+ if (new_zlen < NTFS_MIN_MFT_ZONE) {
+ new_zlen = NTFS_MIN_MFT_ZONE;
+ if (new_zlen > zlen)
+ new_zlen = zlen;
+ }
+
+ wnd_zone_set(wnd, zlcn, new_zlen);
+
+ /* Allocate contiguous clusters */
+ *new_len =
+ wnd_find(wnd, len, 0,
+ BITMAP_FIND_MARK_AS_USED | BITMAP_FIND_FULL, &a_lcn);
+ if (*new_len) {
+ *new_lcn = a_lcn;
+ goto ok;
+ }
+
+no_space:
+ up_write(&wnd->rw_lock);
+
+ return -ENOSPC;
+
+ok:
+ err = 0;
+
+ ntfs_unmap_meta(sb, *new_lcn, *new_len);
+
+ if (opt & ALLOCATE_MFT)
+ goto out;
+
+ /* Set hint for next requests */
+ sbi->used.next_free_lcn = *new_lcn + *new_len;
+
+out:
+ up_write(&wnd->rw_lock);
+ return err;
+}
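+
+/*
+ * Worked example of the zone-trim arithmetic above (illustrative):
+ * with zlen = 1000 zone clusters and a request of len = 300,
+ *
+ *	zlen2    = 500;			// half of the zone
+ *	ztrim    = max(len, zlen2) = 500;
+ *	new_zlen = 1000 - 500 = 500;	// never below NTFS_MIN_MFT_ZONE
+ *
+ * so ordinary allocations shrink the MFT zone gradually instead of
+ * destroying it on the first shortage.
+ */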
+
+/*
+ * ntfs_extend_mft
+ *
+ * allocates additional MFT records
+ * sbi->mft.bitmap is locked for write
+ *
+ * NOTE: recursive:
+ * ntfs_look_free_mft ->
+ * ntfs_extend_mft ->
+ * attr_set_size ->
+ * ni_insert_nonresident ->
+ * ni_insert_attr ->
+ * ni_ins_attr_ext ->
+ * ntfs_look_free_mft ->
+ * ntfs_extend_mft
+ * To avoid recursion, always allocate space for two new mft records;
+ * see attrib.c: "at least two mft to avoid recursive loop"
+ */
+static int ntfs_extend_mft(struct ntfs_sb_info *sbi)
+{
+ int err;
+ struct ntfs_inode *ni = sbi->mft.ni;
+ size_t new_mft_total;
+ u64 new_mft_bytes, new_bitmap_bytes;
+ struct ATTRIB *attr;
+ struct wnd_bitmap *wnd = &sbi->mft.bitmap;
+
+ new_mft_total = (wnd->nbits + MFT_INCREASE_CHUNK + 127) & (CLST)~127;
+ new_mft_bytes = (u64)new_mft_total << sbi->record_bits;
+
+ /* Step 1: Resize $MFT::DATA */
+ down_write(&ni->file.run_lock);
+ err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run,
+ new_mft_bytes, NULL, false, &attr);
+
+ if (err) {
+ up_write(&ni->file.run_lock);
+ goto out;
+ }
+
+ attr->nres.valid_size = attr->nres.data_size;
+ new_mft_total = le64_to_cpu(attr->nres.alloc_size) >> sbi->record_bits;
+ ni->mi.dirty = true;
+
+ /* Step 2: Resize $MFT::BITMAP */
+ new_bitmap_bytes = bitmap_size(new_mft_total);
+
+ err = attr_set_size(ni, ATTR_BITMAP, NULL, 0, &sbi->mft.bitmap.run,
+ new_bitmap_bytes, &new_bitmap_bytes, true, NULL);
+
+ /* Refresh Mft Zone if necessary */
+ down_write_nested(&sbi->used.bitmap.rw_lock, BITMAP_MUTEX_CLUSTERS);
+
+ ntfs_refresh_zone(sbi);
+
+ up_write(&sbi->used.bitmap.rw_lock);
+ up_write(&ni->file.run_lock);
+
+ if (err)
+ goto out;
+
+ err = wnd_extend(wnd, new_mft_total);
+
+ if (err)
+ goto out;
+
+ ntfs_clear_mft_tail(sbi, sbi->mft.used, new_mft_total);
+
+ err = _ni_write_inode(&ni->vfs_inode, 0);
+out:
+ return err;
+}
+
+/*
+ * ntfs_look_free_mft
+ *
+ * looks for a free MFT record
+ */
+int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft,
+ struct ntfs_inode *ni, struct mft_inode **mi)
+{
+ int err = 0;
+ size_t zbit, zlen, from, to, fr;
+ size_t mft_total;
+ struct MFT_REF ref;
+ struct super_block *sb = sbi->sb;
+ struct wnd_bitmap *wnd = &sbi->mft.bitmap;
+ u32 ir;
+
+ static_assert(sizeof(sbi->mft.reserved_bitmap) * 8 >=
+ MFT_REC_FREE - MFT_REC_RESERVED);
+
+ if (!mft)
+ down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT);
+
+ zlen = wnd_zone_len(wnd);
+
+ /* Always reserve space for MFT */
+ if (zlen) {
+ if (mft) {
+ zbit = wnd_zone_bit(wnd);
+ *rno = zbit;
+ wnd_zone_set(wnd, zbit + 1, zlen - 1);
+ }
+ goto found;
+ }
+
+ /* No MFT zone. Find the free MFT record nearest to 0 */
+ if (!wnd_find(wnd, 1, MFT_REC_FREE, 0, &zbit)) {
+ /* Resize MFT */
+ mft_total = wnd->nbits;
+
+ err = ntfs_extend_mft(sbi);
+ if (!err) {
+ zbit = mft_total;
+ goto reserve_mft;
+ }
+
+ if (!mft || MFT_REC_FREE == sbi->mft.next_reserved)
+ goto out;
+
+ err = 0;
+
+ /*
+ * Look for a free record in the reserved area [11-16) ==
+ * [MFT_REC_RESERVED, MFT_REC_FREE). The MFT bitmap always
+ * marks it as used.
+ */
+ if (!sbi->mft.reserved_bitmap) {
+ /* Once per session create internal bitmap for 5 bits */
+ sbi->mft.reserved_bitmap = 0xFF;
+
+ ref.high = 0;
+ for (ir = MFT_REC_RESERVED; ir < MFT_REC_FREE; ir++) {
+ struct inode *i;
+ struct ntfs_inode *ni;
+ struct MFT_REC *mrec;
+
+ ref.low = cpu_to_le32(ir);
+ ref.seq = cpu_to_le16(ir);
+
+ i = ntfs_iget5(sb, &ref, NULL);
+ if (IS_ERR(i)) {
+next:
+ ntfs_notice(
+ sb,
+ "Invalid reserved record %x",
+ ref.low);
+ continue;
+ }
+ if (is_bad_inode(i)) {
+ iput(i);
+ goto next;
+ }
+
+ ni = ntfs_i(i);
+
+ mrec = ni->mi.mrec;
+
+ if (!is_rec_base(mrec))
+ goto next;
+
+ if (mrec->hard_links)
+ goto next;
+
+ if (!ni_std(ni))
+ goto next;
+
+ if (ni_find_attr(ni, NULL, NULL, ATTR_NAME,
+ NULL, 0, NULL, NULL))
+ goto next;
+
+ __clear_bit(ir - MFT_REC_RESERVED,
+ &sbi->mft.reserved_bitmap);
+ }
+ }
+
+ /* Scan 5 bits for zero. Bit 0 == MFT_REC_RESERVED */
+ zbit = find_next_zero_bit(&sbi->mft.reserved_bitmap,
+ MFT_REC_FREE, MFT_REC_RESERVED);
+ if (zbit >= MFT_REC_FREE) {
+ sbi->mft.next_reserved = MFT_REC_FREE;
+ goto out;
+ }
+
+ zlen = 1;
+ sbi->mft.next_reserved = zbit;
+ } else {
+reserve_mft:
+ zlen = zbit == MFT_REC_FREE ? (MFT_REC_USER - MFT_REC_FREE) : 4;
+ if (zbit + zlen > wnd->nbits)
+ zlen = wnd->nbits - zbit;
+
+ while (zlen > 1 && !wnd_is_free(wnd, zbit, zlen))
+ zlen -= 1;
+
+ /* [zbit, zbit + zlen) will be used for Mft itself */
+ from = sbi->mft.used;
+ if (from < zbit)
+ from = zbit;
+ to = zbit + zlen;
+ if (from < to) {
+ ntfs_clear_mft_tail(sbi, from, to);
+ sbi->mft.used = to;
+ }
+ }
+
+ if (mft) {
+ *rno = zbit;
+ zbit += 1;
+ zlen -= 1;
+ }
+
+ wnd_zone_set(wnd, zbit, zlen);
+
+found:
+ if (!mft) {
+ /* The request is to get a record for general purposes */
+ if (sbi->mft.next_free < MFT_REC_USER)
+ sbi->mft.next_free = MFT_REC_USER;
+
+ for (;;) {
+ if (sbi->mft.next_free >= sbi->mft.bitmap.nbits) {
+ } else if (!wnd_find(wnd, 1, MFT_REC_USER, 0, &fr)) {
+ sbi->mft.next_free = sbi->mft.bitmap.nbits;
+ } else {
+ *rno = fr;
+ sbi->mft.next_free = *rno + 1;
+ break;
+ }
+
+ err = ntfs_extend_mft(sbi);
+ if (err)
+ goto out;
+ }
+ }
+
+ if (ni && !ni_add_subrecord(ni, *rno, mi)) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* We have found a record that is not reserved for the next MFT */
+ if (*rno >= MFT_REC_FREE)
+ wnd_set_used(wnd, *rno, 1);
+ else if (*rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited)
+ __set_bit(*rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap);
+
+out:
+ if (!mft)
+ up_write(&wnd->rw_lock);
+
+ return err;
+}
+
+/*
+ * ntfs_mark_rec_free
+ *
+ * marks record as free
+ */
+void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno)
+{
+ struct wnd_bitmap *wnd = &sbi->mft.bitmap;
+
+ down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT);
+ if (rno >= wnd->nbits)
+ goto out;
+
+ if (rno >= MFT_REC_FREE) {
+ if (!wnd_is_used(wnd, rno, 1))
+ ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+ else
+ wnd_set_free(wnd, rno, 1);
+ } else if (rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited) {
+ __clear_bit(rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap);
+ }
+
+ if (rno < wnd_zone_bit(wnd))
+ wnd_zone_set(wnd, rno, 1);
+ else if (rno < sbi->mft.next_free && rno >= MFT_REC_USER)
+ sbi->mft.next_free = rno;
+
+out:
+ up_write(&wnd->rw_lock);
+}
+
+/*
+ * ntfs_clear_mft_tail
+ *
+ * formats empty records [from, to)
+ * sbi->mft.bitmap is locked for write
+ */
+int ntfs_clear_mft_tail(struct ntfs_sb_info *sbi, size_t from, size_t to)
+{
+ int err;
+ u32 rs;
+ u64 vbo;
+ struct runs_tree *run;
+ struct ntfs_inode *ni;
+
+ if (from >= to)
+ return 0;
+
+ rs = sbi->record_size;
+ ni = sbi->mft.ni;
+ run = &ni->file.run;
+
+ down_read(&ni->file.run_lock);
+ vbo = (u64)from * rs;
+ for (; from < to; from++, vbo += rs) {
+ struct ntfs_buffers nb;
+
+ err = ntfs_get_bh(sbi, run, vbo, rs, &nb);
+ if (err)
+ goto out;
+
+ err = ntfs_write_bh(sbi, &sbi->new_rec->rhdr, &nb, 0);
+ nb_put(&nb);
+ if (err)
+ goto out;
+ }
+
+out:
+ sbi->mft.used = from;
+ up_read(&ni->file.run_lock);
+ return err;
+}
+
+/*
+ * ntfs_refresh_zone
+ *
+ * refreshes Mft zone
+ * sbi->used.bitmap is locked for rw
+ * sbi->mft.bitmap is locked for write
+ * sbi->mft.ni->file.run_lock for write
+ */
+int ntfs_refresh_zone(struct ntfs_sb_info *sbi)
+{
+ CLST zone_limit, zone_max, lcn, vcn, len;
+ size_t lcn_s, zlen;
+ struct wnd_bitmap *wnd = &sbi->used.bitmap;
+ struct ntfs_inode *ni = sbi->mft.ni;
+
+ /* Do not change anything if the Mft zone is still non-empty */
+ if (wnd_zone_len(wnd))
+ return 0;
+
+ /*
+ * Compute the mft zone in two steps.
+ * It would be nice if we were able to allocate
+ * 1/8 of the total clusters for MFT, but not more than 512 MB.
+ */
+ zone_limit = (512 * 1024 * 1024) >> sbi->cluster_bits;
+ zone_max = wnd->nbits >> 3;
+ if (zone_max > zone_limit)
+ zone_max = zone_limit;
+
+ vcn = bytes_to_cluster(sbi,
+ (u64)sbi->mft.bitmap.nbits << sbi->record_bits);
+
+ if (!run_lookup_entry(&ni->file.run, vcn - 1, &lcn, &len, NULL))
+ lcn = SPARSE_LCN;
+
+ /* We should always find Last Lcn for MFT */
+ if (lcn == SPARSE_LCN)
+ return -EINVAL;
+
+ lcn_s = lcn + 1;
+
+ /* Try to allocate clusters after last MFT run */
+ zlen = wnd_find(wnd, zone_max, lcn_s, 0, &lcn_s);
+ if (!zlen) {
+ ntfs_notice(sbi->sb, "MftZone: unavailable");
+ return 0;
+ }
+
+ /* Truncate too large zone */
+ wnd_zone_set(wnd, lcn_s, zlen);
+
+ return 0;
+}
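+
+/*
+ * Worked example of the sizing above (illustrative): with 4K clusters,
+ * zone_limit = 512 MB >> 12 = 131072 clusters. A 1 TB volume has
+ * ~268M clusters, so zone_max = nbits >> 3 (~33.5M) is capped at
+ * 131072 clusters, i.e. 512 MB reserved after the last MFT run.
+ */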
+
+/*
+ * ntfs_update_mftmirr
+ *
+ * updates $MFTMirr data
+ */
+int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait)
+{
+ int err;
+ struct super_block *sb = sbi->sb;
+ u32 blocksize = sb->s_blocksize;
+ sector_t block1, block2;
+ u32 bytes;
+
+ if (!(sbi->flags & NTFS_FLAGS_MFTMIRR))
+ return 0;
+
+ err = 0;
+ bytes = sbi->mft.recs_mirr << sbi->record_bits;
+ block1 = sbi->mft.lbo >> sb->s_blocksize_bits;
+ block2 = sbi->mft.lbo2 >> sb->s_blocksize_bits;
+
+ for (; bytes >= blocksize; bytes -= blocksize) {
+ struct buffer_head *bh1, *bh2;
+
+ bh1 = sb_bread(sb, block1++);
+ if (!bh1) {
+ err = -EIO;
+ goto out;
+ }
+
+ bh2 = sb_getblk(sb, block2++);
+ if (!bh2) {
+ put_bh(bh1);
+ err = -EIO;
+ goto out;
+ }
+
+ if (buffer_locked(bh2))
+ __wait_on_buffer(bh2);
+
+ lock_buffer(bh2);
+ memcpy(bh2->b_data, bh1->b_data, blocksize);
+ set_buffer_uptodate(bh2);
+ mark_buffer_dirty(bh2);
+ unlock_buffer(bh2);
+
+ put_bh(bh1);
+ bh1 = NULL;
+
+ if (wait)
+ err = sync_dirty_buffer(bh2);
+
+ put_bh(bh2);
+ if (err)
+ goto out;
+ }
+
+ sbi->flags &= ~NTFS_FLAGS_MFTMIRR;
+
+out:
+ return err;
+}
+
+/*
+ * ntfs_set_state
+ *
+ * mount: ntfs_set_state(NTFS_DIRTY_DIRTY)
+ * umount: ntfs_set_state(NTFS_DIRTY_CLEAR)
+ * ntfs error: ntfs_set_state(NTFS_DIRTY_ERROR)
+ */
+int ntfs_set_state(struct ntfs_sb_info *sbi, enum NTFS_DIRTY_FLAGS dirty)
+{
+ int err;
+ struct ATTRIB *attr;
+ struct VOLUME_INFO *info;
+ struct mft_inode *mi;
+ struct ntfs_inode *ni;
+
+ /*
+ * Do not change state if fs was real_dirty.
+ * Do not change state if fs is already dirty(clear).
+ * Do not change anything if mounted read only.
+ */
+ if (sbi->volume.real_dirty || sb_rdonly(sbi->sb))
+ return 0;
+
+ /* Check cached value */
+ if ((dirty == NTFS_DIRTY_CLEAR ? 0 : VOLUME_FLAG_DIRTY) ==
+ (sbi->volume.flags & VOLUME_FLAG_DIRTY))
+ return 0;
+
+ ni = sbi->volume.ni;
+ if (!ni)
+ return -EINVAL;
+
+ mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_DIRTY);
+
+ attr = ni_find_attr(ni, NULL, NULL, ATTR_VOL_INFO, NULL, 0, NULL, &mi);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ info = resident_data_ex(attr, SIZEOF_ATTRIBUTE_VOLUME_INFO);
+ if (!info) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ switch (dirty) {
+ case NTFS_DIRTY_ERROR:
+ ntfs_notice(sbi->sb, "Mark volume as dirty due to NTFS errors");
+ sbi->volume.real_dirty = true;
+ fallthrough;
+ case NTFS_DIRTY_DIRTY:
+ info->flags |= VOLUME_FLAG_DIRTY;
+ break;
+ case NTFS_DIRTY_CLEAR:
+ info->flags &= ~VOLUME_FLAG_DIRTY;
+ break;
+ }
+ /* Cache current volume flags */
+ sbi->volume.flags = info->flags;
+ mi->dirty = true;
+ err = 0;
+
+out:
+ ni_unlock(ni);
+ if (err)
+ return err;
+
+ mark_inode_dirty(&ni->vfs_inode);
+ /*verify(!ntfs_update_mftmirr()); */
+
+ /*
+ * If we used wait=1, sync_inode_metadata waits for the inode's I/O
+ * to finish. It hangs when the media is removed, so wait=0 is sent
+ * down to sync_inode_metadata and filemap_fdatawrite is used for
+ * the data blocks.
+ */
+ err = sync_inode_metadata(&ni->vfs_inode, 0);
+ if (!err)
+ err = filemap_fdatawrite(ni->vfs_inode.i_mapping);
+
+ return err;
+}
+
+/*
+ * security_hash
+ *
+ * Calculates a hash of the security descriptor.
+ */
+static inline __le32 security_hash(const void *sd, size_t bytes)
+{
+ u32 hash = 0;
+ const __le32 *ptr = sd;
+
+ bytes >>= 2;
+ while (bytes--)
+ hash = ((hash >> 0x1D) | (hash << 3)) + le32_to_cpu(*ptr++);
+ return cpu_to_le32(hash);
+}
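+
+/*
+ * Illustrative restatement (not asserted by the patch itself): the loop
+ * above is a rotate-and-add over the 32-bit little-endian words of the
+ * descriptor, i.e. for each word w:
+ *
+ *	hash = rol32(hash, 3) + le32_to_cpu(w);
+ *
+ * since ((hash >> 0x1D) | (hash << 3)) == rol32(hash, 3).
+ */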
+
+int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer)
+{
+ struct block_device *bdev = sb->s_bdev;
+ u32 blocksize = sb->s_blocksize;
+ u64 block = lbo >> sb->s_blocksize_bits;
+ u32 off = lbo & (blocksize - 1);
+ u32 op = blocksize - off;
+
+ for (; bytes; block += 1, off = 0, op = blocksize) {
+ struct buffer_head *bh = __bread(bdev, block, blocksize);
+
+ if (!bh)
+ return -EIO;
+
+ if (op > bytes)
+ op = bytes;
+
+ memcpy(buffer, bh->b_data + off, op);
+
+ put_bh(bh);
+
+ bytes -= op;
+ buffer = Add2Ptr(buffer, op);
+ }
+
+ return 0;
+}
+
+int ntfs_sb_write(struct super_block *sb, u64 lbo, size_t bytes,
+ const void *buf, int wait)
+{
+ u32 blocksize = sb->s_blocksize;
+ struct block_device *bdev = sb->s_bdev;
+ sector_t block = lbo >> sb->s_blocksize_bits;
+ u32 off = lbo & (blocksize - 1);
+ u32 op = blocksize - off;
+ struct buffer_head *bh;
+
+ if (!wait && (sb->s_flags & SB_SYNCHRONOUS))
+ wait = 1;
+
+ for (; bytes; block += 1, off = 0, op = blocksize) {
+ if (op > bytes)
+ op = bytes;
+
+ if (op < blocksize) {
+ bh = __bread(bdev, block, blocksize);
+ if (!bh) {
+ ntfs_err(sb, "failed to read block %llx",
+ (u64)block);
+ return -EIO;
+ }
+ } else {
+ bh = __getblk(bdev, block, blocksize);
+ if (!bh)
+ return -ENOMEM;
+ }
+
+ if (buffer_locked(bh))
+ __wait_on_buffer(bh);
+
+ lock_buffer(bh);
+ if (buf) {
+ memcpy(bh->b_data + off, buf, op);
+ buf = Add2Ptr(buf, op);
+ } else {
+ memset(bh->b_data + off, -1, op);
+ }
+
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ unlock_buffer(bh);
+
+ if (wait) {
+ int err = sync_dirty_buffer(bh);
+
+ if (err) {
+ ntfs_err(
+ sb,
+ "failed to sync buffer at block %llx, error %d",
+ (u64)block, err);
+ put_bh(bh);
+ return err;
+ }
+ }
+
+ put_bh(bh);
+
+ bytes -= op;
+ }
+ return 0;
+}
+
+int ntfs_sb_write_run(struct ntfs_sb_info *sbi, const struct runs_tree *run,
+ u64 vbo, const void *buf, size_t bytes)
+{
+ struct super_block *sb = sbi->sb;
+ u8 cluster_bits = sbi->cluster_bits;
+ u32 off = vbo & sbi->cluster_mask;
+ CLST lcn, clen, vcn = vbo >> cluster_bits, vcn_next;
+ u64 lbo, len;
+ size_t idx;
+
+ if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx))
+ return -ENOENT;
+
+ if (lcn == SPARSE_LCN)
+ return -EINVAL;
+
+ lbo = ((u64)lcn << cluster_bits) + off;
+ len = ((u64)clen << cluster_bits) - off;
+
+ for (;;) {
+ u32 op = len < bytes ? len : bytes;
+ int err = ntfs_sb_write(sb, lbo, op, buf, 0);
+
+ if (err)
+ return err;
+
+ bytes -= op;
+ if (!bytes)
+ break;
+
+ vcn_next = vcn + clen;
+ if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
+ vcn != vcn_next)
+ return -ENOENT;
+
+ if (lcn == SPARSE_LCN)
+ return -EINVAL;
+
+ if (buf)
+ buf = Add2Ptr(buf, op);
+
+ lbo = ((u64)lcn << cluster_bits);
+ len = ((u64)clen << cluster_bits);
+ }
+
+ return 0;
+}
+
+struct buffer_head *ntfs_bread_run(struct ntfs_sb_info *sbi,
+ const struct runs_tree *run, u64 vbo)
+{
+ struct super_block *sb = sbi->sb;
+ u8 cluster_bits = sbi->cluster_bits;
+ CLST lcn;
+ u64 lbo;
+
+ if (!run_lookup_entry(run, vbo >> cluster_bits, &lcn, NULL, NULL))
+ return ERR_PTR(-ENOENT);
+
+ lbo = ((u64)lcn << cluster_bits) + (vbo & sbi->cluster_mask);
+
+ return ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
+}
+
+int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run,
+ u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb)
+{
+ int err;
+ struct super_block *sb = sbi->sb;
+ u32 blocksize = sb->s_blocksize;
+ u8 cluster_bits = sbi->cluster_bits;
+ u32 off = vbo & sbi->cluster_mask;
+ u32 nbh = 0;
+ CLST vcn_next, vcn = vbo >> cluster_bits;
+ CLST lcn, clen;
+ u64 lbo, len;
+ size_t idx;
+ struct buffer_head *bh;
+
+ if (!run) {
+ /* First reading of $Volume + $MFTMirr + LogFile goes here */
+ if (vbo > MFT_REC_VOL * sbi->record_size) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ /* Use the absolute 'MFTCluster' from the boot record to read the record */
+ lbo = vbo + sbi->mft.lbo;
+ len = sbi->record_size;
+ } else if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) {
+ err = -ENOENT;
+ goto out;
+ } else {
+ if (lcn == SPARSE_LCN) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ lbo = ((u64)lcn << cluster_bits) + off;
+ len = ((u64)clen << cluster_bits) - off;
+ }
+
+ off = lbo & (blocksize - 1);
+ if (nb) {
+ nb->off = off;
+ nb->bytes = bytes;
+ }
+
+ for (;;) {
+ u32 len32 = len >= bytes ? bytes : len;
+ sector_t block = lbo >> sb->s_blocksize_bits;
+
+ do {
+ u32 op = blocksize - off;
+
+ if (op > len32)
+ op = len32;
+
+ bh = ntfs_bread(sb, block);
+ if (!bh) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (buf) {
+ memcpy(buf, bh->b_data + off, op);
+ buf = Add2Ptr(buf, op);
+ }
+
+ if (!nb) {
+ put_bh(bh);
+ } else if (nbh >= ARRAY_SIZE(nb->bh)) {
+ err = -EINVAL;
+ goto out;
+ } else {
+ nb->bh[nbh++] = bh;
+ nb->nbufs = nbh;
+ }
+
+ bytes -= op;
+ if (!bytes)
+ return 0;
+ len32 -= op;
+ block += 1;
+ off = 0;
+
+ } while (len32);
+
+ vcn_next = vcn + clen;
+ if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
+ vcn != vcn_next) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ if (lcn == SPARSE_LCN) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ lbo = ((u64)lcn << cluster_bits);
+ len = ((u64)clen << cluster_bits);
+ }
+
+out:
+ if (!nbh)
+ return err;
+
+ while (nbh) {
+ put_bh(nb->bh[--nbh]);
+ nb->bh[nbh] = NULL;
+ }
+
+ nb->nbufs = 0;
+ return err;
+}
+
+/* Returns < 0 if error, 0 if ok, '-E_NTFS_FIXUP' if need to update fixups */
+int ntfs_read_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo,
+ struct NTFS_RECORD_HEADER *rhdr, u32 bytes,
+ struct ntfs_buffers *nb)
+{
+ int err = ntfs_read_run_nb(sbi, run, vbo, rhdr, bytes, nb);
+
+ if (err)
+ return err;
+ return ntfs_fix_post_read(rhdr, nb->bytes, true);
+}
+
+int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo,
+ u32 bytes, struct ntfs_buffers *nb)
+{
+ int err = 0;
+ struct super_block *sb = sbi->sb;
+ u32 blocksize = sb->s_blocksize;
+ u8 cluster_bits = sbi->cluster_bits;
+ CLST vcn_next, vcn = vbo >> cluster_bits;
+ u32 off;
+ u32 nbh = 0;
+ CLST lcn, clen;
+ u64 lbo, len;
+ size_t idx;
+
+ nb->bytes = bytes;
+
+ if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ off = vbo & sbi->cluster_mask;
+ lbo = ((u64)lcn << cluster_bits) + off;
+ len = ((u64)clen << cluster_bits) - off;
+
+ nb->off = off = lbo & (blocksize - 1);
+
+ for (;;) {
+ u32 len32 = len < bytes ? len : bytes;
+ sector_t block = lbo >> sb->s_blocksize_bits;
+
+ do {
+ u32 op;
+ struct buffer_head *bh;
+
+ if (nbh >= ARRAY_SIZE(nb->bh)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ op = blocksize - off;
+ if (op > len32)
+ op = len32;
+
+ if (op == blocksize) {
+ bh = sb_getblk(sb, block);
+ if (!bh) {
+ err = -ENOMEM;
+ goto out;
+ }
+ if (buffer_locked(bh))
+ __wait_on_buffer(bh);
+ set_buffer_uptodate(bh);
+ } else {
+ bh = ntfs_bread(sb, block);
+ if (!bh) {
+ err = -EIO;
+ goto out;
+ }
+ }
+
+ nb->bh[nbh++] = bh;
+ bytes -= op;
+ if (!bytes) {
+ nb->nbufs = nbh;
+ return 0;
+ }
+
+ block += 1;
+ len32 -= op;
+ off = 0;
+ } while (len32);
+
+ vcn_next = vcn + clen;
+ if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
+ vcn != vcn_next) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ lbo = ((u64)lcn << cluster_bits);
+ len = ((u64)clen << cluster_bits);
+ }
+
+out:
+ while (nbh) {
+ put_bh(nb->bh[--nbh]);
+ nb->bh[nbh] = NULL;
+ }
+
+ nb->nbufs = 0;
+
+ return err;
+}
+
+int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr,
+ struct ntfs_buffers *nb, int sync)
+{
+ int err = 0;
+ struct super_block *sb = sbi->sb;
+ u32 block_size = sb->s_blocksize;
+ u32 bytes = nb->bytes;
+ u32 off = nb->off;
+ u16 fo = le16_to_cpu(rhdr->fix_off);
+ u16 fn = le16_to_cpu(rhdr->fix_num);
+ u32 idx;
+ __le16 *fixup;
+ __le16 sample;
+
+ if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- ||
+ fn * SECTOR_SIZE > bytes) {
+ return -EINVAL;
+ }
+
+ for (idx = 0; bytes && idx < nb->nbufs; idx += 1, off = 0) {
+ u32 op = block_size - off;
+ char *bh_data;
+ struct buffer_head *bh = nb->bh[idx];
+ __le16 *ptr, *end_data;
+
+ if (op > bytes)
+ op = bytes;
+
+ if (buffer_locked(bh))
+ __wait_on_buffer(bh);
+
+ lock_buffer(nb->bh[idx]);
+
+ bh_data = bh->b_data + off;
+ end_data = Add2Ptr(bh_data, op);
+ memcpy(bh_data, rhdr, op);
+
+ if (!idx) {
+ u16 t16;
+
+ fixup = Add2Ptr(bh_data, fo);
+ sample = *fixup;
+ t16 = le16_to_cpu(sample);
+ if (t16 >= 0x7FFF) {
+ sample = *fixup = cpu_to_le16(1);
+ } else {
+ sample = cpu_to_le16(t16 + 1);
+ *fixup = sample;
+ }
+
+ *(__le16 *)Add2Ptr(rhdr, fo) = sample;
+ }
+
+ ptr = Add2Ptr(bh_data, SECTOR_SIZE - sizeof(short));
+
+ do {
+ *++fixup = *ptr;
+ *ptr = sample;
+ ptr += SECTOR_SIZE / sizeof(short);
+ } while (ptr < end_data);
+
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ unlock_buffer(bh);
+
+ if (sync) {
+ int err2 = sync_dirty_buffer(bh);
+
+ if (!err && err2)
+ err = err2;
+ }
+
+ bytes -= op;
+ rhdr = Add2Ptr(rhdr, op);
+ }
+
+ return err;
+}
+
+static inline struct bio *ntfs_alloc_bio(u32 nr_vecs)
+{
+ struct bio *bio = bio_alloc(GFP_NOFS | __GFP_HIGH, nr_vecs);
+
+ if (!bio && (current->flags & PF_MEMALLOC)) {
+ while (!bio && (nr_vecs /= 2))
+ bio = bio_alloc(GFP_NOFS | __GFP_HIGH, nr_vecs);
+ }
+ return bio;
+}
+
+/* Read/write pages from/to disk */
+int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run,
+ struct page **pages, u32 nr_pages, u64 vbo, u32 bytes,
+ u32 op)
+{
+ int err = 0;
+ struct bio *new, *bio = NULL;
+ struct super_block *sb = sbi->sb;
+ struct block_device *bdev = sb->s_bdev;
+ struct page *page;
+ u8 cluster_bits = sbi->cluster_bits;
+ CLST lcn, clen, vcn, vcn_next;
+ u32 add, off, page_idx;
+ u64 lbo, len;
+ size_t run_idx;
+ struct blk_plug plug;
+
+ if (!bytes)
+ return 0;
+
+ blk_start_plug(&plug);
+
+ /* Align vbo and bytes to be 512-byte aligned */
+ lbo = (vbo + bytes + 511) & ~511ull;
+ vbo = vbo & ~511ull;
+ bytes = lbo - vbo;
+
+ vcn = vbo >> cluster_bits;
+ if (!run_lookup_entry(run, vcn, &lcn, &clen, &run_idx)) {
+ err = -ENOENT;
+ goto out;
+ }
+ off = vbo & sbi->cluster_mask;
+ page_idx = 0;
+ page = pages[0];
+
+ for (;;) {
+ lbo = ((u64)lcn << cluster_bits) + off;
+ len = ((u64)clen << cluster_bits) - off;
+new_bio:
+ new = ntfs_alloc_bio(nr_pages - page_idx);
+ if (!new) {
+ err = -ENOMEM;
+ goto out;
+ }
+ if (bio) {
+ bio_chain(bio, new);
+ submit_bio(bio);
+ }
+ bio = new;
+ bio_set_dev(bio, bdev);
+ bio->bi_iter.bi_sector = lbo >> 9;
+ bio->bi_opf = op;
+
+ while (len) {
+ off = vbo & (PAGE_SIZE - 1);
+ add = off + len > PAGE_SIZE ? (PAGE_SIZE - off) : len;
+
+ if (bio_add_page(bio, page, add, off) < add)
+ goto new_bio;
+
+ if (bytes <= add)
+ goto out;
+ bytes -= add;
+ vbo += add;
+
+ if (add + off == PAGE_SIZE) {
+ page_idx += 1;
+ if (WARN_ON(page_idx >= nr_pages)) {
+ err = -EINVAL;
+ goto out;
+ }
+ page = pages[page_idx];
+ }
+
+ if (len <= add)
+ break;
+ len -= add;
+ lbo += add;
+ }
+
+ vcn_next = vcn + clen;
+ if (!run_get_entry(run, ++run_idx, &vcn, &lcn, &clen) ||
+ vcn != vcn_next) {
+ err = -ENOENT;
+ goto out;
+ }
+ off = 0;
+ }
+out:
+ if (bio) {
+ if (!err)
+ err = submit_bio_wait(bio);
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
+
+ return err;
+}
+
+/*
+ * Helper for ntfs_loadlog_and_replay:
+ * fills the on-disk logfile range with -1,
+ * which means an empty logfile.
+ */
+int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run)
+{
+ int err = 0;
+ struct super_block *sb = sbi->sb;
+ struct block_device *bdev = sb->s_bdev;
+ u8 cluster_bits = sbi->cluster_bits;
+ struct bio *new, *bio = NULL;
+ CLST lcn, clen;
+ u64 lbo, len;
+ size_t run_idx;
+ struct page *fill;
+ void *kaddr;
+ struct blk_plug plug;
+
+ fill = alloc_page(GFP_KERNEL);
+ if (!fill)
+ return -ENOMEM;
+
+ kaddr = kmap_atomic(fill);
+ memset(kaddr, -1, PAGE_SIZE);
+ kunmap_atomic(kaddr);
+ flush_dcache_page(fill);
+ lock_page(fill);
+
+ if (!run_lookup_entry(run, 0, &lcn, &clen, &run_idx)) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ /*
+ * TODO: try blkdev_issue_write_same
+ */
+ blk_start_plug(&plug);
+ do {
+ lbo = (u64)lcn << cluster_bits;
+ len = (u64)clen << cluster_bits;
+new_bio:
+ new = ntfs_alloc_bio(BIO_MAX_VECS);
+ if (!new) {
+ err = -ENOMEM;
+ break;
+ }
+ if (bio) {
+ bio_chain(bio, new);
+ submit_bio(bio);
+ }
+ bio = new;
+ bio_set_dev(bio, bdev);
+ bio->bi_opf = REQ_OP_WRITE;
+ bio->bi_iter.bi_sector = lbo >> 9;
+
+ for (;;) {
+ u32 add = len > PAGE_SIZE ? PAGE_SIZE : len;
+
+ if (bio_add_page(bio, fill, add, 0) < add)
+ goto new_bio;
+
+ lbo += add;
+ if (len <= add)
+ break;
+ len -= add;
+ }
+ } while (run_get_entry(run, ++run_idx, NULL, &lcn, &clen));
+
+ if (bio) {
+ if (!err)
+ err = submit_bio_wait(bio);
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
+out:
+ unlock_page(fill);
+ put_page(fill);
+
+ return err;
+}
+
+int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run,
+ u64 vbo, u64 *lbo, u64 *bytes)
+{
+ u32 off;
+ CLST lcn, len;
+ u8 cluster_bits = sbi->cluster_bits;
+
+ if (!run_lookup_entry(run, vbo >> cluster_bits, &lcn, &len, NULL))
+ return -ENOENT;
+
+ off = vbo & sbi->cluster_mask;
+ *lbo = lcn == SPARSE_LCN ? -1 : (((u64)lcn << cluster_bits) + off);
+ *bytes = ((u64)len << cluster_bits) - off;
+
+ return 0;
+}
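+
+/*
+ * Worked example (illustrative): with 4K clusters (cluster_bits == 12)
+ * and vbo = 0x5010, the lookup is for vcn = 5; if that maps to
+ * lcn = 0x100 with len = 2 clusters, then
+ *
+ *	off    = 0x5010 & 0xFFF = 0x10;
+ *	*lbo   = (0x100 << 12) + 0x10 = 0x100010;
+ *	*bytes = (2 << 12) - 0x10 = 0x1FF0;
+ */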
+
+struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir)
+{
+ int err = 0;
+ struct super_block *sb = sbi->sb;
+ struct inode *inode = new_inode(sb);
+ struct ntfs_inode *ni;
+
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ ni = ntfs_i(inode);
+
+ err = mi_format_new(&ni->mi, sbi, rno, dir ? RECORD_FLAG_DIR : 0,
+ false);
+ if (err)
+ goto out;
+
+ inode->i_ino = rno;
+ if (insert_inode_locked(inode) < 0) {
+ err = -EIO;
+ goto out;
+ }
+
+out:
+ if (err) {
+ iput(inode);
+ ni = ERR_PTR(err);
+ }
+ return ni;
+}
+
+/*
+ * O:BAG:BAD:(A;OICI;FA;;;WD)
+ * owner S-1-5-32-544 (Administrators)
+ * group S-1-5-32-544 (Administrators)
+ * ACE: allow S-1-1-0 (Everyone) with FILE_ALL_ACCESS
+ */
+const u8 s_default_security[] __aligned(8) = {
+ 0x01, 0x00, 0x04, 0x80, 0x30, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x1C, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x03, 0x14, 0x00, 0xFF, 0x01, 0x1F, 0x00,
+ 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x20, 0x00, 0x00, 0x00,
+ 0x20, 0x02, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05,
+ 0x20, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00,
+};
+
+static_assert(sizeof(s_default_security) == 0x50);
+
+static inline u32 sid_length(const struct SID *sid)
+{
+ return struct_size(sid, SubAuthority, sid->SubAuthorityCount);
+}
+
+/*
+ * Thanks to Mark Harmstone for the idea.
+ */
+static bool is_acl_valid(const struct ACL *acl, u32 len)
+{
+ const struct ACE_HEADER *ace;
+ u32 i;
+ u16 ace_count, ace_size;
+
+ if (acl->AclRevision != ACL_REVISION &&
+ acl->AclRevision != ACL_REVISION_DS) {
+ /*
+ * This value should be ACL_REVISION, unless the ACL contains an
+ * object-specific ACE, in which case this value must be ACL_REVISION_DS.
+ * All ACEs in an ACL must be at the same revision level.
+ */
+ return false;
+ }
+
+ if (acl->Sbz1)
+ return false;
+
+ if (le16_to_cpu(acl->AclSize) > len)
+ return false;
+
+ if (acl->Sbz2)
+ return false;
+
+ len -= sizeof(struct ACL);
+ ace = (struct ACE_HEADER *)&acl[1];
+ ace_count = le16_to_cpu(acl->AceCount);
+
+ for (i = 0; i < ace_count; i++) {
+ if (len < sizeof(struct ACE_HEADER))
+ return false;
+
+ ace_size = le16_to_cpu(ace->AceSize);
+ if (len < ace_size)
+ return false;
+
+ len -= ace_size;
+ ace = Add2Ptr(ace, ace_size);
+ }
+
+ return true;
+}
+
+bool is_sd_valid(const struct SECURITY_DESCRIPTOR_RELATIVE *sd, u32 len)
+{
+ u32 sd_owner, sd_group, sd_sacl, sd_dacl;
+
+ if (len < sizeof(struct SECURITY_DESCRIPTOR_RELATIVE))
+ return false;
+
+ if (sd->Revision != 1)
+ return false;
+
+ if (sd->Sbz1)
+ return false;
+
+ if (!(sd->Control & SE_SELF_RELATIVE))
+ return false;
+
+ sd_owner = le32_to_cpu(sd->Owner);
+ if (sd_owner) {
+ const struct SID *owner = Add2Ptr(sd, sd_owner);
+
+ if (sd_owner + offsetof(struct SID, SubAuthority) > len)
+ return false;
+
+ if (owner->Revision != 1)
+ return false;
+
+ if (sd_owner + sid_length(owner) > len)
+ return false;
+ }
+
+ sd_group = le32_to_cpu(sd->Group);
+ if (sd_group) {
+ const struct SID *group = Add2Ptr(sd, sd_group);
+
+ if (sd_group + offsetof(struct SID, SubAuthority) > len)
+ return false;
+
+ if (group->Revision != 1)
+ return false;
+
+ if (sd_group + sid_length(group) > len)
+ return false;
+ }
+
+ sd_sacl = le32_to_cpu(sd->Sacl);
+ if (sd_sacl) {
+ const struct ACL *sacl = Add2Ptr(sd, sd_sacl);
+
+ if (sd_sacl + sizeof(struct ACL) > len)
+ return false;
+
+ if (!is_acl_valid(sacl, len - sd_sacl))
+ return false;
+ }
+
+ sd_dacl = le32_to_cpu(sd->Dacl);
+ if (sd_dacl) {
+ const struct ACL *dacl = Add2Ptr(sd, sd_dacl);
+
+ if (sd_dacl + sizeof(struct ACL) > len)
+ return false;
+
+ if (!is_acl_valid(dacl, len - sd_dacl))
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * ntfs_security_init
+ *
+ * loads and parses $Secure
+ */
+int ntfs_security_init(struct ntfs_sb_info *sbi)
+{
+ int err;
+ struct super_block *sb = sbi->sb;
+ struct inode *inode;
+ struct ntfs_inode *ni;
+ struct MFT_REF ref;
+ struct ATTRIB *attr;
+ struct ATTR_LIST_ENTRY *le;
+ u64 sds_size;
+ size_t cnt, off;
+ struct NTFS_DE *ne;
+ struct NTFS_DE_SII *sii_e;
+ struct ntfs_fnd *fnd_sii = NULL;
+ const struct INDEX_ROOT *root_sii;
+ const struct INDEX_ROOT *root_sdh;
+ struct ntfs_index *indx_sdh = &sbi->security.index_sdh;
+ struct ntfs_index *indx_sii = &sbi->security.index_sii;
+
+ ref.low = cpu_to_le32(MFT_REC_SECURE);
+ ref.high = 0;
+ ref.seq = cpu_to_le16(MFT_REC_SECURE);
+
+ inode = ntfs_iget5(sb, &ref, &NAME_SECURE);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ntfs_err(sb, "Failed to load $Secure.");
+ inode = NULL;
+ goto out;
+ }
+
+ ni = ntfs_i(inode);
+
+ le = NULL;
+
+ attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SDH_NAME,
+ ARRAY_SIZE(SDH_NAME), NULL, NULL);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ root_sdh = resident_data(attr);
+ if (root_sdh->type != ATTR_ZERO ||
+ root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = indx_init(indx_sdh, sbi, attr, INDEX_MUTEX_SDH);
+ if (err)
+ goto out;
+
+ attr = ni_find_attr(ni, attr, &le, ATTR_ROOT, SII_NAME,
+ ARRAY_SIZE(SII_NAME), NULL, NULL);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ root_sii = resident_data(attr);
+ if (root_sii->type != ATTR_ZERO ||
+ root_sii->rule != NTFS_COLLATION_TYPE_UINT) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = indx_init(indx_sii, sbi, attr, INDEX_MUTEX_SII);
+ if (err)
+ goto out;
+
+ fnd_sii = fnd_get();
+ if (!fnd_sii) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ sds_size = inode->i_size;
+
+ /* Find the last valid Id */
+ sbi->security.next_id = SECURITY_ID_FIRST;
+ /* Always write new security descriptors at the end of the current bucket. */
+ sbi->security.next_off =
+ Quad2Align(sds_size - SecurityDescriptorsBlockSize);
+
+ cnt = 0;
+ off = 0;
+ ne = NULL;
+
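+ /*
+ * Enumerate all $SII entries to find the largest security id in
+ * use; new descriptors are then assigned ids above it.
+ */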
+ for (;;) {
+ u32 next_id;
+
+ err = indx_find_raw(indx_sii, ni, root_sii, &ne, &off, fnd_sii);
+ if (err || !ne)
+ break;
+
+ sii_e = (struct NTFS_DE_SII *)ne;
+ if (le16_to_cpu(ne->view.data_size) < SIZEOF_SECURITY_HDR)
+ continue;
+
+ next_id = le32_to_cpu(sii_e->sec_id) + 1;
+ if (next_id >= sbi->security.next_id)
+ sbi->security.next_id = next_id;
+
+ cnt += 1;
+ }
+
+ sbi->security.ni = ni;
+ inode = NULL;
+out:
+ iput(inode);
+ fnd_put(fnd_sii);
+
+ return err;
+}
+
+/*
+ * ntfs_get_security_by_id
+ *
+ * reads a security descriptor by its id
+ */
+int ntfs_get_security_by_id(struct ntfs_sb_info *sbi, __le32 security_id,
+ struct SECURITY_DESCRIPTOR_RELATIVE **sd,
+ size_t *size)
+{
+ int err;
+ int diff;
+ struct ntfs_inode *ni = sbi->security.ni;
+ struct ntfs_index *indx = &sbi->security.index_sii;
+ void *p = NULL;
+ struct NTFS_DE_SII *sii_e;
+ struct ntfs_fnd *fnd_sii;
+ struct SECURITY_HDR d_security;
+ const struct INDEX_ROOT *root_sii;
+ u32 t32;
+
+ *sd = NULL;
+
+ mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_SECURITY);
+
+ fnd_sii = fnd_get();
+ if (!fnd_sii) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ root_sii = indx_get_root(indx, ni, NULL, NULL);
+ if (!root_sii) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Try to find this SECURITY descriptor in SII indexes */
+ err = indx_find(indx, ni, root_sii, &security_id, sizeof(security_id),
+ NULL, &diff, (struct NTFS_DE **)&sii_e, fnd_sii);
+ if (err)
+ goto out;
+
+ if (diff)
+ goto out;
+
+ t32 = le32_to_cpu(sii_e->sec_hdr.size);
+ if (t32 < SIZEOF_SECURITY_HDR) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (t32 > SIZEOF_SECURITY_HDR + 0x10000) {
+ /*
+ * Security descriptor looks too large;
+ * 0x10000 is an arbitrary upper bound.
+ */
+ err = -EFBIG;
+ goto out;
+ }
+
+ *size = t32 - SIZEOF_SECURITY_HDR;
+
+ p = ntfs_malloc(*size);
+ if (!p) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = ntfs_read_run_nb(sbi, &ni->file.run,
+ le64_to_cpu(sii_e->sec_hdr.off), &d_security,
+ sizeof(d_security), NULL);
+ if (err)
+ goto out;
+
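+ /* The security header on disk must match the copy kept in the $SII entry. */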
+ if (memcmp(&d_security, &sii_e->sec_hdr, SIZEOF_SECURITY_HDR)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = ntfs_read_run_nb(sbi, &ni->file.run,
+ le64_to_cpu(sii_e->sec_hdr.off) +
+ SIZEOF_SECURITY_HDR,
+ p, *size, NULL);
+ if (err)
+ goto out;
+
+ *sd = p;
+ p = NULL;
+
+out:
+ ntfs_free(p);
+ fnd_put(fnd_sii);
+ ni_unlock(ni);
+
+ return err;
+}
+
+/*
+ * ntfs_insert_security
+ *
+ * inserts security descriptor into $Secure::SDS
+ *
+ * The security descriptor stream ($SDS) is organized into chunks of
+ * 256K bytes and contains a mirror copy of each security descriptor.
+ * When writing a security descriptor at location X, another copy is
+ * written at location (X+256K). When a security descriptor would cross
+ * the 256K boundary, the write pointer is advanced by 256K to skip
+ * over the mirror portion.
+ */
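+/*
+ * Example (with 256K == SecurityDescriptorsBlockSize): a descriptor
+ * written at offset 0x10000 is mirrored at 0x50000, and the next block
+ * of real descriptors starts at 0x80000, so data blocks and mirror
+ * blocks alternate every 256K throughout the stream.
+ */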
+int ntfs_insert_security(struct ntfs_sb_info *sbi,
+ const struct SECURITY_DESCRIPTOR_RELATIVE *sd,
+ u32 size_sd, __le32 *security_id, bool *inserted)
+{
+ int err, diff;
+ struct ntfs_inode *ni = sbi->security.ni;
+ struct ntfs_index *indx_sdh = &sbi->security.index_sdh;
+ struct ntfs_index *indx_sii = &sbi->security.index_sii;
+ struct NTFS_DE_SDH *e;
+ struct NTFS_DE_SDH sdh_e;
+ struct NTFS_DE_SII sii_e;
+ struct SECURITY_HDR *d_security;
+ u32 new_sec_size = size_sd + SIZEOF_SECURITY_HDR;
+ u32 aligned_sec_size = Quad2Align(new_sec_size);
+ struct SECURITY_KEY hash_key;
+ struct ntfs_fnd *fnd_sdh = NULL;
+ const struct INDEX_ROOT *root_sdh;
+ const struct INDEX_ROOT *root_sii;
+ u64 mirr_off, new_sds_size;
+ u32 next, left;
+
+ static_assert((1 << Log2OfSecurityDescriptorsBlockSize) ==
+ SecurityDescriptorsBlockSize);
+
+ hash_key.hash = security_hash(sd, size_sd);
+ hash_key.sec_id = SECURITY_ID_INVALID;
+
+ if (inserted)
+ *inserted = false;
+ *security_id = SECURITY_ID_INVALID;
+
+ /* Allocate a temporary buffer. */
+ d_security = ntfs_zalloc(aligned_sec_size);
+ if (!d_security)
+ return -ENOMEM;
+
+ mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_SECURITY);
+
+ fnd_sdh = fnd_get();
+ if (!fnd_sdh) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ root_sdh = indx_get_root(indx_sdh, ni, NULL, NULL);
+ if (!root_sdh) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ root_sii = indx_get_root(indx_sii, ni, NULL, NULL);
+ if (!root_sii) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Check if such a security descriptor already exists:
+ * use the "SDH" index and the hash to get the offset in "SDS".
+ */
+ err = indx_find(indx_sdh, ni, root_sdh, &hash_key, sizeof(hash_key),
+ &d_security->key.sec_id, &diff, (struct NTFS_DE **)&e,
+ fnd_sdh);
+ if (err)
+ goto out;
+
+ while (e) {
+ if (le32_to_cpu(e->sec_hdr.size) == new_sec_size) {
+ err = ntfs_read_run_nb(sbi, &ni->file.run,
+ le64_to_cpu(e->sec_hdr.off),
+ d_security, new_sec_size, NULL);
+ if (err)
+ goto out;
+
+ if (le32_to_cpu(d_security->size) == new_sec_size &&
+ d_security->key.hash == hash_key.hash &&
+ !memcmp(d_security + 1, sd, size_sd)) {
+ *security_id = d_security->key.sec_id;
+ /* Such a security descriptor already exists. */
+ err = 0;
+ goto out;
+ }
+ }
+
+ err = indx_find_sort(indx_sdh, ni, root_sdh,
+ (struct NTFS_DE **)&e, fnd_sdh);
+ if (err)
+ goto out;
+
+ if (!e || e->key.hash != hash_key.hash)
+ break;
+ }
+
+ /* Zero unused space */
+ next = sbi->security.next_off & (SecurityDescriptorsBlockSize - 1);
+ left = SecurityDescriptorsBlockSize - next;
+
+ /* Zero gap until SecurityDescriptorsBlockSize */
+ if (left < new_sec_size) {
+ /* zero "left" bytes from sbi->security.next_off */
+ sbi->security.next_off += SecurityDescriptorsBlockSize + left;
+ }
+
+ /* Zero the tail of the previous security descriptor. */
+ //used = ni->vfs_inode.i_size & (SecurityDescriptorsBlockSize - 1);
+
+ /*
+ * Example:
+ * 0x40438 == ni->vfs_inode.i_size
+ * 0x00440 == sbi->security.next_off
+ * so the range [0x438, 0x440) needs zeroing:
+ * if (next > used) {
+ * u32 tozero = next - used;
+ * zero "tozero" bytes from sbi->security.next_off - tozero;
+ * }
+ */
+
+ /* format new security descriptor */
+ d_security->key.hash = hash_key.hash;
+ d_security->key.sec_id = cpu_to_le32(sbi->security.next_id);
+ d_security->off = cpu_to_le64(sbi->security.next_off);
+ d_security->size = cpu_to_le32(new_sec_size);
+ memcpy(d_security + 1, sd, size_sd);
+
+ /* Write main SDS bucket */
+ err = ntfs_sb_write_run(sbi, &ni->file.run, sbi->security.next_off,
+ d_security, aligned_sec_size);
+
+ if (err)
+ goto out;
+
+ mirr_off = sbi->security.next_off + SecurityDescriptorsBlockSize;
+ new_sds_size = mirr_off + aligned_sec_size;
+
+ if (new_sds_size > ni->vfs_inode.i_size) {
+ err = attr_set_size(ni, ATTR_DATA, SDS_NAME,
+ ARRAY_SIZE(SDS_NAME), &ni->file.run,
+ new_sds_size, &new_sds_size, false, NULL);
+ if (err)
+ goto out;
+ }
+
+ /* Write copy SDS bucket */
+ err = ntfs_sb_write_run(sbi, &ni->file.run, mirr_off, d_security,
+ aligned_sec_size);
+ if (err)
+ goto out;
+
+ /* Fill SII entry */
+ sii_e.de.view.data_off =
+ cpu_to_le16(offsetof(struct NTFS_DE_SII, sec_hdr));
+ sii_e.de.view.data_size = cpu_to_le16(SIZEOF_SECURITY_HDR);
+ sii_e.de.view.res = 0;
+ sii_e.de.size = cpu_to_le16(SIZEOF_SII_DIRENTRY);
+ sii_e.de.key_size = cpu_to_le16(sizeof(d_security->key.sec_id));
+ sii_e.de.flags = 0;
+ sii_e.de.res = 0;
+ sii_e.sec_id = d_security->key.sec_id;
+ memcpy(&sii_e.sec_hdr, d_security, SIZEOF_SECURITY_HDR);
+
+ err = indx_insert_entry(indx_sii, ni, &sii_e.de, NULL, NULL);
+ if (err)
+ goto out;
+
+ /* Fill SDH entry */
+ sdh_e.de.view.data_off =
+ cpu_to_le16(offsetof(struct NTFS_DE_SDH, sec_hdr));
+ sdh_e.de.view.data_size = cpu_to_le16(SIZEOF_SECURITY_HDR);
+ sdh_e.de.view.res = 0;
+ sdh_e.de.size = cpu_to_le16(SIZEOF_SDH_DIRENTRY);
+ sdh_e.de.key_size = cpu_to_le16(sizeof(sdh_e.key));
+ sdh_e.de.flags = 0;
+ sdh_e.de.res = 0;
+ sdh_e.key.hash = d_security->key.hash;
+ sdh_e.key.sec_id = d_security->key.sec_id;
+ memcpy(&sdh_e.sec_hdr, d_security, SIZEOF_SECURITY_HDR);
+ sdh_e.magic[0] = cpu_to_le16('I');
+ sdh_e.magic[1] = cpu_to_le16('I');
+
+ fnd_clear(fnd_sdh);
+ err = indx_insert_entry(indx_sdh, ni, &sdh_e.de, (void *)(size_t)1,
+ fnd_sdh);
+ if (err)
+ goto out;
+
+ *security_id = d_security->key.sec_id;
+ if (inserted)
+ *inserted = true;
+
+ /* Update Id and offset for next descriptor */
+ sbi->security.next_id += 1;
+ sbi->security.next_off += aligned_sec_size;
+
+out:
+ fnd_put(fnd_sdh);
+ mark_inode_dirty(&ni->vfs_inode);
+ ni_unlock(ni);
+ ntfs_free(d_security);
+
+ return err;
+}
+
+/*
+ * ntfs_reparse_init
+ *
+ * loads and parses $Extend/$Reparse
+ */
+int ntfs_reparse_init(struct ntfs_sb_info *sbi)
+{
+ int err;
+ struct ntfs_inode *ni = sbi->reparse.ni;
+ struct ntfs_index *indx = &sbi->reparse.index_r;
+ struct ATTRIB *attr;
+ struct ATTR_LIST_ENTRY *le;
+ const struct INDEX_ROOT *root_r;
+
+ if (!ni)
+ return 0;
+
+ le = NULL;
+ attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SR_NAME,
+ ARRAY_SIZE(SR_NAME), NULL, NULL);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ root_r = resident_data(attr);
+ if (root_r->type != ATTR_ZERO ||
+ root_r->rule != NTFS_COLLATION_TYPE_UINTS) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = indx_init(indx, sbi, attr, INDEX_MUTEX_SR);
+ if (err)
+ goto out;
+
+out:
+ return err;
+}
+
+/*
+ * ntfs_objid_init
+ *
+ * loads and parses $Extend/$ObjId
+ */
+int ntfs_objid_init(struct ntfs_sb_info *sbi)
+{
+ int err;
+ struct ntfs_inode *ni = sbi->objid.ni;
+ struct ntfs_index *indx = &sbi->objid.index_o;
+ struct ATTRIB *attr;
+ struct ATTR_LIST_ENTRY *le;
+ const struct INDEX_ROOT *root;
+
+ if (!ni)
+ return 0;
+
+ le = NULL;
+ attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SO_NAME,
+ ARRAY_SIZE(SO_NAME), NULL, NULL);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ root = resident_data(attr);
+ if (root->type != ATTR_ZERO ||
+ root->rule != NTFS_COLLATION_TYPE_UINTS) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = indx_init(indx, sbi, attr, INDEX_MUTEX_SO);
+ if (err)
+ goto out;
+
+out:
+ return err;
+}
+
+int ntfs_objid_remove(struct ntfs_sb_info *sbi, struct GUID *guid)
+{
+ int err;
+ struct ntfs_inode *ni = sbi->objid.ni;
+ struct ntfs_index *indx = &sbi->objid.index_o;
+
+ if (!ni)
+ return -EINVAL;
+
+ mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_OBJID);
+
+ err = indx_delete_entry(indx, ni, guid, sizeof(*guid), NULL);
+
+ mark_inode_dirty(&ni->vfs_inode);
+ ni_unlock(ni);
+
+ return err;
+}
+
+int ntfs_insert_reparse(struct ntfs_sb_info *sbi, __le32 rtag,
+ const struct MFT_REF *ref)
+{
+ int err;
+ struct ntfs_inode *ni = sbi->reparse.ni;
+ struct ntfs_index *indx = &sbi->reparse.index_r;
+ struct NTFS_DE_R re;
+
+ if (!ni)
+ return -EINVAL;
+
+ memset(&re, 0, sizeof(re));
+
+ re.de.view.data_off = cpu_to_le16(offsetof(struct NTFS_DE_R, zero));
+ re.de.size = cpu_to_le16(sizeof(struct NTFS_DE_R));
+ re.de.key_size = cpu_to_le16(sizeof(re.key));
+
+ re.key.ReparseTag = rtag;
+ memcpy(&re.key.ref, ref, sizeof(*ref));
+
+ mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_REPARSE);
+
+ err = indx_insert_entry(indx, ni, &re.de, NULL, NULL);
+
+ mark_inode_dirty(&ni->vfs_inode);
+ ni_unlock(ni);
+
+ return err;
+}
+
+int ntfs_remove_reparse(struct ntfs_sb_info *sbi, __le32 rtag,
+ const struct MFT_REF *ref)
+{
+ int err, diff;
+ struct ntfs_inode *ni = sbi->reparse.ni;
+ struct ntfs_index *indx = &sbi->reparse.index_r;
+ struct ntfs_fnd *fnd = NULL;
+ struct REPARSE_KEY rkey;
+ struct NTFS_DE_R *re;
+ struct INDEX_ROOT *root_r;
+
+ if (!ni)
+ return -EINVAL;
+
+ rkey.ReparseTag = rtag;
+ rkey.ref = *ref;
+
+ mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_REPARSE);
+
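+ /*
+ * If the tag is known, the key can be deleted directly; otherwise
+ * find the entry by MFT reference alone (ignoring the tag),
+ * recover the full key, and then delete that.
+ */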
+ if (rtag) {
+ err = indx_delete_entry(indx, ni, &rkey, sizeof(rkey), NULL);
+ goto out1;
+ }
+
+ fnd = fnd_get();
+ if (!fnd) {
+ err = -ENOMEM;
+ goto out1;
+ }
+
+ root_r = indx_get_root(indx, ni, NULL, NULL);
+ if (!root_r) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* The context value 1 forces the compare function to ignore rkey.ReparseTag when comparing keys. */
+ err = indx_find(indx, ni, root_r, &rkey, sizeof(rkey), (void *)1, &diff,
+ (struct NTFS_DE **)&re, fnd);
+ if (err)
+ goto out;
+
+ if (memcmp(&re->key.ref, ref, sizeof(*ref))) {
+ /* Impossible: the volume is likely corrupt. */
+ goto out;
+ }
+
+ memcpy(&rkey, &re->key, sizeof(rkey));
+
+ fnd_put(fnd);
+ fnd = NULL;
+
+ err = indx_delete_entry(indx, ni, &rkey, sizeof(rkey), NULL);
+ if (err)
+ goto out;
+
+out:
+ fnd_put(fnd);
+
+out1:
+ mark_inode_dirty(&ni->vfs_inode);
+ ni_unlock(ni);
+
+ return err;
+}
+
+static inline void ntfs_unmap_and_discard(struct ntfs_sb_info *sbi, CLST lcn,
+ CLST len)
+{
+ ntfs_unmap_meta(sbi->sb, lcn, len);
+ ntfs_discard(sbi, lcn, len);
+}
+
+void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim)
+{
+ CLST end, i;
+ struct wnd_bitmap *wnd = &sbi->used.bitmap;
+
+ down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
+ if (!wnd_is_used(wnd, lcn, len)) {
+ ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+
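+ /*
+ * Not every cluster in [lcn, lcn + len) is marked used: walk the
+ * range and free only the used stretches, one sub-run at a time.
+ */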
+ end = lcn + len;
+ len = 0;
+ for (i = lcn; i < end; i++) {
+ if (wnd_is_used(wnd, i, 1)) {
+ if (!len)
+ lcn = i;
+ len += 1;
+ continue;
+ }
+
+ if (!len)
+ continue;
+
+ if (trim)
+ ntfs_unmap_and_discard(sbi, lcn, len);
+
+ wnd_set_free(wnd, lcn, len);
+ len = 0;
+ }
+
+ if (!len)
+ goto out;
+ }
+
+ if (trim)
+ ntfs_unmap_and_discard(sbi, lcn, len);
+ wnd_set_free(wnd, lcn, len);
+
+out:
+ up_write(&wnd->rw_lock);
+}
+
+/*
+ * run_deallocate
+ *
+ * deallocate clusters
+ */
+int run_deallocate(struct ntfs_sb_info *sbi, struct runs_tree *run, bool trim)
+{
+ CLST lcn, len;
+ size_t idx = 0;
+
+ while (run_get_entry(run, idx++, NULL, &lcn, &len)) {
+ if (lcn == SPARSE_LCN)
+ continue;
+
+ mark_as_free_ex(sbi, lcn, len, trim);
+ }
+
+ return 0;
+}
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
new file mode 100644
index 000000000000..6aa9540ece47
--- /dev/null
+++ b/fs/ntfs3/index.c
@@ -0,0 +1,2647 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+static const struct INDEX_NAMES {
+ const __le16 *name;
+ u8 name_len;
+} s_index_names[INDEX_MUTEX_TOTAL] = {
+ { I30_NAME, ARRAY_SIZE(I30_NAME) }, { SII_NAME, ARRAY_SIZE(SII_NAME) },
+ { SDH_NAME, ARRAY_SIZE(SDH_NAME) }, { SO_NAME, ARRAY_SIZE(SO_NAME) },
+ { SQ_NAME, ARRAY_SIZE(SQ_NAME) }, { SR_NAME, ARRAY_SIZE(SR_NAME) },
+};
+
+/*
+ * compare two names in an index
+ * if l1 != 0,
+ * both keys are little-endian on-disk ATTR_FILE_NAME structs;
+ * else
+ * key1 is a cpu_str and key2 is an ATTR_FILE_NAME
+ */
+static int cmp_fnames(const void *key1, size_t l1, const void *key2, size_t l2,
+ const void *data)
+{
+ const struct ATTR_FILE_NAME *f2 = key2;
+ const struct ntfs_sb_info *sbi = data;
+ const struct ATTR_FILE_NAME *f1;
+ u16 fsize2;
+ bool both_case;
+
+ if (l2 <= offsetof(struct ATTR_FILE_NAME, name))
+ return -1;
+
+ fsize2 = fname_full_size(f2);
+ if (l2 < fsize2)
+ return -1;
+
+ both_case = f2->type != FILE_NAME_DOS /*&& !sbi->options.nocase*/;
+ if (!l1) {
+ const struct le_str *s2 = (struct le_str *)&f2->name_len;
+
+ /*
+ * If the names compare equal case-insensitively,
+ * fall back to a case-sensitive comparison.
+ */
+ return ntfs_cmp_names_cpu(key1, s2, sbi->upcase, both_case);
+ }
+
+ f1 = key1;
+ return ntfs_cmp_names(f1->name, f1->name_len, f2->name, f2->name_len,
+ sbi->upcase, both_case);
+}
+
+/* $SII of $Secure and $Q of Quota */
+static int cmp_uint(const void *key1, size_t l1, const void *key2, size_t l2,
+ const void *data)
+{
+ const u32 *k1 = key1;
+ const u32 *k2 = key2;
+
+ if (l2 < sizeof(u32))
+ return -1;
+
+ if (*k1 < *k2)
+ return -1;
+ if (*k1 > *k2)
+ return 1;
+ return 0;
+}
+
+/* $SDH of $Secure */
+static int cmp_sdh(const void *key1, size_t l1, const void *key2, size_t l2,
+ const void *data)
+{
+ const struct SECURITY_KEY *k1 = key1;
+ const struct SECURITY_KEY *k2 = key2;
+ u32 t1, t2;
+
+ if (l2 < sizeof(struct SECURITY_KEY))
+ return -1;
+
+ t1 = le32_to_cpu(k1->hash);
+ t2 = le32_to_cpu(k2->hash);
+
+ /* First value is a hash value itself */
+ if (t1 < t2)
+ return -1;
+ if (t1 > t2)
+ return 1;
+
+ /* Second value is security Id */
+ if (data) {
+ t1 = le32_to_cpu(k1->sec_id);
+ t2 = le32_to_cpu(k2->sec_id);
+ if (t1 < t2)
+ return -1;
+ if (t1 > t2)
+ return 1;
+ }
+
+ return 0;
+}
+
+/* $O of $ObjId and $R of $Reparse */
+static int cmp_uints(const void *key1, size_t l1, const void *key2, size_t l2,
+ const void *data)
+{
+ const __le32 *k1 = key1;
+ const __le32 *k2 = key2;
+ size_t count;
+
+ if ((size_t)data == 1) {
+ /*
+ * ni_delete_all -> ntfs_remove_reparse -> delete all with this reference
+ * k1, k2 - pointers to REPARSE_KEY
+ */
+
+ k1 += 1; // skip REPARSE_KEY.ReparseTag
+ k2 += 1; // skip REPARSE_KEY.ReparseTag
+ if (l2 <= sizeof(int))
+ return -1;
+ l2 -= sizeof(int);
+ if (l1 <= sizeof(int))
+ return 1;
+ l1 -= sizeof(int);
+ }
+
+ if (l2 < sizeof(int))
+ return -1;
+
+ for (count = min(l1, l2) >> 2; count > 0; --count, ++k1, ++k2) {
+ u32 t1 = le32_to_cpu(*k1);
+ u32 t2 = le32_to_cpu(*k2);
+
+ if (t1 > t2)
+ return 1;
+ if (t1 < t2)
+ return -1;
+ }
+
+ if (l1 > l2)
+ return 1;
+ if (l1 < l2)
+ return -1;
+
+ return 0;
+}
+
+static inline NTFS_CMP_FUNC get_cmp_func(const struct INDEX_ROOT *root)
+{
+ switch (root->type) {
+ case ATTR_NAME:
+ if (root->rule == NTFS_COLLATION_TYPE_FILENAME)
+ return &cmp_fnames;
+ break;
+ case ATTR_ZERO:
+ switch (root->rule) {
+ case NTFS_COLLATION_TYPE_UINT:
+ return &cmp_uint;
+ case NTFS_COLLATION_TYPE_SECURITY_HASH:
+ return &cmp_sdh;
+ case NTFS_COLLATION_TYPE_UINTS:
+ return &cmp_uints;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return NULL;
+}
+
+struct bmp_buf {
+ struct ATTRIB *b;
+ struct mft_inode *mi;
+ struct buffer_head *bh;
+ ulong *buf;
+ size_t bit;
+ u32 nbits;
+ u64 new_valid;
+};
+
+static int bmp_buf_get(struct ntfs_index *indx, struct ntfs_inode *ni,
+ size_t bit, struct bmp_buf *bbuf)
+{
+ struct ATTRIB *b;
+ size_t data_size, valid_size, vbo, off = bit >> 3;
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ CLST vcn = off >> sbi->cluster_bits;
+ struct ATTR_LIST_ENTRY *le = NULL;
+ struct buffer_head *bh;
+ struct super_block *sb;
+ u32 blocksize;
+ const struct INDEX_NAMES *in = &s_index_names[indx->type];
+
+ bbuf->bh = NULL;
+
+ b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len,
+ &vcn, &bbuf->mi);
+ bbuf->b = b;
+ if (!b)
+ return -EINVAL;
+
+ if (!b->non_res) {
+ data_size = le32_to_cpu(b->res.data_size);
+
+ if (off >= data_size)
+ return -EINVAL;
+
+ bbuf->buf = (ulong *)resident_data(b);
+ bbuf->bit = 0;
+ bbuf->nbits = data_size * 8;
+
+ return 0;
+ }
+
+ data_size = le64_to_cpu(b->nres.data_size);
+ if (WARN_ON(off >= data_size)) {
+ /* looks like filesystem error */
+ return -EINVAL;
+ }
+
+ valid_size = le64_to_cpu(b->nres.valid_size);
+
+ bh = ntfs_bread_run(sbi, &indx->bitmap_run, off);
+ if (!bh)
+ return -EIO;
+
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
+
+ bbuf->bh = bh;
+
+ if (buffer_locked(bh))
+ __wait_on_buffer(bh);
+
+ lock_buffer(bh);
+
+ sb = sbi->sb;
+ blocksize = sb->s_blocksize;
+
+ vbo = off & ~(size_t)sbi->block_mask;
+
+ bbuf->new_valid = vbo + blocksize;
+ if (bbuf->new_valid <= valid_size)
+ bbuf->new_valid = 0;
+ else if (bbuf->new_valid > data_size)
+ bbuf->new_valid = data_size;
+
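+ /*
+ * The part of the block beyond valid_size was never written;
+ * zero it in memory so stale on-disk data is not read as bits.
+ */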
+ if (vbo >= valid_size) {
+ memset(bh->b_data, 0, blocksize);
+ } else if (vbo + blocksize > valid_size) {
+ u32 voff = valid_size & sbi->block_mask;
+
+ memset(bh->b_data + voff, 0, blocksize - voff);
+ }
+
+ bbuf->buf = (ulong *)bh->b_data;
+ bbuf->bit = 8 * (off & ~(size_t)sbi->block_mask);
+ bbuf->nbits = 8 * blocksize;
+
+ return 0;
+}
+
+static void bmp_buf_put(struct bmp_buf *bbuf, bool dirty)
+{
+ struct buffer_head *bh = bbuf->bh;
+ struct ATTRIB *b = bbuf->b;
+
+ if (!bh) {
+ if (b && !b->non_res && dirty)
+ bbuf->mi->dirty = true;
+ return;
+ }
+
+ if (!dirty)
+ goto out;
+
+ if (bbuf->new_valid) {
+ b->nres.valid_size = cpu_to_le64(bbuf->new_valid);
+ bbuf->mi->dirty = true;
+ }
+
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+
+out:
+ unlock_buffer(bh);
+ put_bh(bh);
+}
+
+/*
+ * indx_mark_used
+ *
+ * marks the bit 'bit' as used
+ */
+static int indx_mark_used(struct ntfs_index *indx, struct ntfs_inode *ni,
+ size_t bit)
+{
+ int err;
+ struct bmp_buf bbuf;
+
+ err = bmp_buf_get(indx, ni, bit, &bbuf);
+ if (err)
+ return err;
+
+ __set_bit(bit - bbuf.bit, bbuf.buf);
+
+ bmp_buf_put(&bbuf, true);
+
+ return 0;
+}
+
+/*
+ * indx_mark_free
+ *
+ * marks the bit 'bit' as free
+ */
+static int indx_mark_free(struct ntfs_index *indx, struct ntfs_inode *ni,
+ size_t bit)
+{
+ int err;
+ struct bmp_buf bbuf;
+
+ err = bmp_buf_get(indx, ni, bit, &bbuf);
+ if (err)
+ return err;
+
+ __clear_bit(bit - bbuf.bit, bbuf.buf);
+
+ bmp_buf_put(&bbuf, true);
+
+ return 0;
+}
+
+/*
+ * If ntfs_readdir calls this function (indx_used_bit -> scan_nres_bitmap),
+ * the inode is share-locked and ni_lock is not held, so an rw_semaphore
+ * is used for read/write access to bitmap_run.
+ */
+static int scan_nres_bitmap(struct ntfs_inode *ni, struct ATTRIB *bitmap,
+ struct ntfs_index *indx, size_t from,
+ bool (*fn)(const ulong *buf, u32 bit, u32 bits,
+ size_t *ret),
+ size_t *ret)
+{
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ struct super_block *sb = sbi->sb;
+ struct runs_tree *run = &indx->bitmap_run;
+ struct rw_semaphore *lock = &indx->run_lock;
+ u32 nbits = sb->s_blocksize * 8;
+ u32 blocksize = sb->s_blocksize;
+ u64 valid_size = le64_to_cpu(bitmap->nres.valid_size);
+ u64 data_size = le64_to_cpu(bitmap->nres.data_size);
+ sector_t eblock = bytes_to_block(sb, data_size);
+ size_t vbo = from >> 3;
+ sector_t blk = (vbo & sbi->cluster_mask) >> sb->s_blocksize_bits;
+ sector_t vblock = vbo >> sb->s_blocksize_bits;
+ sector_t blen, block;
+ CLST lcn, clen, vcn, vcn_next;
+ size_t idx;
+ struct buffer_head *bh;
+ bool ok;
+
+ *ret = MINUS_ONE_T;
+
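+ /*
+ * Walk the bitmap block by block: map each vcn run to disk
+ * blocks, read them, zero any tail beyond valid_size, and let
+ * the callback scan the bits until it reports a hit or the end
+ * of the data is reached.
+ */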
+ if (vblock >= eblock)
+ return 0;
+
+ from &= nbits - 1;
+ vcn = vbo >> sbi->cluster_bits;
+
+ down_read(lock);
+ ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx);
+ up_read(lock);
+
+next_run:
+ if (!ok) {
+ int err;
+ const struct INDEX_NAMES *name = &s_index_names[indx->type];
+
+ down_write(lock);
+ err = attr_load_runs_vcn(ni, ATTR_BITMAP, name->name,
+ name->name_len, run, vcn);
+ up_write(lock);
+ if (err)
+ return err;
+ down_read(lock);
+ ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx);
+ up_read(lock);
+ if (!ok)
+ return -EINVAL;
+ }
+
+ blen = (sector_t)clen * sbi->blocks_per_cluster;
+ block = (sector_t)lcn * sbi->blocks_per_cluster;
+
+ for (; blk < blen; blk++, from = 0) {
+ bh = ntfs_bread(sb, block + blk);
+ if (!bh)
+ return -EIO;
+
+ vbo = (u64)vblock << sb->s_blocksize_bits;
+ if (vbo >= valid_size) {
+ memset(bh->b_data, 0, blocksize);
+ } else if (vbo + blocksize > valid_size) {
+ u32 voff = valid_size & sbi->block_mask;
+
+ memset(bh->b_data + voff, 0, blocksize - voff);
+ }
+
+ if (vbo + blocksize > data_size)
+ nbits = 8 * (data_size - vbo);
+
+ ok = nbits > from ? (*fn)((ulong *)bh->b_data, from, nbits, ret)
+ : false;
+ put_bh(bh);
+
+ if (ok) {
+ *ret += 8 * vbo;
+ return 0;
+ }
+
+ if (++vblock >= eblock) {
+ *ret = MINUS_ONE_T;
+ return 0;
+ }
+ }
+ blk = 0;
+ vcn_next = vcn + clen;
+ down_read(lock);
+ ok = run_get_entry(run, ++idx, &vcn, &lcn, &clen) && vcn == vcn_next;
+ if (!ok)
+ vcn = vcn_next;
+ up_read(lock);
+ goto next_run;
+}
+
+static bool scan_for_free(const ulong *buf, u32 bit, u32 bits, size_t *ret)
+{
+ size_t pos = find_next_zero_bit(buf, bits, bit);
+
+ if (pos >= bits)
+ return false;
+ *ret = pos;
+ return true;
+}
+
+/*
+ * indx_find_free
+ *
+ * looks for a free bit
+ * *bit is set to MINUS_ONE_T if there are no free bits
+ */
+static int indx_find_free(struct ntfs_index *indx, struct ntfs_inode *ni,
+ size_t *bit, struct ATTRIB **bitmap)
+{
+ struct ATTRIB *b;
+ struct ATTR_LIST_ENTRY *le = NULL;
+ const struct INDEX_NAMES *in = &s_index_names[indx->type];
+ int err;
+
+ b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len,
+ NULL, NULL);
+
+ if (!b)
+ return -ENOENT;
+
+ *bitmap = b;
+ *bit = MINUS_ONE_T;
+
+ if (!b->non_res) {
+ u32 nbits = 8 * le32_to_cpu(b->res.data_size);
+ size_t pos = find_next_zero_bit(resident_data(b), nbits, 0);
+
+ if (pos < nbits)
+ *bit = pos;
+ } else {
+ err = scan_nres_bitmap(ni, b, indx, 0, &scan_for_free, bit);
+
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static bool scan_for_used(const ulong *buf, u32 bit, u32 bits, size_t *ret)
+{
+ size_t pos = find_next_bit(buf, bits, bit);
+
+ if (pos >= bits)
+ return false;
+ *ret = pos;
+ return true;
+}
+
+/*
+ * indx_used_bit
+ *
+ * looks for a used bit
+ * *bit is set to MINUS_ONE_T if there are no used bits
+ */
+int indx_used_bit(struct ntfs_index *indx, struct ntfs_inode *ni, size_t *bit)
+{
+ struct ATTRIB *b;
+ struct ATTR_LIST_ENTRY *le = NULL;
+ size_t from = *bit;
+ const struct INDEX_NAMES *in = &s_index_names[indx->type];
+ int err;
+
+ b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len,
+ NULL, NULL);
+
+ if (!b)
+ return -ENOENT;
+
+ *bit = MINUS_ONE_T;
+
+ if (!b->non_res) {
+ u32 nbits = le32_to_cpu(b->res.data_size) * 8;
+ size_t pos = find_next_bit(resident_data(b), nbits, from);
+
+ if (pos < nbits)
+ *bit = pos;
+ } else {
+ err = scan_nres_bitmap(ni, b, indx, from, &scan_for_used, bit);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * hdr_find_split
+ *
+ * finds a point, near the middle of the used space, at which the index
+ * allocation buffer should be split
+ * NOTE: this function never returns the 'END' entry; NULL is returned on error
+ */
+static const struct NTFS_DE *hdr_find_split(const struct INDEX_HDR *hdr)
+{
+ size_t o;
+ const struct NTFS_DE *e = hdr_first_de(hdr);
+ u32 used_2 = le32_to_cpu(hdr->used) >> 1;
+ u16 esize;
+
+ if (!e || de_is_last(e))
+ return NULL;
+
+ /* Only dereference 'e' after the NULL check above. */
+ esize = le16_to_cpu(e->size);
+
+ for (o = le32_to_cpu(hdr->de_off) + esize; o < used_2; o += esize) {
+ const struct NTFS_DE *p = e;
+
+ e = Add2Ptr(hdr, o);
+
+ /* We must not return END entry */
+ if (de_is_last(e))
+ return p;
+
+ esize = le16_to_cpu(e->size);
+ }
+
+ return e;
+}
+
+/*
+ * hdr_insert_head
+ *
+ * inserts some entries at the beginning of the buffer.
+ * It is used to insert entries into a newly-created buffer.
+ */
+static const struct NTFS_DE *hdr_insert_head(struct INDEX_HDR *hdr,
+ const void *ins, u32 ins_bytes)
+{
+ u32 to_move;
+ struct NTFS_DE *e = hdr_first_de(hdr);
+ u32 used = le32_to_cpu(hdr->used);
+
+ if (!e)
+ return NULL;
+
+ /* Now we just make room for the inserted entries and jam it in. */
+ to_move = used - le32_to_cpu(hdr->de_off);
+ memmove(Add2Ptr(e, ins_bytes), e, to_move);
+ memcpy(e, ins, ins_bytes);
+ hdr->used = cpu_to_le32(used + ins_bytes);
+
+ return e;
+}
+
+void fnd_clear(struct ntfs_fnd *fnd)
+{
+ int i;
+
+ for (i = 0; i < fnd->level; i++) {
+ struct indx_node *n = fnd->nodes[i];
+
+ if (!n)
+ continue;
+
+ put_indx_node(n);
+ fnd->nodes[i] = NULL;
+ }
+ fnd->level = 0;
+ fnd->root_de = NULL;
+}
+
+static int fnd_push(struct ntfs_fnd *fnd, struct indx_node *n,
+ struct NTFS_DE *e)
+{
+ int i;
+
+ i = fnd->level;
+ if (i < 0 || i >= ARRAY_SIZE(fnd->nodes))
+ return -EINVAL;
+ fnd->nodes[i] = n;
+ fnd->de[i] = e;
+ fnd->level += 1;
+ return 0;
+}
+
+static struct indx_node *fnd_pop(struct ntfs_fnd *fnd)
+{
+ struct indx_node *n;
+ int i = fnd->level;
+
+ i -= 1;
+ n = fnd->nodes[i];
+ fnd->nodes[i] = NULL;
+ fnd->level = i;
+
+ return n;
+}
+
+static bool fnd_is_empty(struct ntfs_fnd *fnd)
+{
+ if (!fnd->level)
+ return !fnd->root_de;
+
+ return !fnd->de[fnd->level - 1];
+}
+
+/*
+ * hdr_find_e
+ *
+ * locates an entry in the index buffer.
+ * If no matching entry is found, it returns the first entry which is greater
+ * than the desired entry. If the search key is greater than all the entries
+ * in the buffer, it returns the 'end' entry. This function does a binary
+ * search of the current index buffer for the first entry the search key
+ * is <= to.
+ * Returns NULL on error.
+ */
+static struct NTFS_DE *hdr_find_e(const struct ntfs_index *indx,
+ const struct INDEX_HDR *hdr, const void *key,
+ size_t key_len, const void *ctx, int *diff)
+{
+ struct NTFS_DE *e;
+ NTFS_CMP_FUNC cmp = indx->cmp;
+ u32 e_size, e_key_len;
+ u32 end = le32_to_cpu(hdr->used);
+ u32 off = le32_to_cpu(hdr->de_off);
+
+#ifdef NTFS3_INDEX_BINARY_SEARCH
+ int max_idx = 0, fnd, min_idx;
+ int nslots = 64;
+ u16 *offs;
+
+ if (end > 0x10000)
+ goto next;
+
+ offs = ntfs_malloc(sizeof(u16) * nslots);
+ if (!offs)
+ goto next;
+
+ /* use binary search algorithm */
+next1:
+ if (off + sizeof(struct NTFS_DE) > end) {
+ e = NULL;
+ goto out1;
+ }
+ e = Add2Ptr(hdr, off);
+ e_size = le16_to_cpu(e->size);
+
+ if (e_size < sizeof(struct NTFS_DE) || off + e_size > end) {
+ e = NULL;
+ goto out1;
+ }
+
+ if (max_idx >= nslots) {
+ u16 *ptr;
+ int new_slots = QuadAlign(2 * nslots);
+
+ ptr = ntfs_malloc(sizeof(u16) * new_slots);
+ if (ptr)
+ memcpy(ptr, offs, sizeof(u16) * max_idx);
+ ntfs_free(offs);
+ offs = ptr;
+ nslots = new_slots;
+ if (!ptr)
+ goto next;
+ }
+
+ /* Store entry table */
+ offs[max_idx] = off;
+
+ if (!de_is_last(e)) {
+ off += e_size;
+ max_idx += 1;
+ goto next1;
+ }
+
+ /*
+ * The table of offsets is now complete.
+ * Use binary search to find the first entry the search key is <= to.
+ */
+ fnd = -1;
+ min_idx = 0;
+
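+ /*
+ * offs[0..max_idx] hold the offsets of all entries in key order;
+ * 'fnd' tracks the leftmost entry known to compare greater than
+ * the key, so if no exact match exists that entry is returned
+ * with *diff == -1.
+ */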
+ while (min_idx <= max_idx) {
+ int mid_idx = min_idx + ((max_idx - min_idx) >> 1);
+ int diff2;
+
+ e = Add2Ptr(hdr, offs[mid_idx]);
+
+ e_key_len = le16_to_cpu(e->key_size);
+
+ diff2 = (*cmp)(key, key_len, e + 1, e_key_len, ctx);
+
+ if (!diff2) {
+ *diff = 0;
+ goto out1;
+ }
+
+ if (diff2 < 0) {
+ max_idx = mid_idx - 1;
+ fnd = mid_idx;
+ if (!fnd)
+ break;
+ } else {
+ min_idx = mid_idx + 1;
+ }
+ }
+
+ if (fnd == -1) {
+ e = NULL;
+ goto out1;
+ }
+
+ *diff = -1;
+ e = Add2Ptr(hdr, offs[fnd]);
+
+out1:
+ ntfs_free(offs);
+
+ return e;
+#endif
+
+next:
+ /*
+ * Entries in the index are sorted.
+ * Enumerate all entries linearly until we find the first entry
+ * the search key is <= to.
+ */
+ if (off + sizeof(struct NTFS_DE) > end)
+ return NULL;
+
+ e = Add2Ptr(hdr, off);
+ e_size = le16_to_cpu(e->size);
+
+ if (e_size < sizeof(struct NTFS_DE) || off + e_size > end)
+ return NULL;
+
+ off += e_size;
+
+ e_key_len = le16_to_cpu(e->key_size);
+
+ *diff = (*cmp)(key, key_len, e + 1, e_key_len, ctx);
+ if (!*diff)
+ return e;
+
+ if (*diff <= 0)
+ return e;
+
+ if (de_is_last(e)) {
+ *diff = 1;
+ return e;
+ }
+ goto next;
+}
+
+/*
+ * hdr_insert_de
+ *
+ * inserts an index entry into the buffer.
+ * 'before' should be a pointer previously returned from hdr_find_e
+ */
+static struct NTFS_DE *hdr_insert_de(const struct ntfs_index *indx,
+ struct INDEX_HDR *hdr,
+ const struct NTFS_DE *de,
+ struct NTFS_DE *before, const void *ctx)
+{
+ int diff;
+ size_t off = PtrOffset(hdr, before);
+ u32 used = le32_to_cpu(hdr->used);
+ u32 total = le32_to_cpu(hdr->total);
+ u16 de_size = le16_to_cpu(de->size);
+
+ /* First, check to see if there's enough room */
+ if (used + de_size > total)
+ return NULL;
+
+ /* We know there's enough space, so we know we'll succeed. */
+ if (before) {
+ /* Check that before is inside Index */
+ if (off >= used || off < le32_to_cpu(hdr->de_off) ||
+ off + le16_to_cpu(before->size) > total) {
+ return NULL;
+ }
+ goto ok;
+ }
+ /* No insert point was supplied; find it manually. */
+ before = hdr_find_e(indx, hdr, de + 1, le16_to_cpu(de->key_size), ctx,
+ &diff);
+ if (!before)
+ return NULL;
+ off = PtrOffset(hdr, before);
+
+ok:
+ /* Now we just make room for the entry and jam it in. */
+ memmove(Add2Ptr(before, de_size), before, used - off);
+
+ hdr->used = cpu_to_le32(used + de_size);
+ memcpy(before, de, de_size);
+
+ return before;
+}
+
+/*
+ * hdr_delete_de
+ *
+ * removes an entry from the index buffer
+ */
+static inline struct NTFS_DE *hdr_delete_de(struct INDEX_HDR *hdr,
+ struct NTFS_DE *re)
+{
+ u32 used = le32_to_cpu(hdr->used);
+ u16 esize = le16_to_cpu(re->size);
+ u32 off = PtrOffset(hdr, re);
+ int bytes = used - (off + esize);
+
+ if (off >= used || esize < sizeof(struct NTFS_DE) ||
+ bytes < sizeof(struct NTFS_DE))
+ return NULL;
+
+ hdr->used = cpu_to_le32(used - esize);
+ memmove(re, Add2Ptr(re, esize), bytes);
+
+ return re;
+}
+
+void indx_clear(struct ntfs_index *indx)
+{
+ run_close(&indx->alloc_run);
+ run_close(&indx->bitmap_run);
+}
+
+int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi,
+ const struct ATTRIB *attr, enum index_mutex_classed type)
+{
+ u32 t32;
+ const struct INDEX_ROOT *root = resident_data(attr);
+
+ /* Check root fields */
+ if (!root->index_block_clst)
+ return -EINVAL;
+
+ indx->type = type;
+ indx->idx2vbn_bits = __ffs(root->index_block_clst);
+
+ t32 = le32_to_cpu(root->index_block_size);
+ indx->index_bits = blksize_bits(t32);
+
+ /* Check index record size */
+ if (t32 < sbi->cluster_size) {
+ /* The index record is smaller than a cluster; use 512-byte blocks. */
+ if (t32 != root->index_block_clst * SECTOR_SIZE)
+ return -EINVAL;
+
+ /* Check alignment to a cluster */
+ if ((sbi->cluster_size >> SECTOR_SHIFT) &
+ (root->index_block_clst - 1)) {
+ return -EINVAL;
+ }
+
+ indx->vbn2vbo_bits = SECTOR_SHIFT;
+ } else {
+ /* index record must be a multiple of cluster size */
+ if (t32 != root->index_block_clst << sbi->cluster_bits)
+ return -EINVAL;
+
+ indx->vbn2vbo_bits = sbi->cluster_bits;
+ }
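+ /* vbn2vbo_bits converts an index block number (vbn) into a byte offset. */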
+
+ init_rwsem(&indx->run_lock);
+
+ indx->cmp = get_cmp_func(root);
+ return indx->cmp ? 0 : -EINVAL;
+}
+
+static struct indx_node *indx_new(struct ntfs_index *indx,
+ struct ntfs_inode *ni, CLST vbn,
+ const __le64 *sub_vbn)
+{
+ int err;
+ struct NTFS_DE *e;
+ struct indx_node *r;
+ struct INDEX_HDR *hdr;
+ struct INDEX_BUFFER *index;
+ u64 vbo = (u64)vbn << indx->vbn2vbo_bits;
+ u32 bytes = 1u << indx->index_bits;
+ u16 fn;
+ u32 eo;
+
+ r = ntfs_zalloc(sizeof(struct indx_node));
+ if (!r)
+ return ERR_PTR(-ENOMEM);
+
+ index = ntfs_zalloc(bytes);
+ if (!index) {
+ ntfs_free(r);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ err = ntfs_get_bh(ni->mi.sbi, &indx->alloc_run, vbo, bytes, &r->nb);
+
+ if (err) {
+ ntfs_free(index);
+ ntfs_free(r);
+ return ERR_PTR(err);
+ }
+
+ /* Create header */
+ index->rhdr.sign = NTFS_INDX_SIGNATURE;
+ index->rhdr.fix_off = cpu_to_le16(sizeof(struct INDEX_BUFFER)); // 0x28
+ fn = (bytes >> SECTOR_SHIFT) + 1; // 9
+ index->rhdr.fix_num = cpu_to_le16(fn);
+ index->vbn = cpu_to_le64(vbn);
+ hdr = &index->ihdr;
+ eo = QuadAlign(sizeof(struct INDEX_BUFFER) + fn * sizeof(short));
+ hdr->de_off = cpu_to_le32(eo);
+
+ e = Add2Ptr(hdr, eo);
+
+ if (sub_vbn) {
+ e->flags = NTFS_IE_LAST | NTFS_IE_HAS_SUBNODES;
+ e->size = cpu_to_le16(sizeof(struct NTFS_DE) + sizeof(u64));
+ hdr->used =
+ cpu_to_le32(eo + sizeof(struct NTFS_DE) + sizeof(u64));
+ de_set_vbn_le(e, *sub_vbn);
+ hdr->flags = 1;
+ } else {
+ e->size = cpu_to_le16(sizeof(struct NTFS_DE));
+ hdr->used = cpu_to_le32(eo + sizeof(struct NTFS_DE));
+ e->flags = NTFS_IE_LAST;
+ }
+
+ hdr->total = cpu_to_le32(bytes - offsetof(struct INDEX_BUFFER, ihdr));
+
+ r->index = index;
+ return r;
+}
+
+struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni,
+ struct ATTRIB **attr, struct mft_inode **mi)
+{
+ struct ATTR_LIST_ENTRY *le = NULL;
+ struct ATTRIB *a;
+ const struct INDEX_NAMES *in = &s_index_names[indx->type];
+
+ a = ni_find_attr(ni, NULL, &le, ATTR_ROOT, in->name, in->name_len, NULL,
+ mi);
+ if (!a)
+ return NULL;
+
+ if (attr)
+ *attr = a;
+
+ return resident_data_ex(a, sizeof(struct INDEX_ROOT));
+}
+
+static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni,
+ struct indx_node *node, int sync)
+{
+ struct INDEX_BUFFER *ib = node->index;
+
+ return ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &node->nb, sync);
+}
+
+/*
+ * If ntfs_readdir calls this function, the inode is share-locked and
+ * ni_lock is not held, so an rw_semaphore is used for read/write access
+ * to alloc_run.
+ */
+int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn,
+ struct indx_node **node)
+{
+ int err;
+ struct INDEX_BUFFER *ib;
+ struct runs_tree *run = &indx->alloc_run;
+ struct rw_semaphore *lock = &indx->run_lock;
+ u64 vbo = (u64)vbn << indx->vbn2vbo_bits;
+ u32 bytes = 1u << indx->index_bits;
+ struct indx_node *in = *node;
+ const struct INDEX_NAMES *name;
+
+ if (!in) {
+ in = ntfs_zalloc(sizeof(struct indx_node));
+ if (!in)
+ return -ENOMEM;
+ } else {
+ nb_put(&in->nb);
+ }
+
+ ib = in->index;
+ if (!ib) {
+ ib = ntfs_malloc(bytes);
+ if (!ib) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
+ down_read(lock);
+ err = ntfs_read_bh(ni->mi.sbi, run, vbo, &ib->rhdr, bytes, &in->nb);
+ up_read(lock);
+ if (!err)
+ goto ok;
+
+ if (err == -E_NTFS_FIXUP)
+ goto ok;
+
+ if (err != -ENOENT)
+ goto out;
+
+ name = &s_index_names[indx->type];
+ down_write(lock);
+ err = attr_load_runs_range(ni, ATTR_ALLOC, name->name, name->name_len,
+ run, vbo, vbo + bytes);
+ up_write(lock);
+ if (err)
+ goto out;
+
+ down_read(lock);
+ err = ntfs_read_bh(ni->mi.sbi, run, vbo, &ib->rhdr, bytes, &in->nb);
+ up_read(lock);
+ if (err == -E_NTFS_FIXUP)
+ goto ok;
+
+ if (err)
+ goto out;
+
+ok:
+ if (err == -E_NTFS_FIXUP) {
+ ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &in->nb, 0);
+ err = 0;
+ }
+
+ in->index = ib;
+ *node = in;
+
+out:
+ if (ib != in->index)
+ ntfs_free(ib);
+
+ if (*node != in) {
+ nb_put(&in->nb);
+ ntfs_free(in);
+ }
+
+ return err;
+}
+
+/*
+ * indx_find
+ *
+ * scans an NTFS directory for the given entry
+ */
+int indx_find(struct ntfs_index *indx, struct ntfs_inode *ni,
+ const struct INDEX_ROOT *root, const void *key, size_t key_len,
+ const void *ctx, int *diff, struct NTFS_DE **entry,
+ struct ntfs_fnd *fnd)
+{
+ int err;
+ struct NTFS_DE *e;
+ const struct INDEX_HDR *hdr;
+ struct indx_node *node;
+
+ if (!root)
+ root = indx_get_root(&ni->dir, ni, NULL, NULL);
+
+ if (!root) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ hdr = &root->ihdr;
+
+ /* Check cache */
+ e = fnd->level ? fnd->de[fnd->level - 1] : fnd->root_de;
+ if (e && !de_is_last(e) &&
+ !(*indx->cmp)(key, key_len, e + 1, le16_to_cpu(e->key_size), ctx)) {
+ *entry = e;
+ *diff = 0;
+ return 0;
+ }
+
+ /* Soft finder reset */
+ fnd_clear(fnd);
+
+ /* Lookup entry that is <= to the search value */
+ e = hdr_find_e(indx, hdr, key, key_len, ctx, diff);
+ if (!e)
+ return -EINVAL;
+
+ if (fnd)
+ fnd->root_de = e;
+
+ err = 0;
+
+ for (;;) {
+ node = NULL;
+ if (*diff >= 0 || !de_has_vcn_ex(e)) {
+ *entry = e;
+ goto out;
+ }
+
+ /* Read next level. */
+ err = indx_read(indx, ni, de_get_vbn(e), &node);
+ if (err)
+ goto out;
+
+ /* Lookup entry that is <= to the search value */
+ e = hdr_find_e(indx, &node->index->ihdr, key, key_len, ctx,
+ diff);
+ if (!e) {
+ err = -EINVAL;
+ put_indx_node(node);
+ goto out;
+ }
+
+ fnd_push(fnd, node, e);
+ }
+
+out:
+ return err;
+}
+
+int indx_find_sort(struct ntfs_index *indx, struct ntfs_inode *ni,
+ const struct INDEX_ROOT *root, struct NTFS_DE **entry,
+ struct ntfs_fnd *fnd)
+{
+ int err;
+ struct indx_node *n = NULL;
+ struct NTFS_DE *e;
+ size_t iter = 0;
+ int level = fnd->level;
+
+ if (!*entry) {
+ /* Start find */
+ e = hdr_first_de(&root->ihdr);
+ if (!e)
+ return 0;
+ fnd_clear(fnd);
+ fnd->root_de = e;
+ } else if (!level) {
+ if (de_is_last(fnd->root_de)) {
+ *entry = NULL;
+ return 0;
+ }
+
+ e = hdr_next_de(&root->ihdr, fnd->root_de);
+ if (!e)
+ return -EINVAL;
+ fnd->root_de = e;
+ } else {
+ n = fnd->nodes[level - 1];
+ e = fnd->de[level - 1];
+
+ if (de_is_last(e))
+ goto pop_level;
+
+ e = hdr_next_de(&n->index->ihdr, e);
+ if (!e)
+ return -EINVAL;
+
+ fnd->de[level - 1] = e;
+ }
+
+ /* Just to avoid tree cycle */
+next_iter:
+ if (iter++ >= 1000)
+ return -EINVAL;
+
+ while (de_has_vcn_ex(e)) {
+ if (le16_to_cpu(e->size) <
+ sizeof(struct NTFS_DE) + sizeof(u64)) {
+ if (n) {
+ fnd_pop(fnd);
+ ntfs_free(n);
+ }
+ return -EINVAL;
+ }
+
+ /* Read next level */
+ err = indx_read(indx, ni, de_get_vbn(e), &n);
+ if (err)
+ return err;
+
+ /* Try next level */
+ e = hdr_first_de(&n->index->ihdr);
+ if (!e) {
+ ntfs_free(n);
+ return -EINVAL;
+ }
+
+ fnd_push(fnd, n, e);
+ }
+
+ if (le16_to_cpu(e->size) > sizeof(struct NTFS_DE)) {
+ *entry = e;
+ return 0;
+ }
+
+pop_level:
+ for (;;) {
+ if (!de_is_last(e))
+ goto next_iter;
+
+ /* Pop one level */
+ if (n) {
+ fnd_pop(fnd);
+ ntfs_free(n);
+ }
+
+ level = fnd->level;
+
+ if (level) {
+ n = fnd->nodes[level - 1];
+ e = fnd->de[level - 1];
+ } else if (fnd->root_de) {
+ n = NULL;
+ e = fnd->root_de;
+ fnd->root_de = NULL;
+ } else {
+ *entry = NULL;
+ return 0;
+ }
+
+ if (le16_to_cpu(e->size) > sizeof(struct NTFS_DE)) {
+ *entry = e;
+ if (!fnd->root_de)
+ fnd->root_de = e;
+ return 0;
+ }
+ }
+}
+
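+/*
+ * indx_find_raw
+ *
+ * enumerates index entries without sorting, in allocation order rather
+ * than key order; *off is an opaque cursor (an offset into the root or
+ * into an index buffer) that allows the scan to be resumed later.
+ */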
+int indx_find_raw(struct ntfs_index *indx, struct ntfs_inode *ni,
+ const struct INDEX_ROOT *root, struct NTFS_DE **entry,
+ size_t *off, struct ntfs_fnd *fnd)
+{
+ int err;
+ struct indx_node *n = NULL;
+ struct NTFS_DE *e = NULL;
+ struct NTFS_DE *e2;
+ size_t bit;
+ CLST next_used_vbn;
+ CLST next_vbn;
+ u32 record_size = ni->mi.sbi->record_size;
+
+ /* Use the non-sorted enumeration algorithm. */
+ if (!*entry) {
+ /* This is the first call */
+ e = hdr_first_de(&root->ihdr);
+ if (!e)
+ return 0;
+ fnd_clear(fnd);
+ fnd->root_de = e;
+
+ /* First call with a saved cursor: resume the enumeration from it. */
+ if (*off >= record_size) {
+ next_vbn = (((*off - record_size) >> indx->index_bits))
+ << indx->idx2vbn_bits;
+ /* Jump into the middle of the 'for' loop below. */
+ goto next;
+ }
+
+ /* Start enumeration from root */
+ *off = 0;
+ } else if (!fnd->root_de)
+ return -EINVAL;
+
+ for (;;) {
+ /* Check if current entry can be used */
+ if (e && le16_to_cpu(e->size) > sizeof(struct NTFS_DE))
+ goto ok;
+
+ if (!fnd->level) {
+ /* Continue to enumerate root */
+ if (!de_is_last(fnd->root_de)) {
+ e = hdr_next_de(&root->ihdr, fnd->root_de);
+ if (!e)
+ return -EINVAL;
+ fnd->root_de = e;
+ continue;
+ }
+
+ /* Start to enumerate indexes from 0 */
+ next_vbn = 0;
+ } else {
+ /* Continue to enumerate indexes */
+ e2 = fnd->de[fnd->level - 1];
+
+ n = fnd->nodes[fnd->level - 1];
+
+ if (!de_is_last(e2)) {
+ e = hdr_next_de(&n->index->ihdr, e2);
+ if (!e)
+ return -EINVAL;
+ fnd->de[fnd->level - 1] = e;
+ continue;
+ }
+
+ /* Continue with next index */
+ next_vbn = le64_to_cpu(n->index->vbn) +
+ root->index_block_clst;
+ }
+
+next:
+ /* Release current index */
+ if (n) {
+ fnd_pop(fnd);
+ put_indx_node(n);
+ n = NULL;
+ }
+
+ /* Skip all free indexes */
+ bit = next_vbn >> indx->idx2vbn_bits;
+ err = indx_used_bit(indx, ni, &bit);
+ if (err == -ENOENT || bit == MINUS_ONE_T) {
+ /* No used indexes */
+ *entry = NULL;
+ return 0;
+ }
+
+ next_used_vbn = bit << indx->idx2vbn_bits;
+
+ /* Read buffer into memory */
+ err = indx_read(indx, ni, next_used_vbn, &n);
+ if (err)
+ return err;
+
+ e = hdr_first_de(&n->index->ihdr);
+ fnd_push(fnd, n, e);
+ if (!e)
+ return -EINVAL;
+ }
+
+ok:
+ /* return offset to restore enumerator if necessary */
+ if (!n) {
+ /* 'e' points in root */
+ *off = PtrOffset(&root->ihdr, e);
+ } else {
+ /* 'e' points in index */
+ *off = (le64_to_cpu(n->index->vbn) << indx->vbn2vbo_bits) +
+ record_size + PtrOffset(&n->index->ihdr, e);
+ }
+
+ *entry = e;
+ return 0;
+}
+
+/*
+ * indx_create_allocate
+ *
+ * create "Allocation + Bitmap" attributes
+ */
+static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
+ CLST *vbn)
+{
+ int err = -ENOMEM;
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ struct ATTRIB *bitmap;
+ struct ATTRIB *alloc;
+ u32 data_size = 1u << indx->index_bits;
+ u32 alloc_size = ntfs_up_cluster(sbi, data_size);
+ CLST len = alloc_size >> sbi->cluster_bits;
+ const struct INDEX_NAMES *in = &s_index_names[indx->type];
+ CLST alen;
+ struct runs_tree run;
+
+ run_init(&run);
+
+ err = attr_allocate_clusters(sbi, &run, 0, 0, len, NULL, 0, &alen, 0,
+ NULL);
+ if (err)
+ goto out;
+
+ err = ni_insert_nonresident(ni, ATTR_ALLOC, in->name, in->name_len,
+ &run, 0, len, 0, &alloc, NULL);
+ if (err)
+ goto out1;
+
+ alloc->nres.valid_size = alloc->nres.data_size = cpu_to_le64(data_size);
+
+ err = ni_insert_resident(ni, bitmap_size(1), ATTR_BITMAP, in->name,
+ in->name_len, &bitmap, NULL);
+ if (err)
+ goto out2;
+
+ if (in->name == I30_NAME) {
+ ni->vfs_inode.i_size = data_size;
+ inode_set_bytes(&ni->vfs_inode, alloc_size);
+ }
+
+ memcpy(&indx->alloc_run, &run, sizeof(run));
+
+ *vbn = 0;
+
+ return 0;
+
+out2:
+ mi_remove_attr(&ni->mi, alloc);
+
+out1:
+ run_deallocate(sbi, &run, false);
+
+out:
+ return err;
+}
+
+/*
+ * indx_add_allocate
+ *
+ * add clusters to index
+ */
+static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
+ CLST *vbn)
+{
+ int err;
+ size_t bit;
+ u64 data_size;
+ u64 bmp_size, bmp_size_v;
+ struct ATTRIB *bmp, *alloc;
+ struct mft_inode *mi;
+ const struct INDEX_NAMES *in = &s_index_names[indx->type];
+
+ err = indx_find_free(indx, ni, &bit, &bmp);
+ if (err)
+ goto out1;
+
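+ /*
+ * If a free bit was found, reuse that index block; the bitmap
+ * keeps its size. Otherwise grow the bitmap by one bit and use
+ * the block just past the current end.
+ */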
+ if (bit != MINUS_ONE_T) {
+ bmp = NULL;
+ } else {
+ if (bmp->non_res) {
+ bmp_size = le64_to_cpu(bmp->nres.data_size);
+ bmp_size_v = le64_to_cpu(bmp->nres.valid_size);
+ } else {
+ bmp_size = bmp_size_v = le32_to_cpu(bmp->res.data_size);
+ }
+
+ bit = bmp_size << 3;
+ }
+
+ data_size = (u64)(bit + 1) << indx->index_bits;
+
+ if (bmp) {
+ /* Increase bitmap */
+ err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len,
+ &indx->bitmap_run, bitmap_size(bit + 1),
+ NULL, true, NULL);
+ if (err)
+ goto out1;
+ }
+
+ alloc = ni_find_attr(ni, NULL, NULL, ATTR_ALLOC, in->name, in->name_len,
+ NULL, &mi);
+ if (!alloc) {
+ if (bmp)
+ goto out2;
+ goto out1;
+ }
+
+ /* Increase allocation */
+ err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len,
+ &indx->alloc_run, data_size, &data_size, true,
+ NULL);
+ if (err) {
+ if (bmp)
+ goto out2;
+ goto out1;
+ }
+
+ *vbn = bit << indx->idx2vbn_bits;
+
+ return 0;
+
+out2:
+ /* Oops, probably out of space: roll back the bitmap resize. */
+ attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len,
+ &indx->bitmap_run, bmp_size, &bmp_size_v, false, NULL);
+
+out1:
+ return err;
+}
+
+/*
+ * indx_insert_into_root
+ *
+ * attempts to insert an entry into the index root.
+ * If necessary, it will reshape the index b-tree.
+ */
+static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
+ const struct NTFS_DE *new_de,
+ struct NTFS_DE *root_de, const void *ctx,
+ struct ntfs_fnd *fnd)
+{
+ int err = 0;
+ struct NTFS_DE *e, *e0, *re;
+ struct mft_inode *mi;
+ struct ATTRIB *attr;
+ struct MFT_REC *rec;
+ struct INDEX_HDR *hdr;
+ struct indx_node *n;
+ CLST new_vbn;
+ __le64 *sub_vbn, t_vbn;
+ u16 new_de_size;
+ u32 hdr_used, hdr_total, asize, used, to_move;
+ u32 root_size, new_root_size;
+ struct ntfs_sb_info *sbi;
+ int ds_root;
+ struct INDEX_ROOT *root, *a_root = NULL;
+
+ /* Get the record this root placed in */
+ root = indx_get_root(indx, ni, &attr, &mi);
+ if (!root)
+ goto out;
+
+ /*
+ * Try easy case:
+ * hdr_insert_de will succeed if there's room in the root for the new entry.
+ */
+ hdr = &root->ihdr;
+ sbi = ni->mi.sbi;
+ rec = mi->mrec;
+ used = le32_to_cpu(rec->used);
+ new_de_size = le16_to_cpu(new_de->size);
+ hdr_used = le32_to_cpu(hdr->used);
+ hdr_total = le32_to_cpu(hdr->total);
+ asize = le32_to_cpu(attr->size);
+ root_size = le32_to_cpu(attr->res.data_size);
+
+ ds_root = new_de_size + hdr_used - hdr_total;
+
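+ /*
+ * ds_root is how many extra bytes the root attribute needs to
+ * hold the new entry. If the mft record can absorb the growth,
+ * resize the attribute in place and insert directly.
+ */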
+ if (used + ds_root < sbi->max_bytes_per_attr) {
+ /* Make room for the new entry. */
+ mi_resize_attr(mi, attr, ds_root);
+ hdr->total = cpu_to_le32(hdr_total + ds_root);
+ e = hdr_insert_de(indx, hdr, new_de, root_de, ctx);
+ WARN_ON(!e);
+ fnd_clear(fnd);
+ fnd->root_de = e;
+
+ return 0;
+ }
+
+ /* Make a copy of the root attribute to restore on error. */
+ a_root = ntfs_memdup(attr, asize);
+ if (!a_root) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* Copy all the non-end entries from the index root to the new buffer. */
+ to_move = 0;
+ e0 = hdr_first_de(hdr);
+
+ /* Calculate the size to copy */
+ for (e = e0;; e = hdr_next_de(hdr, e)) {
+ if (!e) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (de_is_last(e))
+ break;
+ to_move += le16_to_cpu(e->size);
+ }
+
+ n = NULL;
+ if (!to_move) {
+ re = NULL;
+ } else {
+ re = ntfs_memdup(e0, to_move);
+ if (!re) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
+ sub_vbn = NULL;
+ if (de_has_vcn(e)) {
+ t_vbn = de_get_vbn_le(e);
+ sub_vbn = &t_vbn;
+ }
+
+ new_root_size = sizeof(struct INDEX_ROOT) + sizeof(struct NTFS_DE) +
+ sizeof(u64);
+ ds_root = new_root_size - root_size;
+
+ if (ds_root > 0 && used + ds_root > sbi->max_bytes_per_attr) {
+ /* make root external */
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (ds_root)
+ mi_resize_attr(mi, attr, ds_root);
+
+ /* Fill first entry (vcn will be set later) */
+ e = (struct NTFS_DE *)(root + 1);
+ memset(e, 0, sizeof(struct NTFS_DE));
+ e->size = cpu_to_le16(sizeof(struct NTFS_DE) + sizeof(u64));
+ e->flags = NTFS_IE_HAS_SUBNODES | NTFS_IE_LAST;
+
+ hdr->flags = 1;
+ hdr->used = hdr->total =
+ cpu_to_le32(new_root_size - offsetof(struct INDEX_ROOT, ihdr));
+
+ fnd->root_de = hdr_first_de(hdr);
+ mi->dirty = true;
+
+ /* Create alloc and bitmap attributes (if not) */
+ err = run_is_empty(&indx->alloc_run)
+ ? indx_create_allocate(indx, ni, &new_vbn)
+ : indx_add_allocate(indx, ni, &new_vbn);
+
+ /* The record layout may have changed, so re-fetch the root. */
+ root = indx_get_root(indx, ni, &attr, &mi);
+ if (!root) {
+ /* bug? */
+ ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+ err = -EINVAL;
+ goto out1;
+ }
+
+ if (err) {
+ /* restore root */
+ if (mi_resize_attr(mi, attr, -ds_root))
+ memcpy(attr, a_root, asize);
+ else {
+ /* bug? */
+ ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+ }
+ goto out1;
+ }
+
+ e = (struct NTFS_DE *)(root + 1);
+ *(__le64 *)(e + 1) = cpu_to_le64(new_vbn);
+ mi->dirty = true;
+
+ /* Now we can create/format the new buffer and copy the entries into it. */
+ n = indx_new(indx, ni, new_vbn, sub_vbn);
+ if (IS_ERR(n)) {
+ err = PTR_ERR(n);
+ goto out1;
+ }
+
+ hdr = &n->index->ihdr;
+ hdr_used = le32_to_cpu(hdr->used);
+ hdr_total = le32_to_cpu(hdr->total);
+
+ /* Copy root entries into new buffer */
+ hdr_insert_head(hdr, re, to_move);
+
+ /* Update bitmap attribute */
+ indx_mark_used(indx, ni, new_vbn >> indx->idx2vbn_bits);
+
+ /* Check if we can insert the new entry into the new index buffer. */
+ if (hdr_used + new_de_size > hdr_total) {
+ /*
+ * This occurs if the mft record is the same size as, or bigger
+ * than, the index buffer: the whole root was moved into the new
+ * index buffer and there is still no space for the new entry.
+ * In the classic case (1K mft record, 4K index buffer) this
+ * should not occur.
+ */
+ ntfs_free(re);
+ indx_write(indx, ni, n, 0);
+
+ put_indx_node(n);
+ fnd_clear(fnd);
+ err = indx_insert_entry(indx, ni, new_de, ctx, fnd);
+ goto out;
+ }
+
+ /*
+ * Now the root is a parent for the new index buffer.
+ * Insert the new entry into the new buffer.
+ */
+ e = hdr_insert_de(indx, hdr, new_de, NULL, ctx);
+ if (!e) {
+ err = -EINVAL;
+ goto out1;
+ }
+ fnd_push(fnd, n, e);
+
+ /* Just write the updated index to disk. */
+ indx_write(indx, ni, n, 0);
+
+ n = NULL;
+
+out1:
+ ntfs_free(re);
+ if (n)
+ put_indx_node(n);
+
+out:
+ ntfs_free(a_root);
+ return err;
+}
+
+/*
+ * indx_insert_into_buffer
+ *
+ * attempts to insert an entry into an Index Allocation Buffer.
+ * If necessary, it will split the buffer.
+ */
+static int
+indx_insert_into_buffer(struct ntfs_index *indx, struct ntfs_inode *ni,
+ struct INDEX_ROOT *root, const struct NTFS_DE *new_de,
+ const void *ctx, int level, struct ntfs_fnd *fnd)
+{
+ int err;
+ const struct NTFS_DE *sp;
+ struct NTFS_DE *e, *de_t, *up_e = NULL;
+ struct indx_node *n2 = NULL;
+ struct indx_node *n1 = fnd->nodes[level];
+ struct INDEX_HDR *hdr1 = &n1->index->ihdr;
+ struct INDEX_HDR *hdr2;
+ u32 to_copy, used;
+ CLST new_vbn;
+ __le64 t_vbn, *sub_vbn;
+ u16 sp_size;
+
+ /* Try the easiest case first. */
+ e = fnd->level - 1 == level ? fnd->de[level] : NULL;
+ e = hdr_insert_de(indx, hdr1, new_de, e, ctx);
+ fnd->de[level] = e;
+ if (e) {
+ /* Just write updated index into disk */
+ indx_write(indx, ni, n1, 0);
+ return 0;
+ }
+
+ /*
+ * No space to insert into buffer. Split it.
+ * To split we:
+ * - Save split point ('cause index buffers will be changed)
+ * - Allocate NewBuffer and copy all entries <= sp into new buffer
+ * - Remove all entries (sp including) from TargetBuffer
+ * - Insert NewEntry into left or right buffer (depending on sp <=>
+ * NewEntry)
+ * - Insert sp into parent buffer (or root)
+ * - Make sp a parent for new buffer
+ */
+ sp = hdr_find_split(hdr1);
+ if (!sp)
+ return -EINVAL;
+
+ sp_size = le16_to_cpu(sp->size);
+ up_e = ntfs_malloc(sp_size + sizeof(u64));
+ if (!up_e)
+ return -ENOMEM;
+ memcpy(up_e, sp, sp_size);
+
+ if (!hdr1->flags) {
+ up_e->flags |= NTFS_IE_HAS_SUBNODES;
+ up_e->size = cpu_to_le16(sp_size + sizeof(u64));
+ sub_vbn = NULL;
+ } else {
+ t_vbn = de_get_vbn_le(up_e);
+ sub_vbn = &t_vbn;
+ }
+
+ /* Allocate on disk a new index allocation buffer. */
+ err = indx_add_allocate(indx, ni, &new_vbn);
+ if (err)
+ goto out;
+
+ /* Allocate and format a new index buffer in memory. */
+ n2 = indx_new(indx, ni, new_vbn, sub_vbn);
+ if (IS_ERR(n2)) {
+ err = PTR_ERR(n2);
+ goto out;
+ }
+
+ hdr2 = &n2->index->ihdr;
+
+ /* Make sp a parent for new buffer */
+ de_set_vbn(up_e, new_vbn);
+
+ /* Copy all the entries <= sp into the new buffer. */
+ de_t = hdr_first_de(hdr1);
+ to_copy = PtrOffset(de_t, sp);
+ hdr_insert_head(hdr2, de_t, to_copy);
+
+ /* Remove all entries (sp included) from hdr1. */
+ used = le32_to_cpu(hdr1->used) - to_copy - sp_size;
+ memmove(de_t, Add2Ptr(sp, sp_size), used - le32_to_cpu(hdr1->de_off));
+ hdr1->used = cpu_to_le32(used);
+
+ /* Insert new entry into left or right buffer (depending on sp <=> new_de) */
+ hdr_insert_de(indx,
+ (*indx->cmp)(new_de + 1, le16_to_cpu(new_de->key_size),
+ up_e + 1, le16_to_cpu(up_e->key_size),
+ ctx) < 0
+ ? hdr2
+ : hdr1,
+ new_de, NULL, ctx);
+
+ indx_mark_used(indx, ni, new_vbn >> indx->idx2vbn_bits);
+
+ indx_write(indx, ni, n1, 0);
+ indx_write(indx, ni, n2, 0);
+
+ put_indx_node(n2);
+
+ /*
+ * We have finished splitting everybody, so we are ready to
+ * insert the promoted entry into the parent.
+ */
+ if (!level) {
+ /* Insert in root */
+ err = indx_insert_into_root(indx, ni, up_e, NULL, ctx, fnd);
+ if (err)
+ goto out;
+ } else {
+ /*
+ * The target buffer's parent is another index buffer
+ * TODO: Remove recursion
+ */
+ err = indx_insert_into_buffer(indx, ni, root, up_e, ctx,
+ level - 1, fnd);
+ if (err)
+ goto out;
+ }
+
+out:
+ ntfs_free(up_e);
+
+ return err;
+}
+
+/*
+ * indx_insert_entry
+ *
+ * inserts a new entry into the index.
+ */
+int indx_insert_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
+ const struct NTFS_DE *new_de, const void *ctx,
+ struct ntfs_fnd *fnd)
+{
+ int err;
+ int diff;
+ struct NTFS_DE *e;
+ struct ntfs_fnd *fnd_a = NULL;
+ struct INDEX_ROOT *root;
+
+ if (!fnd) {
+ fnd_a = fnd_get();
+ if (!fnd_a) {
+ err = -ENOMEM;
+ goto out1;
+ }
+ fnd = fnd_a;
+ }
+
+ root = indx_get_root(indx, ni, NULL, NULL);
+ if (!root) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (fnd_is_empty(fnd)) {
+ /* Find the spot in the tree where we want to insert the new entry. */
+ err = indx_find(indx, ni, root, new_de + 1,
+ le16_to_cpu(new_de->key_size), ctx, &diff, &e,
+ fnd);
+ if (err)
+ goto out;
+
+ if (!diff) {
+ err = -EEXIST;
+ goto out;
+ }
+ }
+
+ if (!fnd->level) {
+ /* The root is also a leaf, so we'll insert the new entry into it. */
+ err = indx_insert_into_root(indx, ni, new_de, fnd->root_de, ctx,
+ fnd);
+ if (err)
+ goto out;
+ } else {
+ /* Found a leaf buffer, so we'll insert the new entry into it. */
+ err = indx_insert_into_buffer(indx, ni, root, new_de, ctx,
+ fnd->level - 1, fnd);
+ if (err)
+ goto out;
+ }
+
+out:
+ fnd_put(fnd_a);
+out1:
+ return err;
+}
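+
+/*
+ * Usage sketch (assumption: mirrors the calls made elsewhere in this
+ * patch): a directory entry built with fill_name_de() is inserted via
+ *
+ *	err = indx_insert_entry(&dir_ni->dir, dir_ni, new_de, sbi, NULL);
+ *
+ * Passing fnd == NULL makes the function allocate a temporary search
+ * context with fnd_get() and release it with fnd_put() on exit.
+ */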
+
+/*
+ * indx_find_buffer
+ *
+ * locates a buffer in the tree.
+ */
+static struct indx_node *indx_find_buffer(struct ntfs_index *indx,
+ struct ntfs_inode *ni,
+ const struct INDEX_ROOT *root,
+ __le64 vbn, struct indx_node *n)
+{
+ int err;
+ const struct NTFS_DE *e;
+ struct indx_node *r;
+ const struct INDEX_HDR *hdr = n ? &n->index->ihdr : &root->ihdr;
+
+ /* Step 1: Scan one level */
+ for (e = hdr_first_de(hdr);; e = hdr_next_de(hdr, e)) {
+ if (!e)
+ return ERR_PTR(-EINVAL);
+
+ if (de_has_vcn(e) && vbn == de_get_vbn_le(e))
+ return n;
+
+ if (de_is_last(e))
+ break;
+ }
+
+ /* Step 2: Do recursion. */
+ e = Add2Ptr(hdr, le32_to_cpu(hdr->de_off));
+ for (;;) {
+ if (de_has_vcn_ex(e)) {
+ err = indx_read(indx, ni, de_get_vbn(e), &n);
+ if (err)
+ return ERR_PTR(err);
+
+ r = indx_find_buffer(indx, ni, root, vbn, n);
+ if (r)
+ return r;
+ }
+
+ if (de_is_last(e))
+ break;
+
+ e = Add2Ptr(e, le16_to_cpu(e->size));
+ }
+
+ return NULL;
+}
+
+/*
+ * indx_shrink
+ *
+ * deallocates unused tail indexes
+ */
+static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni,
+ size_t bit)
+{
+ int err = 0;
+ u64 bpb, new_data;
+ size_t nbits;
+ struct ATTRIB *b;
+ struct ATTR_LIST_ENTRY *le = NULL;
+ const struct INDEX_NAMES *in = &s_index_names[indx->type];
+
+ b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len,
+ NULL, NULL);
+
+ if (!b)
+ return -ENOENT;
+
+ if (!b->non_res) {
+ unsigned long pos;
+ const unsigned long *bm = resident_data(b);
+
+ nbits = le32_to_cpu(b->res.data_size) * 8;
+
+ if (bit >= nbits)
+ return 0;
+
+ pos = find_next_bit(bm, nbits, bit);
+ if (pos < nbits)
+ return 0;
+ } else {
+ size_t used = MINUS_ONE_T;
+
+ nbits = le64_to_cpu(b->nres.data_size) * 8;
+
+ if (bit >= nbits)
+ return 0;
+
+ err = scan_nres_bitmap(ni, b, indx, bit, &scan_for_used, &used);
+ if (err)
+ return err;
+
+ if (used != MINUS_ONE_T)
+ return 0;
+ }
+
+ new_data = (u64)bit << indx->index_bits;
+
+ err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len,
+ &indx->alloc_run, new_data, &new_data, false, NULL);
+ if (err)
+ return err;
+
+ bpb = bitmap_size(bit);
+ if (bpb * 8 == nbits)
+ return 0;
+
+ err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len,
+ &indx->bitmap_run, bpb, &bpb, false, NULL);
+
+ return err;
+}
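+
+/*
+ * Example of the sizing above (a sketch, assuming bitmap_size() returns
+ * the number of bytes needed to hold 'bit' bits): if every index block
+ * from bit 8 onwards is free and index_bits == 12 (4K blocks), then
+ * ATTR_ALLOC is truncated to 8 << 12 = 32K and ATTR_BITMAP to
+ * bitmap_size(8) bytes.
+ */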
+
+static int indx_free_children(struct ntfs_index *indx, struct ntfs_inode *ni,
+ const struct NTFS_DE *e, bool trim)
+{
+ int err;
+ struct indx_node *n;
+ struct INDEX_HDR *hdr;
+ CLST vbn = de_get_vbn(e);
+ size_t i;
+
+ err = indx_read(indx, ni, vbn, &n);
+ if (err)
+ return err;
+
+ hdr = &n->index->ihdr;
+ /* First, recurse into the children, if any. */
+ if (hdr_has_subnode(hdr)) {
+ for (e = hdr_first_de(hdr); e; e = hdr_next_de(hdr, e)) {
+ indx_free_children(indx, ni, e, false);
+ if (de_is_last(e))
+ break;
+ }
+ }
+
+ put_indx_node(n);
+
+ i = vbn >> indx->idx2vbn_bits;
+ /* We've gotten rid of the children; add this buffer to the free list. */
+ indx_mark_free(indx, ni, i);
+
+ if (!trim)
+ return 0;
+
+ /*
+ * If there are no used indexes after the current freed index,
+ * then we can truncate the allocation and bitmap attributes.
+ * Use the bitmap to check for this case.
+ */
+ indx_shrink(indx, ni, i + 1);
+ return 0;
+}
+
+/*
+ * indx_get_entry_to_replace
+ *
+ * finds a replacement entry for a deleted entry.
+ * It always returns a node entry:
+ * NTFS_IE_HAS_SUBNODES is set in the flags and the size includes the sub_vcn.
+ */
+static int indx_get_entry_to_replace(struct ntfs_index *indx,
+ struct ntfs_inode *ni,
+ const struct NTFS_DE *de_next,
+ struct NTFS_DE **de_to_replace,
+ struct ntfs_fnd *fnd)
+{
+ int err;
+ int level = -1;
+ CLST vbn;
+ struct NTFS_DE *e, *te, *re;
+ struct indx_node *n;
+ struct INDEX_BUFFER *ib;
+
+ *de_to_replace = NULL;
+
+ /* Find first leaf entry down from de_next */
+ vbn = de_get_vbn(de_next);
+ for (;;) {
+ n = NULL;
+ err = indx_read(indx, ni, vbn, &n);
+ if (err)
+ goto out;
+
+ e = hdr_first_de(&n->index->ihdr);
+ fnd_push(fnd, n, e);
+
+ if (!de_is_last(e)) {
+ /*
+ * This buffer is non-empty, so its first entry could be used as the
+ * replacement entry.
+ */
+ level = fnd->level - 1;
+ }
+
+ if (!de_has_vcn(e))
+ break;
+
+ /* This buffer is a node. Continue going down. */
+ vbn = de_get_vbn(e);
+ }
+
+ if (level == -1)
+ goto out;
+
+ n = fnd->nodes[level];
+ te = hdr_first_de(&n->index->ihdr);
+ /* Copy the candidate entry into the replacement entry buffer. */
+ re = ntfs_malloc(le16_to_cpu(te->size) + sizeof(u64));
+ if (!re) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ *de_to_replace = re;
+ memcpy(re, te, le16_to_cpu(te->size));
+
+ if (!de_has_vcn(re)) {
+ /*
+ * The replacement entry we found doesn't have a sub_vcn.
+ * Increase its size to hold one.
+ */
+ le16_add_cpu(&re->size, sizeof(u64));
+ re->flags |= NTFS_IE_HAS_SUBNODES;
+ } else {
+ /*
+ * The replacement entry we found was a node entry, which means that all
+ * its child buffers are empty. Return them to the free pool.
+ */
+ indx_free_children(indx, ni, te, true);
+ }
+
+ /*
+ * Expunge the replacement entry from its former location,
+ * and then write that buffer.
+ */
+ ib = n->index;
+ e = hdr_delete_de(&ib->ihdr, te);
+
+ fnd->de[level] = e;
+ indx_write(indx, ni, n, 0);
+
+ /* Check to see if this action created an empty leaf. */
+ if (ib_is_leaf(ib) && ib_is_empty(ib))
+ return 0;
+
+out:
+ fnd_clear(fnd);
+ return err;
+}
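+
+/*
+ * Note: the function above is the classic B-tree replacement trick:
+ * descend along the first entry of each child until a leaf is reached,
+ * take the leaf's first entry (the in-order successor) as the
+ * replacement, and expunge it from the leaf it came from.
+ */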
+
+/*
+ * indx_delete_entry
+ *
+ * deletes an entry from the index.
+ */
+int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
+ const void *key, u32 key_len, const void *ctx)
+{
+ int err, diff;
+ struct INDEX_ROOT *root;
+ struct INDEX_HDR *hdr;
+ struct ntfs_fnd *fnd, *fnd2;
+ struct INDEX_BUFFER *ib;
+ struct NTFS_DE *e, *re, *next, *prev, *me;
+ struct indx_node *n, *n2d = NULL;
+ __le64 sub_vbn;
+ int level, level2;
+ struct ATTRIB *attr;
+ struct mft_inode *mi;
+ u32 e_size, root_size, new_root_size;
+ size_t trim_bit;
+ const struct INDEX_NAMES *in;
+
+ fnd = fnd_get();
+ if (!fnd) {
+ err = -ENOMEM;
+ goto out2;
+ }
+
+ fnd2 = fnd_get();
+ if (!fnd2) {
+ err = -ENOMEM;
+ goto out1;
+ }
+
+ root = indx_get_root(indx, ni, &attr, &mi);
+ if (!root) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Locate the entry to remove. */
+ err = indx_find(indx, ni, root, key, key_len, ctx, &diff, &e, fnd);
+ if (err)
+ goto out;
+
+ if (!e || diff) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ level = fnd->level;
+
+ if (level) {
+ n = fnd->nodes[level - 1];
+ e = fnd->de[level - 1];
+ ib = n->index;
+ hdr = &ib->ihdr;
+ } else {
+ hdr = &root->ihdr;
+ e = fnd->root_de;
+ n = NULL;
+ }
+
+ e_size = le16_to_cpu(e->size);
+
+ if (!de_has_vcn_ex(e)) {
+ /* The entry to delete is a leaf, so we can just rip it out */
+ hdr_delete_de(hdr, e);
+
+ if (!level) {
+ hdr->total = hdr->used;
+
+ /* Shrink resident root attribute */
+ mi_resize_attr(mi, attr, 0 - e_size);
+ goto out;
+ }
+
+ indx_write(indx, ni, n, 0);
+
+ /*
+ * Check to see if removing that entry made
+ * the leaf empty.
+ */
+ if (ib_is_leaf(ib) && ib_is_empty(ib)) {
+ fnd_pop(fnd);
+ fnd_push(fnd2, n, e);
+ }
+ } else {
+ /*
+ * The entry we wish to delete is a node buffer, so we
+ * have to find a replacement for it.
+ */
+ next = de_get_next(e);
+
+ err = indx_get_entry_to_replace(indx, ni, next, &re, fnd2);
+ if (err)
+ goto out;
+
+ if (re) {
+ de_set_vbn_le(re, de_get_vbn_le(e));
+ hdr_delete_de(hdr, e);
+
+ err = level ? indx_insert_into_buffer(indx, ni, root,
+ re, ctx,
+ fnd->level - 1,
+ fnd)
+ : indx_insert_into_root(indx, ni, re, e,
+ ctx, fnd);
+ ntfs_free(re);
+
+ if (err)
+ goto out;
+ } else {
+ /*
+ * There is no replacement for the current entry.
+ * This means that the subtree rooted at its node is
+ * empty and can be deleted, which in turn means that
+ * the node can just inherit the deleted entry's sub_vcn.
+ */
+ indx_free_children(indx, ni, next, true);
+
+ de_set_vbn_le(next, de_get_vbn_le(e));
+ hdr_delete_de(hdr, e);
+ if (level) {
+ indx_write(indx, ni, n, 0);
+ } else {
+ hdr->total = hdr->used;
+
+ /* Shrink resident root attribute */
+ mi_resize_attr(mi, attr, 0 - e_size);
+ }
+ }
+ }
+
+ /* Delete a branch of the tree. */
+ if (!fnd2 || !fnd2->level)
+ goto out;
+
+ /* Re-read the root, since it may have changed. */
+ root = indx_get_root(indx, ni, &attr, &mi);
+ if (!root) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ n2d = NULL;
+ sub_vbn = fnd2->nodes[0]->index->vbn;
+ level2 = 0;
+ level = fnd->level;
+
+ hdr = level ? &fnd->nodes[level - 1]->index->ihdr : &root->ihdr;
+
+ /* Scan current level */
+ for (e = hdr_first_de(hdr);; e = hdr_next_de(hdr, e)) {
+ if (!e) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (de_has_vcn(e) && sub_vbn == de_get_vbn_le(e))
+ break;
+
+ if (de_is_last(e)) {
+ e = NULL;
+ break;
+ }
+ }
+
+ if (!e) {
+ /* Do slow search from root */
+ struct indx_node *in;
+
+ fnd_clear(fnd);
+
+ in = indx_find_buffer(indx, ni, root, sub_vbn, NULL);
+ if (IS_ERR(in)) {
+ err = PTR_ERR(in);
+ goto out;
+ }
+
+ if (in)
+ fnd_push(fnd, in, NULL);
+ }
+
+ /* Merge fnd2 -> fnd */
+ for (level = 0; level < fnd2->level; level++) {
+ fnd_push(fnd, fnd2->nodes[level], fnd2->de[level]);
+ fnd2->nodes[level] = NULL;
+ }
+ fnd2->level = 0;
+
+ hdr = NULL;
+ for (level = fnd->level; level; level--) {
+ struct indx_node *in = fnd->nodes[level - 1];
+
+ ib = in->index;
+ if (ib_is_empty(ib)) {
+ sub_vbn = ib->vbn;
+ } else {
+ hdr = &ib->ihdr;
+ n2d = in;
+ level2 = level;
+ break;
+ }
+ }
+
+ if (!hdr)
+ hdr = &root->ihdr;
+
+ e = hdr_first_de(hdr);
+ if (!e) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (hdr != &root->ihdr || !de_is_last(e)) {
+ prev = NULL;
+ while (!de_is_last(e)) {
+ if (de_has_vcn(e) && sub_vbn == de_get_vbn_le(e))
+ break;
+ prev = e;
+ e = hdr_next_de(hdr, e);
+ if (!e) {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (sub_vbn != de_get_vbn_le(e)) {
+ /*
+ * Didn't find the parent entry, although this buffer is in the parent trail.
+ * Something is corrupt.
+ */
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (de_is_last(e)) {
+ /*
+ * Since we can't remove the end entry, we'll remove its
+ * predecessor instead. This means we have to transfer the
+ * predecessor's sub_vcn to the end entry.
+ * Note that this index block is not empty, so the
+ * predecessor must exist.
+ */
+ if (!prev) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (de_has_vcn(prev)) {
+ de_set_vbn_le(e, de_get_vbn_le(prev));
+ } else if (de_has_vcn(e)) {
+ le16_sub_cpu(&e->size, sizeof(u64));
+ e->flags &= ~NTFS_IE_HAS_SUBNODES;
+ le32_sub_cpu(&hdr->used, sizeof(u64));
+ }
+ e = prev;
+ }
+
+ /*
+ * Copy the current entry into a temporary buffer (stripping off its
+ * down-pointer, if any) and delete it from the current buffer or root,
+ * as appropriate.
+ */
+ e_size = le16_to_cpu(e->size);
+ me = ntfs_memdup(e, e_size);
+ if (!me) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (de_has_vcn(me)) {
+ me->flags &= ~NTFS_IE_HAS_SUBNODES;
+ le16_sub_cpu(&me->size, sizeof(u64));
+ }
+
+ hdr_delete_de(hdr, e);
+
+ if (hdr == &root->ihdr) {
+ level = 0;
+ hdr->total = hdr->used;
+
+ /* Shrink resident root attribute */
+ mi_resize_attr(mi, attr, 0 - e_size);
+ } else {
+ indx_write(indx, ni, n2d, 0);
+ level = level2;
+ }
+
+ /* Mark unused buffers as free */
+ trim_bit = -1;
+ for (; level < fnd->level; level++) {
+ ib = fnd->nodes[level]->index;
+ if (ib_is_empty(ib)) {
+ size_t k = le64_to_cpu(ib->vbn) >>
+ indx->idx2vbn_bits;
+
+ indx_mark_free(indx, ni, k);
+ if (k < trim_bit)
+ trim_bit = k;
+ }
+ }
+
+ fnd_clear(fnd);
+ /*fnd->root_de = NULL;*/
+
+ /*
+ * Re-insert the entry into the tree.
+ * Find the spot in the tree where we want to insert the new entry.
+ */
+ err = indx_insert_entry(indx, ni, me, ctx, fnd);
+ ntfs_free(me);
+ if (err)
+ goto out;
+
+ if (trim_bit != -1)
+ indx_shrink(indx, ni, trim_bit);
+ } else {
+ /*
+ * This tree needs to be collapsed down to an empty root.
+ * Recreate the index root as an empty leaf and free all the bits in
+ * the index allocation bitmap.
+ */
+ fnd_clear(fnd);
+ fnd_clear(fnd2);
+
+ in = &s_index_names[indx->type];
+
+ err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len,
+ &indx->alloc_run, 0, NULL, false, NULL);
+ err = ni_remove_attr(ni, ATTR_ALLOC, in->name, in->name_len,
+ false, NULL);
+ run_close(&indx->alloc_run);
+
+ err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len,
+ &indx->bitmap_run, 0, NULL, false, NULL);
+ err = ni_remove_attr(ni, ATTR_BITMAP, in->name, in->name_len,
+ false, NULL);
+ run_close(&indx->bitmap_run);
+
+ root = indx_get_root(indx, ni, &attr, &mi);
+ if (!root) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ root_size = le32_to_cpu(attr->res.data_size);
+ new_root_size =
+ sizeof(struct INDEX_ROOT) + sizeof(struct NTFS_DE);
+
+ if (new_root_size != root_size &&
+ !mi_resize_attr(mi, attr, new_root_size - root_size)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Fill first entry */
+ e = (struct NTFS_DE *)(root + 1);
+ e->ref.low = 0;
+ e->ref.high = 0;
+ e->ref.seq = 0;
+ e->size = cpu_to_le16(sizeof(struct NTFS_DE));
+ e->flags = NTFS_IE_LAST; // 0x02
+ e->key_size = 0;
+ e->res = 0;
+
+ hdr = &root->ihdr;
+ hdr->flags = 0;
+ hdr->used = hdr->total = cpu_to_le32(
+ new_root_size - offsetof(struct INDEX_ROOT, ihdr));
+ mi->dirty = true;
+ }
+
+out:
+ fnd_put(fnd2);
+out1:
+ fnd_put(fnd);
+out2:
+ return err;
+}
+
+/*
+ * Update the duplicated information in a directory entry.
+ * 'dup' - info from the MFT record.
+ */
+int indx_update_dup(struct ntfs_inode *ni, struct ntfs_sb_info *sbi,
+ const struct ATTR_FILE_NAME *fname,
+ const struct NTFS_DUP_INFO *dup, int sync)
+{
+ int err, diff;
+ struct NTFS_DE *e = NULL;
+ struct ATTR_FILE_NAME *e_fname;
+ struct ntfs_fnd *fnd;
+ struct INDEX_ROOT *root;
+ struct mft_inode *mi;
+ struct ntfs_index *indx = &ni->dir;
+
+ fnd = fnd_get();
+ if (!fnd) {
+ err = -ENOMEM;
+ goto out1;
+ }
+
+ root = indx_get_root(indx, ni, NULL, &mi);
+ if (!root) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Find entry in directory */
+ err = indx_find(indx, ni, root, fname, fname_full_size(fname), sbi,
+ &diff, &e, fnd);
+ if (err)
+ goto out;
+
+ if (!e) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (diff) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ e_fname = (struct ATTR_FILE_NAME *)(e + 1);
+
+ if (!memcmp(&e_fname->dup, dup, sizeof(*dup))) {
+ /* Nothing to update in the index! Try to avoid this call. */
+ goto out;
+ }
+
+ memcpy(&e_fname->dup, dup, sizeof(*dup));
+
+ if (fnd->level) {
+ /* directory entry in index */
+ err = indx_write(indx, ni, fnd->nodes[fnd->level - 1], sync);
+ } else {
+ /* directory entry in directory MFT record */
+ mi->dirty = true;
+ if (sync)
+ err = mi_write(mi, 1);
+ else
+ mark_inode_dirty(&ni->vfs_inode);
+ }
+
+out:
+ fnd_put(fnd);
+
+out1:
+ return err;
+}
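+
+/*
+ * Background note: NTFS duplicates a file's sizes, times and attributes
+ * (struct NTFS_DUP_INFO) inside every directory entry that names it, in
+ * addition to the MFT record itself. indx_update_dup() above is what
+ * keeps the directory-side copy in sync when the MFT side changes.
+ */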
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
new file mode 100644
index 000000000000..bf51e294432e
--- /dev/null
+++ b/fs/ntfs3/inode.c
@@ -0,0 +1,2029 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/iversion.h>
+#include <linux/mpage.h>
+#include <linux/namei.h>
+#include <linux/nls.h>
+#include <linux/uio.h>
+#include <linux/version.h>
+#include <linux/writeback.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+/*
+ * ntfs_read_mft
+ *
+ * reads the MFT record and parses it.
+ */
+static struct inode *ntfs_read_mft(struct inode *inode,
+ const struct cpu_str *name,
+ const struct MFT_REF *ref)
+{
+ int err = 0;
+ struct ntfs_inode *ni = ntfs_i(inode);
+ struct super_block *sb = inode->i_sb;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ mode_t mode = 0;
+ struct ATTR_STD_INFO5 *std5 = NULL;
+ struct ATTR_LIST_ENTRY *le;
+ struct ATTRIB *attr;
+ bool is_match = false;
+ bool is_root = false;
+ bool is_dir;
+ unsigned long ino = inode->i_ino;
+ u32 rp_fa = 0, asize, t32;
+ u16 roff, rsize, names = 0;
+ const struct ATTR_FILE_NAME *fname = NULL;
+ const struct INDEX_ROOT *root;
+ struct REPARSE_DATA_BUFFER rp; // 0x18 bytes
+ u64 t64;
+ struct MFT_REC *rec;
+ struct runs_tree *run;
+
+ inode->i_op = NULL;
+ /* Set up 'uid' and 'gid'. */
+ inode->i_uid = sbi->options.fs_uid;
+ inode->i_gid = sbi->options.fs_gid;
+
+ err = mi_init(&ni->mi, sbi, ino);
+ if (err)
+ goto out;
+
+ if (!sbi->mft.ni && ino == MFT_REC_MFT && !sb->s_root) {
+ t64 = sbi->mft.lbo >> sbi->cluster_bits;
+ t32 = bytes_to_cluster(sbi, MFT_REC_VOL * sbi->record_size);
+ sbi->mft.ni = ni;
+ init_rwsem(&ni->file.run_lock);
+
+ if (!run_add_entry(&ni->file.run, 0, t64, t32, true)) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
+ err = mi_read(&ni->mi, ino == MFT_REC_MFT);
+
+ if (err)
+ goto out;
+
+ rec = ni->mi.mrec;
+
+ if (sbi->flags & NTFS_FLAGS_LOG_REPLAYING) {
+ ;
+ } else if (ref->seq != rec->seq) {
+ err = -EINVAL;
+ ntfs_err(sb, "MFT: r=%lx, expect seq=%x instead of %x!", ino,
+ le16_to_cpu(ref->seq), le16_to_cpu(rec->seq));
+ goto out;
+ } else if (!is_rec_inuse(rec)) {
+ err = -EINVAL;
+ ntfs_err(sb, "Inode r=%x is not in use!", (u32)ino);
+ goto out;
+ }
+
+ if (le32_to_cpu(rec->total) != sbi->record_size) {
+ // bad inode?
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (!is_rec_base(rec))
+ goto Ok;
+
+ /* The record should contain the $I30 root. */
+ is_dir = rec->flags & RECORD_FLAG_DIR;
+
+ inode->i_generation = le16_to_cpu(rec->seq);
+
+ /* Enumerate all attributes in the MFT record. */
+ le = NULL;
+ attr = NULL;
+
+ /*
+ * To reduce indentation, use goto instead of
+ * while ((attr = ni_enum_attr_ex(ni, attr, &le, NULL)))
+ */
+next_attr:
+ run = NULL;
+ err = -EINVAL;
+ attr = ni_enum_attr_ex(ni, attr, &le, NULL);
+ if (!attr)
+ goto end_enum;
+
+ if (le && le->vcn) {
+ /* This is a non-primary attribute segment. Ignore it unless this is the MFT. */
+ if (ino != MFT_REC_MFT || attr->type != ATTR_DATA)
+ goto next_attr;
+
+ run = &ni->file.run;
+ asize = le32_to_cpu(attr->size);
+ goto attr_unpack_run;
+ }
+
+ roff = attr->non_res ? 0 : le16_to_cpu(attr->res.data_off);
+ rsize = attr->non_res ? 0 : le32_to_cpu(attr->res.data_size);
+ asize = le32_to_cpu(attr->size);
+
+ switch (attr->type) {
+ case ATTR_STD:
+ if (attr->non_res ||
+ asize < sizeof(struct ATTR_STD_INFO) + roff ||
+ rsize < sizeof(struct ATTR_STD_INFO))
+ goto out;
+
+ if (std5)
+ goto next_attr;
+
+ std5 = Add2Ptr(attr, roff);
+
+#ifdef STATX_BTIME
+ nt2kernel(std5->cr_time, &ni->i_crtime);
+#endif
+ nt2kernel(std5->a_time, &inode->i_atime);
+ nt2kernel(std5->c_time, &inode->i_ctime);
+ nt2kernel(std5->m_time, &inode->i_mtime);
+
+ ni->std_fa = std5->fa;
+
+ if (asize >= sizeof(struct ATTR_STD_INFO5) + roff &&
+ rsize >= sizeof(struct ATTR_STD_INFO5))
+ ni->std_security_id = std5->security_id;
+ goto next_attr;
+
+ case ATTR_LIST:
+ if (attr->name_len || le || ino == MFT_REC_LOG)
+ goto out;
+
+ err = ntfs_load_attr_list(ni, attr);
+ if (err)
+ goto out;
+
+ le = NULL;
+ attr = NULL;
+ goto next_attr;
+
+ case ATTR_NAME:
+ if (attr->non_res || asize < SIZEOF_ATTRIBUTE_FILENAME + roff ||
+ rsize < SIZEOF_ATTRIBUTE_FILENAME)
+ goto out;
+
+ fname = Add2Ptr(attr, roff);
+ if (fname->type == FILE_NAME_DOS)
+ goto next_attr;
+
+ names += 1;
+ if (name && name->len == fname->name_len &&
+ !ntfs_cmp_names_cpu(name, (struct le_str *)&fname->name_len,
+ NULL, false))
+ is_match = true;
+
+ goto next_attr;
+
+ case ATTR_DATA:
+ if (is_dir) {
+ /* Ignore the data attribute in a directory record. */
+ goto next_attr;
+ }
+
+ if (ino == MFT_REC_BADCLUST && !attr->non_res)
+ goto next_attr;
+
+ if (attr->name_len &&
+ ((ino != MFT_REC_BADCLUST || !attr->non_res ||
+ attr->name_len != ARRAY_SIZE(BAD_NAME) ||
+ memcmp(attr_name(attr), BAD_NAME, sizeof(BAD_NAME))) &&
+ (ino != MFT_REC_SECURE || !attr->non_res ||
+ attr->name_len != ARRAY_SIZE(SDS_NAME) ||
+ memcmp(attr_name(attr), SDS_NAME, sizeof(SDS_NAME))))) {
+ /* The file contains a named stream attribute; ignore it. */
+ goto next_attr;
+ }
+
+ if (is_attr_sparsed(attr))
+ ni->std_fa |= FILE_ATTRIBUTE_SPARSE_FILE;
+ else
+ ni->std_fa &= ~FILE_ATTRIBUTE_SPARSE_FILE;
+
+ if (is_attr_compressed(attr))
+ ni->std_fa |= FILE_ATTRIBUTE_COMPRESSED;
+ else
+ ni->std_fa &= ~FILE_ATTRIBUTE_COMPRESSED;
+
+ if (is_attr_encrypted(attr))
+ ni->std_fa |= FILE_ATTRIBUTE_ENCRYPTED;
+ else
+ ni->std_fa &= ~FILE_ATTRIBUTE_ENCRYPTED;
+
+ if (!attr->non_res) {
+ ni->i_valid = inode->i_size = rsize;
+ inode_set_bytes(inode, rsize);
+ t32 = asize;
+ } else {
+ t32 = le16_to_cpu(attr->nres.run_off);
+ }
+
+ mode = S_IFREG | (0777 & sbi->options.fs_fmask_inv);
+
+ if (!attr->non_res) {
+ ni->ni_flags |= NI_FLAG_RESIDENT;
+ goto next_attr;
+ }
+
+ inode_set_bytes(inode, attr_ondisk_size(attr));
+
+ ni->i_valid = le64_to_cpu(attr->nres.valid_size);
+ inode->i_size = le64_to_cpu(attr->nres.data_size);
+ if (!attr->nres.alloc_size)
+ goto next_attr;
+
+ run = ino == MFT_REC_BITMAP ? &sbi->used.bitmap.run
+ : &ni->file.run;
+ break;
+
+ case ATTR_ROOT:
+ if (attr->non_res)
+ goto out;
+
+ root = Add2Ptr(attr, roff);
+ is_root = true;
+
+ if (attr->name_len != ARRAY_SIZE(I30_NAME) ||
+ memcmp(attr_name(attr), I30_NAME, sizeof(I30_NAME)))
+ goto next_attr;
+
+ if (root->type != ATTR_NAME ||
+ root->rule != NTFS_COLLATION_TYPE_FILENAME)
+ goto out;
+
+ if (!is_dir)
+ goto next_attr;
+
+ ni->ni_flags |= NI_FLAG_DIR;
+
+ err = indx_init(&ni->dir, sbi, attr, INDEX_MUTEX_I30);
+ if (err)
+ goto out;
+
+ mode = sb->s_root
+ ? (S_IFDIR | (0777 & sbi->options.fs_dmask_inv))
+ : (S_IFDIR | 0777);
+ goto next_attr;
+
+ case ATTR_ALLOC:
+ if (!is_root || attr->name_len != ARRAY_SIZE(I30_NAME) ||
+ memcmp(attr_name(attr), I30_NAME, sizeof(I30_NAME)))
+ goto next_attr;
+
+ inode->i_size = le64_to_cpu(attr->nres.data_size);
+ ni->i_valid = le64_to_cpu(attr->nres.valid_size);
+ inode_set_bytes(inode, le64_to_cpu(attr->nres.alloc_size));
+
+ run = &ni->dir.alloc_run;
+ break;
+
+ case ATTR_BITMAP:
+ if (ino == MFT_REC_MFT) {
+ if (!attr->non_res)
+ goto out;
+#ifndef CONFIG_NTFS3_64BIT_CLUSTER
+ /* 0x20000000 = 2^32 / 8 */
+ if (le64_to_cpu(attr->nres.alloc_size) >= 0x20000000)
+ goto out;
+#endif
+ run = &sbi->mft.bitmap.run;
+ break;
+ } else if (is_dir && attr->name_len == ARRAY_SIZE(I30_NAME) &&
+ !memcmp(attr_name(attr), I30_NAME,
+ sizeof(I30_NAME)) &&
+ attr->non_res) {
+ run = &ni->dir.bitmap_run;
+ break;
+ }
+ goto next_attr;
+
+ case ATTR_REPARSE:
+ if (attr->name_len)
+ goto next_attr;
+
+ rp_fa = ni_parse_reparse(ni, attr, &rp);
+ switch (rp_fa) {
+ case REPARSE_LINK:
+ if (!attr->non_res) {
+ inode->i_size = rsize;
+ inode_set_bytes(inode, rsize);
+ t32 = asize;
+ } else {
+ inode->i_size =
+ le64_to_cpu(attr->nres.data_size);
+ t32 = le16_to_cpu(attr->nres.run_off);
+ }
+
+ /* Looks like a normal symlink. */
+ ni->i_valid = inode->i_size;
+
+ /* Clear directory bit */
+ if (ni->ni_flags & NI_FLAG_DIR) {
+ indx_clear(&ni->dir);
+ memset(&ni->dir, 0, sizeof(ni->dir));
+ ni->ni_flags &= ~NI_FLAG_DIR;
+ } else {
+ run_close(&ni->file.run);
+ }
+ mode = S_IFLNK | 0777;
+ is_dir = false;
+ if (attr->non_res) {
+ run = &ni->file.run;
+ goto attr_unpack_run; // double break
+ }
+ break;
+
+ case REPARSE_COMPRESSED:
+ break;
+
+ case REPARSE_DEDUPLICATED:
+ break;
+ }
+ goto next_attr;
+
+ case ATTR_EA_INFO:
+ if (!attr->name_len &&
+ resident_data_ex(attr, sizeof(struct EA_INFO))) {
+ ni->ni_flags |= NI_FLAG_EA;
+ /*
+ * ntfs_get_wsl_perm updates inode->i_uid, inode->i_gid, inode->i_mode
+ */
+ inode->i_mode = mode;
+ ntfs_get_wsl_perm(inode);
+ mode = inode->i_mode;
+ }
+ goto next_attr;
+
+ default:
+ goto next_attr;
+ }
+
+attr_unpack_run:
+ roff = le16_to_cpu(attr->nres.run_off);
+
+ t64 = le64_to_cpu(attr->nres.svcn);
+ err = run_unpack_ex(run, sbi, ino, t64, le64_to_cpu(attr->nres.evcn),
+ t64, Add2Ptr(attr, roff), asize - roff);
+ if (err < 0)
+ goto out;
+ err = 0;
+ goto next_attr;
+
+end_enum:
+
+ if (!std5)
+ goto out;
+
+ if (!is_match && name) {
+ /* reuse rec as buffer for ascii name */
+ err = -ENOENT;
+ goto out;
+ }
+
+ if (std5->fa & FILE_ATTRIBUTE_READONLY)
+ mode &= ~0222;
+
+ if (!names) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ set_nlink(inode, names);
+
+ if (S_ISDIR(mode)) {
+ ni->std_fa |= FILE_ATTRIBUTE_DIRECTORY;
+
+ /*
+ * Dot and dot-dot should be included in the link count,
+ * but they were not included in the enumeration.
+ * Hard links to directories are usually disabled.
+ */
+ inode->i_op = &ntfs_dir_inode_operations;
+ inode->i_fop = &ntfs_dir_operations;
+ ni->i_valid = 0;
+ } else if (S_ISLNK(mode)) {
+ ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
+ inode->i_op = &ntfs_link_inode_operations;
+ inode->i_fop = NULL;
+ inode_nohighmem(inode); // ??
+ } else if (S_ISREG(mode)) {
+ ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
+ inode->i_op = &ntfs_file_inode_operations;
+ inode->i_fop = &ntfs_file_operations;
+ inode->i_mapping->a_ops =
+ is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops;
+ if (ino != MFT_REC_MFT)
+ init_rwsem(&ni->file.run_lock);
+ } else if (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
+ S_ISSOCK(mode)) {
+ inode->i_op = &ntfs_special_inode_operations;
+ init_special_inode(inode, mode, inode->i_rdev);
+ } else if (fname && fname->home.low == cpu_to_le32(MFT_REC_EXTEND) &&
+ fname->home.seq == cpu_to_le16(MFT_REC_EXTEND)) {
+ /* Records in $Extend are not regular files or directories. */
+ } else {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if ((sbi->options.sys_immutable &&
+ (std5->fa & FILE_ATTRIBUTE_SYSTEM)) &&
+ !S_ISFIFO(mode) && !S_ISSOCK(mode) && !S_ISLNK(mode)) {
+ inode->i_flags |= S_IMMUTABLE;
+ } else {
+ inode->i_flags &= ~S_IMMUTABLE;
+ }
+
+ inode->i_mode = mode;
+ if (!(ni->ni_flags & NI_FLAG_EA)) {
+ /* No xattr means no security descriptor (security is stored in xattr). */
+ inode->i_flags |= S_NOSEC;
+ }
+
+Ok:
+ if (ino == MFT_REC_MFT && !sb->s_root)
+ sbi->mft.ni = NULL;
+
+ unlock_new_inode(inode);
+
+ return inode;
+
+out:
+ if (ino == MFT_REC_MFT && !sb->s_root)
+ sbi->mft.ni = NULL;
+
+ iget_failed(inode);
+ return ERR_PTR(err);
+}
+
+/* Returns 1 on match. */
+static int ntfs_test_inode(struct inode *inode, void *data)
+{
+ struct MFT_REF *ref = data;
+
+ return ino_get(ref) == inode->i_ino;
+}
+
+static int ntfs_set_inode(struct inode *inode, void *data)
+{
+ const struct MFT_REF *ref = data;
+
+ inode->i_ino = ino_get(ref);
+ return 0;
+}
+
+struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref,
+ const struct cpu_str *name)
+{
+ struct inode *inode;
+
+ inode = iget5_locked(sb, ino_get(ref), ntfs_test_inode, ntfs_set_inode,
+ (void *)ref);
+ if (unlikely(!inode))
+ return ERR_PTR(-ENOMEM);
+
+ /* If this is a freshly allocated inode, we need to read it now. */
+ if (inode->i_state & I_NEW)
+ inode = ntfs_read_mft(inode, name, ref);
+ else if (ref->seq != ntfs_i(inode)->mi.mrec->seq) {
+ /* inode overlaps? */
+ make_bad_inode(inode);
+ }
+
+ return inode;
+}
+
+enum get_block_ctx {
+ GET_BLOCK_GENERAL = 0,
+ GET_BLOCK_WRITE_BEGIN = 1,
+ GET_BLOCK_DIRECT_IO_R = 2,
+ GET_BLOCK_DIRECT_IO_W = 3,
+ GET_BLOCK_BMAP = 4,
+};
+
+static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo,
+ struct buffer_head *bh, int create,
+ enum get_block_ctx ctx)
+{
+ struct super_block *sb = inode->i_sb;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ struct ntfs_inode *ni = ntfs_i(inode);
+ struct page *page = bh->b_page;
+ u8 cluster_bits = sbi->cluster_bits;
+ u32 block_size = sb->s_blocksize;
+ u64 bytes, lbo, valid;
+ u32 off;
+ int err;
+ CLST vcn, lcn, len;
+ bool new;
+
+ /* Clear previous state. */
+ clear_buffer_new(bh);
+ clear_buffer_uptodate(bh);
+
+ /* Direct write uses 'create=0'. */
+ if (!create && vbo >= ni->i_valid) {
+ /* out of valid */
+ return 0;
+ }
+
+ if (vbo >= inode->i_size) {
+ /* out of size */
+ return 0;
+ }
+
+ if (is_resident(ni)) {
+ ni_lock(ni);
+ err = attr_data_read_resident(ni, page);
+ ni_unlock(ni);
+
+ if (!err)
+ set_buffer_uptodate(bh);
+ bh->b_size = block_size;
+ return err;
+ }
+
+ vcn = vbo >> cluster_bits;
+ off = vbo & sbi->cluster_mask;
+ new = false;
+
+ err = attr_data_get_block(ni, vcn, 1, &lcn, &len, create ? &new : NULL);
+ if (err)
+ goto out;
+
+ if (!len)
+ return 0;
+
+ bytes = ((u64)len << cluster_bits) - off;
+
+ if (lcn == SPARSE_LCN) {
+ if (!create) {
+ if (bh->b_size > bytes)
+ bh->b_size = bytes;
+ return 0;
+ }
+ WARN_ON(1);
+ }
+
+ if (new) {
+ set_buffer_new(bh);
+ if ((len << cluster_bits) > block_size)
+ ntfs_sparse_cluster(inode, page, vcn, len);
+ }
+
+ lbo = ((u64)lcn << cluster_bits) + off;
+
+ set_buffer_mapped(bh);
+ bh->b_bdev = sb->s_bdev;
+ bh->b_blocknr = lbo >> sb->s_blocksize_bits;
+
+ valid = ni->i_valid;
+
+ if (ctx == GET_BLOCK_DIRECT_IO_W) {
+ /* ntfs_direct_IO will update ni->i_valid. */
+ if (vbo >= valid)
+ set_buffer_new(bh);
+ } else if (create) {
+ /* Normal write. */
+ if (bytes > bh->b_size)
+ bytes = bh->b_size;
+
+ if (vbo >= valid)
+ set_buffer_new(bh);
+
+ if (vbo + bytes > valid) {
+ ni->i_valid = vbo + bytes;
+ mark_inode_dirty(inode);
+ }
+ } else if (vbo >= valid) {
+ /* Read beyond valid data; we should never get here because it was already checked. */
+ clear_buffer_mapped(bh);
+ } else if (vbo + bytes <= valid) {
+ /* normal read */
+ } else if (vbo + block_size <= valid) {
+ /* normal short read */
+ bytes = block_size;
+ } else {
+ /*
+ * read across valid size: vbo < valid && valid < vbo + block_size
+ */
+ bytes = block_size;
+
+ if (page) {
+ u32 voff = valid - vbo;
+
+ bh->b_size = block_size;
+ off = vbo & (PAGE_SIZE - 1);
+ set_bh_page(bh, page, off);
+ ll_rw_block(REQ_OP_READ, 0, 1, &bh);
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ err = -EIO;
+ goto out;
+ }
+ zero_user_segment(page, off + voff, off + block_size);
+ }
+ }
+
+ if (bh->b_size > bytes)
+ bh->b_size = bytes;
+
+#ifndef __LP64__
+ if (ctx == GET_BLOCK_DIRECT_IO_W || ctx == GET_BLOCK_DIRECT_IO_R) {
+ static_assert(sizeof(size_t) < sizeof(loff_t));
+ if (bytes > 0x40000000u)
+ bh->b_size = 0x40000000u;
+ }
+#endif
+
+ return 0;
+
+out:
+ return err;
+}
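+
+/*
+ * Worked example of the 'valid size' handling above (illustrative
+ * numbers): with block_size = 4096, i_size = 12000 and ni->i_valid =
+ * 5000, a read of the block at vbo = 4096 crosses the valid boundary,
+ * so the block is read from disk and the tail [5000, 8192) is zeroed
+ * via zero_user_segment().
+ */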
+
+int ntfs_get_block(struct inode *inode, sector_t vbn,
+ struct buffer_head *bh_result, int create)
+{
+ return ntfs_get_block_vbo(inode, (u64)vbn << inode->i_blkbits,
+ bh_result, create, GET_BLOCK_GENERAL);
+}
+
+static int ntfs_get_block_bmap(struct inode *inode, sector_t vsn,
+ struct buffer_head *bh_result, int create)
+{
+ return ntfs_get_block_vbo(inode,
+ (u64)vsn << inode->i_sb->s_blocksize_bits,
+ bh_result, create, GET_BLOCK_BMAP);
+}
+
+static sector_t ntfs_bmap(struct address_space *mapping, sector_t block)
+{
+ return generic_block_bmap(mapping, block, ntfs_get_block_bmap);
+}
+
+static int ntfs_readpage(struct file *file, struct page *page)
+{
+ int err;
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+ struct ntfs_inode *ni = ntfs_i(inode);
+
+ if (is_resident(ni)) {
+ ni_lock(ni);
+ err = attr_data_read_resident(ni, page);
+ ni_unlock(ni);
+ if (err != E_NTFS_NONRESIDENT) {
+ unlock_page(page);
+ return err;
+ }
+ }
+
+ if (is_compressed(ni)) {
+ ni_lock(ni);
+ err = ni_readpage_cmpr(ni, page);
+ ni_unlock(ni);
+ return err;
+ }
+
+ /* normal + sparse files */
+ return mpage_readpage(page, ntfs_get_block);
+}
+
+static void ntfs_readahead(struct readahead_control *rac)
+{
+ struct address_space *mapping = rac->mapping;
+ struct inode *inode = mapping->host;
+ struct ntfs_inode *ni = ntfs_i(inode);
+ u64 valid;
+ loff_t pos;
+
+ if (is_resident(ni)) {
+ /* no readahead for resident */
+ return;
+ }
+
+ if (is_compressed(ni)) {
+ /* no readahead for compressed */
+ return;
+ }
+
+ valid = ni->i_valid;
+ pos = readahead_pos(rac);
+
+ if (valid < i_size_read(inode) && pos <= valid &&
+ valid < pos + readahead_length(rac)) {
+ /* The range crosses 'valid'; read it page by page. */
+ return;
+ }
+
+ mpage_readahead(rac, ntfs_get_block);
+}
+
+static int ntfs_get_block_direct_IO_R(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return ntfs_get_block_vbo(inode, (u64)iblock << inode->i_blkbits,
+ bh_result, create, GET_BLOCK_DIRECT_IO_R);
+}
+
+static int ntfs_get_block_direct_IO_W(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return ntfs_get_block_vbo(inode, (u64)iblock << inode->i_blkbits,
+ bh_result, create, GET_BLOCK_DIRECT_IO_W);
+}
+
+static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ struct ntfs_inode *ni = ntfs_i(inode);
+ loff_t vbo = iocb->ki_pos;
+ loff_t end;
+ int wr = iov_iter_rw(iter) & WRITE;
+ loff_t valid;
+ ssize_t ret;
+
+ if (is_resident(ni)) {
+ /* Switch to buffered write. */
+ ret = 0;
+ goto out;
+ }
+
+ ret = blockdev_direct_IO(iocb, inode, iter,
+ wr ? ntfs_get_block_direct_IO_W
+ : ntfs_get_block_direct_IO_R);
+
+ if (ret <= 0)
+ goto out;
+
+ end = vbo + ret;
+ valid = ni->i_valid;
+ if (wr) {
+ if (end > valid && !S_ISBLK(inode->i_mode)) {
+ ni->i_valid = end;
+ mark_inode_dirty(inode);
+ }
+ } else if (vbo < valid && valid < end) {
+ /* Zero the tail of the read that lies beyond the valid size. */
+ iov_iter_revert(iter, end - valid);
+ iov_iter_zero(end - valid, iter);
+ }
+
+out:
+ return ret;
+}
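+
+/*
+ * The revert-and-zero pair above handles a direct read that crosses the
+ * valid size. Illustrative numbers: if vbo = 0, valid = 5000 and
+ * blockdev_direct_IO() returned 8192 bytes, the iterator is rewound by
+ * end - valid = 3192 bytes and that tail is rewritten with zeros, so
+ * the caller never sees stale on-disk data past the valid size.
+ */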
+
+int ntfs_set_size(struct inode *inode, u64 new_size)
+{
+ struct super_block *sb = inode->i_sb;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ struct ntfs_inode *ni = ntfs_i(inode);
+ int err;
+
+ /* Check for maximum file size */
+ if (is_sparsed(ni) || is_compressed(ni)) {
+ if (new_size > sbi->maxbytes_sparse) {
+ err = -EFBIG;
+ goto out;
+ }
+ } else if (new_size > sbi->maxbytes) {
+ err = -EFBIG;
+ goto out;
+ }
+
+ ni_lock(ni);
+ down_write(&ni->file.run_lock);
+
+ err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size,
+ &ni->i_valid, true, NULL);
+
+ up_write(&ni->file.run_lock);
+ ni_unlock(ni);
+
+ mark_inode_dirty(inode);
+
+out:
+ return err;
+}
+
+static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+ struct ntfs_inode *ni = ntfs_i(inode);
+ int err;
+
+ if (is_resident(ni)) {
+ ni_lock(ni);
+ err = attr_data_write_resident(ni, page);
+ ni_unlock(ni);
+ if (err != E_NTFS_NONRESIDENT) {
+ unlock_page(page);
+ return err;
+ }
+ }
+
+ return block_write_full_page(page, ntfs_get_block, wbc);
+}
+
+static int ntfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct inode *inode = mapping->host;
+ struct ntfs_inode *ni = ntfs_i(inode);
+ /* Redirect the call to 'ntfs_writepage' for resident files. */
+ get_block_t *get_block = is_resident(ni) ? NULL : &ntfs_get_block;
+
+ return mpage_writepages(mapping, wbc, get_block);
+}
+
+static int ntfs_get_block_write_begin(struct inode *inode, sector_t vbn,
+ struct buffer_head *bh_result, int create)
+{
+ return ntfs_get_block_vbo(inode, (u64)vbn << inode->i_blkbits,
+ bh_result, create, GET_BLOCK_WRITE_BEGIN);
+}
+
+static int ntfs_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, u32 len, u32 flags, struct page **pagep,
+ void **fsdata)
+{
+ int err;
+ struct inode *inode = mapping->host;
+ struct ntfs_inode *ni = ntfs_i(inode);
+
+ *pagep = NULL;
+ if (is_resident(ni)) {
+ struct page *page = grab_cache_page_write_begin(
+ mapping, pos >> PAGE_SHIFT, flags);
+
+ if (!page) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ ni_lock(ni);
+ err = attr_data_read_resident(ni, page);
+ ni_unlock(ni);
+
+ if (!err) {
+ *pagep = page;
+ goto out;
+ }
+ unlock_page(page);
+ put_page(page);
+
+ if (err != E_NTFS_NONRESIDENT)
+ goto out;
+ }
+
+ err = block_write_begin(mapping, pos, len, flags, pagep,
+ ntfs_get_block_write_begin);
+
+out:
+ return err;
+}
+
+/* address_space_operations::write_end */
+static int ntfs_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, u32 len, u32 copied, struct page *page,
+ void *fsdata)
+
+{
+ struct inode *inode = mapping->host;
+ struct ntfs_inode *ni = ntfs_i(inode);
+ u64 valid = ni->i_valid;
+ bool dirty = false;
+ int err;
+
+ if (is_resident(ni)) {
+ ni_lock(ni);
+ err = attr_data_write_resident(ni, page);
+ ni_unlock(ni);
+ if (!err) {
+ dirty = true;
+ /* Clear any buffers in the page. */
+ if (page_has_buffers(page)) {
+ struct buffer_head *head, *bh;
+
+ bh = head = page_buffers(page);
+ do {
+ clear_buffer_dirty(bh);
+ clear_buffer_mapped(bh);
+ set_buffer_uptodate(bh);
+ } while (head != (bh = bh->b_this_page));
+ }
+ SetPageUptodate(page);
+ err = copied;
+ }
+ unlock_page(page);
+ put_page(page);
+ } else {
+ err = generic_write_end(file, mapping, pos, len, copied, page,
+ fsdata);
+ }
+
+ if (err >= 0) {
+ if (!(ni->std_fa & FILE_ATTRIBUTE_ARCHIVE)) {
+ inode->i_ctime = inode->i_mtime = current_time(inode);
+ ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE;
+ dirty = true;
+ }
+
+ if (valid != ni->i_valid) {
+ /* ni->i_valid is changed in ntfs_get_block_vbo */
+ dirty = true;
+ }
+
+ if (dirty)
+ mark_inode_dirty(inode);
+ }
+
+ return err;
+}
+
+int reset_log_file(struct inode *inode)
+{
+ int err;
+ loff_t pos = 0;
+ u32 log_size = inode->i_size;
+ struct address_space *mapping = inode->i_mapping;
+
+ for (;;) {
+ u32 len;
+ void *kaddr;
+ struct page *page;
+
+ len = pos + PAGE_SIZE > log_size ? (log_size - pos) : PAGE_SIZE;
+
+ err = block_write_begin(mapping, pos, len, 0, &page,
+ ntfs_get_block_write_begin);
+ if (err)
+ goto out;
+
+ kaddr = kmap_atomic(page);
+ memset(kaddr, -1, len);
+ kunmap_atomic(kaddr);
+ flush_dcache_page(page);
+
+ err = block_write_end(NULL, mapping, pos, len, len, page, NULL);
+ if (err < 0)
+ goto out;
+ pos += len;
+
+ if (pos >= log_size)
+ break;
+ balance_dirty_pages_ratelimited(mapping);
+ }
+out:
+ mark_inode_dirty_sync(inode);
+
+ return err;
+}
+
+int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ return _ni_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+}
+
+int ntfs_sync_inode(struct inode *inode)
+{
+ return _ni_write_inode(inode, 1);
+}
+
+/*
+ * Helper function for ntfs_flush_inodes. This writes both the inode
+ * and the file data blocks, waiting for in-flight data blocks before
+ * the start of the call. It does not wait for any I/O started
+ * during the call.
+ */
+static int writeback_inode(struct inode *inode)
+{
+ int ret = sync_inode_metadata(inode, 0);
+
+ if (!ret)
+ ret = filemap_fdatawrite(inode->i_mapping);
+ return ret;
+}
+
+/*
+ * Write data and metadata corresponding to i1 and i2. The I/O is
+ * started but we do not wait for any of it to finish.
+ *
+ * filemap_flush is used for the block device, so if there is a dirty
+ * page for a block already in flight, we will not wait and start the
+ * I/O over again.
+ */
+int ntfs_flush_inodes(struct super_block *sb, struct inode *i1,
+ struct inode *i2)
+{
+ int ret = 0;
+
+ if (i1)
+ ret = writeback_inode(i1);
+ if (!ret && i2)
+ ret = writeback_inode(i2);
+ if (!ret)
+ ret = filemap_flush(sb->s_bdev->bd_inode->i_mapping);
+ return ret;
+}
+
+int inode_write_data(struct inode *inode, const void *data, size_t bytes)
+{
+ pgoff_t idx;
+
+ /* Write non resident data */
+ for (idx = 0; bytes; idx++) {
+ size_t op = bytes > PAGE_SIZE ? PAGE_SIZE : bytes;
+ struct page *page = ntfs_map_page(inode->i_mapping, idx);
+
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ lock_page(page);
+ WARN_ON(!PageUptodate(page));
+ ClearPageUptodate(page);
+
+ memcpy(page_address(page), data, op);
+
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ unlock_page(page);
+
+ ntfs_unmap_page(page);
+
+ bytes -= op;
+ data = Add2Ptr(data, PAGE_SIZE);
+ }
+ return 0;
+}
+
+/*
+ * Number of bytes required for a REPARSE_DATA_BUFFER (IO_REPARSE_TAG_SYMLINK)
+ * holding a unicode string of 'uni_len' characters.
+ */
+static inline u32 ntfs_reparse_bytes(u32 uni_len)
+{
+ /* header + unicode string + decorated unicode string */
+ return sizeof(short) * (2 * uni_len + 4) +
+ offsetof(struct REPARSE_DATA_BUFFER,
+ SymbolicLinkReparseBuffer.PathBuffer);
+}
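+
+/*
+ * Worked example (assuming the conventional Windows layout where
+ * PathBuffer starts 20 bytes into REPARSE_DATA_BUFFER): for a 7-char
+ * target, ntfs_reparse_bytes(7) = 2 * (2 * 7 + 4) + 20 = 56 bytes.
+ * The extra four u16s make room for the "\??\" prefix that decorates
+ * the SubstituteName.
+ */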
+
+static struct REPARSE_DATA_BUFFER *
+ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname,
+ u32 size, u16 *nsize)
+{
+ int i, err;
+ struct REPARSE_DATA_BUFFER *rp;
+ __le16 *rp_name;
+ typeof(rp->SymbolicLinkReparseBuffer) *rs;
+
+ rp = ntfs_zalloc(ntfs_reparse_bytes(2 * size + 2));
+ if (!rp)
+ return ERR_PTR(-ENOMEM);
+
+ rs = &rp->SymbolicLinkReparseBuffer;
+ rp_name = rs->PathBuffer;
+
+ /* Convert link name to utf16 */
+ err = ntfs_nls_to_utf16(sbi, symname, size,
+ (struct cpu_str *)(rp_name - 1), 2 * size,
+ UTF16_LITTLE_ENDIAN);
+ if (err < 0)
+ goto out;
+
+ /* err is now the length of the symlink's unicode name. */
+ *nsize = ntfs_reparse_bytes(err);
+
+ if (*nsize > sbi->reparse.max_size) {
+ err = -EFBIG;
+ goto out;
+ }
+
+ /* Translate linux '/' into windows '\'. */
+ for (i = 0; i < err; i++) {
+ if (rp_name[i] == cpu_to_le16('/'))
+ rp_name[i] = cpu_to_le16('\\');
+ }
+
+ rp->ReparseTag = IO_REPARSE_TAG_SYMLINK;
+ rp->ReparseDataLength =
+ cpu_to_le16(*nsize - offsetof(struct REPARSE_DATA_BUFFER,
+ SymbolicLinkReparseBuffer));
+
+ /* PrintName + SubstituteName */
+ rs->SubstituteNameOffset = cpu_to_le16(sizeof(short) * err);
+ rs->SubstituteNameLength = cpu_to_le16(sizeof(short) * err + 8);
+ rs->PrintNameLength = rs->SubstituteNameOffset;
+
+ /*
+ * TODO: use relative path if possible to allow windows to parse this path
+ * 0 - absolute path, 1 - relative path (SYMLINK_FLAG_RELATIVE)
+ */
+ rs->Flags = 0;
+
+ memmove(rp_name + err + 4, rp_name, sizeof(short) * err);
+
+ /* decorate SubstituteName */
+ rp_name += err;
+ rp_name[0] = cpu_to_le16('\\');
+ rp_name[1] = cpu_to_le16('?');
+ rp_name[2] = cpu_to_le16('?');
+ rp_name[3] = cpu_to_le16('\\');
+
+ return rp;
+out:
+ ntfs_free(rp);
+ return ERR_PTR(err);
+}
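+
+/*
+ * Resulting PathBuffer layout, as built above (sketch): for the target
+ * "/tmp/x" the buffer holds the PrintName "\tmp\x" followed by the
+ * SubstituteName "\??\tmp\x"; SubstituteNameLength therefore equals
+ * PrintNameLength plus the 8 bytes of the "\??\" prefix.
+ */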
+
+struct inode *ntfs_create_inode(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
+ const struct cpu_str *uni, umode_t mode,
+ dev_t dev, const char *symname, u32 size,
+ struct ntfs_fnd *fnd)
+{
+ int err;
+ struct super_block *sb = dir->i_sb;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ const struct qstr *name = &dentry->d_name;
+ CLST ino = 0;
+ struct ntfs_inode *dir_ni = ntfs_i(dir);
+ struct ntfs_inode *ni = NULL;
+ struct inode *inode = NULL;
+ struct ATTRIB *attr;
+ struct ATTR_STD_INFO5 *std5;
+ struct ATTR_FILE_NAME *fname;
+ struct MFT_REC *rec;
+ u32 asize, dsize, sd_size;
+ enum FILE_ATTRIBUTE fa;
+ __le32 security_id = SECURITY_ID_INVALID;
+ CLST vcn;
+ const void *sd;
+ u16 t16, nsize = 0, aid = 0;
+ struct INDEX_ROOT *root, *dir_root;
+ struct NTFS_DE *e, *new_de = NULL;
+ struct REPARSE_DATA_BUFFER *rp = NULL;
+ bool rp_inserted = false;
+
+ dir_root = indx_get_root(&dir_ni->dir, dir_ni, NULL, NULL);
+ if (!dir_root)
+ return ERR_PTR(-EINVAL);
+
+ if (S_ISDIR(mode)) {
+ /* use parent's directory attributes */
+ fa = dir_ni->std_fa | FILE_ATTRIBUTE_DIRECTORY |
+ FILE_ATTRIBUTE_ARCHIVE;
+ /*
+ * By default a child directory inherits its parent's attributes.
+ * The root directory is hidden + system,
+ * so make an exception for children of the root.
+ */
+ if (dir->i_ino == MFT_REC_ROOT)
+ fa &= ~(FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM);
+ } else if (S_ISLNK(mode)) {
+ /* Ideally the link should be the same type (file/dir) as its target. */
+ fa = FILE_ATTRIBUTE_REPARSE_POINT;
+
+ /*
+ * linux: there are dirs, files, symlinks and so on.
+ * NTFS: symlinks are "dir + reparse" or "file + reparse".
+ * It would be good to create:
+ * dir + reparse if 'symname' points to a directory, or
+ * file + reparse if 'symname' points to a file.
+ * Unfortunately kern_path hangs if symname contains 'dir'.
+ */
+
+ /*
+ * struct path path;
+ *
+ * if (!kern_path(symname, LOOKUP_FOLLOW, &path)){
+ * struct inode *target = d_inode(path.dentry);
+ *
+ * if (S_ISDIR(target->i_mode))
+ * fa |= FILE_ATTRIBUTE_DIRECTORY;
+ * // if ( target->i_sb == sb ){
+ * // use relative path?
+ * // }
+ * path_put(&path);
+ * }
+ */
+ } else if (S_ISREG(mode)) {
+ if (sbi->options.sparse) {
+ /* Sparse regular file, because of the 'sparse' mount option. */
+ fa = FILE_ATTRIBUTE_SPARSE_FILE |
+ FILE_ATTRIBUTE_ARCHIVE;
+ } else if (dir_ni->std_fa & FILE_ATTRIBUTE_COMPRESSED) {
+ /* Compressed regular file, because the parent is compressed. */
+ fa = FILE_ATTRIBUTE_COMPRESSED | FILE_ATTRIBUTE_ARCHIVE;
+ } else {
+ /* regular file, default attributes */
+ fa = FILE_ATTRIBUTE_ARCHIVE;
+ }
+ } else {
+ fa = FILE_ATTRIBUTE_ARCHIVE;
+ }
+
+ if (!(mode & 0222))
+ fa |= FILE_ATTRIBUTE_READONLY;
+
+ /* allocate PATH_MAX bytes */
+ new_de = __getname();
+ if (!new_de) {
+ err = -ENOMEM;
+ goto out1;
+ }
+
+ /* Mark rw ntfs as dirty. It will be cleared at umount. */
+ ntfs_set_state(sbi, NTFS_DIRTY_DIRTY);
+
+ /* Step 1: allocate and fill new mft record */
+ err = ntfs_look_free_mft(sbi, &ino, false, NULL, NULL);
+ if (err)
+ goto out2;
+
+ ni = ntfs_new_inode(sbi, ino, fa & FILE_ATTRIBUTE_DIRECTORY);
+ if (IS_ERR(ni)) {
+ err = PTR_ERR(ni);
+ ni = NULL;
+ goto out3;
+ }
+ inode = &ni->vfs_inode;
+ inode_init_owner(mnt_userns, inode, dir, mode);
+
+ inode->i_atime = inode->i_mtime = inode->i_ctime = ni->i_crtime =
+ current_time(inode);
+
+ rec = ni->mi.mrec;
+ rec->hard_links = cpu_to_le16(1);
+ attr = Add2Ptr(rec, le16_to_cpu(rec->attr_off));
+
+ /* Get default security id */
+ sd = s_default_security;
+ sd_size = sizeof(s_default_security);
+
+ if (is_ntfs3(sbi)) {
+ security_id = dir_ni->std_security_id;
+ if (le32_to_cpu(security_id) < SECURITY_ID_FIRST) {
+ security_id = sbi->security.def_security_id;
+
+ if (security_id == SECURITY_ID_INVALID &&
+ !ntfs_insert_security(sbi, sd, sd_size,
+ &security_id, NULL))
+ sbi->security.def_security_id = security_id;
+ }
+ }
+
+ /* Insert standard info */
+ std5 = Add2Ptr(attr, SIZEOF_RESIDENT);
+
+ if (security_id == SECURITY_ID_INVALID) {
+ dsize = sizeof(struct ATTR_STD_INFO);
+ } else {
+ dsize = sizeof(struct ATTR_STD_INFO5);
+ std5->security_id = security_id;
+ ni->std_security_id = security_id;
+ }
+ asize = SIZEOF_RESIDENT + dsize;
+
+ attr->type = ATTR_STD;
+ attr->size = cpu_to_le32(asize);
+ attr->id = cpu_to_le16(aid++);
+ attr->res.data_off = SIZEOF_RESIDENT_LE;
+ attr->res.data_size = cpu_to_le32(dsize);
+
+ std5->cr_time = std5->m_time = std5->c_time = std5->a_time =
+ kernel2nt(&inode->i_atime);
+
+ ni->std_fa = fa;
+ std5->fa = fa;
+
+ attr = Add2Ptr(attr, asize);
+
+ /* Insert file name */
+ err = fill_name_de(sbi, new_de, name, uni);
+ if (err)
+ goto out4;
+
+ mi_get_ref(&ni->mi, &new_de->ref);
+
+ fname = (struct ATTR_FILE_NAME *)(new_de + 1);
+ mi_get_ref(&dir_ni->mi, &fname->home);
+ fname->dup.cr_time = fname->dup.m_time = fname->dup.c_time =
+ fname->dup.a_time = std5->cr_time;
+ fname->dup.alloc_size = fname->dup.data_size = 0;
+ fname->dup.fa = std5->fa;
+ fname->dup.ea_size = fname->dup.reparse = 0;
+
+ dsize = le16_to_cpu(new_de->key_size);
+ asize = QuadAlign(SIZEOF_RESIDENT + dsize);
+
+ attr->type = ATTR_NAME;
+ attr->size = cpu_to_le32(asize);
+ attr->res.data_off = SIZEOF_RESIDENT_LE;
+ attr->res.flags = RESIDENT_FLAG_INDEXED;
+ attr->id = cpu_to_le16(aid++);
+ attr->res.data_size = cpu_to_le32(dsize);
+ memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), fname, dsize);
+
+ attr = Add2Ptr(attr, asize);
+
+ if (security_id == SECURITY_ID_INVALID) {
+ /* Insert security attribute */
+ asize = SIZEOF_RESIDENT + QuadAlign(sd_size);
+
+ attr->type = ATTR_SECURE;
+ attr->size = cpu_to_le32(asize);
+ attr->id = cpu_to_le16(aid++);
+ attr->res.data_off = SIZEOF_RESIDENT_LE;
+ attr->res.data_size = cpu_to_le32(sd_size);
+ memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), sd, sd_size);
+
+ attr = Add2Ptr(attr, asize);
+ }
+
+ if (fa & FILE_ATTRIBUTE_DIRECTORY) {
+ /*
+ * regular directory or symlink to directory
+ * Create root attribute
+ */
+ dsize = sizeof(struct INDEX_ROOT) + sizeof(struct NTFS_DE);
+ asize = sizeof(I30_NAME) + SIZEOF_RESIDENT + dsize;
+
+ attr->type = ATTR_ROOT;
+ attr->size = cpu_to_le32(asize);
+ attr->id = cpu_to_le16(aid++);
+
+ attr->name_len = ARRAY_SIZE(I30_NAME);
+ attr->name_off = SIZEOF_RESIDENT_LE;
+ attr->res.data_off =
+ cpu_to_le16(sizeof(I30_NAME) + SIZEOF_RESIDENT);
+ attr->res.data_size = cpu_to_le32(dsize);
+ memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), I30_NAME,
+ sizeof(I30_NAME));
+
+ root = Add2Ptr(attr, sizeof(I30_NAME) + SIZEOF_RESIDENT);
+ memcpy(root, dir_root, offsetof(struct INDEX_ROOT, ihdr));
+ root->ihdr.de_off =
+ cpu_to_le32(sizeof(struct INDEX_HDR)); // 0x10
+ root->ihdr.used = cpu_to_le32(sizeof(struct INDEX_HDR) +
+ sizeof(struct NTFS_DE));
+ root->ihdr.total = root->ihdr.used;
+
+ e = Add2Ptr(root, sizeof(struct INDEX_ROOT));
+ e->size = cpu_to_le16(sizeof(struct NTFS_DE));
+ e->flags = NTFS_IE_LAST;
+ } else if (S_ISLNK(mode)) {
+ /*
+ * symlink to file
+ * Create empty resident data attribute
+ */
+ asize = SIZEOF_RESIDENT;
+
+ /* insert empty ATTR_DATA */
+ attr->type = ATTR_DATA;
+ attr->size = cpu_to_le32(SIZEOF_RESIDENT);
+ attr->id = cpu_to_le16(aid++);
+ attr->name_off = SIZEOF_RESIDENT_LE;
+ attr->res.data_off = SIZEOF_RESIDENT_LE;
+ } else {
+ /*
+ * Regular file or special node (device, fifo, socket).
+ */
+ attr->type = ATTR_DATA;
+ attr->id = cpu_to_le16(aid++);
+
+ if (S_ISREG(mode)) {
+ /* Create empty non resident data attribute */
+ attr->non_res = 1;
+ attr->nres.evcn = cpu_to_le64(-1ll);
+ if (fa & FILE_ATTRIBUTE_SPARSE_FILE) {
+ attr->size =
+ cpu_to_le32(SIZEOF_NONRESIDENT_EX + 8);
+ attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
+ attr->flags = ATTR_FLAG_SPARSED;
+ asize = SIZEOF_NONRESIDENT_EX + 8;
+ } else if (fa & FILE_ATTRIBUTE_COMPRESSED) {
+ attr->size =
+ cpu_to_le32(SIZEOF_NONRESIDENT_EX + 8);
+ attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
+ attr->flags = ATTR_FLAG_COMPRESSED;
+ attr->nres.c_unit = COMPRESSION_UNIT;
+ asize = SIZEOF_NONRESIDENT_EX + 8;
+ } else {
+ attr->size =
+ cpu_to_le32(SIZEOF_NONRESIDENT + 8);
+ attr->name_off = SIZEOF_NONRESIDENT_LE;
+ asize = SIZEOF_NONRESIDENT + 8;
+ }
+ attr->nres.run_off = attr->name_off;
+ } else {
+ /* Create empty resident data attribute */
+ attr->size = cpu_to_le32(SIZEOF_RESIDENT);
+ attr->name_off = SIZEOF_RESIDENT_LE;
+ if (fa & FILE_ATTRIBUTE_SPARSE_FILE)
+ attr->flags = ATTR_FLAG_SPARSED;
+ else if (fa & FILE_ATTRIBUTE_COMPRESSED)
+ attr->flags = ATTR_FLAG_COMPRESSED;
+ attr->res.data_off = SIZEOF_RESIDENT_LE;
+ asize = SIZEOF_RESIDENT;
+ ni->ni_flags |= NI_FLAG_RESIDENT;
+ }
+ }
+
+ if (S_ISDIR(mode)) {
+ ni->ni_flags |= NI_FLAG_DIR;
+ err = indx_init(&ni->dir, sbi, attr, INDEX_MUTEX_I30);
+ if (err)
+ goto out4;
+ } else if (S_ISLNK(mode)) {
+ rp = ntfs_create_reparse_buffer(sbi, symname, size, &nsize);
+
+ if (IS_ERR(rp)) {
+ err = PTR_ERR(rp);
+ rp = NULL;
+ goto out4;
+ }
+
+ /*
+ * Insert ATTR_REPARSE
+ */
+ attr = Add2Ptr(attr, asize);
+ attr->type = ATTR_REPARSE;
+ attr->id = cpu_to_le16(aid++);
+
+ /* resident or non resident? */
+ asize = QuadAlign(SIZEOF_RESIDENT + nsize);
+ t16 = PtrOffset(rec, attr);
+
+ if (asize + t16 + 8 > sbi->record_size) {
+ CLST alen;
+ CLST clst = bytes_to_cluster(sbi, nsize);
+
+ /* Bytes available for the packed run list. */
+ t16 = sbi->record_size - t16 - SIZEOF_NONRESIDENT;
+
+ attr->non_res = 1;
+ attr->nres.evcn = cpu_to_le64(clst - 1);
+ attr->name_off = SIZEOF_NONRESIDENT_LE;
+ attr->nres.run_off = attr->name_off;
+ attr->nres.data_size = cpu_to_le64(nsize);
+ attr->nres.valid_size = attr->nres.data_size;
+ attr->nres.alloc_size =
+ cpu_to_le64(ntfs_up_cluster(sbi, nsize));
+
+ err = attr_allocate_clusters(sbi, &ni->file.run, 0, 0,
+ clst, NULL, 0, &alen, 0,
+ NULL);
+ if (err)
+ goto out5;
+
+ err = run_pack(&ni->file.run, 0, clst,
+ Add2Ptr(attr, SIZEOF_NONRESIDENT), t16,
+ &vcn);
+ if (err < 0)
+ goto out5;
+
+ if (vcn != clst) {
+ err = -EINVAL;
+ goto out5;
+ }
+
+ asize = SIZEOF_NONRESIDENT + QuadAlign(err);
+ inode->i_size = nsize;
+ } else {
+ attr->res.data_off = SIZEOF_RESIDENT_LE;
+ attr->res.data_size = cpu_to_le32(nsize);
+ memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), rp, nsize);
+ inode->i_size = nsize;
+ nsize = 0;
+ }
+
+ attr->size = cpu_to_le32(asize);
+
+ err = ntfs_insert_reparse(sbi, IO_REPARSE_TAG_SYMLINK,
+ &new_de->ref);
+ if (err)
+ goto out5;
+
+ rp_inserted = true;
+ }
+
+ attr = Add2Ptr(attr, asize);
+ attr->type = ATTR_END;
+
+ rec->used = cpu_to_le32(PtrOffset(rec, attr) + 8);
+ rec->next_attr_id = cpu_to_le16(aid);
+
+ /* Step 2: Add the new name to the index. */
+ err = indx_insert_entry(&dir_ni->dir, dir_ni, new_de, sbi, fnd);
+ if (err)
+ goto out6;
+
+ /* Update current directory record */
+ mark_inode_dirty(dir);
+
+ inode->i_generation = le16_to_cpu(rec->seq);
+
+ dir->i_mtime = dir->i_ctime = inode->i_atime;
+
+ if (S_ISDIR(mode)) {
+ if (dir->i_mode & S_ISGID)
+ mode |= S_ISGID;
+ inode->i_op = &ntfs_dir_inode_operations;
+ inode->i_fop = &ntfs_dir_operations;
+ } else if (S_ISLNK(mode)) {
+ inode->i_op = &ntfs_link_inode_operations;
+ inode->i_fop = NULL;
+ inode->i_mapping->a_ops = &ntfs_aops;
+ } else if (S_ISREG(mode)) {
+ inode->i_op = &ntfs_file_inode_operations;
+ inode->i_fop = &ntfs_file_operations;
+ inode->i_mapping->a_ops =
+ is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops;
+ init_rwsem(&ni->file.run_lock);
+ } else {
+ inode->i_op = &ntfs_special_inode_operations;
+ init_special_inode(inode, mode, dev);
+ }
+
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+ if (!S_ISLNK(mode) && (sb->s_flags & SB_POSIXACL)) {
+ err = ntfs_init_acl(mnt_userns, inode, dir);
+ if (err)
+ goto out6;
+ } else
+#endif
+ {
+ inode->i_flags |= S_NOSEC;
+ }
+
+ /* Write non resident data */
+ if (nsize) {
+ err = ntfs_sb_write_run(sbi, &ni->file.run, 0, rp, nsize);
+ if (err)
+ goto out7;
+ }
+
+ /* call 'd_instantiate' after inode->i_op is set but before finish_open */
+ d_instantiate(dentry, inode);
+
+ ntfs_save_wsl_perm(inode);
+ mark_inode_dirty(inode);
+ mark_inode_dirty(dir);
+
+ /* normal exit */
+ goto out2;
+
+out7:
+
+ /* undo 'indx_insert_entry' */
+ indx_delete_entry(&dir_ni->dir, dir_ni, new_de + 1,
+ le16_to_cpu(new_de->key_size), sbi);
+out6:
+ if (rp_inserted)
+ ntfs_remove_reparse(sbi, IO_REPARSE_TAG_SYMLINK, &new_de->ref);
+
+out5:
+ if (S_ISDIR(mode) || run_is_empty(&ni->file.run))
+ goto out4;
+
+ run_deallocate(sbi, &ni->file.run, false);
+
+out4:
+ clear_rec_inuse(rec);
+ clear_nlink(inode);
+ ni->mi.dirty = false;
+ discard_new_inode(inode);
+out3:
+ ntfs_mark_rec_free(sbi, ino);
+
+out2:
+ __putname(new_de);
+ ntfs_free(rp);
+
+out1:
+ if (err)
+ return ERR_PTR(err);
+
+ unlock_new_inode(inode);
+
+ return inode;
+}
+
+int ntfs_link_inode(struct inode *inode, struct dentry *dentry)
+{
+ int err;
+ struct inode *dir = d_inode(dentry->d_parent);
+ struct ntfs_inode *dir_ni = ntfs_i(dir);
+ struct ntfs_inode *ni = ntfs_i(inode);
+ struct super_block *sb = inode->i_sb;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ const struct qstr *name = &dentry->d_name;
+ struct NTFS_DE *new_de = NULL;
+ struct ATTR_FILE_NAME *fname;
+ struct ATTRIB *attr;
+ u16 key_size;
+ struct INDEX_ROOT *dir_root;
+
+ dir_root = indx_get_root(&dir_ni->dir, dir_ni, NULL, NULL);
+ if (!dir_root)
+ return -EINVAL;
+
+ /* allocate PATH_MAX bytes */
+ new_de = __getname();
+ if (!new_de)
+ return -ENOMEM;
+
+ /* Mark rw ntfs as dirty. It will be cleared at umount. */
+ ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_DIRTY);
+
+ // Insert file name
+ err = fill_name_de(sbi, new_de, name, NULL);
+ if (err)
+ goto out;
+
+ key_size = le16_to_cpu(new_de->key_size);
+ err = ni_insert_resident(ni, key_size, ATTR_NAME, NULL, 0, &attr, NULL);
+ if (err)
+ goto out;
+
+ mi_get_ref(&ni->mi, &new_de->ref);
+
+ fname = (struct ATTR_FILE_NAME *)(new_de + 1);
+ mi_get_ref(&dir_ni->mi, &fname->home);
+ fname->dup.cr_time = fname->dup.m_time = fname->dup.c_time =
+ fname->dup.a_time = kernel2nt(&inode->i_ctime);
+ fname->dup.alloc_size = fname->dup.data_size = 0;
+ fname->dup.fa = ni->std_fa;
+ fname->dup.ea_size = fname->dup.reparse = 0;
+
+ memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), fname, key_size);
+
+ err = indx_insert_entry(&dir_ni->dir, dir_ni, new_de, sbi, NULL);
+ if (err)
+ goto out;
+
+ le16_add_cpu(&ni->mi.mrec->hard_links, 1);
+ ni->mi.dirty = true;
+
+out:
+ __putname(new_de);
+ return err;
+}
+
+/*
+ * ntfs_unlink_inode
+ *
+ * inode_operations::unlink
+ * inode_operations::rmdir
+ */
+int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry)
+{
+ int err;
+ struct super_block *sb = dir->i_sb;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ struct inode *inode = d_inode(dentry);
+ struct ntfs_inode *ni = ntfs_i(inode);
+ const struct qstr *name = &dentry->d_name;
+ struct ntfs_inode *dir_ni = ntfs_i(dir);
+ struct ntfs_index *indx = &dir_ni->dir;
+ struct cpu_str *uni = NULL;
+ struct ATTR_FILE_NAME *fname;
+ u8 name_type;
+ struct ATTR_LIST_ENTRY *le;
+ struct MFT_REF ref;
+ bool is_dir = S_ISDIR(inode->i_mode);
+ struct INDEX_ROOT *dir_root;
+
+ dir_root = indx_get_root(indx, dir_ni, NULL, NULL);
+ if (!dir_root)
+ return -EINVAL;
+
+ ni_lock(ni);
+
+ if (is_dir && !dir_is_empty(inode)) {
+ err = -ENOTEMPTY;
+ goto out1;
+ }
+
+ if (ntfs_is_meta_file(sbi, inode->i_ino)) {
+ err = -EINVAL;
+ goto out1;
+ }
+
+ /* allocate PATH_MAX bytes */
+ uni = __getname();
+ if (!uni) {
+ err = -ENOMEM;
+ goto out1;
+ }
+
+ /* Convert input string to unicode */
+ err = ntfs_nls_to_utf16(sbi, name->name, name->len, uni, NTFS_NAME_LEN,
+ UTF16_HOST_ENDIAN);
+ if (err < 0)
+ goto out2;
+
+ /* Mark rw ntfs as dirty. It will be cleared at umount. */
+ ntfs_set_state(sbi, NTFS_DIRTY_DIRTY);
+
+ /* find name in record */
+ mi_get_ref(&dir_ni->mi, &ref);
+
+ le = NULL;
+ fname = ni_fname_name(ni, uni, &ref, &le);
+ if (!fname) {
+ err = -ENOENT;
+ goto out3;
+ }
+
+ name_type = paired_name(fname->type);
+
+ err = indx_delete_entry(indx, dir_ni, fname, fname_full_size(fname),
+ sbi);
+ if (err)
+ goto out3;
+
+ /* Then remove name from mft */
+ ni_remove_attr_le(ni, attr_from_name(fname), le);
+
+ le16_add_cpu(&ni->mi.mrec->hard_links, -1);
+ ni->mi.dirty = true;
+
+ if (name_type != FILE_NAME_POSIX) {
+ /* Now we should delete name by type */
+ fname = ni_fname_type(ni, name_type, &le);
+ if (fname) {
+ err = indx_delete_entry(indx, dir_ni, fname,
+ fname_full_size(fname), sbi);
+ if (err)
+ goto out3;
+
+ ni_remove_attr_le(ni, attr_from_name(fname), le);
+
+ le16_add_cpu(&ni->mi.mrec->hard_links, -1);
+ }
+ }
+out3:
+ switch (err) {
+ case 0:
+ drop_nlink(inode);
+ fallthrough;
+ case -ENOTEMPTY:
+ case -ENOSPC:
+ case -EROFS:
+ break;
+ default:
+ make_bad_inode(inode);
+ }
+
+ dir->i_mtime = dir->i_ctime = current_time(dir);
+ mark_inode_dirty(dir);
+ inode->i_ctime = dir->i_ctime;
+ if (inode->i_nlink)
+ mark_inode_dirty(inode);
+
+out2:
+ __putname(uni);
+out1:
+ ni_unlock(ni);
+ return err;
+}
+
+void ntfs_evict_inode(struct inode *inode)
+{
+ truncate_inode_pages_final(&inode->i_data);
+
+ if (inode->i_nlink)
+ _ni_write_inode(inode, inode_needs_sync(inode));
+
+ invalidate_inode_buffers(inode);
+ clear_inode(inode);
+
+ ni_clear(ntfs_i(inode));
+}
+
+static noinline int ntfs_readlink_hlp(struct inode *inode, char *buffer,
+ int buflen)
+{
+ int i, err = 0;
+ struct ntfs_inode *ni = ntfs_i(inode);
+ struct super_block *sb = inode->i_sb;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ u64 i_size = inode->i_size;
+ u16 nlen = 0;
+ void *to_free = NULL;
+ struct REPARSE_DATA_BUFFER *rp;
+ struct le_str *uni;
+ struct ATTRIB *attr;
+
+ /* Reparse data present. Try to parse it */
+ static_assert(!offsetof(struct REPARSE_DATA_BUFFER, ReparseTag));
+ static_assert(sizeof(u32) == sizeof(rp->ReparseTag));
+
+ *buffer = 0;
+
+ /* Read into a temporary buffer */
+ if (i_size > sbi->reparse.max_size || i_size <= sizeof(u32)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ attr = ni_find_attr(ni, NULL, NULL, ATTR_REPARSE, NULL, 0, NULL, NULL);
+ if (!attr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (!attr->non_res) {
+ rp = resident_data_ex(attr, i_size);
+ if (!rp) {
+ err = -EINVAL;
+ goto out;
+ }
+ } else {
+ rp = ntfs_malloc(i_size);
+ if (!rp) {
+ err = -ENOMEM;
+ goto out;
+ }
+ to_free = rp;
+ err = ntfs_read_run_nb(sbi, &ni->file.run, 0, rp, i_size, NULL);
+ if (err)
+ goto out;
+ }
+
+ err = -EINVAL;
+
+ /* Microsoft Tag */
+ switch (rp->ReparseTag) {
+ case IO_REPARSE_TAG_MOUNT_POINT:
+ /* Mount points and junctions */
+ /* Can we use 'Rp->MountPointReparseBuffer.PrintNameLength'? */
+ if (i_size <= offsetof(struct REPARSE_DATA_BUFFER,
+ MountPointReparseBuffer.PathBuffer))
+ goto out;
+ uni = Add2Ptr(rp,
+ offsetof(struct REPARSE_DATA_BUFFER,
+ MountPointReparseBuffer.PathBuffer) +
+ le16_to_cpu(rp->MountPointReparseBuffer
+ .PrintNameOffset) -
+ 2);
+ nlen = le16_to_cpu(rp->MountPointReparseBuffer.PrintNameLength);
+ break;
+
+ case IO_REPARSE_TAG_SYMLINK:
+ /* FolderSymbolicLink */
+ /* Can we use 'Rp->SymbolicLinkReparseBuffer.PrintNameLength'? */
+ if (i_size <= offsetof(struct REPARSE_DATA_BUFFER,
+ SymbolicLinkReparseBuffer.PathBuffer))
+ goto out;
+ uni = Add2Ptr(rp,
+ offsetof(struct REPARSE_DATA_BUFFER,
+ SymbolicLinkReparseBuffer.PathBuffer) +
+ le16_to_cpu(rp->SymbolicLinkReparseBuffer
+ .PrintNameOffset) -
+ 2);
+ nlen = le16_to_cpu(
+ rp->SymbolicLinkReparseBuffer.PrintNameLength);
+ break;
+
+ case IO_REPARSE_TAG_CLOUD:
+ case IO_REPARSE_TAG_CLOUD_1:
+ case IO_REPARSE_TAG_CLOUD_2:
+ case IO_REPARSE_TAG_CLOUD_3:
+ case IO_REPARSE_TAG_CLOUD_4:
+ case IO_REPARSE_TAG_CLOUD_5:
+ case IO_REPARSE_TAG_CLOUD_6:
+ case IO_REPARSE_TAG_CLOUD_7:
+ case IO_REPARSE_TAG_CLOUD_8:
+ case IO_REPARSE_TAG_CLOUD_9:
+ case IO_REPARSE_TAG_CLOUD_A:
+ case IO_REPARSE_TAG_CLOUD_B:
+ case IO_REPARSE_TAG_CLOUD_C:
+ case IO_REPARSE_TAG_CLOUD_D:
+ case IO_REPARSE_TAG_CLOUD_E:
+ case IO_REPARSE_TAG_CLOUD_F:
+ err = sizeof("OneDrive") - 1;
+ if (err > buflen)
+ err = buflen;
+ memcpy(buffer, "OneDrive", err);
+ goto out;
+
+ default:
+ if (IsReparseTagMicrosoft(rp->ReparseTag)) {
+ /* unknown Microsoft Tag */
+ goto out;
+ }
+ if (!IsReparseTagNameSurrogate(rp->ReparseTag) ||
+ i_size <= sizeof(struct REPARSE_POINT)) {
+ goto out;
+ }
+
+ /* Users tag */
+ uni = Add2Ptr(rp, sizeof(struct REPARSE_POINT) - 2);
+ nlen = le16_to_cpu(rp->ReparseDataLength) -
+ sizeof(struct REPARSE_POINT);
+ }
+
+ /* Convert nlen from bytes to UNICODE chars */
+ nlen >>= 1;
+
+ /* Check that name is available */
+ if (!nlen || &uni->name[nlen] > (__le16 *)Add2Ptr(rp, i_size))
+ goto out;
+
+ /* If name is already zero terminated then truncate it now */
+ if (!uni->name[nlen - 1])
+ nlen -= 1;
+ uni->len = nlen;
+
+ err = ntfs_utf16_to_nls(sbi, uni, buffer, buflen);
+
+ if (err < 0)
+ goto out;
+
+ /* translate windows '\' into linux '/' */
+ for (i = 0; i < err; i++) {
+ if (buffer[i] == '\\')
+ buffer[i] = '/';
+ }
+
+ /* Always set last zero */
+ buffer[err] = 0;
+out:
+ ntfs_free(to_free);
+ return err;
+}
+
+static const char *ntfs_get_link(struct dentry *de, struct inode *inode,
+ struct delayed_call *done)
+{
+ int err;
+ char *ret;
+
+ if (!de)
+ return ERR_PTR(-ECHILD);
+
+ ret = kmalloc(PAGE_SIZE, GFP_NOFS);
+ if (!ret)
+ return ERR_PTR(-ENOMEM);
+
+ err = ntfs_readlink_hlp(inode, ret, PAGE_SIZE);
+ if (err < 0) {
+ kfree(ret);
+ return ERR_PTR(err);
+ }
+
+ set_delayed_call(done, kfree_link, ret);
+
+ return ret;
+}
+
+// clang-format off
+const struct inode_operations ntfs_link_inode_operations = {
+ .get_link = ntfs_get_link,
+ .setattr = ntfs3_setattr,
+ .listxattr = ntfs_listxattr,
+ .permission = ntfs_permission,
+ .get_acl = ntfs_get_acl,
+ .set_acl = ntfs_set_acl,
+};
+
+const struct address_space_operations ntfs_aops = {
+ .readpage = ntfs_readpage,
+ .readahead = ntfs_readahead,
+ .writepage = ntfs_writepage,
+ .writepages = ntfs_writepages,
+ .write_begin = ntfs_write_begin,
+ .write_end = ntfs_write_end,
+ .direct_IO = ntfs_direct_IO,
+ .bmap = ntfs_bmap,
+ .set_page_dirty = __set_page_dirty_buffers,
+};
+
+const struct address_space_operations ntfs_aops_cmpr = {
+ .readpage = ntfs_readpage,
+ .readahead = ntfs_readahead,
+};
+// clang-format on
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
new file mode 100644
index 000000000000..6be13e256c1a
--- /dev/null
+++ b/fs/ntfs3/super.c
@@ -0,0 +1,1504 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ *
+ * terminology
+ *
+ * cluster - allocation unit - 512,1K,2K,4K,...,2M
+ * vcn - virtual cluster number - offset inside the file in clusters
+ * vbo - virtual byte offset - offset inside the file in bytes
+ * lcn - logical cluster number - 0 based cluster in clusters heap
+ * lbo - logical byte offset - absolute position inside volume
+ * run - maps vcn to lcn - stored in attributes in packed form
+ * attr - attribute segment - std/name/data etc records inside MFT
+ * mi - mft inode - one MFT record(usually 1024 bytes or 4K), consists of attributes
+ * ni - ntfs inode - extends linux inode. consists of one or more mft inodes
+ * index - unit inside directory - 2K, 4K, <=page size, does not depend on cluster size
+ *
+ * WSL - Windows Subsystem for Linux
+ * https://docs.microsoft.com/en-us/windows/wsl/file-permissions
+ * It stores uid/gid/mode/dev in xattr
+ *
+ */
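+
+/*
+ * Worked example (illustrative, not part of the original terminology
+ * list): with a 4K cluster (cluster_bits == 12), a file whose run maps
+ * vcn 0..7 -> lcn 100..107 resolves vbo 0x3000 as vcn = 0x3000 >> 12 = 3,
+ * hence lcn = 103 and lbo = 103 << 12 = 0x67000.
+ */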
+
+#include <linux/backing-dev.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/exportfs.h>
+#include <linux/fs.h>
+#include <linux/iversion.h>
+#include <linux/module.h>
+#include <linux/nls.h>
+#include <linux/parser.h>
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+#include "lib/lib.h"
+#endif
+
+#ifdef CONFIG_PRINTK
+/*
+ * Trace warnings/notices/errors
+ * Thanks Joe Perches <joe(a)perches.com> for implementation
+ */
+void ntfs_printk(const struct super_block *sb, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+ int level;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+
+ /* Should we use different ratelimits for warnings/notices/errors? */
+ if (!___ratelimit(&sbi->msg_ratelimit, "ntfs3"))
+ return;
+
+ va_start(args, fmt);
+
+ level = printk_get_level(fmt);
+ vaf.fmt = printk_skip_level(fmt);
+ vaf.va = &args;
+ printk("%c%cntfs3: %s: %pV\n", KERN_SOH_ASCII, level, sb->s_id, &vaf);
+
+ va_end(args);
+}
+
+static char s_name_buf[512];
+static atomic_t s_name_buf_cnt = ATOMIC_INIT(1); // 1 means 'free s_name_buf'
+
+/* print warnings/notices/errors about inode using name or inode number */
+void ntfs_inode_printk(struct inode *inode, const char *fmt, ...)
+{
+ struct super_block *sb = inode->i_sb;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ char *name;
+ va_list args;
+ struct va_format vaf;
+ int level;
+
+ if (!___ratelimit(&sbi->msg_ratelimit, "ntfs3"))
+ return;
+
+ /* Use a statically allocated buffer, if possible */
+ name = atomic_dec_and_test(&s_name_buf_cnt)
+ ? s_name_buf
+ : kmalloc(sizeof(s_name_buf), GFP_NOFS);
+
+ if (name) {
+ struct dentry *de = d_find_alias(inode);
+ const u32 name_len = ARRAY_SIZE(s_name_buf) - 1;
+
+ if (de) {
+ spin_lock(&de->d_lock);
+ snprintf(name, name_len, " \"%s\"", de->d_name.name);
+ spin_unlock(&de->d_lock);
+ name[name_len] = 0; /* to be sure */
+ } else {
+ name[0] = 0;
+ }
+ dput(de); /* cocci warns if placed in branch "if (de)" */
+ }
+
+ va_start(args, fmt);
+
+ level = printk_get_level(fmt);
+ vaf.fmt = printk_skip_level(fmt);
+ vaf.va = &args;
+
+ printk("%c%cntfs3: %s: ino=%lx,%s %pV\n", KERN_SOH_ASCII, level,
+ sb->s_id, inode->i_ino, name ? name : "", &vaf);
+
+ va_end(args);
+
+ atomic_inc(&s_name_buf_cnt);
+ if (name != s_name_buf)
+ kfree(name);
+}
+#endif
+
+/*
+ * Shared memory struct.
+ *
+ * The on-disk ntfs upcase table is created by the ntfs formatter.
+ * The 'upcase' table is 128K bytes of memory.
+ * We should read it into memory when mounting.
+ * Several ntfs volumes likely use the same 'upcase' table.
+ * It is a good idea to share the in-memory 'upcase' table between volumes.
+ * Unfortunately winxp/vista/win7 use different upcase tables.
+ */
+static DEFINE_SPINLOCK(s_shared_lock);
+
+static struct {
+ void *ptr;
+ u32 len;
+ int cnt;
+} s_shared[8];
+
+/*
+ * ntfs_set_shared
+ *
+ * Returns 'ptr' if pointer was saved in shared memory
+ * Returns NULL if pointer was not shared
+ */
+void *ntfs_set_shared(void *ptr, u32 bytes)
+{
+ void *ret = NULL;
+ int i, j = -1;
+
+ spin_lock(&s_shared_lock);
+ for (i = 0; i < ARRAY_SIZE(s_shared); i++) {
+ if (!s_shared[i].cnt) {
+ j = i;
+ } else if (bytes == s_shared[i].len &&
+ !memcmp(s_shared[i].ptr, ptr, bytes)) {
+ s_shared[i].cnt += 1;
+ ret = s_shared[i].ptr;
+ break;
+ }
+ }
+
+ if (!ret && j != -1) {
+ s_shared[j].ptr = ptr;
+ s_shared[j].len = bytes;
+ s_shared[j].cnt = 1;
+ ret = ptr;
+ }
+ spin_unlock(&s_shared_lock);
+
+ return ret;
+}
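+
+/*
+ * Usage sketch (illustrative; mirrors the upcase handling later in
+ * ntfs_fill_super): try to share a freshly read table and free the
+ * private copy when an identical one is already registered:
+ *
+ *   shared = ntfs_set_shared(upcase, bytes);
+ *   if (shared && shared != upcase) {
+ *           sbi->upcase = shared;
+ *           ntfs_vfree(upcase);
+ *   }
+ */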
+
+/*
+ * ntfs_put_shared
+ *
+ * Returns 'ptr' if pointer is not shared anymore
+ * Returns NULL if pointer is still shared
+ */
+void *ntfs_put_shared(void *ptr)
+{
+ void *ret = ptr;
+ int i;
+
+ spin_lock(&s_shared_lock);
+ for (i = 0; i < ARRAY_SIZE(s_shared); i++) {
+ if (s_shared[i].cnt && s_shared[i].ptr == ptr) {
+ if (--s_shared[i].cnt)
+ ret = NULL;
+ break;
+ }
+ }
+ spin_unlock(&s_shared_lock);
+
+ return ret;
+}
+
+static inline void clear_mount_options(struct ntfs_mount_options *options)
+{
+ unload_nls(options->nls);
+}
+
+enum Opt {
+ Opt_uid,
+ Opt_gid,
+ Opt_umask,
+ Opt_dmask,
+ Opt_fmask,
+ Opt_immutable,
+ Opt_discard,
+ Opt_force,
+ Opt_sparse,
+ Opt_nohidden,
+ Opt_showmeta,
+ Opt_acl,
+ Opt_noatime,
+ Opt_nls,
+ Opt_prealloc,
+ Opt_no_acs_rules,
+ Opt_err,
+};
+
+static const match_table_t ntfs_tokens = {
+ { Opt_uid, "uid=%u" },
+ { Opt_gid, "gid=%u" },
+ { Opt_umask, "umask=%o" },
+ { Opt_dmask, "dmask=%o" },
+ { Opt_fmask, "fmask=%o" },
+ { Opt_immutable, "sys_immutable" },
+ { Opt_discard, "discard" },
+ { Opt_force, "force" },
+ { Opt_sparse, "sparse" },
+ { Opt_nohidden, "nohidden" },
+ { Opt_acl, "acl" },
+ { Opt_noatime, "noatime" },
+ { Opt_showmeta, "showmeta" },
+ { Opt_nls, "nls=%s" },
+ { Opt_prealloc, "prealloc" },
+ { Opt_no_acs_rules, "no_acs_rules" },
+ { Opt_err, NULL },
+};
+
+static noinline int ntfs_parse_options(struct super_block *sb, char *options,
+ int silent,
+ struct ntfs_mount_options *opts)
+{
+ char *p;
+ substring_t args[MAX_OPT_ARGS];
+ int option;
+ char nls_name[30];
+ struct nls_table *nls;
+
+ opts->fs_uid = current_uid();
+ opts->fs_gid = current_gid();
+ opts->fs_fmask_inv = opts->fs_dmask_inv = ~current_umask();
+ nls_name[0] = 0;
+
+ if (!options)
+ goto out;
+
+ while ((p = strsep(&options, ","))) {
+ int token;
+
+ if (!*p)
+ continue;
+
+ token = match_token(p, ntfs_tokens, args);
+ switch (token) {
+ case Opt_immutable:
+ opts->sys_immutable = 1;
+ break;
+ case Opt_uid:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ opts->fs_uid = make_kuid(current_user_ns(), option);
+ if (!uid_valid(opts->fs_uid))
+ return -EINVAL;
+ opts->uid = 1;
+ break;
+ case Opt_gid:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ opts->fs_gid = make_kgid(current_user_ns(), option);
+ if (!gid_valid(opts->fs_gid))
+ return -EINVAL;
+ opts->gid = 1;
+ break;
+ case Opt_umask:
+ if (match_octal(&args[0], &option))
+ return -EINVAL;
+ opts->fs_fmask_inv = opts->fs_dmask_inv = ~option;
+ opts->fmask = opts->dmask = 1;
+ break;
+ case Opt_dmask:
+ if (match_octal(&args[0], &option))
+ return -EINVAL;
+ opts->fs_dmask_inv = ~option;
+ opts->dmask = 1;
+ break;
+ case Opt_fmask:
+ if (match_octal(&args[0], &option))
+ return -EINVAL;
+ opts->fs_fmask_inv = ~option;
+ opts->fmask = 1;
+ break;
+ case Opt_discard:
+ opts->discard = 1;
+ break;
+ case Opt_force:
+ opts->force = 1;
+ break;
+ case Opt_sparse:
+ opts->sparse = 1;
+ break;
+ case Opt_nohidden:
+ opts->nohidden = 1;
+ break;
+ case Opt_acl:
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+ sb->s_flags |= SB_POSIXACL;
+ break;
+#else
+ ntfs_err(sb, "support for ACL not compiled in!");
+ return -EINVAL;
+#endif
+ case Opt_noatime:
+ sb->s_flags |= SB_NOATIME;
+ break;
+ case Opt_showmeta:
+ opts->showmeta = 1;
+ break;
+ case Opt_nls:
+ match_strlcpy(nls_name, &args[0], sizeof(nls_name));
+ break;
+ case Opt_prealloc:
+ opts->prealloc = 1;
+ break;
+ case Opt_no_acs_rules:
+ opts->no_acs_rules = 1;
+ break;
+ default:
+ if (!silent)
+ ntfs_err(
+ sb,
+ "Unrecognized mount option \"%s\" or missing value",
+ p);
+ //return -EINVAL;
+ }
+ }
+
+out:
+ if (!strcmp(nls_name[0] ? nls_name : CONFIG_NLS_DEFAULT, "utf8")) {
+ /* For UTF-8 use utf16s_to_utf8s/utf8s_to_utf16s instead of nls */
+ nls = NULL;
+ } else if (nls_name[0]) {
+ nls = load_nls(nls_name);
+ if (!nls) {
+ ntfs_err(sb, "failed to load \"%s\"", nls_name);
+ return -EINVAL;
+ }
+ } else {
+ nls = load_nls_default();
+ if (!nls) {
+ ntfs_err(sb, "failed to load default nls");
+ return -EINVAL;
+ }
+ }
+ opts->nls = nls;
+
+ return 0;
+}
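+
+/*
+ * Example (illustrative): parsing "uid=1000,fmask=0177,dmask=077,nls=utf8"
+ * sets opts->fs_uid to kuid 1000, opts->fs_fmask_inv to ~0177 and
+ * opts->fs_dmask_inv to ~077, and leaves opts->nls == NULL because utf8
+ * is handled via utf16s_to_utf8s/utf8s_to_utf16s instead of an nls table.
+ */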
+
+static int ntfs_remount(struct super_block *sb, int *flags, char *data)
+{
+ int err, ro_rw;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ struct ntfs_mount_options old_opts;
+ char *orig_data = kstrdup(data, GFP_KERNEL);
+
+ if (data && !orig_data)
+ return -ENOMEM;
+
+ /* Store original options */
+ memcpy(&old_opts, &sbi->options, sizeof(old_opts));
+ clear_mount_options(&sbi->options);
+ memset(&sbi->options, 0, sizeof(sbi->options));
+
+ err = ntfs_parse_options(sb, data, 0, &sbi->options);
+ if (err)
+ goto restore_opts;
+
+ ro_rw = sb_rdonly(sb) && !(*flags & SB_RDONLY);
+ if (ro_rw && (sbi->flags & NTFS_FLAGS_NEED_REPLAY)) {
+ ntfs_warn(
+ sb,
+ "Couldn't remount rw because journal is not replayed. Please umount/remount instead\n");
+ err = -EINVAL;
+ goto restore_opts;
+ }
+
+ sync_filesystem(sb);
+
+ if (ro_rw && (sbi->volume.flags & VOLUME_FLAG_DIRTY) &&
+ !sbi->options.force) {
+ ntfs_warn(sb, "volume is dirty and \"force\" flag is not set!");
+ err = -EINVAL;
+ goto restore_opts;
+ }
+
+ clear_mount_options(&old_opts);
+
+ *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME) |
+ SB_NODIRATIME | SB_NOATIME;
+ ntfs_info(sb, "re-mounted. Opts: %s", orig_data);
+ err = 0;
+ goto out;
+
+restore_opts:
+ clear_mount_options(&sbi->options);
+ memcpy(&sbi->options, &old_opts, sizeof(old_opts));
+
+out:
+ kfree(orig_data);
+ return err;
+}
+
+static struct kmem_cache *ntfs_inode_cachep;
+
+static struct inode *ntfs_alloc_inode(struct super_block *sb)
+{
+ struct ntfs_inode *ni = kmem_cache_alloc(ntfs_inode_cachep, GFP_NOFS);
+
+ if (!ni)
+ return NULL;
+
+ memset(ni, 0, offsetof(struct ntfs_inode, vfs_inode));
+
+ mutex_init(&ni->ni_lock);
+
+ return &ni->vfs_inode;
+}
+
+static void ntfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ntfs_inode *ni = ntfs_i(inode);
+
+ mutex_destroy(&ni->ni_lock);
+
+ kmem_cache_free(ntfs_inode_cachep, ni);
+}
+
+static void ntfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, ntfs_i_callback);
+}
+
+static void init_once(void *foo)
+{
+ struct ntfs_inode *ni = foo;
+
+ inode_init_once(&ni->vfs_inode);
+}
+
+/* noinline to reduce binary size */
+static noinline void put_ntfs(struct ntfs_sb_info *sbi)
+{
+ ntfs_free(sbi->new_rec);
+ ntfs_vfree(ntfs_put_shared(sbi->upcase));
+ ntfs_free(sbi->def_table);
+
+ wnd_close(&sbi->mft.bitmap);
+ wnd_close(&sbi->used.bitmap);
+
+ if (sbi->mft.ni)
+ iput(&sbi->mft.ni->vfs_inode);
+
+ if (sbi->security.ni)
+ iput(&sbi->security.ni->vfs_inode);
+
+ if (sbi->reparse.ni)
+ iput(&sbi->reparse.ni->vfs_inode);
+
+ if (sbi->objid.ni)
+ iput(&sbi->objid.ni->vfs_inode);
+
+ if (sbi->volume.ni)
+ iput(&sbi->volume.ni->vfs_inode);
+
+ ntfs_update_mftmirr(sbi, 0);
+
+ indx_clear(&sbi->security.index_sii);
+ indx_clear(&sbi->security.index_sdh);
+ indx_clear(&sbi->reparse.index_r);
+ indx_clear(&sbi->objid.index_o);
+ ntfs_free(sbi->compress.lznt);
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+ xpress_free_decompressor(sbi->compress.xpress);
+ lzx_free_decompressor(sbi->compress.lzx);
+#endif
+ clear_mount_options(&sbi->options);
+
+ ntfs_free(sbi);
+}
+
+static void ntfs_put_super(struct super_block *sb)
+{
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+
+ /* Mark rw ntfs as clean, if possible */
+ ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
+
+ put_ntfs(sbi);
+
+ sync_blockdev(sb->s_bdev);
+}
+
+static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ struct super_block *sb = dentry->d_sb;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ struct wnd_bitmap *wnd = &sbi->used.bitmap;
+
+ buf->f_type = sb->s_magic;
+ buf->f_bsize = sbi->cluster_size;
+ buf->f_blocks = wnd->nbits;
+
+ buf->f_bfree = buf->f_bavail = wnd_zeroes(wnd);
+ buf->f_fsid.val[0] = sbi->volume.ser_num;
+ buf->f_fsid.val[1] = (sbi->volume.ser_num >> 32);
+ buf->f_namelen = NTFS_NAME_LEN;
+
+ return 0;
+}
+
+static int ntfs_show_options(struct seq_file *m, struct dentry *root)
+{
+ struct super_block *sb = root->d_sb;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ struct ntfs_mount_options *opts = &sbi->options;
+ struct user_namespace *user_ns = seq_user_ns(m);
+
+ if (opts->uid)
+ seq_printf(m, ",uid=%u",
+ from_kuid_munged(user_ns, opts->fs_uid));
+ if (opts->gid)
+ seq_printf(m, ",gid=%u",
+ from_kgid_munged(user_ns, opts->fs_gid));
+ if (opts->fmask)
+ seq_printf(m, ",fmask=%04o", ~opts->fs_fmask_inv);
+ if (opts->dmask)
+ seq_printf(m, ",dmask=%04o", ~opts->fs_dmask_inv);
+ if (opts->nls)
+ seq_printf(m, ",nls=%s", opts->nls->charset);
+ else
+ seq_puts(m, ",nls=utf8");
+ if (opts->sys_immutable)
+ seq_puts(m, ",sys_immutable");
+ if (opts->discard)
+ seq_puts(m, ",discard");
+ if (opts->sparse)
+ seq_puts(m, ",sparse");
+ if (opts->showmeta)
+ seq_puts(m, ",showmeta");
+ if (opts->nohidden)
+ seq_puts(m, ",nohidden");
+ if (opts->force)
+ seq_puts(m, ",force");
+ if (opts->no_acs_rules)
+ seq_puts(m, ",no_acs_rules");
+ if (opts->prealloc)
+ seq_puts(m, ",prealloc");
+ if (sb->s_flags & SB_POSIXACL)
+ seq_puts(m, ",acl");
+ if (sb->s_flags & SB_NOATIME)
+ seq_puts(m, ",noatime");
+
+ return 0;
+}
+
+/*super_operations::sync_fs*/
+static int ntfs_sync_fs(struct super_block *sb, int wait)
+{
+ int err = 0, err2;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ struct ntfs_inode *ni;
+ struct inode *inode;
+
+ ni = sbi->security.ni;
+ if (ni) {
+ inode = &ni->vfs_inode;
+ err2 = _ni_write_inode(inode, wait);
+ if (err2 && !err)
+ err = err2;
+ }
+
+ ni = sbi->objid.ni;
+ if (ni) {
+ inode = &ni->vfs_inode;
+ err2 = _ni_write_inode(inode, wait);
+ if (err2 && !err)
+ err = err2;
+ }
+
+ ni = sbi->reparse.ni;
+ if (ni) {
+ inode = &ni->vfs_inode;
+ err2 = _ni_write_inode(inode, wait);
+ if (err2 && !err)
+ err = err2;
+ }
+
+ if (!err)
+ ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
+
+ ntfs_update_mftmirr(sbi, wait);
+
+ return err;
+}
+
+static const struct super_operations ntfs_sops = {
+ .alloc_inode = ntfs_alloc_inode,
+ .destroy_inode = ntfs_destroy_inode,
+ .evict_inode = ntfs_evict_inode,
+ .put_super = ntfs_put_super,
+ .statfs = ntfs_statfs,
+ .show_options = ntfs_show_options,
+ .sync_fs = ntfs_sync_fs,
+ .remount_fs = ntfs_remount,
+ .write_inode = ntfs3_write_inode,
+};
+
+static struct inode *ntfs_export_get_inode(struct super_block *sb, u64 ino,
+ u32 generation)
+{
+ struct MFT_REF ref;
+ struct inode *inode;
+
+ ref.low = cpu_to_le32(ino);
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+ ref.high = cpu_to_le16(ino >> 32);
+#else
+ ref.high = 0;
+#endif
+ ref.seq = cpu_to_le16(generation);
+
+ inode = ntfs_iget5(sb, &ref, NULL);
+ if (!IS_ERR(inode) && is_bad_inode(inode)) {
+ iput(inode);
+ inode = ERR_PTR(-ESTALE);
+ }
+
+ return inode;
+}
+
+static struct dentry *ntfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+ ntfs_export_get_inode);
+}
+
+static struct dentry *ntfs_fh_to_parent(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+ ntfs_export_get_inode);
+}
+
+/* TODO: == ntfs_sync_inode */
+static int ntfs_nfs_commit_metadata(struct inode *inode)
+{
+ return _ni_write_inode(inode, 1);
+}
+
+static const struct export_operations ntfs_export_ops = {
+ .fh_to_dentry = ntfs_fh_to_dentry,
+ .fh_to_parent = ntfs_fh_to_parent,
+ .get_parent = ntfs3_get_parent,
+ .commit_metadata = ntfs_nfs_commit_metadata,
+};
+
+/* Returns Gb,Mb to print with "%u.%02u Gb" */
+static u32 format_size_gb(const u64 bytes, u32 *mb)
+{
+ /* Do simple right 30 bit shift of 64 bit value */
+ u64 kbytes = bytes >> 10;
+ u32 kbytes32 = kbytes;
+
+ *mb = (100 * (kbytes32 & 0xfffff) + 0x7ffff) >> 20;
+ if (*mb >= 100)
+ *mb = 99;
+
+ return (kbytes32 >> 20) | (((u32)(kbytes >> 32)) << 12);
+}
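+
+/*
+ * Worked example (illustrative): for bytes == 0x60000000 (1.5 GiB),
+ * kbytes == 0x180000, so *mb = (100 * 0x80000 + 0x7ffff) >> 20 == 50
+ * and the function returns 1, which prints as "1.50 Gb".
+ */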
+
+static u32 true_sectors_per_clst(const struct NTFS_BOOT *boot)
+{
+ return boot->sectors_per_clusters <= 0x80
+ ? boot->sectors_per_clusters
+ : (1u << (0 - boot->sectors_per_clusters));
+}
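+
+/*
+ * Example (assuming the on-disk byte is interpreted as a signed 8-bit
+ * value, as the NTFS format defines it): 0x04 encodes 4 sectors per
+ * cluster, while 0xF9 (-7) encodes 1 << 7 == 128 sectors per cluster,
+ * i.e. a 64K cluster with 512-byte sectors.
+ */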
+
+/* Inits internal info from the on-disk boot sector */
+static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
+ u64 dev_size)
+{
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ int err;
+ u32 mb, gb, boot_sector_size, sct_per_clst, record_size;
+ u64 sectors, clusters, fs_size, mlcn, mlcn2;
+ struct NTFS_BOOT *boot;
+ struct buffer_head *bh;
+ struct MFT_REC *rec;
+ u16 fn, ao;
+
+ sbi->volume.blocks = dev_size >> PAGE_SHIFT;
+
+ bh = ntfs_bread(sb, 0);
+ if (!bh)
+ return -EIO;
+
+ err = -EINVAL;
+ boot = (struct NTFS_BOOT *)bh->b_data;
+
+ if (memcmp(boot->system_id, "NTFS ", sizeof("NTFS ") - 1))
+ goto out;
+
+ /* 0x55AA is not mandatory. Thanks Maxim Suhanov */
+ /*if (0x55 != boot->boot_magic[0] || 0xAA != boot->boot_magic[1])
+ * goto out;
+ */
+
+ boot_sector_size = (u32)boot->bytes_per_sector[1] << 8;
+ if (boot->bytes_per_sector[0] || boot_sector_size < SECTOR_SIZE ||
+ !is_power_of2(boot_sector_size)) {
+ goto out;
+ }
+
+ /* cluster size: 512, 1K, 2K, 4K, ... 2M */
+ sct_per_clst = true_sectors_per_clst(boot);
+ if (!is_power_of2(sct_per_clst))
+ goto out;
+
+ mlcn = le64_to_cpu(boot->mft_clst);
+ mlcn2 = le64_to_cpu(boot->mft2_clst);
+ sectors = le64_to_cpu(boot->sectors_per_volume);
+
+ if (mlcn * sct_per_clst >= sectors)
+ goto out;
+
+ if (mlcn2 * sct_per_clst >= sectors)
+ goto out;
+
+ /* Check MFT record size */
+ if ((boot->record_size < 0 &&
+ SECTOR_SIZE > (2U << (-boot->record_size))) ||
+ (boot->record_size >= 0 && !is_power_of2(boot->record_size))) {
+ goto out;
+ }
+
+ /* Check index record size */
+ if ((boot->index_size < 0 &&
+ SECTOR_SIZE > (2U << (-boot->index_size))) ||
+ (boot->index_size >= 0 && !is_power_of2(boot->index_size))) {
+ goto out;
+ }
+
+ sbi->sector_size = boot_sector_size;
+ sbi->sector_bits = blksize_bits(boot_sector_size);
+ fs_size = (sectors + 1) << sbi->sector_bits;
+
+ gb = format_size_gb(fs_size, &mb);
+
+ /*
+ * - Volume formatted and mounted with the same sector size
+ * - Volume formatted 4K and mounted as 512
+ * - Volume formatted 512 and mounted as 4K
+ */
+ if (sbi->sector_size != sector_size) {
+ ntfs_warn(sb,
+ "Different NTFS' sector size and media sector size");
+ dev_size += sector_size - 1;
+ }
+
+ sbi->cluster_size = boot_sector_size * sct_per_clst;
+ sbi->cluster_bits = blksize_bits(sbi->cluster_size);
+
+ sbi->mft.lbo = mlcn << sbi->cluster_bits;
+ sbi->mft.lbo2 = mlcn2 << sbi->cluster_bits;
+
+ if (sbi->cluster_size < sbi->sector_size)
+ goto out;
+
+ sbi->cluster_mask = sbi->cluster_size - 1;
+ sbi->cluster_mask_inv = ~(u64)sbi->cluster_mask;
+ sbi->record_size = record_size = boot->record_size < 0
+ ? 1 << (-boot->record_size)
+ : (u32)boot->record_size
+ << sbi->cluster_bits;
+
+ if (record_size > MAXIMUM_BYTES_PER_MFT)
+ goto out;
+
+ sbi->record_bits = blksize_bits(record_size);
+ sbi->attr_size_tr = (5 * record_size >> 4); // ~320 bytes
+
+ sbi->max_bytes_per_attr =
+ record_size - QuadAlign(MFTRECORD_FIXUP_OFFSET_1) -
+ QuadAlign(((record_size >> SECTOR_SHIFT) * sizeof(short))) -
+ QuadAlign(sizeof(enum ATTR_TYPE));
+
+ sbi->index_size = boot->index_size < 0
+ ? 1u << (-boot->index_size)
+ : (u32)boot->index_size << sbi->cluster_bits;
+
+ sbi->volume.ser_num = le64_to_cpu(boot->serial_num);
+ sbi->volume.size = sectors << sbi->sector_bits;
+
+ /* Warn about a RAW volume (device smaller than the filesystem) */
+ if (dev_size < fs_size) {
+ u32 mb0, gb0;
+
+ gb0 = format_size_gb(dev_size, &mb0);
+ ntfs_warn(
+ sb,
+ "RAW NTFS volume: Filesystem size %u.%02u Gb > volume size %u.%02u Gb. Mount in read-only",
+ gb, mb, gb0, mb0);
+ sb->s_flags |= SB_RDONLY;
+ }
+
+ clusters = sbi->volume.size >> sbi->cluster_bits;
+#ifndef CONFIG_NTFS3_64BIT_CLUSTER
+ /* 32 bits per cluster */
+ if (clusters >> 32) {
+ ntfs_notice(
+ sb,
+ "NTFS %u.%02u Gb is too big to use 32 bits per cluster",
+ gb, mb);
+ goto out;
+ }
+#elif BITS_PER_LONG < 64
+#error "CONFIG_NTFS3_64BIT_CLUSTER incompatible in 32 bit OS"
+#endif
+
+ sbi->used.bitmap.nbits = clusters;
+
+ rec = ntfs_zalloc(record_size);
+ if (!rec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ sbi->new_rec = rec;
+ rec->rhdr.sign = NTFS_FILE_SIGNATURE;
+ rec->rhdr.fix_off = cpu_to_le16(MFTRECORD_FIXUP_OFFSET_1);
+ fn = (sbi->record_size >> SECTOR_SHIFT) + 1;
+ rec->rhdr.fix_num = cpu_to_le16(fn);
+ ao = QuadAlign(MFTRECORD_FIXUP_OFFSET_1 + sizeof(short) * fn);
+ rec->attr_off = cpu_to_le16(ao);
+ rec->used = cpu_to_le32(ao + QuadAlign(sizeof(enum ATTR_TYPE)));
+ rec->total = cpu_to_le32(sbi->record_size);
+ ((struct ATTRIB *)Add2Ptr(rec, ao))->type = ATTR_END;
+
+ if (sbi->cluster_size < PAGE_SIZE)
+ sb_set_blocksize(sb, sbi->cluster_size);
+
+ sbi->block_mask = sb->s_blocksize - 1;
+ sbi->blocks_per_cluster = sbi->cluster_size >> sb->s_blocksize_bits;
+ sbi->volume.blocks = sbi->volume.size >> sb->s_blocksize_bits;
+
+ /* Maximum size for normal files */
+ sbi->maxbytes = (clusters << sbi->cluster_bits) - 1;
+
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+ if (clusters >= (1ull << (64 - sbi->cluster_bits)))
+ sbi->maxbytes = -1;
+ sbi->maxbytes_sparse = -1;
+#else
+ /* Maximum size for sparse file */
+ sbi->maxbytes_sparse = (1ull << (sbi->cluster_bits + 32)) - 1;
+#endif
+
+ err = 0;
+
+out:
+ brelse(bh);
+
+ return err;
+}
+
+/* Try to mount */
+static int ntfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+ int err;
+ struct ntfs_sb_info *sbi;
+ struct block_device *bdev = sb->s_bdev;
+ struct inode *bd_inode = bdev->bd_inode;
+ struct request_queue *rq = bdev_get_queue(bdev);
+ struct inode *inode = NULL;
+ struct ntfs_inode *ni;
+ size_t i, tt;
+ CLST vcn, lcn, len;
+ struct ATTRIB *attr;
+ const struct VOLUME_INFO *info;
+ u32 idx, done, bytes;
+ struct ATTR_DEF_ENTRY *t;
+ u16 *upcase = NULL;
+ u16 *shared;
+ bool is_ro;
+ struct MFT_REF ref;
+
+ ref.high = 0;
+
+ sbi = ntfs_zalloc(sizeof(struct ntfs_sb_info));
+ if (!sbi)
+ return -ENOMEM;
+
+ sb->s_fs_info = sbi;
+ sbi->sb = sb;
+ sb->s_flags |= SB_NODIRATIME;
+ sb->s_magic = 0x7366746e; // "ntfs"
+ sb->s_op = &ntfs_sops;
+ sb->s_export_op = &ntfs_export_ops;
+ sb->s_time_gran = NTFS_TIME_GRAN; // 100 nsec
+ sb->s_xattr = ntfs_xattr_handlers;
+
+ ratelimit_state_init(&sbi->msg_ratelimit, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+ err = ntfs_parse_options(sb, data, silent, &sbi->options);
+ if (err)
+ goto out;
+
+ if (!rq || !blk_queue_discard(rq) || !rq->limits.discard_granularity) {
+ ;
+ } else {
+ sbi->discard_granularity = rq->limits.discard_granularity;
+ sbi->discard_granularity_mask_inv =
+ ~(u64)(sbi->discard_granularity - 1);
+ }
+
+ sb_set_blocksize(sb, PAGE_SIZE);
+
+ /* parse boot */
+ err = ntfs_init_from_boot(sb, rq ? queue_logical_block_size(rq) : 512,
+ bd_inode->i_size);
+ if (err)
+ goto out;
+
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+#else
+ sb->s_maxbytes = 0xFFFFFFFFull << sbi->cluster_bits;
+#endif
+
+ mutex_init(&sbi->compress.mtx_lznt);
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+ mutex_init(&sbi->compress.mtx_xpress);
+ mutex_init(&sbi->compress.mtx_lzx);
+#endif
+
+ /*
+ * Load $Volume. This should be done before $LogFile
+ * because 'sbi->volume.ni' is used in 'ntfs_set_state'.
+ */
+ ref.low = cpu_to_le32(MFT_REC_VOL);
+ ref.seq = cpu_to_le16(MFT_REC_VOL);
+ inode = ntfs_iget5(sb, &ref, &NAME_VOLUME);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ntfs_err(sb, "Failed to load $Volume.");
+ inode = NULL;
+ goto out;
+ }
+
+ ni = ntfs_i(inode);
+
+ /* Load and save label (not necessary) */
+ attr = ni_find_attr(ni, NULL, NULL, ATTR_LABEL, NULL, 0, NULL, NULL);
+
+ if (!attr) {
+ /* It is ok if no ATTR_LABEL */
+ } else if (!attr->non_res && !is_attr_ext(attr)) {
+ /* $AttrDef allows labels to be up to 128 symbols */
+ err = utf16s_to_utf8s(resident_data(attr),
+ le32_to_cpu(attr->res.data_size) >> 1,
+ UTF16_LITTLE_ENDIAN, sbi->volume.label,
+ sizeof(sbi->volume.label));
+ if (err < 0)
+ sbi->volume.label[0] = 0;
+ } else {
+ /* should we break mounting here? */
+ //err = -EINVAL;
+ //goto out;
+ }
+
+ attr = ni_find_attr(ni, attr, NULL, ATTR_VOL_INFO, NULL, 0, NULL, NULL);
+ if (!attr || is_attr_ext(attr)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ info = resident_data_ex(attr, SIZEOF_ATTRIBUTE_VOLUME_INFO);
+ if (!info) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ sbi->volume.major_ver = info->major_ver;
+ sbi->volume.minor_ver = info->minor_ver;
+ sbi->volume.flags = info->flags;
+
+ sbi->volume.ni = ni;
+ inode = NULL;
+
+ /* Load $MFTMirr to estimate recs_mirr */
+ ref.low = cpu_to_le32(MFT_REC_MIRR);
+ ref.seq = cpu_to_le16(MFT_REC_MIRR);
+ inode = ntfs_iget5(sb, &ref, &NAME_MIRROR);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ntfs_err(sb, "Failed to load $MFTMirr.");
+ inode = NULL;
+ goto out;
+ }
+
+ sbi->mft.recs_mirr =
+ ntfs_up_cluster(sbi, inode->i_size) >> sbi->record_bits;
+
+ iput(inode);
+
+ /* Load LogFile to replay */
+ ref.low = cpu_to_le32(MFT_REC_LOG);
+ ref.seq = cpu_to_le16(MFT_REC_LOG);
+ inode = ntfs_iget5(sb, &ref, &NAME_LOGFILE);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ntfs_err(sb, "Failed to load \x24LogFile.");
+ inode = NULL;
+ goto out;
+ }
+
+ ni = ntfs_i(inode);
+
+ err = ntfs_loadlog_and_replay(ni, sbi);
+ if (err)
+ goto out;
+
+ iput(inode);
+ inode = NULL;
+
+ is_ro = sb_rdonly(sbi->sb);
+
+ if (sbi->flags & NTFS_FLAGS_NEED_REPLAY) {
+ if (!is_ro) {
+ ntfs_warn(sb,
+ "failed to replay log file. Can't mount rw!");
+ err = -EINVAL;
+ goto out;
+ }
+ } else if (sbi->volume.flags & VOLUME_FLAG_DIRTY) {
+ if (!is_ro && !sbi->options.force) {
+ ntfs_warn(
+ sb,
+ "volume is dirty and \"force\" flag is not set!");
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ /* Load $MFT */
+ ref.low = cpu_to_le32(MFT_REC_MFT);
+ ref.seq = cpu_to_le16(1);
+
+ inode = ntfs_iget5(sb, &ref, &NAME_MFT);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ntfs_err(sb, "Failed to load $MFT.");
+ inode = NULL;
+ goto out;
+ }
+
+ ni = ntfs_i(inode);
+
+ sbi->mft.used = ni->i_valid >> sbi->record_bits;
+ tt = inode->i_size >> sbi->record_bits;
+ sbi->mft.next_free = MFT_REC_USER;
+
+ err = wnd_init(&sbi->mft.bitmap, sb, tt);
+ if (err)
+ goto out;
+
+ err = ni_load_all_mi(ni);
+ if (err)
+ goto out;
+
+ sbi->mft.ni = ni;
+
+ /* Load $BadClus */
+ ref.low = cpu_to_le32(MFT_REC_BADCLUST);
+ ref.seq = cpu_to_le16(MFT_REC_BADCLUST);
+ inode = ntfs_iget5(sb, &ref, &NAME_BADCLUS);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ntfs_err(sb, "Failed to load $BadClus.");
+ inode = NULL;
+ goto out;
+ }
+
+ ni = ntfs_i(inode);
+
+ for (i = 0; run_get_entry(&ni->file.run, i, &vcn, &lcn, &len); i++) {
+ if (lcn == SPARSE_LCN)
+ continue;
+
+ if (!sbi->bad_clusters)
+ ntfs_notice(sb, "Volume contains bad blocks");
+
+ sbi->bad_clusters += len;
+ }
+
+ iput(inode);
+
+ /* Load $Bitmap */
+ ref.low = cpu_to_le32(MFT_REC_BITMAP);
+ ref.seq = cpu_to_le16(MFT_REC_BITMAP);
+ inode = ntfs_iget5(sb, &ref, &NAME_BITMAP);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ntfs_err(sb, "Failed to load $Bitmap.");
+ inode = NULL;
+ goto out;
+ }
+
+ ni = ntfs_i(inode);
+
+#ifndef CONFIG_NTFS3_64BIT_CLUSTER
+ if (inode->i_size >> 32) {
+ err = -EINVAL;
+ goto out;
+ }
+#endif
+
+ /* Check bitmap boundary */
+ tt = sbi->used.bitmap.nbits;
+ if (inode->i_size < bitmap_size(tt)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Not necessary */
+ sbi->used.bitmap.set_tail = true;
+ err = wnd_init(&sbi->used.bitmap, sbi->sb, tt);
+ if (err)
+ goto out;
+
+ iput(inode);
+
+ /* Compute the mft zone */
+ err = ntfs_refresh_zone(sbi);
+ if (err)
+ goto out;
+
+ /* Load $AttrDef */
+ ref.low = cpu_to_le32(MFT_REC_ATTR);
+ ref.seq = cpu_to_le16(MFT_REC_ATTR);
+ inode = ntfs_iget5(sbi->sb, &ref, &NAME_ATTRDEF);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ntfs_err(sb, "Failed to load $AttrDef -> %d", err);
+ inode = NULL;
+ goto out;
+ }
+
+ if (inode->i_size < sizeof(struct ATTR_DEF_ENTRY)) {
+ err = -EINVAL;
+ goto out;
+ }
+ bytes = inode->i_size;
+ sbi->def_table = t = ntfs_malloc(bytes);
+ if (!t) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (done = idx = 0; done < bytes; done += PAGE_SIZE, idx++) {
+ unsigned long tail = bytes - done;
+ struct page *page = ntfs_map_page(inode->i_mapping, idx);
+
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto out;
+ }
+ memcpy(Add2Ptr(t, done), page_address(page),
+ min(PAGE_SIZE, tail));
+ ntfs_unmap_page(page);
+
+ if (!idx && ATTR_STD != t->type) {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ t += 1;
+ sbi->def_entries = 1;
+ done = sizeof(struct ATTR_DEF_ENTRY);
+ sbi->reparse.max_size = MAXIMUM_REPARSE_DATA_BUFFER_SIZE;
+ sbi->ea_max_size = 0x10000; /* default formatter value */
+
+ while (done + sizeof(struct ATTR_DEF_ENTRY) <= bytes) {
+ u32 t32 = le32_to_cpu(t->type);
+ u64 sz = le64_to_cpu(t->max_sz);
+
+ if ((t32 & 0xF) || le32_to_cpu(t[-1].type) >= t32)
+ break;
+
+ if (t->type == ATTR_REPARSE)
+ sbi->reparse.max_size = sz;
+ else if (t->type == ATTR_EA)
+ sbi->ea_max_size = sz;
+
+ done += sizeof(struct ATTR_DEF_ENTRY);
+ t += 1;
+ sbi->def_entries += 1;
+ }
+ iput(inode);
+
+ /* Load $UpCase */
+ ref.low = cpu_to_le32(MFT_REC_UPCASE);
+ ref.seq = cpu_to_le16(MFT_REC_UPCASE);
+ inode = ntfs_iget5(sb, &ref, &NAME_UPCASE);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ntfs_err(sb, "Failed to load \x24LogFile.");
+ inode = NULL;
+ goto out;
+ }
+
+ ni = ntfs_i(inode);
+
+ if (inode->i_size != 0x10000 * sizeof(short)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ sbi->upcase = upcase = ntfs_vmalloc(0x10000 * sizeof(short));
+ if (!upcase) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (idx = 0; idx < (0x10000 * sizeof(short) >> PAGE_SHIFT); idx++) {
+ const __le16 *src;
+ u16 *dst = Add2Ptr(upcase, idx << PAGE_SHIFT);
+ struct page *page = ntfs_map_page(inode->i_mapping, idx);
+
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto out;
+ }
+
+ src = page_address(page);
+
+#ifdef __BIG_ENDIAN
+ for (i = 0; i < PAGE_SIZE / sizeof(u16); i++)
+ *dst++ = le16_to_cpu(*src++);
+#else
+ memcpy(dst, src, PAGE_SIZE);
+#endif
+ ntfs_unmap_page(page);
+ }
+
+ shared = ntfs_set_shared(upcase, 0x10000 * sizeof(short));
+ if (shared && upcase != shared) {
+ sbi->upcase = shared;
+ ntfs_vfree(upcase);
+ }
+
+ iput(inode);
+ inode = NULL;
+
+ if (is_ntfs3(sbi)) {
+ /* Load $Secure */
+ err = ntfs_security_init(sbi);
+ if (err)
+ goto out;
+
+ /* Load $Extend */
+ err = ntfs_extend_init(sbi);
+ if (err)
+ goto load_root;
+
+ /* Load $Extend\$Reparse */
+ err = ntfs_reparse_init(sbi);
+ if (err)
+ goto load_root;
+
+ /* Load $Extend\$ObjId */
+ err = ntfs_objid_init(sbi);
+ if (err)
+ goto load_root;
+ }
+
+load_root:
+ /* Load root */
+ ref.low = cpu_to_le32(MFT_REC_ROOT);
+ ref.seq = cpu_to_le16(MFT_REC_ROOT);
+ inode = ntfs_iget5(sb, &ref, &NAME_ROOT);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ntfs_err(sb, "Failed to load root.");
+ inode = NULL;
+ goto out;
+ }
+
+ ni = ntfs_i(inode);
+
+ sb->s_root = d_make_root(inode);
+
+ if (!sb->s_root) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ return 0;
+
+out:
+ iput(inode);
+
+ if (sb->s_root) {
+ d_drop(sb->s_root);
+ sb->s_root = NULL;
+ }
+
+ put_ntfs(sbi);
+
+ sb->s_fs_info = NULL;
+ return err;
+}
+
+void ntfs_unmap_meta(struct super_block *sb, CLST lcn, CLST len)
+{
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ struct block_device *bdev = sb->s_bdev;
+ sector_t devblock = (u64)lcn * sbi->blocks_per_cluster;
+ unsigned long blocks = (u64)len * sbi->blocks_per_cluster;
+ unsigned long cnt = 0;
+ unsigned long limit = global_zone_page_state(NR_FREE_PAGES)
+ << (PAGE_SHIFT - sb->s_blocksize_bits);
+
+ if (limit >= 0x2000)
+ limit -= 0x1000;
+ else if (limit < 32)
+ limit = 32;
+ else
+ limit >>= 1;
+
+ while (blocks--) {
+ clean_bdev_aliases(bdev, devblock++, 1);
+ if (cnt++ >= limit) {
+ sync_blockdev(bdev);
+ cnt = 0;
+ }
+ }
+}
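+
+/*
+ * Throttling example (illustrative): with 4K pages and 512-byte blocks,
+ * 0x3000 free pages give limit == (0x3000 << 3) - 0x1000 == 0x17000, so
+ * sync_blockdev() runs roughly once per ~94000 cleaned buffer aliases.
+ */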
+
+/*
+ * ntfs_discard
+ *
+ * issue a discard request (trim for SSD)
+ */
+int ntfs_discard(struct ntfs_sb_info *sbi, CLST lcn, CLST len)
+{
+ int err;
+ u64 lbo, bytes, start, end;
+ struct super_block *sb;
+
+ if (sbi->used.next_free_lcn == lcn + len)
+ sbi->used.next_free_lcn = lcn;
+
+ if (sbi->flags & NTFS_FLAGS_NODISCARD)
+ return -EOPNOTSUPP;
+
+ if (!sbi->options.discard)
+ return -EOPNOTSUPP;
+
+ lbo = (u64)lcn << sbi->cluster_bits;
+ bytes = (u64)len << sbi->cluster_bits;
+
+ /* Align up 'start' on discard_granularity */
+ start = (lbo + sbi->discard_granularity - 1) &
+ sbi->discard_granularity_mask_inv;
+ /* Align down 'end' on discard_granularity */
+ end = (lbo + bytes) & sbi->discard_granularity_mask_inv;
+
+ sb = sbi->sb;
+ if (start >= end)
+ return 0;
+
+ err = blkdev_issue_discard(sb->s_bdev, start >> 9, (end - start) >> 9,
+ GFP_NOFS, 0);
+
+ if (err == -EOPNOTSUPP)
+ sbi->flags |= NTFS_FLAGS_NODISCARD;
+
+ return err;
+}
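+
+/*
+ * Alignment example (illustrative): with discard_granularity == 4096,
+ * lbo == 5000 and bytes == 20000, 'start' rounds up to 8192 and 'end'
+ * rounds down to 24576, so 512-byte sectors 16..47 are discarded.
+ */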
+
+static struct dentry *ntfs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
+{
+ return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
+}
+
+// clang-format off
+static struct file_system_type ntfs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "ntfs3",
+ .mount = ntfs_mount,
+ .kill_sb = kill_block_super,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+};
+// clang-format on
+
+static int __init init_ntfs_fs(void)
+{
+ int err;
+
+ pr_notice("ntfs3: Index binary search\n");
+ pr_notice("ntfs3: Hot fix free clusters\n");
+ pr_notice("ntfs3: Max link count %u\n", NTFS_LINK_MAX);
+
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+ pr_notice("ntfs3: Enabled Linux POSIX ACLs support\n");
+#endif
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+ pr_notice("ntfs3: Activated 64 bits per cluster\n");
+#else
+ pr_notice("ntfs3: Activated 32 bits per cluster\n");
+#endif
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+ pr_notice("ntfs3: Read-only lzx/xpress compression included\n");
+#endif
+
+ err = ntfs3_init_bitmap();
+ if (err)
+ return err;
+
+ ntfs_inode_cachep = kmem_cache_create(
+ "ntfs_inode_cache", sizeof(struct ntfs_inode), 0,
+ (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT),
+ init_once);
+ if (!ntfs_inode_cachep) {
+ err = -ENOMEM;
+ goto out1;
+ }
+
+ err = register_filesystem(&ntfs_fs_type);
+ if (err)
+ goto out;
+
+ return 0;
+out:
+ kmem_cache_destroy(ntfs_inode_cachep);
+out1:
+ ntfs3_exit_bitmap();
+ return err;
+}
+
+static void __exit exit_ntfs_fs(void)
+{
+ if (ntfs_inode_cachep) {
+ rcu_barrier();
+ kmem_cache_destroy(ntfs_inode_cachep);
+ }
+
+ unregister_filesystem(&ntfs_fs_type);
+ ntfs3_exit_bitmap();
+}
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ntfs3 read/write filesystem");
+MODULE_INFO(behaviour, "Index binary search");
+MODULE_INFO(behaviour, "Hot fix free clusters");
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+MODULE_INFO(behaviour, "Enabled Linux POSIX ACLs support");
+#endif
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+MODULE_INFO(cluster, "Activated 64 bits per cluster");
+#else
+MODULE_INFO(cluster, "Activated 32 bits per cluster");
+#endif
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+MODULE_INFO(compression, "Read-only lzx/xpress compression included");
+#endif
+
+MODULE_AUTHOR("Konstantin Komarov");
+MODULE_ALIAS_FS("ntfs3");
+
+module_init(init_ntfs_fs);
+module_exit(exit_ntfs_fs);
--
2.30.0

07 Dec '21
From: 沈子俊 <shenzijun(a)kylinos.cn>
mainline inclusion
from mainline-v5.16
commit 4a7e1e5fc294687a8941fa3eeb4a7e8539ca5e2f
category: bugfix
bugzilla: NA
CVE: NA
-----------------------------------------------------------------
When building with clang and GNU as, there is a warning about ignored
changed section attributes:
/tmp/sm4-c916c8.s: Assembler messages:
/tmp/sm4-c916c8.s:677: Warning: ignoring changed section attributes for
.data..cacheline_aligned
"static const" places the data in .rodata but __cacheline_aligned has
the section attribute to place it in .data..cacheline_aligned, in
addition to the aligned attribute.
To keep the alignment but avoid attempting to change sections, use the
____cacheline_aligned attribute, which is just the aligned attribute.
Fixes: 2b31277af577 ("crypto: sm4 - create SM4 library based on sm4 generic code")
Link: https://github.com/ClangBuiltLinux/linux/issues/1441
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
Reviewed-by: Tianjia Zhang <tianjia.zhang(a)linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
Signed-off-by: 沈子俊 <shenzijun(a)kylinos.cn>
---
lib/crypto/sm4.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/crypto/sm4.c b/lib/crypto/sm4.c
index 633b59fed9db..284e62576d0c 100644
--- a/lib/crypto/sm4.c
+++ b/lib/crypto/sm4.c
@@ -15,7 +15,7 @@ static const u32 fk[4] = {
0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
};
-static const u32 __cacheline_aligned ck[32] = {
+static const u32 ____cacheline_aligned ck[32] = {
0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
@@ -26,7 +26,7 @@ static const u32 __cacheline_aligned ck[32] = {
0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
};
-static const u8 __cacheline_aligned sbox[256] = {
+static const u8 ____cacheline_aligned sbox[256] = {
0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
--
2.30.0

[PATCH openEuler-1.0-LTS 1/6] ext4: always panic when errors=panic is specified
by Yang Yingliang 07 Dec '21
From: Ye Bin <yebin10(a)huawei.com>
mainline inclusion
from mainline-v5.13-rc1
commit ac2f7ca51b0929461ea49918f27c11b680f28995
category: bugfix
bugzilla: 182973
CVE: NA
-------------------------------------------------
Before commit 014c9caa29d3 ("ext4: make ext4_abort() use
__ext4_error()"), the following series of commands would trigger a
panic:
1. mount /dev/sda -o ro,errors=panic test
2. mount /dev/sda -o remount,abort test
After commit 014c9caa29d3, remounting a file system using the test
mount option "abort" will no longer trigger a panic. This commit will
restore the behaviour immediately before commit 014c9caa29d3.
(However, note that the Linux kernel's behavior has not been
consistent; some previous kernel versions, including 5.4 and 4.19
similarly did not panic after using the mount option "abort".)
This also makes a change to long-standing behaviour; namely, the
following series of commands will now cause a panic, when previously it
did not:
1. mount /dev/sda -o ro,errors=panic test
2. echo test > /sys/fs/ext4/sda/trigger_fs_error
However, this makes ext4's behaviour much more consistent, so this is
a good thing.
Cc: stable(a)kernel.org
Fixes: 014c9caa29d3 ("ext4: make ext4_abort() use __ext4_error()")
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Link: https://lore.kernel.org/r/20210401081903.3421208-1-yebin10@huawei.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Signed-off-by: Zheng Liang <zhengliang6(a)huawei.com>
Reviewed-by: Zhang Yi <yi.zhang(a)huawei.com>
Reviewed-by: Zhang Yi <yi.zhang(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
fs/ext4/super.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a051671f7cb89..5a58f72ac2090 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -652,12 +652,6 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
ext4_commit_super(sb);
}
- if (sb_rdonly(sb))
- return;
-
- if (continue_fs)
- goto out;
-
/*
* We force ERRORS_RO behavior when system is rebooting. Otherwise we
* could panic during 'reboot -f' as the underlying device got already
@@ -668,6 +662,12 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
sb->s_id);
}
+ if (sb_rdonly(sb))
+ return;
+
+ if (continue_fs)
+ goto out;
+
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
/*
* Make sure updated value of ->s_mount_flags will be visible before
--
2.25.1
Backport 5.10.81 LTS patches from upstream.
Borislav Petkov (1):
selftests/x86/iopl: Adjust to the faked iopl CLI/STI usage
Gao Xiang (1):
erofs: fix unsafe pagevec reuse of hooked pclusters
Greg Thelen (1):
perf/core: Avoid put_page() when GUP fails
Joakim Zhang (2):
net: stmmac: add clocks management for gmac driver
net: stmmac: fix system hang if change mac address after interface
ifdown
Kees Cook (1):
fortify: Explicitly disable Clang support
Marc Zyngier (2):
PCI/MSI: Deal with devices lying about their MSI mask capability
PCI: Add MSI masking quirk for Nvidia ION AHCI
Masami Hiramatsu (1):
bootconfig: init: Fix memblock leak in xbc_make_cmdline()
Michael Riesch (1):
net: stmmac: dwmac-rk: fix unbalanced pm_runtime_enable warnings
Nathan Chancellor (1):
scripts/lld-version.sh: Rewrite based on upstream ld-version.sh
Peter Zijlstra (1):
x86/iopl: Fake iopl(3) CLI/STI usage
Subbaraman Narayanamurthy (1):
thermal: Fix NULL pointer dereferences in of_thermal_ functions
Sven Schnelle (1):
parisc/entry: fix trace test in syscall exit path
Thomas Gleixner (1):
PCI/MSI: Destroy sysfs before freeing entries
Wei Yongjun (1):
net: stmmac: platform: fix build error with !CONFIG_PM_SLEEP
Wong Vee Khee (1):
net: stmmac: fix issue where clk is being unprepared twice
Xie Yongji (2):
block: Add a helper to validate the block size
loop: Use blk_validate_block_size() to validate block size
Yang Yingliang (1):
net: stmmac: fix missing unlock on error in stmmac_suspend()
Yue Hu (1):
erofs: remove the occupied parameter from z_erofs_pagevec_enqueue()
arch/parisc/kernel/entry.S | 2 +-
arch/x86/include/asm/insn-eval.h | 1 +
arch/x86/include/asm/processor.h | 1 +
arch/x86/kernel/process.c | 1 +
arch/x86/kernel/traps.c | 34 ++++++
arch/x86/lib/insn-eval.c | 2 +-
drivers/block/loop.c | 19 +--
.../net/ethernet/stmicro/stmmac/dwmac-rk.c | 9 --
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 +
.../net/ethernet/stmicro/stmmac/stmmac_main.c | 87 ++++++++++++--
.../net/ethernet/stmicro/stmmac/stmmac_mdio.c | 111 ++++++++++++++----
.../ethernet/stmicro/stmmac/stmmac_platform.c | 30 ++++-
drivers/pci/msi.c | 27 +++--
drivers/pci/quirks.c | 6 +
drivers/thermal/thermal_of.c | 9 +-
fs/erofs/zdata.c | 15 ++-
fs/erofs/zpvec.h | 14 ++-
include/linux/blkdev.h | 8 ++
include/linux/pci.h | 2 +
init/main.c | 1 +
kernel/events/core.c | 10 +-
scripts/lld-version.sh | 35 ++++--
security/Kconfig | 3 +
tools/testing/selftests/x86/iopl.c | 78 ++++++++----
24 files changed, 375 insertions(+), 131 deletions(-)
--
2.20.1
Backport 5.10.80 LTS patches from upstream.
Ahmad Fatoum (1):
watchdog: f71808e_wdt: fix inaccurate report in WDIOC_GETTIMEOUT
Ajay Singh (1):
wilc1000: fix possible memory leak in cfg_scan_result()
Alagu Sankar (1):
ath10k: high latency fixes for beacon buffer
Aleksander Jan Bajkowski (3):
MIPS: lantiq: dma: add small delay after reset
MIPS: lantiq: dma: reset correct number of channel
MIPS: lantiq: dma: fix burst length for DEU
Alex Bee (1):
arm64: dts: rockchip: Fix GPU register width for RK3328
Alex Deucher (1):
drm/amdgpu/gmc6: fix DMA mask from 44 to 40 bits
Alex Xu (Hello71) (1):
drm/plane-helper: fix uninitialized variable reference
Alexander Tsoy (1):
ALSA: usb-audio: Add registration quirk for JBL Quantum 400
Alexandru Ardelean (1):
iio: st_sensors: disable regulators after device unregistration
Alexei Starovoitov (2):
bpf: Fix propagation of bounds from 64-bit min/max into 32-bit and
var_off.
bpf: Fix propagation of signed bounds from 64-bit min/max into 32-bit.
Alexey Gladkov (1):
Fix user namespace leak
Alok Prasad (1):
RDMA/qedr: Fix NULL deref for query_qp on the GSI QP
Amelie Delaunay (3):
usb: dwc2: drd: fix dwc2_force_mode call in dwc2_ovr_init
usb: dwc2: drd: fix dwc2_drd_role_sw_set when clock could be disabled
usb: dwc2: drd: reset current session before setting the new one
Amit Engel (1):
nvmet-tcp: fix header digest verification
Anand Jain (1):
btrfs: call btrfs_check_rw_degradable only if there is a missing
device
Anand Moon (2):
arm64: dts: meson-g12a: Fix the pwm regulator supply properties
arm64: dts: meson-g12b: Fix the pwm regulator supply properties
Anant Thazhemadam (1):
media: usb: dvd-usb: fix uninit-value bug in dibusb_read_eeprom_byte()
Anders Roxell (1):
PM: hibernate: fix sparse warnings
Andrea Righi (1):
selftests/bpf: Fix fclose/pclose mismatch in test_progs
Andreas Gruenbacher (3):
iov_iter: Fix iov_iter_get_pages{,_alloc} page fault return value
gfs2: Cancel remote delete work asynchronously
gfs2: Fix glock_hash_walk bugs
Andreas Kemnade (1):
arm: dts: omap3-gta04a4: accelerometer irq fix
Andrej Shadura (2):
HID: u2fzero: clarify error check and length calculations
HID: u2fzero: properly handle timeouts in usb_submit_urb
Andrey Grodzovsky (1):
drm/amdgpu: Fix MMIO access page fault
Andrii Nakryiko (6):
selftests/bpf: Fix strobemeta selftest regression
libbpf: Fix BTF data layout checks and allow empty BTF
libbpf: Allow loading empty BTFs
libbpf: Fix overflow in BTF sanity checks
libbpf: Fix BTF header parsing checks
selftests/bpf: Fix also no-alu32 strobemeta selftest
André Almeida (1):
ACPI: battery: Accept charges over the design capacity as full
Andy Shevchenko (2):
iio: st_sensors: Call st_sensors_power_enable() from bus drivers
serial: 8250_dw: Drop wrong use of ACPI_PTR()
Anel Orazgaliyeva (1):
cpuidle: Fix kobject memory leaks in error paths
Anson Jacob (1):
drm/amd/display: dcn20_resource_construct reduce scope of FPU enabled
Anssi Hannula (1):
serial: xilinx_uartps: Fix race condition causing stuck TX
Antoine Tenart (1):
net-sysfs: try not to restart the syscall if it will fail eventually
Arnaud Pouliquen (1):
rpmsg: Fix rpmsg_create_ept return when RPMSG config is not defined
Arnd Bergmann (9):
hyperv/vmbus: include linux/bitops.h
ifb: fix building without CONFIG_NET_CLS_ACT
ARM: 9136/1: ARMv7-M uses BE-8, not BE-32
drm/amdgpu: fix warning for overflow check
crypto: ecc - fix CRYPTO_DEFAULT_RNG dependency
memstick: avoid out-of-range warning
arm64: pgtable: make __pte_to_phys/__phys_to_pte_val inline functions
ARM: 9156/1: drop cc-option fallbacks for architecture selection
ath10k: fix invalid dma_addr_t token assignment
Arun Easi (1):
scsi: qla2xxx: Fix kernel crash when accessing port_speed sysfs file
Asmaa Mnebhi (1):
gpio: mlxbf2.c: Add check for bgpio_init failure
Austin Kim (2):
ALSA: synth: missing check for possible NULL after the call to kstrdup
evm: mark evm_fixmode as __ro_after_init
Baochen Qiang (2):
ath11k: Change DMA_FROM_DEVICE to DMA_TO_DEVICE when map reinjected
packets
ath11k: Fix memory leak in ath11k_qmi_driver_event_work
Baptiste Lepers (1):
pnfs/flexfiles: Fix misplaced barrier in nfs4_ff_layout_prepare_ds
Barnabás Pőcze (1):
platform/x86: wmi: do not fail if disabling fails
Bastien Roucariès (1):
ARM: dts: sun7i: A20-olinuxino-lime2: Fix ethernet phy-mode
Benjamin Li (2):
wcn36xx: handle connection loss indication
wcn36xx: add proper DMA memory barriers in rx path
Bixuan Cui (1):
powerpc/44x/fsp2: add missing of_node_put
Bjorn Andersson (1):
soc: qcom: rpmhpd: Make power_on actually enable the domain
Borislav Petkov (1):
x86/sev: Make the #VC exception stacks part of the default stacks
storage
Brett Creeley (1):
ice: Fix not stopping Tx queues for VFs
Bryan O'Donoghue (1):
wcn36xx: Fix Antenna Diversity Switching
Bryant Mairs (1):
drm: panel-orientation-quirks: Add quirk for Aya Neo 2021
Can Guo (1):
scsi: ufs: Refactor ufshcd_setup_clocks() to remove skip_ref_clk
Charan Teja Reddy (1):
dma-buf: WARN on dmabuf release with pending attachments
Chen-Yu Tsai (2):
media: rkvdec: Do not override sizeimage for output format
media: rkvdec: Support dynamic resolution changes
Chengfeng Ye (1):
nfc: pn533: Fix double free when pn533_fill_fragment_skbs() fails
Chenyuan Mi (1):
drm/nouveau/svm: Fix refcount leak bug and missing check against null
bug
Christian Löhle (1):
mmc: dw_mmc: Dont wait for DRTO on Write RSP error
Christoph Hellwig (1):
rds: stop using dmapool
Christophe JAILLET (6):
media: mtk-vpu: Fix a resource leak in the error handling path of
'mtk_vpu_probe()'
mmc: mxs-mmc: disable regulator on error and in the remove function
clk: mvebu: ap-cpu-clk: Fix a memory leak in error handling paths
soc/tegra: Fix an error handling path in tegra_powergate_power_up()
remoteproc: Fix a memory leak in an error handling path in
'rproc_handle_vdev()'
i2c: xlr: Fix a resource leak in the error handling path of
'xlr_i2c_probe()'
Christophe Leroy (1):
video: fbdev: chipsfb: use memset_io() instead of memset()
Claudio Imbrenda (2):
KVM: s390: pv: avoid double free of sida page
KVM: s390: pv: avoid stalls for kvm_s390_pv_init_vm
Claudiu Beznea (2):
clk: at91: sam9x60-pll: use DIV_ROUND_CLOSEST_ULL
dmaengine: at_xdmac: fix AT_XDMAC_CC_PERID() macro
Clément Léger (1):
clk: at91: check pmc node status before registering syscore ops
Colin Ian King (4):
media: cxd2880-spi: Fix a null pointer dereference on error handling
path
media: cx23885: Fix snd_card_free call on null card pointer
media: em28xx: Don't use ops->suspend if it is NULL
mmc: moxart: Fix null pointer dereference on pointer host
Corey Minyard (1):
ipmi: Disable some operations during a panic
Cyril Strejc (1):
net: multicast: calculate csum of looped-back and forwarded packets
Damien Le Moal (1):
libata: fix read log timeout value
Dan Carpenter (13):
tpm: Check for integer overflow in tpm2_map_response_body()
ath11k: fix some sleeping in atomic bugs
b43legacy: fix a lower bounds test
b43: fix a lower bounds test
memstick: jmb38x_ms: use appropriate free function in
jmb38x_ms_alloc_host()
drm/msm: potential error pointer dereference in init()
drm/msm: uninitialized variable in msm_gem_import()
usb: gadget: hid: fix error code in do_config()
scsi: csiostor: Uninitialized data in csio_ln_vnp_read_cbfn()
phy: ti: gmii-sel: check of_get_address() for failure
rtc: rv3032: fix error handling in rv3032_clkout_set_rate()
zram: off by one in read_block_state()
gve: Fix off by one in gve_tx_timeout()
Dan Schatzberg (1):
cgroup: Fix rootcg cpu.stat guest double counting
Daniel Borkmann (2):
net, neigh: Fix NTF_EXT_LEARNED in combination with NTF_USE
net, neigh: Enable state migration between NUD_PERMANENT and NTF_USE
Daniel Jordan (1):
crypto: pcrypt - Delay write to padata->info
Dave Jones (1):
x86/mce: Add errata workaround for Skylake SKX37
David Hildenbrand (1):
s390/gmap: don't unconditionally call pte_unmap_unlock() in
__gmap_zap()
Davide Baldo (1):
ALSA: hda/realtek: Fixes HP Spectre x360 15-eb1xxx speakers
Derong Liu (1):
mmc: mtk-sd: Add wait dma stop done flow
Desmond Cheong Zhi Xi (1):
Bluetooth: fix init and cleanup of sco_conn.timeout_work
Dinghao Liu (1):
Bluetooth: btmtkuart: fix a memleak in mtk_hci_wmt_sync
Dirk Bender (1):
media: mt9p031: Fix corrupted frame after restarting stream
Dmitriy Ulitin (1):
media: stm32: Potential NULL pointer dereference in dcmi_irq_thread()
Dmitry Bogdanov (1):
scsi: qla2xxx: Fix unmap of already freed sgl
Dmitry Osipenko (1):
soc/tegra: pmc: Fix imbalanced clock disabling in error code path
Dominique Martinet (1):
9p/net: fix missing error check in p9_check_errors
Dongli Zhang (2):
xen/netfront: stop tx queues during live migration
vmxnet3: do not stop tx queues after netif_device_detach()
Dongliang Mu (2):
JFS: fix memleak in jfs_mount
memory: fsl_ifc: fix leak of irq and nand_irq in fsl_ifc_ctrl_probe
Dust Li (1):
net/smc: fix sk_refcnt underflow on linkdown and fallback
Eiichi Tsukata (1):
vsock: prevent unnecessary refcnt inc for nonblocking connect
Eric Badger (1):
EDAC/sb_edac: Fix top-of-high-memory value for Broadwell/Haswell
Eric Biggers (1):
fscrypt: allow 256-bit master keys with AES-256-XTS
Eric Dumazet (4):
net: annotate data-race in neigh_output()
tcp: switch orphan_count to bare per-cpu counters
llc: fix out-of-bound array index in llc_sk_dev_hash()
net/sched: sch_taprio: fix undefined behavior in ktime_mono_to_any
Eric W. Biederman (3):
signal: Remove the bogus sigkill_pending in ptrace_stop
signal/mips: Update (_save|_restore)_fp_context to fail with -EFAULT
signal/sh: Use force_sig(SIGKILL) instead of do_group_exit(SIGKILL)
Erik Ekman (2):
sfc: Export fibre-specific supported link modes
sfc: Don't use netif_info before net_device setup
Evgeny Novikov (3):
media: atomisp: Fix error handling in probe
media: dvb-frontends: mn88443x: Handle errors of clk_prepare_enable()
mtd: spi-nor: hisi-sfc: Remove excessive clk_disable_unprepare()
Fabio Estevam (1):
ath10k: sdio: Add missing BH locking around napi_schdule()
Filipe Manana (1):
btrfs: fix lost error handling when replaying directory deletes
Florian Westphal (3):
fcnal-test: kill hanging ping/nettest binaries on cleanup
vrf: run conntrack only in context of lower/physdev for locally
generated packets
netfilter: nfnetlink_queue: fix OOB when mac header was cleared
Frank Rowand (1):
of: unittest: fix EXPECT text for gpio hog errors
Gao Xiang (1):
erofs: don't trigger WARN() when decompression fails
Geert Uytterhoeven (6):
arm64: dts: renesas: beacon: Fix Ethernet PHY mode
pinctrl: renesas: checker: Fix off-by-one bug in drive register check
mips: cm: Convert to bitfield API to fix out-of-bounds access
auxdisplay: img-ascii-lcd: Fix lock-up when displaying empty string
auxdisplay: ht16k33: Connect backlight to fbdev
auxdisplay: ht16k33: Fix frame buffer device blanking
Giovanni Cabiddu (2):
crypto: qat - detect PFVF collision after ACK
crypto: qat - disregard spurious PFVF interrupts
Guo Ren (1):
irqchip/sifive-plic: Fixup EOI failed when masked
Guru Das Srinagesh (1):
firmware: qcom_scm: Fix error retval in __qcom_scm_is_call_available()
Halil Pasic (1):
s390/cio: make ccw_device_dma_* more robust
Hannes Reinecke (1):
nvme: drop scan_lock and always kick requeue list when removing
namespaces
Hans de Goede (6):
drm: panel-orientation-quirks: Update the Lenovo Ideapad D330 quirk
(v2)
drm: panel-orientation-quirks: Add quirk for KD Kurio Smart C15200
2-in-1
drm: panel-orientation-quirks: Add quirk for the Samsung Galaxy Book
10.6
brcmfmac: Add DMI nvram filename quirk for Cyberbook T116 tablet
power: supply: bq27xxx: Fix kernel crash on IRQ handler register error
ACPI: PMIC: Fix intel_pmic_regs_handler() read accesses
Hao Wu (1):
tpm: fix Atmel TPM crash caused by too frequent queries
Harald Freudenberger (1):
s390/ap: Fix hanging ioctl caused by orphaned replies
Helge Deller (4):
parisc: Fix set_fixmap() on PA1.x CPUs
parisc: Fix ptrace check on syscall return
task_stack: Fix end_of_stack() for architectures with upwards-growing
stack
parisc: Fix backtrace to always include init funtion names
Henrik Grimler (1):
power: supply: max17042_battery: use VFSOC for capacity when no rsns
Iago Toral Quiroga (1):
drm/v3d: fix wait for TMU write combiner flush
Ian Rogers (1):
perf bpf: Add missing free to bpf_event__print_bpf_prog_info()
Igor Pylypiv (1):
scsi: pm80xx: Fix misleading log statement in
pm8001_mpi_get_nvmd_resp()
Ilya Leoshkevich (1):
libbpf: Fix endianness detection in BPF_CORE_READ_BITFIELD_PROBED()
Imre Deak (2):
ALSA: hda: Release controller display power during shutdown/reboot
ALSA: hda: Fix hang during shutdown due to link reset
Ingmar Klein (1):
PCI: Mark Atheros QCA6174 to avoid bus reset
Israel Rukshin (3):
nvmet: fix use-after-free when a port is removed
nvmet-rdma: fix use-after-free when a port is removed
nvmet-tcp: fix use-after-free when a port is removed
J. Bruce Fields (1):
nfsd: don't alloc under spinlock in rpc_parse_scope_id
Jack Andersen (1):
mfd: dln2: Add cell for initializing DLN2 ADC
Jackie Liu (3):
ARM: s3c: irq-s3c24xx: Fix return value check for s3c24xx_init_intc()
MIPS: loongson64: make CPU_LOONGSON64 depends on MIPS_FP_SUPPORT
ar7: fix kernel builds for compiler test
Jaegeuk Kim (1):
f2fs: should use GFP_NOFS for directory inodes
Jakob Hauser (1):
power: supply: rt5033_battery: Change voltage values to µV
Jakub Kicinski (4):
net: sched: update default qdisc visibility after Tx queue cnt changes
net: stream: don't purge sk_error_queue in sk_stream_kill_queues()
udp6: allow SO_MARK ctrl msg to affect routing
ethtool: fix ethtool msg len calculation for pause stats
Jan Kara (1):
ocfs2: fix data corruption on truncate
Jane Malalane (1):
x86/cpu: Fix migration safety with X86_BUG_NULL_SEL
Janghyub Seo (1):
r8169: Add device 10ec:8162 to driver r8169
Janis Schoetterl-Glausch (1):
KVM: s390: Fix handle_sske page fault handling
Jaroslav Kysela (1):
ALSA: hda/realtek: Add a quirk for Acer Spin SP513-54N
Jason Ormes (1):
ALSA: usb-audio: Line6 HX-Stomp XL USB_ID for 48k-fixed quirk
Jens Axboe (2):
block: bump max plugged deferred size from 16 to 32
block: remove inaccurate requeue check
Jeremy Soller (1):
ALSA: hda/realtek: Headset fixup for Clevo NH77HJQ
Jernej Skrabec (1):
drm/sun4i: Fix macros in sun8i_csc.h
Jessica Zhang (1):
drm/msm: Fix potential NULL dereference in DPU SSPP
Jia-Ju Bai (1):
fs: orangefs: fix error return code of orangefs_revalidate_lookup()
Jiasheng Jiang (1):
rxrpc: Fix _usecs_to_jiffies() by using usecs_to_jiffies()
Jim Mattson (1):
KVM: selftests: Fix nested SVM tests when built with clang
Jiri Olsa (1):
selftests/bpf: Fix perf_buffer test on system with offline cpus
Joerg Roedel (1):
x86/sev: Fix stack type check in vc_switch_off_ist()
Johan Hovold (14):
Input: iforce - fix control-message timeout
ALSA: ua101: fix division by zero at probe
ALSA: 6fire: fix control and bulk message timeouts
ALSA: line6: fix control and interrupt message timeouts
mwifiex: fix division by zero in fw download path
ath6kl: fix division by zero in send path
ath6kl: fix control-message timeout
ath10k: fix control-message timeout
ath10k: fix division by zero in send path
rtl8187: fix control-message timeouts
serial: 8250: fix racy uartclk update
most: fix control-message timeouts
USB: iowarrior: fix control-message timeouts
USB: chipidea: fix interrupt deadlock
Johannes Berg (1):
iwlwifi: mvm: disable RX-diversity in powersave
John Fastabend (2):
bpf, sockmap: Remove unhash handler for BPF sockmap usage
bpf: sockmap, strparser, and tls are reusing qdisc_skb_cb and
colliding
John Fraker (1):
gve: Recover from queue stall due to missed IRQ
Johnathon Clark (1):
ALSA: hda/realtek: Fix mic mute LED for the HP Spectre x360 14
Jon Maxwell (1):
tcp: don't free a FIN sk_buff in tcp_remove_empty_skb()
Jonas Dreßler (5):
mwifiex: Read a PCI register after writing the TX ring write pointer
mwifiex: Try waking the firmware until we get an interrupt
mwifiex: Run SET_BSS_MODE when changing from P2P to STATION vif-type
mwifiex: Properly initialize private structure on interface type
changes
mwifiex: Send DELBA requests according to spec
Josef Bacik (1):
btrfs: do not take the uuid_mutex in btrfs_rm_device
Josh Don (1):
fs/proc/uptime.c: Fix idle time reporting in /proc/uptime
Josh Poimboeuf (1):
objtool: Add xen_start_kernel() to noreturn list
Juergen Gross (1):
xen/balloon: add late_initcall_sync() for initial ballooning done
Junji Wei (1):
RDMA/rxe: Fix wrong port_cap_flags
Kai-Heng Feng (1):
ALSA: hda/realtek: Add quirk for HP EliteBook 840 G7 mute LED
Kalesh Singh (1):
tracing/cfi: Fix cmp_entries_* functions signature mismatch
Kan Liang (2):
perf/x86/intel/uncore: Support extra IMC channel on Ice Lake server
perf/x86/intel/uncore: Fix Intel ICX IIO event constraints
Kees Cook (5):
leaking_addresses: Always print a trailing newline
media: radio-wl1273: Avoid card name truncation
media: si470x: Avoid card name truncation
media: tm6000: Avoid card name truncation
clocksource/drivers/timer-ti-dm: Select TIMER_OF
Kewei Xu (1):
i2c: mediatek: fixing the incorrect register offset
Kishon Vijay Abraham I (2):
arm64: dts: ti: k3-j721e-main: Fix "max-virtual-functions" in PCIe EP
nodes
arm64: dts: ti: k3-j721e-main: Fix "bus-range" upto 256 bus number for
PCIe
Krzysztof Kozlowski (3):
regulator: s5m8767: do not use reset value as DVS voltage if GPIO DVS
is disabled
regulator: dt-bindings: samsung,s5m8767: correct
s5m8767,pmic-buck-default-dvs-idx property
mfd: core: Add missing of_node_put for loop iteration
Kumar Kartikeya Dwivedi (1):
selftests/bpf: Fix fd cleanup in sk_lookup test
Kunihiko Hayashi (1):
PCI: uniphier: Serialize INTx masking/unmasking and fix the bit
operation
Lad Prabhakar (1):
spi: spi-rpc-if: Check return value of rpcif_sw_init()
Lars-Peter Clausen (1):
dmaengine: dmaengine_desc_callback_valid(): Check for
`callback_result`
Lasse Collin (2):
lib/xz: Avoid overlapping memcpy() with invalid input with in-place
decompression
lib/xz: Validate the value before assigning it to an enum variable
Lee Jones (1):
soc: qcom: rpmhpd: Provide some missing struct member descriptions
Leon Romanovsky (1):
RDMA/mlx4: Return missed an error if device doesn't support steering
Li Chen (1):
PCI: cadence: Add cdns_plat_pcie_probe() missing return
Li Zhang (1):
btrfs: clear MISSING device status bit in btrfs_close_one_device
Linus Lüssing (1):
ath9k: Fix potential interrupt storm on queue reset
Linus Walleij (1):
net: dsa: rtl8366rb: Fix off-by-one bug
Loic Poulain (6):
wcn36xx: Fix HT40 capability for 2Ghz band
wcn36xx: Fix tx_status mechanism
wcn36xx: Fix (QoS) null data frame bitrate/modulation
wcn36xx: Correct band/freq reporting on RX
ath10k: Fix missing frame timestamp for beacon/probe-resp
wcn36xx: Fix discarded frames due to wrong sequence number
Lorenz Bauer (3):
bpf: Define bpf_jit_alloc_exec_limit for arm64 JIT
bpf: Prevent increasing bpf_jit_limit above max
selftests: bpf: Convert sk_lookup ctx access tests to PROG_TEST_RUN
Lorenzo Bianconi (3):
mt76: mt7615: fix endianness warning in mt7615_mac_write_txwi
mt76: mt76x02: fix endianness warnings in mt76x02_mac.c
mt76: mt7915: fix possible infinite loop release semaphore
Lucas Tanure (1):
ASoC: cs42l42: Disable regulators if probe fails
Lukas Wunner (1):
ifb: Depend on netfilter alternatively to tc
Maciej W. Rozycki (1):
MIPS: Fix assembly error from MIPSr2 code used within
MIPS_ISA_ARCH_LEVEL
Marc Kleine-Budde (1):
can: mcp251xfd: mcp251xfd_chip_start(): fix error handling for
mcp251xfd_chip_rx_int_enable()
Marek Behún (4):
PCI: pci-bridge-emul: Fix emulation of W1C bits
PCI: aardvark: Fix return value of MSI domain .alloc() method
PCI: aardvark: Read all 16-bits from PCIE_MSI_PAYLOAD_REG
PCI: aardvark: Don't spam about PIO Response Status
Marek Vasut (3):
rsi: Fix module dev_oper_mode parameter description
ARM: dts: stm32: Reduce DHCOR SPI NOR frequency to 50 MHz
video: backlight: Drop maximum brightness override for brightness zero
Marijn Suijten (1):
ARM: dts: qcom: msm8974: Add xo_board reference clock to DSI0 PHY
Mario (1):
drm: panel-orientation-quirks: Add quirk for GPD Win3
Mark Brown (1):
tpm_tis_spi: Add missing SPI ID
Mark Rutland (2):
KVM: arm64: Extract ESR_ELx.EC only
irq: mips: avoid nested irq_enter()
Markus Schneider-Pargmann (1):
hwrng: mtk - Force runtime pm ops for sleep ops
Martin Fuzzey (3):
rsi: fix occasional initialisation failure with BT coex
rsi: fix key enabled check causing unwanted encryption for vap_id > 0
rsi: fix rate mask set leading to P2P failure
Martin Kepplinger (1):
media: imx: set a media_device bus_info string
Masami Hiramatsu (2):
ia64: kprobes: Fix to pass correct trampoline address to the handler
ARM: clang: Do not rely on lr register for stacktrace
Mathias Nyman (1):
xhci: Fix USB 3.1 enumeration issues by increasing roothub
power-on-good delay
Matthew Auld (1):
drm/ttm: stop calling tt_swapin in vm_access
Matthias Schiffer (1):
net: phy: micrel: make *-skew-ps check more lenient
Maurizio Lombardi (1):
nvmet-tcp: fix a memory leak when releasing a queue
Max Gurtovoy (1):
nvme-rdma: fix error code in nvme_rdma_setup_ctrl
Maxim Kiselev (1):
net: davinci_emac: Fix interrupt pacing disable
Meng Li (2):
soc: fsl: dpio: replace smp_processor_id with raw_smp_processor_id
soc: fsl: dpio: use the combined functions to protect critical zone
Menglong Dong (1):
workqueue: make sysfs of unbound kworker cpumask more clever
Miaohe Lin (1):
mm/zsmalloc.c: close race window between zs_pool_dec_isolated() and
zs_unregister_migration()
Michael Ellerman (1):
powerpc: Fix is_kvm_guest() / kvm_para_available()
Michael Pratt (1):
posix-cpu-timers: Clear task::posix_cputimers_work in copy_process()
Michael Tretter (1):
media: allegro: ignore interrupt if mailbox is not initialized
Michael Walle (1):
crypto: caam - disable pkc for non-E SoCs
Michal Hocko (1):
mm, oom: do not trigger out_of_memory from the #PF
Michał Mirosław (1):
ARM: 9155/1: fix early early_iounmap()
Miklos Szeredi (1):
fuse: fix page stealing
Miquel Raynal (9):
mtd: rawnand: socrates: Keep the driver compatible with on-die ECC
engines
mtd: rawnand: ams-delta: Keep the driver compatible with on-die ECC
engines
mtd: rawnand: xway: Keep the driver compatible with on-die ECC engines
mtd: rawnand: mpc5121: Keep the driver compatible with on-die ECC
engines
mtd: rawnand: gpio: Keep the driver compatible with on-die ECC engines
mtd: rawnand: pasemi: Keep the driver compatible with on-die ECC
engines
mtd: rawnand: orion: Keep the driver compatible with on-die ECC
engines
mtd: rawnand: plat_nand: Keep the driver compatible with on-die ECC
engines
mtd: rawnand: au1550nd: Keep the driver compatible with on-die ECC
engines
Muchun Song (1):
seq_file: fix passing wrong private data
Nadezda Lutovinova (2):
media: s5p-mfc: Add checking to s5p_mfc_probe().
media: rcar-csi2: Add checking to rcsi2_start_receiver()
Naohiro Aota (1):
block: schedule queue restart after BLK_STS_ZONE_RESOURCE
Nathan Chancellor (1):
platform/x86: thinkpad_acpi: Fix bitwise vs. logical warning
Nathan Lynch (1):
powerpc: fix unbalanced node refcount in check_kvm_guest()
Naveen N. Rao (4):
powerpc/lib: Add helper to check if offset is within conditional
branch range
powerpc/bpf: Validate branch ranges
powerpc/security: Add a helper to query stf_barrier type
powerpc/bpf: Emit stf barrier instruction sequences for BPF_NOSPEC
Neeraj Upadhyay (1):
rcu: Fix existing exp request check in sync_sched_exp_online_cleanup()
Nehal Bakulchandra Shah (1):
usb: xhci: Enable runtime-pm by default on AMD Yellow Carp platform
Nikita Yushchenko (1):
staging: most: dim2: do not double-register the same device
Nuno Sá (2):
iio: ad5770r: make devicetree property reading consistent
iio: adis: do not disabe IRQs in 'adis_init()'
Olivier Moysan (2):
ARM: dts: stm32: fix SAI sub nodes register range
ARM: dts: stm32: fix AV96 board SAI2 pin muxing on stm32mp15
Ondrej Mosnacek (1):
selinux: fix race condition when computing ocontext SIDs
Pablo Neira Ayuso (2):
netfilter: conntrack: set on IPS_ASSURED if flows enters internal
stream state
netfilter: nft_dynset: relax superfluous check on set updates
Pali Rohár (13):
serial: core: Fix initializing and restoring termios speed
PCI: aardvark: Do not clear status bits of masked interrupts
PCI: aardvark: Fix checking for link up via LTSSM state
PCI: aardvark: Do not unmask unused interrupts
PCI: aardvark: Fix reporting Data Link Layer Link Active
PCI: aardvark: Fix configuring Reference clock
PCI: aardvark: Fix support for bus mastering and PCI_COMMAND on
emulated bridge
PCI: aardvark: Fix support for PCI_BRIDGE_CTL_BUS_RESET on emulated
bridge
PCI: aardvark: Set PCI Bridge Class Code to PCI Bridge
PCI: aardvark: Fix support for PCI_ROM_ADDRESS1 on emulated bridge
PCI: aardvark: Fix preserving PCI_EXP_RTCTL_CRSSVE flag on emulated
bridge
PCI: Add PCI_EXP_DEVCTL_PAYLOAD_* macros
PCI: aardvark: Fix PCIe Max Payload Size setting
Paul E. McKenney (1):
rcu-tasks: Move RTGS_WAIT_CBS to beginning of rcu_tasks_kthread() loop
Pavel Skripkin (3):
ALSA: mixer: fix deadlock in snd_mixer_oss_set_volume
media: em28xx: add missing em28xx_close_extension
media: dvb-usb: fix ununit-value in az6027_rc_query
Pawan Gupta (1):
smackfs: Fix use-after-free in netlbl_catmap_walk()
Paweł Anikiel (1):
reset: socfpga: add empty driver allowing consumers to probe
Pekka Korpinen (1):
iio: dac: ad5446: Fix ad5622_write() return value
Peter Rosin (1):
ARM: dts: at91: tse850: the emac<->phy interface is rmii
Peter Zijlstra (5):
locking/lockdep: Avoid RCU-induced noinstr fail
x86: Increase exception stack sizes
x86/xen: Mark cpu_bringup_and_idle() as dead_end_function
objtool: Fix static_call list generation
rcu: Always inline rcu_dynticks_task*_{enter,exit}()
Phoenix Huang (1):
Input: elantench - fix misreporting trackpoint coordinates
Pradeep Kumar Chitrapu (1):
ath11k: fix packet drops due to incorrect 6 GHz freq value in rx
status
Punit Agrawal (1):
kprobes: Do not use local variable when creating debugfs file
Quentin Monnet (1):
bpftool: Avoid leaking the JSON writer prepared for program metadata
Quinn Tran (4):
scsi: qla2xxx: Fix use after free in eh_abort path
scsi: qla2xxx: Relogin during fabric disturbance
scsi: qla2xxx: Fix gnl list corruption
scsi: qla2xxx: Turn off target reset during issue_lip
Rafael J. Wysocki (2):
PM: sleep: Do not let "syscore" devices runtime-suspend during system
transitions
ACPICA: Avoid evaluating methods too early during system resume
Rafał Miłecki (1):
ARM: dts: BCM5301X: Fix memory nodes names
Rahul Lakkireddy (1):
cxgb4: fix eeprom len when diagnostics not implemented
Rahul Tanwar (1):
pinctrl: equilibrium: Fix function addition in multiple groups
Rajat Asthana (1):
media: mceusb: return without resubmitting URB in case of -EPROTO
error.
Randy Dunlap (5):
mmc: winbond: don't build on M68K
ia64: don't do IA64_CMPXCHG_DEBUG without CONFIG_PRINTK
usb: musb: select GENERIC_PHY instead of depending on it
usb: typec: STUSB160X should select REGMAP_I2C
m68k: set a default value for MEMORY_RESERVE
Ranjani Sridharan (1):
ASoC: SOF: topology: do not power down primary core during topology
removal
Reimar Döffinger (1):
libata: fix checking of DMA state
Ricardo Koller (1):
KVM: selftests: Add operand to vmsave/vmload/vmrun in svm.c
Ricardo Ribalda (7):
media: v4l2-ioctl: Fix check_ext_ctrls
media: uvcvideo: Set capability in s_param
media: uvcvideo: Return -EIO for control errors
media: uvcvideo: Set unique vdev name based in type
media: ipu3-imgu: imgu_fmt: Handle properly try
media: ipu3-imgu: VIDIOC_QUERYCAP: Fix bus_info
media: v4l2-ioctl: S_CTRL output the right value
Richard Fitzgerald (4):
ASoC: cs42l42: Correct some register default values
ASoC: cs42l42: Defer probe if request_threaded_irq() returns
EPROBE_DEFER
ASoC: cs42l42: Use device_property API instead of of_property
ASoC: cs42l42: Correct configuring of switch inversion from ts-inv
Robert-Ionut Alexa (1):
soc: fsl: dpaa2-console: free buffer before returning from
dpaa2_console_read
Russell King (Oracle) (1):
net: phylink: avoid mvneta warning when setting pause parameters
Ryder Lee (1):
mt76: mt7915: fix an off-by-one bound check
Sandeep Maheswaram (1):
phy: qcom-snps: Correct the FSEL_MASK
Saurav Kashyap (1):
scsi: qla2xxx: Changes to support FCP2 Target
Scott Wood (1):
rcutorture: Avoid problematic critical section nesting on PREEMPT_RT
Sean Christopherson (3):
x86/irq: Ensure PI wakeup handler is unregistered before module unload
KVM: VMX: Unregister posted interrupt wakeup handler on hardware
unsetup
KVM: nVMX: Query current VMCS when determining if MSR bitmaps are in
use
Sean Young (3):
media: ite-cir: IR receiver stop working after receive overflow
media: ir-kbd-i2c: improve responsiveness of hauppauge zilog receivers
media: ir_toy: assignment to be16 should be of correct type
Sebastian Andrzej Siewior (1):
lockdep: Let lock_is_held_type() detect recursive read as read
Sebastian Krzyszkowiak (2):
power: supply: max17042_battery: Prevent int underflow in
set_soc_threshold
power: supply: max17042_battery: Clear status bits in interrupt
handler
Seevalamuthu Mariappan (1):
ath11k: Align bss_chan_info structure with firmware
Selvin Xavier (1):
RDMA/bnxt_re: Fix query SRQ failure
Shaoying Xu (1):
ext4: fix lazy initialization next schedule time computation in more
granular unit
Shayne Chen (2):
mt76: mt7915: fix sta_rec_wtbl tag len
mt76: mt7915: fix muar_idx in mt7915_mcu_alloc_sta_req()
Shuah Khan (2):
selftests: kvm: fix mismatched fclose() after popen()
selftests/core: fix conflicting types compile error for close_range()
Shyam Sundar S K (1):
net: amd-xgbe: Toggle PLL settings during rate change
Sidong Yang (1):
btrfs: reflink: initialize return value to 0 in btrfs_extent_same()
Simon Ser (1):
drm/panel-orientation-quirks: add Valve Steam Deck
Srikar Dronamraju (3):
powerpc: Refactor is_kvm_guest() declaration to new header
powerpc: Rename is_kvm_guest() to check_kvm_guest()
powerpc: Reintroduce is_kvm_guest() as a fast-path check
Srinivas Kandagatla (2):
soundwire: debugfs: use controller id and link_id for debugfs
scsi: ufs: ufshcd-pltfrm: Fix memory leak due to probe defer
Sriram R (2):
ath11k: Avoid reg rules update during firmware recovery
ath11k: Avoid race during regd updates
Stafford Horne (1):
openrisc: fix SMP tlb flush NULL pointer dereference
Stefan Agner (2):
phy: micrel: ksz8041nl: do not use power down mode
serial: imx: fix detach/attach of serial console
Stefan Schaeckeler (1):
ACPI: AC: Quirk GK45 to skip reading _PSR
Stephan Gerhold (2):
arm64: dts: qcom: msm8916: Fix Secondary MI2S bit clock
arm64: dts: qcom: pm8916: Remove wrong reg-names for rtc@6000
Stephen Suryaputra (1):
gre/sit: Don't generate link-local addr if addr_gen_mode is
IN6_ADDR_GEN_MODE_NONE
Steven Rostedt (VMware) (2):
ring-buffer: Protect ring_buffer_reset() from reentrancy
tracefs: Have tracefs directories not set OTH permission bits by
default
Sudarshan Rajagopalan (1):
arm64: mm: update max_pfn after memory hotplug
Sukadev Bhattiprolu (2):
ibmvnic: don't stop queue in xmit
ibmvnic: Process crqs after enabling interrupts
Sungjong Seo (1):
exfat: fix incorrect loading of i_blocks for large files
Sven Eckelmann (1):
ath10k: fix max antenna gain unit
Sven Schnelle (4):
parisc: fix warning in flush_tlb_all
parisc/unwind: fix unwinder when CONFIG_64BIT is enabled
parisc/kgdb: add kgdb_roundup() to make kgdb work with idle polling
s390/tape: fix timer initialization in tape_std_assign()
Sylwester Dziedziuch (1):
ice: Fix replacing VF hardware MAC to existing MAC filter
Tadeusz Struk (1):
scsi: core: Remove command size deduction from scsi_setup_scsi_cmnd()
Takashi Iwai (8):
Input: i8042 - Add quirk for Fujitsu Lifebook T725
ALSA: hda/realtek: Add a quirk for HP OMEN 15 mute LED
ALSA: hda/realtek: Add quirk for ASUS UX550VE
ALSA: hda: Free card instance properly at probe errors
ALSA: timer: Unconditionally unlink slave instances, too
ALSA: mixer: oss: Fix racy access to slots
ALSA: hda: Reduce udelay() at SKL+ position reporting
ALSA: hda: Use position buffer for SKL+ again
Tang Bin (1):
crypto: s5p-sss - Add error handling in s5p_aes_probe()
Tao Zhang (1):
coresight: cti: Correct the parameter for pm_runtime_put
Tetsuo Handa (2):
smackfs: use __GFP_NOFAIL for smk_cipso_doi()
smackfs: use netlbl_cfg_cipsov4_del() for deleting cipso_v4_doi
Thomas Perrot (1):
spi: spl022: fix Microwire full duplex mode
Tiezhu Yang (1):
samples/kretprobes: Fix return value if register_kretprobe() failed
Tim Crawford (1):
ALSA: hda/realtek: Add quirk for Clevo PC70HS
Tim Gardner (2):
drm/msm: prevent NULL dereference in msm_gpu_crashstate_capture()
net: enetc: unmap DMA in enetc_send_cmd()
Todd Kjos (3):
binder: use euid from cred instead of using task
binder: use cred instead of task for selinux checks
binder: use cred instead of task for getsecid
Tom Lendacky (3):
x86/sme: Use #define USE_EARLY_PGTABLE_L5 in mem_encrypt_identity.c
arch/cc: Introduce a function to check for confidential computing
features
x86/sev: Add an x86 version of cc_platform_has()
Tom Rix (2):
media: TDA1997x: handle short reads of hdmi info frame.
apparmor: fix error check
Tong Zhang (1):
scsi: dc395: Fix error case unwinding
Tony Lindgren (3):
mmc: sdhci-omap: Fix NULL pointer exception if regulator is not
configured
mmc: sdhci-omap: Fix context restore
bus: ti-sysc: Fix timekeeping_suspended warning on resume
Tony Lu (1):
net/smc: Fix smc_link->llc_testlink_time overflow
Trond Myklebust (6):
NFS: Fix dentry verifier races
NFS: Fix deadlocks in nfs_scan_commit_list()
NFS: Fix up commit deadlocks
NFS: Fix an Oops in pnfs_mark_request_commit()
NFSv4: Fix a regression in nfs_set_open_stateid_locked()
SUNRPC: Partial revert of commit 6f9f17287e78
Tuo Li (2):
media: s5p-mfc: fix possible null-pointer dereference in
s5p_mfc_probe()
ath: dfs_pattern_detector: Fix possible null-pointer dereference in
channel_detector_create()
Vasant Hegde (1):
powerpc/powernv/prd: Unregister OPAL_MSG_PRD2 notifier during module
unload
Vasily Averin (2):
memcg: prohibit unconditional exceeding the limit of dying tasks
mm, oom: pagefault_out_of_memory: don't force global OOM for dying
tasks
Vegard Nossum (1):
staging: ks7010: select CRYPTO_HASH/CRYPTO_MICHAEL_MIC
Vincent Donnefort (1):
PM: EM: Fix inefficient states detection
Vineeth Vijayan (1):
s390/cio: check the subchannel validity for dev_busid
Vitaly Kuznetsov (1):
x86/hyperv: Protect set_hv_tscchange_cb() against getting preempted
Vladimir Oltean (1):
net: stmmac: allow a tc-taprio base-time of zero
Vladimir Zapolskiy (1):
phy: qcom-qusb2: Fix a memory leak on probe
Waiman Long (1):
cgroup: Make rebind_subsystems() disable v2 controllers all at once
Walter Stoll (1):
watchdog: Fix OMAP watchdog early handling
Wan Jiabing (1):
soc: qcom: apr: Add of_node_put() before return
Wang Hai (3):
USB: serial: keyspan: fix memleak on probe errors
libertas_tf: Fix possible memory leak in probe and disconnect
libertas: Fix possible memory leak in probe and disconnect
Wen Gong (1):
ath11k: add handler for scan event WMI_SCAN_EVENT_DEQUEUED
Wen Gu (1):
net/smc: Correct spelling mistake to TCPF_SYN_RECV
Willem de Bruijn (1):
selftests/net: udpgso_bench_rx: fix port argument
Wolfram Sang (1):
memory: renesas-rpc-if: Correct QSPI data transfer in Manual mode
Xiao Ni (1):
md: update superblock after changing rdev flags in state_store
Xiaoming Ni (2):
powerpc/85xx: Fix oops when mpc85xx_smp_guts_ids node cannot be found
powerpc/85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n
Xin Xiong (1):
mmc: moxart: Fix reference count leaks in moxart_probe
Xuan Zhuo (1):
virtio_ring: check desc == NULL when using indirect with packed
Yajun Deng (1):
net: net_namespace: Fix undefined member in key_remove_domain()
Yang Yingliang (6):
ASoC: soc-core: fix null-ptr-deref in snd_soc_del_component_unlocked()
pinctrl: core: fix possible memory leak in pinctrl_enable()
spi: bcm-qspi: Fix missing clk_disable_unprepare() on error in
bcm_qspi_probe()
hwmon: Fix possible memleak in __hwmon_device_register()
driver core: Fix possible memory leak in device_link_add()
power: supply: max17040: fix null-ptr-deref in max17040_probe()
Yazen Ghannam (1):
EDAC/amd64: Handle three rank interleaving mode
Yifan Zhang (1):
drm/amdkfd: fix resume error when iommu disabled in Picasso
Yu Xiao (1):
nfp: bpf: relax prog rejection for mtu check through max_pkt_offset
YueHaibing (2):
opp: Fix return in _opp_add_static_v2()
xen-pciback: Fix return in pm_ctrl_init()
Zev Weiss (3):
hwmon: (pmbus/lm25066) Add offset coefficients
hwmon: (pmbus/lm25066) Let compiler determine outer dimension of
lm25066_coeff
mtd: core: don't remove debugfs directory if device is in use
Zhang Changzhong (2):
can: j1939: j1939_tp_cmd_recv(): ignore abort message in the BAM
transport
can: j1939: j1939_can_recv(): ignore messages with invalid source
address
Zhang Qiao (1):
kernel/sched: Fix sched_fork() access an invalid sched_task_group
Zheyu Ma (7):
cavium: Return negative value when pci_alloc_irq_vectors() fails
scsi: qla2xxx: Return -ENOMEM if kzalloc() fails
mISDN: Fix return values of the probe function
cavium: Fix return values of the probe function
media: netup_unidvb: handle interrupt properly according to the
firmware
memstick: r592: Fix a UAF bug when removing the driver
mwl8k: Fix use-after-free in mwl8k_fw_state_machine()
Ziyang Xuan (2):
rsi: stop thread firstly in rsi_91x_init() error handling
net: vlan: fix a UAF in vlan_dev_real_dev()
Zong-Zhe Yang (1):
rtw88: fix RX clock gate setting while fifo dump
jing yangyang (1):
firmware/psci: fix application of sizeof to pointer
liuyuntao (1):
virtio-gpu: fix possible memory allocation failure
.../admin-guide/kernel-parameters.txt | 7 +
.../bindings/regulator/samsung,s5m8767.txt | 23 +-
Documentation/filesystems/fscrypt.rst | 10 +-
arch/Kconfig | 3 +
arch/arm/Makefile | 22 +-
arch/arm/boot/dts/at91-tse850-3.dts | 2 +-
arch/arm/boot/dts/bcm4708-netgear-r6250.dts | 2 +-
arch/arm/boot/dts/bcm4709-asus-rt-ac87u.dts | 2 +-
.../boot/dts/bcm4709-buffalo-wxr-1900dhp.dts | 2 +-
arch/arm/boot/dts/bcm4709-linksys-ea9200.dts | 2 +-
arch/arm/boot/dts/bcm4709-netgear-r7000.dts | 2 +-
arch/arm/boot/dts/bcm4709-netgear-r8000.dts | 2 +-
.../boot/dts/bcm4709-tplink-archer-c9-v1.dts | 2 +-
arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts | 2 +-
arch/arm/boot/dts/bcm53016-meraki-mr32.dts | 2 +-
arch/arm/boot/dts/bcm94708.dts | 2 +-
arch/arm/boot/dts/bcm94709.dts | 2 +-
arch/arm/boot/dts/omap3-gta04.dtsi | 2 +-
arch/arm/boot/dts/qcom-msm8974.dtsi | 4 +-
arch/arm/boot/dts/stm32mp15-pinctrl.dtsi | 8 +-
arch/arm/boot/dts/stm32mp151.dtsi | 16 +-
arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi | 2 +-
.../boot/dts/sun7i-a20-olinuxino-lime2.dts | 2 +-
arch/arm/kernel/stacktrace.c | 3 +-
arch/arm/mach-s3c/irq-s3c24xx.c | 22 +-
arch/arm/mm/Kconfig | 2 +-
arch/arm/mm/mmu.c | 4 +-
.../boot/dts/amlogic/meson-g12a-sei510.dts | 2 +-
.../boot/dts/amlogic/meson-g12a-u200.dts | 2 +-
.../boot/dts/amlogic/meson-g12a-x96-max.dts | 2 +-
.../dts/amlogic/meson-g12b-khadas-vim3.dtsi | 4 +-
.../dts/amlogic/meson-g12b-odroid-n2.dtsi | 4 +-
.../boot/dts/amlogic/meson-g12b-w400.dtsi | 4 +-
arch/arm64/boot/dts/qcom/msm8916.dtsi | 8 +-
arch/arm64/boot/dts/qcom/pm8916.dtsi | 1 -
.../boot/dts/renesas/beacon-renesom-som.dtsi | 1 +
arch/arm64/boot/dts/rockchip/rk3328.dtsi | 2 +-
arch/arm64/boot/dts/ti/k3-j721e-main.dtsi | 16 +-
arch/arm64/include/asm/esr.h | 1 +
arch/arm64/include/asm/pgtable.h | 12 +-
arch/arm64/kvm/hyp/hyp-entry.S | 2 +-
arch/arm64/kvm/hyp/nvhe/host.S | 2 +-
arch/arm64/mm/mmu.c | 5 +
arch/arm64/net/bpf_jit_comp.c | 5 +
arch/ia64/Kconfig.debug | 2 +-
arch/ia64/kernel/kprobes.c | 9 +-
arch/m68k/Kconfig.machine | 1 +
arch/mips/Kconfig | 1 +
arch/mips/include/asm/cmpxchg.h | 5 +-
arch/mips/include/asm/mips-cm.h | 12 +-
arch/mips/kernel/mips-cm.c | 21 +-
arch/mips/kernel/r2300_fpu.S | 4 +-
arch/mips/kernel/syscall.c | 9 -
arch/mips/lantiq/xway/dma.c | 23 +-
arch/openrisc/kernel/dma.c | 4 +-
arch/openrisc/kernel/smp.c | 6 +-
arch/parisc/kernel/entry.S | 2 +-
arch/parisc/kernel/smp.c | 19 +-
arch/parisc/kernel/unwind.c | 21 +-
arch/parisc/kernel/vmlinux.lds.S | 3 +-
arch/parisc/mm/fixmap.c | 5 +-
arch/parisc/mm/init.c | 4 +-
arch/powerpc/include/asm/code-patching.h | 1 +
arch/powerpc/include/asm/firmware.h | 6 -
arch/powerpc/include/asm/kvm_guest.h | 25 ++
arch/powerpc/include/asm/kvm_para.h | 2 +-
arch/powerpc/include/asm/security_features.h | 5 +
arch/powerpc/kernel/firmware.c | 12 +-
arch/powerpc/kernel/security.c | 5 +
arch/powerpc/lib/code-patching.c | 7 +-
arch/powerpc/net/bpf_jit.h | 33 ++-
arch/powerpc/net/bpf_jit64.h | 8 +-
arch/powerpc/net/bpf_jit_comp64.c | 64 ++++-
arch/powerpc/platforms/44x/fsp2.c | 2 +
arch/powerpc/platforms/85xx/Makefile | 4 +-
arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 7 +-
arch/powerpc/platforms/85xx/smp.c | 12 +-
arch/powerpc/platforms/powernv/opal-prd.c | 12 +-
arch/powerpc/platforms/pseries/smp.c | 3 +
arch/s390/kvm/priv.c | 2 +
arch/s390/kvm/pv.c | 21 +-
arch/s390/mm/gmap.c | 5 +-
arch/sh/kernel/cpu/fpu.c | 10 +-
arch/x86/Kconfig | 1 +
arch/x86/events/intel/uncore_snbep.c | 6 +-
arch/x86/hyperv/hv_init.c | 5 +-
arch/x86/include/asm/cpu_entry_area.h | 8 +-
arch/x86/include/asm/mem_encrypt.h | 1 +
arch/x86/include/asm/page_64_types.h | 2 +-
arch/x86/kernel/Makefile | 6 +
arch/x86/kernel/cc_platform.c | 69 +++++
arch/x86/kernel/cpu/amd.c | 2 +
arch/x86/kernel/cpu/common.c | 44 ++-
arch/x86/kernel/cpu/cpu.h | 1 +
arch/x86/kernel/cpu/hygon.c | 2 +
arch/x86/kernel/cpu/mce/intel.c | 5 +-
arch/x86/kernel/irq.c | 4 +-
arch/x86/kernel/sev-es.c | 32 ---
arch/x86/kernel/traps.c | 2 +-
arch/x86/kvm/vmx/vmx.c | 15 +-
arch/x86/mm/cpu_entry_area.c | 7 +
arch/x86/mm/mem_encrypt.c | 1 +
arch/x86/mm/mem_encrypt_identity.c | 9 +
block/blk-mq.c | 18 +-
block/blk.h | 6 +
crypto/Kconfig | 2 +-
crypto/pcrypt.c | 12 +-
drivers/acpi/ac.c | 19 ++
drivers/acpi/acpica/acglobal.h | 2 +
drivers/acpi/acpica/hwesleep.c | 8 +-
drivers/acpi/acpica/hwsleep.c | 11 +-
drivers/acpi/acpica/hwxfsleep.c | 7 +
drivers/acpi/battery.c | 2 +-
drivers/acpi/pmic/intel_pmic.c | 51 ++--
drivers/android/binder.c | 22 +-
drivers/ata/libata-core.c | 2 +-
drivers/ata/libata-eh.c | 8 +
drivers/auxdisplay/ht16k33.c | 66 +++--
drivers/auxdisplay/img-ascii-lcd.c | 10 +
drivers/base/core.c | 4 +-
drivers/base/power/main.c | 9 +-
drivers/block/zram/zram_drv.c | 2 +-
drivers/bluetooth/btmtkuart.c | 13 +-
drivers/bus/ti-sysc.c | 65 ++++-
drivers/char/hw_random/mtk-rng.c | 9 +-
drivers/char/ipmi/ipmi_msghandler.c | 10 +-
drivers/char/ipmi/ipmi_watchdog.c | 17 +-
drivers/char/tpm/tpm2-space.c | 3 +
drivers/char/tpm/tpm_tis_core.c | 26 +-
drivers/char/tpm/tpm_tis_core.h | 4 +
drivers/char/tpm/tpm_tis_spi_main.c | 1 +
drivers/clk/at91/clk-sam9x60-pll.c | 4 +-
drivers/clk/at91/pmc.c | 5 +
drivers/clk/mvebu/ap-cpu-clk.c | 14 +-
drivers/clocksource/Kconfig | 1 +
drivers/cpuidle/sysfs.c | 5 +-
drivers/crypto/caam/caampkc.c | 19 +-
drivers/crypto/caam/regs.h | 3 +
drivers/crypto/qat/qat_common/adf_pf2vf_msg.c | 13 +
drivers/crypto/qat/qat_common/adf_vf_isr.c | 6 +
drivers/crypto/s5p-sss.c | 2 +
drivers/dma-buf/dma-buf.c | 1 +
drivers/dma/at_xdmac.c | 2 +-
drivers/dma/dmaengine.h | 2 +-
drivers/edac/amd64_edac.c | 22 +-
drivers/edac/sb_edac.c | 2 +-
drivers/firmware/psci/psci_checker.c | 2 +-
drivers/firmware/qcom_scm.c | 2 +-
drivers/gpio/gpio-mlxbf2.c | 5 +
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +-
drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 4 +-
drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 8 +-
drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 17 +-
drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 +
.../drm/amd/display/dc/dcn20/dcn20_resource.c | 16 +-
.../gpu/drm/drm_panel_orientation_quirks.c | 47 +++-
drivers/gpu/drm/drm_plane_helper.c | 1 -
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c | 8 +-
drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 4 +
drivers/gpu/drm/msm/msm_gem.c | 4 +-
drivers/gpu/drm/msm/msm_gpu.c | 2 +-
drivers/gpu/drm/nouveau/nouveau_svm.c | 4 +
drivers/gpu/drm/sun4i/sun8i_csc.h | 4 +-
drivers/gpu/drm/ttm/ttm_bo_vm.c | 5 -
drivers/gpu/drm/v3d/v3d_gem.c | 4 +-
drivers/gpu/drm/virtio/virtgpu_vq.c | 8 +-
drivers/hid/hid-u2fzero.c | 10 +-
drivers/hv/hyperv_vmbus.h | 1 +
drivers/hwmon/hwmon.c | 6 +-
drivers/hwmon/pmbus/lm25066.c | 25 +-
.../hwtracing/coresight/coresight-cti-core.c | 2 +-
drivers/i2c/busses/i2c-mt65xx.c | 2 +-
drivers/i2c/busses/i2c-xlr.c | 6 +-
drivers/iio/accel/st_accel_core.c | 21 +-
drivers/iio/accel/st_accel_i2c.c | 17 +-
drivers/iio/accel/st_accel_spi.c | 17 +-
drivers/iio/dac/ad5446.c | 9 +-
drivers/iio/dac/ad5770r.c | 2 +-
drivers/iio/gyro/st_gyro_core.c | 15 +-
drivers/iio/gyro/st_gyro_i2c.c | 17 +-
drivers/iio/gyro/st_gyro_spi.c | 17 +-
drivers/iio/imu/adis.c | 4 +-
drivers/iio/magnetometer/st_magn_core.c | 15 +-
drivers/iio/magnetometer/st_magn_i2c.c | 14 +-
drivers/iio/magnetometer/st_magn_spi.c | 14 +-
drivers/iio/pressure/st_pressure_core.c | 15 +-
drivers/iio/pressure/st_pressure_i2c.c | 17 +-
drivers/iio/pressure/st_pressure_spi.c | 17 +-
drivers/infiniband/hw/bnxt_re/qplib_fp.c | 3 +-
drivers/infiniband/hw/mlx4/qp.c | 4 +-
drivers/infiniband/hw/qedr/verbs.c | 15 +-
drivers/infiniband/sw/rxe/rxe_param.h | 2 +-
drivers/input/joystick/iforce/iforce-usb.c | 2 +-
drivers/input/mouse/elantech.c | 13 +
drivers/input/serio/i8042-x86ia64io.h | 14 +
drivers/irqchip/irq-bcm6345-l1.c | 2 +-
drivers/irqchip/irq-sifive-plic.c | 8 +-
drivers/isdn/hardware/mISDN/hfcpci.c | 8 +-
drivers/md/md.c | 11 +-
drivers/media/dvb-frontends/mn88443x.c | 18 +-
drivers/media/i2c/ir-kbd-i2c.c | 1 +
drivers/media/i2c/mt9p031.c | 28 +-
drivers/media/i2c/tda1997x.c | 8 +-
drivers/media/pci/cx23885/cx23885-alsa.c | 3 +-
.../pci/netup_unidvb/netup_unidvb_core.c | 27 +-
drivers/media/platform/mtk-vpu/mtk_vpu.c | 5 +-
drivers/media/platform/rcar-vin/rcar-csi2.c | 2 +
drivers/media/platform/s5p-mfc/s5p_mfc.c | 6 +-
drivers/media/platform/stm32/stm32-dcmi.c | 19 +-
drivers/media/radio/radio-wl1273.c | 2 +-
drivers/media/radio/si470x/radio-si470x-i2c.c | 2 +-
drivers/media/radio/si470x/radio-si470x-usb.c | 2 +-
drivers/media/rc/ir_toy.c | 2 +-
drivers/media/rc/ite-cir.c | 2 +-
drivers/media/rc/mceusb.c | 1 +
drivers/media/spi/cxd2880-spi.c | 2 +-
drivers/media/usb/dvb-usb/az6027.c | 1 +
drivers/media/usb/dvb-usb/dibusb-common.c | 2 +-
drivers/media/usb/em28xx/em28xx-cards.c | 5 +-
drivers/media/usb/em28xx/em28xx-core.c | 5 +-
drivers/media/usb/tm6000/tm6000-video.c | 3 +-
drivers/media/usb/uvc/uvc_driver.c | 7 +-
drivers/media/usb/uvc/uvc_v4l2.c | 7 +-
drivers/media/usb/uvc/uvc_video.c | 5 +
drivers/media/v4l2-core/v4l2-ioctl.c | 67 +++--
drivers/memory/fsl_ifc.c | 13 +-
drivers/memory/renesas-rpc-if.c | 113 +++++---
drivers/memstick/core/ms_block.c | 2 +-
drivers/memstick/host/jmb38x_ms.c | 2 +-
drivers/memstick/host/r592.c | 8 +-
drivers/mfd/dln2.c | 18 ++
drivers/mfd/mfd-core.c | 2 +
drivers/mmc/host/Kconfig | 2 +-
drivers/mmc/host/dw_mmc.c | 3 +-
drivers/mmc/host/moxart-mmc.c | 29 +-
drivers/mmc/host/mtk-sd.c | 5 +
drivers/mmc/host/mxs-mmc.c | 10 +
drivers/mmc/host/sdhci-omap.c | 18 +-
drivers/most/most_usb.c | 5 +-
drivers/mtd/mtdcore.c | 4 +-
drivers/mtd/nand/raw/ams-delta.c | 12 +-
drivers/mtd/nand/raw/au1550nd.c | 12 +-
drivers/mtd/nand/raw/gpio.c | 12 +-
drivers/mtd/nand/raw/mpc5121_nfc.c | 12 +-
drivers/mtd/nand/raw/orion_nand.c | 12 +-
drivers/mtd/nand/raw/pasemi_nand.c | 12 +-
drivers/mtd/nand/raw/plat_nand.c | 12 +-
drivers/mtd/nand/raw/socrates_nand.c | 12 +-
drivers/mtd/nand/raw/xway_nand.c | 12 +-
drivers/mtd/spi-nor/controllers/hisi-sfc.c | 1 -
drivers/net/Kconfig | 2 +-
.../net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +-
drivers/net/dsa/rtl8366rb.c | 2 +-
drivers/net/ethernet/amd/xgbe/xgbe-common.h | 8 +
drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 20 +-
.../net/ethernet/cavium/thunder/nic_main.c | 2 +-
.../net/ethernet/cavium/thunder/nicvf_main.c | 4 +-
.../ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 7 +-
drivers/net/ethernet/chelsio/cxgb4/t4_hw.h | 2 +
.../chelsio/inline_crypto/chtls/chtls_cm.c | 2 +-
.../chelsio/inline_crypto/chtls/chtls_cm.h | 2 +-
.../net/ethernet/freescale/enetc/enetc_qos.c | 18 +-
drivers/net/ethernet/google/gve/gve.h | 4 +-
drivers/net/ethernet/google/gve/gve_adminq.h | 1 +
drivers/net/ethernet/google/gve/gve_main.c | 48 +++-
drivers/net/ethernet/ibm/ibmvnic.c | 5 +-
drivers/net/ethernet/intel/ice/ice_base.c | 2 +-
.../net/ethernet/intel/ice/ice_virtchnl_pf.c | 20 +-
drivers/net/ethernet/netronome/nfp/bpf/main.c | 16 +-
drivers/net/ethernet/netronome/nfp/bpf/main.h | 2 +
.../net/ethernet/netronome/nfp/bpf/offload.c | 17 +-
drivers/net/ethernet/realtek/r8169_main.c | 1 +
drivers/net/ethernet/sfc/mcdi_port_common.c | 37 ++-
drivers/net/ethernet/sfc/ptp.c | 4 +-
drivers/net/ethernet/sfc/siena_sriov.c | 2 +-
.../net/ethernet/stmicro/stmmac/stmmac_tc.c | 2 -
drivers/net/ethernet/ti/davinci_emac.c | 16 +-
drivers/net/ifb.c | 2 +
drivers/net/phy/micrel.c | 9 +-
drivers/net/phy/phylink.c | 2 +-
drivers/net/vmxnet3/vmxnet3_drv.c | 1 -
drivers/net/vrf.c | 28 +-
drivers/net/wireless/ath/ath10k/mac.c | 45 +++-
drivers/net/wireless/ath/ath10k/sdio.c | 5 +-
drivers/net/wireless/ath/ath10k/usb.c | 7 +-
drivers/net/wireless/ath/ath10k/wmi.c | 4 +
drivers/net/wireless/ath/ath10k/wmi.h | 3 +
drivers/net/wireless/ath/ath11k/dbring.c | 16 +-
drivers/net/wireless/ath/ath11k/dp_rx.c | 13 +-
drivers/net/wireless/ath/ath11k/mac.c | 2 +-
drivers/net/wireless/ath/ath11k/qmi.c | 4 +-
drivers/net/wireless/ath/ath11k/reg.c | 11 +-
drivers/net/wireless/ath/ath11k/reg.h | 2 +-
drivers/net/wireless/ath/ath11k/wmi.c | 40 ++-
drivers/net/wireless/ath/ath11k/wmi.h | 3 +-
drivers/net/wireless/ath/ath6kl/usb.c | 7 +-
drivers/net/wireless/ath/ath9k/main.c | 4 +-
.../net/wireless/ath/dfs_pattern_detector.c | 10 +-
drivers/net/wireless/ath/wcn36xx/dxe.c | 49 ++--
drivers/net/wireless/ath/wcn36xx/main.c | 8 +-
drivers/net/wireless/ath/wcn36xx/smd.c | 44 ++-
drivers/net/wireless/ath/wcn36xx/txrx.c | 64 +++--
drivers/net/wireless/ath/wcn36xx/txrx.h | 3 +-
drivers/net/wireless/broadcom/b43/phy_g.c | 2 +-
.../net/wireless/broadcom/b43legacy/radio.c | 2 +-
.../broadcom/brcm80211/brcmfmac/dmi.c | 10 +
.../net/wireless/intel/iwlwifi/mvm/utils.c | 3 +
.../net/wireless/marvell/libertas/if_usb.c | 2 +
.../net/wireless/marvell/libertas_tf/if_usb.c | 2 +
drivers/net/wireless/marvell/mwifiex/11n.c | 5 +-
.../net/wireless/marvell/mwifiex/cfg80211.c | 32 +--
drivers/net/wireless/marvell/mwifiex/pcie.c | 36 ++-
drivers/net/wireless/marvell/mwifiex/usb.c | 16 ++
drivers/net/wireless/marvell/mwl8k.c | 2 +-
.../net/wireless/mediatek/mt76/mt7615/mac.c | 15 +-
.../net/wireless/mediatek/mt76/mt76x02_mac.c | 13 +-
.../net/wireless/mediatek/mt76/mt7915/mcu.c | 8 +-
.../wireless/microchip/wilc1000/cfg80211.c | 3 +-
.../realtek/rtl818x/rtl8187/rtl8225.c | 14 +-
drivers/net/wireless/realtek/rtw88/fw.c | 7 +-
drivers/net/wireless/realtek/rtw88/reg.h | 1 +
drivers/net/wireless/rsi/rsi_91x_core.c | 2 +
drivers/net/wireless/rsi/rsi_91x_hal.c | 10 +-
drivers/net/wireless/rsi/rsi_91x_mac80211.c | 74 ++----
drivers/net/wireless/rsi/rsi_91x_main.c | 17 +-
drivers/net/wireless/rsi/rsi_91x_mgmt.c | 24 +-
drivers/net/wireless/rsi/rsi_91x_sdio.c | 5 +-
drivers/net/wireless/rsi/rsi_91x_usb.c | 5 +-
drivers/net/wireless/rsi/rsi_hal.h | 11 +
drivers/net/wireless/rsi/rsi_main.h | 15 +-
drivers/net/xen-netfront.c | 8 +
drivers/nfc/pn533/pn533.c | 6 +-
drivers/nvme/host/multipath.c | 9 +-
drivers/nvme/host/rdma.c | 2 +
drivers/nvme/target/configfs.c | 2 +
drivers/nvme/target/rdma.c | 24 ++
drivers/nvme/target/tcp.c | 21 +-
drivers/of/unittest.c | 16 +-
drivers/opp/of.c | 2 +-
.../controller/cadence/pcie-cadence-plat.c | 2 +
drivers/pci/controller/dwc/pcie-uniphier.c | 26 +-
drivers/pci/controller/pci-aardvark.c | 251 +++++++++++++++---
drivers/pci/pci-bridge-emul.c | 13 +
drivers/pci/quirks.c | 1 +
drivers/phy/qualcomm/phy-qcom-qusb2.c | 16 +-
drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c | 2 +-
drivers/phy/ti/phy-gmii-sel.c | 2 +
drivers/pinctrl/core.c | 2 +
drivers/pinctrl/pinctrl-equilibrium.c | 7 +-
drivers/pinctrl/renesas/core.c | 2 +-
drivers/platform/x86/thinkpad_acpi.c | 2 +-
drivers/platform/x86/wmi.c | 9 +-
drivers/power/supply/bq27xxx_battery_i2c.c | 3 +-
drivers/power/supply/max17040_battery.c | 2 +
drivers/power/supply/max17042_battery.c | 12 +-
drivers/power/supply/rt5033_battery.c | 2 +-
drivers/regulator/s5m8767.c | 21 +-
drivers/remoteproc/remoteproc_core.c | 8 +-
drivers/reset/reset-socfpga.c | 26 ++
drivers/rtc/rtc-rv3032.c | 4 +-
drivers/s390/char/tape_std.c | 3 +-
drivers/s390/cio/css.c | 4 +-
drivers/s390/cio/device_ops.c | 12 +-
drivers/s390/crypto/ap_queue.c | 2 +
drivers/scsi/csiostor/csio_lnode.c | 2 +-
drivers/scsi/dc395x.c | 1 +
drivers/scsi/pm8001/pm8001_hwi.c | 2 +-
drivers/scsi/qla2xxx/qla_attr.c | 24 +-
drivers/scsi/qla2xxx/qla_dbg.c | 3 +-
drivers/scsi/qla2xxx/qla_gbl.h | 2 -
drivers/scsi/qla2xxx/qla_init.c | 54 +++-
drivers/scsi/qla2xxx/qla_mr.c | 23 --
drivers/scsi/qla2xxx/qla_os.c | 47 ++--
drivers/scsi/qla2xxx/qla_target.c | 14 +-
drivers/scsi/scsi_lib.c | 2 -
drivers/scsi/ufs/ufshcd-pltfrm.c | 6 +-
drivers/scsi/ufs/ufshcd.c | 29 +-
drivers/scsi/ufs/ufshcd.h | 3 +
drivers/soc/fsl/dpaa2-console.c | 1 +
drivers/soc/fsl/dpio/dpio-service.c | 2 +-
drivers/soc/fsl/dpio/qbman-portal.c | 9 +-
drivers/soc/qcom/apr.c | 2 +
drivers/soc/qcom/rpmhpd.c | 21 +-
drivers/soc/tegra/pmc.c | 4 +-
drivers/soundwire/debugfs.c | 2 +-
drivers/spi/spi-bcm-qspi.c | 5 +-
drivers/spi/spi-pl022.c | 5 +-
drivers/spi/spi-rpc-if.c | 4 +-
drivers/staging/ks7010/Kconfig | 3 +
.../staging/media/allegro-dvt/allegro-core.c | 9 +
.../media/atomisp/i2c/atomisp-lm3554.c | 37 ++-
.../staging/media/imx/imx-media-dev-common.c | 2 +
drivers/staging/media/ipu3/ipu3-v4l2.c | 7 +-
drivers/staging/media/rkvdec/rkvdec-h264.c | 5 +-
drivers/staging/media/rkvdec/rkvdec.c | 40 +--
drivers/staging/most/dim2/Makefile | 2 +-
drivers/staging/most/dim2/dim2.c | 24 +-
drivers/staging/most/dim2/sysfs.c | 49 ----
drivers/staging/most/dim2/sysfs.h | 11 -
drivers/tty/serial/8250/8250_dw.c | 2 +-
drivers/tty/serial/8250/8250_port.c | 21 +-
drivers/tty/serial/imx.c | 4 +-
drivers/tty/serial/serial_core.c | 16 +-
drivers/tty/serial/xilinx_uartps.c | 3 +-
drivers/usb/chipidea/core.c | 23 +-
drivers/usb/dwc2/drd.c | 24 +-
drivers/usb/gadget/legacy/hid.c | 4 +-
drivers/usb/host/xhci-hub.c | 3 +-
drivers/usb/host/xhci-pci.c | 16 ++
drivers/usb/misc/iowarrior.c | 8 +-
drivers/usb/musb/Kconfig | 2 +-
drivers/usb/serial/keyspan.c | 15 +-
drivers/usb/typec/Kconfig | 4 +-
drivers/video/backlight/backlight.c | 6 -
drivers/video/fbdev/chipsfb.c | 2 +-
drivers/virtio/virtio_ring.c | 14 +-
drivers/watchdog/Kconfig | 2 +-
drivers/watchdog/f71808e_wdt.c | 4 +-
drivers/watchdog/omap_wdt.c | 6 +-
drivers/xen/balloon.c | 86 ++++--
.../xen/xen-pciback/conf_space_capability.c | 2 +-
fs/btrfs/disk-io.c | 3 +-
fs/btrfs/reflink.c | 2 +-
fs/btrfs/tree-log.c | 4 +-
fs/btrfs/volumes.c | 14 +-
fs/crypto/fscrypt_private.h | 5 +-
fs/crypto/hkdf.c | 11 +-
fs/crypto/keysetup.c | 57 +++-
fs/erofs/decompressor.c | 1 -
fs/exfat/inode.c | 2 +-
fs/ext4/super.c | 9 +-
fs/f2fs/inode.c | 2 +-
fs/f2fs/namei.c | 2 +-
fs/fuse/dev.c | 14 +-
fs/gfs2/glock.c | 24 +-
fs/jfs/jfs_mount.c | 51 ++--
fs/nfs/dir.c | 7 +-
fs/nfs/direct.c | 2 +-
fs/nfs/flexfilelayout/flexfilelayoutdev.c | 4 +-
fs/nfs/nfs4idmap.c | 2 +-
fs/nfs/nfs4proc.c | 15 +-
fs/nfs/pnfs.h | 2 +-
fs/nfs/pnfs_nfs.c | 6 +-
fs/nfs/write.c | 26 +-
fs/ocfs2/file.c | 8 +-
fs/orangefs/dcache.c | 4 +-
fs/proc/stat.c | 4 +-
fs/proc/uptime.c | 14 +-
fs/tracefs/inode.c | 3 +-
include/linux/blkdev.h | 2 -
include/linux/cc_platform.h | 88 ++++++
include/linux/console.h | 2 +
include/linux/ethtool_netlink.h | 3 +
include/linux/filter.h | 1 +
include/linux/kernel_stat.h | 1 +
include/linux/libata.h | 2 +-
include/linux/lsm_hook_defs.h | 14 +-
include/linux/lsm_hooks.h | 14 +-
include/linux/nfs_fs.h | 1 +
include/linux/posix-timers.h | 2 +
include/linux/rpmsg.h | 2 +-
include/linux/sched/task.h | 3 +-
include/linux/sched/task_stack.h | 4 +
include/linux/security.h | 33 ++-
include/linux/seq_file.h | 2 +-
include/linux/tpm.h | 1 +
include/memory/renesas-rpc-if.h | 1 +
include/net/inet_connection_sock.h | 2 +-
include/net/llc.h | 4 +-
include/net/neighbour.h | 12 +-
include/net/sch_generic.h | 4 +
include/net/sock.h | 2 +-
include/net/strparser.h | 16 +-
include/net/tcp.h | 17 +-
include/net/udp.h | 5 +-
include/uapi/linux/ethtool_netlink.h | 4 +-
include/uapi/linux/pci_regs.h | 6 +
kernel/bpf/core.c | 4 +-
kernel/bpf/verifier.c | 4 +-
kernel/cgroup/cgroup.c | 31 ++-
kernel/cgroup/rstat.c | 2 -
kernel/fork.c | 3 +-
kernel/kprobes.c | 3 +-
kernel/locking/lockdep.c | 4 +-
kernel/power/energy_model.c | 23 +-
kernel/power/swap.c | 2 +-
kernel/rcu/rcutorture.c | 48 +++-
kernel/rcu/tasks.h | 3 +-
kernel/rcu/tree_exp.h | 2 +-
kernel/rcu/tree_plugin.h | 8 +-
kernel/sched/core.c | 43 +--
kernel/signal.c | 18 +-
kernel/time/posix-cpu-timers.c | 19 +-
kernel/trace/ring_buffer.c | 5 +
kernel/trace/tracing_map.c | 40 +--
kernel/workqueue.c | 15 +-
lib/decompress_unxz.c | 2 +-
lib/iov_iter.c | 5 +-
lib/xz/xz_dec_lzma2.c | 21 +-
lib/xz/xz_dec_stream.c | 6 +-
mm/memcontrol.c | 27 +-
mm/oom_kill.c | 23 +-
mm/zsmalloc.c | 7 +-
net/8021q/vlan.c | 3 -
net/8021q/vlan_dev.c | 3 +
net/9p/client.c | 2 +
net/bluetooth/sco.c | 9 +-
net/can/j1939/main.c | 7 +
net/can/j1939/transport.c | 6 +
net/core/dev.c | 5 +-
net/core/filter.c | 21 ++
net/core/neighbour.c | 48 ++--
net/core/net-sysfs.c | 55 ++++
net/core/net_namespace.c | 4 +
net/core/stream.c | 3 -
net/core/sysctl_net_core.c | 2 +-
net/dccp/dccp.h | 2 +-
net/dccp/proto.c | 14 +-
net/ethtool/pause.c | 3 +-
net/ipv4/inet_connection_sock.c | 4 +-
net/ipv4/inet_hashtables.c | 2 +-
net/ipv4/proc.c | 2 +-
net/ipv4/tcp.c | 40 ++-
net/ipv4/tcp_bpf.c | 1 -
net/ipv6/addrconf.c | 3 +
net/ipv6/udp.c | 2 +-
net/netfilter/nf_conntrack_proto_udp.c | 7 +-
net/netfilter/nfnetlink_queue.c | 2 +-
net/netfilter/nft_dynset.c | 11 +-
net/rds/ib.c | 10 -
net/rds/ib.h | 6 -
net/rds/ib_cm.c | 128 +++++----
net/rds/ib_recv.c | 18 +-
net/rds/ib_send.c | 8 +
net/rxrpc/rtt.c | 2 +-
net/sched/sch_generic.c | 9 +
net/sched/sch_mq.c | 24 ++
net/sched/sch_mqprio.c | 23 ++
net/sched/sch_taprio.c | 27 +-
net/smc/af_smc.c | 20 +-
net/smc/smc_llc.c | 2 +-
net/strparser/strparser.c | 10 +-
net/sunrpc/addr.c | 40 ++-
net/sunrpc/xprt.c | 28 +-
net/vmw_vsock/af_vsock.c | 2 +
samples/kprobes/kretprobe_example.c | 2 +-
scripts/leaking_addresses.pl | 3 +-
security/apparmor/label.c | 4 +-
security/integrity/evm/evm_main.c | 2 +-
security/security.c | 14 +-
security/selinux/hooks.c | 36 ++-
security/selinux/ss/services.c | 162 ++++++-----
security/smack/smackfs.c | 11 +-
sound/core/oss/mixer_oss.c | 43 ++-
sound/core/timer.c | 13 +-
sound/pci/hda/hda_intel.c | 74 +++---
sound/pci/hda/patch_realtek.c | 82 ++++++
sound/soc/codecs/cs42l42.c | 88 +++---
sound/soc/soc-core.c | 1 +
sound/soc/sof/topology.c | 9 +
sound/synth/emux/emux.c | 2 +-
sound/usb/6fire/comm.c | 2 +-
sound/usb/6fire/firmware.c | 6 +-
sound/usb/format.c | 1 +
sound/usb/line6/driver.c | 14 +-
sound/usb/line6/driver.h | 2 +-
sound/usb/line6/podhd.c | 6 +-
sound/usb/line6/toneport.c | 2 +-
sound/usb/misc/ua101.c | 4 +-
sound/usb/quirks.c | 1 +
tools/bpf/bpftool/prog.c | 16 +-
tools/lib/bpf/bpf_core_read.h | 2 +-
tools/lib/bpf/btf.c | 25 +-
tools/objtool/check.c | 19 +-
tools/perf/util/bpf-event.c | 4 +-
.../selftests/bpf/prog_tests/perf_buffer.c | 4 +-
.../selftests/bpf/prog_tests/sk_lookup.c | 85 ++++--
.../testing/selftests/bpf/progs/strobemeta.h | 4 +-
.../selftests/bpf/progs/test_sk_lookup.c | 62 +++--
tools/testing/selftests/bpf/test_progs.c | 4 +-
.../selftests/bpf/verifier/array_access.c | 2 +-
.../testing/selftests/core/close_range_test.c | 2 +-
tools/testing/selftests/kvm/lib/x86_64/svm.c | 22 +-
.../selftests/kvm/x86_64/mmio_warning_test.c | 2 +-
tools/testing/selftests/net/fcnal-test.sh | 3 +
tools/testing/selftests/net/udpgso_bench_rx.c | 11 +-
587 files changed, 4709 insertions(+), 2317 deletions(-)
create mode 100644 arch/powerpc/include/asm/kvm_guest.h
create mode 100644 arch/x86/kernel/cc_platform.c
delete mode 100644 drivers/staging/most/dim2/sysfs.c
create mode 100644 include/linux/cc_platform.h
--
2.20.1
From: 沈子俊 <shenzijun(a)kylinos.cn>
kylin inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4A842?from=project-issue
CVE: NA
-------------------------------------------------------------------------
Add the CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64 and CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64
options in arch/x86/configs/openeuler_defconfig.
Signed-off-by: 沈子俊 <shenzijun(a)kylinos.cn>
---
arch/x86/configs/openeuler_defconfig | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index b25d908dc7a1..9b23f113f669 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -7991,6 +7991,8 @@ CONFIG_CRYPTO_SERPENT_SSE2_X86_64=m
CONFIG_CRYPTO_SERPENT_AVX_X86_64=m
CONFIG_CRYPTO_SERPENT_AVX2_X86_64=m
CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64=m
+CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_TWOFISH_COMMON=m
--
2.30.0
[PATCH openEuler-5.10 01/54] hugetlb: before freeing hugetlb page set dtor to appropriate value
by Zheng Zengkai 03 Dec '21
From: Mike Kravetz <mike.kravetz(a)oracle.com>
mainline inclusion
from mainline-v5.15-rc1
commit e32d20c0c88b1cd0a44f882c4f0eb2f536363d1b
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4IGRQ
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
----------------------------------------------------------------------
When removing a hugetlb page from the pool the ref count is set to one (as
the free page has no ref count) and compound page destructor is set to
NULL_COMPOUND_DTOR. Since a subsequent call to free the hugetlb page will
call __free_pages for non-gigantic pages and free_gigantic_page for
gigantic pages the destructor is not used.
However, consider the following race with code taking a speculative
reference on the page:
Thread 0                                Thread 1
--------                                --------
remove_hugetlb_page
  set_page_refcounted(page);
  set_compound_page_dtor(page,
           NULL_COMPOUND_DTOR);
                                        get_page_unless_zero(page)
__update_and_free_page
  __free_pages(page,
           huge_page_order(h));
                /* Note that __free_pages() will simply drop
                   the reference to the page. */
                                        put_page(page)
                                          __put_compound_page()
                                            destroy_compound_page
                                              NULL_COMPOUND_DTOR
                                              BUG: kernel NULL pointer
                                              dereference, address:
                                              0000000000000000
To address this race, set the dtor to the normal compound page dtor for
non-gigantic pages. The dtor for gigantic pages does not matter as
gigantic pages are changed from a compound page to 'just a group of pages'
before freeing. Hence, the destructor is not used.
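To make the failure mode concrete, here is a minimal userspace sketch (hypothetical names, stdatomic-based; not the kernel implementation) of the speculative-reference pattern: whoever drops the last reference runs the destructor, so the destructor must already be valid while get_page_unless_zero() can still succeed.

#include <stdatomic.h>
#include <stdbool.h>

struct obj {
	atomic_int refcount;
	void (*dtor)(struct obj *);	/* must be valid once speculative refs are possible */
};

/* Analogue of get_page_unless_zero(): take a reference only if the
 * count is still non-zero. */
static bool get_unless_zero(struct obj *o)
{
	int c = atomic_load(&o->refcount);

	while (c != 0)
		if (atomic_compare_exchange_weak(&o->refcount, &c, c + 1))
			return true;
	return false;
}

/* Analogue of put_page(): the thread dropping the last reference
 * invokes the destructor; a NULL dtor here is exactly the oops shown
 * in the race diagram above. */
static void put_ref(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcount, 1) == 1)
		o->dtor(o);
}

The patch below closes the window by leaving a valid destructor (COMPOUND_PAGE_DTOR) in place for non-gigantic pages.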
Link: https://lkml.kernel.org/r/20210809184832.18342-4-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Reviewed-by: Muchun Song <songmuchun(a)bytedance.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Naoya Horiguchi <naoya.horiguchi(a)linux.dev>
Cc: Mina Almasry <almasrymina(a)google.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Chen Wandun <chenwandun(a)huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
mm/hugetlb.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 47dd6b5e0040..6ae2d2e90681 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1364,8 +1364,28 @@ static void remove_hugetlb_page(struct hstate *h, struct page *page,
 		h->surplus_huge_pages_node[nid]--;
 	}
 
+	/*
+	 * Very subtle
+	 *
+	 * For non-gigantic pages set the destructor to the normal compound
+	 * page dtor.  This is needed in case someone takes an additional
+	 * temporary ref to the page, and freeing is delayed until they drop
+	 * their reference.
+	 *
+	 * For gigantic pages set the destructor to the null dtor.  This
+	 * destructor will never be called.  Before freeing the gigantic
+	 * page destroy_compound_gigantic_page will turn the compound page
+	 * into a simple group of pages.  After this the destructor does not
+	 * apply.
+	 *
+	 * This handles the case where more than one ref is held when and
+	 * after update_and_free_page is called.
+	 */
 	set_page_refcounted(page);
-	set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
+	if (hstate_is_gigantic(h))
+		set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
+	else
+		set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
 
 	h->nr_huge_pages--;
 	h->nr_huge_pages_node[nid]--;
--
2.20.1
On 2021/12/3 17:46, 郑振鹏 wrote:
> Hi Xie,
>
> Once the gigabit driver has been merged into the 4.19 kernel, I plan to first push the 10-gigabit driver's feature upgrade for 4.19 along with patches syncing the latest bug fixes, and then prepare the gigabit driver patches for the 5.10 kernel.
5.10 will ship in the 22.03 LTS release in March. Don't you plan to get the driver into that release?
>
> BR,
> zhenpeng
>
> ----------------------------------------------------------------
>
> ****************************************************************
>
> 郑振鹏(zheng zhenpeng)
>
> Beijing WangXun Technology Co., Ltd. Software Engineer.
>
> Room A507, HuaXing Times Square, No.478 West Wensan Road.
>
> West Lake District, Hangzhou City, 310013 ZHEJIANG, P.R.CHINA.
>
>
>
> Office: +86(0571)89807901-8014
>
> Mobile: +86-13656681762
>
> E-Mail: zhenpengzheng(a)net-swift.com
>
> ****************************************************************
>
>
> *From:* Xie XiuQi <xiexiuqi(a)huawei.com>
> *Sent:* 2021-12-03 16:15
> *To:* 郑振鹏 <zhenpengzheng(a)net-swift.com>; QiuLaibin <qiulaibin(a)huawei.com>
> *Cc:* yangyingliang(a)huawei.com; kernel(a)openeuler.org
> *Subject:* Re: [openEuler] Merging the Netswift Giga NIC driver into openEuler 20.03
>
>
> On 2021/12/3 15:17, 郑振鹏 wrote:
> > Hello,
> >
> > So far I have only tested this patch on the x86 platform. Once ARM platform testing is complete, I will submit a new patch.
>
> Thanks.
>
> Is Netswift Giga NIC support for 22.03 (the 5.10 kernel) also being worked on?
>
> >
> > BR,
> > zhenpeng
> >
> >
> > ----------------------------------------------------------------
> >
> > ****************************************************************
> >
> > 郑振鹏(zheng zhenpeng)
> >
> > 北京网迅科技有限公司杭州分公司 软件工程师
> >
> > 浙江省杭州市西湖区文三路478号华星时代广场A座507室 310013
> >
> > Beijing WangXun Technology Co., Ltd. Software Engineer.
> >
> > Room A507, HuaXing Times Square, No.478 West Wensan Road.
> >
> > West Lake District, Hangzhou City, 310013 ZHEJIANG, P.R.CHINA.
> >
> >
> >
> > Office: +86(0571)89807901-8014
> >
> > Mobile: +86-13656681762
> >
> > E-Mail: zhenpengzheng(a)net-swift.com
> >
> > ****************************************************************
> >
> >
> > *From:* QiuLaibin <qiulaibin(a)huawei.com>
> > *Sent:* 2021-12-03 15:01
> > *To:* zhenpengzheng(a)net-swift.com
> > *Cc:* Xiexiuqi <xiexiuqi(a)huawei.com>; yangyingliang(a)huawei.com
> > *Subject:* [openEuler] Merging the Netswift Giga NIC driver into openEuler 20.03
> > Hi pengzheng,
> >
> > Thank you very much for your submission!
> >
> > We are currently merging into the release the Netswift Giga NIC driver patch set you previously submitted for openEuler:
> >
> > [openEuler-1.0-LTS,1/2] net: ngbe: Add Netswift Giga NIC driver
> > [openEuler-1.0-LTS,2/2] x86/config: Enable netswift Giga NIC driver for x86
> >
> > However, since the build config enables the driver only in the x86 config, we need to confirm whether it supports only the x86 platform, whether it can be enabled on ARM, and whether mistakenly enabling the ARM-related config would introduce problems.
> >
> > Best regards,
> > Laibin Qiu
> >
> >
> >
>
TC agenda item submission:
Topic: kernel page size and supported CPU specifications for the openEuler 22.03 LTS ARM64 release (5.10 kernel):
Decision point 1: Should openEuler 22.03 LTS default to 4K pages (with 48-bit VA/PA)?
Decision point 2: If 4K pages are the default, should a separate kernel package with 64K pages also be shipped?
Decision point 3: Maximum supported CPU count for openEuler 22.03 LTS: NR_CPUS=4096, with up to 128 NUMA nodes.
(Original 20.03 configuration: 64K pages, 48-bit VA/PA, NR_CPUS 1024, NODES 16)
Link to the earlier discussion:
https://gitee.com/openeuler/kernel/issues/I4HDHZ
Rationale 1: The discussion shows that 64K pages bring quite a few compatibility problems, and most OSes in the industry use 4K, which simplifies compatibility certification for drivers and upper-layer software and supports more usage scenarios. For performance, workloads where 64K wins can use huge pages or a separate 64K kernel build.
Rationale 2: Supporting 52-bit large memory requires 64K pages; 4K pages support at most 48-bit VA/PA, i.e. 256 TB of address space (a quick check of this arithmetic is sketched below). Supporting larger, PB-scale memory requires 64K.
Rationale 3: Judging from demand over the next few years and the specifications supported by other OSes, growing CPU counts are the trend; for compatibility and capacity, NR_CPUS should match the industry maximum of 4096. A 4-socket Phytium S2500 currently has at most 32 nodes; to meet evolution needs over the next few years, the proposed maximum NODES is 128.
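As a quick check of the address-space arithmetic above (a standalone sketch, not part of any patch; names are ours):

#include <stdio.h>

/* 48-bit addressing covers 2^48 bytes = 256 TiB; 52-bit covers 4 PiB. */
int main(void)
{
	unsigned long long va48 = 1ULL << 48;
	unsigned long long va52 = 1ULL << 52;

	printf("48-bit VA/PA: %llu TiB\n", va48 >> 40);	/* prints 256 */
	printf("52-bit VA/PA: %llu PiB\n", va52 >> 50);	/* prints 4 */
	return 0;
}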
On 2021/12/3 14:35, yangcong wrote:
>
>
> Submitted agenda items:
> Topic 5: Introducing a license risk identification system into the openEuler community - Yang Cong <yangcong_hit(a)163.com>
> Topic 6: Strategy for choosing licenses for the different types of content in openEuler community repos - Yang Cong <yangcong_hit(a)163.com>
> On 2021-12-01 09:34, Hufeng (Solar, Euler) <solar.hu(a)huawei.com> wrote:
> The current agenda items are as follows:
> Topic 1: release management SIG work plan - Hu Feng (carried over to this meeting)
> Topic 2: Planning discussion for new openEuler technology domains: Edge - Liu Shouyong, Embedded - Ren Wei (carried over to this meeting)
> Topic 3: Community resource support and project approval for NestOS: Du Yiwei <duyiwei(a)kylinos.cn> (carried over to this meeting)
> a. How to download NestOS images from the openEuler website
> b. Providing the resources needed to deploy the automatic-update environment NestOS requires
> c. How to set up the project so that more people participate in NestOS
> Topic 4: Greenplum white paper review - 270162781(a)qq.com - bo zhao
> Before the meeting, please confirm that the issues left over from previous meetings have been closed.
>
>
> -----Original Message-----
> From: Hufeng (Solar, Euler)
> Sent: Tuesday, November 30, 2021 2:45 PM
> To: tc(a)openeuler.org; 'dev(a)openeuler.org' <dev(a)openeuler.org>
> Subject: [TC] Agenda collection; agenda submitters, please attend on time, thank you. RE: [Dev] openEuler Technical Committee regular meeting
>
> The current agenda items are as follows:
> Topic 1: release management SIG work plan - Hu Feng (carried over to this meeting)
> Topic 2: Planning discussion for new openEuler technology domains: Edge - Liu Shouyong, Embedded - Ren Wei (carried over to this meeting)
> Topic 3: Community resource support and project approval for NestOS: Du Yiwei <duyiwei(a)kylinos.cn> (carried over to this meeting)
> a. How to download NestOS images from the openEuler website
> b. Providing the resources needed to deploy the automatic-update environment NestOS requires
> c. How to set up the project so that more people participate in NestOS
>
> -----Original Message-----
> From: openEuler conference [mailto:public@openeuler.org]
> Sent: Tuesday, November 30, 2021 2:35 PM
> Subject: [Dev] openEuler Technical Committee regular meeting
>
> Hello!
>
> The openEuler TC SIG invites you to attend a Zoom meeting (auto-recorded) to be held at 2021-12-01 10:00.
>
> The subject of the meeting is: openEuler Technical Committee regular meeting.
>
> You can join the meeting at https://us06web.zoom.us/j/82159612220?pwd=ZlNxWkEwY1MyUlQ3SmtFNmNEVGtwQT09.
>
> Note: You are advised to change your participant name after joining the meeting, or use your ID at gitee.com.
>
> More information: https://openeuler.org/en/
>
[PATCH openEuler-1.0-LTS] config: disable CONFIG_NGBE by default in hulk_defconfig
by Yang Yingliang 03 Dec '21
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4C4XW?from=project-issue
CVE: NA
---------------------------------------
Disable CONFIG_NGBE by default on ARM64 in hulk_defconfig.
Reviewed-by: Cheng Jian <cj.chengjian(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
arch/arm64/configs/hulk_defconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm64/configs/hulk_defconfig b/arch/arm64/configs/hulk_defconfig
index fdf628f1fa028..e80f4b7fde56d 100644
--- a/arch/arm64/configs/hulk_defconfig
+++ b/arch/arm64/configs/hulk_defconfig
@@ -2491,6 +2491,7 @@ CONFIG_ICE=m
CONFIG_FM10K=m
CONFIG_NET_VENDOR_NETSWIFT=y
CONFIG_TXGBE=m
+# CONFIG_NGBE is not set
# CONFIG_JME is not set
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_NET_VENDOR_MELLANOX=y
--
2.25.1
03 Dec '21
From: zhenpengzheng <zhenpengzheng(a)net-swift.com>
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4C4XW?from=project-issue
CVE: NA
------------------------------------------
This patch contains the main code of the Netswift Giga NIC driver, which supports the following devices:
1) Netswift WX1860AL_W 8088:0100[VID:DID]
2) Netswift WX1860A2 8088:0101[VID:DID]
3) Netswift WX1860A2S 8088:0102[VID:DID]
4) Netswift WX1860A4 8088:0103[VID:DID]
5) Netswift WX1860A4S 8088:0104[VID:DID]
6) Netswift WX1860AL2 8088:0105[VID:DID]
7) Netswift WX1860AL2S 8088:0106[VID:DID]
8) Netswift WX1860AL4 8088:0107[VID:DID]
9) Netswift WX1860AL4S 8088:0108[VID:DID]
10) Netswift WX1860NCSI 8088:0109[VID:DID]
11) Netswift WX1860A1 8088:010a[VID:DID]
12) Netswift WX1860AL1 8088:010b[VID:DID]
Signed-off-by: zhenpengzheng <zhenpengzheng(a)net-swift.com> #openEuler_contributor
Signed-off-by: Zhen Lei <thunder.leizhen(a)huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin(a)huawei.com>
Acked-by: Xie XiuQi <xiexiuqi(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/net/ethernet/netswift/Kconfig | 66 +
drivers/net/ethernet/netswift/Makefile | 1 +
drivers/net/ethernet/netswift/ngbe/Makefile | 16 +
drivers/net/ethernet/netswift/ngbe/ngbe.h | 1109 +++
.../net/ethernet/netswift/ngbe/ngbe_debugfs.c | 764 ++
.../net/ethernet/netswift/ngbe/ngbe_ethtool.c | 2756 +++++++
drivers/net/ethernet/netswift/ngbe/ngbe_hw.c | 5047 ++++++++++++
drivers/net/ethernet/netswift/ngbe/ngbe_hw.h | 280 +
drivers/net/ethernet/netswift/ngbe/ngbe_lib.c | 701 ++
.../net/ethernet/netswift/ngbe/ngbe_main.c | 7119 +++++++++++++++++
drivers/net/ethernet/netswift/ngbe/ngbe_mbx.c | 687 ++
drivers/net/ethernet/netswift/ngbe/ngbe_mbx.h | 167 +
.../net/ethernet/netswift/ngbe/ngbe_param.c | 839 ++
.../net/ethernet/netswift/ngbe/ngbe_pcierr.c | 257 +
.../net/ethernet/netswift/ngbe/ngbe_pcierr.h | 23 +
drivers/net/ethernet/netswift/ngbe/ngbe_phy.c | 1243 +++
drivers/net/ethernet/netswift/ngbe/ngbe_phy.h | 201 +
.../net/ethernet/netswift/ngbe/ngbe_procfs.c | 908 +++
drivers/net/ethernet/netswift/ngbe/ngbe_ptp.c | 858 ++
.../net/ethernet/netswift/ngbe/ngbe_sriov.c | 1461 ++++
.../net/ethernet/netswift/ngbe/ngbe_sriov.h | 63 +
.../net/ethernet/netswift/ngbe/ngbe_sysfs.c | 222 +
.../net/ethernet/netswift/ngbe/ngbe_type.h | 2941 +++++++
23 files changed, 27729 insertions(+)
create mode 100644 drivers/net/ethernet/netswift/ngbe/Makefile
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe.h
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_debugfs.c
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_ethtool.c
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_hw.c
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_hw.h
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_lib.c
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_main.c
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_mbx.c
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_mbx.h
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_param.c
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_pcierr.c
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_pcierr.h
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_phy.c
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_phy.h
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_procfs.c
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_ptp.c
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_sriov.c
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_sriov.h
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_sysfs.c
create mode 100644 drivers/net/ethernet/netswift/ngbe/ngbe_type.h
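For orientation, the twelve VID:DID pairs listed above are what the driver matches at probe time. Below is a hedged sketch of the corresponding PCI device table (illustrative only: the table name here is made up, and the driver's real table lives in ngbe_main.c inside this patch):

#include <linux/module.h>
#include <linux/pci.h>

/* Sketch: pci_device_id entries for the VID:DID pairs named in the
 * commit message; vendor ID 0x8088 is WangXun/Netswift. */
static const struct pci_device_id ngbe_sketch_pci_tbl[] = {
	{ PCI_DEVICE(0x8088, 0x0100) },	/* WX1860AL_W */
	{ PCI_DEVICE(0x8088, 0x0101) },	/* WX1860A2 */
	{ PCI_DEVICE(0x8088, 0x0102) },	/* WX1860A2S */
	{ PCI_DEVICE(0x8088, 0x0103) },	/* WX1860A4 */
	{ PCI_DEVICE(0x8088, 0x0104) },	/* WX1860A4S */
	{ PCI_DEVICE(0x8088, 0x0105) },	/* WX1860AL2 */
	{ PCI_DEVICE(0x8088, 0x0106) },	/* WX1860AL2S */
	{ PCI_DEVICE(0x8088, 0x0107) },	/* WX1860AL4 */
	{ PCI_DEVICE(0x8088, 0x0108) },	/* WX1860AL4S */
	{ PCI_DEVICE(0x8088, 0x0109) },	/* WX1860NCSI */
	{ PCI_DEVICE(0x8088, 0x010a) },	/* WX1860A1 */
	{ PCI_DEVICE(0x8088, 0x010b) },	/* WX1860AL1 */
	{ /* end: all zeros */ }
};
MODULE_DEVICE_TABLE(pci, ngbe_sketch_pci_tbl);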
diff --git a/drivers/net/ethernet/netswift/Kconfig b/drivers/net/ethernet/netswift/Kconfig
index c4b510b659ae9..58b0cfa917c63 100644
--- a/drivers/net/ethernet/netswift/Kconfig
+++ b/drivers/net/ethernet/netswift/Kconfig
@@ -17,4 +17,70 @@ if NET_VENDOR_NETSWIFT
source "drivers/net/ethernet/netswift/txgbe/Kconfig"
+config NGBE
+ tristate "Netswift PCI-Express Gigabit Ethernet support"
+ depends on PCI
+ imply PTP_1588_CLOCK
+ ---help---
+ This driver supports Netswift gigabit ethernet adapters.
+ For more information on how to identify your adapter, go
+ to <http://www.net-swift.com>
+
+ To compile this driver as a module, choose M here. The module
+ will be called ngbe.
+
+config NGBE_HWMON
+ bool "Netswift PCI-Express Gigabit adapters HWMON support"
+ default n
+ depends on NGBE && HWMON && !(NGBE=y && HWMON=m)
+ ---help---
+ Say Y if you want to expose thermal sensor data on these devices.
+
+ If unsure, say N.
+
+config NGBE_PROCFS
+ bool "Netswift PCI-Express Gigabit adapters procfs support"
+ default n
+ depends on NGBE && !NGBE_SYSFS
+ ---help---
+ Say Y if you want to setup procfs for these devices.
+
+ If unsure, say N.
+
+config NGBE_NO_LLI
+ bool "Netswift PCI-Express Gigabit adapters NO Low Latency Interrupt support"
+ default n
+ depends on NGBE
+ ---help---
+ Say N if you want to enable LLI for these devices.
+
+ If unsure, say Y.
+
+config NGBE_DEBUG_FS
+ bool "Netswift PCI-Express Gigabit adapters debugfs support"
+ default n
+ depends on NGBE
+ ---help---
+ Say Y if you want to setup debugfs for these devices.
+
+ If unsure, say N.
+
+config NGBE_POLL_LINK_STATUS
+ bool "Netswift PCI-Express Gigabit adapters poll mode support"
+ default n
+ depends on NGBE
+ ---help---
+ Say Y if you want to switch these devices to poll mode instead of interrupt-triggered TX/RX.
+
+ If unsure, say N.
+
+config NGBE_SYSFS
+ bool "Netswift PCI-Express Gigabit adapters sysfs support"
+ default n
+ depends on NGBE
+ ---help---
+ Say Y if you want to setup sysfs for these devices.
+
+ If unsure, say N.
+
endif # NET_VENDOR_NETSWIFT
diff --git a/drivers/net/ethernet/netswift/Makefile b/drivers/net/ethernet/netswift/Makefile
index 0845d08600bee..5690b6392ce2f 100644
--- a/drivers/net/ethernet/netswift/Makefile
+++ b/drivers/net/ethernet/netswift/Makefile
@@ -4,3 +4,4 @@
#
obj-$(CONFIG_TXGBE) += txgbe/
+obj-$(CONFIG_NGBE) += ngbe/
diff --git a/drivers/net/ethernet/netswift/ngbe/Makefile b/drivers/net/ethernet/netswift/ngbe/Makefile
new file mode 100644
index 0000000000000..dd6615eee4ede
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+#
+# Makefile for the Netswift Gigabit PCI Express ethernet driver
+#
+
+obj-$(CONFIG_NGBE) += ngbe.o
+
+ngbe-objs := ngbe_main.o ngbe_ethtool.o \
+ ngbe_hw.o ngbe_phy.o ngbe_sriov.o \
+ ngbe_mbx.o ngbe_pcierr.o ngbe_param.o ngbe_lib.o ngbe_ptp.o
+
+ngbe-$(CONFIG_NGBE_HWMON) += ngbe_sysfs.o
+ngbe-$(CONFIG_NGBE_DEBUG_FS) += ngbe_debugfs.o
+ngbe-$(CONFIG_NGBE_PROCFS) += ngbe_procfs.o
+ngbe-$(CONFIG_NGBE_SYSFS) += ngbe_sysfs.o
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe.h b/drivers/net/ethernet/netswift/ngbe/ngbe.h
new file mode 100644
index 0000000000000..4e77777143a04
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe.h
@@ -0,0 +1,1109 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+
+#ifndef _NGBE_H_
+#define _NGBE_H_
+
+#include <net/ip.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/sctp.h>
+#include <linux/timecounter.h>
+#include <linux/clocksource.h>
+#include <linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/aer.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/sched.h>
+
+#include "ngbe_type.h"
+
+/* Ether Types */
+#define NGBE_ETH_P_LLDP 0x88CC
+#define NGBE_ETH_P_CNM 0x22E7
+
+/* TX/RX descriptor defines */
+#define NGBE_DEFAULT_TXD 512 /* default ring size */
+#define NGBE_DEFAULT_TX_WORK 256
+#define NGBE_MAX_TXD 8192
+#define NGBE_MIN_TXD 128
+
+#define NGBE_DEFAULT_RXD 512 /* default ring size */
+#define NGBE_DEFAULT_RX_WORK 256
+#define NGBE_MAX_RXD 8192
+#define NGBE_MIN_RXD 128
+
+#define NGBE_ETH_P_LLDP 0x88CC
+
+/* flow control */
+#define NGBE_MIN_FCRTL 0x40
+#define NGBE_MAX_FCRTL 0x7FF80
+#define NGBE_MIN_FCRTH 0x600
+#define NGBE_MAX_FCRTH 0x7FFF0
+#define NGBE_DEFAULT_FCPAUSE 0xFFFF
+#define NGBE_MIN_FCPAUSE 0
+#define NGBE_MAX_FCPAUSE 0xFFFF
+
+/* Supported Rx Buffer Sizes */
+#define NGBE_RXBUFFER_256 256 /* Used for skb receive header */
+#define NGBE_RXBUFFER_2K 2048
+#define NGBE_RXBUFFER_3K 3072
+#define NGBE_RXBUFFER_4K 4096
+#define NGBE_MAX_RXBUFFER 16384 /* largest size for single descriptor */
+
+/*
+ * NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we
+ * reserve 64 more, and skb_shared_info adds an additional 320 bytes more,
+ * this adds up to 448 bytes of extra data.
+ *
+ * Since netdev_alloc_skb now allocates a page fragment we can use a value
+ * of 256 and the resultant skb will have a truesize of 960 or less.
+ */
+#define NGBE_RX_HDR_SIZE NGBE_RXBUFFER_256
+
+#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define NGBE_RX_BUFFER_WRITE 16 /* Must be power of 2 */
+
+#define NGBE_RX_DMA_ATTR \
+ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+enum ngbe_tx_flags {
+ /* cmd_type flags */
+ NGBE_TX_FLAGS_HW_VLAN = 0x01,
+ NGBE_TX_FLAGS_TSO = 0x02,
+ NGBE_TX_FLAGS_TSTAMP = 0x04,
+
+ /* olinfo flags */
+ NGBE_TX_FLAGS_CC = 0x08,
+ NGBE_TX_FLAGS_IPV4 = 0x10,
+ NGBE_TX_FLAGS_CSUM = 0x20,
+ NGBE_TX_FLAGS_OUTER_IPV4 = 0x100,
+ NGBE_TX_FLAGS_LINKSEC = 0x200,
+ NGBE_TX_FLAGS_IPSEC = 0x400,
+
+ /* software defined flags */
+ NGBE_TX_FLAGS_SW_VLAN = 0x40,
+ NGBE_TX_FLAGS_FCOE = 0x80,
+};
+
+/* VLAN info */
+#define NGBE_TX_FLAGS_VLAN_MASK 0xffff0000
+#define NGBE_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000
+#define NGBE_TX_FLAGS_VLAN_PRIO_SHIFT 29
+#define NGBE_TX_FLAGS_VLAN_SHIFT 16
+
+#define NGBE_MAX_RX_DESC_POLL 10
+
+#define NGBE_MAX_VF_MC_ENTRIES 30
+#define NGBE_MAX_VF_FUNCTIONS 8
+#define MAX_EMULATION_MAC_ADDRS 16
+#define NGBE_MAX_PF_MACVLANS 15
+#define NGBE_VF_DEVICE_ID 0x1000
+
+/* must account for pools assigned to VFs. */
+#ifdef CONFIG_PCI_IOV
+#define VMDQ_P(p) ((p) + adapter->ring_feature[RING_F_VMDQ].offset)
+#else
+#define VMDQ_P(p) (p)
+#endif
+
+#define UPDATE_VF_COUNTER_32bit(reg, last_counter, counter) \
+ { \
+ u32 current_counter = rd32(hw, reg); \
+ if (current_counter < last_counter) \
+ counter += 0x100000000LL; \
+ last_counter = current_counter; \
+ counter &= 0xFFFFFFFF00000000LL; \
+ counter |= current_counter; \
+ }
+
+#define UPDATE_VF_COUNTER_36bit(reg_lsb, reg_msb, last_counter, counter) \
+ { \
+ u64 current_counter_lsb = rd32(hw, reg_lsb); \
+ u64 current_counter_msb = rd32(hw, reg_msb); \
+ u64 current_counter = (current_counter_msb << 32) | \
+ current_counter_lsb; \
+ if (current_counter < last_counter) \
+ counter += 0x1000000000LL; \
+ last_counter = current_counter; \
+ counter &= 0xFFFFFFF000000000LL; \
+ counter |= current_counter; \
+ }
+
+struct vf_stats {
+ u64 gprc;
+ u64 gorc;
+ u64 gptc;
+ u64 gotc;
+ u64 mprc;
+};
+
+struct vf_data_storage {
+ struct pci_dev *vfdev;
+ u8 __iomem *b4_addr;
+ u32 b4_buf[16];
+ unsigned char vf_mac_addresses[ETH_ALEN];
+ u16 vf_mc_hashes[NGBE_MAX_VF_MC_ENTRIES];
+ u16 num_vf_mc_hashes;
+ u16 default_vf_vlan_id;
+ u16 vlans_enabled;
+ bool clear_to_send;
+ struct vf_stats vfstats;
+ struct vf_stats last_vfstats;
+ struct vf_stats saved_rst_vfstats;
+ bool pf_set_mac;
+ u16 pf_vlan; /* When set, guest VLAN config not allowed. */
+ u16 pf_qos;
+ u16 min_tx_rate;
+ u16 max_tx_rate;
+ u16 vlan_count;
+ u8 spoofchk_enabled;
+ u8 trusted;
+ int xcast_mode;
+ unsigned int vf_api;
+};
+
+struct vf_macvlans {
+ struct list_head l;
+ int vf;
+ bool free;
+ bool is_macvlan;
+ u8 vf_macvlan[ETH_ALEN];
+};
+
+#define NGBE_MAX_TXD_PWR 14
+#define NGBE_MAX_DATA_PER_TXD (1 << NGBE_MAX_TXD_PWR)
+
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), NGBE_MAX_DATA_PER_TXD)
+#ifndef MAX_SKB_FRAGS
+#define DESC_NEEDED 4
+#elif (MAX_SKB_FRAGS < 16)
+#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4)
+#else
+#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
+#endif
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer */
+struct ngbe_tx_buffer {
+ union ngbe_tx_desc *next_to_watch;
+ unsigned long time_stamp;
+ struct sk_buff *skb;
+ unsigned int bytecount;
+ unsigned short gso_segs;
+ __be16 protocol;
+ DEFINE_DMA_UNMAP_ADDR(dma);
+ DEFINE_DMA_UNMAP_LEN(len);
+ u32 tx_flags;
+};
+
+struct ngbe_rx_buffer {
+ struct sk_buff *skb;
+ dma_addr_t dma;
+ dma_addr_t page_dma;
+ struct page *page;
+ unsigned int page_offset;
+};
+
+struct ngbe_queue_stats {
+ u64 packets;
+ u64 bytes;
+};
+
+struct ngbe_tx_queue_stats {
+ u64 restart_queue;
+ u64 tx_busy;
+ u64 tx_done_old;
+};
+
+struct ngbe_rx_queue_stats {
+ u64 non_eop_descs;
+ u64 alloc_rx_page_failed;
+ u64 alloc_rx_buff_failed;
+ u64 csum_good_cnt;
+ u64 csum_err;
+};
+
+#define NGBE_TS_HDR_LEN 8
+enum ngbe_ring_state_t {
+ __NGBE_RX_3K_BUFFER,
+ __NGBE_RX_BUILD_SKB_ENABLED,
+ __NGBE_TX_XPS_INIT_DONE,
+ __NGBE_TX_DETECT_HANG,
+ __NGBE_HANG_CHECK_ARMED,
+ __NGBE_RX_HS_ENABLED,
+};
+
+struct ngbe_fwd_adapter {
+ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+ struct net_device *vdev;
+ struct ngbe_adapter *adapter;
+ unsigned int tx_base_queue;
+ unsigned int rx_base_queue;
+ int index; /* pool index on PF */
+};
+
+#define ring_uses_build_skb(ring) \
+ test_bit(__NGBE_RX_BUILD_SKB_ENABLED, &(ring)->state)
+
+
+#define ring_is_hs_enabled(ring) \
+ test_bit(__NGBE_RX_HS_ENABLED, &(ring)->state)
+#define set_ring_hs_enabled(ring) \
+ set_bit(__NGBE_RX_HS_ENABLED, &(ring)->state)
+#define clear_ring_hs_enabled(ring) \
+ clear_bit(__NGBE_RX_HS_ENABLED, &(ring)->state)
+#define check_for_tx_hang(ring) \
+ test_bit(__NGBE_TX_DETECT_HANG, &(ring)->state)
+#define set_check_for_tx_hang(ring) \
+ set_bit(__NGBE_TX_DETECT_HANG, &(ring)->state)
+#define clear_check_for_tx_hang(ring) \
+ clear_bit(__NGBE_TX_DETECT_HANG, &(ring)->state)
+
+struct ngbe_ring {
+ struct ngbe_ring *next; /* pointer to next ring in q_vector */
+ struct ngbe_q_vector *q_vector; /* backpointer to host q_vector */
+ struct net_device *netdev; /* netdev ring belongs to */
+ struct device *dev; /* device for DMA mapping */
+ struct ngbe_fwd_adapter *accel;
+ void *desc; /* descriptor ring memory */
+ union {
+ struct ngbe_tx_buffer *tx_buffer_info;
+ struct ngbe_rx_buffer *rx_buffer_info;
+ };
+ unsigned long state;
+ u8 __iomem *tail;
+ dma_addr_t dma; /* phys. address of descriptor ring */
+ unsigned int size; /* length in bytes */
+
+ u16 count; /* amount of descriptors */
+
+ u8 queue_index; /* needed for multiqueue queue management */
+ u8 reg_idx; /* holds the special value that gets
+ * the hardware register offset
+ * associated with this ring, which is
+ * different for DCB and RSS modes
+ */
+ u16 next_to_use;
+ u16 next_to_clean;
+
+ unsigned long last_rx_timestamp;
+
+ u16 rx_buf_len;
+ union {
+ u16 next_to_alloc;
+ struct {
+ u8 atr_sample_rate;
+ u8 atr_count;
+ };
+ };
+
+ u8 dcb_tc;
+ struct ngbe_queue_stats stats;
+ struct u64_stats_sync syncp;
+
+ union {
+ struct ngbe_tx_queue_stats tx_stats;
+ struct ngbe_rx_queue_stats rx_stats;
+ };
+} ____cacheline_internodealigned_in_smp;
+
+enum ngbe_ring_f_enum {
+ RING_F_NONE = 0,
+ RING_F_VMDQ, /* SR-IOV uses the same ring feature */
+ RING_F_RSS,
+ RING_F_ARRAY_SIZE /* must be last in enum set */
+};
+
+#define TGB_MAX_RX_QUEUES 16
+#define NGBE_MAX_TX_QUEUES 16
+
+#define NGBE_MAX_RSS_INDICES 8
+#define NGBE_MAX_VMDQ_INDICES 8
+#define NGBE_MAX_FDIR_INDICES 8
+#define MAX_RX_QUEUES 8
+#define MAX_TX_QUEUES 8
+#define NGBE_MAX_L2A_QUEUES 4
+#define NGBE_BAD_L2A_QUEUE 3
+
+#define NGBE_MAX_MACVLANS 8
+
+struct ngbe_ring_feature {
+ u16 limit; /* upper limit on feature indices */
+ u16 indices; /* current value of indices */
+ u16 mask; /* Mask used for feature to ring mapping */
+ u16 offset; /* offset to start of feature */
+};
+
+/*
+ * FCoE requires that all Rx buffers be over 2200 bytes in length. Since
+ * this is twice the size of a half page we need to double the page order
+ * for FCoE enabled Rx queues.
+ */
+static inline unsigned int ngbe_rx_bufsz(struct ngbe_ring __maybe_unused *ring)
+{
+#if MAX_SKB_FRAGS < 8
+ return ALIGN(NGBE_MAX_RXBUFFER / MAX_SKB_FRAGS, 1024);
+#else
+ return NGBE_RXBUFFER_2K;
+#endif
+}
+
+static inline unsigned int ngbe_rx_pg_order(struct ngbe_ring __maybe_unused *ring)
+{
+ return 0;
+}
+#define ngbe_rx_pg_size(_ring) (PAGE_SIZE << ngbe_rx_pg_order(_ring))
+
+struct ngbe_ring_container {
+ struct ngbe_ring *ring; /* pointer to linked list of rings */
+ unsigned int total_bytes; /* total bytes processed this int */
+ unsigned int total_packets; /* total packets processed this int */
+ u16 work_limit; /* total work allowed per interrupt */
+ u8 count; /* total number of rings in vector */
+ u8 itr; /* current ITR setting for ring */
+};
+
+/* iterator for handling rings in ring container */
+#define ngbe_for_each_ring(pos, head) \
+ for (pos = (head).ring; pos != NULL; pos = pos->next)
+
+#define MAX_RX_PACKET_BUFFERS ((adapter->flags & NGBE_FLAG_DCB_ENABLED) \
+ ? 8 : 1)
+#define MAX_TX_PACKET_BUFFERS MAX_RX_PACKET_BUFFERS
+
+/* MAX_MSIX_Q_VECTORS of these are allocated,
+ * but we only use one per queue-specific vector.
+ */
+struct ngbe_q_vector {
+ struct ngbe_adapter *adapter;
+ int cpu; /* CPU for DCA */
+ u16 v_idx; /* index of q_vector within array, also used for
+ * finding the bit in EICR and friends that
+ * represents the vector for this ring */
+ u16 itr; /* Interrupt throttle rate written to EITR */
+ struct ngbe_ring_container rx, tx;
+
+ struct napi_struct napi;
+ cpumask_t affinity_mask;
+ int numa_node;
+ struct rcu_head rcu; /* to avoid race with update stats on free */
+ char name[IFNAMSIZ + 17];
+ bool netpoll_rx;
+
+ /* for dynamic allocation of rings associated with this q_vector */
+ struct ngbe_ring ring[0] ____cacheline_internodealigned_in_smp;
+};
+
+#ifdef CONFIG_NGBE_HWMON
+
+#define NGBE_HWMON_TYPE_TEMP 0
+#define NGBE_HWMON_TYPE_ALARMTHRESH 1
+#define NGBE_HWMON_TYPE_DALARMTHRESH 2
+
+struct hwmon_attr {
+ struct device_attribute dev_attr;
+ struct ngbe_hw *hw;
+ struct ngbe_thermal_diode_data *sensor;
+ char name[19];
+};
+
+struct hwmon_buff {
+ struct device *device;
+ struct hwmon_attr *hwmon_list;
+ unsigned int n_hwmon;
+};
+#endif /* CONFIG_NGBE_HWMON */
+
+/*
+ * microsecond values for various ITR rates shifted by 2 to fit itr register
+ * with the first 3 bits reserved 0
+ */
+#define NGBE_70K_ITR 57
+#define NGBE_20K_ITR 200
+#define NGBE_4K_ITR 1024
+#define NGBE_7K_ITR 595
+
+/* ngbe_test_staterr - tests bits in Rx descriptor status and error fields */
+static inline __le32 ngbe_test_staterr(union ngbe_rx_desc *rx_desc,
+ const u32 stat_err_bits)
+{
+ return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
+/* ngbe_desc_unused - calculate if we have unused descriptors */
+static inline u16 ngbe_desc_unused(struct ngbe_ring *ring)
+{
+ u16 ntc = ring->next_to_clean;
+ u16 ntu = ring->next_to_use;
+
+ return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
+}
+
+#define NGBE_RX_DESC(R, i) \
+ (&(((union ngbe_rx_desc *)((R)->desc))[i]))
+#define NGBE_TX_DESC(R, i) \
+ (&(((union ngbe_tx_desc *)((R)->desc))[i]))
+#define NGBE_TX_CTXTDESC(R, i) \
+ (&(((struct ngbe_tx_context_desc *)((R)->desc))[i]))
+
+#define NGBE_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */
+#define TCP_TIMER_VECTOR 0
+#define OTHER_VECTOR 1
+#define NON_Q_VECTORS (OTHER_VECTOR + TCP_TIMER_VECTOR)
+
+#define NGBE_MAX_MSIX_Q_VECTORS_EMERALD 9
+
+struct ngbe_mac_addr {
+ u8 addr[ETH_ALEN];
+ u16 state; /* bitmask */
+ u64 pools;
+};
+
+#define NGBE_MAC_STATE_DEFAULT 0x1
+#define NGBE_MAC_STATE_MODIFIED 0x2
+#define NGBE_MAC_STATE_IN_USE 0x4
+
+#ifdef CONFIG_NGBE_PROCFS
+struct ngbe_therm_proc_data {
+ struct ngbe_hw *hw;
+ struct ngbe_thermal_diode_data *sensor_data;
+};
+#endif
+
+/*
+ * Only for array allocations in our adapter struct.
+ * we can actually assign 64 queue vectors based on our extended-extended
+ * interrupt registers.
+ */
+#define MAX_MSIX_Q_VECTORS NGBE_MAX_MSIX_Q_VECTORS_EMERALD
+#define MAX_MSIX_COUNT NGBE_MAX_MSIX_VECTORS_EMERALD
+
+#define MIN_MSIX_Q_VECTORS 1
+#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NON_Q_VECTORS)
+
+/* default to trying for four seconds */
+#define NGBE_TRY_LINK_TIMEOUT (4 * HZ)
+#define NGBE_SFP_POLL_JIFFIES (2 * HZ) /* SFP poll every 2 seconds */
+
+/**
+ * ngbe_adapter.flag
+ **/
+#define NGBE_FLAG_MSI_CAPABLE (u32)(1 << 0)
+#define NGBE_FLAG_MSI_ENABLED (u32)(1 << 1)
+#define NGBE_FLAG_MSIX_CAPABLE (u32)(1 << 2)
+#define NGBE_FLAG_MSIX_ENABLED (u32)(1 << 3)
+#ifndef CONFIG_NGBE_NO_LLI
+#define NGBE_FLAG_LLI_PUSH (u32)(1 << 4)
+#endif
+
+#define NGBE_FLAG_TPH_ENABLED (u32)(1 << 6)
+#define NGBE_FLAG_TPH_CAPABLE (u32)(1 << 7)
+#define NGBE_FLAG_TPH_ENABLED_DATA (u32)(1 << 8)
+
+#define NGBE_FLAG_MQ_CAPABLE (u32)(1 << 9)
+#define NGBE_FLAG_DCB_ENABLED (u32)(1 << 10)
+#define NGBE_FLAG_VMDQ_ENABLED (u32)(1 << 11)
+#define NGBE_FLAG_FAN_FAIL_CAPABLE (u32)(1 << 12)
+#define NGBE_FLAG_NEED_LINK_UPDATE (u32)(1 << 13)
+#define NGBE_FLAG_NEED_ANC_CHECK (u32)(1 << 14)
+#define NGBE_FLAG_FDIR_HASH_CAPABLE (u32)(1 << 15)
+#define NGBE_FLAG_FDIR_PERFECT_CAPABLE (u32)(1 << 16)
+#define NGBE_FLAG_SRIOV_CAPABLE (u32)(1 << 19)
+#define NGBE_FLAG_SRIOV_ENABLED (u32)(1 << 20)
+#define NGBE_FLAG_SRIOV_REPLICATION_ENABLE (u32)(1 << 21)
+#define NGBE_FLAG_SRIOV_L2SWITCH_ENABLE (u32)(1 << 22)
+#define NGBE_FLAG_SRIOV_VEPA_BRIDGE_MODE (u32)(1 << 23)
+#define NGBE_FLAG_RX_HWTSTAMP_ENABLED (u32)(1 << 24)
+#define NGBE_FLAG_VXLAN_OFFLOAD_CAPABLE (u32)(1 << 25)
+#define NGBE_FLAG_VXLAN_OFFLOAD_ENABLE (u32)(1 << 26)
+#define NGBE_FLAG_RX_HWTSTAMP_IN_REGISTER (u32)(1 << 27)
+#define NGBE_FLAG_NEED_ETH_PHY_RESET (u32)(1 << 28)
+#define NGBE_FLAG_RX_HS_ENABLED (u32)(1 << 30)
+#define NGBE_FLAG_LINKSEC_ENABLED (u32)(1 << 31)
+#define NGBE_FLAG_IPSEC_ENABLED (u32)(1 << 5)
+
+/* preset defaults */
+#define NGBE_FLAGS_SP_INIT (NGBE_FLAG_MSI_CAPABLE \
+ | NGBE_FLAG_MSIX_CAPABLE \
+ | NGBE_FLAG_MQ_CAPABLE \
+ | NGBE_FLAG_SRIOV_CAPABLE)
+
+/**
+ * ngbe_adapter.flag2
+ **/
+#define NGBE_FLAG2_RSC_CAPABLE (1U << 0)
+#define NGBE_FLAG2_RSC_ENABLED (1U << 1)
+#define NGBE_FLAG2_TEMP_SENSOR_CAPABLE (1U << 3)
+#define NGBE_FLAG2_TEMP_SENSOR_EVENT (1U << 4)
+#define NGBE_FLAG2_SEARCH_FOR_SFP (1U << 5)
+#define NGBE_FLAG2_SFP_NEEDS_RESET (1U << 6)
+#define NGBE_FLAG2_PF_RESET_REQUESTED (1U << 7)
+#define NGBE_FLAG2_FDIR_REQUIRES_REINIT (1U << 8)
+#define NGBE_FLAG2_RSS_FIELD_IPV4_UDP (1U << 9)
+#define NGBE_FLAG2_RSS_FIELD_IPV6_UDP (1U << 10)
+#define NGBE_FLAG2_RSS_ENABLED (1U << 12)
+#define NGBE_FLAG2_PTP_PPS_ENABLED (1U << 11)
+#define NGBE_FLAG2_EEE_CAPABLE (1U << 14)
+#define NGBE_FLAG2_EEE_ENABLED (1U << 15)
+#define NGBE_FLAG2_VXLAN_REREG_NEEDED (1U << 16)
+#define NGBE_FLAG2_DEV_RESET_REQUESTED (1U << 18)
+#define NGBE_FLAG2_RESET_INTR_RECEIVED (1U << 19)
+#define NGBE_FLAG2_GLOBAL_RESET_REQUESTED (1U << 20)
+#define NGBE_FLAG2_MNG_REG_ACCESS_DISABLED (1U << 22)
+#define NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP (1U << 23)
+#define NGBE_FLAG2_PCIE_NEED_RECOVER (1U << 31)
+
+#define NGBE_SET_FLAG(_input, _flag, _result) \
+ ((_flag <= _result) ? \
+ ((u32)(_input & _flag) * (_result / _flag)) : \
+ ((u32)(_input & _flag) / (_flag / _result)))
+
+enum ngbe_isb_idx {
+ NGBE_ISB_HEADER,
+ NGBE_ISB_MISC,
+ NGBE_ISB_VEC0,
+ NGBE_ISB_VEC1,
+ NGBE_ISB_MAX
+};
+
+/* board specific private data structure */
+struct ngbe_adapter {
+ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+ /* OS defined structs */
+ struct net_device *netdev;
+ struct pci_dev *pdev;
+
+ unsigned long state;
+
+ /* Some features need tri-state capability,
+ * thus the additional *_CAPABLE flags.
+ */
+ u32 flags;
+ u32 flags2;
+
+ /* Tx fast path data */
+ int num_tx_queues;
+ u16 tx_itr_setting;
+ u16 tx_work_limit;
+
+ /* Rx fast path data */
+ int num_rx_queues;
+ u16 rx_itr_setting;
+ u16 rx_work_limit;
+
+ unsigned int num_vmdqs; /* does not include pools assigned to VFs */
+ unsigned int queues_per_pool;
+
+ /* TX */
+ struct ngbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;
+
+ u64 restart_queue;
+ u64 lsc_int;
+ u32 tx_timeout_count;
+
+ /* RX */
+ struct ngbe_ring *rx_ring[MAX_RX_QUEUES];
+ u64 hw_csum_rx_error;
+ u64 hw_csum_rx_good;
+ u64 hw_rx_no_dma_resources;
+ u64 non_eop_descs;
+ u32 alloc_rx_page_failed;
+ u32 alloc_rx_buff_failed;
+
+ struct ngbe_q_vector *q_vector[MAX_MSIX_Q_VECTORS];
+
+#ifdef HAVE_DCBNL_IEEE
+ struct ieee_pfc *ngbe_ieee_pfc;
+ struct ieee_ets *ngbe_ieee_ets;
+#endif
+ enum ngbe_fc_mode last_lfc_mode;
+ int num_q_vectors; /* current number of q_vectors for device */
+ int max_q_vectors; /* upper limit of q_vectors for device */
+ struct ngbe_ring_feature ring_feature[RING_F_ARRAY_SIZE];
+ struct msix_entry *msix_entries;
+
+ u64 test_icr;
+ struct ngbe_ring test_tx_ring;
+ struct ngbe_ring test_rx_ring;
+
+ /* structs defined in ngbe_hw.h */
+ struct ngbe_hw hw;
+ u16 msg_enable;
+ struct ngbe_hw_stats stats;
+#ifndef CONFIG_NGBE_NO_LLI
+ u32 lli_port;
+ u32 lli_size;
+ u32 lli_etype;
+ u32 lli_vlan_pri;
+#endif /* CONFIG_NGBE_NO_LLI */
+
+ u32 *config_space;
+ u64 tx_busy;
+ unsigned int tx_ring_count;
+ unsigned int rx_ring_count;
+
+ u32 link_speed;
+ bool link_up;
+ unsigned long sfp_poll_time;
+ unsigned long link_check_timeout;
+
+ struct timer_list service_timer;
+ struct work_struct service_task;
+#ifdef CONFIG_NGBE_POLL_LINK_STATUS
+ struct timer_list link_check_timer;
+#endif
+ u32 atr_sample_rate;
+ u8 __iomem *io_addr; /* Mainly for iounmap use */
+ u32 wol;
+
+ u16 bd_number;
+ u16 bridge_mode;
+
+ char eeprom_id[32];
+ u16 eeprom_cap;
+ bool netdev_registered;
+ u32 interrupt_event;
+ u32 led_reg;
+
+ struct ptp_clock *ptp_clock;
+ struct ptp_clock_info ptp_caps;
+ struct work_struct ptp_tx_work;
+ struct sk_buff *ptp_tx_skb;
+ struct hwtstamp_config tstamp_config;
+ unsigned long ptp_tx_start;
+ unsigned long last_overflow_check;
+ unsigned long last_rx_ptp_check;
+ spinlock_t tmreg_lock;
+ struct cyclecounter hw_cc;
+ struct timecounter hw_tc;
+ u32 base_incval;
+ u32 tx_hwtstamp_timeouts;
+ u32 tx_hwtstamp_skipped;
+ u32 rx_hwtstamp_cleared;
+ void (*ptp_setup_sdp) (struct ngbe_adapter *);
+
+ DECLARE_BITMAP(active_vfs, NGBE_MAX_VF_FUNCTIONS);
+ unsigned int num_vfs;
+ struct vf_data_storage *vfinfo;
+ struct vf_macvlans vf_mvs;
+ struct vf_macvlans *mv_list;
+#ifdef CONFIG_PCI_IOV
+ u32 timer_event_accumulator;
+ u32 vferr_refcount;
+#endif
+ struct ngbe_mac_addr *mac_table;
+
+ __le16 vxlan_port;
+ __le16 geneve_port;
+
+#ifdef CONFIG_NGBE_SYSFS
+#ifdef CONFIG_NGBE_HWMON
+ struct hwmon_buff ngbe_hwmon_buff;
+#endif /* CONFIG_NGBE_HWMON */
+#else /* CONFIG_NGBE_SYSFS */
+#ifdef CONFIG_NGBE_PROCFS
+ struct proc_dir_entry *eth_dir;
+ struct proc_dir_entry *info_dir;
+ u64 old_lsc;
+ struct proc_dir_entry *therm_dir;
+ struct ngbe_therm_proc_data therm_data;
+#endif /* CONFIG_NGBE_PROCFS */
+#endif /* CONFIG_NGBE_SYSFS */
+
+#ifdef CONFIG_NGBE_DEBUG_FS
+ struct dentry *ngbe_dbg_adapter;
+#endif /* CONFIG_NGBE_DEBUG_FS */
+ u8 default_up;
+ unsigned long fwd_bitmask; /* bitmask indicating in use pools */
+ unsigned long tx_timeout_last_recovery;
+ u32 tx_timeout_recovery_level;
+
+#define NGBE_MAX_RETA_ENTRIES 128
+ u8 rss_indir_tbl[NGBE_MAX_RETA_ENTRIES];
+#define NGBE_RSS_KEY_SIZE 40
+ u32 rss_key[NGBE_RSS_KEY_SIZE / sizeof(u32)];
+
+ void *ipsec;
+
+ /* misc interrupt status block */
+ dma_addr_t isb_dma;
+ u32 *isb_mem;
+ u32 isb_tag[NGBE_ISB_MAX];
+
+ u32 hang_cnt;
+};
+
+static inline u32 ngbe_misc_isb(struct ngbe_adapter *adapter,
+ enum ngbe_isb_idx idx)
+{
+ u32 cur_tag = 0;
+ u32 cur_diff = 0;
+
+ cur_tag = adapter->isb_mem[NGBE_ISB_HEADER];
+ cur_diff = cur_tag - adapter->isb_tag[idx];
+
+ adapter->isb_tag[idx] = cur_tag;
+
+ return cpu_to_le32(adapter->isb_mem[idx]);
+}
+
+static inline u8 ngbe_max_rss_indices(struct ngbe_adapter *adapter)
+{
+ return NGBE_MAX_RSS_INDICES;
+}
+
+enum ngbe_state_t {
+ __NGBE_TESTING,
+ __NGBE_RESETTING,
+ __NGBE_DOWN,
+ __NGBE_HANGING,
+ __NGBE_DISABLED,
+ __NGBE_REMOVING,
+ __NGBE_SERVICE_SCHED,
+ __NGBE_SERVICE_INITED,
+ __NGBE_IN_SFP_INIT,
+ __NGBE_PTP_RUNNING,
+ __NGBE_PTP_TX_IN_PROGRESS,
+};
+
+struct ngbe_cb {
+ dma_addr_t dma;
+ u16 append_cnt; /* number of skb's appended */
+ bool page_released;
+ bool dma_released;
+};
+#define NGBE_CB(skb) ((struct ngbe_cb *)(skb)->cb)
+
+/* ESX ngbe CIM IOCTL definition */
+
+#ifdef CONFIG_NGBE_SYSFS
+void ngbe_sysfs_exit(struct ngbe_adapter *adapter);
+int ngbe_sysfs_init(struct ngbe_adapter *adapter);
+#endif /* CONFIG_NGBE_SYSFS */
+#ifdef CONFIG_NGBE_PROCFS
+void ngbe_procfs_exit(struct ngbe_adapter *adapter);
+int ngbe_procfs_init(struct ngbe_adapter *adapter);
+int ngbe_procfs_topdir_init(void);
+void ngbe_procfs_topdir_exit(void);
+#endif /* CONFIG_NGBE_PROCFS */
+
+/* needed by ngbe_main.c */
+int ngbe_validate_mac_addr(u8 *mc_addr);
+void ngbe_check_options(struct ngbe_adapter *adapter);
+void ngbe_assign_netdev_ops(struct net_device *netdev);
+
+/* needed by ngbe_ethtool.c */
+extern char ngbe_driver_name[];
+extern const char ngbe_driver_version[];
+
+void ngbe_irq_disable(struct ngbe_adapter *adapter);
+void ngbe_irq_enable(struct ngbe_adapter *adapter, bool queues, bool flush);
+int ngbe_open(struct net_device *netdev);
+int ngbe_close(struct net_device *netdev);
+void ngbe_up(struct ngbe_adapter *adapter);
+void ngbe_down(struct ngbe_adapter *adapter);
+void ngbe_reinit_locked(struct ngbe_adapter *adapter);
+void ngbe_reset(struct ngbe_adapter *adapter);
+void ngbe_set_ethtool_ops(struct net_device *netdev);
+int ngbe_setup_rx_resources(struct ngbe_ring *);
+int ngbe_setup_tx_resources(struct ngbe_ring *);
+void ngbe_free_rx_resources(struct ngbe_ring *);
+void ngbe_free_tx_resources(struct ngbe_ring *);
+void ngbe_configure_rx_ring(struct ngbe_adapter *,
+ struct ngbe_ring *);
+void ngbe_configure_tx_ring(struct ngbe_adapter *,
+ struct ngbe_ring *);
+void ngbe_update_stats(struct ngbe_adapter *adapter);
+int ngbe_init_interrupt_scheme(struct ngbe_adapter *adapter);
+void ngbe_reset_interrupt_capability(struct ngbe_adapter *adapter);
+void ngbe_set_interrupt_capability(struct ngbe_adapter *adapter);
+void ngbe_clear_interrupt_scheme(struct ngbe_adapter *adapter);
+netdev_tx_t ngbe_xmit_frame_ring(struct sk_buff *,
+ struct ngbe_adapter *,
+ struct ngbe_ring *);
+void ngbe_unmap_and_free_tx_resource(struct ngbe_ring *,
+ struct ngbe_tx_buffer *);
+void ngbe_alloc_rx_buffers(struct ngbe_ring *, u16);
+
+void ngbe_set_rx_mode(struct net_device *netdev);
+int ngbe_write_mc_addr_list(struct net_device *netdev);
+int ngbe_setup_tc(struct net_device *dev, u8 tc);
+void ngbe_tx_ctxtdesc(struct ngbe_ring *, u32, u32, u32, u32);
+void ngbe_do_reset(struct net_device *netdev);
+void ngbe_write_eitr(struct ngbe_q_vector *q_vector);
+int ngbe_poll(struct napi_struct *napi, int budget);
+void ngbe_disable_rx_queue(struct ngbe_adapter *adapter,
+ struct ngbe_ring *);
+void ngbe_vlan_strip_enable(struct ngbe_adapter *adapter);
+void ngbe_vlan_strip_disable(struct ngbe_adapter *adapter);
+
+#ifdef CONFIG_NGBE_DEBUG_FS
+void ngbe_dbg_adapter_init(struct ngbe_adapter *adapter);
+void ngbe_dbg_adapter_exit(struct ngbe_adapter *adapter);
+void ngbe_dbg_init(void);
+void ngbe_dbg_exit(void);
+void ngbe_dump(struct ngbe_adapter *adapter);
+#endif /* CONFIG_NGBE_DEBUG_FS */
+
+static inline struct netdev_queue *txring_txq(const struct ngbe_ring *ring)
+{
+ return netdev_get_tx_queue(ring->netdev, ring->queue_index);
+}
+
+int ngbe_wol_supported(struct ngbe_adapter *adapter);
+int ngbe_get_settings(struct net_device *netdev,
+ struct ethtool_cmd *ecmd);
+int ngbe_write_uc_addr_list(struct net_device *netdev, int pool);
+void ngbe_full_sync_mac_table(struct ngbe_adapter *adapter);
+int ngbe_add_mac_filter(struct ngbe_adapter *adapter,
+ u8 *addr, u16 pool);
+int ngbe_del_mac_filter(struct ngbe_adapter *adapter,
+ u8 *addr, u16 pool);
+int ngbe_available_rars(struct ngbe_adapter *adapter);
+void ngbe_vlan_mode(struct net_device *, u32);
+
+void ngbe_ptp_init(struct ngbe_adapter *adapter);
+void ngbe_ptp_stop(struct ngbe_adapter *adapter);
+void ngbe_ptp_suspend(struct ngbe_adapter *adapter);
+void ngbe_ptp_overflow_check(struct ngbe_adapter *adapter);
+void ngbe_ptp_rx_hang(struct ngbe_adapter *adapter);
+void ngbe_ptp_rx_hwtstamp(struct ngbe_adapter *adapter, struct sk_buff *skb);
+int ngbe_ptp_set_ts_config(struct ngbe_adapter *adapter, struct ifreq *ifr);
+int ngbe_ptp_get_ts_config(struct ngbe_adapter *adapter, struct ifreq *ifr);
+void ngbe_ptp_start_cyclecounter(struct ngbe_adapter *adapter);
+void ngbe_ptp_reset(struct ngbe_adapter *adapter);
+void ngbe_ptp_check_pps_event(struct ngbe_adapter *adapter);
+
+#ifdef CONFIG_PCI_IOV
+void ngbe_sriov_reinit(struct ngbe_adapter *adapter);
+#endif
+
+void ngbe_set_rx_drop_en(struct ngbe_adapter *adapter);
+
+u32 ngbe_rss_indir_tbl_entries(struct ngbe_adapter *adapter);
+void ngbe_store_reta(struct ngbe_adapter *adapter);
+
+/**
+ * Interrupt masking operations. Each bit in PX_ICn corresponds to an interrupt.
+ * Disable an interrupt by writing to PX_IMS with the corresponding bit = 1.
+ * Enable an interrupt by writing to PX_IMC with the corresponding bit = 1.
+ * Trigger an interrupt by writing to PX_ICS with the corresponding bit = 1.
+ **/
+//#define NGBE_INTR_ALL (~0ULL)
+#define NGBE_INTR_ALL 0x1FF
+#define NGBE_INTR_MISC(A) (1ULL << (A)->num_q_vectors)
+#define NGBE_INTR_MISC_VMDQ(A) (1ULL << ((A)->num_q_vectors + (A)->ring_feature[RING_F_VMDQ].offset))
+#define NGBE_INTR_QALL(A) (NGBE_INTR_MISC(A) - 1)
+#define NGBE_INTR_Q(i) (1ULL << (i))
+static inline void ngbe_intr_enable(struct ngbe_hw *hw, u64 qmask)
+{
+ u32 mask;
+
+ mask = (qmask & 0xFFFFFFFF);
+ if (mask) {
+ wr32(hw, NGBE_PX_IMC, mask);
+ }
+}
+
+static inline void ngbe_intr_disable(struct ngbe_hw *hw, u64 qmask)
+{
+ u32 mask;
+
+ mask = (qmask & 0xFFFFFFFF);
+ if (mask)
+ wr32(hw, NGBE_PX_IMS, mask);
+}
+
+static inline void ngbe_intr_trigger(struct ngbe_hw *hw, u64 qmask)
+{
+ u32 mask;
+
+ mask = (qmask & 0xFFFFFFFF);
+ if (mask)
+ wr32(hw, NGBE_PX_ICS, mask);
+}
+
+#define NGBE_RING_SIZE(R) ((R)->count < NGBE_MAX_TXD ? (R)->count / 128 : 0)
+
+
+#define NGBE_CPU_TO_BE16(_x) cpu_to_be16(_x)
+#define NGBE_BE16_TO_CPU(_x) be16_to_cpu(_x)
+#define NGBE_CPU_TO_BE32(_x) cpu_to_be32(_x)
+#define NGBE_BE32_TO_CPU(_x) be32_to_cpu(_x)
+
+#define msec_delay(_x) msleep(_x)
+
+#define usec_delay(_x) udelay(_x)
+
+#define STATIC static
+
+#define NGBE_NAME "ngbe"
+
+#define DPRINTK(nlevel, klevel, fmt, args...) \
+ ((void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \
+ printk(KERN_##klevel NGBE_NAME ": %s: %s: " fmt, \
+ adapter->netdev->name, \
+ __func__, ## args)))
+
+#define ngbe_emerg(fmt, ...) printk(KERN_EMERG fmt, ## __VA_ARGS__)
+#define ngbe_alert(fmt, ...) printk(KERN_ALERT fmt, ## __VA_ARGS__)
+#define ngbe_crit(fmt, ...) printk(KERN_CRIT fmt, ## __VA_ARGS__)
+#define ngbe_error(fmt, ...) printk(KERN_ERR fmt, ## __VA_ARGS__)
+#define ngbe_warn(fmt, ...) printk(KERN_WARNING fmt, ## __VA_ARGS__)
+#define ngbe_notice(fmt, ...) printk(KERN_NOTICE fmt, ## __VA_ARGS__)
+#define ngbe_info(fmt, ...) printk(KERN_INFO fmt, ## __VA_ARGS__)
+#define ngbe_print(fmt, ...) printk(KERN_DEBUG fmt, ## __VA_ARGS__)
+#define ngbe_trace(fmt, ...) printk(KERN_INFO fmt, ## __VA_ARGS__)
+
+#define ngbe_debug(fmt, ...) do {} while (0)
+
+#define ASSERT(_x) do {} while (0)
+#define DEBUGOUT(S) do {} while (0)
+#define DEBUGOUT1(S, A...) do {} while (0)
+#define DEBUGOUT2(S, A...) do {} while (0)
+#define DEBUGOUT3(S, A...) do {} while (0)
+#define DEBUGOUT4(S, A...) do {} while (0)
+#define DEBUGOUT5(S, A...) do {} while (0)
+#define DEBUGOUT6(S, A...) do {} while (0)
+#define DEBUGFUNC(fmt, ...) do {} while (0)
+
+#define NGBE_SFP_DETECT_RETRIES 2
+
+struct ngbe_hw;
+struct ngbe_msg {
+ u16 msg_enable;
+};
+struct net_device *ngbe_hw_to_netdev(const struct ngbe_hw *hw);
+struct ngbe_msg *ngbe_hw_to_msg(const struct ngbe_hw *hw);
+
+static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
+{
+ return &pdev->dev;
+}
+
+#define hw_dbg(hw, format, arg...) \
+ netdev_dbg(ngbe_hw_to_netdev(hw), format, ## arg)
+#define hw_err(hw, format, arg...) \
+ netdev_err(ngbe_hw_to_netdev(hw), format, ## arg)
+#define e_dev_info(format, arg...) \
+ dev_info(pci_dev_to_dev(adapter->pdev), format, ## arg)
+#define e_dev_warn(format, arg...) \
+ dev_warn(pci_dev_to_dev(adapter->pdev), format, ## arg)
+#define e_dev_err(format, arg...) \
+ dev_err(pci_dev_to_dev(adapter->pdev), format, ## arg)
+#define e_dev_notice(format, arg...) \
+ dev_notice(pci_dev_to_dev(adapter->pdev), format, ## arg)
+#define e_dbg(msglvl, format, arg...) \
+ netif_dbg(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_info(msglvl, format, arg...) \
+ netif_info(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_err(msglvl, format, arg...) \
+ netif_err(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_warn(msglvl, format, arg...) \
+ netif_warn(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_crit(msglvl, format, arg...) \
+ netif_crit(adapter, msglvl, adapter->netdev, format, ## arg)
+
+#define NGBE_FAILED_READ_CFG_DWORD 0xffffffffU
+#define NGBE_FAILED_READ_CFG_WORD 0xffffU
+#define NGBE_FAILED_READ_CFG_BYTE 0xffU
+
+extern u32 ngbe_read_reg(struct ngbe_hw *hw, u32 reg, bool quiet);
+extern u16 ngbe_read_pci_cfg_word(struct ngbe_hw *hw, u32 reg);
+extern void ngbe_write_pci_cfg_word(struct ngbe_hw *hw, u32 reg, u16 value);
+
+#define NGBE_READ_PCIE_WORD ngbe_read_pci_cfg_word
+#define NGBE_WRITE_PCIE_WORD ngbe_write_pci_cfg_word
+#define NGBE_R32_Q(h, r) ngbe_read_reg(h, r, true)
+
+#ifndef writeq
+#define writeq(val, addr) do { writel((u32) (val), addr); \
+ writel((u32) (val >> 32), (addr + 4)); \
+ } while (0)
+#endif
+
+#define NGBE_EEPROM_GRANT_ATTEMPS 100
+#define NGBE_HTONL(_i) htonl(_i)
+#define NGBE_NTOHL(_i) ntohl(_i)
+#define NGBE_NTOHS(_i) ntohs(_i)
+#define NGBE_CPU_TO_LE32(_i) cpu_to_le32(_i)
+#define NGBE_LE32_TO_CPUS(_i) le32_to_cpus(_i)
+
+enum {
+ NGBE_ERROR_SOFTWARE,
+ NGBE_ERROR_POLLING,
+ NGBE_ERROR_INVALID_STATE,
+ NGBE_ERROR_UNSUPPORTED,
+ NGBE_ERROR_ARGUMENT,
+ NGBE_ERROR_CAUTION,
+};
+
+#define ERROR_REPORT(level, format, arg...) do { \
+ switch (level) { \
+ case NGBE_ERROR_SOFTWARE: \
+ case NGBE_ERROR_CAUTION: \
+ case NGBE_ERROR_POLLING: \
+ netif_warn(ngbe_hw_to_msg(hw), drv, ngbe_hw_to_netdev(hw), \
+ format, ## arg); \
+ break; \
+ case NGBE_ERROR_INVALID_STATE: \
+ case NGBE_ERROR_UNSUPPORTED: \
+ case NGBE_ERROR_ARGUMENT: \
+ netif_err(ngbe_hw_to_msg(hw), hw, ngbe_hw_to_netdev(hw), \
+ format, ## arg); \
+ break; \
+ default: \
+ break; \
+ } \
+} while (0)
+
+#define ERROR_REPORT1 ERROR_REPORT
+#define ERROR_REPORT2 ERROR_REPORT
+#define ERROR_REPORT3 ERROR_REPORT
+
+#define UNREFERENCED_XPARAMETER
+#define UNREFERENCED_1PARAMETER(_p) do { \
+ uninitialized_var(_p); \
+} while (0)
+#define UNREFERENCED_2PARAMETER(_p, _q) do { \
+ uninitialized_var(_p); \
+ uninitialized_var(_q); \
+} while (0)
+#define UNREFERENCED_3PARAMETER(_p, _q, _r) do { \
+ uninitialized_var(_p); \
+ uninitialized_var(_q); \
+ uninitialized_var(_r); \
+} while (0)
+#define UNREFERENCED_4PARAMETER(_p, _q, _r, _s) do { \
+ uninitialized_var(_p); \
+ uninitialized_var(_q); \
+ uninitialized_var(_r); \
+ uninitialized_var(_s); \
+} while (0)
+#define UNREFERENCED_PARAMETER(_p) UNREFERENCED_1PARAMETER(_p)
+
+#endif /* _NGBE_H_ */
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_debugfs.c b/drivers/net/ethernet/netswift/ngbe/ngbe_debugfs.c
new file mode 100644
index 0000000000000..6710dff494796
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_debugfs.c
@@ -0,0 +1,764 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+
+#include "ngbe.h"
+
+#ifdef CONFIG_NGBE_DEBUG_FS
+#include <linux/debugfs.h>
+#include <linux/module.h>
+
+static struct dentry *ngbe_dbg_root;
+static int ngbe_data_mode;
+
+#define NGBE_DATA_FUNC(dm) ((dm) & ~0xFFFF)
+#define NGBE_DATA_ARGS(dm) ((dm) & 0xFFFF)
+enum ngbe_data_func {
+ NGBE_FUNC_NONE = (0 << 16),
+ NGBE_FUNC_DUMP_BAR = (1 << 16),
+ NGBE_FUNC_DUMP_RDESC = (2 << 16),
+ NGBE_FUNC_DUMP_TDESC = (3 << 16),
+ NGBE_FUNC_FLASH_READ = (4 << 16),
+ NGBE_FUNC_FLASH_WRITE = (5 << 16),
+};
+
+/**
+ * data operation
+ **/
+ssize_t
+ngbe_simple_read_from_pcibar(struct ngbe_adapter *adapter, int res,
+ void __user *buf, size_t size, loff_t *ppos)
+{
+ loff_t pos = *ppos;
+ u32 miss, len, limit = pci_resource_len(adapter->pdev, res);
+
+ if (pos < 0)
+ return 0;
+
+ limit = (pos + size <= limit ? pos + size : limit);
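+	/* copy one register word per iteration, trimming the first and
+	 * last words to the requested byte window
+	 */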
+ for (miss = 0; pos < limit && !miss; buf += len, pos += len) {
+ u32 val = 0, reg = round_down(pos, 4);
+ u32 off = pos - reg;
+
+		len = (reg + 4 <= limit ? 4 - off : limit - pos);
+		val = ngbe_rd32(adapter->io_addr + reg);
+		miss = copy_to_user(buf, (u8 *)&val + off, len);
+ }
+
+ size = pos - *ppos - miss;
+ *ppos += size;
+
+ return size;
+}
+
+ssize_t
+ngbe_simple_read_from_flash(struct ngbe_adapter *adapter,
+ void __user *buf, size_t size, loff_t *ppos)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ loff_t pos = *ppos;
+ size_t ret = 0;
+ loff_t rpos, rtail;
+ void __user *to = buf;
+ size_t available = adapter->hw.flash.dword_size << 2;
+
+ if (pos < 0)
+ return -EINVAL;
+ if (pos >= available || !size)
+ return 0;
+ if (size > available - pos)
+ size = available - pos;
+
+ rpos = round_up(pos, 4);
+ rtail = round_down(pos + size, 4);
+ if (rtail < rpos)
+ return 0;
+
+ to += rpos - pos;
+	while (rpos < rtail) {
+ u32 value = ngbe_rd32(adapter->io_addr + rpos);
+ if (TCALL(hw, flash.ops.write_buffer, rpos>>2, 1, &value)) {
+ ret = size;
+ break;
+ }
+		if (copy_to_user(to, &value, 4)) {
+ ret = size;
+ break;
+ }
+ to += 4;
+ rpos += 4;
+ }
+
+ if (ret == size)
+ return -EFAULT;
+ size -= ret;
+ *ppos = pos + size;
+ return size;
+}
+
+ssize_t
+ngbe_simple_write_to_flash(struct ngbe_adapter *adapter,
+ const void __user *from, size_t size, loff_t *ppos, size_t available)
+{
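+	/* flash writing is not implemented; claim the whole buffer was consumed */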
+ return size;
+}
+
+static ssize_t
+ngbe_dbg_data_ops_read(struct file *filp, char __user *buffer,
+ size_t size, loff_t *ppos)
+{
+ struct ngbe_adapter *adapter = filp->private_data;
+ u32 func = NGBE_DATA_FUNC(ngbe_data_mode);
+
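+	/* pick up the latest ngbe_data_mode programmed via the reg_ops file */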
+ rmb();
+
+ switch (func) {
+ case NGBE_FUNC_DUMP_BAR: {
+ u32 bar = NGBE_DATA_ARGS(ngbe_data_mode);
+
+ return ngbe_simple_read_from_pcibar(adapter, bar, buffer, size,
+ ppos);
+ }
+ case NGBE_FUNC_FLASH_READ: {
+ return ngbe_simple_read_from_flash(adapter, buffer, size, ppos);
+ }
+ case NGBE_FUNC_DUMP_RDESC: {
+ struct ngbe_ring *ring;
+ u32 queue = NGBE_DATA_ARGS(ngbe_data_mode);
+
+ if (queue >= adapter->num_rx_queues)
+ return 0;
+ queue += VMDQ_P(0) * adapter->queues_per_pool;
+ ring = adapter->rx_ring[queue];
+
+ return simple_read_from_buffer(buffer, size, ppos,
+ ring->desc, ring->size);
+ }
+ case NGBE_FUNC_DUMP_TDESC: {
+ struct ngbe_ring *ring;
+ u32 queue = NGBE_DATA_ARGS(ngbe_data_mode);
+
+ if (queue >= adapter->num_tx_queues)
+ return 0;
+ queue += VMDQ_P(0) * adapter->queues_per_pool;
+ ring = adapter->tx_ring[queue];
+
+ return simple_read_from_buffer(buffer, size, ppos,
+ ring->desc, ring->size);
+ }
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static ssize_t
+ngbe_dbg_data_ops_write(struct file *filp,
+ const char __user *buffer,
+ size_t size, loff_t *ppos)
+{
+ struct ngbe_adapter *adapter = filp->private_data;
+ u32 func = NGBE_DATA_FUNC(ngbe_data_mode);
+
+ rmb();
+
+ switch (func) {
+	case NGBE_FUNC_FLASH_WRITE: {
+		u32 wr_size = NGBE_DATA_ARGS(ngbe_data_mode);
+
+		if (wr_size > adapter->hw.flash.dword_size << 2)
+			wr_size = adapter->hw.flash.dword_size << 2;
+
+		return ngbe_simple_write_to_flash(adapter, buffer, wr_size,
+						  ppos, wr_size);
+	}
+ default:
+ break;
+ }
+
+ return size;
+}
+static const struct file_operations ngbe_dbg_data_ops_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = ngbe_dbg_data_ops_read,
+ .write = ngbe_dbg_data_ops_write,
+};
+
+/**
+ * reg_ops operation
+ **/
+static char ngbe_dbg_reg_ops_buf[256] = "";
+static ssize_t
+ngbe_dbg_reg_ops_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct ngbe_adapter *adapter = filp->private_data;
+ char *buf;
+ int len;
+
+ /* don't allow partial reads */
+ if (*ppos != 0)
+ return 0;
+
+ buf = kasprintf(GFP_KERNEL, "%s: mode=0x%08x\n%s\n",
+ adapter->netdev->name, ngbe_data_mode,
+ ngbe_dbg_reg_ops_buf);
+ if (!buf)
+ return -ENOMEM;
+
+ if (count < strlen(buf)) {
+ kfree(buf);
+ return -ENOSPC;
+ }
+
+ len = simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+
+ kfree(buf);
+ return len;
+}
+
+static ssize_t
+ngbe_dbg_reg_ops_write(struct file *filp,
+ const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct ngbe_adapter *adapter = filp->private_data;
+ char *pc = ngbe_dbg_reg_ops_buf;
+ int len;
+
+ /* don't allow partial writes */
+ if (*ppos != 0)
+ return 0;
+ if (count >= sizeof(ngbe_dbg_reg_ops_buf))
+ return -ENOSPC;
+
+ len = simple_write_to_buffer(ngbe_dbg_reg_ops_buf,
+ sizeof(ngbe_dbg_reg_ops_buf)-1,
+ ppos,
+ buffer,
+ count);
+ if (len < 0)
+ return len;
+
+ pc[len] = '\0';
+
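+	/*
+	 * Accepted commands:
+	 *   dump [bar|rdesc|tdesc] [n] - select a dump source for the data file
+	 *   flash <read|write> [n]     - select flash access via the data file
+	 *   read <reg>                 - read a register
+	 *   write <reg> <value>        - write a register
+	 */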
+ if (strncmp(pc, "dump", 4) == 0) {
+ u32 mode = 0;
+ u16 args;
+
+ pc += 4;
+ pc += strspn(pc, " \t");
+
+ if (!strncmp(pc, "bar", 3)) {
+ pc += 3;
+ mode = NGBE_FUNC_DUMP_BAR;
+ } else if (!strncmp(pc, "rdesc", 5)) {
+ pc += 5;
+ mode = NGBE_FUNC_DUMP_RDESC;
+ } else if (!strncmp(pc, "tdesc", 5)) {
+ pc += 5;
+ mode = NGBE_FUNC_DUMP_TDESC;
+ } else {
+ ngbe_dump(adapter);
+ }
+
+		if (mode && sscanf(pc, "%hu", &args) == 1)
+			mode |= args;
+
+ ngbe_data_mode = mode;
+	} else if (strncmp(pc, "flash", 5) == 0) {
+ u32 mode = 0;
+ u16 args;
+
+ pc += 5;
+ pc += strspn(pc, " \t");
+		if (!strncmp(pc, "read", 4)) {
+ pc += 4;
+ mode = NGBE_FUNC_FLASH_READ;
+ } else if (!strncmp(pc, "write", 5)) {
+ pc += 5;
+ mode = NGBE_FUNC_FLASH_WRITE;
+ }
+
+		if (mode && sscanf(pc, "%hu", &args) == 1)
+			mode |= args;
+
+ ngbe_data_mode = mode;
+ } else if (strncmp(ngbe_dbg_reg_ops_buf, "write", 5) == 0) {
+ u32 reg, value;
+ int cnt;
+		cnt = sscanf(&ngbe_dbg_reg_ops_buf[5], "%x %x", &reg, &value);
+ if (cnt == 2) {
+ wr32(&adapter->hw, reg, value);
+ e_dev_info("write: 0x%08x = 0x%08x\n", reg, value);
+ } else {
+ e_dev_info("write <reg> <value>\n");
+ }
+ } else if (strncmp(ngbe_dbg_reg_ops_buf, "read", 4) == 0) {
+ u32 reg, value;
+ int cnt;
+		cnt = sscanf(&ngbe_dbg_reg_ops_buf[4], "%x", &reg);
+ if (cnt == 1) {
+ value = rd32(&adapter->hw, reg);
+ e_dev_info("read 0x%08x = 0x%08x\n", reg, value);
+ } else {
+ e_dev_info("read <reg>\n");
+ }
+ } else {
+ e_dev_info("Unknown command %s\n", ngbe_dbg_reg_ops_buf);
+ e_dev_info("Available commands:\n");
+ e_dev_info(" read <reg>\n");
+ e_dev_info(" write <reg> <value>\n");
+ }
+ return count;
+}
+
+static const struct file_operations ngbe_dbg_reg_ops_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = ngbe_dbg_reg_ops_read,
+ .write = ngbe_dbg_reg_ops_write,
+};
+
+/**
+ * netdev_ops operation
+ **/
+static char ngbe_dbg_netdev_ops_buf[256] = "";
+static ssize_t
+ngbe_dbg_netdev_ops_read(struct file *filp,
+ char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct ngbe_adapter *adapter = filp->private_data;
+ char *buf;
+ int len;
+
+ /* don't allow partial reads */
+ if (*ppos != 0)
+ return 0;
+
+ buf = kasprintf(GFP_KERNEL, "%s: mode=0x%08x\n%s\n",
+ adapter->netdev->name, ngbe_data_mode,
+ ngbe_dbg_netdev_ops_buf);
+ if (!buf)
+ return -ENOMEM;
+
+ if (count < strlen(buf)) {
+ kfree(buf);
+ return -ENOSPC;
+ }
+
+ len = simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+
+ kfree(buf);
+ return len;
+}
+
+static ssize_t
+ngbe_dbg_netdev_ops_write(struct file *filp,
+ const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct ngbe_adapter *adapter = filp->private_data;
+ int len;
+
+ /* don't allow partial writes */
+ if (*ppos != 0)
+ return 0;
+ if (count >= sizeof(ngbe_dbg_netdev_ops_buf))
+ return -ENOSPC;
+
+ len = simple_write_to_buffer(ngbe_dbg_netdev_ops_buf,
+ sizeof(ngbe_dbg_netdev_ops_buf)-1,
+ ppos,
+ buffer,
+ count);
+ if (len < 0)
+ return len;
+
+ ngbe_dbg_netdev_ops_buf[len] = '\0';
+
+ if (strncmp(ngbe_dbg_netdev_ops_buf, "tx_timeout", 10) == 0) {
+ adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev);
+ e_dev_info("tx_timeout called\n");
+ } else {
+ e_dev_info("Unknown command: %s\n", ngbe_dbg_netdev_ops_buf);
+ e_dev_info("Available commands:\n");
+ e_dev_info(" tx_timeout\n");
+ }
+ return count;
+}
+
+static const struct file_operations ngbe_dbg_netdev_ops_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = ngbe_dbg_netdev_ops_read,
+ .write = ngbe_dbg_netdev_ops_write,
+};
+
+/**
+ * ngbe_dbg_adapter_init - setup the debugfs directory for the adapter
+ * @adapter: the adapter that is starting up
+ **/
+void ngbe_dbg_adapter_init(struct ngbe_adapter *adapter)
+{
+ const char *name = pci_name(adapter->pdev);
+ struct dentry *pfile;
+
+ adapter->ngbe_dbg_adapter = debugfs_create_dir(name, ngbe_dbg_root);
+ if (!adapter->ngbe_dbg_adapter) {
+ e_dev_err("debugfs entry for %s failed\n", name);
+ return;
+ }
+
+ pfile = debugfs_create_file("data", 0600,
+ adapter->ngbe_dbg_adapter, adapter,
+ &ngbe_dbg_data_ops_fops);
+ if (!pfile)
+		e_dev_err("debugfs data for %s failed\n", name);
+
+ pfile = debugfs_create_file("reg_ops", 0600,
+ adapter->ngbe_dbg_adapter, adapter,
+ &ngbe_dbg_reg_ops_fops);
+ if (!pfile)
+ e_dev_err("debugfs reg_ops for %s failed\n", name);
+
+ pfile = debugfs_create_file("netdev_ops", 0600,
+ adapter->ngbe_dbg_adapter, adapter,
+ &ngbe_dbg_netdev_ops_fops);
+ if (!pfile)
+ e_dev_err("debugfs netdev_ops for %s failed\n", name);
+}
+
+/**
+ * ngbe_dbg_adapter_exit - clear out the adapter's debugfs entries
+ * @adapter: the adapter that is stopping
+ **/
+void ngbe_dbg_adapter_exit(struct ngbe_adapter *adapter)
+{
+ if (adapter->ngbe_dbg_adapter)
+ debugfs_remove_recursive(adapter->ngbe_dbg_adapter);
+ adapter->ngbe_dbg_adapter = NULL;
+}
+
+/**
+ * ngbe_dbg_init - start up debugfs for the driver
+ **/
+void ngbe_dbg_init(void)
+{
+ ngbe_dbg_root = debugfs_create_dir(ngbe_driver_name, NULL);
+ if (ngbe_dbg_root == NULL)
+ pr_err("init of debugfs failed\n");
+}
+
+/**
+ * ngbe_dbg_exit - clean out the driver's debugfs entries
+ **/
+void ngbe_dbg_exit(void)
+{
+ debugfs_remove_recursive(ngbe_dbg_root);
+}
+
+#endif /* CONFIG_NGBE_DEBUG_FS */
+
+struct ngbe_reg_info {
+ u32 offset;
+ u32 length;
+ char *name;
+};
+
+static struct ngbe_reg_info ngbe_reg_info_tbl[] = {
+
+ /* General Registers */
+ {NGBE_CFG_PORT_CTL, 1, "CTRL"},
+ {NGBE_CFG_PORT_ST, 1, "STATUS"},
+
+ /* RX Registers */
+ {NGBE_PX_RR_CFG(0), 1, "SRRCTL"},
+ {NGBE_PX_RR_RP(0), 1, "RDH"},
+ {NGBE_PX_RR_WP(0), 1, "RDT"},
+ {NGBE_PX_RR_CFG(0), 1, "RXDCTL"},
+ {NGBE_PX_RR_BAL(0), 1, "RDBAL"},
+ {NGBE_PX_RR_BAH(0), 1, "RDBAH"},
+
+ /* TX Registers */
+ {NGBE_PX_TR_BAL(0), 1, "TDBAL"},
+ {NGBE_PX_TR_BAH(0), 1, "TDBAH"},
+ {NGBE_PX_TR_RP(0), 1, "TDH"},
+ {NGBE_PX_TR_WP(0), 1, "TDT"},
+ {NGBE_PX_TR_CFG(0), 1, "TXDCTL"},
+
+ /* MACVLAN */
+ {NGBE_PSR_MAC_SWC_VM, 128, "PSR_MAC_SWC_VM"},
+ {NGBE_PSR_MAC_SWC_AD_L, 32, "PSR_MAC_SWC_AD"},
+ {NGBE_PSR_VLAN_TBL(0), 128, "PSR_VLAN_TBL"},
+
+ /* List Terminator */
+ { .name = NULL }
+};
+
+/**
+ * ngbe_regdump - register printout routine
+ * @hw: pointer to the hardware structure
+ * @reg_info: table entry describing the register block to dump
+ **/
+static void
+ngbe_regdump(struct ngbe_hw *hw, struct ngbe_reg_info *reg_info)
+{
+ int i, n = 0;
+ u32 buffer[32*8];
+
+ switch (reg_info->offset) {
+ case NGBE_PSR_MAC_SWC_AD_L:
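+		/* the MAC switcher array is read indirectly: select the
+		 * entry via SWC_IDX, then fetch its high and low dwords
+		 */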
+ for (i = 0; i < reg_info->length; i++) {
+ wr32(hw, NGBE_PSR_MAC_SWC_IDX, i);
+ buffer[n++] =
+ rd32(hw, NGBE_PSR_MAC_SWC_AD_H);
+ buffer[n++] =
+ rd32(hw, NGBE_PSR_MAC_SWC_AD_L);
+ }
+ break;
+ default:
+ for (i = 0; i < reg_info->length; i++) {
+ buffer[n++] = rd32(hw,
+ reg_info->offset + 4*i);
+ }
+ break;
+ }
+	/* print the captured words in "name[index] value" form */
+	for (i = 0; i < n; i++)
+		pr_info("%-20s[%02d] %08x\n", reg_info->name, i, buffer[i]);
+}
+
+/**
+ * ngbe_dump - Print registers, tx-rings and rx-rings
+ * @adapter: board private structure
+ **/
+void ngbe_dump(struct ngbe_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct ngbe_hw *hw = &adapter->hw;
+ struct ngbe_reg_info *reg_info;
+ int n = 0;
+ struct ngbe_ring *tx_ring;
+ struct ngbe_tx_buffer *tx_buffer;
+ union ngbe_tx_desc *tx_desc;
+ struct my_u0 { u64 a; u64 b; } *u0;
+ struct ngbe_ring *rx_ring;
+ union ngbe_rx_desc *rx_desc;
+ struct ngbe_rx_buffer *rx_buffer_info;
+ u32 staterr;
+ int i = 0;
+
+ if (!netif_msg_hw(adapter))
+ return;
+
+ /* Print Registers */
+ dev_info(&adapter->pdev->dev, "Register Dump\n");
+ pr_info(" Register Name Value\n");
+ for (reg_info = ngbe_reg_info_tbl; reg_info->name; reg_info++) {
+ ngbe_regdump(hw, reg_info);
+ }
+
+ /* Print TX Ring Summary */
+ if (!netdev || !netif_running(netdev))
+ return;
+
+ dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
+ pr_info(" %s %s %s %s\n",
+ "Queue [NTU] [NTC] [bi(ntc)->dma ]",
+ "leng", "ntw", "timestamp");
+ for (n = 0; n < adapter->num_tx_queues; n++) {
+ tx_ring = adapter->tx_ring[n];
+ tx_buffer = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
+ pr_info(" %5d %5X %5X %016llX %08X %p %016llX\n",
+ n, tx_ring->next_to_use, tx_ring->next_to_clean,
+ (u64)dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ tx_buffer->next_to_watch,
+ (u64)tx_buffer->time_stamp);
+ }
+
+ /* Print TX Rings */
+ if (!netif_msg_tx_done(adapter))
+ goto rx_ring_summary;
+
+ dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
+
+ /* Transmit Descriptor Formats
+ *
+ * Transmit Descriptor (Read)
+ * +--------------------------------------------------------------+
+ * 0 | Buffer Address [63:0] |
+ * +--------------------------------------------------------------+
+ * 8 |PAYLEN |POPTS|CC|IDX |STA |DCMD |DTYP |MAC |RSV |DTALEN |
+ * +--------------------------------------------------------------+
+ * 63 46 45 40 39 38 36 35 32 31 24 23 20 19 18 17 16 15 0
+ *
+ * Transmit Descriptor (Write-Back)
+ * +--------------------------------------------------------------+
+ * 0 | RSV [63:0] |
+ * +--------------------------------------------------------------+
+ * 8 | RSV | STA | RSV |
+ * +--------------------------------------------------------------+
+ * 63 36 35 32 31 0
+ */
+
+ for (n = 0; n < adapter->num_tx_queues; n++) {
+ tx_ring = adapter->tx_ring[n];
+ pr_info("------------------------------------\n");
+ pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
+ pr_info("------------------------------------\n");
+ pr_info("%s%s %s %s %s %s\n",
+ "T [desc] [address 63:0 ] ",
+ "[PlPOIdStDDt Ln] [bi->dma ] ",
+ "leng", "ntw", "timestamp", "bi->skb");
+
+ for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
+ tx_desc = NGBE_TX_DESC(tx_ring, i);
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+ u0 = (struct my_u0 *)tx_desc;
+ if (dma_unmap_len(tx_buffer, len) > 0) {
+ pr_info("T [0x%03X] %016llX %016llX %016llX "
+ "%08X %p %016llX %p",
+ i,
+ le64_to_cpu(u0->a),
+ le64_to_cpu(u0->b),
+ (u64)dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ tx_buffer->next_to_watch,
+ (u64)tx_buffer->time_stamp,
+ tx_buffer->skb);
+ if (i == tx_ring->next_to_use &&
+ i == tx_ring->next_to_clean)
+ pr_cont(" NTC/U\n");
+ else if (i == tx_ring->next_to_use)
+ pr_cont(" NTU\n");
+ else if (i == tx_ring->next_to_clean)
+ pr_cont(" NTC\n");
+ else
+ pr_cont("\n");
+
+ if (netif_msg_pktdata(adapter) &&
+ tx_buffer->skb)
+ print_hex_dump(KERN_INFO, "",
+ DUMP_PREFIX_ADDRESS, 16, 1,
+ tx_buffer->skb->data,
+ dma_unmap_len(tx_buffer, len),
+ true);
+ }
+ }
+ }
+
+ /* Print RX Rings Summary */
+rx_ring_summary:
+ dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
+ pr_info("Queue [NTU] [NTC]\n");
+ for (n = 0; n < adapter->num_rx_queues; n++) {
+ rx_ring = adapter->rx_ring[n];
+ pr_info("%5d %5X %5X\n",
+ n, rx_ring->next_to_use, rx_ring->next_to_clean);
+ }
+
+ /* Print RX Rings */
+ if (!netif_msg_rx_status(adapter))
+ return;
+
+ dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
+
+ /* Receive Descriptor Formats
+ *
+ * Receive Descriptor (Read)
+ * 63 1 0
+ * +-----------------------------------------------------+
+ * 0 | Packet Buffer Address [63:1] |A0/NSE|
+ * +----------------------------------------------+------+
+ * 8 | Header Buffer Address [63:1] | DD |
+ * +-----------------------------------------------------+
+ *
+ *
+ * Receive Descriptor (Write-Back)
+ *
+ * 63 48 47 32 31 30 21 20 17 16 4 3 0
+ * +------------------------------------------------------+
+ * 0 |RSS / Frag Checksum|SPH| HDR_LEN |RSC- |Packet| RSS |
+ * |/ RTT / PCoE_PARAM | | | CNT | Type | Type |
+ * |/ Flow Dir Flt ID | | | | | |
+ * +------------------------------------------------------+
+ * 8 | VLAN Tag | Length |Extended Error| Xtnd Status/NEXTP |
+ * +------------------------------------------------------+
+ * 63 48 47 32 31 20 19 0
+ */
+
+ for (n = 0; n < adapter->num_rx_queues; n++) {
+ rx_ring = adapter->rx_ring[n];
+ pr_info("------------------------------------\n");
+ pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
+ pr_info("------------------------------------\n");
+ pr_info("%s%s%s",
+ "R [desc] [ PktBuf A0] ",
+ "[ HeadBuf DD] [bi->dma ] [bi->skb ] ",
+ "<-- Adv Rx Read format\n");
+ pr_info("%s%s%s",
+ "RWB[desc] [PcsmIpSHl PtRs] ",
+ "[vl er S cks ln] ---------------- [bi->skb ] ",
+ "<-- Adv Rx Write-Back format\n");
+
+ for (i = 0; i < rx_ring->count; i++) {
+ rx_buffer_info = &rx_ring->rx_buffer_info[i];
+ rx_desc = NGBE_RX_DESC(rx_ring, i);
+ u0 = (struct my_u0 *)rx_desc;
+ staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+ if (staterr & NGBE_RXD_STAT_DD) {
+ /* Descriptor Done */
+ pr_info("RWB[0x%03X] %016llX "
+ "%016llX ---------------- %p", i,
+ le64_to_cpu(u0->a),
+ le64_to_cpu(u0->b),
+ rx_buffer_info->skb);
+ } else {
+ pr_info("R [0x%03X] %016llX "
+ "%016llX %016llX %p", i,
+ le64_to_cpu(u0->a),
+ le64_to_cpu(u0->b),
+ (u64)rx_buffer_info->page_dma,
+ rx_buffer_info->skb);
+
+ if (netif_msg_pktdata(adapter) &&
+ rx_buffer_info->page_dma) {
+ print_hex_dump(KERN_INFO, "",
+ DUMP_PREFIX_ADDRESS, 16, 1,
+ page_address(rx_buffer_info->page) +
+ rx_buffer_info->page_offset,
+ ngbe_rx_bufsz(rx_ring), true);
+ }
+ }
+
+ if (i == rx_ring->next_to_use)
+ pr_cont(" NTU\n");
+ else if (i == rx_ring->next_to_clean)
+ pr_cont(" NTC\n");
+ else
+ pr_cont("\n");
+
+ }
+ }
+}
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/netswift/ngbe/ngbe_ethtool.c
new file mode 100644
index 0000000000000..ca389a7ec4ade
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_ethtool.c
@@ -0,0 +1,2756 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/* ethtool support for ngbe */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/firmware.h>
+#include <linux/uaccess.h>
+#include <linux/net_tstamp.h>
+
+#include "ngbe.h"
+#include "ngbe_hw.h"
+#include "ngbe_phy.h"
+
+#define NGBE_ALL_RAR_ENTRIES 16
+
+struct ngbe_stats {
+ char stat_string[ETH_GSTRING_LEN];
+ int sizeof_stat;
+ int stat_offset;
+};
+
+#define NGBE_NETDEV_STAT(_net_stat) { \
+ .stat_string = #_net_stat, \
+ .sizeof_stat = sizeof_field(struct net_device_stats, _net_stat), \
+ .stat_offset = offsetof(struct net_device_stats, _net_stat) \
+}
+static const struct ngbe_stats ngbe_gstrings_net_stats[] = {
+ NGBE_NETDEV_STAT(rx_packets),
+ NGBE_NETDEV_STAT(tx_packets),
+ NGBE_NETDEV_STAT(rx_bytes),
+ NGBE_NETDEV_STAT(tx_bytes),
+ NGBE_NETDEV_STAT(rx_errors),
+ NGBE_NETDEV_STAT(tx_errors),
+ NGBE_NETDEV_STAT(rx_dropped),
+ NGBE_NETDEV_STAT(tx_dropped),
+ NGBE_NETDEV_STAT(multicast),
+ NGBE_NETDEV_STAT(collisions),
+ NGBE_NETDEV_STAT(rx_over_errors),
+ NGBE_NETDEV_STAT(rx_crc_errors),
+ NGBE_NETDEV_STAT(rx_frame_errors),
+ NGBE_NETDEV_STAT(rx_fifo_errors),
+ NGBE_NETDEV_STAT(rx_missed_errors),
+ NGBE_NETDEV_STAT(tx_aborted_errors),
+ NGBE_NETDEV_STAT(tx_carrier_errors),
+ NGBE_NETDEV_STAT(tx_fifo_errors),
+ NGBE_NETDEV_STAT(tx_heartbeat_errors),
+};
+
+#define NGBE_STAT(_name, _stat) { \
+ .stat_string = _name, \
+ .sizeof_stat = sizeof_field(struct ngbe_adapter, _stat), \
+ .stat_offset = offsetof(struct ngbe_adapter, _stat) \
+}
+static struct ngbe_stats ngbe_gstrings_stats[] = {
+ NGBE_STAT("rx_pkts_nic", stats.gprc),
+ NGBE_STAT("tx_pkts_nic", stats.gptc),
+ NGBE_STAT("rx_bytes_nic", stats.gorc),
+ NGBE_STAT("tx_bytes_nic", stats.gotc),
+ NGBE_STAT("lsc_int", lsc_int),
+ NGBE_STAT("tx_busy", tx_busy),
+ NGBE_STAT("non_eop_descs", non_eop_descs),
+ NGBE_STAT("broadcast", stats.bprc),
+ NGBE_STAT("rx_no_buffer_count", stats.rnbc[0]),
+ NGBE_STAT("tx_timeout_count", tx_timeout_count),
+ NGBE_STAT("tx_restart_queue", restart_queue),
+ NGBE_STAT("rx_long_length_count", stats.roc),
+ NGBE_STAT("rx_short_length_count", stats.ruc),
+ NGBE_STAT("tx_flow_control_xon", stats.lxontxc),
+ NGBE_STAT("rx_flow_control_xon", stats.lxonrxc),
+ NGBE_STAT("tx_flow_control_xoff", stats.lxofftxc),
+ NGBE_STAT("rx_flow_control_xoff", stats.lxoffrxc),
+ NGBE_STAT("rx_csum_offload_good_count", hw_csum_rx_good),
+ NGBE_STAT("rx_csum_offload_errors", hw_csum_rx_error),
+ NGBE_STAT("alloc_rx_page_failed", alloc_rx_page_failed),
+ NGBE_STAT("alloc_rx_buff_failed", alloc_rx_buff_failed),
+ NGBE_STAT("rx_no_dma_resources", hw_rx_no_dma_resources),
+ NGBE_STAT("os2bmc_rx_by_bmc", stats.o2bgptc),
+ NGBE_STAT("os2bmc_tx_by_bmc", stats.b2ospc),
+ NGBE_STAT("os2bmc_tx_by_host", stats.o2bspc),
+ NGBE_STAT("os2bmc_rx_by_host", stats.b2ogprc),
+ NGBE_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts),
+ NGBE_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared),
+};
+
+/* ngbe allocates num_tx_queues and num_rx_queues symmetrically, so we
+ * define the Rx queue count in terms of num_tx_queues; there is no
+ * good way to query the maximum number of Rx queues when CONFIG_RPS
+ * is disabled.
+ */
+#define NGBE_NUM_RX_QUEUES netdev->num_tx_queues
+#define NGBE_NUM_TX_QUEUES netdev->num_tx_queues
+
+#define NGBE_QUEUE_STATS_LEN ( \
+ (NGBE_NUM_TX_QUEUES + NGBE_NUM_RX_QUEUES) * \
+ (sizeof(struct ngbe_queue_stats) / sizeof(u64)))
+#define NGBE_GLOBAL_STATS_LEN ARRAY_SIZE(ngbe_gstrings_stats)
+#define NGBE_NETDEV_STATS_LEN ARRAY_SIZE(ngbe_gstrings_net_stats)
+#define NGBE_PB_STATS_LEN ( \
+ (sizeof(((struct ngbe_adapter *)0)->stats.pxonrxc) + \
+ sizeof(((struct ngbe_adapter *)0)->stats.pxontxc) + \
+ sizeof(((struct ngbe_adapter *)0)->stats.pxoffrxc) + \
+ sizeof(((struct ngbe_adapter *)0)->stats.pxofftxc)) \
+ / sizeof(u64))
+#define NGBE_VF_STATS_LEN \
+ ((((struct ngbe_adapter *)netdev_priv(netdev))->num_vfs) * \
+ (sizeof(struct vf_stats) / sizeof(u64)))
+#define NGBE_STATS_LEN (NGBE_GLOBAL_STATS_LEN + \
+ NGBE_NETDEV_STATS_LEN + \
+ NGBE_PB_STATS_LEN + \
+ NGBE_QUEUE_STATS_LEN + \
+ NGBE_VF_STATS_LEN)
+
+static const char ngbe_gstrings_test[][ETH_GSTRING_LEN] = {
+ "Register test (offline)", "Eeprom test (offline)",
+ "Interrupt test (offline)", "Loopback test (offline)",
+ "Link test (on/offline)"
+};
+#define NGBE_TEST_LEN (sizeof(ngbe_gstrings_test) / ETH_GSTRING_LEN)
+
+#define ngbe_isbackplane(type) \
+ ((type == ngbe_media_type_backplane) ? true : false)
+
+int ngbe_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 supported_link = 0;
+ u32 link_speed = 0;
+ bool autoneg = false;
+ u32 supported, advertising;
+	bool link_up = false;
+
+ ethtool_convert_link_mode_to_legacy_u32(&supported,
+ cmd->link_modes.supported);
+
+ TCALL(hw, mac.ops.get_link_capabilities, &supported_link, &autoneg);
+
+ /* set the supported link speeds */
+ if (supported_link & NGBE_LINK_SPEED_1GB_FULL)
+ supported |= (ngbe_isbackplane(hw->phy.media_type)) ?
+ SUPPORTED_1000baseKX_Full : SUPPORTED_1000baseT_Full;
+ if (supported_link & NGBE_LINK_SPEED_100_FULL)
+ supported |= SUPPORTED_100baseT_Full;
+ if (supported_link & NGBE_LINK_SPEED_10_FULL)
+ supported |= SUPPORTED_10baseT_Full;
+
+ /* default advertised speed if phy.autoneg_advertised isn't set */
+ advertising = supported;
+
+ /* set the advertised speeds */
+ if (hw->phy.autoneg_advertised) {
+ advertising = 0;
+ if (hw->phy.autoneg_advertised & NGBE_LINK_SPEED_100_FULL)
+ advertising |= ADVERTISED_100baseT_Full;
+ if (hw->phy.autoneg_advertised & NGBE_LINK_SPEED_1GB_FULL) {
+ if (supported & SUPPORTED_1000baseKX_Full)
+ advertising |= ADVERTISED_1000baseKX_Full;
+ else
+ advertising |= ADVERTISED_1000baseT_Full;
+ }
+ if (hw->phy.autoneg_advertised & NGBE_LINK_SPEED_10_FULL)
+ advertising |= ADVERTISED_10baseT_Full;
+ } else {
+ /* default modes in case phy.autoneg_advertised isn't set */
+ if (supported_link & NGBE_LINK_SPEED_1GB_FULL)
+ advertising |= ADVERTISED_1000baseT_Full;
+ if (supported_link & NGBE_LINK_SPEED_100_FULL)
+ advertising |= ADVERTISED_100baseT_Full;
+ if (supported_link & NGBE_LINK_SPEED_10_FULL)
+ advertising |= ADVERTISED_10baseT_Full;
+ }
+ supported |= SUPPORTED_Autoneg;
+	if (autoneg) {
+		advertising |= ADVERTISED_Autoneg;
+		cmd->base.autoneg = AUTONEG_ENABLE;
+	} else {
+		cmd->base.autoneg = AUTONEG_DISABLE;
+	}
+
+ /* Determine the remaining settings based on the PHY type. */
+ switch (adapter->hw.phy.type) {
+ case ngbe_phy_internal:
+ case ngbe_phy_m88e1512:
+ case ngbe_phy_zte:
+ supported |= SUPPORTED_TP;
+ advertising |= ADVERTISED_TP;
+ cmd->base.port = PORT_TP;
+ break;
+ case ngbe_phy_sfp_passive_tyco:
+ case ngbe_phy_sfp_passive_unknown:
+ case ngbe_phy_sfp_ftl:
+ case ngbe_phy_sfp_avago:
+ case ngbe_phy_sfp_intel:
+ case ngbe_phy_sfp_unknown:
+ switch (adapter->hw.phy.sfp_type) {
+ /* SFP+ devices, further checking needed */
+ case ngbe_sfp_type_da_cu:
+ case ngbe_sfp_type_da_cu_core0:
+ case ngbe_sfp_type_da_cu_core1:
+ supported |= SUPPORTED_FIBRE;
+ advertising |= ADVERTISED_FIBRE;
+ cmd->base.port = PORT_DA;
+ break;
+ case ngbe_sfp_type_sr:
+ case ngbe_sfp_type_lr:
+ case ngbe_sfp_type_srlr_core0:
+ case ngbe_sfp_type_srlr_core1:
+ case ngbe_sfp_type_1g_sx_core0:
+ case ngbe_sfp_type_1g_sx_core1:
+ case ngbe_sfp_type_1g_lx_core0:
+ case ngbe_sfp_type_1g_lx_core1:
+ supported |= SUPPORTED_FIBRE;
+ advertising |= ADVERTISED_FIBRE;
+ cmd->base.port = PORT_FIBRE;
+ break;
+ case ngbe_sfp_type_not_present:
+ supported |= SUPPORTED_FIBRE;
+ advertising |= ADVERTISED_FIBRE;
+ cmd->base.port = PORT_NONE;
+ break;
+ case ngbe_sfp_type_1g_cu_core0:
+ case ngbe_sfp_type_1g_cu_core1:
+ supported |= SUPPORTED_TP;
+ advertising |= ADVERTISED_TP;
+ cmd->base.port = PORT_TP;
+ break;
+ case ngbe_sfp_type_unknown:
+ default:
+ supported |= SUPPORTED_FIBRE;
+ advertising |= ADVERTISED_FIBRE;
+ cmd->base.port = PORT_OTHER;
+ break;
+ }
+ break;
+ case ngbe_phy_unknown:
+ case ngbe_phy_generic:
+ case ngbe_phy_sfp_unsupported:
+ default:
+ supported |= SUPPORTED_FIBRE;
+ advertising |= ADVERTISED_FIBRE;
+ cmd->base.port = PORT_OTHER;
+ break;
+ }
+
+ if (!in_interrupt()) {
+ TCALL(hw, mac.ops.check_link, &link_speed, &link_up, false);
+ } else {
+ /*
+ * this case is a special workaround for RHEL5 bonding
+ * that calls this routine from interrupt context
+ */
+ link_speed = adapter->link_speed;
+ link_up = adapter->link_up;
+ }
+
+ supported |= SUPPORTED_Pause;
+
+ switch (hw->fc.requested_mode) {
+ case ngbe_fc_full:
+ advertising |= ADVERTISED_Pause;
+ break;
+ case ngbe_fc_rx_pause:
+ advertising |= ADVERTISED_Pause |
+ ADVERTISED_Asym_Pause;
+ break;
+ case ngbe_fc_tx_pause:
+ advertising |= ADVERTISED_Asym_Pause;
+ break;
+ default:
+ advertising &= ~(ADVERTISED_Pause |
+ ADVERTISED_Asym_Pause);
+ }
+
+ if (link_up) {
+ switch (link_speed) {
+ case NGBE_LINK_SPEED_1GB_FULL:
+ cmd->base.speed = SPEED_1000;
+ break;
+ case NGBE_LINK_SPEED_100_FULL:
+ cmd->base.speed = SPEED_100;
+ break;
+ case NGBE_LINK_SPEED_10_FULL:
+ cmd->base.speed = SPEED_10;
+ break;
+ default:
+ break;
+ }
+ cmd->base.duplex = DUPLEX_FULL;
+ } else {
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+ }
+
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+ supported);
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+ advertising);
+ return 0;
+}
+
+static int ngbe_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *cmd)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 advertised, old;
+ s32 err = 0;
+ u32 supported, advertising;
+ ethtool_convert_link_mode_to_legacy_u32(&supported,
+ cmd->link_modes.supported);
+ ethtool_convert_link_mode_to_legacy_u32(&advertising,
+ cmd->link_modes.advertising);
+
+ if ((hw->phy.media_type == ngbe_media_type_copper) ||
+ (hw->phy.multispeed_fiber)) {
+ /*
+ * this function does not support duplex forcing, but can
+ * limit the advertising of the adapter to the specified speed
+ */
+ if (advertising & ~supported) {
+ return -EINVAL;
+ }
+ old = hw->phy.autoneg_advertised;
+ advertised = 0;
+
+ if (cmd->base.autoneg == AUTONEG_ENABLE) {
+ hw->mac.autoneg = true;
+ if (advertising & ADVERTISED_1000baseT_Full)
+ advertised |= NGBE_LINK_SPEED_1GB_FULL;
+
+ if (advertising & ADVERTISED_100baseT_Full)
+ advertised |= NGBE_LINK_SPEED_100_FULL;
+
+ if (advertising & ADVERTISED_10baseT_Full)
+ advertised |= NGBE_LINK_SPEED_10_FULL;
+
+ if (old == advertised) {
+ return err;
+ }
+ } else {
+ if (cmd->base.duplex == DUPLEX_HALF) {
+ e_err(probe, "unsupported duplex\n");
+ return -EINVAL;
+ }
+
+ switch (cmd->base.speed) {
+ case SPEED_10:
+ advertised = NGBE_LINK_SPEED_10_FULL;
+ break;
+ case SPEED_100:
+ advertised = NGBE_LINK_SPEED_100_FULL;
+ break;
+ case SPEED_1000:
+ advertised = NGBE_LINK_SPEED_1GB_FULL;
+ break;
+ default:
+ e_err(probe, "unsupported speed\n");
+ return -EINVAL;
+ }
+ hw->mac.autoneg = false;
+ }
+
+ hw->mac.autotry_restart = true;
+ err = TCALL(hw, mac.ops.setup_link, advertised, true);
+ if (err) {
+ e_info(probe, "setup link failed with code %d\n", err);
+ TCALL(hw, mac.ops.setup_link, old, true);
+ } else {
+ hw->phy.autoneg_advertised = advertised;
+ }
+	} else {
+		/* in this case we currently only support 1Gb/FULL */
+		u32 speed = cmd->base.speed;
+
+		if ((cmd->base.autoneg == AUTONEG_ENABLE) ||
+		    (advertising != ADVERTISED_1000baseT_Full) ||
+		    (speed + cmd->base.duplex != SPEED_1000 + DUPLEX_FULL))
+			return -EINVAL;
+	}
+
+ return err;
+}
+
+static void ngbe_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pause)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+
+ if (!hw->fc.disable_fc_autoneg)
+ pause->autoneg = 1;
+ else
+ pause->autoneg = 0;
+
+ if (hw->fc.current_mode == ngbe_fc_rx_pause) {
+ pause->rx_pause = 1;
+ } else if (hw->fc.current_mode == ngbe_fc_tx_pause) {
+ pause->tx_pause = 1;
+ } else if (hw->fc.current_mode == ngbe_fc_full) {
+ pause->rx_pause = 1;
+ pause->tx_pause = 1;
+ }
+}
+
+static int ngbe_set_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pause)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ struct ngbe_fc_info fc = hw->fc;
+
+ fc.disable_fc_autoneg = (pause->autoneg != AUTONEG_ENABLE);
+
+ if ((pause->rx_pause && pause->tx_pause) || pause->autoneg)
+ fc.requested_mode = ngbe_fc_full;
+ else if (pause->rx_pause)
+ fc.requested_mode = ngbe_fc_rx_pause;
+ else if (pause->tx_pause)
+ fc.requested_mode = ngbe_fc_tx_pause;
+ else
+ fc.requested_mode = ngbe_fc_none;
+
+ /* if the thing changed then we'll update and use new autoneg */
+ if (memcmp(&fc, &hw->fc, sizeof(struct ngbe_fc_info))) {
+ hw->fc = fc;
+ if (netif_running(netdev))
+ ngbe_reinit_locked(adapter);
+ else
+ ngbe_reset(adapter);
+ }
+
+ return 0;
+}
+
+static u32 ngbe_get_msglevel(struct net_device *netdev)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ return adapter->msg_enable;
+}
+
+static void ngbe_set_msglevel(struct net_device *netdev, u32 data)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ adapter->msg_enable = data;
+}
+
+static int ngbe_get_regs_len(struct net_device __always_unused *netdev)
+{
+#define NGBE_REGS_LEN 4096
+ return NGBE_REGS_LEN * sizeof(u32);
+}
+
+#define NGBE_GET_STAT(_A_, _R_) (_A_->stats._R_)
+
+static void ngbe_get_regs(struct net_device *netdev,
+ struct ethtool_regs *regs,
+ void *p)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 *regs_buff = p;
+ u32 i;
+ u32 id = 0;
+
+ memset(p, 0, NGBE_REGS_LEN * sizeof(u32));
+ regs_buff[NGBE_REGS_LEN - 1] = 0x55555555;
+
+ regs->version = hw->revision_id << 16 |
+ hw->device_id;
+
+ /* Global Registers */
+ /* chip control */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MIS_PWR);//0
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MIS_CTL);//1
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MIS_PF_SM);//2
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MIS_RST);//3
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MIS_ST);//4
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MIS_SWSM);//5
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MIS_RST_ST);//6
+ /* pvt sensor */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TS_CTL);//7
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TS_EN);//8
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TS_ST);//9
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TS_ALARM_THRE);//10
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TS_DALARM_THRE);//11
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TS_INT_EN);//12
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TS_ALARM_ST);//13
+ /* Fmgr Register */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_SPI_CMD);//14
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_SPI_DATA);//15
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_SPI_STATUS);//16
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_SPI_USR_CMD);//17
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_SPI_CMDCFG0);//18
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_SPI_CMDCFG1);//19
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_SPI_ILDR_STATUS);//20
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_SPI_ILDR_SWPTR);//21
+
+ /* Port Registers */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_CFG_PORT_CTL);//22
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_CFG_PORT_ST);//23
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_CFG_EX_VTYPE);//24
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_CFG_TCP_TIME);//25
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_CFG_LED_CTL);//26
+ /* GPIO */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_GPIO_DR);//27
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_GPIO_DDR);//28
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_GPIO_CTL);//29
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_GPIO_INTEN);//30
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_GPIO_INTMASK);//31
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_GPIO_INTSTATUS);//32
+ /* TX TPH */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_CFG_TPH_TDESC);//33
+ /* RX TPH */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_CFG_TPH_RDESC);//34
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_CFG_TPH_RHDR);//35
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_CFG_TPH_RPL);//36
+
+ /* TDMA */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_CTL);//37
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_POOL_TE);//38
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_PB_THRE);//39
+
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_LLQ);//40
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_ETYPE_LB_L);//41
+
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_ETYPE_AS_L);//42
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_MAC_AS_L);//43
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_VLAN_AS_L);//44
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_TCP_FLG_L);//45
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_TCP_FLG_H);//46
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_VLAN_INS(i));//47-54
+ }
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_ETAG_INS(i));//55-62
+ }
+ /* Transmit QOS */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_PBWARB_CTL);//63
+
+ /* statistics */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_DRP_CNT);//64
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_SEC_DRP);//65
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_PKT_CNT);//66
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_BYTE_CNT_L);//67
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_BYTE_CNT_H);//68
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDM_OS2BMC_CNT);//69
+
+ /* RDMA */
+ /* receive control */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDM_ARB_CTL);//70
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDM_POOL_RE);//71
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDM_PF_QDE);//72
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDM_PF_HIDE);//73
+ /* static */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDM_DRP_PKT);//74
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDM_PKT_CNT);//75
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDM_BYTE_CNT_L);//76
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDM_BYTE_CNT_H);//77
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDM_BMC2OS_CNT);//78
+
+ /* RDB */
+ /*flow control */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_RFCV);//79
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_RFCL);//80
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_RFCH);//81
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_RFCRT);//82
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_RFCC);//83
+ /* receive packet buffer */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_PB_CTL);//84
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_PB_SZ);//85
+
+ /* lli interrupt */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_LLI_THRE);//86
+ /* ring assignment */
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_PL_CFG(i));//87-94
+ }
+ for (i = 0; i < 32; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_RSSTBL(i));//95-126
+ }
+ for (i = 0; i < 10; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_RSSRK(i));//127-136
+ }
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_RA_CTL);//137
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_5T_SDP(i));//138-145
+ }
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_5T_CTL0(i));//146-153
+ }
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_5T_CTL1(i));//154-161
+ }
+
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_SYN_CLS);//162
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_ETYPE_CLS(i));//163-170
+ }
+ /* statistics */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_MPCNT);//171
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_PKT_CNT);//172
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_REPLI_CNT);//173
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_DRP_CNT);//174
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_LXONTXC);//175
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_LXOFFTXC);//176
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_PFCMACDAL);//177
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_PFCMACDAH);//178
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RDB_TXSWERR);//179
+
+ /* PSR */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_CTL);//180
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_MAX_SZ);//181
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_VLAN_CTL);//182
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_VM_CTL);//183
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_PKT_CNT);//184
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_MNG_PKT_CNT);//185
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_DBG_DOP_CNT);//186
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_MNG_DOP_CNT);//187
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_VM_FLP_L);//188
+
+ /* vm l2 control */
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_VM_L2CTL(i));//189-196
+ }
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_ETYPE_SWC(i));//197-204
+ }
+ for (i = 0; i < 128; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_MC_TBL(i));//205-332
+ }
+ for (i = 0; i < 128; i++) {
+		regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_UC_TBL(i));//333-460
+ }
+ for (i = 0; i < 128; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_VLAN_TBL(i));//461-588
+ }
+ /* mac switcher */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_MAC_SWC_AD_L);//589
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_MAC_SWC_AD_H);//590
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_MAC_SWC_VM);//591
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_MAC_SWC_IDX);//592
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_VLAN_SWC);//593
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_VLAN_SWC_VM_L);//594
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_VLAN_SWC_IDX);//595
+
+ /* mirror */
+ for (i = 0; i < 4; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_MR_CTL(i));//596-599
+ }
+ for (i = 0; i < 4; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_MR_VLAN_L(i));//600-603
+ }
+ for (i = 0; i < 4; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_MR_VM_L(i));//604-607
+ }
+ /* 1588 */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_1588_CTL);//608
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_1588_STMPL);//609
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_1588_STMPH);//610
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_1588_ATTRL);//611
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_1588_ATTRH);//612
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_1588_MSGTYPE);//613
+ /* wake up */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_WKUP_CTL);//614
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_WKUP_IPV);//615
+ for (i = 0; i < 4; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_WKUP_IP4TBL(i));//616-619
+ }
+ for (i = 0; i < 4; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_WKUP_IP6TBL(i));//620-623
+ }
+ for (i = 0; i < 16; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_LAN_FLEX_DW_L(i));//624-639
+ }
+ for (i = 0; i < 16; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_LAN_FLEX_DW_H(i));//640-655
+ }
+ for (i = 0; i < 16; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_LAN_FLEX_MSK(i));//656-671
+ }
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PSR_LAN_FLEX_CTL);//672
+
+ /* TDB */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDB_RFCS);//673
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDB_PB_SZ);//674
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDB_PBRARB_CTL);//675
+ /* statistic */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDB_OUT_PKT_CNT);//676
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDB_MNG_PKT_CNT);//677
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDB_LB_PKT_CNT);//678
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TDB_MNG_LARGE_DOP_CNT);//679
+
+ /* TSEC */
+ /* general tsec */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_CTL);//680
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_ST);//681
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_BUF_AF);//682
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_BUF_AE);//683
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_MIN_IFG);//684
+ /* 1588 */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_1588_CTL);//685
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_1588_STMPL);//686
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_1588_STMPH);//687
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_1588_SYSTIML);//688
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_1588_SYSTIMH);//689
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_1588_INC);//690
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_1588_ADJL);//691
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_1588_ADJH);//692
+
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_1588_INT_ST);//693
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_1588_INT_EN);//694
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_1588_AUX_CTL);//695
+ for (i = 0; i < 4; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TSEC_1588_SDP(i));//696-699
+ }
+
+ /* RSEC */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RSEC_CTL);//700
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RSEC_ST);//701
+ /* mac wrapper */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MAC_TX_CFG);//702
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MAC_RX_CFG);//703
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MAC_PKT_FLT);//704
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MAC_WDG_TIMEOUT);//705
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MAC_TX_FLOW_CTRL);//706
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MAC_RX_FLOW_CTRL);//707
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MAC_INT_ST);//708
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_MAC_INT_EN);//709
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_RX_FRAME_CNT_GOOD_BAD_LOW);//710
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_TX_FRAME_CNT_GOOD_BAD_LOW);//711
+
+ /* BAR register */
+ /* pf interrupt register */
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_MISC_IC);//712
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_MISC_ICS);//713
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_MISC_IEN);//714
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_GPIE);//715
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_IC);//716
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_ICS);//717
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_IMS);//718
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_IMC);//719
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_ISB_ADDR_L);//720
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_ISB_ADDR_H);//721
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_ITRSEL);//722
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_ITR(i));//723-730
+ }
+ for (i = 0; i < 4; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_IVAR(i));//731-734
+ }
+
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_MISC_IVAR);//735
+ /* pf receive ring register */
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_RR_BAL(i));//736-743
+ }
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_RR_BAH(i));//744-751
+ }
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_RR_WP(i));//752-759
+ }
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_RR_RP(i));//760-767
+ }
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_RR_CFG(i));//768-775
+ }
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_TR_BAL(i));//776-783
+ }
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_TR_BAH(i));//784-791
+ }
+ for (i = 0; i < 8; i++) {
+		regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_TR_WP(i));//792-799
+ }
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_TR_RP(i));//800-807
+ }
+ for (i = 0; i < 8; i++) {
+ regs_buff[id++] = NGBE_R32_Q(hw, NGBE_PX_TR_CFG(i));//808-815
+ }
+}
+
+static int ngbe_get_eeprom_len(struct net_device *netdev)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ return adapter->hw.eeprom.word_size * 2;
+}
+
+static int ngbe_get_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ u16 *eeprom_buff;
+ int first_word, last_word, eeprom_len;
+ int ret_val = 0;
+ u16 i;
+
+ if (eeprom->len == 0)
+ return -EINVAL;
+
+ eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+ first_word = eeprom->offset >> 1;
+ last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+ eeprom_len = last_word - first_word + 1;
+
+ eeprom_buff = kmalloc(sizeof(u16) * eeprom_len, GFP_KERNEL);
+ if (!eeprom_buff)
+ return -ENOMEM;
+
+ ret_val = TCALL(hw, eeprom.ops.read_buffer, first_word, eeprom_len,
+ eeprom_buff);
+
+ /* Device's eeprom is always little-endian, word addressable */
+ for (i = 0; i < eeprom_len; i++)
+ le16_to_cpus(&eeprom_buff[i]);
+
+ memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), eeprom->len);
+ kfree(eeprom_buff);
+
+ return ret_val;
+}
+
+static int ngbe_set_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ u16 *eeprom_buff;
+ void *ptr;
+ int max_len, first_word, last_word, ret_val = 0;
+ u16 i;
+
+ if (eeprom->len == 0)
+ return -EINVAL;
+
+ if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+ return -EINVAL;
+
+ max_len = hw->eeprom.word_size * 2;
+
+ first_word = eeprom->offset >> 1;
+ last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+ eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+ if (!eeprom_buff)
+ return -ENOMEM;
+
+ ptr = eeprom_buff;
+
+ if (eeprom->offset & 1) {
+ /*
+ * need read/modify/write of first changed EEPROM word
+ * only the second byte of the word is being modified
+ */
+ ret_val = TCALL(hw, eeprom.ops.read, first_word,
+ &eeprom_buff[0]);
+ if (ret_val)
+ goto err;
+
+ ptr++;
+ }
+ if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) {
+ /*
+ * need read/modify/write of last changed EEPROM word
+ * only the first byte of the word is being modified
+ */
+ ret_val = TCALL(hw, eeprom.ops.read, last_word,
+ &eeprom_buff[last_word - first_word]);
+ if (ret_val)
+ goto err;
+ }
+
+ /* Device's eeprom is always little-endian, word addressable */
+ for (i = 0; i < last_word - first_word + 1; i++)
+ le16_to_cpus(&eeprom_buff[i]);
+
+ memcpy(ptr, bytes, eeprom->len);
+
+ for (i = 0; i < last_word - first_word + 1; i++)
+ cpu_to_le16s(&eeprom_buff[i]);
+
+ ret_val = TCALL(hw, eeprom.ops.write_buffer, first_word,
+ last_word - first_word + 1,
+ eeprom_buff);
+
+ /* Update the checksum */
+ if (ret_val == 0)
+ TCALL(hw, eeprom.ops.update_checksum);
+
+err:
+ kfree(eeprom_buff);
+ return ret_val;
+}
+
+static void ngbe_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ strncpy(drvinfo->driver, ngbe_driver_name,
+ sizeof(drvinfo->driver) - 1);
+ strncpy(drvinfo->version, ngbe_driver_version,
+ sizeof(drvinfo->version) - 1);
+	strncpy(drvinfo->fw_version, adapter->eeprom_id,
+		sizeof(drvinfo->fw_version) - 1);
+ strncpy(drvinfo->bus_info, pci_name(adapter->pdev),
+ sizeof(drvinfo->bus_info) - 1);
+ if (adapter->num_tx_queues <= NGBE_NUM_RX_QUEUES) {
+ drvinfo->n_stats = NGBE_STATS_LEN -
+ (NGBE_NUM_RX_QUEUES - adapter->num_tx_queues)*
+ (sizeof(struct ngbe_queue_stats) / sizeof(u64))*2;
+ } else {
+ drvinfo->n_stats = NGBE_STATS_LEN;
+ }
+ drvinfo->testinfo_len = NGBE_TEST_LEN;
+ drvinfo->regdump_len = ngbe_get_regs_len(netdev);
+}
+
+static void ngbe_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ ring->rx_max_pending = NGBE_MAX_RXD;
+ ring->tx_max_pending = NGBE_MAX_TXD;
+ ring->rx_mini_max_pending = 0;
+ ring->rx_jumbo_max_pending = 0;
+ ring->rx_pending = adapter->rx_ring_count;
+ ring->tx_pending = adapter->tx_ring_count;
+ ring->rx_mini_pending = 0;
+ ring->rx_jumbo_pending = 0;
+}
+
+static int ngbe_set_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_ring *temp_ring;
+ int i, err = 0;
+ u32 new_rx_count, new_tx_count;
+
+ if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+ return -EINVAL;
+
+ new_tx_count = clamp_t(u32, ring->tx_pending,
+ NGBE_MIN_TXD, NGBE_MAX_TXD);
+ new_tx_count = ALIGN(new_tx_count, NGBE_REQ_TX_DESCRIPTOR_MULTIPLE);
+
+ new_rx_count = clamp_t(u32, ring->rx_pending,
+ NGBE_MIN_RXD, NGBE_MAX_RXD);
+ new_rx_count = ALIGN(new_rx_count, NGBE_REQ_RX_DESCRIPTOR_MULTIPLE);
+
+ if ((new_tx_count == adapter->tx_ring_count) &&
+ (new_rx_count == adapter->rx_ring_count)) {
+ /* nothing to do */
+ return 0;
+ }
+
+ while (test_and_set_bit(__NGBE_RESETTING, &adapter->state))
+ usleep_range(1000, 2000);
+
+ if (!netif_running(adapter->netdev)) {
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ adapter->tx_ring[i]->count = new_tx_count;
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ adapter->rx_ring[i]->count = new_rx_count;
+ adapter->tx_ring_count = new_tx_count;
+ adapter->rx_ring_count = new_rx_count;
+ goto clear_reset;
+ }
+
+ /* allocate temporary buffer to store rings in */
+ i = max_t(int, adapter->num_tx_queues, adapter->num_rx_queues);
+ temp_ring = vmalloc(i * sizeof(struct ngbe_ring));
+
+ if (!temp_ring) {
+ err = -ENOMEM;
+ goto clear_reset;
+ }
+
+ ngbe_down(adapter);
+
+ /*
+ * Setup new Tx resources and free the old Tx resources in that order.
+ * We can then assign the new resources to the rings via a memcpy.
+ * The advantage to this approach is that we are guaranteed to still
+ * have resources even in the case of an allocation failure.
+ */
+ if (new_tx_count != adapter->tx_ring_count) {
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ memcpy(&temp_ring[i], adapter->tx_ring[i],
+ sizeof(struct ngbe_ring));
+
+ temp_ring[i].count = new_tx_count;
+ err = ngbe_setup_tx_resources(&temp_ring[i]);
+ if (err) {
+ while (i) {
+ i--;
+ ngbe_free_tx_resources(&temp_ring[i]);
+ }
+ goto err_setup;
+ }
+ }
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ ngbe_free_tx_resources(adapter->tx_ring[i]);
+
+ memcpy(adapter->tx_ring[i], &temp_ring[i],
+ sizeof(struct ngbe_ring));
+ }
+
+ adapter->tx_ring_count = new_tx_count;
+ }
+
+ /* Repeat the process for the Rx rings if needed */
+ if (new_rx_count != adapter->rx_ring_count) {
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ memcpy(&temp_ring[i], adapter->rx_ring[i],
+ sizeof(struct ngbe_ring));
+
+ temp_ring[i].count = new_rx_count;
+ err = ngbe_setup_rx_resources(&temp_ring[i]);
+ if (err) {
+ while (i) {
+ i--;
+ ngbe_free_rx_resources(&temp_ring[i]);
+ }
+ goto err_setup;
+ }
+ }
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ ngbe_free_rx_resources(adapter->rx_ring[i]);
+
+ memcpy(adapter->rx_ring[i], &temp_ring[i],
+ sizeof(struct ngbe_ring));
+ }
+
+ adapter->rx_ring_count = new_rx_count;
+ }
+
+err_setup:
+ ngbe_up(adapter);
+ vfree(temp_ring);
+clear_reset:
+ clear_bit(__NGBE_RESETTING, &adapter->state);
+ return err;
+}
+
+static int ngbe_get_sset_count(struct net_device *netdev, int sset)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ switch (sset) {
+ case ETH_SS_TEST:
+ return NGBE_TEST_LEN;
+ case ETH_SS_STATS:
+ if (adapter->num_tx_queues <= NGBE_NUM_RX_QUEUES) {
+ return NGBE_STATS_LEN - (NGBE_NUM_RX_QUEUES - adapter->num_tx_queues)*
+ (sizeof(struct ngbe_queue_stats) / sizeof(u64))*2;
+ } else {
+ return NGBE_STATS_LEN;
+ }
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void ngbe_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats __always_unused *stats,
+ u64 *data)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct rtnl_link_stats64 temp;
+ const struct rtnl_link_stats64 *net_stats;
+
+ u64 *queue_stat;
+ int stat_count, k;
+ unsigned int start;
+ struct ngbe_ring *ring;
+ int i, j;
+ char *p;
+
+ ngbe_update_stats(adapter);
+ net_stats = dev_get_stats(netdev, &temp);
+
+ for (i = 0; i < NGBE_NETDEV_STATS_LEN; i++) {
+ p = (char *)net_stats + ngbe_gstrings_net_stats[i].stat_offset;
+ data[i] = (ngbe_gstrings_net_stats[i].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+ for (j = 0; j < NGBE_GLOBAL_STATS_LEN; j++, i++) {
+ p = (char *)adapter + ngbe_gstrings_stats[j].stat_offset;
+ data[i] = (ngbe_gstrings_stats[j].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+
+ for (j = 0; j < adapter->num_tx_queues; j++) {
+ ring = adapter->tx_ring[j];
+ if (!ring) {
+ data[i++] = 0;
+ data[i++] = 0;
+ continue;
+ }
+
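+		/* read under the u64_stats seqcount so the packet/byte
+		 * pair stays consistent on 32-bit hosts
+		 */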
+ do {
+ start = u64_stats_fetch_begin_irq(&ring->syncp);
+ data[i] = ring->stats.packets;
+ data[i+1] = ring->stats.bytes;
+ } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+ i += 2;
+ }
+
+ for (j = 0; j < adapter->num_rx_queues; j++) {
+ ring = adapter->rx_ring[j];
+ if (!ring) {
+ data[i++] = 0;
+ data[i++] = 0;
+ continue;
+ }
+
+ do {
+ start = u64_stats_fetch_begin_irq(&ring->syncp);
+ data[i] = ring->stats.packets;
+ data[i+1] = ring->stats.bytes;
+ } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+ i += 2;
+ }
+
+ for (j = 0; j < NGBE_MAX_PACKET_BUFFERS; j++) {
+ data[i++] = adapter->stats.pxontxc[j];
+ data[i++] = adapter->stats.pxofftxc[j];
+ }
+ for (j = 0; j < NGBE_MAX_PACKET_BUFFERS; j++) {
+ data[i++] = adapter->stats.pxonrxc[j];
+ data[i++] = adapter->stats.pxoffrxc[j];
+ }
+
+ stat_count = sizeof(struct vf_stats) / sizeof(u64);
+ for (j = 0; j < adapter->num_vfs; j++) {
+ queue_stat = (u64 *)&adapter->vfinfo[j].vfstats;
+ for (k = 0; k < stat_count; k++)
+ data[i + k] = queue_stat[k];
+ queue_stat = (u64 *)&adapter->vfinfo[j].saved_rst_vfstats;
+ for (k = 0; k < stat_count; k++)
+ data[i + k] += queue_stat[k];
+ i += k;
+ }
+}
+
+static void ngbe_get_strings(struct net_device *netdev, u32 stringset,
+ u8 *data)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ char *p = (char *)data;
+ int i;
+
+ switch (stringset) {
+ case ETH_SS_TEST:
+ memcpy(data, *ngbe_gstrings_test,
+ NGBE_TEST_LEN * ETH_GSTRING_LEN);
+ break;
+ case ETH_SS_STATS:
+ for (i = 0; i < NGBE_NETDEV_STATS_LEN; i++) {
+ memcpy(p, ngbe_gstrings_net_stats[i].stat_string,
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < NGBE_GLOBAL_STATS_LEN; i++) {
+ memcpy(p, ngbe_gstrings_stats[i].stat_string,
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+ sprintf(p, "tx_queue_%u_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_queue_%u_bytes", i);
+ p += ETH_GSTRING_LEN;
+ }
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+ sprintf(p, "rx_queue_%u_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_bytes", i);
+ p += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < NGBE_MAX_PACKET_BUFFERS; i++) {
+ sprintf(p, "tx_pb_%u_pxon", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_pb_%u_pxoff", i);
+ p += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < NGBE_MAX_PACKET_BUFFERS; i++) {
+ sprintf(p, "rx_pb_%u_pxon", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_pb_%u_pxoff", i);
+ p += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < adapter->num_vfs; i++) {
+ sprintf(p, "VF %d Rx Packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "VF %d Rx Bytes", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "VF %d Tx Packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "VF %d Tx Bytes", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "VF %d MC Packets", i);
+ p += ETH_GSTRING_LEN;
+ }
+ /* BUG_ON(p - data != NGBE_STATS_LEN * ETH_GSTRING_LEN); */
+ break;
+ }
+}
+
+static int ngbe_link_test(struct ngbe_adapter *adapter, u64 *data)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ bool link_up;
+ u32 link_speed = 0;
+
+ if (NGBE_REMOVED(hw->hw_addr)) {
+ *data = 1;
+ return 1;
+ }
+ *data = 0;
+ TCALL(hw, mac.ops.check_link, &link_speed, &link_up, true);
+ if (!link_up)
+ *data = 1;
+ return *data;
+}
+
+/* ethtool register test data */
+struct ngbe_reg_test {
+ u32 reg;
+ u8 array_len;
+ u8 test_type;
+ u32 mask;
+ u32 write;
+};
+
+/* In the hardware, registers are laid out either singly, in arrays
+ * spaced 0x40 bytes apart, or in contiguous tables. We assume
+ * most tests take place on arrays or single registers (handled
+ * as a single-element array) and special-case the tables.
+ * Table tests are always pattern tests.
+ *
+ * We also make provision for some required setup steps by specifying
+ * registers to be written without any read-back testing.
+ */
+
+#define PATTERN_TEST 1
+#define SET_READ_TEST 2
+#define WRITE_NO_TEST 3
+#define TABLE32_TEST 4
+#define TABLE64_TEST_LO 5
+#define TABLE64_TEST_HI 6
+
+/* default sapphire register test */
+static struct ngbe_reg_test reg_test_sapphire[] = {
+ { NGBE_RDB_RFCL, 1, PATTERN_TEST, 0x8007FFE0, 0x8007FFE0 },
+ { NGBE_RDB_RFCH, 1, PATTERN_TEST, 0x8007FFE0, 0x8007FFE0 },
+ { NGBE_PSR_VLAN_CTL, 1, PATTERN_TEST, 0x00000000, 0x00000000 },
+ { NGBE_PX_RR_BAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFF80 },
+ { NGBE_PX_RR_BAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { NGBE_PX_RR_CFG(0), 4, WRITE_NO_TEST, 0, NGBE_PX_RR_CFG_RR_EN },
+ { NGBE_RDB_RFCH, 1, PATTERN_TEST, 0x8007FFE0, 0x8007FFE0 },
+ { NGBE_RDB_RFCV, 1, PATTERN_TEST, 0xFFFF0000, 0xFFFF0000 },
+ { NGBE_PX_TR_BAL(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { NGBE_PX_TR_BAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { NGBE_RDB_PB_CTL, 1, SET_READ_TEST, 0x00000001, 0x00000001 },
+ { NGBE_PSR_MC_TBL(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { .reg = 0 }
+};
+
+
+static bool reg_pattern_test(struct ngbe_adapter *adapter, u64 *data, int reg,
+ u32 mask, u32 write)
+{
+ u32 pat, val, before;
+ static const u32 test_pattern[] = {
+ 0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
+ };
+
+ if (NGBE_REMOVED(adapter->hw.hw_addr)) {
+ *data = 1;
+ return true;
+ }
+ for (pat = 0; pat < ARRAY_SIZE(test_pattern); pat++) {
+ before = rd32(&adapter->hw, reg);
+ wr32(&adapter->hw, reg, test_pattern[pat] & write);
+ val = rd32(&adapter->hw, reg);
+ if (val != (test_pattern[pat] & write & mask)) {
+ e_err(drv,
+ "pattern test reg %04X failed: got 0x%08X "
+ "expected 0x%08X\n",
+ reg, val, test_pattern[pat] & write & mask);
+ *data = reg;
+ wr32(&adapter->hw, reg, before);
+ return true;
+ }
+ wr32(&adapter->hw, reg, before);
+ }
+ return false;
+}
+
+static bool reg_set_and_check(struct ngbe_adapter *adapter, u64 *data, int reg,
+ u32 mask, u32 write)
+{
+ u32 val, before;
+
+ if (NGBE_REMOVED(adapter->hw.hw_addr)) {
+ *data = 1;
+ return true;
+ }
+ before = rd32(&adapter->hw, reg);
+ wr32(&adapter->hw, reg, write & mask);
+ val = rd32(&adapter->hw, reg);
+ if ((write & mask) != (val & mask)) {
+ e_err(drv,
+ "set/check reg %04X test failed: got 0x%08X expected"
+ "0x%08X\n",
+ reg, (val & mask), (write & mask));
+ *data = reg;
+ wr32(&adapter->hw, reg, before);
+ return true;
+ }
+ wr32(&adapter->hw, reg, before);
+ return false;
+}
+
+static bool ngbe_reg_test(struct ngbe_adapter *adapter, u64 *data)
+{
+ struct ngbe_reg_test *test;
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 i;
+
+ if (NGBE_REMOVED(hw->hw_addr)) {
+ e_err(drv, "Adapter removed - register test blocked\n");
+ *data = 1;
+ return true;
+ }
+
+ test = reg_test_sapphire;
+
+ /*
+ * Perform the remainder of the register test, looping through
+ * the test table until we either fail or reach the null entry.
+ */
+ while (test->reg) {
+ for (i = 0; i < test->array_len; i++) {
+ bool b = false;
+
+ switch (test->test_type) {
+ case PATTERN_TEST:
+ b = reg_pattern_test(adapter, data,
+ test->reg + (i * 0x40),
+ test->mask,
+ test->write);
+ break;
+ case SET_READ_TEST:
+ b = reg_set_and_check(adapter, data,
+ test->reg + (i * 0x40),
+ test->mask,
+ test->write);
+ break;
+ case WRITE_NO_TEST:
+ wr32(hw, test->reg + (i * 0x40),
+ test->write);
+ break;
+ case TABLE32_TEST:
+ b = reg_pattern_test(adapter, data,
+ test->reg + (i * 4),
+ test->mask,
+ test->write);
+ break;
+ case TABLE64_TEST_LO:
+ b = reg_pattern_test(adapter, data,
+ test->reg + (i * 8),
+ test->mask,
+ test->write);
+ break;
+ case TABLE64_TEST_HI:
+ b = reg_pattern_test(adapter, data,
+ (test->reg + 4) + (i * 8),
+ test->mask,
+ test->write);
+ break;
+ }
+ if (b)
+ return true;
+ }
+ test++;
+ }
+
+ *data = 0;
+ return false;
+}
+
+static bool ngbe_eeprom_test(struct ngbe_adapter *adapter, u64 *data)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 devcap;
+
+ if (TCALL(hw, eeprom.ops.eeprom_chksum_cap_st, NGBE_CALSUM_COMMAND, &devcap)) {
+ *data = 1;
+ return true;
+ }
+
+ *data = 0;
+ return false;
+}
+
+static irqreturn_t ngbe_test_intr(int __always_unused irq, void *data)
+{
+ struct net_device *netdev = (struct net_device *)data;
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ u64 icr;
+
+ /* get misc interrupt, as cannot get ring interrupt status */
+ icr = ngbe_misc_isb(adapter, NGBE_ISB_VEC1);
+ icr <<= 32;
+ icr |= ngbe_misc_isb(adapter, NGBE_ISB_VEC0);
+
+ adapter->test_icr = icr;
+
+ return IRQ_HANDLED;
+}
+
+static int ngbe_intr_test(struct ngbe_adapter *adapter, u64 *data)
+{
+ struct net_device *netdev = adapter->netdev;
+ u64 mask;
+ u32 i = 0, shared_int = true;
+ u32 irq = adapter->pdev->irq;
+
+ if (NGBE_REMOVED(adapter->hw.hw_addr)) {
+ *data = 1;
+ return -1;
+ }
+ *data = 0;
+
+ /* Hook up test interrupt handler just for this test */
+ if (adapter->msix_entries) {
+ /* NOTE: we don't test MSI-X interrupts here, yet */
+ return 0;
+ } else if (adapter->flags & NGBE_FLAG_MSI_ENABLED) {
+ shared_int = false;
+ if (request_irq(irq, &ngbe_test_intr, 0, netdev->name,
+ netdev)) {
+ *data = 1;
+ return -1;
+ }
+ } else if (!request_irq(irq, &ngbe_test_intr, IRQF_PROBE_SHARED,
+ netdev->name, netdev)) {
+ shared_int = false;
+ } else if (request_irq(irq, &ngbe_test_intr, IRQF_SHARED,
+ netdev->name, netdev)) {
+ *data = 1;
+ return -1;
+ }
+ e_info(hw, "testing %s interrupt\n",
+ (shared_int ? "shared" : "unshared"));
+
+ /* Disable all the interrupts */
+ ngbe_irq_disable(adapter);
+ NGBE_WRITE_FLUSH(&adapter->hw);
+ usleep_range(10000, 20000);
+
+ /* Test each interrupt */
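+ /* only vector 0 is exercised in this single-vector setup */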
+ for (; i < 1; i++) {
+ /* Interrupt to test */
+ mask = 1ULL << i;
+
+ if (!shared_int) {
+ /*
+ * Disable the interrupts to be reported in
+ * the cause register and then force the same
+ * interrupt and see if one gets posted. If
+ * an interrupt was posted to the bus, the
+ * test failed.
+ */
+ adapter->test_icr = 0;
+ ngbe_intr_disable(&adapter->hw, ~mask);
+ ngbe_intr_trigger(&adapter->hw, mask);
+ NGBE_WRITE_FLUSH(&adapter->hw);
+ usleep_range(10000, 20000);
+
+ if (adapter->test_icr & mask) {
+ *data = 3;
+ break;
+ }
+ }
+
+ /*
+ * Enable the interrupt to be reported in the cause
+ * register and then force the same interrupt and see
+ * if one gets posted. If an interrupt was not posted
+ * to the bus, the test failed.
+ */
+ adapter->test_icr = 0;
+ ngbe_intr_disable(&adapter->hw, NGBE_INTR_ALL);
+ ngbe_intr_trigger(&adapter->hw, mask);
+ NGBE_WRITE_FLUSH(&adapter->hw);
+ usleep_range(10000, 20000);
+
+ if (!(adapter->test_icr & mask)) {
+ *data = 0;
+ break;
+ }
+ }
+
+ /* Disable all the interrupts */
+ ngbe_intr_disable(&adapter->hw, NGBE_INTR_ALL);
+ NGBE_WRITE_FLUSH(&adapter->hw);
+ usleep_range(10000, 20000);
+
+ /* Unhook test interrupt handler */
+ free_irq(irq, netdev);
+
+ return *data;
+}
+
+static void ngbe_free_desc_rings(struct ngbe_adapter *adapter)
+{
+ struct ngbe_ring *tx_ring = &adapter->test_tx_ring;
+ struct ngbe_ring *rx_ring = &adapter->test_rx_ring;
+ struct ngbe_hw *hw = &adapter->hw;
+
+ /* shut down the DMA engines now so they can be reinitialized later */
+
+ /* first Rx */
+ TCALL(hw, mac.ops.disable_rx);
+ ngbe_disable_rx_queue(adapter, rx_ring);
+
+ /* now Tx */
+ wr32(hw, NGBE_PX_TR_CFG(tx_ring->reg_idx), 0);
+
+ wr32m(hw, NGBE_TDM_CTL, NGBE_TDM_CTL_TE, 0);
+
+ ngbe_reset(adapter);
+
+ ngbe_free_tx_resources(&adapter->test_tx_ring);
+ ngbe_free_rx_resources(&adapter->test_rx_ring);
+}
+
+static int ngbe_setup_desc_rings(struct ngbe_adapter *adapter)
+{
+ struct ngbe_ring *tx_ring = &adapter->test_tx_ring;
+ struct ngbe_ring *rx_ring = &adapter->test_rx_ring;
+ struct ngbe_hw *hw = &adapter->hw;
+ int ret_val;
+ int err;
+
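+ /* give the Rx packet buffer an even split before standing up the test rings */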
+ TCALL(hw, mac.ops.setup_rxpba, 0, 0, PBA_STRATEGY_EQUAL);
+
+ /* Setup Tx descriptor ring and Tx buffers */
+ tx_ring->count = NGBE_DEFAULT_TXD;
+ tx_ring->queue_index = 0;
+ tx_ring->dev = pci_dev_to_dev(adapter->pdev);
+ tx_ring->netdev = adapter->netdev;
+ tx_ring->reg_idx = adapter->tx_ring[0]->reg_idx;
+
+ err = ngbe_setup_tx_resources(tx_ring);
+ if (err)
+ return 1;
+
+ wr32m(&adapter->hw, NGBE_TDM_CTL,
+ NGBE_TDM_CTL_TE, NGBE_TDM_CTL_TE);
+ wr32m(hw, NGBE_TSEC_CTL, 0x2, 0);
+ wr32m(hw, NGBE_RSEC_CTL, 0x2, 0);
+ ngbe_configure_tx_ring(adapter, tx_ring);
+
+ /* enable mac transmitter */
+ wr32m(hw, NGBE_MAC_TX_CFG,
+ NGBE_MAC_TX_CFG_TE | NGBE_MAC_TX_CFG_SPEED_MASK,
+ NGBE_MAC_TX_CFG_TE | NGBE_MAC_TX_CFG_SPEED_1G);
+
+ /* Setup Rx Descriptor ring and Rx buffers */
+ rx_ring->count = NGBE_DEFAULT_RXD;
+ rx_ring->queue_index = 0;
+ rx_ring->dev = pci_dev_to_dev(adapter->pdev);
+ rx_ring->netdev = adapter->netdev;
+ rx_ring->reg_idx = adapter->rx_ring[0]->reg_idx;
+
+ err = ngbe_setup_rx_resources(rx_ring);
+ if (err) {
+ ret_val = 4;
+ goto err_nomem;
+ }
+
+ TCALL(hw, mac.ops.disable_rx);
+
+ ngbe_configure_rx_ring(adapter, rx_ring);
+
+ TCALL(hw, mac.ops.enable_rx);
+
+ return 0;
+
+err_nomem:
+ ngbe_free_desc_rings(adapter);
+ return ret_val;
+}
+
+static int ngbe_setup_loopback_test(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 reg_data;
+
+ /* Setup MAC loopback */
+ wr32m(hw, NGBE_MAC_RX_CFG,
+ NGBE_MAC_RX_CFG_LM, NGBE_MAC_RX_CFG_LM);
+
+ reg_data = rd32(hw, NGBE_PSR_CTL);
+ reg_data |= NGBE_PSR_CTL_BAM | NGBE_PSR_CTL_UPE |
+ NGBE_PSR_CTL_MPE | NGBE_PSR_CTL_TPE;
+ wr32(hw, NGBE_PSR_CTL, reg_data);
+
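+ /* vendor magic: registers 0x17000/0x17204 are not documented here;
+ * these values put the internal path into a loopback-friendly mode
+ */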
+ wr32(hw, 0x17000,
+ ((rd32(hw, 0x17000) |
+ 0x00000040U) & ~0x1U));
+
+ wr32(hw, 0x17204, 0x4);
+ wr32(hw, NGBE_PSR_VLAN_CTL,
+ rd32(hw, NGBE_PSR_VLAN_CTL) &
+ ~NGBE_PSR_VLAN_CTL_VFE);
+
+ NGBE_WRITE_FLUSH(hw);
+ usleep_range(10000, 20000);
+
+ return 0;
+}
+
+static void ngbe_loopback_cleanup(struct ngbe_adapter *adapter)
+{
+ wr32m(&adapter->hw, NGBE_MAC_RX_CFG,
+ NGBE_MAC_RX_CFG_LM, ~NGBE_MAC_RX_CFG_LM);
+}
+
+static void ngbe_create_lbtest_frame(struct sk_buff *skb,
+ unsigned int frame_size)
+{
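+ /* fill the frame with 0xFF, then overlay 0xAA and the 0xBE/0xAF
+ * markers past the midpoint so the receiver can validate the payload
+ */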
+ memset(skb->data, 0xFF, frame_size);
+ frame_size >>= 1;
+ memset(&skb->data[frame_size], 0xAA, frame_size / 2 - 1);
+ memset(&skb->data[frame_size + 10], 0xBE, 1);
+ memset(&skb->data[frame_size + 12], 0xAF, 1);
+}
+
+static bool ngbe_check_lbtest_frame(struct ngbe_rx_buffer *rx_buffer,
+ unsigned int frame_size)
+{
+ unsigned char *data;
+ bool match = true;
+
+ frame_size >>= 1;
+
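+ /* map the Rx page and check the marker bytes laid down by
+ * ngbe_create_lbtest_frame()
+ */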
+ data = kmap(rx_buffer->page) + rx_buffer->page_offset;
+ if (data[3] != 0xFF ||
+ data[frame_size + 10] != 0xBE ||
+ data[frame_size + 12] != 0xAF)
+ match = false;
+
+ kunmap(rx_buffer->page);
+
+ return match;
+}
+
+static u16 ngbe_clean_test_rings(struct ngbe_ring *rx_ring,
+ struct ngbe_ring *tx_ring,
+ unsigned int size)
+{
+ union ngbe_rx_desc *rx_desc;
+ struct ngbe_rx_buffer *rx_buffer;
+ struct ngbe_tx_buffer *tx_buffer;
+ const int bufsz = ngbe_rx_bufsz(rx_ring);
+ u16 rx_ntc, tx_ntc, count = 0;
+
+ /* initialize next to clean and descriptor values */
+ rx_ntc = rx_ring->next_to_clean;
+ tx_ntc = tx_ring->next_to_clean;
+ rx_desc = NGBE_RX_DESC(rx_ring, rx_ntc);
+
+ while (ngbe_test_staterr(rx_desc, NGBE_RXD_STAT_DD)) {
+ /* unmap buffer on Tx side */
+ tx_buffer = &tx_ring->tx_buffer_info[tx_ntc];
+ ngbe_unmap_and_free_tx_resource(tx_ring, tx_buffer);
+
+ /* check Rx buffer */
+ rx_buffer = &rx_ring->rx_buffer_info[rx_ntc];
+
+ /* sync Rx buffer for CPU read */
+ dma_sync_single_for_cpu(rx_ring->dev,
+ rx_buffer->page_dma,
+ bufsz,
+ DMA_FROM_DEVICE);
+
+ /* verify contents of skb */
+ if (ngbe_check_lbtest_frame(rx_buffer, size))
+ count++;
+
+ /* sync Rx buffer for device write */
+ dma_sync_single_for_device(rx_ring->dev,
+ rx_buffer->page_dma,
+ bufsz,
+ DMA_FROM_DEVICE);
+
+ /* increment Rx/Tx next to clean counters */
+ rx_ntc++;
+ if (rx_ntc == rx_ring->count)
+ rx_ntc = 0;
+ tx_ntc++;
+ if (tx_ntc == tx_ring->count)
+ tx_ntc = 0;
+
+ /* fetch next descriptor */
+ rx_desc = NGBE_RX_DESC(rx_ring, rx_ntc);
+ }
+
+ /* re-map buffers to ring, store next to clean values */
+ ngbe_alloc_rx_buffers(rx_ring, count);
+ rx_ring->next_to_clean = rx_ntc;
+ tx_ring->next_to_clean = tx_ntc;
+
+ return count;
+}
+
+static int ngbe_run_loopback_test(struct ngbe_adapter *adapter)
+{
+ struct ngbe_ring *tx_ring = &adapter->test_tx_ring;
+ struct ngbe_ring *rx_ring = &adapter->test_rx_ring;
+ int i, j, lc, good_cnt, ret_val = 0;
+ unsigned int size = 1024;
+ netdev_tx_t tx_ret_val;
+ struct sk_buff *skb;
+ u32 flags_orig = adapter->flags;
+
+ /* DCB can modify the frames on Tx */
+ adapter->flags &= ~NGBE_FLAG_DCB_ENABLED;
+
+ /* allocate test skb */
+ skb = alloc_skb(size, GFP_KERNEL);
+ if (!skb)
+ return 11;
+
+ /* place data into test skb */
+ ngbe_create_lbtest_frame(skb, size);
+ skb_put(skb, size);
+
+ /*
+ * Calculate the loop count based on the largest descriptor ring
+ * The idea is to wrap the largest ring a number of times using 64
+ * send/receive pairs during each loop
+ */
+
+ if (rx_ring->count <= tx_ring->count)
+ lc = ((tx_ring->count / 64) * 2) + 1;
+ else
+ lc = ((rx_ring->count / 64) * 2) + 1;
+
+ for (j = 0; j <= lc; j++) {
+ /* reset count of good packets */
+ good_cnt = 0;
+
+ /* place 64 packets on the transmit queue*/
+ for (i = 0; i < 64; i++) {
+ skb_get(skb);
+ tx_ret_val = ngbe_xmit_frame_ring(skb,
+ adapter,
+ tx_ring);
+ if (tx_ret_val == NETDEV_TX_OK)
+ good_cnt++;
+ }
+
+ msleep(10);
+
+ if (good_cnt != 64) {
+ ret_val = 12;
+ break;
+ }
+
+ /* allow 200 milliseconds for packets to go from Tx to Rx */
+ msleep(200);
+
+ good_cnt = ngbe_clean_test_rings(rx_ring, tx_ring, size);
+ if (good_cnt != 64) {
+ ret_val = 13;
+ e_dev_err("ngbe_run_loopback_test: recv_cnt = %d\n", good_cnt);
+ break;
+ }
+ }
+
+ /* free the original skb */
+ kfree_skb(skb);
+ adapter->flags = flags_orig;
+
+ return ret_val;
+}
+
+static int ngbe_loopback_test(struct ngbe_adapter *adapter, u64 *data)
+{
+ *data = ngbe_setup_desc_rings(adapter);
+ if (*data)
+ goto out;
+ *data = ngbe_setup_loopback_test(adapter);
+ if (*data)
+ goto err_loopback;
+ *data = ngbe_run_loopback_test(adapter);
+ if (*data)
+ e_info(hw, "mac loopback testing failed\n");
+ ngbe_loopback_cleanup(adapter);
+
+err_loopback:
+ ngbe_free_desc_rings(adapter);
+out:
+ return *data;
+}
+
+static void ngbe_diag_test(struct net_device *netdev,
+ struct ethtool_test *eth_test, u64 *data)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ bool if_running = netif_running(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+
+ e_dev_info("ngbe_diag_test: start test\n");
+
+ if (NGBE_REMOVED(hw->hw_addr)) {
+ e_err(hw, "Adapter removed - test blocked\n");
+ data[0] = 1;
+ data[1] = 1;
+ data[2] = 1;
+ data[3] = 1;
+ data[4] = 1;
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+ return;
+ }
+ set_bit(__NGBE_TESTING, &adapter->state);
+ if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+ if (adapter->flags & NGBE_FLAG_SRIOV_ENABLED) {
+ int i;
+ for (i = 0; i < adapter->num_vfs; i++) {
+ if (adapter->vfinfo[i].clear_to_send) {
+ e_warn(drv, "Please take active VFS "
+ "offline and restart the "
+ "adapter before running NIC "
+ "diagnostics\n");
+ data[0] = 1;
+ data[1] = 1;
+ data[2] = 1;
+ data[3] = 1;
+ data[4] = 1;
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+ clear_bit(__NGBE_TESTING,
+ &adapter->state);
+ goto skip_ol_tests;
+ }
+ }
+ }
+
+ /* Offline tests */
+ e_info(hw, "offline testing starting\n");
+
+ /* Link test performed before the hardware reset so autoneg doesn't
+ * interfere with the test result
+ */
+ if (ngbe_link_test(adapter, &data[4]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ if (if_running) {
+ /* indicate we're in test mode */
+ ngbe_close(netdev);
+ } else {
+ msleep(20);
+ ngbe_reset(adapter);
+ }
+
+ e_info(hw, "register testing starting\n");
+
+ if (ngbe_reg_test(adapter, &data[0]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ msleep(20);
+ ngbe_reset(adapter);
+ e_info(hw, "eeprom testing starting\n");
+ if (ngbe_eeprom_test(adapter, &data[1]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+ msleep(20);
+
+ ngbe_reset(adapter);
+ e_info(hw, "interrupt testing starting\n");
+ if (ngbe_intr_test(adapter, &data[2]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ if (!(((hw->subsystem_device_id & OEM_MASK) == OCP_CARD) ||
+ ((hw->subsystem_device_id & NCSI_SUP_MASK) == NCSI_SUP))) {
+ /* If SR-IOV or VMDq is enabled then skip the MAC
+ * loopback diagnostic.
+ */
+ if (adapter->flags & (NGBE_FLAG_SRIOV_ENABLED |
+ NGBE_FLAG_VMDQ_ENABLED)) {
+ e_info(hw, "skip MAC loopback diagnostic in VT mode\n");
+ data[3] = 0;
+ goto skip_loopback;
+ }
+
+ e_info(hw, "loopback testing starting\n");
+ ngbe_loopback_test(adapter, &data[3]);
+ }
+
+ data[3] = 0;
+
+skip_loopback:
+ msleep(20);
+ ngbe_reset(adapter);
+
+ /* clear testing bit and return adapter to previous state */
+ clear_bit(__NGBE_TESTING, &adapter->state);
+ if (if_running)
+ ngbe_open(netdev);
+ } else {
+ e_info(hw, "online testing starting\n");
+
+ /* Online tests */
+ if (ngbe_link_test(adapter, &data[4]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ /* Offline tests aren't run; pass by default */
+ data[0] = 0;
+ data[1] = 0;
+ data[2] = 0;
+ data[3] = 0;
+
+ clear_bit(__NGBE_TESTING, &adapter->state);
+ }
+
+skip_ol_tests:
+ msleep_interruptible(4 * 1000);
+}
+
+static int ngbe_wol_exclusion(struct ngbe_adapter *adapter,
+ struct ethtool_wolinfo *wol)
+{
+ int retval = 0;
+
+ /* WOL not supported for all devices */
+ if (!ngbe_wol_supported(adapter)) {
+ retval = 1;
+ wol->supported = 0;
+ }
+
+ return retval;
+}
+
+static void ngbe_get_wol(struct net_device *netdev,
+ struct ethtool_wolinfo *wol)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ struct ngbe_hw *hw = &adapter->hw;
+
+ wol->supported = WAKE_UCAST | WAKE_MCAST |
+ WAKE_BCAST | WAKE_MAGIC;
+ wol->wolopts = 0;
+
+ if (ngbe_wol_exclusion(adapter, wol) ||
+ !device_can_wakeup(pci_dev_to_dev(adapter->pdev)))
+ return;
+
+ if (adapter->wol & NGBE_PSR_WKUP_CTL_EX)
+ wol->wolopts |= WAKE_UCAST;
+ if (adapter->wol & NGBE_PSR_WKUP_CTL_MC)
+ wol->wolopts |= WAKE_MCAST;
+ if (adapter->wol & NGBE_PSR_WKUP_CTL_BC)
+ wol->wolopts |= WAKE_BCAST;
+ if (adapter->wol & NGBE_PSR_WKUP_CTL_MAG)
+ wol->wolopts |= WAKE_MAGIC;
+
+ if (!((hw->subsystem_device_id & WOL_SUP_MASK) == WOL_SUP))
+ wol->wolopts = 0;
+}
+
+static int ngbe_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 slot = hw->bus.lan_id;
+ u16 value;
+
+ if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE))
+ return -EOPNOTSUPP;
+
+ if (ngbe_wol_exclusion(adapter, wol))
+ return wol->wolopts ? -EOPNOTSUPP : 0;
+ if (!((hw->subsystem_device_id & WOL_SUP_MASK) == WOL_SUP))
+ return -EOPNOTSUPP;
+ adapter->wol = 0;
+
+ if (wol->wolopts & WAKE_UCAST)
+ adapter->wol |= NGBE_PSR_WKUP_CTL_EX;
+ if (wol->wolopts & WAKE_MCAST)
+ adapter->wol |= NGBE_PSR_WKUP_CTL_MC;
+ if (wol->wolopts & WAKE_BCAST)
+ adapter->wol |= NGBE_PSR_WKUP_CTL_BC;
+ if (wol->wolopts & WAKE_MAGIC) {
+ adapter->wol |= NGBE_PSR_WKUP_CTL_MAG;
+ hw->wol_enabled = !!(adapter->wol);
+ wr32(hw, NGBE_PSR_WKUP_CTL, adapter->wol);
+ ngbe_read_ee_hostif(hw, 0x7FE, &value);
+ /* enable WoL in the EEPROM shadow RAM */
+ ngbe_write_ee_hostif(hw, 0x7FE, value | (1 << slot));
+ ngbe_write_ee_hostif(hw, 0x7FF, 0x5a5a);
+ device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev), adapter->wol);
+ return 0;
+ }
+
+ ngbe_read_ee_hostif(hw, 0x7FE, &value);
+ /* disable WoL in the EEPROM shadow RAM */
+ ngbe_write_ee_hostif(hw, 0x7FE, value & ~(1 << slot));
+ ngbe_write_ee_hostif(hw, 0x7FF, 0x5a5a);
+ return 0;
+}
+
+static int ngbe_nway_reset(struct net_device *netdev)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ if (netif_running(netdev))
+ ngbe_reinit_locked(adapter);
+
+ return 0;
+}
+
+static int ngbe_set_phys_id(struct net_device *netdev,
+ enum ethtool_phys_id_state state)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+
+ switch (state) {
+ case ETHTOOL_ID_ACTIVE:
+ adapter->led_reg = rd32(hw, NGBE_CFG_LED_CTL);
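+ /* save LED state; the return value asks the ethtool core to
+ * drive two on/off blink cycles per second
+ */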
+ return 2;
+
+ case ETHTOOL_ID_ON:
+ TCALL(hw, mac.ops.led_on, NGBE_LED_LINK_1G);
+ break;
+
+ case ETHTOOL_ID_OFF:
+ TCALL(hw, mac.ops.led_off, NGBE_LED_LINK_100M | NGBE_LED_LINK_1G);
+ break;
+
+ case ETHTOOL_ID_INACTIVE:
+ /* Restore LED settings */
+ wr32(&adapter->hw, NGBE_CFG_LED_CTL,
+ adapter->led_reg);
+ break;
+ }
+
+ return 0;
+}
+
+static int ngbe_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit;
+ /* only valid if in constant ITR mode */
+ if (adapter->rx_itr_setting <= 1)
+ ec->rx_coalesce_usecs = adapter->rx_itr_setting;
+ else
+ ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
+
+ /* if in mixed tx/rx queues per vector mode, report only rx settings */
+ if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count)
+ return 0;
+
+ /* only valid if in constant ITR mode */
+ if (adapter->tx_itr_setting <= 1)
+ ec->tx_coalesce_usecs = adapter->tx_itr_setting;
+ else
+ ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2;
+
+ return 0;
+}
+
+static int ngbe_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ struct ngbe_q_vector *q_vector;
+ int i;
+ u16 tx_itr_param, rx_itr_param;
+ u16 tx_itr_prev;
+ bool need_reset = false;
+
+ if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count) {
+ /* reject Tx specific changes in case of mixed RxTx vectors */
+ if (ec->tx_coalesce_usecs)
+ return -EINVAL;
+ tx_itr_prev = adapter->rx_itr_setting;
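+ /* mixed Rx/Tx vectors track the Rx ITR setting */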
+ } else {
+ tx_itr_prev = adapter->tx_itr_setting;
+ }
+
+ if (ec->tx_max_coalesced_frames_irq)
+ adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq;
+
+ if ((ec->rx_coalesce_usecs > (NGBE_MAX_EITR >> 2)) ||
+ (ec->tx_coalesce_usecs > (NGBE_MAX_EITR >> 2)))
+ return -EINVAL;
+
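+ /* ITR values are stored internally as usecs << 2; raw settings of
+ * 0 and 1 are kept verbatim and treated as special (off/default) modes
+ */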
+ if (ec->rx_coalesce_usecs > 1)
+ adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2;
+ else
+ adapter->rx_itr_setting = ec->rx_coalesce_usecs;
+
+ if (adapter->rx_itr_setting == 1)
+ rx_itr_param = NGBE_20K_ITR;
+ else
+ rx_itr_param = adapter->rx_itr_setting;
+
+ if (ec->tx_coalesce_usecs > 1)
+ adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2;
+ else
+ adapter->tx_itr_setting = ec->tx_coalesce_usecs;
+
+ if (adapter->tx_itr_setting == 1)
+ tx_itr_param = NGBE_20K_ITR;
+ else
+ tx_itr_param = adapter->tx_itr_setting;
+
+ /* mixed Rx/Tx */
+ if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count)
+ adapter->tx_itr_setting = adapter->rx_itr_setting;
+
+ /* detect ITR changes that require update of TXDCTL.WTHRESH */
+ if ((adapter->tx_itr_setting != 1) &&
+ (adapter->tx_itr_setting < NGBE_70K_ITR)) {
+ if ((tx_itr_prev == 1) ||
+ (tx_itr_prev >= NGBE_70K_ITR))
+ need_reset = true;
+ } else {
+ if ((tx_itr_prev != 1) &&
+ (tx_itr_prev < NGBE_70K_ITR))
+ need_reset = true;
+ }
+
+ if (adapter->hw.mac.dmac_config.watchdog_timer &&
+ (!adapter->rx_itr_setting && !adapter->tx_itr_setting)) {
+ e_info(probe,
+ "Disabling DMA coalescing because interrupt throttling "
+ "is disabled\n");
+ adapter->hw.mac.dmac_config.watchdog_timer = 0;
+ TCALL(hw, mac.ops.dmac_config);
+ }
+
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ q_vector = adapter->q_vector[i];
+ q_vector->tx.work_limit = adapter->tx_work_limit;
+ q_vector->rx.work_limit = adapter->rx_work_limit;
+ if (q_vector->tx.count && !q_vector->rx.count)
+ /* tx only */
+ q_vector->itr = tx_itr_param;
+ else
+ /* rx only or mixed */
+ q_vector->itr = rx_itr_param;
+ ngbe_write_eitr(q_vector);
+ }
+
+ /*
+ * do reset here at the end to make sure EITR==0 case is handled
+ * correctly w.r.t stopping tx, and changing TXDCTL.WTHRESH settings
+ * also locks in RSC enable/disable which requires reset
+ */
+ if (need_reset)
+ ngbe_do_reset(netdev);
+
+ return 0;
+}
+
+static int ngbe_get_rss_hash_opts(struct ngbe_adapter *adapter,
+ struct ethtool_rxnfc *cmd)
+{
+ cmd->data = 0;
+
+ /* Report default options for RSS on ngbe */
+ switch (cmd->flow_type) {
+ case TCP_V4_FLOW:
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ /* fall through */
+ case UDP_V4_FLOW:
+ if (adapter->flags2 & NGBE_FLAG2_RSS_FIELD_IPV4_UDP)
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ /* fall through */
+ case SCTP_V4_FLOW:
+ case AH_ESP_V4_FLOW:
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ case IPV4_FLOW:
+ cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+ break;
+ case TCP_V6_FLOW:
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ /* fall through */
+ case UDP_V6_FLOW:
+ if (adapter->flags2 & NGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ /* fall through */
+ case SCTP_V6_FLOW:
+ case AH_ESP_V6_FLOW:
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ case IPV6_FLOW:
+ cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ngbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+ u32 *rule_locs)
+{
+ struct ngbe_adapter *adapter = netdev_priv(dev);
+ int ret = -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXRINGS:
+ cmd->data = adapter->num_rx_queues;
+ ret = 0;
+ break;
+ case ETHTOOL_GRXCLSRLCNT:
+ ret = 0;
+ break;
+ case ETHTOOL_GRXCLSRULE:
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ break;
+ case ETHTOOL_GRXFH:
+ ret = ngbe_get_rss_hash_opts(adapter, cmd);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+#define UDP_RSS_FLAGS (NGBE_FLAG2_RSS_FIELD_IPV4_UDP | \
+ NGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+static int ngbe_set_rss_hash_opt(struct ngbe_adapter *adapter,
+ struct ethtool_rxnfc *nfc)
+{
+ u32 flags2 = adapter->flags2;
+
+ /*
+ * RSS does not support anything other than hashing
+ * to queues on src and dst IPs and ports
+ */
+ if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3))
+ return -EINVAL;
+
+ switch (nfc->flow_type) {
+ case TCP_V4_FLOW:
+ case TCP_V6_FLOW:
+ if (!(nfc->data & RXH_IP_SRC) ||
+ !(nfc->data & RXH_IP_DST) ||
+ !(nfc->data & RXH_L4_B_0_1) ||
+ !(nfc->data & RXH_L4_B_2_3))
+ return -EINVAL;
+ break;
+ case UDP_V4_FLOW:
+ if (!(nfc->data & RXH_IP_SRC) ||
+ !(nfc->data & RXH_IP_DST))
+ return -EINVAL;
+ switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+ case 0:
+ flags2 &= ~NGBE_FLAG2_RSS_FIELD_IPV4_UDP;
+ break;
+ case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+ flags2 |= NGBE_FLAG2_RSS_FIELD_IPV4_UDP;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case UDP_V6_FLOW:
+ if (!(nfc->data & RXH_IP_SRC) ||
+ !(nfc->data & RXH_IP_DST))
+ return -EINVAL;
+ switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+ case 0:
+ flags2 &= ~NGBE_FLAG2_RSS_FIELD_IPV6_UDP;
+ break;
+ case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+ flags2 |= NGBE_FLAG2_RSS_FIELD_IPV6_UDP;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case AH_ESP_V4_FLOW:
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ case SCTP_V4_FLOW:
+ case AH_ESP_V6_FLOW:
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ case SCTP_V6_FLOW:
+ if (!(nfc->data & RXH_IP_SRC) ||
+ !(nfc->data & RXH_IP_DST) ||
+ (nfc->data & RXH_L4_B_0_1) ||
+ (nfc->data & RXH_L4_B_2_3))
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* if we changed something we need to update flags */
+ if (flags2 != adapter->flags2) {
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 mrqc;
+
+ mrqc = rd32(hw, NGBE_RDB_RA_CTL);
+
+ if ((flags2 & UDP_RSS_FLAGS) &&
+ !(adapter->flags2 & UDP_RSS_FLAGS))
+ e_warn(drv, "enabling UDP RSS: fragmented packets"
+ " may arrive out of order to the stack above\n");
+
+ adapter->flags2 = flags2;
+
+ /* Perform hash on these packet types */
+ mrqc |= NGBE_RDB_RA_CTL_RSS_IPV4
+ | NGBE_RDB_RA_CTL_RSS_IPV4_TCP
+ | NGBE_RDB_RA_CTL_RSS_IPV6
+ | NGBE_RDB_RA_CTL_RSS_IPV6_TCP;
+
+ mrqc &= ~(NGBE_RDB_RA_CTL_RSS_IPV4_UDP |
+ NGBE_RDB_RA_CTL_RSS_IPV6_UDP);
+
+ if (flags2 & NGBE_FLAG2_RSS_FIELD_IPV4_UDP)
+ mrqc |= NGBE_RDB_RA_CTL_RSS_IPV4_UDP;
+
+ if (flags2 & NGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+ mrqc |= NGBE_RDB_RA_CTL_RSS_IPV6_UDP;
+
+ wr32(hw, NGBE_RDB_RA_CTL, mrqc);
+ }
+
+ return 0;
+}
+
+static int ngbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ struct ngbe_adapter *adapter = netdev_priv(dev);
+ int ret = -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXCLSRLINS:
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ break;
+ case ETHTOOL_SRXFH:
+ ret = ngbe_set_rss_hash_opt(adapter, cmd);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int ngbe_rss_indir_tbl_max(struct ngbe_adapter *adapter)
+{
+ return 64;
+}
+
+static u32 ngbe_get_rxfh_key_size(struct net_device *netdev)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ return sizeof(adapter->rss_key);
+}
+
+static u32 ngbe_rss_indir_size(struct net_device *netdev)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ return ngbe_rss_indir_tbl_entries(adapter);
+}
+
+static void ngbe_get_reta(struct ngbe_adapter *adapter, u32 *indir)
+{
+ int i, reta_size = ngbe_rss_indir_tbl_entries(adapter);
+
+ for (i = 0; i < reta_size; i++)
+ indir[i] = adapter->rss_indir_tbl[i];
+}
+
+static int ngbe_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+ u8 *hfunc)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ if (hfunc)
+ *hfunc = ETH_RSS_HASH_TOP;
+
+ if (indir)
+ ngbe_get_reta(adapter, indir);
+
+ if (key)
+ memcpy(key, adapter->rss_key, ngbe_get_rxfh_key_size(netdev));
+
+ return 0;
+}
+
+static int ngbe_set_rxfh(struct net_device *netdev, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ int i;
+ u32 reta_entries = ngbe_rss_indir_tbl_entries(adapter);
+
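+ /* the hash function is fixed; only ETH_RSS_HASH_NO_CHANGE (0) is accepted */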
+ if (hfunc)
+ return -EINVAL;
+
+ /* Fill out the redirection table */
+ if (indir) {
+ int max_queues = min_t(int, adapter->num_rx_queues,
+ ngbe_rss_indir_tbl_max(adapter));
+
+ /*Allow at least 2 queues w/ SR-IOV.*/
+ if ((adapter->flags & NGBE_FLAG_SRIOV_ENABLED) &&
+ (max_queues < 2))
+ max_queues = 2;
+
+ /* Verify user input. */
+ for (i = 0; i < reta_entries; i++)
+ if (indir[i] >= max_queues)
+ return -EINVAL;
+
+ for (i = 0; i < reta_entries; i++)
+ adapter->rss_indir_tbl[i] = indir[i];
+ }
+
+ /* Fill out the rss hash key */
+ if (key)
+ memcpy(adapter->rss_key, key, ngbe_get_rxfh_key_size(netdev));
+
+ ngbe_store_reta(adapter);
+
+ return 0;
+}
+
+static int ngbe_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *info)
+{
+ struct ngbe_adapter *adapter = netdev_priv(dev);
+
+ /* we always support timestamping disabled */
+ info->rx_filters = 1 << HWTSTAMP_FILTER_NONE;
+
+ info->so_timestamping =
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ if (adapter->ptp_clock)
+ info->phc_index = ptp_clock_index(adapter->ptp_clock);
+ else
+ info->phc_index = -1;
+
+ info->tx_types =
+ (1 << HWTSTAMP_TX_OFF) |
+ (1 << HWTSTAMP_TX_ON);
+
+ info->rx_filters |=
+ (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
+
+ return 0;
+}
+
+static unsigned int ngbe_max_channels(struct ngbe_adapter *adapter)
+{
+ unsigned int max_combined;
+ u8 tcs = netdev_get_num_tc(adapter->netdev);
+
+ if (!(adapter->flags & NGBE_FLAG_MSIX_ENABLED)) {
+ /* We only support one q_vector without MSI-X */
+ max_combined = 1;
+ } else if (adapter->flags & NGBE_FLAG_SRIOV_ENABLED) {
+ /* SR-IOV currently only allows one queue on the PF */
+ max_combined = 1;
+ } else if (tcs > 1) {
+ /* For DCB report channels per traffic class */
+ if (tcs > 4) {
+ /* 8 TC w/ 8 queues per TC */
+ max_combined = 8;
+ } else {
+ /* 4 TC w/ 16 queues per TC */
+ max_combined = 16;
+ }
+ } else if (adapter->atr_sample_rate) {
+ /* support up to 64 queues with ATR */
+ max_combined = NGBE_MAX_FDIR_INDICES;
+ } else {
+ /* support up to max allowed queues with RSS */
+ max_combined = ngbe_max_rss_indices(adapter);
+ }
+
+ return max_combined;
+}
+
+static void ngbe_get_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct ngbe_adapter *adapter = netdev_priv(dev);
+
+ /* report maximum channels */
+ ch->max_combined = ngbe_max_channels(adapter);
+
+ /* report info for other vector */
+ if (adapter->flags & NGBE_FLAG_MSIX_ENABLED) {
+ ch->max_other = NON_Q_VECTORS;
+ ch->other_count = NON_Q_VECTORS;
+ }
+
+ /* record RSS queues */
+ ch->combined_count = adapter->ring_feature[RING_F_RSS].indices;
+
+ /* nothing else to report if RSS is disabled */
+ if (ch->combined_count == 1)
+ return;
+
+ /* we do not support ATR queueing if SR-IOV is enabled */
+ if (adapter->flags & NGBE_FLAG_SRIOV_ENABLED)
+ return;
+
+ /* same thing goes for being DCB enabled */
+ if (netdev_get_num_tc(dev) > 1)
+ return;
+
+ /* if ATR is disabled we can exit */
+ if (!adapter->atr_sample_rate)
+ return;
+
+}
+
+static int ngbe_set_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct ngbe_adapter *adapter = netdev_priv(dev);
+ unsigned int count = ch->combined_count;
+ u8 max_rss_indices = ngbe_max_rss_indices(adapter);
+
+ /* verify they are not requesting separate vectors */
+ if (!count || ch->rx_count || ch->tx_count)
+ return -EINVAL;
+
+ /* verify other_count has not changed */
+ if (ch->other_count != NON_Q_VECTORS)
+ return -EINVAL;
+
+ /* verify the number of channels does not exceed hardware limits */
+ if (count > ngbe_max_channels(adapter))
+ return -EINVAL;
+
+ /* cap RSS limit */
+ if (count > max_rss_indices)
+ count = max_rss_indices;
+ adapter->ring_feature[RING_F_RSS].limit = count;
+
+ /* use setup TC to update any traffic class queue mapping */
+ return ngbe_setup_tc(dev, netdev_get_num_tc(dev));
+}
+
+static int ngbe_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
+{
+ return 0;
+}
+
+static int ngbe_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ struct ethtool_eee eee_data;
+ s32 ret_val;
+
+ if (!(hw->mac.ops.setup_eee &&
+ (adapter->flags2 & NGBE_FLAG2_EEE_CAPABLE)))
+ return -EOPNOTSUPP;
+
+ memset(&eee_data, 0, sizeof(struct ethtool_eee));
+
+ ret_val = ngbe_get_eee(netdev, &eee_data);
+ if (ret_val)
+ return ret_val;
+
+ if (eee_data.eee_enabled && !edata->eee_enabled) {
+ if (eee_data.tx_lpi_enabled != edata->tx_lpi_enabled) {
+ e_dev_err("Setting EEE tx-lpi is not supported\n");
+ return -EINVAL;
+ }
+
+ if (eee_data.tx_lpi_timer != edata->tx_lpi_timer) {
+ e_dev_err("Setting EEE Tx LPI timer is not "
+ "supported\n");
+ return -EINVAL;
+ }
+
+ if (eee_data.advertised != edata->advertised) {
+ e_dev_err("Setting EEE advertised speeds is not "
+ "supported\n");
+ return -EINVAL;
+ }
+
+ }
+
+ if (eee_data.eee_enabled != edata->eee_enabled) {
+
+ if (edata->eee_enabled)
+ adapter->flags2 |= NGBE_FLAG2_EEE_ENABLED;
+ else
+ adapter->flags2 &= ~NGBE_FLAG2_EEE_ENABLED;
+
+ /* reset link */
+ if (netif_running(netdev))
+ ngbe_reinit_locked(adapter);
+ else
+ ngbe_reset(adapter);
+ }
+
+ return 0;
+}
+
+static int ngbe_set_flash(struct net_device *netdev, struct ethtool_flash *ef)
+{
+ int ret;
+ const struct firmware *fw;
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ ret = request_firmware(&fw, ef->data, &netdev->dev);
+ if (ret < 0)
+ return ret;
+
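+ /* region 0 is written to flash directly; other regions go through
+ * the management firmware interface when it is present
+ */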
+ if (ef->region == 0) {
+ ret = ngbe_upgrade_flash(&adapter->hw, ef->region,
+ fw->data, fw->size);
+ } else {
+ if (ngbe_mng_present(&adapter->hw)) {
+ ret = ngbe_upgrade_flash_hostif(&adapter->hw, ef->region,
+ fw->data, fw->size);
+ } else {
+ ret = -EOPNOTSUPP;
+ }
+ }
+
+ release_firmware(fw);
+ if (!ret)
+ dev_info(&netdev->dev,
+ "loaded firmware %s, reboot to make firmware work\n", ef->data);
+ return ret;
+}
+
+static const struct ethtool_ops ngbe_ethtool_ops = {
+ .get_link_ksettings = ngbe_get_link_ksettings,
+ .set_link_ksettings = ngbe_set_link_ksettings,
+ .get_drvinfo = ngbe_get_drvinfo,
+ .get_regs_len = ngbe_get_regs_len,
+ .get_regs = ngbe_get_regs,
+ .get_wol = ngbe_get_wol,
+ .set_wol = ngbe_set_wol,
+ .nway_reset = ngbe_nway_reset,
+ .get_link = ethtool_op_get_link,
+ .get_eeprom_len = ngbe_get_eeprom_len,
+ .get_eeprom = ngbe_get_eeprom,
+ .set_eeprom = ngbe_set_eeprom,
+ .get_ringparam = ngbe_get_ringparam,
+ .set_ringparam = ngbe_set_ringparam,
+ .get_pauseparam = ngbe_get_pauseparam,
+ .set_pauseparam = ngbe_set_pauseparam,
+ .get_msglevel = ngbe_get_msglevel,
+ .set_msglevel = ngbe_set_msglevel,
+ .self_test = ngbe_diag_test,
+ .get_strings = ngbe_get_strings,
+ .set_phys_id = ngbe_set_phys_id,
+ .get_sset_count = ngbe_get_sset_count,
+ .get_ethtool_stats = ngbe_get_ethtool_stats,
+ .get_coalesce = ngbe_get_coalesce,
+ .set_coalesce = ngbe_set_coalesce,
+ .get_rxnfc = ngbe_get_rxnfc,
+ .set_rxnfc = ngbe_set_rxnfc,
+ .get_eee = ngbe_get_eee,
+ .set_eee = ngbe_set_eee,
+ .get_channels = ngbe_get_channels,
+ .set_channels = ngbe_set_channels,
+ .get_ts_info = ngbe_get_ts_info,
+ .get_rxfh_indir_size = ngbe_rss_indir_size,
+ .get_rxfh_key_size = ngbe_get_rxfh_key_size,
+ .get_rxfh = ngbe_get_rxfh,
+ .set_rxfh = ngbe_set_rxfh,
+ .flash_device = ngbe_set_flash,
+};
+
+void ngbe_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &ngbe_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_hw.c b/drivers/net/ethernet/netswift/ngbe/ngbe_hw.c
new file mode 100644
index 0000000000000..73b8a328c267d
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_hw.c
@@ -0,0 +1,5047 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+
+#include "ngbe_type.h"
+#include "ngbe_hw.h"
+#include "ngbe_phy.h"
+#include "ngbe.h"
+
+#define NGBE_SP_MAX_TX_QUEUES 8
+#define NGBE_SP_MAX_RX_QUEUES 8
+#define NGBE_SP_RAR_ENTRIES 32
+#define NGBE_SP_MC_TBL_SIZE 128
+#define NGBE_SP_VFT_TBL_SIZE 128
+#define NGBE_SP_RX_PB_SIZE 42
+
+STATIC s32 ngbe_get_eeprom_semaphore(struct ngbe_hw *hw);
+STATIC void ngbe_release_eeprom_semaphore(struct ngbe_hw *hw);
+STATIC s32 ngbe_mta_vector(struct ngbe_hw *hw, u8 *mc_addr);
+
+STATIC s32 ngbe_setup_copper_link(struct ngbe_hw *hw,
+ u32 speed,
+ bool need_restart_AN);
+s32 ngbe_check_mac_link(struct ngbe_hw *hw, u32 *speed,
+ bool *link_up, bool link_up_wait_to_complete);
+s32 ngbe_check_mac_link_mdi(struct ngbe_hw *hw,
+ u32 *speed,
+ bool *link_up,
+ bool link_up_wait_to_complete);
+s32 ngbe_check_mac_link_yt8521s(struct ngbe_hw *hw,
+ u32 *speed,
+ bool *link_up,
+ bool link_up_wait_to_complete);
+
+u32 ngbe_rd32_epcs(struct ngbe_hw *hw, u32 addr)
+{
+ unsigned int portRegOffset;
+ u32 data;
+ /* Set the LAN port indicator to portRegOffset[1] */
+ /* 1st, write the regOffset to IDA_ADDR register */
+ portRegOffset = NGBE_XPCS_IDA_ADDR;
+ wr32(hw, portRegOffset, addr);
+
+ /* 2nd, read the data from IDA_DATA register */
+ portRegOffset = NGBE_XPCS_IDA_DATA;
+ data = rd32(hw, portRegOffset);
+
+ return data;
+}
+
+void ngbe_wr32_ephy(struct ngbe_hw *hw, u32 addr, u32 data)
+{
+ unsigned int portRegOffset;
+
+ /* Set the LAN port indicator to portRegOffset[1] */
+ /* 1st, write the regOffset to IDA_ADDR register */
+ portRegOffset = NGBE_ETHPHY_IDA_ADDR;
+ wr32(hw, portRegOffset, addr);
+
+ /* 2nd, write the data to the IDA_DATA register */
+ portRegOffset = NGBE_ETHPHY_IDA_DATA;
+ wr32(hw, portRegOffset, data);
+}
+
+void ngbe_wr32_epcs(struct ngbe_hw *hw, u32 addr, u32 data)
+{
+ unsigned int portRegOffset;
+
+ /* Set the LAN port indicator to portRegOffset[1] */
+ /* 1st, write the regOffset to IDA_ADDR register */
+ portRegOffset = NGBE_XPCS_IDA_ADDR;
+ wr32(hw, portRegOffset, addr);
+
+ /* 2nd, write the data to the IDA_DATA register */
+ portRegOffset = NGBE_XPCS_IDA_DATA;
+ wr32(hw, portRegOffset, data);
+}
+
+/**
+ * ngbe_get_pcie_msix_count - Gets MSI-X vector count
+ * @hw: pointer to hardware structure
+ *
+ * Read PCIe configuration space, and get the MSI-X vector count from
+ * the capabilities table.
+ **/
+u16 ngbe_get_pcie_msix_count(struct ngbe_hw *hw)
+{
+ u16 msix_count = 1;
+ u16 max_msix_count;
+ u32 pos;
+
+ DEBUGFUNC("\n");
+
+ /* emerald parts expose at most NGBE_MAX_MSIX_VECTORS_EMERALD vectors */
+ max_msix_count = NGBE_MAX_MSIX_VECTORS_EMERALD;
+ pos = pci_find_capability(((struct ngbe_adapter *)hw->back)->pdev,
+ PCI_CAP_ID_MSIX);
+ if (!pos)
+ return msix_count;
+ pci_read_config_word(((struct ngbe_adapter *)hw->back)->pdev,
+ pos + PCI_MSIX_FLAGS, &msix_count);
+
+ if (NGBE_REMOVED(hw->hw_addr))
+ msix_count = 0;
+ msix_count &= NGBE_PCIE_MSIX_TBL_SZ_MASK;
+
+ /* MSI-X count is zero-based in HW */
+ msix_count++;
+
+ if (msix_count > max_msix_count)
+ msix_count = max_msix_count;
+
+ return msix_count;
+}
+
+/**
+ * ngbe_init_hw - Generic hardware initialization
+ * @hw: pointer to hardware structure
+ *
+ * Initialize the hardware by resetting the hardware, filling the bus info
+ * structure and media type, clears all on chip counters, initializes receive
+ * address registers, multicast table, VLAN filter table, calls routine to set
+ * up link and flow control settings, and leaves transmit and receive units
+ * disabled and uninitialized
+ **/
+s32 ngbe_init_hw(struct ngbe_hw *hw)
+{
+ s32 status;
+
+ DEBUGFUNC("\n");
+
+ /* Reset the hardware */
+ status = TCALL(hw, mac.ops.reset_hw);
+
+ if (status == 0) {
+ /* Start the HW */
+ status = TCALL(hw, mac.ops.start_hw);
+ }
+
+ return status;
+}
+
+/**
+ * ngbe_clear_hw_cntrs - Generic clear hardware counters
+ * @hw: pointer to hardware structure
+ *
+ * Clears all hardware statistics counters by reading them from the hardware
+ * Statistics counters are clear on read.
+ **/
+s32 ngbe_clear_hw_cntrs(struct ngbe_hw *hw)
+{
+ u16 i = 0;
+
+ DEBUGFUNC("\n");
+
+ rd32(hw, NGBE_RX_CRC_ERROR_FRAMES_LOW);
+ rd32(hw, NGBE_RX_LEN_ERROR_FRAMES_LOW);
+ rd32(hw, NGBE_RDB_LXONTXC);
+ rd32(hw, NGBE_RDB_LXOFFTXC);
+ /* NGBE_MAC_LXONRXC (0x1e0c) is not implemented on this part, so it is skipped */
+ rd32(hw, NGBE_MAC_LXOFFRXC);
+
+ for (i = 0; i < 8; i++) {
+ /* select user priority i in the NGBE_MMC_CONTROL_UP field */
+ wr32m(hw, NGBE_MMC_CONTROL, NGBE_MMC_CONTROL_UP, i << 16);
+ rd32(hw, NGBE_MAC_PXOFFRXC);
+ }
+
+ for (i = 0; i < 8; i++)
+ wr32(hw, NGBE_PX_MPRC(i), 0);
+
+ rd32(hw, NGBE_PX_GPRC);
+ rd32(hw, NGBE_PX_GPTC);
+ rd32(hw, NGBE_PX_GORC_MSB);
+ rd32(hw, NGBE_PX_GOTC_MSB);
+
+ rd32(hw, NGBE_RX_BC_FRAMES_GOOD_LOW);
+ rd32(hw, NGBE_RX_UNDERSIZE_FRAMES_GOOD);
+ rd32(hw, NGBE_RX_OVERSIZE_FRAMES_GOOD);
+ rd32(hw, NGBE_RX_FRAME_CNT_GOOD_BAD_LOW);
+ rd32(hw, NGBE_TX_FRAME_CNT_GOOD_BAD_LOW);
+ rd32(hw, NGBE_TX_MC_FRAMES_GOOD_LOW);
+ rd32(hw, NGBE_TX_BC_FRAMES_GOOD_LOW);
+ rd32(hw, NGBE_RDM_DRP_PKT);
+ return 0;
+}
+
+/**
+ * ngbe_setup_fc - Set up flow control
+ * @hw: pointer to hardware structure
+ *
+ * Called at init time to set up flow control.
+ **/
+s32 ngbe_setup_fc(struct ngbe_hw *hw)
+{
+ s32 ret_val = 0;
+ u16 pcap_backplane = 0;
+
+ DEBUGFUNC("\n");
+
+ /* Validate the requested mode */
+ if (hw->fc.strict_ieee && hw->fc.requested_mode == ngbe_fc_rx_pause) {
+ ERROR_REPORT1(NGBE_ERROR_UNSUPPORTED,
+ "ngbe_fc_rx_pause not valid in strict IEEE mode\n");
+ ret_val = NGBE_ERR_INVALID_LINK_SETTINGS;
+ goto out;
+ }
+
+ /*
+ * gig parts do not have a word in the EEPROM to determine the
+ * default flow control setting, so we explicitly set it to full.
+ */
+ if (hw->fc.requested_mode == ngbe_fc_default)
+ hw->fc.requested_mode = ngbe_fc_full;
+
+ /*
+ * The possible values of fc.requested_mode are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause frames,
+ * but not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames but
+ * we do not support receiving pause frames).
+ * 3: Both Rx and Tx flow control (symmetric) are enabled.
+ * other: Invalid.
+ */
+ switch (hw->fc.requested_mode) {
+ case ngbe_fc_none:
+ /* Flow control completely disabled by software override. */
+ break;
+ case ngbe_fc_tx_pause:
+ /*
+ * Tx Flow control is enabled, and Rx Flow control is
+ * disabled by software override.
+ */
+ if (hw->phy.type != ngbe_phy_m88e1512_sfi &&
+ hw->phy.type != ngbe_phy_yt8521s_sfi)
+ pcap_backplane |= NGBE_SR_AN_MMD_ADV_REG1_PAUSE_ASM;
+ else
+ pcap_backplane |= 0x100;
+ break;
+ case ngbe_fc_rx_pause:
+ /*
+ * Rx Flow control is enabled and Tx Flow control is
+ * disabled by software override. Since there really
+ * isn't a way to advertise that we are capable of RX
+ * Pause ONLY, we will advertise that we support both
+ * symmetric and asymmetric Rx PAUSE, as such we fall
+ * through to the fc_full statement. Later, we will
+ * disable the adapter's ability to send PAUSE frames.
+ */
+ case ngbe_fc_full:
+ /* Flow control (both Rx and Tx) is enabled by SW override. */
+ if (hw->phy.type != ngbe_phy_m88e1512_sfi &&
+ hw->phy.type != ngbe_phy_yt8521s_sfi)
+ pcap_backplane |= NGBE_SR_AN_MMD_ADV_REG1_PAUSE_SYM |
+ NGBE_SR_AN_MMD_ADV_REG1_PAUSE_ASM;
+ else
+ pcap_backplane |= 0x80;
+ break;
+ default:
+ ERROR_REPORT1(NGBE_ERROR_ARGUMENT,
+ "Flow control param set incorrectly\n");
+ ret_val = NGBE_ERR_CONFIG;
+ goto out;
+ }
+
+ /*
+ * AUTOC restart handles negotiation of 1G on backplane
+ * and copper.
+ */
+ if (hw->phy.media_type == ngbe_media_type_copper)
+ ret_val = TCALL(hw, phy.ops.set_adv_pause, pcap_backplane);
+
+out:
+ return ret_val;
+}
+
+/**
+ * ngbe_get_mac_addr - Generic get MAC address
+ * @hw: pointer to hardware structure
+ * @mac_addr: Adapter MAC address
+ *
+ * Reads the adapter's MAC address from first Receive Address Register (RAR0)
+ * A reset of the adapter must be performed prior to calling this function
+ * in order for the MAC address to have been loaded from the EEPROM into RAR0
+ **/
+s32 ngbe_get_mac_addr(struct ngbe_hw *hw, u8 *mac_addr)
+{
+ u32 rar_high;
+ u32 rar_low;
+ u16 i;
+
+ DEBUGFUNC("\n");
+
+ wr32(hw, NGBE_PSR_MAC_SWC_IDX, 0);
+ rar_high = rd32(hw, NGBE_PSR_MAC_SWC_AD_H);
+ rar_low = rd32(hw, NGBE_PSR_MAC_SWC_AD_L);
+
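+ /* the address is stored big-endian: the two high bytes live in
+ * AD_H and the four low bytes in AD_L
+ */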
+ for (i = 0; i < 2; i++)
+ mac_addr[i] = (u8)(rar_high >> (1 - i) * 8);
+
+ for (i = 0; i < 4; i++)
+ mac_addr[i + 2] = (u8)(rar_low >> (3 - i) * 8);
+
+ return 0;
+}
+
+/**
+ * ngbe_set_pci_config_data - Generic store PCI bus info
+ * @hw: pointer to hardware structure
+ * @link_status: the link status returned by the PCI config space
+ *
+ * Stores the PCI bus info (speed, width, type) within the ngbe_hw structure
+ **/
+void ngbe_set_pci_config_data(struct ngbe_hw *hw, u16 link_status)
+{
+ if (hw->bus.type == ngbe_bus_type_unknown)
+ hw->bus.type = ngbe_bus_type_pci_express;
+
+ switch (link_status & NGBE_PCI_LINK_WIDTH) {
+ case NGBE_PCI_LINK_WIDTH_1:
+ hw->bus.width = ngbe_bus_width_pcie_x1;
+ break;
+ case NGBE_PCI_LINK_WIDTH_2:
+ hw->bus.width = ngbe_bus_width_pcie_x2;
+ break;
+ case NGBE_PCI_LINK_WIDTH_4:
+ hw->bus.width = ngbe_bus_width_pcie_x4;
+ break;
+ case NGBE_PCI_LINK_WIDTH_8:
+ hw->bus.width = ngbe_bus_width_pcie_x8;
+ break;
+ default:
+ hw->bus.width = ngbe_bus_width_unknown;
+ break;
+ }
+
+ switch (link_status & NGBE_PCI_LINK_SPEED) {
+ case NGBE_PCI_LINK_SPEED_2500:
+ hw->bus.speed = ngbe_bus_speed_2500;
+ break;
+ case NGBE_PCI_LINK_SPEED_5000:
+ hw->bus.speed = ngbe_bus_speed_5000;
+ break;
+ case NGBE_PCI_LINK_SPEED_8000:
+ hw->bus.speed = ngbe_bus_speed_8000;
+ break;
+ default:
+ hw->bus.speed = ngbe_bus_speed_unknown;
+ break;
+ }
+}
+
+/**
+ * ngbe_get_bus_info - Generic set PCI bus info
+ * @hw: pointer to hardware structure
+ *
+ * Gets the PCI bus info (speed, width, type) then calls helper function to
+ * store this data within the ngbe_hw structure.
+ **/
+s32 ngbe_get_bus_info(struct ngbe_hw *hw)
+{
+ u16 link_status;
+
+ DEBUGFUNC("\n");
+
+ /* Get the negotiated link width and speed from PCI config space */
+ link_status = NGBE_READ_PCIE_WORD(hw, NGBE_PCI_LINK_STATUS);
+
+ ngbe_set_pci_config_data(hw, link_status);
+
+ return 0;
+}
+
+/**
+ * ngbe_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices
+ * @hw: pointer to the HW structure
+ *
+ * Determines the LAN function id by reading memory-mapped registers
+ * and swaps the port value if requested.
+ **/
+void ngbe_set_lan_id_multi_port_pcie(struct ngbe_hw *hw)
+{
+ struct ngbe_bus_info *bus = &hw->bus;
+ u32 reg = 0;
+
+ DEBUGFUNC("\n");
+
+ reg = rd32(hw, NGBE_CFG_PORT_ST);
+ bus->lan_id = NGBE_CFG_PORT_ST_LAN_ID(reg);
+ bus->func = bus->lan_id;
+}
+
+/**
+ * ngbe_stop_adapter - Generic stop Tx/Rx units
+ * @hw: pointer to hardware structure
+ *
+ * Sets the adapter_stopped flag within ngbe_hw struct. Clears interrupts,
+ * disables transmit and receive units. The adapter_stopped flag is used by
+ * the shared code and drivers to determine if the adapter is in a stopped
+ * state and should not touch the hardware.
+ **/
+s32 ngbe_stop_adapter(struct ngbe_hw *hw)
+{
+ u16 i;
+
+ DEBUGFUNC("\n");
+
+ /*
+ * Set the adapter_stopped flag so other driver functions stop touching
+ * the hardware
+ */
+ hw->adapter_stopped = true;
+
+ /* Disable the receive unit */
+ TCALL(hw, mac.ops.disable_rx);
+
+ /* Set interrupt mask to stop interrupts from being generated */
+ ngbe_intr_disable(hw, NGBE_INTR_ALL);
+
+ /* Clear any pending interrupts, flush previous writes */
+ wr32(hw, NGBE_PX_MISC_IC, 0xffffffff);
+
+ /* note: bit 0 of NGBE_BME_CTL turns read-only after this write */
+ wr32(hw, NGBE_BME_CTL, 0x3);
+
+ /* Disable the transmit unit. Each queue must be disabled. */
+ for (i = 0; i < hw->mac.max_tx_queues; i++) {
+ wr32m(hw, NGBE_PX_TR_CFG(i),
+ NGBE_PX_TR_CFG_SWFLSH | NGBE_PX_TR_CFG_ENABLE,
+ NGBE_PX_TR_CFG_SWFLSH);
+ }
+
+ /* Disable the receive unit by stopping each queue */
+ for (i = 0; i < hw->mac.max_rx_queues; i++) {
+ wr32m(hw, NGBE_PX_RR_CFG(i),
+ NGBE_PX_RR_CFG_RR_EN, 0);
+ }
+
+ /* flush all queues disables */
+ NGBE_WRITE_FLUSH(hw);
+ msec_delay(2);
+
+ /*
+ * Prevent the PCI-E bus from hanging by disabling PCI-E master
+ * access and verify no pending requests
+ */
+ return ngbe_disable_pcie_master(hw);
+}
+
+/**
+ * ngbe_led_on - Turns on the software controllable LEDs.
+ * @hw: pointer to hardware structure
+ * @index: led number to turn on
+ **/
+s32 ngbe_led_on(struct ngbe_hw *hw, u32 index)
+{
+ u32 led_reg = rd32(hw, NGBE_CFG_LED_CTL);
+
+ DEBUGFUNC("\n");
+
+ /* To turn on the LED, set mode to ON. */
+ led_reg |= index | (index << NGBE_CFG_LED_CTL_LINK_OD_SHIFT);
+ wr32(hw, NGBE_CFG_LED_CTL, led_reg);
+ NGBE_WRITE_FLUSH(hw);
+
+ return 0;
+}
+
+/**
+ * ngbe_led_off - Turns off the software controllable LEDs.
+ * @hw: pointer to hardware structure
+ * @index: led number to turn off
+ **/
+s32 ngbe_led_off(struct ngbe_hw *hw, u32 index)
+{
+ u32 led_reg = rd32(hw, NGBE_CFG_LED_CTL);
+
+ DEBUGFUNC("\n");
+
+ /* To turn off the LED, set mode to OFF. */
+ led_reg &= ~(index << NGBE_CFG_LED_CTL_LINK_OD_SHIFT);
+ led_reg |= index;
+ wr32(hw, NGBE_CFG_LED_CTL, led_reg);
+ NGBE_WRITE_FLUSH(hw);
+ return 0;
+}
+
+/**
+ * ngbe_get_eeprom_semaphore - Get hardware semaphore
+ * @hw: pointer to hardware structure
+ *
+ * Sets the hardware semaphores so EEPROM access can occur for bit-bang method
+ **/
+STATIC s32 ngbe_get_eeprom_semaphore(struct ngbe_hw *hw)
+{
+ s32 status = NGBE_ERR_EEPROM;
+ u32 timeout = 2000;
+ u32 i;
+ u32 swsm;
+
+ /* Get SMBI software semaphore between device drivers first */
+ for (i = 0; i < timeout; i++) {
+ /*
+ * If the SMBI bit is 0 when we read it, then the bit will be
+ * set and we have the semaphore
+ */
+ swsm = rd32(hw, NGBE_MIS_SWSM);
+ if (!(swsm & NGBE_MIS_SWSM_SMBI)) {
+ status = 0;
+ break;
+ }
+ usec_delay(50);
+ }
+
+ if (i == timeout) {
+ DEBUGOUT("Driver can't access the Eeprom - SMBI Semaphore "
+ "not granted.\n");
+ /*
+ * this release is particularly important because our attempts
+ * above to get the semaphore may have succeeded, and if there
+ * was a timeout, we should unconditionally clear the semaphore
+ * bits to free the driver to make progress
+ */
+ ngbe_release_eeprom_semaphore(hw);
+
+ usec_delay(50);
+ /*
+ * one last try
+ * If the SMBI bit is 0 when we read it, then the bit will be
+ * set and we have the semaphore
+ */
+ swsm = rd32(hw, NGBE_MIS_SWSM);
+ if (!(swsm & NGBE_MIS_SWSM_SMBI))
+ status = 0;
+ }
+
+ /* Now get the semaphore between SW/FW through the SWESMBI bit */
+ if (status == 0) {
+ for (i = 0; i < timeout; i++) {
+ if (ngbe_check_mng_access(hw)) {
+ /* Set the SW EEPROM semaphore bit to request access */
+ wr32m(hw, NGBE_MNG_SW_SM,
+ NGBE_MNG_SW_SM_SM, NGBE_MNG_SW_SM_SM);
+
+ /*
+ * If we set the bit successfully then we got
+ * semaphore.
+ */
+ swsm = rd32(hw, NGBE_MNG_SW_SM);
+ if (swsm & NGBE_MNG_SW_SM_SM)
+ break;
+ }
+ usec_delay(50);
+ }
+
+ /*
+ * Release semaphores and return error if SW EEPROM semaphore
+ * was not granted because we don't have access to the EEPROM
+ */
+ if (i >= timeout) {
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "SWESMBI Software EEPROM semaphore not granted.\n");
+ ngbe_release_eeprom_semaphore(hw);
+ status = NGBE_ERR_EEPROM;
+ }
+ } else {
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "Software semaphore SMBI between device drivers "
+ "not granted.\n");
+ }
+
+ return status;
+}
+
+/**
+ * ngbe_release_eeprom_semaphore - Release hardware semaphore
+ * @hw: pointer to hardware structure
+ *
+ * This function clears hardware semaphore bits.
+ **/
+STATIC void ngbe_release_eeprom_semaphore(struct ngbe_hw *hw)
+{
+ if (ngbe_check_mng_access(hw)) {
+ wr32m(hw, NGBE_MNG_SW_SM,
+ NGBE_MNG_SW_SM_SM, 0);
+ wr32m(hw, NGBE_MIS_SWSM,
+ NGBE_MIS_SWSM_SMBI, 0);
+ NGBE_WRITE_FLUSH(hw);
+ }
+}
+
+/**
+ * ngbe_validate_mac_addr - Validate MAC address
+ * @mac_addr: pointer to MAC address.
+ *
+ * Tests a MAC address to ensure it is a valid Individual Address
+ **/
+s32 ngbe_validate_mac_addr(u8 *mac_addr)
+{
+ s32 status = 0;
+
+ DEBUGFUNC("\n");
+
+ /* Make sure it is not a multicast address */
+ if (NGBE_IS_MULTICAST(mac_addr)) {
+ DEBUGOUT("MAC address is multicast\n");
+ status = NGBE_ERR_INVALID_MAC_ADDR;
+ /* Not a broadcast address */
+ } else if (NGBE_IS_BROADCAST(mac_addr)) {
+ DEBUGOUT("MAC address is broadcast\n");
+ status = NGBE_ERR_INVALID_MAC_ADDR;
+ /* Reject the zero address */
+ } else if (mac_addr[0] == 0 && mac_addr[1] == 0 && mac_addr[2] == 0 &&
+ mac_addr[3] == 0 && mac_addr[4] == 0 && mac_addr[5] == 0) {
+ DEBUGOUT("MAC address is all zeros\n");
+ status = NGBE_ERR_INVALID_MAC_ADDR;
+ }
+ return status;
+}
+
+/**
+ * ngbe_set_rar - Set Rx address register
+ * @hw: pointer to hardware structure
+ * @index: Receive address register to write
+ * @addr: Address to put into receive address register
+ * @pools: VMDq "set" or "pool" index
+ * @enable_addr: set flag that address is active
+ *
+ * Puts an ethernet address into a receive address register.
+ **/
+s32 ngbe_set_rar(struct ngbe_hw *hw, u32 index, u8 *addr, u64 pools,
+ u32 enable_addr)
+{
+ u32 rar_low, rar_high;
+ u32 rar_entries = hw->mac.num_rar_entries;
+
+ DEBUGFUNC("\n");
+
+ /* Make sure we are using a valid rar index range */
+ if (index >= rar_entries) {
+ ERROR_REPORT2(NGBE_ERROR_ARGUMENT,
+ "RAR index %d is out of range.\n", index);
+ return NGBE_ERR_INVALID_ARGUMENT;
+ }
+
+ /* select the MAC address */
+ wr32(hw, NGBE_PSR_MAC_SWC_IDX, index);
+
+ /* setup VMDq pool mapping */
+ wr32(hw, NGBE_PSR_MAC_SWC_VM, pools & 0xFFFFFFFF);
+
+ /*
+ * HW expects these in little endian so we reverse the byte
+ * order from network order (big endian) to little endian
+ *
+ * Some parts put the VMDq setting in the extra RAH bits,
+ * so save everything except the lower 16 bits that hold part
+ * of the address and the address valid bit.
+ */
+ rar_low = ((u32)addr[5] |
+ ((u32)addr[4] << 8) |
+ ((u32)addr[3] << 16) |
+ ((u32)addr[2] << 24));
+ rar_high = ((u32)addr[1] |
+ ((u32)addr[0] << 8));
+ if (enable_addr != 0)
+ rar_high |= NGBE_PSR_MAC_SWC_AD_H_AV;
+
+ wr32(hw, NGBE_PSR_MAC_SWC_AD_L, rar_low);
+ wr32m(hw, NGBE_PSR_MAC_SWC_AD_H,
+ (NGBE_PSR_MAC_SWC_AD_H_AD(~0) |
+ NGBE_PSR_MAC_SWC_AD_H_ADTYPE(~0) |
+ NGBE_PSR_MAC_SWC_AD_H_AV),
+ rar_high);
+
+ return 0;
+}
+
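+/*
+ * For example (sketch): programming 00:11:22:33:44:55 through
+ * ngbe_set_rar() yields rar_low = 0x22334455 and rar_high = 0x00000011,
+ * matching the little-endian layout the hardware expects.
+ */
+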
+/**
+ * ngbe_clear_rar - Remove Rx address register
+ * @hw: pointer to hardware structure
+ * @index: Receive address register to write
+ *
+ * Clears an ethernet address from a receive address register.
+ **/
+s32 ngbe_clear_rar(struct ngbe_hw *hw, u32 index)
+{
+ u32 rar_entries = hw->mac.num_rar_entries;
+
+ DEBUGFUNC("\n");
+
+ /* Make sure we are using a valid rar index range */
+ if (index >= rar_entries) {
+ ERROR_REPORT2(NGBE_ERROR_ARGUMENT,
+ "RAR index %d is out of range.\n", index);
+ return NGBE_ERR_INVALID_ARGUMENT;
+ }
+
+ /*
+ * Some parts put the VMDq setting in the extra RAH bits,
+ * so save everything except the lower 16 bits that hold part
+ * of the address and the address valid bit.
+ */
+ wr32(hw, NGBE_PSR_MAC_SWC_IDX, index);
+
+ wr32(hw, NGBE_PSR_MAC_SWC_VM, 0);
+ wr32(hw, NGBE_PSR_MAC_SWC_AD_L, 0);
+ wr32m(hw, NGBE_PSR_MAC_SWC_AD_H,
+ (NGBE_PSR_MAC_SWC_AD_H_AD(~0) |
+ NGBE_PSR_MAC_SWC_AD_H_ADTYPE(~0) |
+ NGBE_PSR_MAC_SWC_AD_H_AV),
+ 0);
+
+ return 0;
+}
+
+/**
+ * ngbe_init_rx_addrs - Initializes receive address filters.
+ * @hw: pointer to hardware structure
+ *
+ * Places the MAC address in receive address register 0 and clears the rest
+ * of the receive address registers. Clears the multicast table. Assumes
+ * the receiver is in reset when the routine is called.
+ **/
+s32 ngbe_init_rx_addrs(struct ngbe_hw *hw)
+{
+ u32 i;
+ u32 rar_entries = hw->mac.num_rar_entries;
+ u32 psrctl;
+
+ DEBUGFUNC("\n");
+
+ /*
+	 * If the current MAC address is valid, assume it is a software
+	 * override of the permanent address.
+	 * Otherwise, use the permanent address from the EEPROM.
+ */
+ if (ngbe_validate_mac_addr(hw->mac.addr) ==
+ NGBE_ERR_INVALID_MAC_ADDR) {
+ /* Get the MAC address from the RAR0 for later reference */
+ TCALL(hw, mac.ops.get_mac_addr, hw->mac.addr);
+
+ DEBUGOUT3(" Keeping Current RAR0 Addr =%.2X %.2X %.2X %.2X %.2X %.2X\n",
+ hw->mac.addr[0], hw->mac.addr[1],
+ hw->mac.addr[2], hw->mac.addr[3],
+ hw->mac.addr[4], hw->mac.addr[5]);
+ } else {
+ /* Setup the receive address. */
+ DEBUGOUT("Overriding MAC Address in RAR[0]\n");
+ DEBUGOUT3(" New MAC Addr =%.2X %.2X %.2X %.2X %.2X %.2X\n",
+ hw->mac.addr[0], hw->mac.addr[1],
+ hw->mac.addr[2], hw->mac.addr[3],
+ hw->mac.addr[4], hw->mac.addr[5]);
+
+ TCALL(hw, mac.ops.set_rar, 0, hw->mac.addr, 0,
+ NGBE_PSR_MAC_SWC_AD_H_AV);
+ }
+ hw->addr_ctrl.overflow_promisc = 0;
+
+ hw->addr_ctrl.rar_used_count = 1;
+
+ /* Zero out the other receive addresses. */
+ DEBUGOUT1("Clearing RAR[1-%d]\n", rar_entries - 1);
+ for (i = 1; i < rar_entries; i++) {
+ wr32(hw, NGBE_PSR_MAC_SWC_IDX, i);
+ wr32(hw, NGBE_PSR_MAC_SWC_AD_L, 0);
+ wr32(hw, NGBE_PSR_MAC_SWC_AD_H, 0);
+ }
+
+ /* Clear the MTA */
+ hw->addr_ctrl.mta_in_use = 0;
+ psrctl = rd32(hw, NGBE_PSR_CTL);
+ psrctl &= ~(NGBE_PSR_CTL_MO | NGBE_PSR_CTL_MFE);
+ psrctl |= hw->mac.mc_filter_type << NGBE_PSR_CTL_MO_SHIFT;
+ wr32(hw, NGBE_PSR_CTL, psrctl);
+ DEBUGOUT(" Clearing MTA\n");
+ for (i = 0; i < hw->mac.mcft_size; i++)
+ wr32(hw, NGBE_PSR_MC_TBL(i), 0);
+
+ TCALL(hw, mac.ops.init_uta_tables);
+
+ return 0;
+}
+
+/**
+ * ngbe_add_uc_addr - Adds a secondary unicast address.
+ * @hw: pointer to hardware structure
+ * @addr: new address
+ * @vmdq: VMDq "set" or "pool" index
+ *
+ * Adds it to an unused receive address register or goes into promiscuous mode.
+ **/
+void ngbe_add_uc_addr(struct ngbe_hw *hw, u8 *addr, u32 vmdq)
+{
+ u32 rar_entries = hw->mac.num_rar_entries;
+ u32 rar;
+
+ DEBUGFUNC("\n");
+
+ DEBUGOUT6(" UC Addr = %.2X %.2X %.2X %.2X %.2X %.2X\n",
+ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+
+ /*
+ * Place this address in the RAR if there is room,
+ * else put the controller into promiscuous mode
+ */
+ if (hw->addr_ctrl.rar_used_count < rar_entries) {
+ rar = hw->addr_ctrl.rar_used_count;
+ TCALL(hw, mac.ops.set_rar, rar, addr, vmdq,
+ NGBE_PSR_MAC_SWC_AD_H_AV);
+ DEBUGOUT1("Added a secondary address to RAR[%d]\n", rar);
+ hw->addr_ctrl.rar_used_count++;
+ } else {
+ hw->addr_ctrl.overflow_promisc++;
+ }
+
+ DEBUGOUT("ngbe_add_uc_addr Complete\n");
+}
+
+/**
+ * ngbe_update_uc_addr_list - Updates MAC list of secondary addresses
+ * @hw: pointer to hardware structure
+ * @addr_list: the list of new addresses
+ * @addr_count: number of addresses
+ * @next: iterator function to walk the address list
+ *
+ * The given list replaces any existing list. Clears the secondary addrs from
+ * receive address registers. Uses unused receive address registers for the
+ * first secondary addresses, and falls back to promiscuous mode as needed.
+ *
+ * Drivers using secondary unicast addresses must set user_set_promisc when
+ * manually putting the device into promiscuous mode.
+ **/
+s32 ngbe_update_uc_addr_list(struct ngbe_hw *hw, u8 *addr_list,
+ u32 addr_count, ngbe_mc_addr_itr next)
+{
+ u8 *addr;
+ u32 i;
+ u32 old_promisc_setting = hw->addr_ctrl.overflow_promisc;
+ u32 uc_addr_in_use;
+ u32 vmdq;
+
+ DEBUGFUNC("\n");
+
+ /*
+ * Clear accounting of old secondary address list,
+ * don't count RAR[0]
+ */
+ uc_addr_in_use = hw->addr_ctrl.rar_used_count - 1;
+ hw->addr_ctrl.rar_used_count -= uc_addr_in_use;
+ hw->addr_ctrl.overflow_promisc = 0;
+
+ /* Zero out the other receive addresses */
+	DEBUGOUT1("Clearing RAR[1-%d]\n", uc_addr_in_use);
+ for (i = 0; i < uc_addr_in_use; i++) {
+ wr32(hw, NGBE_PSR_MAC_SWC_IDX, 1 + i);
+ wr32(hw, NGBE_PSR_MAC_SWC_AD_L, 0);
+ wr32(hw, NGBE_PSR_MAC_SWC_AD_H, 0);
+ }
+
+ /* Add the new addresses */
+ for (i = 0; i < addr_count; i++) {
+ DEBUGOUT(" Adding the secondary addresses:\n");
+ addr = next(hw, &addr_list, &vmdq);
+ ngbe_add_uc_addr(hw, addr, vmdq);
+ }
+
+ if (hw->addr_ctrl.overflow_promisc) {
+ /* enable promisc if not already in overflow or set by user */
+ if (!old_promisc_setting && !hw->addr_ctrl.user_set_promisc) {
+ DEBUGOUT(" Entering address overflow promisc mode\n");
+ wr32m(hw, NGBE_PSR_CTL,
+ NGBE_PSR_CTL_UPE, NGBE_PSR_CTL_UPE);
+ }
+ } else {
+ /* only disable if set by overflow, not by user */
+ if (old_promisc_setting && !hw->addr_ctrl.user_set_promisc) {
+ DEBUGOUT(" Leaving address overflow promisc mode\n");
+ wr32m(hw, NGBE_PSR_CTL,
+ NGBE_PSR_CTL_UPE, 0);
+ }
+ }
+
+ DEBUGOUT("ngbe_update_uc_addr_list Complete\n");
+ return 0;
+}
+
+/**
+ * ngbe_mta_vector - Determines bit-vector in multicast table to set
+ * @hw: pointer to hardware structure
+ * @mc_addr: the multicast address
+ *
+ * Extracts the 12 bits from a multicast address that determine which
+ * bit-vector to set in the multicast table. The hardware uses 12 bits from
+ * incoming rx multicast addresses to determine the bit-vector to check in
+ * the MTA. Which of the 4 combinations of 12 bits the hardware uses is set
+ * by the MO field of the MCSTCTRL. The MO field is set during initialization
+ * to mc_filter_type.
+ **/
+STATIC s32 ngbe_mta_vector(struct ngbe_hw *hw, u8 *mc_addr)
+{
+ u32 vector = 0;
+
+ DEBUGFUNC("\n");
+
+ switch (hw->mac.mc_filter_type) {
+ case 0: /* use bits [47:36] of the address */
+ vector = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4));
+ break;
+ case 1: /* use bits [46:35] of the address */
+ vector = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5));
+ break;
+ case 2: /* use bits [45:34] of the address */
+ vector = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6));
+ break;
+ case 3: /* use bits [43:32] of the address */
+ vector = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8));
+ break;
+ default: /* Invalid mc_filter_type */
+ DEBUGOUT("MC filter type param set incorrectly\n");
+ ASSERT(0);
+ break;
+ }
+
+ /* vector can only be 12-bits or boundary will be exceeded */
+ vector &= 0xFFF;
+ return vector;
+}
+
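+/*
+ * Worked example (sketch): for mc_addr 01:00:5e:00:00:01 with
+ * mc_filter_type == 0, vector = (0x00 >> 4) | (0x01 << 4) = 0x010.
+ */
+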
+/**
+ * ngbe_set_mta - Set bit-vector in multicast table
+ * @hw: pointer to hardware structure
+ * @mc_addr: the multicast address to set
+ *
+ * Sets the bit-vector in the multicast table.
+ **/
+void ngbe_set_mta(struct ngbe_hw *hw, u8 *mc_addr)
+{
+ u32 vector;
+ u32 vector_bit;
+ u32 vector_reg;
+
+ DEBUGFUNC("\n");
+
+ hw->addr_ctrl.mta_in_use++;
+
+ vector = ngbe_mta_vector(hw, mc_addr);
+ DEBUGOUT1(" bit-vector = 0x%03X\n", vector);
+
+ /*
+ * The MTA is a register array of 128 32-bit registers. It is treated
+ * like an array of 4096 bits. We want to set bit
+ * BitArray[vector_value]. So we figure out what register the bit is
+ * in, read it, OR in the new bit, then write back the new value. The
+ * register is determined by the upper 7 bits of the vector value and
+	 * the bit within that register is determined by the lower 5 bits of
+ * the value.
+ */
+ vector_reg = (vector >> 5) & 0x7F;
+ vector_bit = vector & 0x1F;
+ hw->mac.mta_shadow[vector_reg] |= (1 << vector_bit);
+}
+
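+/*
+ * Continuing the example above (sketch): vector 0x010 selects
+ * mta_shadow[(0x010 >> 5) & 0x7F] = mta_shadow[0], bit 0x010 & 0x1F = 16.
+ */
+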
+/**
+ * ngbe_update_mc_addr_list - Updates MAC list of multicast addresses
+ * @hw: pointer to hardware structure
+ * @mc_addr_list: the list of new multicast addresses
+ * @mc_addr_count: number of addresses
+ * @next: iterator function to walk the multicast address list
+ * @clear: flag, when set clears the table beforehand
+ *
+ * When the clear flag is set, the given list replaces any existing list.
+ * Hashes the given addresses into the multicast table.
+ **/
+s32 ngbe_update_mc_addr_list(struct ngbe_hw *hw, u8 *mc_addr_list,
+ u32 mc_addr_count, ngbe_mc_addr_itr next,
+ bool clear)
+{
+ u32 i;
+ u32 vmdq;
+ u32 psrctl;
+
+ DEBUGFUNC("\n");
+
+ /*
+ * Set the new number of MC addresses that we are being requested to
+ * use.
+ */
+ hw->addr_ctrl.num_mc_addrs = mc_addr_count;
+ hw->addr_ctrl.mta_in_use = 0;
+
+ /* Clear mta_shadow */
+ if (clear) {
+ DEBUGOUT(" Clearing MTA\n");
+ memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
+ }
+
+ /* Update mta_shadow */
+ for (i = 0; i < mc_addr_count; i++) {
+ DEBUGOUT(" Adding the multicast addresses:\n");
+ ngbe_set_mta(hw, next(hw, &mc_addr_list, &vmdq));
+ }
+
+ /* Enable mta */
+ for (i = 0; i < hw->mac.mcft_size; i++)
+ wr32a(hw, NGBE_PSR_MC_TBL(0), i,
+ hw->mac.mta_shadow[i]);
+
+ if (hw->addr_ctrl.mta_in_use > 0) {
+ psrctl = rd32(hw, NGBE_PSR_CTL);
+ psrctl &= ~(NGBE_PSR_CTL_MO | NGBE_PSR_CTL_MFE);
+ psrctl |= NGBE_PSR_CTL_MFE |
+ (hw->mac.mc_filter_type << NGBE_PSR_CTL_MO_SHIFT);
+ wr32(hw, NGBE_PSR_CTL, psrctl);
+ }
+
+ DEBUGOUT("ngbe_update_mc_addr_list Complete\n");
+ return 0;
+}
+
+/**
+ * ngbe_enable_mc - Enable multicast address in RAR
+ * @hw: pointer to hardware structure
+ *
+ * Enables multicast address in RAR and the use of the multicast hash table.
+ **/
+s32 ngbe_enable_mc(struct ngbe_hw *hw)
+{
+ struct ngbe_addr_filter_info *a = &hw->addr_ctrl;
+ u32 psrctl;
+
+ DEBUGFUNC("\n");
+
+ if (a->mta_in_use > 0) {
+ psrctl = rd32(hw, NGBE_PSR_CTL);
+ psrctl &= ~(NGBE_PSR_CTL_MO | NGBE_PSR_CTL_MFE);
+ psrctl |= NGBE_PSR_CTL_MFE |
+ (hw->mac.mc_filter_type << NGBE_PSR_CTL_MO_SHIFT);
+ wr32(hw, NGBE_PSR_CTL, psrctl);
+ }
+
+ return 0;
+}
+
+/**
+ * ngbe_disable_mc - Disable multicast address in RAR
+ * @hw: pointer to hardware structure
+ *
+ * Disables multicast address in RAR and the use of the multicast hash table.
+ **/
+s32 ngbe_disable_mc(struct ngbe_hw *hw)
+{
+ struct ngbe_addr_filter_info *a = &hw->addr_ctrl;
+ u32 psrctl;
+
+	DEBUGFUNC("\n");
+
+ if (a->mta_in_use > 0) {
+ psrctl = rd32(hw, NGBE_PSR_CTL);
+ psrctl &= ~(NGBE_PSR_CTL_MO | NGBE_PSR_CTL_MFE);
+ psrctl |= hw->mac.mc_filter_type << NGBE_PSR_CTL_MO_SHIFT;
+ wr32(hw, NGBE_PSR_CTL, psrctl);
+ }
+
+ return 0;
+}
+
+/**
+ * ngbe_fc_enable - Enable flow control
+ * @hw: pointer to hardware structure
+ *
+ * Enable flow control according to the current settings.
+ **/
+s32 ngbe_fc_enable(struct ngbe_hw *hw)
+{
+ s32 ret_val = 0;
+ u32 mflcn_reg, fccfg_reg;
+ u32 reg;
+ u32 fcrtl, fcrth;
+
+ DEBUGFUNC("\n");
+
+ /* Validate the water mark configuration */
+ if (!hw->fc.pause_time) {
+ ret_val = NGBE_ERR_INVALID_LINK_SETTINGS;
+ goto out;
+ }
+
+ /* Low water mark of zero causes XOFF floods */
+ if ((hw->fc.current_mode & ngbe_fc_tx_pause) && hw->fc.high_water) {
+ if (!hw->fc.low_water || hw->fc.low_water >= hw->fc.high_water) {
+ DEBUGOUT("Invalid water mark configuration\n");
+ ret_val = NGBE_ERR_INVALID_LINK_SETTINGS;
+ goto out;
+ }
+ }
+
+ /* Negotiate the fc mode to use */
+ ngbe_fc_autoneg(hw);
+
+ /* Disable any previous flow control settings */
+ mflcn_reg = rd32(hw, NGBE_MAC_RX_FLOW_CTRL);
+ mflcn_reg &= ~NGBE_MAC_RX_FLOW_CTRL_RFE;
+
+ fccfg_reg = rd32(hw, NGBE_RDB_RFCC);
+ fccfg_reg &= ~NGBE_RDB_RFCC_RFCE_802_3X;
+
+ /*
+ * The possible values of fc.current_mode are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause frames,
+ * but not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames but
+ * we do not support receiving pause frames).
+ * 3: Both Rx and Tx flow control (symmetric) are enabled.
+ * other: Invalid.
+ */
+ switch (hw->fc.current_mode) {
+ case ngbe_fc_none:
+ /*
+ * Flow control is disabled by software override or autoneg.
+ * The code below will actually disable it in the HW.
+ */
+ break;
+ case ngbe_fc_rx_pause:
+ /*
+ * Rx Flow control is enabled and Tx Flow control is
+ * disabled by software override. Since there really
+ * isn't a way to advertise that we are capable of RX
+ * Pause ONLY, we will advertise that we support both
+ * symmetric and asymmetric Rx PAUSE. Later, we will
+ * disable the adapter's ability to send PAUSE frames.
+ */
+ mflcn_reg |= NGBE_MAC_RX_FLOW_CTRL_RFE;
+ break;
+ case ngbe_fc_tx_pause:
+ /*
+ * Tx Flow control is enabled, and Rx Flow control is
+ * disabled by software override.
+ */
+ fccfg_reg |= NGBE_RDB_RFCC_RFCE_802_3X;
+ break;
+ case ngbe_fc_full:
+ /* Flow control (both Rx and Tx) is enabled by SW override. */
+ mflcn_reg |= NGBE_MAC_RX_FLOW_CTRL_RFE;
+ fccfg_reg |= NGBE_RDB_RFCC_RFCE_802_3X;
+ break;
+ default:
+ ERROR_REPORT1(NGBE_ERROR_ARGUMENT,
+ "Flow control param set incorrectly\n");
+ ret_val = NGBE_ERR_CONFIG;
+ goto out;
+ break;
+ }
+
+ /* Set 802.3x based flow control settings. */
+ wr32(hw, NGBE_MAC_RX_FLOW_CTRL, mflcn_reg);
+ wr32(hw, NGBE_RDB_RFCC, fccfg_reg);
+
+ /* Set up and enable Rx high/low water mark thresholds, enable XON. */
+ if ((hw->fc.current_mode & ngbe_fc_tx_pause) &&
+ hw->fc.high_water) {
+ /* 32Byte granularity */
+ fcrtl = (hw->fc.low_water << 10) |
+ NGBE_RDB_RFCL_XONE;
+ wr32(hw, NGBE_RDB_RFCL, fcrtl);
+ fcrth = (hw->fc.high_water << 10) |
+ NGBE_RDB_RFCH_XOFFE;
+ } else {
+ wr32(hw, NGBE_RDB_RFCL, 0);
+ /*
+ * In order to prevent Tx hangs when the internal Tx
+ * switch is enabled we must set the high water mark
+		 * to the Rx packet buffer size minus 24KB. This allows
+ * the Tx switch to function even under heavy Rx
+ * workloads.
+ */
+ fcrth = rd32(hw, NGBE_RDB_PB_SZ) - 24576;
+ }
+
+ wr32(hw, NGBE_RDB_RFCH, fcrth);
+
+ /* Configure pause time (2 TCs per register) */
+ reg = hw->fc.pause_time * 0x00010000;
+ wr32(hw, NGBE_RDB_RFCV, reg);
+
+ /* Configure flow control refresh threshold value */
+ wr32(hw, NGBE_RDB_RFCRT, hw->fc.pause_time / 2);
+
+out:
+ return ret_val;
+}
+
+/**
+ * ngbe_negotiate_fc - Negotiate flow control
+ * @hw: pointer to hardware structure
+ * @adv_reg: flow control advertised settings
+ * @lp_reg: link partner's flow control settings
+ * @adv_sym: symmetric pause bit in advertisement
+ * @adv_asm: asymmetric pause bit in advertisement
+ * @lp_sym: symmetric pause bit in link partner advertisement
+ * @lp_asm: asymmetric pause bit in link partner advertisement
+ *
+ * Find the intersection between advertised settings and link partner's
+ * advertised settings
+ **/
+STATIC s32 ngbe_negotiate_fc(struct ngbe_hw *hw, u32 adv_reg, u32 lp_reg,
+ u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm)
+{
+ if ((!(adv_reg)) || (!(lp_reg))) {
+ ERROR_REPORT3(NGBE_ERROR_UNSUPPORTED,
+ "Local or link partner's advertised flow control "
+ "settings are NULL. Local: %x, link partner: %x\n",
+ adv_reg, lp_reg);
+ return NGBE_ERR_FC_NOT_NEGOTIATED;
+ }
+
+ if ((adv_reg & adv_sym) && (lp_reg & lp_sym)) {
+ /*
+ * Now we need to check if the user selected Rx ONLY
+		 * pause frames. In this case, we had to advertise
+ * FULL flow control because we could not advertise RX
+ * ONLY. Hence, we must now check to see if we need to
+ * turn OFF the TRANSMISSION of PAUSE frames.
+ */
+ if (hw->fc.requested_mode == ngbe_fc_full) {
+ hw->fc.current_mode = ngbe_fc_full;
+ DEBUGOUT("Flow Control = FULL.\n");
+ } else {
+ hw->fc.current_mode = ngbe_fc_rx_pause;
+ DEBUGOUT("Flow Control=RX PAUSE frames only\n");
+ }
+ } else if (!(adv_reg & adv_sym) && (adv_reg & adv_asm) &&
+ (lp_reg & lp_sym) && (lp_reg & lp_asm)) {
+ hw->fc.current_mode = ngbe_fc_tx_pause;
+ DEBUGOUT("Flow Control = TX PAUSE frames only.\n");
+ } else if ((adv_reg & adv_sym) && (adv_reg & adv_asm) &&
+ !(lp_reg & lp_sym) && (lp_reg & lp_asm)) {
+ hw->fc.current_mode = ngbe_fc_rx_pause;
+ DEBUGOUT("Flow Control = RX PAUSE frames only.\n");
+ } else {
+ hw->fc.current_mode = ngbe_fc_none;
+ DEBUGOUT("Flow Control = NONE.\n");
+ }
+ return 0;
+}
+
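+/*
+ * Resolution summary for the negotiation above (sketch):
+ * - local SYM, partner SYM: full (or rx_pause if only rx was requested)
+ * - local ASM only, partner SYM+ASM: tx_pause
+ * - local SYM+ASM, partner ASM only: rx_pause
+ * - anything else: none
+ */
+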
+/**
+ * ngbe_fc_autoneg_copper - Enable flow control IEEE clause 37
+ * @hw: pointer to hardware structure
+ *
+ * Enable flow control according to IEEE clause 37.
+ **/
+STATIC s32 ngbe_fc_autoneg_copper(struct ngbe_hw *hw)
+{
+ u8 technology_ability_reg = 0;
+ u8 lp_technology_ability_reg = 0;
+
+ TCALL(hw, phy.ops.get_adv_pause, &technology_ability_reg);
+ TCALL(hw, phy.ops.get_lp_adv_pause, &lp_technology_ability_reg);
+
+ return ngbe_negotiate_fc(hw, (u32)technology_ability_reg,
+ (u32)lp_technology_ability_reg,
+ NGBE_TAF_SYM_PAUSE, NGBE_TAF_ASM_PAUSE,
+ NGBE_TAF_SYM_PAUSE, NGBE_TAF_ASM_PAUSE);
+}
+
+/**
+ * ngbe_fc_autoneg - Configure flow control
+ * @hw: pointer to hardware structure
+ *
+ * Compares our advertised flow control capabilities to those advertised by
+ * our link partner, and determines the proper flow control mode to use.
+ **/
+void ngbe_fc_autoneg(struct ngbe_hw *hw)
+{
+ s32 ret_val = NGBE_ERR_FC_NOT_NEGOTIATED;
+ u32 speed;
+ bool link_up;
+
+ DEBUGFUNC("\n");
+
+ /*
+ * AN should have completed when the cable was plugged in.
+ * Look for reasons to bail out. Bail out if:
+ * - FC autoneg is disabled, or if
+ * - link is not up.
+ */
+ if (hw->fc.disable_fc_autoneg) {
+ ERROR_REPORT1(NGBE_ERROR_UNSUPPORTED,
+ "Flow control autoneg is disabled");
+ goto out;
+ }
+
+ TCALL(hw, mac.ops.check_link, &speed, &link_up, false);
+ if (!link_up) {
+ ERROR_REPORT1(NGBE_ERROR_SOFTWARE, "The link is down");
+ goto out;
+ }
+
+ switch (hw->phy.media_type) {
+ /* Autoneg flow control on fiber adapters */
+ case ngbe_media_type_fiber:
+ break;
+
+ /* Autoneg flow control on copper adapters */
+ case ngbe_media_type_copper:
+ ret_val = ngbe_fc_autoneg_copper(hw);
+ break;
+
+ default:
+ break;
+ }
+
+out:
+ if (ret_val == NGBE_OK) {
+ hw->fc.fc_was_autonegged = true;
+ } else {
+ hw->fc.fc_was_autonegged = false;
+ hw->fc.current_mode = hw->fc.requested_mode;
+ }
+}
+
+/**
+ * ngbe_disable_pcie_master - Disable PCI-express master access
+ * @hw: pointer to hardware structure
+ *
+ * Disables PCI-Express master access and verifies there are no pending
+ * requests. Returns NGBE_ERR_MASTER_REQUESTS_PENDING if the master disable
+ * bit has not caused the master requests to be disabled, else returns 0,
+ * signifying that master requests are disabled.
+ **/
+s32 ngbe_disable_pcie_master(struct ngbe_hw *hw)
+{
+ s32 status = 0;
+ u32 i;
+
+ DEBUGFUNC("\n");
+
+ /* Always set this bit to ensure any future transactions are blocked */
+ pci_clear_master(((struct ngbe_adapter *)hw->back)->pdev);
+
+ /* Exit if master requests are blocked */
+ if (!(rd32(hw, NGBE_PX_TRANSACTION_PENDING)) ||
+ NGBE_REMOVED(hw->hw_addr))
+ goto out;
+
+ /* Poll for master request bit to clear */
+ for (i = 0; i < NGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) {
+ usec_delay(100);
+ if (!(rd32(hw, NGBE_PX_TRANSACTION_PENDING)))
+ goto out;
+ }
+
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "PCIe transaction pending bit did not clear.\n");
+ status = NGBE_ERR_MASTER_REQUESTS_PENDING;
+
+out:
+ return status;
+}
+
+/**
+ * ngbe_acquire_swfw_sync - Acquire SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to acquire
+ *
+ * Acquires the SWFW semaphore through the GSSR register for the specified
+ * function (CSR, PHY0, PHY1, EEPROM, Flash)
+ **/
+s32 ngbe_acquire_swfw_sync(struct ngbe_hw *hw, u32 mask)
+{
+ u32 gssr = 0;
+ u32 swmask = mask;
+ u32 fwmask = mask << 16;
+ u32 timeout = 200;
+ u32 i;
+
+ for (i = 0; i < timeout; i++) {
+ /*
+ * SW NVM semaphore bit is used for access to all
+ * SW_FW_SYNC bits (not just NVM)
+ */
+ if (ngbe_get_eeprom_semaphore(hw))
+ return NGBE_ERR_SWFW_SYNC;
+
+ if (ngbe_check_mng_access(hw)) {
+ gssr = rd32(hw, NGBE_MNG_SWFW_SYNC);
+ if (!(gssr & (fwmask | swmask))) {
+ gssr |= swmask;
+ wr32(hw, NGBE_MNG_SWFW_SYNC, gssr);
+ ngbe_release_eeprom_semaphore(hw);
+ return 0;
+ } else {
+ /* Resource is currently in use by FW or SW */
+ ngbe_release_eeprom_semaphore(hw);
+ msec_delay(5);
+ }
+ }
+ }
+
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "ngbe_acquire_swfw_sync: i = %u, gssr = %u\n", i, gssr);
+
+ /* If time expired clear the bits holding the lock and retry */
+ if (gssr & (fwmask | swmask))
+ ngbe_release_swfw_sync(hw, gssr & (fwmask | swmask));
+
+ msec_delay(5);
+ return NGBE_ERR_SWFW_SYNC;
+}
+
+/**
+ * ngbe_release_swfw_sync - Release SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to release
+ *
+ * Releases the SWFW semaphore through the GSSR register for the specified
+ * function (CSR, PHY0, PHY1, EEPROM, Flash)
+ **/
+void ngbe_release_swfw_sync(struct ngbe_hw *hw, u32 mask)
+{
+ ngbe_get_eeprom_semaphore(hw);
+ if (ngbe_check_mng_access(hw))
+ wr32m(hw, NGBE_MNG_SWFW_SYNC, mask, 0);
+
+ ngbe_release_eeprom_semaphore(hw);
+}
+
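+/*
+ * Illustrative pairing of the two helpers above (sketch, error handling
+ * elided):
+ *
+ *     if (ngbe_acquire_swfw_sync(hw, NGBE_MNG_SWFW_SYNC_SW_MB) == 0) {
+ *             ... access the resource shared with firmware ...
+ *             ngbe_release_swfw_sync(hw, NGBE_MNG_SWFW_SYNC_SW_MB);
+ *     }
+ */
+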
+/**
+ * ngbe_disable_sec_rx_path - Stops the receive data path
+ * @hw: pointer to hardware structure
+ *
+ * Stops the receive data path and waits for the HW to internally empty
+ * the Rx security block
+ **/
+s32 ngbe_disable_sec_rx_path(struct ngbe_hw *hw)
+{
+#define NGBE_MAX_SECRX_POLL 40
+
+ int i;
+ int secrxreg;
+
+ DEBUGFUNC("\n");
+
+ wr32m(hw, NGBE_RSEC_CTL,
+ NGBE_RSEC_CTL_RX_DIS, NGBE_RSEC_CTL_RX_DIS);
+ for (i = 0; i < NGBE_MAX_SECRX_POLL; i++) {
+ secrxreg = rd32(hw, NGBE_RSEC_ST);
+ if (secrxreg & NGBE_RSEC_ST_RSEC_RDY)
+ break;
+ else
+ /* Use interrupt-safe sleep just in case */
+ usec_delay(1000);
+ }
+
+ /* For informational purposes only */
+ if (i >= NGBE_MAX_SECRX_POLL)
+ DEBUGOUT("Rx unit being enabled before security "
+ "path fully disabled. Continuing with init.\n");
+
+ return 0;
+}
+
+/**
+ * ngbe_enable_sec_rx_path - Enables the receive data path
+ * @hw: pointer to hardware structure
+ *
+ * Enables the receive data path.
+ **/
+s32 ngbe_enable_sec_rx_path(struct ngbe_hw *hw)
+{
+ DEBUGFUNC("\n");
+
+ wr32m(hw, NGBE_RSEC_CTL,
+ NGBE_RSEC_CTL_RX_DIS, 0);
+ NGBE_WRITE_FLUSH(hw);
+
+ return 0;
+}
+
+/**
+ * ngbe_insert_mac_addr - Find a RAR for this mac address
+ * @hw: pointer to hardware structure
+ * @addr: Address to put into receive address register
+ * @vmdq: VMDq pool to assign
+ *
+ * Puts an ethernet address into a receive address register, or
+ * finds the RAR that it is already in; adds to the pool list
+ **/
+s32 ngbe_insert_mac_addr(struct ngbe_hw *hw, u8 *addr, u32 vmdq)
+{
+ static const u32 NO_EMPTY_RAR_FOUND = 0xFFFFFFFF;
+ u32 first_empty_rar = NO_EMPTY_RAR_FOUND;
+ u32 rar;
+ u32 rar_low, rar_high;
+ u32 addr_low, addr_high;
+
+ DEBUGFUNC("\n");
+
+ /* swap bytes for HW little endian */
+ addr_low = addr[5] | (addr[4] << 8)
+ | (addr[3] << 16)
+ | (addr[2] << 24);
+ addr_high = addr[1] | (addr[0] << 8);
+
+ /*
+ * Either find the mac_id in rar or find the first empty space.
+ * rar_highwater points to just after the highest currently used
+ * rar in order to shorten the search. It grows when we add a new
+ * rar to the top.
+ */
+ for (rar = 0; rar < hw->mac.rar_highwater; rar++) {
+ wr32(hw, NGBE_PSR_MAC_SWC_IDX, rar);
+ rar_high = rd32(hw, NGBE_PSR_MAC_SWC_AD_H);
+
+ if (((NGBE_PSR_MAC_SWC_AD_H_AV & rar_high) == 0)
+ && first_empty_rar == NO_EMPTY_RAR_FOUND) {
+ first_empty_rar = rar;
+ } else if ((rar_high & 0xFFFF) == addr_high) {
+ rar_low = rd32(hw, NGBE_PSR_MAC_SWC_AD_L);
+ if (rar_low == addr_low)
+ break; /* found it already in the rars */
+ }
+ }
+
+ if (rar < hw->mac.rar_highwater) {
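+		/* already present in the RARs; nothing more to do */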
+
+ } else if (first_empty_rar != NO_EMPTY_RAR_FOUND) {
+ /* stick it into first empty RAR slot we found */
+ rar = first_empty_rar;
+ TCALL(hw, mac.ops.set_rar, rar, addr, vmdq,
+ NGBE_PSR_MAC_SWC_AD_H_AV);
+ } else if (rar == hw->mac.rar_highwater) {
+ /* add it to the top of the list and inc the highwater mark */
+ TCALL(hw, mac.ops.set_rar, rar, addr, vmdq,
+ NGBE_PSR_MAC_SWC_AD_H_AV);
+ hw->mac.rar_highwater++;
+ } else if (rar >= hw->mac.num_rar_entries) {
+ return NGBE_ERR_INVALID_MAC_ADDR;
+ }
+
+ return rar;
+}
+
+/**
+ * ngbe_clear_vmdq - Disassociate a VMDq pool index from a rx address
+ * @hw: pointer to hardware struct
+ * @rar: receive address register index to disassociate
+ * @vmdq: VMDq pool index to remove from the rar
+ **/
+s32 ngbe_clear_vmdq(struct ngbe_hw *hw, u32 rar, u32 vmdq)
+{
+ u32 mpsar_lo;
+ u32 rar_entries = hw->mac.num_rar_entries;
+
+ DEBUGFUNC("\n");
+ UNREFERENCED_PARAMETER(vmdq);
+
+ /* Make sure we are using a valid rar index range */
+ if (rar >= rar_entries) {
+ ERROR_REPORT2(NGBE_ERROR_ARGUMENT,
+ "RAR index %d is out of range.\n", rar);
+ return NGBE_ERR_INVALID_ARGUMENT;
+ }
+
+ wr32(hw, NGBE_PSR_MAC_SWC_IDX, rar);
+ mpsar_lo = rd32(hw, NGBE_PSR_MAC_SWC_VM);
+
+ if (NGBE_REMOVED(hw->hw_addr))
+ goto done;
+
+ if (!mpsar_lo)
+ goto done;
+
+ /* was that the last pool using this rar? */
+ if (mpsar_lo == 0 && rar != 0)
+ TCALL(hw, mac.ops.clear_rar, rar);
+done:
+ return 0;
+}
+
+/**
+ * ngbe_set_vmdq - Associate a VMDq pool index with a rx address
+ * @hw: pointer to hardware struct
+ * @rar: receive address register index to associate with a VMDq index
+ * @pool: VMDq pool index
+ **/
+s32 ngbe_set_vmdq(struct ngbe_hw *hw, u32 rar, u32 pool)
+{
+ u32 rar_entries = hw->mac.num_rar_entries;
+
+ DEBUGFUNC("\n");
+ UNREFERENCED_PARAMETER(pool);
+
+ /* Make sure we are using a valid rar index range */
+ if (rar >= rar_entries) {
+ ERROR_REPORT2(NGBE_ERROR_ARGUMENT,
+ "RAR index %d is out of range.\n", rar);
+ return NGBE_ERR_INVALID_ARGUMENT;
+ }
+
+ return 0;
+}
+
+/**
+ * ngbe_set_vmdq_san_mac - Associate default VMDq pool index with a rx address
+ * @hw: pointer to hardware struct
+ * @vmdq: VMDq pool index
+ *
+ * This function should only be invoked in the IOV mode. In IOV mode, the
+ * default pool is the next pool after the number of VFs advertised, not 0.
+ * The MPSAR table needs to be updated for the SAN_MAC RAR
+ * [hw->mac.san_mac_rar_index].
+ **/
+s32 ngbe_set_vmdq_san_mac(struct ngbe_hw *hw, u32 vmdq)
+{
+ u32 rar = hw->mac.san_mac_rar_index;
+
+ DEBUGFUNC("\n");
+	/* the VM pool bitmap register is only 32 bits wide */
+ if (vmdq > 32)
+ return -1;
+
+ wr32(hw, NGBE_PSR_MAC_SWC_IDX, rar);
+ wr32(hw, NGBE_PSR_MAC_SWC_VM, 1 << vmdq);
+
+ return 0;
+}
+
+/**
+ * ngbe_init_uta_tables - Initialize the Unicast Table Array
+ * @hw: pointer to hardware structure
+ **/
+s32 ngbe_init_uta_tables(struct ngbe_hw *hw)
+{
+ int i;
+
+ DEBUGFUNC("\n");
+ DEBUGOUT(" Clearing UTA\n");
+
+ for (i = 0; i < 128; i++)
+ wr32(hw, NGBE_PSR_UC_TBL(i), 0);
+
+ return 0;
+}
+
+/**
+ * ngbe_find_vlvf_slot - find the vlanid or the first empty slot
+ * @hw: pointer to hardware structure
+ * @vlan: VLAN id to write to VLAN filter
+ *
+ * return the VLVF index where this VLAN id should be placed
+ *
+ **/
+s32 ngbe_find_vlvf_slot(struct ngbe_hw *hw, u32 vlan)
+{
+ u32 bits = 0;
+ u32 first_empty_slot = 0;
+ s32 regindex;
+
+ /* short cut the special case */
+ if (vlan == 0)
+ return 0;
+
+ /*
+ * Search for the vlan id in the VLVF entries. Save off the first empty
+ * slot found along the way
+ */
+ for (regindex = 1; regindex < NGBE_PSR_VLAN_SWC_ENTRIES; regindex++) {
+ wr32(hw, NGBE_PSR_VLAN_SWC_IDX, regindex);
+ bits = rd32(hw, NGBE_PSR_VLAN_SWC);
+ if (!bits && !(first_empty_slot))
+ first_empty_slot = regindex;
+ else if ((bits & 0x0FFF) == vlan)
+ break;
+ }
+
+ /*
+	 * If regindex is less than NGBE_PSR_VLAN_SWC_ENTRIES, then we found the vlan
+ * in the VLVF. Else use the first empty VLVF register for this
+ * vlan id.
+ */
+ if (regindex >= NGBE_PSR_VLAN_SWC_ENTRIES) {
+ if (first_empty_slot)
+ regindex = first_empty_slot;
+ else {
+ ERROR_REPORT1(NGBE_ERROR_SOFTWARE,
+ "No space in VLVF.\n");
+ regindex = NGBE_ERR_NO_SPACE;
+ }
+ }
+
+ return regindex;
+}
+
+/**
+ * ngbe_set_vfta - Set VLAN filter table
+ * @hw: pointer to hardware structure
+ * @vlan: VLAN id to write to VLAN filter
+ * @vind: VMDq output index that maps queue to VLAN id in VLVFB
+ * @vlan_on: boolean flag to turn on/off VLAN in VLVF
+ *
+ * Turn on/off specified VLAN in the VLAN filter table.
+ **/
+s32 ngbe_set_vfta(struct ngbe_hw *hw, u32 vlan, u32 vind,
+ bool vlan_on)
+{
+ s32 regindex;
+ u32 bitindex;
+ u32 vfta;
+ u32 targetbit;
+ s32 ret_val = 0;
+ bool vfta_changed = false;
+
+ DEBUGFUNC("\n");
+
+ if (vlan > 4095)
+ return NGBE_ERR_PARAM;
+
+ /*
+ * this is a 2 part operation - first the VFTA, then the
+ * VLVF and VLVFB if VT Mode is set
+ * We don't write the VFTA until we know the VLVF part succeeded.
+ */
+
+ /* Part 1
+ * The VFTA is a bitstring made up of 128 32-bit registers
+ * that enable the particular VLAN id, much like the MTA:
+ * bits[11-5]: which register
+ * bits[4-0]: which bit in the register
+ */
+ regindex = (vlan >> 5) & 0x7F;
+ bitindex = vlan & 0x1F;
+ targetbit = (1 << bitindex);
+ /* errata 5 */
+ vfta = hw->mac.vft_shadow[regindex];
+ if (vlan_on) {
+ if (!(vfta & targetbit)) {
+ vfta |= targetbit;
+ vfta_changed = true;
+ }
+ } else {
+ if ((vfta & targetbit)) {
+ vfta &= ~targetbit;
+ vfta_changed = true;
+ }
+ }
+
+ /* Part 2
+ * Call ngbe_set_vlvf to set VLVFB and VLVF
+ */
+ ret_val = ngbe_set_vlvf(hw, vlan, vind, vlan_on,
+ &vfta_changed);
+ if (ret_val != 0)
+ return ret_val;
+
+ if (vfta_changed)
+ wr32(hw, NGBE_PSR_VLAN_TBL(regindex), vfta);
+ /* errata 5 */
+ hw->mac.vft_shadow[regindex] = vfta;
+ return 0;
+}
+
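+/*
+ * Worked example (sketch): vlan 100 selects VFTA register
+ * (100 >> 5) & 0x7F = 3 and bit 100 & 0x1F = 4 within that register.
+ */
+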
+/**
+ * ngbe_set_vlvf - Set VLAN Pool Filter
+ * @hw: pointer to hardware structure
+ * @vlan: VLAN id to write to VLAN filter
+ * @vind: VMDq output index that maps queue to VLAN id in VLVFB
+ * @vlan_on: boolean flag to turn on/off VLAN in VLVF
+ * @vfta_changed: pointer to boolean flag which indicates whether VFTA
+ * should be changed
+ *
+ * Turn on/off specified bit in VLVF table.
+ **/
+s32 ngbe_set_vlvf(struct ngbe_hw *hw, u32 vlan, u32 vind,
+ bool vlan_on, bool *vfta_changed)
+{
+ u32 vt;
+
+ DEBUGFUNC("\n");
+
+ if (vlan > 4095)
+ return NGBE_ERR_PARAM;
+
+ /* If VT Mode is set
+ * Either vlan_on
+ * make sure the vlan is in VLVF
+ * set the vind bit in the matching VLVFB
+ * Or !vlan_on
+ * clear the pool bit and possibly the vind
+ */
+ vt = rd32(hw, NGBE_CFG_PORT_CTL);
+ if (vt & NGBE_CFG_PORT_CTL_NUM_VT_MASK) {
+ s32 vlvf_index;
+ u32 bits = 0;
+
+ vlvf_index = ngbe_find_vlvf_slot(hw, vlan);
+ if (vlvf_index < 0)
+ return vlvf_index;
+
+ wr32(hw, NGBE_PSR_VLAN_SWC_IDX, vlvf_index);
+ if (vlan_on) {
+ /* set the pool bit */
+ if (vind < 32) {
+ bits = rd32(hw,
+ NGBE_PSR_VLAN_SWC_VM_L);
+ bits |= (1 << vind);
+ wr32(hw,
+ NGBE_PSR_VLAN_SWC_VM_L,
+ bits);
+ }
+ } else {
+ /* clear the pool bit */
+ if (vind < 32) {
+ bits = rd32(hw,
+ NGBE_PSR_VLAN_SWC_VM_L);
+ bits &= ~(1 << vind);
+ wr32(hw,
+ NGBE_PSR_VLAN_SWC_VM_L,
+ bits);
+ } else {
+				bits = rd32(hw,
+ NGBE_PSR_VLAN_SWC_VM_L);
+ }
+ }
+
+ /*
+ * If there are still bits set in the VLVFB registers
+ * for the VLAN ID indicated we need to see if the
+ * caller is requesting that we clear the VFTA entry bit.
+ * If the caller has requested that we clear the VFTA
+ * entry bit but there are still pools/VFs using this VLAN
+ * ID entry then ignore the request. We're not worried
+ * about the case where we're turning the VFTA VLAN ID
+ * entry bit on, only when requested to turn it off as
+ * there may be multiple pools and/or VFs using the
+ * VLAN ID entry. In that case we cannot clear the
+ * VFTA bit until all pools/VFs using that VLAN ID have also
+ * been cleared. This will be indicated by "bits" being
+ * zero.
+ */
+ if (bits) {
+ wr32(hw, NGBE_PSR_VLAN_SWC,
+ (NGBE_PSR_VLAN_SWC_VIEN | vlan));
+ if ((!vlan_on) && (vfta_changed != NULL)) {
+ /* someone wants to clear the vfta entry
+ * but some pools/VFs are still using it.
+ * Ignore it. */
+ *vfta_changed = false;
+ }
+		} else {
+			wr32(hw, NGBE_PSR_VLAN_SWC, 0);
+		}
+ }
+
+ return 0;
+}
+
+/**
+ * ngbe_clear_vfta - Clear VLAN filter table
+ * @hw: pointer to hardware structure
+ *
+ * Clears the VLAN filter table, and the VMDq index associated with the filter
+ **/
+s32 ngbe_clear_vfta(struct ngbe_hw *hw)
+{
+ u32 offset;
+
+ DEBUGFUNC("\n");
+
+ for (offset = 0; offset < hw->mac.vft_size; offset++) {
+ wr32(hw, NGBE_PSR_VLAN_TBL(offset), 0);
+ /* errata 5 */
+ hw->mac.vft_shadow[offset] = 0;
+ }
+
+ for (offset = 0; offset < NGBE_PSR_VLAN_SWC_ENTRIES; offset++) {
+ wr32(hw, NGBE_PSR_VLAN_SWC_IDX, offset);
+ wr32(hw, NGBE_PSR_VLAN_SWC, 0);
+ wr32(hw, NGBE_PSR_VLAN_SWC_VM_L, 0);
+ }
+
+ return 0;
+}
+
+/**
+ * ngbe_set_mac_anti_spoofing - Enable/Disable MAC anti-spoofing
+ * @hw: pointer to hardware structure
+ * @enable: enable or disable switch for anti-spoofing
+ * @pf: Physical Function pool - do not enable anti-spoofing for the PF
+ *
+ **/
+void ngbe_set_mac_anti_spoofing(struct ngbe_hw *hw, bool enable, int pf)
+{
+ u64 pfvfspoof = 0;
+
+ DEBUGFUNC("\n");
+
+ if (enable) {
+ /*
+ * The PF should be allowed to spoof so that it can support
+		 * emulation mode NICs. Do not set the bits assigned to the PF.
+ * Remaining pools belong to the PF so they do not need to have
+ * anti-spoofing enabled.
+ */
+ pfvfspoof = (1 << pf) - 1;
+ wr32(hw, NGBE_TDM_MAC_AS_L,
+ pfvfspoof & 0xff);
+ } else {
+ wr32(hw, NGBE_TDM_MAC_AS_L, 0);
+ }
+}
+
+/**
+ * ngbe_set_vlan_anti_spoofing - Enable/Disable VLAN anti-spoofing
+ * @hw: pointer to hardware structure
+ * @enable: enable or disable switch for VLAN anti-spoofing
+ * @vf: Virtual Function pool - VF Pool to set for VLAN anti-spoofing
+ *
+ **/
+void ngbe_set_vlan_anti_spoofing(struct ngbe_hw *hw, bool enable, int vf)
+{
+ u32 pfvfspoof;
+
+ DEBUGFUNC("\n");
+
+ if (vf > 8)
+ return;
+
+ pfvfspoof = rd32(hw, NGBE_TDM_VLAN_AS_L);
+ if (enable)
+ pfvfspoof |= (1 << vf);
+ else
+ pfvfspoof &= ~(1 << vf);
+ wr32(hw, NGBE_TDM_VLAN_AS_L, pfvfspoof);
+}
+
+/**
+ * ngbe_set_ethertype_anti_spoofing - Enable/Disable Ethertype anti-spoofing
+ * @hw: pointer to hardware structure
+ * @enable: enable or disable switch for Ethertype anti-spoofing
+ * @vf: Virtual Function pool - VF Pool to set for Ethertype anti-spoofing
+ *
+ **/
+void ngbe_set_ethertype_anti_spoofing(struct ngbe_hw *hw,
+ bool enable, int vf)
+{
+ u32 pfvfspoof;
+
+ DEBUGFUNC("\n");
+
+ if (vf <= 8) {
+ pfvfspoof = rd32(hw, NGBE_TDM_ETYPE_AS_L);
+ if (enable)
+ pfvfspoof |= (1 << vf);
+ else
+ pfvfspoof &= ~(1 << vf);
+ wr32(hw, NGBE_TDM_ETYPE_AS_L, pfvfspoof);
+ }
+}
+
+/**
+ * ngbe_get_device_caps - Get additional device capabilities
+ * @hw: pointer to hardware structure
+ * @device_caps: the EEPROM word with the extra device capabilities
+ *
+ * This function will read the EEPROM location for the device capabilities,
+ * and return the word through device_caps.
+ **/
+s32 ngbe_get_device_caps(struct ngbe_hw *hw, u16 *device_caps)
+{
+ DEBUGFUNC("\n");
+
+ TCALL(hw, eeprom.ops.read,
+ hw->eeprom.sw_region_offset + NGBE_DEVICE_CAPS, device_caps);
+
+ return 0;
+}
+
+/**
+ * ngbe_calculate_checksum - Calculate checksum for buffer
+ * @buffer: pointer to EEPROM
+ * @length: size of EEPROM to calculate a checksum for
+ *
+ * Calculates the checksum of a buffer over the specified length. The
+ * checksum calculated is returned.
+ **/
+u8 ngbe_calculate_checksum(u8 *buffer, u32 length)
+{
+ u32 i;
+ u8 sum = 0;
+
+ DEBUGFUNC("\n");
+
+ if (!buffer)
+ return 0;
+
+ for (i = 0; i < length; i++)
+ sum += buffer[i];
+
+ return (u8) (0 - sum);
+}
+
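+/*
+ * The result is the two's complement of the byte sum, so a message whose
+ * bytes (checksum field included) sum to zero mod 256 verifies clean.
+ * Sketch: for bytes { 0x10, 0x20 } the checksum is 0xd0, and
+ * 0x10 + 0x20 + 0xd0 == 0x100 == 0 (mod 256).
+ */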
+
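+/**
+ * ngbe_host_interface_pass_command - Post command to manageability block
+ * @hw: pointer to the HW structure
+ * @buffer: contains the command to write, must be DWORD aligned
+ * @length: length of buffer in bytes, must be a multiple of 4
+ * @timeout: unused, kept for symmetry with ngbe_host_interface_command
+ * @return_data: unused, the command is posted without reading a reply
+ *
+ * Writes the command block into the mailbox RAM and raises SWRDY without
+ * waiting for the firmware to process it.
+ **/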
+s32 ngbe_host_interface_pass_command(struct ngbe_hw *hw, u32 *buffer,
+ u32 length, u32 timeout, bool return_data)
+{
+ u32 i;
+ u32 dword_len;
+ s32 status = 0;
+
+ DEBUGFUNC("\n");
+
+ if (length == 0 || length > NGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
+ DEBUGOUT1("Buffer length failure buffersize=%d.\n", length);
+ return NGBE_ERR_HOST_INTERFACE_COMMAND;
+ }
+
+ if (TCALL(hw, mac.ops.acquire_swfw_sync, NGBE_MNG_SWFW_SYNC_SW_MB)
+ != 0) {
+ return NGBE_ERR_SWFW_SYNC;
+ }
+
+ /* Calculate length in DWORDs. We must be DWORD aligned */
+ if ((length % (sizeof(u32))) != 0) {
+ DEBUGOUT("Buffer length failure, not aligned to dword");
+ status = NGBE_ERR_INVALID_ARGUMENT;
+ goto rel_out;
+ }
+
+ dword_len = length >> 2;
+
+ /* The device driver writes the relevant command block
+ * into the ram area.
+ */
+ for (i = 0; i < dword_len; i++) {
+ if (ngbe_check_mng_access(hw))
+ wr32a(hw, NGBE_MNG_MBOX,
+ i, NGBE_CPU_TO_LE32(buffer[i]));
+ else {
+ status = NGBE_ERR_MNG_ACCESS_FAILED;
+ goto rel_out;
+ }
+ }
+ /* Setting this bit tells the ARC that a new command is pending. */
+ if (ngbe_check_mng_access(hw))
+ wr32m(hw, NGBE_MNG_MBOX_CTL,
+ NGBE_MNG_MBOX_CTL_SWRDY, NGBE_MNG_MBOX_CTL_SWRDY);
+ else {
+ status = NGBE_ERR_MNG_ACCESS_FAILED;
+ goto rel_out;
+ }
+
+rel_out:
+ TCALL(hw, mac.ops.release_swfw_sync, NGBE_MNG_SWFW_SYNC_SW_MB);
+ return status;
+}
+
+/**
+ * ngbe_host_interface_command - Issue command to manageability block
+ * @hw: pointer to the HW structure
+ * @buffer: contains the command to write and where the return status will
+ * be placed
+ * @length: length of buffer, must be multiple of 4 bytes
+ * @timeout: time in ms to wait for command completion
+ * @return_data: read and return data from the buffer (true) or not (false)
+ * Needed because FW structures are big endian and decoding of
+ * these fields can be 8 bit or 16 bit based on command. Decoding
+ * is not easily understood without making a table of commands.
+ * So we will leave this up to the caller to read back the data
+ * in these cases.
+ *
+ * Communicates with the manageability block. On success return 0
+ * else return NGBE_ERR_HOST_INTERFACE_COMMAND.
+ **/
+s32 ngbe_host_interface_command(struct ngbe_hw *hw, u32 *buffer,
+ u32 length, u32 timeout, bool return_data)
+{
+ u32 hicr, i, bi;
+ u32 hdr_size = sizeof(struct ngbe_hic_hdr);
+ u16 buf_len;
+ u32 dword_len;
+ s32 status = 0;
+ u32 buf[64] = {};
+
+ DEBUGFUNC("\n");
+
+ if (length == 0 || length > NGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
+ DEBUGOUT1("Buffer length failure buffersize=%d.\n", length);
+ return NGBE_ERR_HOST_INTERFACE_COMMAND;
+ }
+
+ if (TCALL(hw, mac.ops.acquire_swfw_sync, NGBE_MNG_SWFW_SYNC_SW_MB)
+ != 0) {
+ return NGBE_ERR_SWFW_SYNC;
+ }
+
+ /* Calculate length in DWORDs. We must be DWORD aligned */
+ if ((length % (sizeof(u32))) != 0) {
+ DEBUGOUT("Buffer length failure, not aligned to dword");
+ status = NGBE_ERR_INVALID_ARGUMENT;
+ goto rel_out;
+ }
+
+	/* read to clean all status */
+ if (ngbe_check_mng_access(hw)) {
+ hicr = rd32(hw, NGBE_MNG_MBOX_CTL);
+ if ((hicr & NGBE_MNG_MBOX_CTL_FWRDY))
+ ERROR_REPORT1(NGBE_ERROR_CAUTION,
+ "fwrdy is set before command.\n");
+ }
+
+ dword_len = length >> 2;
+
+ /* The device driver writes the relevant command block
+ * into the ram area.
+ */
+ for (i = 0; i < dword_len; i++) {
+ if (ngbe_check_mng_access(hw))
+ wr32a(hw, NGBE_MNG_MBOX,
+ i, NGBE_CPU_TO_LE32(buffer[i]));
+ else {
+ status = NGBE_ERR_MNG_ACCESS_FAILED;
+ goto rel_out;
+ }
+ }
+ /* Setting this bit tells the ARC that a new command is pending. */
+ if (ngbe_check_mng_access(hw))
+ wr32m(hw, NGBE_MNG_MBOX_CTL,
+ NGBE_MNG_MBOX_CTL_SWRDY, NGBE_MNG_MBOX_CTL_SWRDY);
+ else {
+ status = NGBE_ERR_MNG_ACCESS_FAILED;
+ goto rel_out;
+ }
+
+ for (i = 0; i < timeout; i++) {
+ if (ngbe_check_mng_access(hw)) {
+ hicr = rd32(hw, NGBE_MNG_MBOX_CTL);
+ if ((hicr & NGBE_MNG_MBOX_CTL_FWRDY))
+ break;
+ }
+ msec_delay(1);
+ }
+
+ buf[0] = rd32(hw, NGBE_MNG_MBOX);
+ /* Check command completion */
+ if (timeout != 0 && i == timeout) {
+ ERROR_REPORT1(NGBE_ERROR_CAUTION,
+ "Command has failed with no status valid.\n");
+ printk("===%x= %x=\n", buffer[0] & 0xff, (~buf[0] >> 24));
+ printk("===%08x\n", rd32(hw, 0x1e100));
+ printk("===%08x\n", rd32(hw, 0x1e104));
+ printk("===%08x\n", rd32(hw, 0x1e108));
+ printk("===%08x\n", rd32(hw, 0x1e10c));
+ printk("===%08x\n", rd32(hw, 0x1e044));
+ printk("===%08x\n", rd32(hw, 0x10000));
+ if ((buffer[0] & 0xff) != (~buf[0] >> 24)) {
+ status = NGBE_ERR_HOST_INTERFACE_COMMAND;
+ goto rel_out;
+ }
+ }
+
+ if (!return_data)
+ goto rel_out;
+
+ /* Calculate length in DWORDs */
+ dword_len = hdr_size >> 2;
+
+ /* first pull in the header so we know the buffer length */
+ for (bi = 0; bi < dword_len; bi++) {
+ if (ngbe_check_mng_access(hw)) {
+ buffer[bi] = rd32a(hw, NGBE_MNG_MBOX,
+ bi);
+ NGBE_LE32_TO_CPUS(&buffer[bi]);
+ } else {
+ status = NGBE_ERR_MNG_ACCESS_FAILED;
+ goto rel_out;
+ }
+ }
+
+	/* If there is anything in the data position, pull it in */
+ buf_len = ((struct ngbe_hic_hdr *)buffer)->buf_len;
+ if (buf_len == 0)
+ goto rel_out;
+
+ if (length < buf_len + hdr_size) {
+ DEBUGOUT("Buffer not large enough for reply message.\n");
+ status = NGBE_ERR_HOST_INTERFACE_COMMAND;
+ goto rel_out;
+ }
+
+	/* Calculate length in DWORDs, adding 3 to round up odd lengths */
+ dword_len = (buf_len + 3) >> 2;
+
+ /* Pull in the rest of the buffer (bi is where we left off) */
+ for (; bi <= dword_len; bi++) {
+ if (ngbe_check_mng_access(hw)) {
+ buffer[bi] = rd32a(hw, NGBE_MNG_MBOX,
+ bi);
+ NGBE_LE32_TO_CPUS(&buffer[bi]);
+ } else {
+ status = NGBE_ERR_MNG_ACCESS_FAILED;
+ goto rel_out;
+ }
+ }
+
+rel_out:
+ TCALL(hw, mac.ops.release_swfw_sync, NGBE_MNG_SWFW_SYNC_SW_MB);
+ return status;
+}
+
+/**
+ * ngbe_set_fw_drv_ver - Sends driver version to firmware
+ * @hw: pointer to the HW structure
+ * @maj: driver version major number
+ * @min: driver version minor number
+ * @build: driver version build number
+ * @sub: driver version sub build number
+ *
+ * Sends the driver version number to firmware through the manageability
+ * block. Returns 0 on success, NGBE_ERR_SWFW_SYNC when encountering an
+ * error acquiring the semaphore, or NGBE_ERR_HOST_INTERFACE_COMMAND when
+ * the command fails.
+ **/
+s32 ngbe_set_fw_drv_ver(struct ngbe_hw *hw, u8 maj, u8 min,
+ u8 build, u8 sub)
+{
+ struct ngbe_hic_drv_info fw_cmd;
+ int i;
+ s32 ret_val = 0;
+
+ DEBUGFUNC("\n");
+
+ fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO;
+ fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN;
+ fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
+ fw_cmd.port_num = (u8)hw->bus.func;
+ fw_cmd.ver_maj = maj;
+ fw_cmd.ver_min = min;
+ fw_cmd.ver_build = build;
+ fw_cmd.ver_sub = sub;
+ fw_cmd.hdr.checksum = 0;
+ fw_cmd.hdr.checksum = ngbe_calculate_checksum((u8 *)&fw_cmd,
+ (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
+ fw_cmd.pad = 0;
+ fw_cmd.pad2 = 0;
+
+ for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
+ ret_val = ngbe_host_interface_command(hw, (u32 *)&fw_cmd,
+ sizeof(fw_cmd),
+ NGBE_HI_COMMAND_TIMEOUT,
+ true);
+ if (ret_val != 0)
+ continue;
+
+ if (fw_cmd.hdr.cmd_or_resp.ret_status ==
+ FW_CEM_RESP_STATUS_SUCCESS)
+ ret_val = 0;
+ else
+ ret_val = NGBE_ERR_HOST_INTERFACE_COMMAND;
+
+ break;
+ }
+
+ return ret_val;
+}
+
+/**
+ * ngbe_reset_hostif - send reset cmd to fw
+ * @hw: pointer to hardware structure
+ *
+ * Sends a reset cmd to firmware through the manageability
+ * block. Returns 0 on success, NGBE_ERR_SWFW_SYNC when encountering an
+ * error acquiring the semaphore, or NGBE_ERR_HOST_INTERFACE_COMMAND when
+ * the command fails.
+ **/
+s32 ngbe_reset_hostif(struct ngbe_hw *hw)
+{
+ struct ngbe_hic_reset reset_cmd;
+ int i;
+ s32 status = 0;
+
+ DEBUGFUNC("\n");
+
+ reset_cmd.hdr.cmd = FW_RESET_CMD;
+ reset_cmd.hdr.buf_len = FW_RESET_LEN;
+ reset_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
+ reset_cmd.lan_id = hw->bus.lan_id;
+ reset_cmd.reset_type = (u16)hw->reset_type;
+ reset_cmd.hdr.checksum = 0;
+ reset_cmd.hdr.checksum = ngbe_calculate_checksum((u8 *)&reset_cmd,
+ (FW_CEM_HDR_LEN + reset_cmd.hdr.buf_len));
+
+ /* send reset request to FW and wait for response */
+ for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
+ status = ngbe_host_interface_command(hw, (u32 *)&reset_cmd,
+ sizeof(reset_cmd),
+ NGBE_HI_COMMAND_TIMEOUT,
+ true);
+ msleep(1);
+ if (status != 0)
+ continue;
+
+ if (reset_cmd.hdr.cmd_or_resp.ret_status ==
+ FW_CEM_RESP_STATUS_SUCCESS)
+ status = 0;
+ else
+ status = NGBE_ERR_HOST_INTERFACE_COMMAND;
+
+ break;
+ }
+
+ return status;
+}
+
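+/**
+ * ngbe_setup_mac_link_hostif - Set link speed through firmware
+ * @hw: pointer to hardware structure
+ * @speed: requested speed, passed through in the FW_SETUP_MAC_LINK_CMD
+ *
+ * Sends the setup-link cmd to firmware through the manageability block,
+ * retrying up to FW_CEM_MAX_RETRIES times.
+ **/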
+s32 ngbe_setup_mac_link_hostif(struct ngbe_hw *hw, u32 speed)
+{
+ struct ngbe_hic_phy_cfg cmd;
+ int i;
+ s32 status = 0;
+
+ DEBUGFUNC("\n");
+
+ cmd.hdr.cmd = FW_SETUP_MAC_LINK_CMD;
+ cmd.hdr.buf_len = FW_SETUP_MAC_LINK_LEN;
+ cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
+ cmd.lan_id = hw->bus.lan_id;
+ cmd.phy_mode = 0;
+ cmd.phy_speed = (u16)speed;
+ cmd.hdr.checksum = 0;
+ cmd.hdr.checksum = ngbe_calculate_checksum((u8 *)&cmd,
+ (FW_CEM_HDR_LEN + cmd.hdr.buf_len));
+
+ for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
+ status = ngbe_host_interface_command(hw, (u32 *)&cmd,
+ sizeof(cmd),
+ NGBE_HI_COMMAND_TIMEOUT,
+ true);
+ if (status != 0)
+ continue;
+
+ if (cmd.hdr.cmd_or_resp.ret_status ==
+ FW_CEM_RESP_STATUS_SUCCESS)
+ status = 0;
+ else
+ status = NGBE_ERR_HOST_INTERFACE_COMMAND;
+
+ break;
+ }
+
+ return status;
+}
+
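+/*
+ * Bitwise CRC-16 with polynomial 0x1021 and zero initial value (the
+ * XMODEM flavour of CRC-CCITT), MSB first. Sanity check (sketch):
+ * ngbe_crc16_ccitt("123456789", 9) == 0x31c3.
+ */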
+u16 ngbe_crc16_ccitt(const u8 *buf, int size)
+{
+ u16 crc = 0;
+ int i;
+ while (--size >= 0) {
+ crc ^= (u16)*buf++ << 8;
+ for (i = 0; i < 8; i++) {
+ if (crc & 0x8000)
+ crc = crc << 1 ^ 0x1021;
+ else
+ crc <<= 1;
+ }
+ }
+ return crc;
+}
+
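+/**
+ * ngbe_upgrade_flash_hostif - Upgrade a flash region through firmware
+ * @hw: pointer to hardware structure
+ * @region: module id (NGBE_MODULE_EEPROM/FIRMWARE/HARDWARE)
+ * @data: image to program
+ * @size: image size in bytes
+ *
+ * Drives the start/write/verify host interface command sequence, sending
+ * the image in chunks of up to 248 bytes.
+ **/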
+s32 ngbe_upgrade_flash_hostif(struct ngbe_hw *hw, u32 region,
+ const u8 *data, u32 size)
+{
+ struct ngbe_hic_upg_start start_cmd;
+ struct ngbe_hic_upg_write write_cmd;
+ struct ngbe_hic_upg_verify verify_cmd;
+ u32 offset;
+ s32 status = 0;
+
+ DEBUGFUNC("\n");
+
+ start_cmd.hdr.cmd = FW_FLASH_UPGRADE_START_CMD;
+ start_cmd.hdr.buf_len = FW_FLASH_UPGRADE_START_LEN;
+ start_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
+ start_cmd.module_id = (u8)region;
+ start_cmd.hdr.checksum = 0;
+ start_cmd.hdr.checksum = ngbe_calculate_checksum((u8 *)&start_cmd,
+ (FW_CEM_HDR_LEN + start_cmd.hdr.buf_len));
+ start_cmd.pad2 = 0;
+ start_cmd.pad3 = 0;
+
+ status = ngbe_host_interface_command(hw, (u32 *)&start_cmd,
+ sizeof(start_cmd),
+ NGBE_HI_FLASH_ERASE_TIMEOUT,
+ true);
+
+ if (start_cmd.hdr.cmd_or_resp.ret_status == FW_CEM_RESP_STATUS_SUCCESS)
+ status = 0;
+ else {
+ status = NGBE_ERR_HOST_INTERFACE_COMMAND;
+ return status;
+ }
+
+ for (offset = 0; offset < size;) {
+ write_cmd.hdr.cmd = FW_FLASH_UPGRADE_WRITE_CMD;
+ if (size - offset > 248) {
+ write_cmd.data_len = 248 / 4;
+ write_cmd.eof_flag = 0;
+ } else {
+ write_cmd.data_len = (u8)((size - offset) / 4);
+ write_cmd.eof_flag = 1;
+ }
+ memcpy((u8 *)write_cmd.data, &data[offset], write_cmd.data_len * 4);
+ write_cmd.hdr.buf_len = (write_cmd.data_len + 1) * 4;
+ write_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
+ write_cmd.check_sum = ngbe_crc16_ccitt((u8 *)write_cmd.data,
+ write_cmd.data_len * 4);
+
+ status = ngbe_host_interface_command(hw, (u32 *)&write_cmd,
+ sizeof(write_cmd),
+ NGBE_HI_FLASH_UPDATE_TIMEOUT,
+ true);
+		if (write_cmd.hdr.cmd_or_resp.ret_status ==
+ FW_CEM_RESP_STATUS_SUCCESS)
+ status = 0;
+ else {
+ status = NGBE_ERR_HOST_INTERFACE_COMMAND;
+ return status;
+ }
+ offset += write_cmd.data_len * 4;
+ }
+
+ verify_cmd.hdr.cmd = FW_FLASH_UPGRADE_VERIFY_CMD;
+ verify_cmd.hdr.buf_len = FW_FLASH_UPGRADE_VERIFY_LEN;
+ verify_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
+ switch (region) {
+ case NGBE_MODULE_EEPROM:
+ verify_cmd.action_flag = NGBE_RELOAD_EEPROM;
+ break;
+ case NGBE_MODULE_FIRMWARE:
+ verify_cmd.action_flag = NGBE_RESET_FIRMWARE;
+ break;
+ case NGBE_MODULE_HARDWARE:
+ verify_cmd.action_flag = NGBE_RESET_LAN;
+ break;
+ default:
+ ERROR_REPORT1(NGBE_ERROR_ARGUMENT,
+ "ngbe_upgrade_flash_hostif: region err %x\n", region);
+ return status;
+ }
+
+ verify_cmd.hdr.checksum = ngbe_calculate_checksum((u8 *)&verify_cmd,
+ (FW_CEM_HDR_LEN + verify_cmd.hdr.buf_len));
+
+ status = ngbe_host_interface_command(hw, (u32 *)&verify_cmd,
+ sizeof(verify_cmd),
+ NGBE_HI_FLASH_VERIFY_TIMEOUT,
+ true);
+
+ if (verify_cmd.hdr.cmd_or_resp.ret_status == FW_CEM_RESP_STATUS_SUCCESS)
+ status = 0;
+ else {
+ status = NGBE_ERR_HOST_INTERFACE_COMMAND;
+ }
+ return status;
+}
+
+/*
+ * cmd_addr is used for some special commands:
+ * 1. the sector address for the erase sector command
+ * 2. the flash address for the read/write flash commands
+ */
+u8 fmgr_cmd_op(struct ngbe_hw *hw, u32 cmd, u32 cmd_addr)
+{
+ u32 cmd_val = 0;
+ u32 time_out = 0;
+
+ cmd_val = (cmd << SPI_CLK_CMD_OFFSET) | (SPI_CLK_DIV << SPI_CLK_DIV_OFFSET) | cmd_addr;
+ wr32(hw, SPI_H_CMD_REG_ADDR, cmd_val);
+ while (1) {
+ if (rd32(hw, SPI_H_STA_REG_ADDR) & 0x1)
+ break;
+
+ if (time_out == SPI_TIME_OUT_VALUE)
+ return 1;
+
+ time_out = time_out + 1;
+ udelay(10);
+ }
+
+ return 0;
+}
+
+u8 fmgr_usr_cmd_op(struct ngbe_hw *hw, u32 usr_cmd)
+{
+ u8 status = 0;
+
+ wr32(hw, SPI_H_USR_CMD_REG_ADDR, usr_cmd);
+ status = fmgr_cmd_op(hw, SPI_CMD_USER_CMD, 0);
+
+ return status;
+}
+
+u8 flash_erase_chip(struct ngbe_hw *hw)
+{
+ u8 status = fmgr_cmd_op(hw, SPI_CMD_ERASE_CHIP, 0);
+ return status;
+}
+
+u8 flash_erase_sector(struct ngbe_hw *hw, u32 sec_addr)
+{
+ u8 status = fmgr_cmd_op(hw, SPI_CMD_ERASE_SECTOR, sec_addr);
+ return status;
+}
+
+u32 flash_read_dword(struct ngbe_hw *hw, u32 addr)
+{
+ u8 status = fmgr_cmd_op(hw, SPI_CMD_READ_DWORD, addr);
+ if (status)
+ return (u32)status;
+
+ return rd32(hw, SPI_H_DAT_REG_ADDR);
+}
+
+u8 flash_write_dword(struct ngbe_hw *hw, u32 addr, u32 dword)
+{
+ u8 status = 0;
+
+ wr32(hw, SPI_H_DAT_REG_ADDR, dword);
+ status = fmgr_cmd_op(hw, SPI_CMD_WRITE_DWORD, addr);
+ if (status)
+ return status;
+
+	if (dword != flash_read_dword(hw, addr))
+		return 1;
+
+ return 0;
+}
+
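+/*
+ * flash_write_dword() verifies by reading back; an illustrative
+ * erase-then-program sketch (addresses are hypothetical):
+ *
+ *     flash_erase_sector(hw, sec_addr);
+ *     if (flash_write_dword(hw, sec_addr, 0x12345678))
+ *             ... handle program/verify failure ...
+ */
+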
+int ngbe_flash_write_cab(struct ngbe_hw *hw, u32 addr, u32 value, u16 lan_id)
+{
+ int status;
+ struct ngbe_hic_read_cab buffer;
+
+ buffer.hdr.req.cmd = 0xE2;
+ buffer.hdr.req.buf_lenh = 0x6;
+ buffer.hdr.req.buf_lenl = 0x0;
+ buffer.hdr.req.checksum = 0xFF;
+
+	buffer.dbuf.d16[0] = cpu_to_le16(lan_id);
+	/* address and value as big-endian dwords */
+	buffer.dbuf.d32[0] = htonl(addr);
+	buffer.dbuf.d32[1] = htonl(value);
+
+ status = ngbe_host_interface_command(hw, (u32 *)&buffer,
+ sizeof(buffer), 5000, true);
+ printk("0x1e100 :%08x\n", rd32(hw, 0x1e100));
+ printk("0x1e104 :%08x\n", rd32(hw, 0x1e104));
+ printk("0x1e108 :%08x\n", rd32(hw, 0x1e108));
+ printk("0x1e10c :%08x\n", rd32(hw, 0x1e10c));
+
+ return status;
+}
+
+int ngbe_flash_read_cab(struct ngbe_hw *hw, u32 addr, u16 lan_id)
+{
+ int status;
+ struct ngbe_hic_read_cab buffer;
+
+ buffer.hdr.req.cmd = 0xE1;
+ buffer.hdr.req.buf_lenh = 0xaa;
+ buffer.hdr.req.buf_lenl = 0;
+ buffer.hdr.req.checksum = 0xFF;
+
+	buffer.dbuf.d16[0] = cpu_to_le16(lan_id);
+	/* address as a big-endian dword */
+	buffer.dbuf.d32[0] = htonl(addr);
+
+ status = ngbe_host_interface_command(hw, (u32 *)&buffer,
+ sizeof(buffer), 5000, true);
+
+ if (status)
+ return status;
+ if (ngbe_check_mng_access(hw)) {
+		(void)rd32a(hw, 0x1e100, 3);	/* read and discard the reply word */
+ printk("0x1e100 :%08x\n", rd32(hw, 0x1e100));
+ printk("0x1e104 :%08x\n", rd32(hw, 0x1e104));
+ printk("0x1e108 :%08x\n", rd32(hw, 0x1e108));
+ printk("0x1e10c :%08x\n", rd32(hw, 0x1e10c));
+ } else {
+ status = -147;
+ return status;
+ }
+
+ return rd32(hw, 0x1e108);
+}
+
+int ngbe_flash_write_unlock(struct ngbe_hw *hw)
+{
+ int status;
+ struct ngbe_hic_read_shadow_ram buffer;
+
+ buffer.hdr.req.cmd = 0x40;
+ buffer.hdr.req.buf_lenh = 0;
+ buffer.hdr.req.buf_lenl = 0;
+ buffer.hdr.req.checksum = 0xFF;
+
+	/* address and length are unused for this command */
+	buffer.address = 0;
+	buffer.length = 0;
+
+	status = ngbe_host_interface_command(hw, (u32 *)&buffer,
+					sizeof(buffer), 5000, false);
+
+	return status;
+}
+
+int ngbe_flash_write_lock(struct ngbe_hw *hw)
+{
+ int status;
+ struct ngbe_hic_read_shadow_ram buffer;
+
+ buffer.hdr.req.cmd = 0x39;
+ buffer.hdr.req.buf_lenh = 0;
+ buffer.hdr.req.buf_lenl = 0;
+ buffer.hdr.req.checksum = 0xFF;
+
+	/* address and length are unused for this command */
+	buffer.address = 0;
+	buffer.length = 0;
+
+	status = ngbe_host_interface_command(hw, (u32 *)&buffer,
+					sizeof(buffer), 5000, false);
+
+	return status;
+}
+
+int ngbe_upgrade_flash(struct ngbe_hw *hw, u32 region,
+ const u8 *data, u32 size)
+{
+ u32 sector_num = 0;
+ u32 read_data = 0;
+ u8 status = 0;
+ u8 skip = 0;
+ u32 i = 0, k = 0, n = 0;
+ u8 flash_vendor = 0;
+ u32 num[256] = {0};
+ u32 mac_addr0_dword0_t, mac_addr0_dword1_t;
+ u32 mac_addr1_dword0_t, mac_addr1_dword1_t;
+ u32 mac_addr2_dword0_t, mac_addr2_dword1_t;
+ u32 mac_addr3_dword0_t, mac_addr3_dword1_t;
+ u32 serial_num_dword0_t, serial_num_dword1_t, serial_num_dword2_t;
+
+	/* check sub_id */
+ printk("Checking sub_id .......\n");
+ printk("The card's sub_id : %04x\n", hw->subsystem_device_id);
+ printk("The image's sub_id : %04x\n", data[0xfffdc] << 8 | data[0xfffdd]);
+ if ((hw->subsystem_device_id & 0xffff) ==
+ ((data[0xfffdc] << 8 | data[0xfffdd]) & 0xffff)) {
+ printk("It is a right image\n");
+ } else if (hw->subsystem_device_id == 0xffff) {
+ printk("update anyway\n");
+ } else {
+		printk("====The Gigabit image does not match the Gigabit card====\n");
+ printk("====Please check your image====\n");
+ return -EOPNOTSUPP;
+ }
+
+ /*check dev_id*/
+ printk("Checking dev_id .......\n");
+ printk("The image's dev_id : %04x\n", data[0xfffde] << 8 | data[0xfffdf]);
+ printk("The card's dev_id : %04x\n", hw->device_id);
+ if (!((hw->device_id & 0xffff) ==
+ ((data[0xfffde] << 8 | data[0xfffdf]) & 0xffff))
+ && !(hw->device_id == 0xffff)) {
+		printk("====The Gigabit image does not match the Gigabit card====\n");
+ printk("====Please check your image====\n");
+ return -EOPNOTSUPP;
+ }
+
+ // unlock flash write protect
+ ngbe_release_eeprom_semaphore(hw);
+ ngbe_flash_write_unlock(hw);
+
+ wr32(hw, 0x10114, 0x9f050206);
+ wr32(hw, 0x10194, 0x9f050206);
+
+ ngbe_flash_write_cab(hw, 0x188, 0, 0);
+ ngbe_flash_write_cab(hw, 0x184, 0x60000000, 0);
+ msleep(1000);
+
+ mac_addr0_dword0_t = flash_read_dword(hw, MAC_ADDR0_WORD0_OFFSET_1G);
+ mac_addr0_dword1_t = flash_read_dword(hw, MAC_ADDR0_WORD1_OFFSET_1G) & 0xffff;
+ mac_addr1_dword0_t = flash_read_dword(hw, MAC_ADDR1_WORD0_OFFSET_1G);
+ mac_addr1_dword1_t = flash_read_dword(hw, MAC_ADDR1_WORD1_OFFSET_1G) & 0xffff;
+ mac_addr2_dword0_t = flash_read_dword(hw, MAC_ADDR2_WORD0_OFFSET_1G);
+ mac_addr2_dword1_t = flash_read_dword(hw, MAC_ADDR2_WORD1_OFFSET_1G) & 0xffff;
+ mac_addr3_dword0_t = flash_read_dword(hw, MAC_ADDR3_WORD0_OFFSET_1G);
+ mac_addr3_dword1_t = flash_read_dword(hw, MAC_ADDR3_WORD1_OFFSET_1G) & 0xffff;
+
+ serial_num_dword0_t = flash_read_dword(hw, PRODUCT_SERIAL_NUM_OFFSET_1G);
+ serial_num_dword1_t = flash_read_dword(hw, PRODUCT_SERIAL_NUM_OFFSET_1G + 4);
+ serial_num_dword2_t = flash_read_dword(hw, PRODUCT_SERIAL_NUM_OFFSET_1G + 8);
+ printk("Old: MAC Address0 is: 0x%04x%08x\n", mac_addr0_dword1_t,
+ mac_addr0_dword0_t);
+ printk(" MAC Address1 is: 0x%04x%08x\n", mac_addr1_dword1_t,
+ mac_addr1_dword0_t);
+ printk(" MAC Address2 is: 0x%04x%08x\n", mac_addr2_dword1_t,
+ mac_addr2_dword0_t);
+ printk(" MAC Address3 is: 0x%04x%08x\n", mac_addr3_dword1_t,
+ mac_addr3_dword0_t);
+
+ for (k = 0; k < (1024 / 4); k++) {
+ num[k] = flash_read_dword(hw, 0xfe000 + k * 4);
+ }
+
+ status = fmgr_usr_cmd_op(hw, 0x6); /* write enable */
+ status = fmgr_usr_cmd_op(hw, 0x98); /* global protection un-lock */
+ msleep(1000); /* 1 s */
+
+ /* Note: for Spansion FLASH, the first 8 sectors (4KB each) in sector0
+ * (64KB) need a special erase command (4K sector erase)
+ */
+ if (flash_vendor == 1) {
+ wr32(hw, SPI_CMD_CFG1_ADDR, 0x0103c720);
+ for (i = 0; i < 8; i++) {
+ flash_erase_sector(hw, i*128);
+ msleep(20); // 20 ms
+ }
+ wr32(hw, SPI_CMD_CFG1_ADDR, 0x0103c7d8);
+ }
+
+ /* Winbond Flash: erase chip command is okay, but erase sector doesn't work */
+ sector_num = size / SPI_SECTOR_SIZE;
+ if (flash_vendor == 2) {
+ status = flash_erase_chip(hw);
+ printk("Erase chip command, return status = %0d\n", status);
+ msleep(1000); // 1 s
+ } else {
+ wr32(hw, SPI_CMD_CFG1_ADDR, 0x0103c720);
+ for (i = 0; i < sector_num; i++) {
+ status = flash_erase_sector(hw, i * SPI_SECTOR_SIZE);
+ printk("Erase sector[%2d] command, return status = %0d\n", i, status);
+ msleep(50); // 50 ms
+ }
+ wr32(hw, SPI_CMD_CFG1_ADDR, 0x0103c7d8);
+ }
+
+ // Program Image file in dword
+ for (i = 0; i < size / 4; i++) {
+ read_data = data[4 * i + 3] << 24 | data[4 * i + 2] << 16 |
+ data[4 * i + 1] << 8 | data[4 * i];
+ read_data = __le32_to_cpu(read_data);
+ skip = ((i * 4 == MAC_ADDR0_WORD0_OFFSET_1G) ||
+ (i * 4 == MAC_ADDR0_WORD1_OFFSET_1G) ||
+ (i * 4 == MAC_ADDR1_WORD0_OFFSET_1G) ||
+ (i * 4 == MAC_ADDR1_WORD1_OFFSET_1G) ||
+ (i * 4 == MAC_ADDR2_WORD0_OFFSET_1G) ||
+ (i * 4 == MAC_ADDR2_WORD1_OFFSET_1G) ||
+ (i * 4 == MAC_ADDR3_WORD0_OFFSET_1G) ||
+ (i * 4 == MAC_ADDR3_WORD1_OFFSET_1G) ||
+ (i * 4 >= PRODUCT_SERIAL_NUM_OFFSET_1G &&
+ i * 4 <= PRODUCT_SERIAL_NUM_OFFSET_1G + 8));
+ if (read_data != 0xffffffff && !skip) {
+ status = flash_write_dword(hw, i * 4, read_data);
+ if (status) {
+ printk("ERROR: Program 0x%08x @addr: 0x%08x is failed !!\n",
+ read_data, i * 4);
+ read_data = flash_read_dword(hw, i * 4);
+ printk(" Read data from Flash is: 0x%08x\n", read_data);
+ return 1;
+ }
+ }
+ if (i % 1024 == 0) {
+ printk("\b\b\b\b%3d%%", (int)(i * 4 * 100 / size));
+ }
+ }
+
+ flash_write_dword(hw, MAC_ADDR0_WORD0_OFFSET_1G,
+ mac_addr0_dword0_t);
+ flash_write_dword(hw, MAC_ADDR0_WORD1_OFFSET_1G,
+ (mac_addr0_dword1_t | 0x80000000)); /* lan0 */
+ flash_write_dword(hw, MAC_ADDR1_WORD0_OFFSET_1G,
+ mac_addr1_dword0_t);
+ flash_write_dword(hw, MAC_ADDR1_WORD1_OFFSET_1G,
+ (mac_addr1_dword1_t | 0x80000000)); /* lan1 */
+ flash_write_dword(hw, MAC_ADDR2_WORD0_OFFSET_1G,
+ mac_addr2_dword0_t);
+ flash_write_dword(hw, MAC_ADDR2_WORD1_OFFSET_1G,
+ (mac_addr2_dword1_t | 0x80000000)); /* lan2 */
+ flash_write_dword(hw, MAC_ADDR3_WORD0_OFFSET_1G,
+ mac_addr3_dword0_t);
+ flash_write_dword(hw, MAC_ADDR3_WORD1_OFFSET_1G,
+ (mac_addr3_dword1_t | 0x80000000)); /* lan3 */
+ flash_write_dword(hw, PRODUCT_SERIAL_NUM_OFFSET_1G, serial_num_dword0_t);
+ flash_write_dword(hw, PRODUCT_SERIAL_NUM_OFFSET_1G+4, serial_num_dword1_t);
+ flash_write_dword(hw, PRODUCT_SERIAL_NUM_OFFSET_1G+8, serial_num_dword2_t);
+
+ for (n = 0; n < 1024 / 4; n++) {
+ if (num[n] != 0xffffffff)
+ flash_write_dword(hw, 0xfe000 + n * 4, num[n]);
+ }
+
+ return 0;
+}
+
+/**
+ * ngbe_set_rxpba - Initialize Rx packet buffer
+ * @hw: pointer to hardware structure
+ * @num_pb: number of packet buffers to allocate
+ * @headroom: reserve n KB of headroom
+ * @strategy: packet buffer allocation strategy
+ **/
+void ngbe_set_rxpba(struct ngbe_hw *hw, int num_pb, u32 headroom,
+ int strategy)
+{
+ u32 pbsize = hw->mac.rx_pb_size;
+ u32 rxpktsize, txpktsize, txpbthresh;
+
+ DEBUGFUNC("\n");
+
+ /* Reserve headroom */
+ pbsize -= headroom;
+
+ if (!num_pb)
+ num_pb = 1;
+
+ /* Divide remaining packet buffer space amongst the number of packet
+ * buffers requested using supplied strategy.
+ */
+ switch (strategy) {
+ case PBA_STRATEGY_EQUAL:
+ rxpktsize = (pbsize / num_pb) << NGBE_RDB_PB_SZ_SHIFT;
+ wr32(hw, NGBE_RDB_PB_SZ, rxpktsize);
+ break;
+ default:
+ break;
+ }
+
+ /* Only support an equally distributed Tx packet buffer strategy. */
+ txpktsize = NGBE_TDB_PB_SZ_MAX / num_pb;
+ txpbthresh = (txpktsize / NGBE_KB_TO_B) - NGBE_TXPKT_SIZE_MAX;
+
+ wr32(hw, NGBE_TDB_PB_SZ, txpktsize);
+ wr32(hw, NGBE_TDM_PB_THRE, txpbthresh);
+}
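+
+/*
+ * Arithmetic sketch for the strategy above, with hypothetical numbers
+ * (rx_pb_size and the register encodings are hardware specific): if
+ * rx_pb_size = 32, headroom = 0 and num_pb = 4, PBA_STRATEGY_EQUAL
+ * writes (32 / 4) << NGBE_RDB_PB_SZ_SHIFT to NGBE_RDB_PB_SZ, and the
+ * Tx side is sized to NGBE_TDB_PB_SZ_MAX / 4 per packet buffer.
+ */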
+
+STATIC const u8 ngbe_emc_temp_data[4] = {
+ NGBE_EMC_INTERNAL_DATA,
+ NGBE_EMC_DIODE1_DATA,
+ NGBE_EMC_DIODE2_DATA,
+ NGBE_EMC_DIODE3_DATA
+};
+
+STATIC const u8 ngbe_emc_therm_limit[4] = {
+ NGBE_EMC_INTERNAL_THERM_LIMIT,
+ NGBE_EMC_DIODE1_THERM_LIMIT,
+ NGBE_EMC_DIODE2_THERM_LIMIT,
+ NGBE_EMC_DIODE3_THERM_LIMIT
+};
+
+/**
+ * ngbe_get_thermal_sensor_data - Gathers thermal sensor data
+ * @hw: pointer to hardware structure
+ * @data: pointer to the thermal sensor data structure
+ *
+ * algorithm:
+ * T = (-4.8380E+01)N^0 + (3.1020E-01)N^1 + (-1.8201E-04)N^2 +
+ *     (8.1542E-08)N^3 + (-1.6743E-11)N^4
+ * algorithm with 5% more deviation, easy for implementation
+ * T = (-50)N^0 + (0.31)N^1 + (-0.0002)N^2 + (0.0000001)N^3
+ *
+ * Returns the thermal sensor data structure
+ **/
+s32 ngbe_get_thermal_sensor_data(struct ngbe_hw *hw)
+{
+ s64 tsv;
+ struct ngbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
+
+ DEBUGFUNC("\n");
+
+ /* Only support thermal sensors attached to physical port 0 */
+ if (hw->bus.lan_id)
+ return NGBE_NOT_IMPLEMENTED;
+
+ tsv = (s64)(rd32(hw, NGBE_TS_ST) &
+ NGBE_TS_ST_DATA_OUT_MASK);
+ /* 216 < tsv < 876 */
+
+ tsv = tsv < 876 ? tsv : 876;
+ tsv = tsv - 216;
+ tsv = tsv/4;
+ tsv = tsv - 40;
+ data->sensor.temp = (s16)tsv;
+
+ return 0;
+}
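+
+/*
+ * Worked example of the conversion above, following the code's own
+ * steps: a raw reading of tsv = 616 gives (616 - 216) / 4 - 40 = 60
+ * degrees C. Only the upper bound (876) is clamped, so raw values
+ * below 216 would yield temperatures below -40 C.
+ */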
+
+/**
+ * ngbe_init_thermal_sensor_thresh - Inits thermal sensor thresholds
+ * @hw: pointer to hardware structure
+ *
+ * Inits the thermal sensor thresholds according to the NVM map
+ * and save off the threshold and location values into mac.thermal_sensor_data
+ **/
+s32 ngbe_init_thermal_sensor_thresh(struct ngbe_hw *hw)
+{
+ s32 status = 0;
+
+ struct ngbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
+
+ DEBUGFUNC("\n");
+
+ memset(data, 0, sizeof(struct ngbe_thermal_sensor_data));
+
+ /* Only support thermal sensors attached to physical port 0 */
+ if (hw->bus.lan_id)
+ return NGBE_NOT_IMPLEMENTED;
+
+ wr32(hw, NGBE_TS_INT_EN, NGBE_TS_INT_EN_DALARM_INT_EN | NGBE_TS_INT_EN_ALARM_INT_EN);
+
+ wr32(hw, NGBE_TS_EN, NGBE_TS_EN_ENA);
+
+ data->sensor.alarm_thresh = 115;
+ wr32(hw, NGBE_TS_ALARM_THRE, 0x344); /* (115 + 40) * 4 + 216 = 0x344 */
+ data->sensor.dalarm_thresh = 110;
+ wr32(hw, NGBE_TS_DALARM_THRE, 0x330); /* (110 + 40) * 4 + 216 = 0x330 */
+
+ return status;
+}
+
+void ngbe_disable_rx(struct ngbe_hw *hw)
+{
+ u32 pfdtxgswc;
+ u32 rxctrl;
+
+ DEBUGFUNC("\n");
+
+ rxctrl = rd32(hw, NGBE_RDB_PB_CTL);
+ if (rxctrl & NGBE_RDB_PB_CTL_PBEN) {
+ pfdtxgswc = rd32(hw, NGBE_PSR_CTL);
+ if (pfdtxgswc & NGBE_PSR_CTL_SW_EN) {
+ pfdtxgswc &= ~NGBE_PSR_CTL_SW_EN;
+ wr32(hw, NGBE_PSR_CTL, pfdtxgswc);
+ hw->mac.set_lben = true;
+ } else {
+ hw->mac.set_lben = false;
+ }
+ rxctrl &= ~NGBE_RDB_PB_CTL_PBEN;
+ wr32(hw, NGBE_RDB_PB_CTL, rxctrl);
+
+ /*OCP NCSI BMC need it*/
+ if (!(((hw->subsystem_device_id & OEM_MASK) == OCP_CARD) ||
+ ((hw->subsystem_device_id & WOL_SUP_MASK) == WOL_SUP) ||
+ ((hw->subsystem_device_id & NCSI_SUP_MASK) == NCSI_SUP))) {
+ /* disable mac receiver */
+ wr32m(hw, NGBE_MAC_RX_CFG,
+ NGBE_MAC_RX_CFG_RE, 0);
+ }
+ }
+}
+
+
+void ngbe_enable_rx(struct ngbe_hw *hw)
+{
+ u32 pfdtxgswc;
+
+ DEBUGFUNC("\n");
+
+ /* enable mac receiver */
+ wr32m(hw, NGBE_MAC_RX_CFG,
+ NGBE_MAC_RX_CFG_RE, NGBE_MAC_RX_CFG_RE);
+
+ wr32m(hw, NGBE_RSEC_CTL,
+ 0x2, 0);
+
+ wr32m(hw, NGBE_RDB_PB_CTL,
+ NGBE_RDB_PB_CTL_PBEN, NGBE_RDB_PB_CTL_PBEN);
+
+ if (hw->mac.set_lben) {
+ pfdtxgswc = rd32(hw, NGBE_PSR_CTL);
+ pfdtxgswc |= NGBE_PSR_CTL_SW_EN;
+ wr32(hw, NGBE_PSR_CTL, pfdtxgswc);
+ hw->mac.set_lben = false;
+ }
+}
+
+/**
+ * ngbe_mng_present - returns true when management capability is present
+ * @hw: pointer to hardware structure
+ */
+bool ngbe_mng_present(struct ngbe_hw *hw)
+{
+ u32 fwsm;
+
+ fwsm = rd32(hw, NGBE_MIS_ST);
+ return fwsm & NGBE_MIS_ST_MNG_INIT_DN;
+}
+
+bool ngbe_check_mng_access(struct ngbe_hw *hw)
+{
+ return ngbe_mng_present(hw);
+}
+
+int ngbe_check_flash_load(struct ngbe_hw *hw, u32 check_bit)
+{
+ u32 i = 0;
+ u32 reg = 0;
+ int err = 0;
+ /* if there's flash existing */
+ if (!(rd32(hw, NGBE_SPI_STATUS) &
+ NGBE_SPI_STATUS_FLASH_BYPASS)) {
+ /* wait hw load flash done */
+ for (i = 0; i < NGBE_MAX_FLASH_LOAD_POLL_TIME; i++) {
+ reg = rd32(hw, NGBE_SPI_ILDR_STATUS);
+ if (!(reg & check_bit)) {
+ /* done */
+ break;
+ }
+ msleep(200);
+ }
+ if (i == NGBE_MAX_FLASH_LOAD_POLL_TIME) {
+ err = NGBE_ERR_FLASH_LOADING_FAILED;
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "HW Loading Flash failed: %d\n", err);
+ }
+ }
+ return err;
+}
+
+/* The ngbe_ptype_lookup is used to convert from the 8-bit ptype in the
+ * hardware to a bit-field that can be used by SW to more easily determine the
+ * packet type.
+ *
+ * Macros are used to shorten the table lines and make this table human
+ * readable.
+ *
+ * We store the PTYPE in the top byte of the bit field - this is just so that
+ * we can check that the table doesn't have a row missing, as the index into
+ * the table should be the PTYPE.
+ *
+ * Typical work flow:
+ *
+ * IF NOT ngbe_ptype_lookup[ptype].known
+ * THEN
+ * Packet is unknown
+ * ELSE IF ngbe_ptype_lookup[ptype].mac == NGBE_DEC_PTYPE_MAC_IP
+ * Use the rest of the fields to look at the tunnels, inner protocols, etc
+ * ELSE
+ * Use the enum ngbe_l2_ptypes to decode the packet type
+ * ENDIF
+ */
+
+/* macro to make the table lines short */
+#define NGBE_PTT(ptype, mac, ip, etype, eip, proto, layer)\
+ { ptype, \
+ 1, \
+ /* mac */ NGBE_DEC_PTYPE_MAC_##mac, \
+ /* ip */ NGBE_DEC_PTYPE_IP_##ip, \
+ /* etype */ NGBE_DEC_PTYPE_ETYPE_##etype, \
+ /* eip */ NGBE_DEC_PTYPE_IP_##eip, \
+ /* proto */ NGBE_DEC_PTYPE_PROT_##proto, \
+ /* layer */ NGBE_DEC_PTYPE_LAYER_##layer }
+
+#define NGBE_UKN(ptype) \
+ { ptype, 0, 0, 0, 0, 0, 0, 0 }
+
+/* Lookup table mapping the HW PTYPE to the bit field for decoding */
+/* for ((pt=0;pt<256;pt++)); do printf "macro(0x%02X),\n" $pt; done */
+ngbe_dptype ngbe_ptype_lookup[256] = {
+ NGBE_UKN(0x00),
+ NGBE_UKN(0x01),
+ NGBE_UKN(0x02),
+ NGBE_UKN(0x03),
+ NGBE_UKN(0x04),
+ NGBE_UKN(0x05),
+ NGBE_UKN(0x06),
+ NGBE_UKN(0x07),
+ NGBE_UKN(0x08),
+ NGBE_UKN(0x09),
+ NGBE_UKN(0x0A),
+ NGBE_UKN(0x0B),
+ NGBE_UKN(0x0C),
+ NGBE_UKN(0x0D),
+ NGBE_UKN(0x0E),
+ NGBE_UKN(0x0F),
+
+ /* L2: mac */
+ NGBE_UKN(0x10),
+ NGBE_PTT(0x11, L2, NONE, NONE, NONE, NONE, PAY2),
+ NGBE_PTT(0x12, L2, NONE, NONE, NONE, TS, PAY2),
+ NGBE_PTT(0x13, L2, NONE, NONE, NONE, NONE, PAY2),
+ NGBE_PTT(0x14, L2, NONE, NONE, NONE, NONE, PAY2),
+ NGBE_PTT(0x15, L2, NONE, NONE, NONE, NONE, NONE),
+ NGBE_PTT(0x16, L2, NONE, NONE, NONE, NONE, PAY2),
+ NGBE_PTT(0x17, L2, NONE, NONE, NONE, NONE, NONE),
+
+ /* L2: ethertype filter */
+ NGBE_PTT(0x18, L2, NONE, NONE, NONE, NONE, NONE),
+ NGBE_PTT(0x19, L2, NONE, NONE, NONE, NONE, NONE),
+ NGBE_PTT(0x1A, L2, NONE, NONE, NONE, NONE, NONE),
+ NGBE_PTT(0x1B, L2, NONE, NONE, NONE, NONE, NONE),
+ NGBE_PTT(0x1C, L2, NONE, NONE, NONE, NONE, NONE),
+ NGBE_PTT(0x1D, L2, NONE, NONE, NONE, NONE, NONE),
+ NGBE_PTT(0x1E, L2, NONE, NONE, NONE, NONE, NONE),
+ NGBE_PTT(0x1F, L2, NONE, NONE, NONE, NONE, NONE),
+
+ /* L3: ip non-tunnel */
+ NGBE_UKN(0x20),
+ NGBE_PTT(0x21, IP, FGV4, NONE, NONE, NONE, PAY3),
+ NGBE_PTT(0x22, IP, IPV4, NONE, NONE, NONE, PAY3),
+ NGBE_PTT(0x23, IP, IPV4, NONE, NONE, UDP, PAY4),
+ NGBE_PTT(0x24, IP, IPV4, NONE, NONE, TCP, PAY4),
+ NGBE_PTT(0x25, IP, IPV4, NONE, NONE, SCTP, PAY4),
+ NGBE_UKN(0x26),
+ NGBE_UKN(0x27),
+ NGBE_UKN(0x28),
+ NGBE_PTT(0x29, IP, FGV6, NONE, NONE, NONE, PAY3),
+ NGBE_PTT(0x2A, IP, IPV6, NONE, NONE, NONE, PAY3),
+ NGBE_PTT(0x2B, IP, IPV6, NONE, NONE, UDP, PAY3),
+ NGBE_PTT(0x2C, IP, IPV6, NONE, NONE, TCP, PAY4),
+ NGBE_PTT(0x2D, IP, IPV6, NONE, NONE, SCTP, PAY4),
+ NGBE_UKN(0x2E),
+ NGBE_UKN(0x2F),
+
+ /* L2: fcoe */
+ NGBE_PTT(0x30, FCOE, NONE, NONE, NONE, NONE, PAY3),
+ NGBE_PTT(0x31, FCOE, NONE, NONE, NONE, NONE, PAY3),
+ NGBE_PTT(0x32, FCOE, NONE, NONE, NONE, NONE, PAY3),
+ NGBE_PTT(0x33, FCOE, NONE, NONE, NONE, NONE, PAY3),
+ NGBE_PTT(0x34, FCOE, NONE, NONE, NONE, NONE, PAY3),
+ NGBE_UKN(0x35),
+ NGBE_UKN(0x36),
+ NGBE_UKN(0x37),
+ NGBE_PTT(0x38, FCOE, NONE, NONE, NONE, NONE, PAY3),
+ NGBE_PTT(0x39, FCOE, NONE, NONE, NONE, NONE, PAY3),
+ NGBE_PTT(0x3A, FCOE, NONE, NONE, NONE, NONE, PAY3),
+ NGBE_PTT(0x3B, FCOE, NONE, NONE, NONE, NONE, PAY3),
+ NGBE_PTT(0x3C, FCOE, NONE, NONE, NONE, NONE, PAY3),
+ NGBE_UKN(0x3D),
+ NGBE_UKN(0x3E),
+ NGBE_UKN(0x3F),
+
+ NGBE_UKN(0x40),
+ NGBE_UKN(0x41),
+ NGBE_UKN(0x42),
+ NGBE_UKN(0x43),
+ NGBE_UKN(0x44),
+ NGBE_UKN(0x45),
+ NGBE_UKN(0x46),
+ NGBE_UKN(0x47),
+ NGBE_UKN(0x48),
+ NGBE_UKN(0x49),
+ NGBE_UKN(0x4A),
+ NGBE_UKN(0x4B),
+ NGBE_UKN(0x4C),
+ NGBE_UKN(0x4D),
+ NGBE_UKN(0x4E),
+ NGBE_UKN(0x4F),
+ NGBE_UKN(0x50),
+ NGBE_UKN(0x51),
+ NGBE_UKN(0x52),
+ NGBE_UKN(0x53),
+ NGBE_UKN(0x54),
+ NGBE_UKN(0x55),
+ NGBE_UKN(0x56),
+ NGBE_UKN(0x57),
+ NGBE_UKN(0x58),
+ NGBE_UKN(0x59),
+ NGBE_UKN(0x5A),
+ NGBE_UKN(0x5B),
+ NGBE_UKN(0x5C),
+ NGBE_UKN(0x5D),
+ NGBE_UKN(0x5E),
+ NGBE_UKN(0x5F),
+ NGBE_UKN(0x60),
+ NGBE_UKN(0x61),
+ NGBE_UKN(0x62),
+ NGBE_UKN(0x63),
+ NGBE_UKN(0x64),
+ NGBE_UKN(0x65),
+ NGBE_UKN(0x66),
+ NGBE_UKN(0x67),
+ NGBE_UKN(0x68),
+ NGBE_UKN(0x69),
+ NGBE_UKN(0x6A),
+ NGBE_UKN(0x6B),
+ NGBE_UKN(0x6C),
+ NGBE_UKN(0x6D),
+ NGBE_UKN(0x6E),
+ NGBE_UKN(0x6F),
+ NGBE_UKN(0x70),
+ NGBE_UKN(0x71),
+ NGBE_UKN(0x72),
+ NGBE_UKN(0x73),
+ NGBE_UKN(0x74),
+ NGBE_UKN(0x75),
+ NGBE_UKN(0x76),
+ NGBE_UKN(0x77),
+ NGBE_UKN(0x78),
+ NGBE_UKN(0x79),
+ NGBE_UKN(0x7A),
+ NGBE_UKN(0x7B),
+ NGBE_UKN(0x7C),
+ NGBE_UKN(0x7D),
+ NGBE_UKN(0x7E),
+ NGBE_UKN(0x7F),
+
+ /* IPv4 --> IPv4/IPv6 */
+ NGBE_UKN(0x80),
+ NGBE_PTT(0x81, IP, IPV4, IPIP, FGV4, NONE, PAY3),
+ NGBE_PTT(0x82, IP, IPV4, IPIP, IPV4, NONE, PAY3),
+ NGBE_PTT(0x83, IP, IPV4, IPIP, IPV4, UDP, PAY4),
+ NGBE_PTT(0x84, IP, IPV4, IPIP, IPV4, TCP, PAY4),
+ NGBE_PTT(0x85, IP, IPV4, IPIP, IPV4, SCTP, PAY4),
+ NGBE_UKN(0x86),
+ NGBE_UKN(0x87),
+ NGBE_UKN(0x88),
+ NGBE_PTT(0x89, IP, IPV4, IPIP, FGV6, NONE, PAY3),
+ NGBE_PTT(0x8A, IP, IPV4, IPIP, IPV6, NONE, PAY3),
+ NGBE_PTT(0x8B, IP, IPV4, IPIP, IPV6, UDP, PAY4),
+ NGBE_PTT(0x8C, IP, IPV4, IPIP, IPV6, TCP, PAY4),
+ NGBE_PTT(0x8D, IP, IPV4, IPIP, IPV6, SCTP, PAY4),
+ NGBE_UKN(0x8E),
+ NGBE_UKN(0x8F),
+
+ /* IPv4 --> GRE/NAT --> NONE/IPv4/IPv6 */
+ NGBE_PTT(0x90, IP, IPV4, IG, NONE, NONE, PAY3),
+ NGBE_PTT(0x91, IP, IPV4, IG, FGV4, NONE, PAY3),
+ NGBE_PTT(0x92, IP, IPV4, IG, IPV4, NONE, PAY3),
+ NGBE_PTT(0x93, IP, IPV4, IG, IPV4, UDP, PAY4),
+ NGBE_PTT(0x94, IP, IPV4, IG, IPV4, TCP, PAY4),
+ NGBE_PTT(0x95, IP, IPV4, IG, IPV4, SCTP, PAY4),
+ NGBE_UKN(0x96),
+ NGBE_UKN(0x97),
+ NGBE_UKN(0x98),
+ NGBE_PTT(0x99, IP, IPV4, IG, FGV6, NONE, PAY3),
+ NGBE_PTT(0x9A, IP, IPV4, IG, IPV6, NONE, PAY3),
+ NGBE_PTT(0x9B, IP, IPV4, IG, IPV6, UDP, PAY4),
+ NGBE_PTT(0x9C, IP, IPV4, IG, IPV6, TCP, PAY4),
+ NGBE_PTT(0x9D, IP, IPV4, IG, IPV6, SCTP, PAY4),
+ NGBE_UKN(0x9E),
+ NGBE_UKN(0x9F),
+
+ /* IPv4 --> GRE/NAT --> MAC --> NONE/IPv4/IPv6 */
+ NGBE_PTT(0xA0, IP, IPV4, IGM, NONE, NONE, PAY3),
+ NGBE_PTT(0xA1, IP, IPV4, IGM, FGV4, NONE, PAY3),
+ NGBE_PTT(0xA2, IP, IPV4, IGM, IPV4, NONE, PAY3),
+ NGBE_PTT(0xA3, IP, IPV4, IGM, IPV4, UDP, PAY4),
+ NGBE_PTT(0xA4, IP, IPV4, IGM, IPV4, TCP, PAY4),
+ NGBE_PTT(0xA5, IP, IPV4, IGM, IPV4, SCTP, PAY4),
+ NGBE_UKN(0xA6),
+ NGBE_UKN(0xA7),
+ NGBE_UKN(0xA8),
+ NGBE_PTT(0xA9, IP, IPV4, IGM, FGV6, NONE, PAY3),
+ NGBE_PTT(0xAA, IP, IPV4, IGM, IPV6, NONE, PAY3),
+ NGBE_PTT(0xAB, IP, IPV4, IGM, IPV6, UDP, PAY4),
+ NGBE_PTT(0xAC, IP, IPV4, IGM, IPV6, TCP, PAY4),
+ NGBE_PTT(0xAD, IP, IPV4, IGM, IPV6, SCTP, PAY4),
+ NGBE_UKN(0xAE),
+ NGBE_UKN(0xAF),
+
+ /* IPv4 --> GRE/NAT --> MAC+VLAN --> NONE/IPv4/IPv6 */
+ NGBE_PTT(0xB0, IP, IPV4, IGMV, NONE, NONE, PAY3),
+ NGBE_PTT(0xB1, IP, IPV4, IGMV, FGV4, NONE, PAY3),
+ NGBE_PTT(0xB2, IP, IPV4, IGMV, IPV4, NONE, PAY3),
+ NGBE_PTT(0xB3, IP, IPV4, IGMV, IPV4, UDP, PAY4),
+ NGBE_PTT(0xB4, IP, IPV4, IGMV, IPV4, TCP, PAY4),
+ NGBE_PTT(0xB5, IP, IPV4, IGMV, IPV4, SCTP, PAY4),
+ NGBE_UKN(0xB6),
+ NGBE_UKN(0xB7),
+ NGBE_UKN(0xB8),
+ NGBE_PTT(0xB9, IP, IPV4, IGMV, FGV6, NONE, PAY3),
+ NGBE_PTT(0xBA, IP, IPV4, IGMV, IPV6, NONE, PAY3),
+ NGBE_PTT(0xBB, IP, IPV4, IGMV, IPV6, UDP, PAY4),
+ NGBE_PTT(0xBC, IP, IPV4, IGMV, IPV6, TCP, PAY4),
+ NGBE_PTT(0xBD, IP, IPV4, IGMV, IPV6, SCTP, PAY4),
+ NGBE_UKN(0xBE),
+ NGBE_UKN(0xBF),
+
+ /* IPv6 --> IPv4/IPv6 */
+ NGBE_UKN(0xC0),
+ NGBE_PTT(0xC1, IP, IPV6, IPIP, FGV4, NONE, PAY3),
+ NGBE_PTT(0xC2, IP, IPV6, IPIP, IPV4, NONE, PAY3),
+ NGBE_PTT(0xC3, IP, IPV6, IPIP, IPV4, UDP, PAY4),
+ NGBE_PTT(0xC4, IP, IPV6, IPIP, IPV4, TCP, PAY4),
+ NGBE_PTT(0xC5, IP, IPV6, IPIP, IPV4, SCTP, PAY4),
+ NGBE_UKN(0xC6),
+ NGBE_UKN(0xC7),
+ NGBE_UKN(0xC8),
+ NGBE_PTT(0xC9, IP, IPV6, IPIP, FGV6, NONE, PAY3),
+ NGBE_PTT(0xCA, IP, IPV6, IPIP, IPV6, NONE, PAY3),
+ NGBE_PTT(0xCB, IP, IPV6, IPIP, IPV6, UDP, PAY4),
+ NGBE_PTT(0xCC, IP, IPV6, IPIP, IPV6, TCP, PAY4),
+ NGBE_PTT(0xCD, IP, IPV6, IPIP, IPV6, SCTP, PAY4),
+ NGBE_UKN(0xCE),
+ NGBE_UKN(0xCF),
+
+ /* IPv6 --> GRE/NAT --> NONE/IPv4/IPv6 */
+ NGBE_PTT(0xD0, IP, IPV6, IG, NONE, NONE, PAY3),
+ NGBE_PTT(0xD1, IP, IPV6, IG, FGV4, NONE, PAY3),
+ NGBE_PTT(0xD2, IP, IPV6, IG, IPV4, NONE, PAY3),
+ NGBE_PTT(0xD3, IP, IPV6, IG, IPV4, UDP, PAY4),
+ NGBE_PTT(0xD4, IP, IPV6, IG, IPV4, TCP, PAY4),
+ NGBE_PTT(0xD5, IP, IPV6, IG, IPV4, SCTP, PAY4),
+ NGBE_UKN(0xD6),
+ NGBE_UKN(0xD7),
+ NGBE_UKN(0xD8),
+ NGBE_PTT(0xD9, IP, IPV6, IG, FGV6, NONE, PAY3),
+ NGBE_PTT(0xDA, IP, IPV6, IG, IPV6, NONE, PAY3),
+ NGBE_PTT(0xDB, IP, IPV6, IG, IPV6, UDP, PAY4),
+ NGBE_PTT(0xDC, IP, IPV6, IG, IPV6, TCP, PAY4),
+ NGBE_PTT(0xDD, IP, IPV6, IG, IPV6, SCTP, PAY4),
+ NGBE_UKN(0xDE),
+ NGBE_UKN(0xDF),
+
+ /* IPv6 --> GRE/NAT --> MAC --> NONE/IPv4/IPv6 */
+ NGBE_PTT(0xE0, IP, IPV6, IGM, NONE, NONE, PAY3),
+ NGBE_PTT(0xE1, IP, IPV6, IGM, FGV4, NONE, PAY3),
+ NGBE_PTT(0xE2, IP, IPV6, IGM, IPV4, NONE, PAY3),
+ NGBE_PTT(0xE3, IP, IPV6, IGM, IPV4, UDP, PAY4),
+ NGBE_PTT(0xE4, IP, IPV6, IGM, IPV4, TCP, PAY4),
+ NGBE_PTT(0xE5, IP, IPV6, IGM, IPV4, SCTP, PAY4),
+ NGBE_UKN(0xE6),
+ NGBE_UKN(0xE7),
+ NGBE_UKN(0xE8),
+ NGBE_PTT(0xE9, IP, IPV6, IGM, FGV6, NONE, PAY3),
+ NGBE_PTT(0xEA, IP, IPV6, IGM, IPV6, NONE, PAY3),
+ NGBE_PTT(0xEB, IP, IPV6, IGM, IPV6, UDP, PAY4),
+ NGBE_PTT(0xEC, IP, IPV6, IGM, IPV6, TCP, PAY4),
+ NGBE_PTT(0xED, IP, IPV6, IGM, IPV6, SCTP, PAY4),
+ NGBE_UKN(0xEE),
+ NGBE_UKN(0xEF),
+
+ /* IPv6 --> GRE/NAT --> MAC+VLAN --> NONE/IPv4/IPv6 */
+ NGBE_PTT(0xF0, IP, IPV6, IGMV, NONE, NONE, PAY3),
+ NGBE_PTT(0xF1, IP, IPV6, IGMV, FGV4, NONE, PAY3),
+ NGBE_PTT(0xF2, IP, IPV6, IGMV, IPV4, NONE, PAY3),
+ NGBE_PTT(0xF3, IP, IPV6, IGMV, IPV4, UDP, PAY4),
+ NGBE_PTT(0xF4, IP, IPV6, IGMV, IPV4, TCP, PAY4),
+ NGBE_PTT(0xF5, IP, IPV6, IGMV, IPV4, SCTP, PAY4),
+ NGBE_UKN(0xF6),
+ NGBE_UKN(0xF7),
+ NGBE_UKN(0xF8),
+ NGBE_PTT(0xF9, IP, IPV6, IGMV, FGV6, NONE, PAY3),
+ NGBE_PTT(0xFA, IP, IPV6, IGMV, IPV6, NONE, PAY3),
+ NGBE_PTT(0xFB, IP, IPV6, IGMV, IPV6, UDP, PAY4),
+ NGBE_PTT(0xFC, IP, IPV6, IGMV, IPV6, TCP, PAY4),
+ NGBE_PTT(0xFD, IP, IPV6, IGMV, IPV6, SCTP, PAY4),
+ NGBE_UKN(0xFE),
+ NGBE_UKN(0xFF),
+};
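+
+/*
+ * Illustrative sketch of the decode flow described above the table;
+ * the helper name is hypothetical and not part of this patch's API.
+ */
+static inline ngbe_dptype ngbe_decode_ptype_sketch(u8 ptype)
+{
+ ngbe_dptype dptype = ngbe_ptype_lookup[ptype];
+
+ /* a cleared 'known' field marks a NGBE_UKN() placeholder row */
+ if (!dptype.known)
+ return dptype;
+
+ /* for NGBE_DEC_PTYPE_MAC_IP the remaining fields describe the
+ * tunnel and inner protocols; otherwise the L2 type applies
+ */
+ return dptype;
+}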
+
+
+void ngbe_init_mac_link_ops(struct ngbe_hw *hw)
+{
+ struct ngbe_mac_info *mac = &hw->mac;
+
+ DEBUGFUNC("\n");
+
+ mac->ops.setup_link = ngbe_setup_mac_link;
+}
+
+/**
+ * ngbe_init_ops - Inits func ptrs and MAC type
+ * @hw: pointer to hardware structure
+ *
+ * Initialize the function pointers and assign the MAC type for emerald.
+ * Does not touch the hardware.
+ **/
+s32 ngbe_init_ops(struct ngbe_hw *hw)
+{
+ struct ngbe_mac_info *mac = &hw->mac;
+ struct ngbe_phy_info *phy = &hw->phy;
+
+ DEBUGFUNC("ngbe_init_ops");
+
+ ngbe_init_phy_ops_common(hw);
+ ngbe_init_ops_common(hw);
+
+ if (hw->phy.type == ngbe_phy_m88e1512 ||
+ hw->phy.type == ngbe_phy_m88e1512_sfi) {
+ phy->ops.read_reg_mdi = ngbe_phy_read_reg_mdi;
+ phy->ops.write_reg_mdi = ngbe_phy_write_reg_mdi;
+ phy->ops.setup_link = ngbe_phy_setup_link_m88e1512;
+ phy->ops.reset = ngbe_phy_reset_m88e1512;
+ phy->ops.check_event = ngbe_phy_check_event_m88e1512;
+ phy->ops.get_adv_pause = ngbe_phy_get_advertised_pause_m88e1512;
+ phy->ops.get_lp_adv_pause = ngbe_phy_get_lp_advertised_pause_m88e1512;
+ phy->ops.set_adv_pause = ngbe_phy_set_pause_advertisement_m88e1512;
+
+ mac->ops.check_link = ngbe_check_mac_link_mdi;
+ } else if (hw->phy.type == ngbe_phy_yt8521s_sfi) {
+ phy->ops.read_reg_mdi = ngbe_phy_read_reg_mdi;
+ phy->ops.write_reg_mdi = ngbe_phy_write_reg_mdi;
+ phy->ops.setup_link = ngbe_phy_setup_link_yt8521s;
+ phy->ops.reset = ngbe_phy_reset_yt8521s;
+ phy->ops.check_event = ngbe_phy_check_event_yt8521s;
+ phy->ops.get_adv_pause = ngbe_phy_get_advertised_pause_yt8521s;
+ phy->ops.get_lp_adv_pause = ngbe_phy_get_lp_advertised_pause_yt8521s;
+ phy->ops.set_adv_pause = ngbe_phy_set_pause_advertisement_yt8521s;
+
+ mac->ops.check_link = ngbe_check_mac_link_yt8521s;
+ }
+
+ return NGBE_OK;
+}
+
+/**
+ * ngbe_get_link_capabilities - Determines link capabilities
+ * @hw: pointer to hardware structure
+ * @speed: pointer to link speed
+ * @autoneg: true when autoneg or autotry is enabled
+ *
+ * Determines the link capabilities by reading the AUTOC register.
+ **/
+s32 ngbe_get_link_capabilities(struct ngbe_hw *hw,
+ u32 *speed,
+ bool *autoneg)
+{
+ s32 status = 0;
+
+ DEBUGFUNC("\n");
+
+ if (hw->device_id == NGBE_DEV_ID_EM_TEST ||
+ hw->device_id == NGBE_DEV_ID_EM_WX1860A2 ||
+ hw->device_id == NGBE_DEV_ID_EM_WX1860A2S ||
+ hw->device_id == NGBE_DEV_ID_EM_WX1860A4 ||
+ hw->device_id == NGBE_DEV_ID_EM_WX1860A4S ||
+ hw->device_id == NGBE_DEV_ID_EM_WX1860AL2 ||
+ hw->device_id == NGBE_DEV_ID_EM_WX1860AL2S ||
+ hw->device_id == NGBE_DEV_ID_EM_WX1860AL4 ||
+ hw->device_id == NGBE_DEV_ID_EM_WX1860AL4S ||
+ hw->device_id == NGBE_DEV_ID_EM_WX1860AL_W ||
+ hw->device_id == NGBE_DEV_ID_EM_WX1860A1 ||
+ hw->device_id == NGBE_DEV_ID_EM_WX1860AL1 ||
+ hw->device_id == 0x10c ||
+ hw->device_id == NGBE_DEV_ID_EM_WX1860NCSI) {
+ *speed = NGBE_LINK_SPEED_1GB_FULL |
+ NGBE_LINK_SPEED_100_FULL |
+ NGBE_LINK_SPEED_10_FULL;
+ *autoneg = false;
+ hw->phy.link_mode = NGBE_PHYSICAL_LAYER_1000BASE_T |
+ NGBE_PHYSICAL_LAYER_100BASE_TX;
+ }
+
+ return status;
+}
+
+/**
+ * ngbe_get_copper_link_capabilities - Determines link capabilities
+ * @hw: pointer to hardware structure
+ * @speed: pointer to link speed
+ * @autoneg: boolean auto-negotiation value
+ *
+ * Determines the supported link capabilities by reading the PHY auto
+ * negotiation register.
+ **/
+s32 ngbe_get_copper_link_capabilities(struct ngbe_hw *hw,
+ u32 *speed,
+ bool *autoneg)
+{
+ s32 status = 0;
+
+ DEBUGFUNC("\n");
+
+ *autoneg = hw->mac.autoneg;
+
+ *speed = NGBE_LINK_SPEED_10_FULL |
+ NGBE_LINK_SPEED_100_FULL |
+ NGBE_LINK_SPEED_1GB_FULL;
+
+ return status;
+}
+
+/**
+ * ngbe_get_media_type - Get media type
+ * @hw: pointer to hardware structure
+ *
+ * Returns the media type (fiber, copper, backplane)
+ **/
+enum ngbe_media_type ngbe_get_media_type(struct ngbe_hw *hw)
+{
+ enum ngbe_media_type media_type;
+
+ DEBUGFUNC("\n");
+
+ ERROR_REPORT1(NGBE_ERROR_ARGUMENT,
+ "ngbe_get_media_type: hw->device_id = %u\n", hw->device_id);
+
+ media_type = ngbe_media_type_copper;
+
+ return media_type;
+}
+
+/**
+ * ngbe_stop_mac_link_on_d3 - Disables link on D3
+ * @hw: pointer to hardware structure
+ *
+ * Disables link during D3 power down sequence.
+ *
+ **/
+void ngbe_stop_mac_link_on_d3(struct ngbe_hw *hw)
+{
+ UNREFERENCED_PARAMETER(hw);
+}
+
+/**
+ * ngbe_setup_mac_link - Set MAC link speed
+ * @hw: pointer to hardware structure
+ * @speed: new link speed
+ * @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ * Set the link speed in the AUTOC register and restarts link.
+ **/
+s32 ngbe_setup_mac_link(struct ngbe_hw *hw,
+ u32 speed,
+ bool autoneg_wait_to_complete)
+{
+ bool autoneg = false;
+ s32 status = 0;
+ u32 link_capabilities = NGBE_LINK_SPEED_UNKNOWN;
+ u32 link_speed = NGBE_LINK_SPEED_UNKNOWN;
+ u32 lan_speed = 0;
+ bool link_up = false;
+
+ UNREFERENCED_PARAMETER(autoneg_wait_to_complete);
+ DEBUGFUNC("\n");
+
+ if (!(((hw->subsystem_device_id & OEM_MASK) == OCP_CARD) ||
+ ((hw->subsystem_device_id & WOL_SUP_MASK) == WOL_SUP) ||
+ ((hw->subsystem_device_id & NCSI_SUP_MASK) == NCSI_SUP))) {
+ /* Check to see if speed passed in is supported. */
+ status = TCALL(hw, mac.ops.get_link_capabilities,
+ &link_capabilities, &autoneg);
+ if (status)
+ goto out;
+
+ speed &= link_capabilities;
+
+ if (speed == NGBE_LINK_SPEED_UNKNOWN) {
+ status = NGBE_ERR_LINK_SETUP;
+ goto out;
+ }
+ }
+
+ status = TCALL(hw, mac.ops.check_link,
+ &link_speed, &link_up, false);
+ if (status != 0)
+ goto out;
+ if ((link_speed == speed) && link_up) {
+ switch (link_speed) {
+ case NGBE_LINK_SPEED_100_FULL:
+ lan_speed = 1;
+ break;
+ case NGBE_LINK_SPEED_1GB_FULL:
+ lan_speed = 2;
+ break;
+ case NGBE_LINK_SPEED_10_FULL:
+ lan_speed = 0;
+ break;
+ default:
+ break;
+ }
+ wr32m(hw, NGBE_CFG_LAN_SPEED,
+ 0x3, lan_speed);
+ }
+
+out:
+ return status;
+}
+
+
+/**
+ * ngbe_setup_copper_link - Set the PHY autoneg advertised field
+ * @hw: pointer to hardware structure
+ * @speed: new link speed
+ * @need_restart_AN: true to restart auto-negotiation after the PHY setup
+ *
+ * Restarts link on PHY and MAC based on settings passed in.
+ **/
+STATIC s32 ngbe_setup_copper_link(struct ngbe_hw *hw,
+ u32 speed,
+ bool need_restart_AN)
+{
+ s32 status;
+ struct ngbe_adapter *adapter = hw->back;
+
+ DEBUGFUNC("\n");
+
+ /* Setup the PHY according to input speed */
+ status = TCALL(hw, phy.ops.setup_link, speed,
+ need_restart_AN);
+
+ adapter->flags |= NGBE_FLAG_NEED_ANC_CHECK;
+
+ return status;
+}
+
+int ngbe_reset_misc(struct ngbe_hw *hw)
+{
+ int i;
+
+ /* receive packets that size > 2048 */
+ wr32m(hw, NGBE_MAC_RX_CFG,
+ NGBE_MAC_RX_CFG_JE, NGBE_MAC_RX_CFG_JE);
+
+ /* clear counters on read */
+ wr32m(hw, NGBE_MMC_CONTROL,
+ NGBE_MMC_CONTROL_RSTONRD, NGBE_MMC_CONTROL_RSTONRD);
+
+ wr32m(hw, NGBE_MAC_RX_FLOW_CTRL,
+ NGBE_MAC_RX_FLOW_CTRL_RFE, NGBE_MAC_RX_FLOW_CTRL_RFE);
+
+ wr32(hw, NGBE_MAC_PKT_FLT,
+ NGBE_MAC_PKT_FLT_PR);
+
+ wr32m(hw, NGBE_MIS_RST_ST,
+ NGBE_MIS_RST_ST_RST_INIT, 0x1E00);
+
+ /* errata 4: initialize mng flex tbl and wakeup flex tbl */
+ wr32(hw, NGBE_PSR_MNG_FLEX_SEL, 0);
+ for (i = 0; i < 16; i++) {
+ wr32(hw, NGBE_PSR_MNG_FLEX_DW_L(i), 0);
+ wr32(hw, NGBE_PSR_MNG_FLEX_DW_H(i), 0);
+ wr32(hw, NGBE_PSR_MNG_FLEX_MSK(i), 0);
+ }
+ wr32(hw, NGBE_PSR_LAN_FLEX_SEL, 0);
+ for (i = 0; i < 16; i++) {
+ wr32(hw, NGBE_PSR_LAN_FLEX_DW_L(i), 0);
+ wr32(hw, NGBE_PSR_LAN_FLEX_DW_H(i), 0);
+ wr32(hw, NGBE_PSR_LAN_FLEX_MSK(i), 0);
+ }
+
+ /* set pause frame dst mac addr */
+ wr32(hw, NGBE_RDB_PFCMACDAL, 0xC2000001);
+ wr32(hw, NGBE_RDB_PFCMACDAH, 0x0180);
+
+ wr32(hw, NGBE_MDIO_CLAUSE_SELECT, 0xF);
+
+ if (((hw->subsystem_device_id & OEM_MASK) == LY_M88E1512_SFP) ||
+ (hw->subsystem_device_id & OEM_MASK) == LY_YT8521S_SFP) {
+ /* GPIO 0 is used for SFP module power on/off control */
+ wr32(hw, NGBE_GPIO_DDR, 0x1);
+ wr32(hw, NGBE_GPIO_DR, NGBE_GPIO_DR_0);
+ }
+
+ ngbe_init_thermal_sensor_thresh(hw);
+
+ return 0;
+}
+
+/**
+ * ngbe_reset_hw - Perform hardware reset
+ * @hw: pointer to hardware structure
+ *
+ * Resets the hardware by resetting the transmit and receive units, masks
+ * and clears all interrupts, perform a PHY reset, and perform a link (MAC)
+ * reset.
+ **/
+s32 ngbe_reset_hw(struct ngbe_hw *hw)
+{
+ s32 status;
+ u32 reset = 0;
+ u32 i;
+ struct ngbe_mac_info *mac = &hw->mac;
+
+ u32 sr_pcs_ctl = 0, sr_pma_mmd_ctl1 = 0, sr_an_mmd_ctl = 0;
+ u32 sr_an_mmd_adv_reg2 = 0;
+ u32 vr_xs_or_pcs_mmd_digi_ctl1 = 0, curr_vr_xs_or_pcs_mmd_digi_ctl1 = 0;
+ u32 curr_sr_pcs_ctl = 0, curr_sr_pma_mmd_ctl1 = 0;
+ u32 curr_sr_an_mmd_ctl = 0, curr_sr_an_mmd_adv_reg2 = 0;
+
+ u32 reset_status = 0;
+ u32 rst_delay = 0;
+
+ struct ngbe_adapter *adapter = NULL;
+
+ DEBUGFUNC("\n");
+
+ /* Call adapter stop to disable tx/rx and clear interrupts */
+ status = TCALL(hw, mac.ops.stop_adapter);
+ if (status != 0)
+ goto reset_hw_out;
+
+ /* Identify PHY and related function pointers */
+ status = TCALL(hw, phy.ops.init);
+
+ if (status)
+ goto reset_hw_out;
+
+ if (ngbe_get_media_type(hw) == ngbe_media_type_copper) {
+ mac->ops.setup_link = ngbe_setup_copper_link;
+ mac->ops.get_link_capabilities =
+ ngbe_get_copper_link_capabilities;
+ }
+
+ /*
+ * Issue global reset to the MAC. Needs to be SW reset if link is up.
+ * If link reset is used when link is up, it might reset the PHY when
+ * mng is using it. If link is down or the flag to force full link
+ * reset is set, then perform link reset.
+ */
+ if (hw->force_full_reset) {
+ rst_delay = (rd32(hw, NGBE_MIS_RST_ST) &
+ NGBE_MIS_RST_ST_RST_INIT) >>
+ NGBE_MIS_RST_ST_RST_INI_SHIFT;
+ if (hw->reset_type == NGBE_SW_RESET) {
+ for (i = 0; i < rst_delay + 20; i++) {
+ reset_status =
+ rd32(hw, NGBE_MIS_RST_ST);
+ if (!(reset_status &
+ NGBE_MIS_RST_ST_DEV_RST_ST_MASK))
+ break;
+ msleep(100);
+ }
+
+ if (reset_status & NGBE_MIS_RST_ST_DEV_RST_ST_MASK) {
+ status = NGBE_ERR_RESET_FAILED;
+ DEBUGOUT("software reset polling failed to "
+ "complete.\n");
+ goto reset_hw_out;
+ }
+ status = ngbe_check_flash_load(hw,
+ NGBE_SPI_ILDR_STATUS_SW_RESET);
+ if (status != 0)
+ goto reset_hw_out;
+
+ } else if (hw->reset_type == NGBE_GLOBAL_RESET) {
+ adapter = (struct ngbe_adapter *)hw->back;
+ msleep(100 * rst_delay + 2000);
+ pci_restore_state(adapter->pdev);
+ pci_save_state(adapter->pdev);
+ pci_wake_from_d3(adapter->pdev, false);
+ }
+ } else {
+ if (hw->bus.lan_id == 0) {
+ reset = NGBE_MIS_RST_LAN0_RST;
+ } else if (hw->bus.lan_id == 1) {
+ reset = NGBE_MIS_RST_LAN1_RST;
+ } else if (hw->bus.lan_id == 2) {
+ reset = NGBE_MIS_RST_LAN2_RST;
+ } else if (hw->bus.lan_id == 3) {
+ reset = NGBE_MIS_RST_LAN3_RST;
+ }
+
+ wr32(hw, NGBE_MIS_RST,
+ reset | rd32(hw, NGBE_MIS_RST));
+ NGBE_WRITE_FLUSH(hw);
+
+ msleep(15);
+ }
+
+ status = ngbe_reset_misc(hw);
+ if (status != 0)
+ goto reset_hw_out;
+
+ if (!hw->mac.orig_link_settings_stored) {
+ hw->mac.orig_sr_pcs_ctl2 = sr_pcs_ctl;
+ hw->mac.orig_sr_pma_mmd_ctl1 = sr_pma_mmd_ctl1;
+ hw->mac.orig_sr_an_mmd_ctl = sr_an_mmd_ctl;
+ hw->mac.orig_sr_an_mmd_adv_reg2 = sr_an_mmd_adv_reg2;
+ hw->mac.orig_vr_xs_or_pcs_mmd_digi_ctl1 =
+ vr_xs_or_pcs_mmd_digi_ctl1;
+ hw->mac.orig_link_settings_stored = true;
+ } else {
+ /* If MNG FW is running on a multi-speed device that
+ * doesn't autoneg without driver support, we need to
+ * leave LMS in the state it was before the MAC reset.
+ * Likewise if we support WoL we don't want to change
+ * the LMS state.
+ */
+ hw->mac.orig_sr_pcs_ctl2 = curr_sr_pcs_ctl;
+ hw->mac.orig_sr_pma_mmd_ctl1 = curr_sr_pma_mmd_ctl1;
+ hw->mac.orig_sr_an_mmd_ctl = curr_sr_an_mmd_ctl;
+ hw->mac.orig_sr_an_mmd_adv_reg2 =
+ curr_sr_an_mmd_adv_reg2;
+ hw->mac.orig_vr_xs_or_pcs_mmd_digi_ctl1 =
+ curr_vr_xs_or_pcs_mmd_digi_ctl1;
+ }
+
+ /* Store the permanent mac address */
+ TCALL(hw, mac.ops.get_mac_addr, hw->mac.perm_addr);
+
+ /*
+ * Store MAC address from RAR0, clear receive address registers, and
+ * clear the multicast table. Also reset num_rar_entries to its
+ * default, since we modify this value when programming the SAN MAC
+ * address.
+ */
+ hw->mac.num_rar_entries = NGBE_SP_RAR_ENTRIES;
+ TCALL(hw, mac.ops.init_rx_addrs);
+
+ pci_set_master(((struct ngbe_adapter *)hw->back)->pdev);
+
+reset_hw_out:
+ return status;
+}
+
+/*
+ * These defines allow us to quickly generate all of the necessary instructions
+ * in the function below by simply calling out NGBE_COMPUTE_SIG_HASH_ITERATION
+ * for values 0 through 15
+ */
+#define NGBE_ATR_COMMON_HASH_KEY \
+ (NGBE_ATR_BUCKET_HASH_KEY & NGBE_ATR_SIGNATURE_HASH_KEY)
+#define NGBE_COMPUTE_SIG_HASH_ITERATION(_n) \
+do { \
+ u32 n = (_n); \
+ if (NGBE_ATR_COMMON_HASH_KEY & (0x01 << n)) \
+ common_hash ^= lo_hash_dword >> n; \
+ else if (NGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \
+ bucket_hash ^= lo_hash_dword >> n; \
+ else if (NGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << n)) \
+ sig_hash ^= lo_hash_dword << (16 - n); \
+ if (NGBE_ATR_COMMON_HASH_KEY & (0x01 << (n + 16))) \
+ common_hash ^= hi_hash_dword >> n; \
+ else if (NGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \
+ bucket_hash ^= hi_hash_dword >> n; \
+ else if (NGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << (n + 16))) \
+ sig_hash ^= hi_hash_dword << (16 - n); \
+} while (0)
+
+#define NGBE_COMPUTE_BKT_HASH_ITERATION(_n) \
+do { \
+ u32 n = (_n); \
+ if (NGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \
+ bucket_hash ^= lo_hash_dword >> n; \
+ if (NGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \
+ bucket_hash ^= hi_hash_dword >> n; \
+} while (0)
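+
+/*
+ * Usage sketch (illustrative): the iteration macros above expect local
+ * variables named sig_hash, bucket_hash, common_hash, lo_hash_dword and
+ * hi_hash_dword in the calling function, which then unrolls:
+ *
+ *	NGBE_COMPUTE_SIG_HASH_ITERATION(0);
+ *	NGBE_COMPUTE_SIG_HASH_ITERATION(1);
+ *	...
+ *	NGBE_COMPUTE_SIG_HASH_ITERATION(15);
+ *
+ * How the three partial hashes are finally combined is an assumption
+ * here; typically the common hash is XORed back into the other two.
+ */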
+
+/*
+ * These two macros are meant to address the fact that we have registers
+ * that are either all or in part big-endian. As a result on big-endian
+ * systems we will end up byte swapping the value to little-endian before
+ * it is byte swapped again and written to the hardware in the original
+ * big-endian format.
+ */
+#define NGBE_STORE_AS_BE32(_value) \
+ (((u32)(_value) >> 24) | (((u32)(_value) & 0x00FF0000) >> 8) | \
+ (((u32)(_value) & 0x0000FF00) << 8) | ((u32)(_value) << 24))
+
+#define NGBE_WRITE_REG_BE32(a, reg, value) \
+ wr32((a), (reg), NGBE_STORE_AS_BE32(NGBE_NTOHL(value)))
+
+#define NGBE_STORE_AS_BE16(_value) \
+ NGBE_NTOHS(((u16)(_value) >> 8) | ((u16)(_value) << 8))
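+
+/*
+ * Worked example: NGBE_STORE_AS_BE32(0x12345678) evaluates to
+ * 0x78563412, so a value that NGBE_NTOHL has already byte-swapped is
+ * swapped back and reaches the register in its original big-endian
+ * byte order.
+ */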
+
+/**
+ * ngbe_start_hw - Prepare hardware for Tx/Rx
+ * @hw: pointer to hardware structure
+ *
+ * Starts the hardware using the generic start_hw function.
+ * Then performs revision-specific operations, if any.
+ **/
+s32 ngbe_start_hw(struct ngbe_hw *hw)
+{
+ int ret_val = 0;
+
+ DEBUGFUNC("\n");
+
+ /* Set the media type */
+ hw->phy.media_type = TCALL(hw, mac.ops.get_media_type);
+
+ /* PHY ops initialization must be done in reset_hw() */
+
+ /* Clear the VLAN filter table */
+ TCALL(hw, mac.ops.clear_vfta);
+
+ /* Clear statistics registers */
+ TCALL(hw, mac.ops.clear_hw_cntrs);
+
+ NGBE_WRITE_FLUSH(hw);
+
+ /* Setup flow control */
+ ret_val = TCALL(hw, mac.ops.setup_fc);
+
+ /* Clear adapter stopped flag */
+ hw->adapter_stopped = false;
+
+ /* We need to run link autotry after the driver loads */
+ hw->mac.autotry_restart = true;
+
+ return ret_val;
+}
+
+/**
+ * ngbe_enable_rx_dma - Enable the Rx DMA unit on emerald
+ * @hw: pointer to hardware structure
+ * @regval: register value to write to RXCTRL
+ *
+ * Enables the Rx DMA unit for emerald
+ **/
+s32 ngbe_enable_rx_dma(struct ngbe_hw *hw, u32 regval)
+{
+ DEBUGFUNC("\n");
+
+ /*
+ * Workaround for emerald silicon errata when enabling the Rx datapath.
+ * If traffic is incoming before we enable the Rx unit, it could hang
+ * the Rx DMA unit. Therefore, make sure the security engine is
+ * completely disabled prior to enabling the Rx unit.
+ */
+
+ TCALL(hw, mac.ops.disable_sec_rx_path);
+
+ if (regval & NGBE_RDB_PB_CTL_PBEN)
+ TCALL(hw, mac.ops.enable_rx);
+ else
+ TCALL(hw, mac.ops.disable_rx);
+
+ TCALL(hw, mac.ops.enable_sec_rx_path);
+
+ return 0;
+}
+
+/**
+ * ngbe_init_flash_params - Initialize flash params
+ * @hw: pointer to hardware structure
+ *
+ * Initializes the flash parameters ngbe_flash_info within the
+ * ngbe_hw struct in order to set up flash access.
+ **/
+s32 ngbe_init_flash_params(struct ngbe_hw *hw)
+{
+ struct ngbe_flash_info *flash = &hw->flash;
+ u32 eec;
+
+ DEBUGFUNC("\n");
+
+ eec = 0x1000000;
+ flash->semaphore_delay = 10;
+ flash->dword_size = (eec >> 2);
+ flash->address_bits = 24;
+ DEBUGOUT3("FLASH params: size = %d, address bits: %d\n",
+ flash->dword_size,
+ flash->address_bits);
+
+ return 0;
+}
+
+/**
+ * ngbe_read_flash_buffer - Read FLASH dword(s) using
+ * fastest available method
+ *
+ * @hw: pointer to hardware structure
+ * @offset: offset of dword in FLASH to read
+ * @dwords: number of dwords
+ * @data: dword(s) read from the FLASH
+ *
+ * Retrieves 32 bit dword(s) read from FLASH
+ **/
+s32 ngbe_read_flash_buffer(struct ngbe_hw *hw, u32 offset,
+ u32 dwords, u32 *data)
+{
+ s32 status = 0;
+ u32 i;
+
+ DEBUGFUNC("\n");
+
+ TCALL(hw, eeprom.ops.init_params);
+
+ if (!dwords || offset + dwords >= hw->flash.dword_size) {
+ status = NGBE_ERR_INVALID_ARGUMENT;
+ ERROR_REPORT1(NGBE_ERROR_ARGUMENT, "Invalid FLASH arguments");
+ return status;
+ }
+
+ for (i = 0; i < dwords; i++) {
+ wr32(hw, NGBE_SPI_CMD,
+ NGBE_SPI_CMD_ADDR(offset + i) |
+ NGBE_SPI_CMD_CMD(0x0));
+
+ status = po32m(hw, NGBE_SPI_STATUS,
+ NGBE_SPI_STATUS_OPDONE, NGBE_SPI_STATUS_OPDONE,
+ NGBE_SPI_TIMEOUT, 0);
+ if (status) {
+ DEBUGOUT("FLASH read timed out\n");
+ break;
+ }
+
+ /* the data register holds the dword once the read completes */
+ data[i] = rd32(hw, NGBE_SPI_DATA);
+ }
+
+ return status;
+}
+
+/**
+ * ngbe_write_flash_buffer - Write FLASH dword(s) using
+ * fastest available method
+ *
+ * @hw: pointer to hardware structure
+ * @offset: offset of dword in FLASH to write
+ * @dwords: number of dwords
+ * @data: dword(s) to write to the FLASH
+ *
+ **/
+s32 ngbe_write_flash_buffer(struct ngbe_hw *hw, u32 offset,
+ u32 dwords, u32 *data)
+{
+ s32 status = 0;
+ u32 i;
+
+ DEBUGFUNC("\n");
+
+ TCALL(hw, eeprom.ops.init_params);
+
+ if (!dwords || offset + dwords >= hw->flash.dword_size) {
+ status = NGBE_ERR_INVALID_ARGUMENT;
+ ERROR_REPORT1(NGBE_ERROR_ARGUMENT, "Invalid FLASH arguments");
+ return status;
+ }
+
+ for (i = 0; i < dwords; i++) {
+ /* load the dword to write before issuing the command */
+ wr32(hw, NGBE_SPI_DATA, data[i]);
+ wr32(hw, NGBE_SPI_CMD,
+ NGBE_SPI_CMD_ADDR(offset + i) |
+ NGBE_SPI_CMD_CMD(0x1));
+
+ status = po32m(hw, NGBE_SPI_STATUS,
+ NGBE_SPI_STATUS_OPDONE, NGBE_SPI_STATUS_OPDONE,
+ NGBE_SPI_TIMEOUT, 0);
+ if (status != 0) {
+ DEBUGOUT("FLASH write timed out\n");
+ break;
+ }
+ }
+
+ return status;
+}
+
+/**
+ * ngbe_init_eeprom_params - Initialize EEPROM params
+ * @hw: pointer to hardware structure
+ *
+ * Initializes the EEPROM parameters ngbe_eeprom_info within the
+ * ngbe_hw struct in order to set up EEPROM access.
+ **/
+s32 ngbe_init_eeprom_params(struct ngbe_hw *hw)
+{
+ struct ngbe_eeprom_info *eeprom = &hw->eeprom;
+ u16 eeprom_size;
+ s32 status = 0;
+
+ DEBUGFUNC("\n");
+
+ if (eeprom->type == ngbe_eeprom_uninitialized) {
+ eeprom->semaphore_delay = 10;
+ eeprom->type = ngbe_eeprom_none;
+
+ if (!(rd32(hw, NGBE_SPI_STATUS) &
+ NGBE_SPI_STATUS_FLASH_BYPASS)) {
+ eeprom->type = ngbe_flash;
+ eeprom_size = 4096;
+ eeprom->word_size = eeprom_size >> 1;
+
+ DEBUGOUT2("Eeprom params: type = %d, size = %d\n",
+ eeprom->type, eeprom->word_size);
+ }
+ }
+
+ eeprom->sw_region_offset = 0x80;
+
+ return status;
+}
+
+/**
+ * ngbe_read_ee_hostif_data - Read EEPROM word using a host interface cmd
+ * assuming that the semaphore is already obtained.
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to read
+ * @data: word read from the EEPROM
+ *
+ * Reads a 16 bit word from the EEPROM using the hostif.
+ **/
+s32 ngbe_read_ee_hostif_data(struct ngbe_hw *hw, u16 offset,
+ u16 *data)
+{
+ s32 status;
+ struct ngbe_hic_read_shadow_ram buffer;
+
+ DEBUGFUNC("\n");
+ buffer.hdr.req.cmd = FW_READ_SHADOW_RAM_CMD;
+ buffer.hdr.req.buf_lenh = 0;
+ buffer.hdr.req.buf_lenl = FW_READ_SHADOW_RAM_LEN;
+ buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
+
+ /* convert offset from words to bytes */
+ buffer.address = NGBE_CPU_TO_BE32(offset * 2);
+ /* one word */
+ buffer.length = NGBE_CPU_TO_BE16(sizeof(u16));
+
+ status = ngbe_host_interface_command(hw, (u32 *)&buffer,
+ sizeof(buffer),
+ NGBE_HI_COMMAND_TIMEOUT, false);
+
+ if (status)
+ return status;
+
+ if (ngbe_check_mng_access(hw)) {
+ *data = (u16)rd32a(hw, NGBE_MNG_MBOX,
+ FW_NVM_DATA_OFFSET);
+ } else {
+ status = NGBE_ERR_MNG_ACCESS_FAILED;
+ return status;
+ }
+
+ return 0;
+}
+
+s32 ngbe_eepromcheck_cap(struct ngbe_hw *hw, u16 offset,
+ u32 *data)
+{
+ u32 tmp;
+ s32 status;
+ struct ngbe_hic_read_shadow_ram buffer;
+
+ DEBUGFUNC("\n");
+ buffer.hdr.req.cmd = FW_EEPROM_CHECK_STATUS;
+ buffer.hdr.req.buf_lenh = 0;
+ buffer.hdr.req.buf_lenl = 0;
+ buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
+
+ /* this command carries no address/length payload */
+ buffer.address = 0;
+ buffer.length = 0;
+
+ status = ngbe_host_interface_command(hw, (u32 *)&buffer,
+ sizeof(buffer),
+ NGBE_HI_COMMAND_TIMEOUT, false);
+
+ if (status)
+ return status;
+ if (ngbe_check_mng_access(hw)) {
+ tmp = rd32a(hw, NGBE_MNG_MBOX, 1);
+ if (tmp == NGBE_CHECKSUM_CAP_ST_PASS)
+ status = 0;
+ else
+ status = NGBE_ERR_EEPROM_CHECKSUM;
+ } else {
+ status = NGBE_ERR_MNG_ACCESS_FAILED;
+ return status;
+ }
+
+ return status;
+}
+
+s32 ngbe_phy_signal_set(struct ngbe_hw *hw)
+{
+ s32 status;
+ struct ngbe_hic_read_shadow_ram buffer;
+
+ DEBUGFUNC("\n");
+ buffer.hdr.req.cmd = FW_PHY_SIGNAL;
+ buffer.hdr.req.buf_lenh = 0;
+ buffer.hdr.req.buf_lenl = 0;
+ buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
+
+ /* this command carries no address/length payload */
+ buffer.address = 0;
+ buffer.length = 0;
+
+ status = ngbe_host_interface_pass_command(hw, (u32 *)&buffer,
+ sizeof(buffer),
+ NGBE_HI_COMMAND_TIMEOUT, false);
+
+ return status;
+}
+
+/**
+ * ngbe_read_ee_hostif - Read EEPROM word using a host interface cmd
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to read
+ * @data: word read from the EEPROM
+ *
+ * Reads a 16 bit word from the EEPROM using the hostif.
+ **/
+s32 ngbe_read_ee_hostif(struct ngbe_hw *hw, u16 offset,
+ u16 *data)
+{
+ s32 status = 0;
+
+ DEBUGFUNC("\n");
+
+ if (TCALL(hw, mac.ops.acquire_swfw_sync,
+ NGBE_MNG_SWFW_SYNC_SW_FLASH) == 0) {
+ status = ngbe_read_ee_hostif_data(hw, offset, data);
+ TCALL(hw, mac.ops.release_swfw_sync,
+ NGBE_MNG_SWFW_SYNC_SW_FLASH);
+ } else {
+ status = NGBE_ERR_SWFW_SYNC;
+ }
+
+ return status;
+}
+
+/**
+ * ngbe_read_ee_hostif_buffer - Read EEPROM word(s) using hostif
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to read
+ * @words: number of words
+ * @data: word(s) read from the EEPROM
+ *
+ * Reads a 16 bit word(s) from the EEPROM using the hostif.
+ **/
+s32 ngbe_read_ee_hostif_buffer(struct ngbe_hw *hw,
+ u16 offset, u16 words, u16 *data)
+{
+ struct ngbe_hic_read_shadow_ram buffer;
+ u32 current_word = 0;
+ u16 words_to_read;
+ s32 status;
+ u32 i;
+ u32 value = 0;
+
+ DEBUGFUNC("\n");
+
+ /* Take semaphore for the entire operation. */
+ status = TCALL(hw, mac.ops.acquire_swfw_sync,
+ NGBE_MNG_SWFW_SYNC_SW_FLASH);
+ if (status) {
+ DEBUGOUT("EEPROM read buffer - semaphore failed\n");
+ return status;
+ }
+ while (words) {
+ if (words > FW_MAX_READ_BUFFER_SIZE / 2)
+ words_to_read = FW_MAX_READ_BUFFER_SIZE / 2;
+ else
+ words_to_read = words;
+
+ buffer.hdr.req.cmd = FW_READ_SHADOW_RAM_CMD;
+ buffer.hdr.req.buf_lenh = 0;
+ buffer.hdr.req.buf_lenl = FW_READ_SHADOW_RAM_LEN;
+ buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
+
+ /* convert offset from words to bytes */
+ buffer.address = NGBE_CPU_TO_BE32((offset + current_word) * 2);
+ buffer.length = NGBE_CPU_TO_BE16(words_to_read * 2);
+
+ status = ngbe_host_interface_command(hw, (u32 *)&buffer,
+ sizeof(buffer),
+ NGBE_HI_COMMAND_TIMEOUT,
+ false);
+
+ if (status) {
+ DEBUGOUT("Host interface command failed\n");
+ goto out;
+ }
+
+ for (i = 0; i < words_to_read; i++) {
+ u32 reg = NGBE_MNG_MBOX + (FW_NVM_DATA_OFFSET << 2) +
+ 2 * i;
+
+ if (ngbe_check_mng_access(hw))
+ value = rd32(hw, reg);
+ else {
+ status = NGBE_ERR_MNG_ACCESS_FAILED;
+ goto out;
+ }
+ /* each 32-bit mailbox read holds two 16-bit words */
+ data[current_word] = (u16)(value & 0xffff);
+ current_word++;
+ i++;
+ /* consume the high half when another word was requested */
+ if (i < words_to_read) {
+ value >>= 16;
+ data[current_word] = (u16)(value & 0xffff);
+ current_word++;
+ }
+ }
+ words -= words_to_read;
+ }
+
+out:
+ TCALL(hw, mac.ops.release_swfw_sync,
+ NGBE_MNG_SWFW_SYNC_SW_FLASH);
+ return status;
+}
+
+
+/**
+ * ngbe_read_ee_hostif_data32 - Read EEPROM dword using a host interface cmd
+ * assuming that the semaphore is already obtained.
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to read
+ * @data: word read from the EEPROM
+ *
+ * Reads a 32 bit word from the EEPROM using the hostif.
+ **/
+s32 ngbe_read_ee_hostif_data32(struct ngbe_hw *hw, u16 offset,
+ u32 *data)
+{
+ s32 status;
+ struct ngbe_hic_read_shadow_ram buffer;
+
+ DEBUGFUNC("\n");
+ buffer.hdr.req.cmd = FW_READ_SHADOW_RAM_CMD;
+ buffer.hdr.req.buf_lenh = 0;
+ buffer.hdr.req.buf_lenl = FW_READ_SHADOW_RAM_LEN;
+ buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
+
+ /* convert offset from words to bytes */
+ buffer.address = NGBE_CPU_TO_BE32(offset * 2);
+ /* one dword */
+ buffer.length = NGBE_CPU_TO_BE16(sizeof(u32));
+
+ status = ngbe_host_interface_command(hw, (u32 *)&buffer,
+ sizeof(buffer),
+ NGBE_HI_COMMAND_TIMEOUT, false);
+ if (status)
+ return status;
+ if (ngbe_check_mng_access(hw)) {
+ *data = (u32)rd32a(hw, NGBE_MNG_MBOX, FW_NVM_DATA_OFFSET);
+ } else {
+ status = NGBE_ERR_MNG_ACCESS_FAILED;
+ return status;
+ }
+ return 0;
+}
+
+/**
+ * ngbe_read_ee_hostif32 - Read EEPROM dword using a host interface cmd
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to read
+ * @data: word read from the EEPROM
+ *
+ * Reads a 32 bit word from the EEPROM using the hostif.
+ **/
+s32 ngbe_read_ee_hostif32(struct ngbe_hw *hw, u16 offset,
+ u32 *data)
+{
+ s32 status = 0;
+
+ DEBUGFUNC("\n");
+
+ if (TCALL(hw, mac.ops.acquire_swfw_sync, NGBE_MNG_SWFW_SYNC_SW_FLASH) == 0) {
+ status = ngbe_read_ee_hostif_data32(hw, offset, data);
+ TCALL(hw, mac.ops.release_swfw_sync,
+ NGBE_MNG_SWFW_SYNC_SW_FLASH);
+ } else {
+ status = NGBE_ERR_SWFW_SYNC;
+ }
+
+ return status;
+}
+
+
+/**
+ * ngbe_write_ee_hostif_data - Write EEPROM word using hostif
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to write
+ * @data: word write to the EEPROM
+ *
+ * Write a 16 bit word to the EEPROM using the hostif.
+ **/
+s32 ngbe_write_ee_hostif_data(struct ngbe_hw *hw, u16 offset,
+ u16 data)
+{
+ s32 status;
+ struct ngbe_hic_write_shadow_ram buffer;
+
+ DEBUGFUNC("\n");
+
+ buffer.hdr.req.cmd = FW_WRITE_SHADOW_RAM_CMD;
+ buffer.hdr.req.buf_lenh = 0;
+ buffer.hdr.req.buf_lenl = FW_WRITE_SHADOW_RAM_LEN;
+ buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
+
+ /* one word */
+ buffer.length = NGBE_CPU_TO_BE16(sizeof(u16));
+ buffer.data = data;
+ buffer.address = NGBE_CPU_TO_BE32(offset * 2);
+
+ status = ngbe_host_interface_command(hw, (u32 *)&buffer,
+ sizeof(buffer),
+ NGBE_HI_COMMAND_TIMEOUT, false);
+
+ return status;
+}
+
+/**
+ * ngbe_write_ee_hostif - Write EEPROM word using hostif
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to write
+ * @data: word write to the EEPROM
+ *
+ * Write a 16 bit word to the EEPROM using the hostif.
+ **/
+s32 ngbe_write_ee_hostif(struct ngbe_hw *hw, u16 offset,
+ u16 data)
+{
+ s32 status = 0;
+
+ DEBUGFUNC("\n");
+
+ if (TCALL(hw, mac.ops.acquire_swfw_sync,
+ NGBE_MNG_SWFW_SYNC_SW_FLASH) == 0) {
+ status = ngbe_write_ee_hostif_data(hw, offset, data);
+ TCALL(hw, mac.ops.release_swfw_sync,
+ NGBE_MNG_SWFW_SYNC_SW_FLASH);
+ } else {
+ DEBUGOUT("write ee hostif failed to get semaphore");
+ status = NGBE_ERR_SWFW_SYNC;
+ }
+
+ return status;
+}
+
+/**
+ * ngbe_write_ee_hostif_data32 - Write EEPROM dword using hostif
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to write
+ * @data: dword to write to the EEPROM
+ *
+ * Write a 32 bit word to the EEPROM using the hostif.
+ **/
+s32 ngbe_write_ee_hostif_data32(struct ngbe_hw *hw, u16 offset,
+ u32 data)
+{
+ s32 status;
+ struct ngbe_hic_write_shadow_ram buffer;
+
+ DEBUGFUNC("\n");
+
+ buffer.hdr.req.cmd = FW_WRITE_SHADOW_RAM_CMD;
+ buffer.hdr.req.buf_lenh = 0;
+ buffer.hdr.req.buf_lenl = FW_WRITE_SHADOW_RAM_LEN;
+ buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
+
+ /* one dword */
+ buffer.length = NGBE_CPU_TO_BE16(sizeof(u32));
+ buffer.data = data;
+ buffer.address = NGBE_CPU_TO_BE32(offset * 2);
+
+ status = ngbe_host_interface_command(hw, (u32 *)&buffer,
+ sizeof(buffer),
+ NGBE_HI_COMMAND_TIMEOUT, false);
+
+ return status;
+}
+
+/**
+ * ngbe_write_ee_hostif32 - Write EEPROM dword using hostif
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to write
+ * @data: dword to write to the EEPROM
+ *
+ * Write a 32 bit word to the EEPROM using the hostif.
+ **/
+s32 ngbe_write_ee_hostif32(struct ngbe_hw *hw, u16 offset,
+ u32 data)
+{
+ s32 status = 0;
+
+ DEBUGFUNC("\n");
+
+ if (TCALL(hw, mac.ops.acquire_swfw_sync,
+ NGBE_MNG_SWFW_SYNC_SW_FLASH) == 0) {
+ status = ngbe_write_ee_hostif_data32(hw, offset, data);
+ TCALL(hw, mac.ops.release_swfw_sync,
+ NGBE_MNG_SWFW_SYNC_SW_FLASH);
+ } else {
+ DEBUGOUT("write ee hostif failed to get semaphore");
+ status = NGBE_ERR_SWFW_SYNC;
+ }
+
+ return status;
+}
+
+
+/**
+ * ngbe_write_ee_hostif_buffer - Write EEPROM word(s) using hostif
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to write
+ * @words: number of words
+ * @data: word(s) write to the EEPROM
+ *
+ * Write a 16 bit word(s) to the EEPROM using the hostif.
+ **/
+s32 ngbe_write_ee_hostif_buffer(struct ngbe_hw *hw,
+ u16 offset, u16 words, u16 *data)
+{
+ s32 status = 0;
+ u16 i = 0;
+
+ DEBUGFUNC("\n");
+
+ /* Take semaphore for the entire operation. */
+ status = TCALL(hw, mac.ops.acquire_swfw_sync,
+ NGBE_MNG_SWFW_SYNC_SW_FLASH);
+ if (status != 0) {
+ DEBUGOUT("EEPROM write buffer - semaphore failed\n");
+ return status;
+ }
+
+ for (i = 0; i < words; i++) {
+ status = ngbe_write_ee_hostif_data(hw, offset + i,
+ data[i]);
+
+ if (status != 0) {
+ DEBUGOUT("Eeprom buffered write failed\n");
+ break;
+ }
+ }
+
+ TCALL(hw, mac.ops.release_swfw_sync, NGBE_MNG_SWFW_SYNC_SW_FLASH);
+ return status;
+}
+
+
+
+/**
+ * ngbe_calc_eeprom_checksum - Calculates and returns the checksum
+ * @hw: pointer to hardware structure
+ *
+ * Returns a negative error code on error, or the 16-bit checksum
+ **/
+s32 ngbe_calc_eeprom_checksum(struct ngbe_hw *hw)
+{
+ u16 *buffer = NULL; /* no caller-supplied buffer in this version */
+ u32 buffer_size = 0;
+
+ u16 *eeprom_ptrs = NULL;
+ u16 *local_buffer;
+ s32 status;
+ u16 checksum = 0;
+ u16 i;
+
+ DEBUGFUNC("\n");
+
+ TCALL(hw, eeprom.ops.init_params);
+
+ if (!buffer) {
+ eeprom_ptrs = (u16 *)vmalloc(NGBE_EEPROM_LAST_WORD *
+ sizeof(u16));
+ if (!eeprom_ptrs)
+ return NGBE_ERR_NO_SPACE;
+ /* Read pointer area */
+ status = ngbe_read_ee_hostif_buffer(hw, 0,
+ NGBE_EEPROM_LAST_WORD,
+ eeprom_ptrs);
+ if (status) {
+ DEBUGOUT("Failed to read EEPROM image\n");
+ vfree(eeprom_ptrs);
+ return status;
+ }
+ local_buffer = eeprom_ptrs;
+ } else {
+ if (buffer_size < NGBE_EEPROM_LAST_WORD)
+ return NGBE_ERR_PARAM;
+ local_buffer = buffer;
+ }
+
+ for (i = 0; i < NGBE_EEPROM_LAST_WORD; i++)
+ if (i != hw->eeprom.sw_region_offset + NGBE_EEPROM_CHECKSUM)
+ checksum += local_buffer[i];
+
+ checksum = (u16)NGBE_EEPROM_SUM - checksum;
+ if (eeprom_ptrs)
+ vfree(eeprom_ptrs);
+
+ return (s32)checksum;
+}
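+
+/*
+ * Sanity check of the arithmetic above: the stored checksum is
+ * NGBE_EEPROM_SUM minus the 16-bit sum of every other word, so adding
+ * up all words including the checksum slot gives back NGBE_EEPROM_SUM
+ * (mod 2^16). This is the invariant the validation path relies on.
+ */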
+
+/**
+ * ngbe_update_eeprom_checksum - Updates the EEPROM checksum and flash
+ * @hw: pointer to hardware structure
+ *
+ * After writing EEPROM to shadow RAM using EEWR register, software calculates
+ * checksum and updates the EEPROM and instructs the hardware to update
+ * the flash.
+ **/
+s32 ngbe_update_eeprom_checksum(struct ngbe_hw *hw)
+{
+ s32 status;
+ u16 checksum = 0;
+
+ DEBUGFUNC("\n");
+
+ /* Read the first word from the EEPROM. If this times out or fails, do
+ * not continue or we could be in for a very long wait while every
+ * EEPROM read fails
+ */
+ status = ngbe_read_ee_hostif(hw, 0, &checksum);
+ if (status) {
+ DEBUGOUT("EEPROM read failed\n");
+ return status;
+ }
+
+ status = ngbe_calc_eeprom_checksum(hw);
+ if (status < 0)
+ return status;
+
+ checksum = (u16)(status & 0xffff);
+
+ status = ngbe_write_ee_hostif(hw, hw->eeprom.sw_region_offset +
+ NGBE_EEPROM_CHECKSUM, checksum);
+
+ return status;
+}
+
+/**
+ * ngbe_validate_eeprom_checksum - Validate EEPROM checksum
+ * @hw: pointer to hardware structure
+ * @checksum_val: calculated checksum
+ *
+ * Performs checksum calculation and validates the EEPROM checksum. If the
+ * caller does not need checksum_val, the value can be NULL.
+ **/
+s32 ngbe_validate_eeprom_checksum(struct ngbe_hw *hw,
+ u16 *checksum_val)
+{
+ s32 status;
+ u16 checksum;
+ u16 read_checksum = 0;
+
+ DEBUGFUNC("\n");
+
+ /* Read the first word from the EEPROM. If this times out or fails, do
+ * not continue or we could be in for a very long wait while every
+ * EEPROM read fails
+ */
+ status = TCALL(hw, eeprom.ops.read, 0, &checksum);
+ if (status) {
+ DEBUGOUT("EEPROM read failed\n");
+ return status;
+ }
+
+ status = TCALL(hw, eeprom.ops.calc_checksum);
+ if (status < 0)
+ return status;
+
+ checksum = (u16)(status & 0xffff);
+
+ status = ngbe_read_ee_hostif(hw, hw->eeprom.sw_region_offset +
+ NGBE_EEPROM_CHECKSUM,
+ &read_checksum);
+ if (status)
+ return status;
+
+ /* Verify read checksum from EEPROM is the same as
+ * calculated checksum
+ */
+ if (read_checksum != checksum) {
+ status = NGBE_ERR_EEPROM_CHECKSUM;
+ ERROR_REPORT1(NGBE_ERROR_INVALID_STATE,
+ "Invalid EEPROM checksum\n");
+ }
+
+ /* If the user cares, return the calculated checksum */
+ if (checksum_val)
+ *checksum_val = checksum;
+
+ return status;
+}
+
+/**
+ * ngbe_check_mac_link - Determine link and speed status
+ * @hw: pointer to hardware structure
+ * @speed: pointer to link speed
+ * @link_up: true when link is up
+ * @link_up_wait_to_complete: bool used to wait for link up or not
+ *
+ * Reads the links register to determine if link is up and the current speed
+ **/
+s32 ngbe_check_mac_link(struct ngbe_hw *hw,
+ u32 *speed,
+ bool *link_up,
+ bool link_up_wait_to_complete)
+{
+ u32 i;
+ u16 value = 0;
+ s32 status = 0;
+ u16 speed_sta = 0;
+
+ DEBUGFUNC("ngbe_check_mac_link");
+
+ if (link_up_wait_to_complete) {
+ for (i = 0; i < NGBE_LINK_UP_TIME; i++) {
+ status = TCALL(hw, phy.ops.read_reg, 0x1A, 0xA43, &value);
+ if (!status && (value & 0x4)) {
+ *link_up = true;
+ break;
+ } else {
+ *link_up = false;
+ }
+ msleep(100);
+ }
+ } else {
+ status = TCALL(hw, phy.ops.read_reg, 0x1A, 0xA43, &value);
+ if (!status && (value & 0x4)) {
+ *link_up = true;
+ } else {
+ *link_up = false;
+ }
+ }
+
+ speed_sta = value & 0x38;
+ if (*link_up) {
+ if (speed_sta == 0x28) {
+ *speed = NGBE_LINK_SPEED_1GB_FULL;
+ } else if (speed_sta == 0x18) {
+ *speed = NGBE_LINK_SPEED_100_FULL;
+ } else if (speed_sta == 0x8) {
+ *speed = NGBE_LINK_SPEED_10_FULL;
+ }
+ } else {
+ *speed = NGBE_LINK_SPEED_UNKNOWN;
+ }
+
+ if (*speed == NGBE_LINK_SPEED_1GB_FULL) {
+ status = TCALL(hw, phy.ops.read_reg, 0xA, 0x0, &value);
+ if (!status && !(value & 0x2000)) {
+ *link_up = false;
+ }
+ }
+ return status;
+}
+
+s32 ngbe_check_mac_link_mdi(struct ngbe_hw *hw,
+ u32 *speed,
+ bool *link_up,
+ bool link_up_wait_to_complete)
+{
+ u32 i;
+ u16 value = 0;
+ s32 status = 0;
+ u16 speed_sta = 0;
+
+ DEBUGFUNC("ngbe_check_mac_link_mdi");
+
+ if (hw->phy.type == ngbe_phy_m88e1512) {
+ /* select page 0 */
+ status = TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 0);
+ } else {
+ /* select page 1 */
+ status = TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 1);
+ }
+ status = TCALL(hw, phy.ops.read_reg_mdi, 17, 0, &value);
+ if (link_up_wait_to_complete) {
+ for (i = 0; i < NGBE_LINK_UP_TIME; i++) {
+ status = TCALL(hw, phy.ops.read_reg_mdi, 17, 0, &value);
+ if (value & 0x400) {
+ *link_up = true;
+ break;
+ } else {
+ *link_up = false;
+ }
+ msleep(100);
+ }
+ } else {
+ status = TCALL(hw, phy.ops.read_reg_mdi, 17, 0, &value);
+ if (value & 0x400) {
+ *link_up = true;
+ } else {
+ *link_up = false;
+ }
+ }
+
+ speed_sta = value & 0xC000;
+ if (*link_up) {
+ if (speed_sta == 0x8000) {
+ *speed = NGBE_LINK_SPEED_1GB_FULL;
+ } else if (speed_sta == 0x4000) {
+ *speed = NGBE_LINK_SPEED_100_FULL;
+ } else if (speed_sta == 0x0000) {
+ *speed = NGBE_LINK_SPEED_10_FULL;
+ }
+ } else {
+ *speed = NGBE_LINK_SPEED_UNKNOWN;
+ }
+
+ return status;
+}
+
+s32 ngbe_check_mac_link_yt8521s(struct ngbe_hw *hw,
+ u32 *speed,
+ bool *link_up,
+ bool link_up_wait_to_complete)
+{
+ u32 i;
+ u16 value = 0;
+ s32 status = 0;
+ u16 speed_sta = 0;
+
+ DEBUGFUNC("ngbe_check_mac_link_yt");
+
+ status = ngbe_phy_read_reg_sds_mii_yt8521s(hw, 0x11, 0, &value);
+ if (link_up_wait_to_complete) {
+ for (i = 0; i < NGBE_LINK_UP_TIME; i++) {
+ status = ngbe_phy_read_reg_sds_mii_yt8521s(hw, 0x11, 0, &value);
+ if (value & 0x400) {
+ *link_up = true;
+ break;
+ } else {
+ *link_up = false;
+ }
+ msleep(100);
+ }
+ } else {
+ status = ngbe_phy_read_reg_sds_mii_yt8521s(hw, 0x11, 0, &value);
+ if (value & 0x400) {
+ *link_up = true;
+ } else {
+ *link_up = false;
+ ngbe_phy_read_reg_mdi(hw, 0x11, 0, &value);
+ if (value & 0x400) {
+ *link_up = true;
+				/* yt8521s copper (UTP) side link is up */
+ } else {
+ *link_up = false;
+				/* yt8521s copper (UTP) side link is down */
+ }
+ }
+ }
+
+ speed_sta = value & 0xC000;
+ if (*link_up) {
+ if (speed_sta == 0x8000) {
+ *speed = NGBE_LINK_SPEED_1GB_FULL;
+ } else if (speed_sta == 0x4000) {
+ *speed = NGBE_LINK_SPEED_100_FULL;
+ } else if (speed_sta == 0x0000) {
+ *speed = NGBE_LINK_SPEED_10_FULL;
+ }
+ } else
+ *speed = NGBE_LINK_SPEED_UNKNOWN;
+ return status;
+}
+
+s32 ngbe_check_mac_link_zte(struct ngbe_hw *hw,
+ u32 *speed,
+ bool *link_up,
+ bool link_up_wait_to_complete)
+{
+ u32 i;
+ u16 value = 0;
+ s32 status = 0;
+ u16 speed_sta = 0;
+
+ DEBUGFUNC("ngbe_check_mac_link_zte");
+
+ /* PHY status register */
+ status = TCALL(hw, phy.ops.read_reg_mdi, 0x1a, 0, &value);
+
+ if (link_up_wait_to_complete) {
+ for (i = 0; i < NGBE_LINK_UP_TIME; i++) {
+ status = TCALL(hw, phy.ops.read_reg_mdi, 0x1a, 0, &value);
+			/* bit 6 -> 0x0040 */
+ if (value & 0x40) {
+ *link_up = true;
+ break;
+ } else {
+ *link_up = false;
+ }
+ msleep(100);
+ }
+ } else {
+ status = TCALL(hw, phy.ops.read_reg_mdi, 0x1a, 0, &value);
+ if (value & 0x40) {
+ *link_up = true;
+ } else {
+ *link_up = false;
+ }
+ }
+
+	/* speed values tested below are 0x0200/0x0100, so mask bits 9:8 */
+	speed_sta = value & 0x0300;
+ if (*link_up) {
+ if (speed_sta == 0x0200) {
+ *speed = NGBE_LINK_SPEED_1GB_FULL;
+ } else if (speed_sta == 0x0100) {
+ *speed = NGBE_LINK_SPEED_100_FULL;
+ } else if (speed_sta == 0x0000) {
+ *speed = NGBE_LINK_SPEED_10_FULL;
+ }
+ } else {
+ *speed = NGBE_LINK_SPEED_UNKNOWN;
+ }
+ return status;
+}
+
+/**
+ * ngbe_setup_eee - Enable/disable EEE support
+ * @hw: pointer to the HW structure
+ * @enable_eee: boolean flag to enable EEE
+ *
+ * Enable/disable EEE based on enable_eee flag.
+ * Auto-negotiation must be started after BASE-T EEE bits in PHY register 7.3C
+ * are modified.
+ *
+ **/
+s32 ngbe_setup_eee(struct ngbe_hw *hw, bool enable_eee)
+{
+	/* EEE is not supported; accept the request and do nothing */
+	UNREFERENCED_PARAMETER(hw);
+	UNREFERENCED_PARAMETER(enable_eee);
+	DEBUGFUNC("ngbe_setup_eee");
+
+ return 0;
+}
+
+s32 ngbe_init_ops_common(struct ngbe_hw *hw)
+{
+ struct ngbe_mac_info *mac = &hw->mac;
+ struct ngbe_eeprom_info *eeprom = &hw->eeprom;
+ struct ngbe_flash_info *flash = &hw->flash;
+
+ /* MAC */
+ mac->ops.init_hw = ngbe_init_hw;
+ mac->ops.clear_hw_cntrs = ngbe_clear_hw_cntrs;
+ mac->ops.get_mac_addr = ngbe_get_mac_addr;
+ mac->ops.stop_adapter = ngbe_stop_adapter;
+ mac->ops.get_bus_info = ngbe_get_bus_info;
+ mac->ops.set_lan_id = ngbe_set_lan_id_multi_port_pcie;
+ mac->ops.acquire_swfw_sync = ngbe_acquire_swfw_sync;
+ mac->ops.release_swfw_sync = ngbe_release_swfw_sync;
+ mac->ops.reset_hw = ngbe_reset_hw;
+ mac->ops.get_media_type = ngbe_get_media_type;
+ mac->ops.disable_sec_rx_path = ngbe_disable_sec_rx_path;
+ mac->ops.enable_sec_rx_path = ngbe_enable_sec_rx_path;
+ mac->ops.enable_rx_dma = ngbe_enable_rx_dma;
+ mac->ops.start_hw = ngbe_start_hw;
+ mac->ops.get_device_caps = ngbe_get_device_caps;
+ mac->ops.setup_eee = ngbe_setup_eee;
+
+ /* LEDs */
+ mac->ops.led_on = ngbe_led_on;
+ mac->ops.led_off = ngbe_led_off;
+
+ /* RAR, Multicast, VLAN */
+ mac->ops.set_rar = ngbe_set_rar;
+ mac->ops.clear_rar = ngbe_clear_rar;
+ mac->ops.init_rx_addrs = ngbe_init_rx_addrs;
+ mac->ops.update_uc_addr_list = ngbe_update_uc_addr_list;
+ mac->ops.update_mc_addr_list = ngbe_update_mc_addr_list;
+ mac->ops.enable_mc = ngbe_enable_mc;
+ mac->ops.disable_mc = ngbe_disable_mc;
+ mac->ops.enable_rx = ngbe_enable_rx;
+ mac->ops.disable_rx = ngbe_disable_rx;
+ mac->ops.set_vmdq_san_mac = ngbe_set_vmdq_san_mac;
+ mac->ops.insert_mac_addr = ngbe_insert_mac_addr;
+ mac->rar_highwater = 1;
+ mac->ops.set_vfta = ngbe_set_vfta;
+ mac->ops.set_vlvf = ngbe_set_vlvf;
+ mac->ops.clear_vfta = ngbe_clear_vfta;
+ mac->ops.init_uta_tables = ngbe_init_uta_tables;
+ mac->ops.set_mac_anti_spoofing = ngbe_set_mac_anti_spoofing;
+ mac->ops.set_vlan_anti_spoofing = ngbe_set_vlan_anti_spoofing;
+ mac->ops.set_ethertype_anti_spoofing =
+ ngbe_set_ethertype_anti_spoofing;
+
+ /* Flow Control */
+ mac->ops.fc_enable = ngbe_fc_enable;
+ mac->ops.setup_fc = ngbe_setup_fc;
+
+ /* Link */
+ mac->ops.get_link_capabilities = ngbe_get_link_capabilities;
+ mac->ops.check_link = ngbe_check_mac_link;
+ mac->ops.setup_rxpba = ngbe_set_rxpba;
+
+ mac->mcft_size = NGBE_SP_MC_TBL_SIZE;
+ mac->vft_size = NGBE_SP_VFT_TBL_SIZE;
+ mac->num_rar_entries = NGBE_SP_RAR_ENTRIES;
+ mac->rx_pb_size = NGBE_SP_RX_PB_SIZE;
+ mac->max_rx_queues = NGBE_SP_MAX_RX_QUEUES;
+ mac->max_tx_queues = NGBE_SP_MAX_TX_QUEUES;
+ mac->max_msix_vectors = ngbe_get_pcie_msix_count(hw);
+
+ mac->arc_subsystem_valid = (rd32(hw, NGBE_MIS_ST) &
+ NGBE_MIS_ST_MNG_INIT_DN) ? true : false;
+
+ hw->mbx.ops.init_params = ngbe_init_mbx_params_pf;
+
+ /* EEPROM */
+ eeprom->ops.init_params = ngbe_init_eeprom_params;
+ eeprom->ops.calc_checksum = ngbe_calc_eeprom_checksum;
+ eeprom->ops.read = ngbe_read_ee_hostif;
+ eeprom->ops.read_buffer = ngbe_read_ee_hostif_buffer;
+ eeprom->ops.read32 = ngbe_read_ee_hostif32;
+ eeprom->ops.write = ngbe_write_ee_hostif;
+ eeprom->ops.write_buffer = ngbe_write_ee_hostif_buffer;
+ eeprom->ops.update_checksum = ngbe_update_eeprom_checksum;
+ eeprom->ops.validate_checksum = ngbe_validate_eeprom_checksum;
+ eeprom->ops.eeprom_chksum_cap_st = ngbe_eepromcheck_cap;
+ eeprom->ops.phy_signal_set = ngbe_phy_signal_set;
+
+ /* FLASH */
+ flash->ops.init_params = ngbe_init_flash_params;
+ flash->ops.read_buffer = ngbe_read_flash_buffer;
+ flash->ops.write_buffer = ngbe_write_flash_buffer;
+
+ /* Manageability interface */
+ mac->ops.set_fw_drv_ver = ngbe_set_fw_drv_ver;
+
+ mac->ops.get_thermal_sensor_data =
+ ngbe_get_thermal_sensor_data;
+ mac->ops.init_thermal_sensor_thresh =
+ ngbe_init_thermal_sensor_thresh;
+
+ return NGBE_OK;
+}
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_hw.h b/drivers/net/ethernet/netswift/ngbe/ngbe_hw.h
new file mode 100644
index 0000000000000..d7f06643f258a
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_hw.h
@@ -0,0 +1,280 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+#ifndef _NGBE_HW_H_
+#define _NGBE_HW_H_
+
+#define NGBE_EMC_INTERNAL_DATA 0x00
+#define NGBE_EMC_INTERNAL_THERM_LIMIT 0x20
+#define NGBE_EMC_DIODE1_DATA 0x01
+#define NGBE_EMC_DIODE1_THERM_LIMIT 0x19
+#define NGBE_EMC_DIODE2_DATA 0x23
+#define NGBE_EMC_DIODE2_THERM_LIMIT 0x1A
+#define NGBE_EMC_DIODE3_DATA 0x2A
+#define NGBE_EMC_DIODE3_THERM_LIMIT 0x30
+
+#define SPI_CLK_DIV 3
+
+#define SPI_CMD_ERASE_CHIP 4 // SPI erase chip command
+#define SPI_CMD_ERASE_SECTOR 3 // SPI erase sector command
+#define SPI_CMD_WRITE_DWORD 0 // SPI write a dword command
+#define SPI_CMD_READ_DWORD 1 // SPI read a dword command
+#define SPI_CMD_USER_CMD 5 // SPI user command
+
+#define SPI_CLK_CMD_OFFSET 28 // SPI command field offset in Command register
+#define SPI_CLK_DIV_OFFSET 25 // SPI clock divide field offset in Command register
+
+#define SPI_TIME_OUT_VALUE 10000
+#define SPI_SECTOR_SIZE (4 * 1024) // FLASH sector size is 4KB
+#define SPI_H_CMD_REG_ADDR 0x10104 // SPI Command register address
+#define SPI_H_DAT_REG_ADDR 0x10108 // SPI Data register address
+#define SPI_H_STA_REG_ADDR 0x1010c // SPI Status register address
+#define SPI_H_USR_CMD_REG_ADDR 0x10110 // SPI User Command register address
+#define SPI_CMD_CFG1_ADDR 0x10118 // Flash command configuration register 1
+#define MISC_RST_REG_ADDR 0x1000c // Misc reset register address
+#define MGR_FLASH_RELOAD_REG_ADDR 0x101a0 // MGR reload flash read
+
+#define MAC_ADDR0_WORD0_OFFSET_1G 0x006000c // MAC Address for LAN0, stored in external FLASH
+#define MAC_ADDR0_WORD1_OFFSET_1G 0x0060014
+#define MAC_ADDR1_WORD0_OFFSET_1G 0x006800c // MAC Address for LAN1, stored in external FLASH
+#define MAC_ADDR1_WORD1_OFFSET_1G 0x0068014
+#define MAC_ADDR2_WORD0_OFFSET_1G 0x007000c // MAC Address for LAN2, stored in external FLASH
+#define MAC_ADDR2_WORD1_OFFSET_1G 0x0070014
+#define MAC_ADDR3_WORD0_OFFSET_1G 0x007800c // MAC Address for LAN3, stored in external FLASH
+#define MAC_ADDR3_WORD1_OFFSET_1G 0x0078014
+#define PRODUCT_SERIAL_NUM_OFFSET_1G 0x00f0000 // Product Serial Number, stored in external FLASH last sector
+
+struct ngbe_hic_read_cab {
+ union ngbe_hic_hdr2 hdr;
+ union {
+ u8 d8[252];
+ u16 d16[126];
+ u32 d32[63];
+ } dbuf;
+};
+
+/**
+ * Packet Type decoding
+ **/
+/* ngbe_dec_ptype.mac: outer mac */
+enum ngbe_dec_ptype_mac {
+ NGBE_DEC_PTYPE_MAC_IP = 0,
+ NGBE_DEC_PTYPE_MAC_L2 = 2,
+ NGBE_DEC_PTYPE_MAC_FCOE = 3,
+};
+
+/* ngbe_dec_ptype.[e]ip: outer&encaped ip */
+#define NGBE_DEC_PTYPE_IP_FRAG (0x4)
+enum ngbe_dec_ptype_ip {
+ NGBE_DEC_PTYPE_IP_NONE = 0,
+ NGBE_DEC_PTYPE_IP_IPV4 = 1,
+ NGBE_DEC_PTYPE_IP_IPV6 = 2,
+ NGBE_DEC_PTYPE_IP_FGV4 =
+ (NGBE_DEC_PTYPE_IP_FRAG | NGBE_DEC_PTYPE_IP_IPV4),
+ NGBE_DEC_PTYPE_IP_FGV6 =
+ (NGBE_DEC_PTYPE_IP_FRAG | NGBE_DEC_PTYPE_IP_IPV6),
+};
+
+/* ngbe_dec_ptype.etype: encaped type */
+enum ngbe_dec_ptype_etype {
+ NGBE_DEC_PTYPE_ETYPE_NONE = 0,
+ NGBE_DEC_PTYPE_ETYPE_IPIP = 1, /* IP+IP */
+ NGBE_DEC_PTYPE_ETYPE_IG = 2, /* IP+GRE */
+ NGBE_DEC_PTYPE_ETYPE_IGM = 3, /* IP+GRE+MAC */
+ NGBE_DEC_PTYPE_ETYPE_IGMV = 4, /* IP+GRE+MAC+VLAN */
+};
+
+/* ngbe_dec_ptype.proto: payload proto */
+enum ngbe_dec_ptype_prot {
+ NGBE_DEC_PTYPE_PROT_NONE = 0,
+ NGBE_DEC_PTYPE_PROT_UDP = 1,
+ NGBE_DEC_PTYPE_PROT_TCP = 2,
+ NGBE_DEC_PTYPE_PROT_SCTP = 3,
+ NGBE_DEC_PTYPE_PROT_ICMP = 4,
+ NGBE_DEC_PTYPE_PROT_TS = 5, /* time sync */
+};
+
+/* ngbe_dec_ptype.layer: payload layer */
+enum ngbe_dec_ptype_layer {
+ NGBE_DEC_PTYPE_LAYER_NONE = 0,
+ NGBE_DEC_PTYPE_LAYER_PAY2 = 1,
+ NGBE_DEC_PTYPE_LAYER_PAY3 = 2,
+ NGBE_DEC_PTYPE_LAYER_PAY4 = 3,
+};
+
+struct ngbe_dec_ptype {
+ u32 ptype:8;
+ u32 known:1;
+ u32 mac:2; /* outer mac */
+	u32 ip:3; /* outer ip */
+ u32 etype:3; /* encaped type */
+ u32 eip:3; /* encaped ip */
+ u32 prot:4; /* payload proto */
+ u32 layer:3; /* payload layer */
+};
+typedef struct ngbe_dec_ptype ngbe_dptype;
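+
+/*
+ * Decoding sketch (illustrative, not part of this patch): ngbe_decode_ptype()
+ * in ngbe_main.c expands the 8-bit hardware packet type into this packed
+ * struct so Rx paths can branch on individual fields, e.g.:
+ *
+ *	ngbe_dptype dptype = ngbe_decode_ptype(ptype);
+ *
+ *	if (dptype.prot == NGBE_DEC_PTYPE_PROT_TCP &&
+ *	    dptype.ip == NGBE_DEC_PTYPE_IP_IPV4)
+ *		handle_tcp_v4(skb);	(hypothetical helper)
+ */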
+
+u16 ngbe_get_pcie_msix_count(struct ngbe_hw *hw);
+s32 ngbe_init_hw(struct ngbe_hw *hw);
+s32 ngbe_start_hw(struct ngbe_hw *hw);
+s32 ngbe_clear_hw_cntrs(struct ngbe_hw *hw);
+s32 ngbe_read_pba_string(struct ngbe_hw *hw, u8 *pba_num,
+ u32 pba_num_size);
+s32 ngbe_get_mac_addr(struct ngbe_hw *hw, u8 *mac_addr);
+s32 ngbe_get_bus_info(struct ngbe_hw *hw);
+void ngbe_set_pci_config_data(struct ngbe_hw *hw, u16 link_status);
+void ngbe_set_lan_id_multi_port_pcie(struct ngbe_hw *hw);
+s32 ngbe_stop_adapter(struct ngbe_hw *hw);
+
+s32 ngbe_led_on(struct ngbe_hw *hw, u32 index);
+s32 ngbe_led_off(struct ngbe_hw *hw, u32 index);
+
+s32 ngbe_set_rar(struct ngbe_hw *hw, u32 index, u8 *addr, u64 pools,
+ u32 enable_addr);
+s32 ngbe_clear_rar(struct ngbe_hw *hw, u32 index);
+s32 ngbe_init_rx_addrs(struct ngbe_hw *hw);
+s32 ngbe_update_mc_addr_list(struct ngbe_hw *hw, u8 *mc_addr_list,
+ u32 mc_addr_count,
+ ngbe_mc_addr_itr func, bool clear);
+s32 ngbe_update_uc_addr_list(struct ngbe_hw *hw, u8 *addr_list,
+ u32 addr_count, ngbe_mc_addr_itr func);
+s32 ngbe_enable_mc(struct ngbe_hw *hw);
+s32 ngbe_disable_mc(struct ngbe_hw *hw);
+s32 ngbe_disable_sec_rx_path(struct ngbe_hw *hw);
+s32 ngbe_enable_sec_rx_path(struct ngbe_hw *hw);
+
+s32 ngbe_fc_enable(struct ngbe_hw *hw);
+void ngbe_fc_autoneg(struct ngbe_hw *hw);
+s32 ngbe_setup_fc(struct ngbe_hw *hw);
+
+s32 ngbe_validate_mac_addr(u8 *mac_addr);
+s32 ngbe_acquire_swfw_sync(struct ngbe_hw *hw, u32 mask);
+void ngbe_release_swfw_sync(struct ngbe_hw *hw, u32 mask);
+s32 ngbe_disable_pcie_master(struct ngbe_hw *hw);
+
+s32 ngbe_set_vmdq(struct ngbe_hw *hw, u32 rar, u32 vmdq);
+s32 ngbe_set_vmdq_san_mac(struct ngbe_hw *hw, u32 vmdq);
+s32 ngbe_clear_vmdq(struct ngbe_hw *hw, u32 rar, u32 vmdq);
+s32 ngbe_insert_mac_addr(struct ngbe_hw *hw, u8 *addr, u32 vmdq);
+s32 ngbe_init_uta_tables(struct ngbe_hw *hw);
+s32 ngbe_set_vfta(struct ngbe_hw *hw, u32 vlan,
+ u32 vind, bool vlan_on);
+s32 ngbe_set_vlvf(struct ngbe_hw *hw, u32 vlan, u32 vind,
+ bool vlan_on, bool *vfta_changed);
+s32 ngbe_clear_vfta(struct ngbe_hw *hw);
+s32 ngbe_find_vlvf_slot(struct ngbe_hw *hw, u32 vlan);
+
+void ngbe_set_mac_anti_spoofing(struct ngbe_hw *hw, bool enable, int pf);
+void ngbe_set_vlan_anti_spoofing(struct ngbe_hw *hw, bool enable, int vf);
+void ngbe_set_ethertype_anti_spoofing(struct ngbe_hw *hw,
+ bool enable, int vf);
+s32 ngbe_get_device_caps(struct ngbe_hw *hw, u16 *device_caps);
+void ngbe_set_rxpba(struct ngbe_hw *hw, int num_pb, u32 headroom,
+ int strategy);
+s32 ngbe_set_fw_drv_ver(struct ngbe_hw *hw, u8 maj, u8 min,
+ u8 build, u8 ver);
+s32 ngbe_reset_hostif(struct ngbe_hw *hw);
+u8 ngbe_calculate_checksum(u8 *buffer, u32 length);
+s32 ngbe_host_interface_command(struct ngbe_hw *hw, u32 *buffer,
+ u32 length, u32 timeout, bool return_data);
+
+void ngbe_clear_tx_pending(struct ngbe_hw *hw);
+void ngbe_stop_mac_link_on_d3(struct ngbe_hw *hw);
+bool ngbe_mng_present(struct ngbe_hw *hw);
+bool ngbe_check_mng_access(struct ngbe_hw *hw);
+
+s32 ngbe_get_thermal_sensor_data(struct ngbe_hw *hw);
+s32 ngbe_init_thermal_sensor_thresh(struct ngbe_hw *hw);
+void ngbe_enable_rx(struct ngbe_hw *hw);
+void ngbe_disable_rx(struct ngbe_hw *hw);
+s32 ngbe_setup_mac_link_multispeed_fiber(struct ngbe_hw *hw,
+ u32 speed,
+ bool autoneg_wait_to_complete);
+int ngbe_check_flash_load(struct ngbe_hw *hw, u32 check_bit);
+
+/* @ngbe_api.h */
+void ngbe_atr_compute_perfect_hash(union ngbe_atr_input *input,
+ union ngbe_atr_input *mask);
+u32 ngbe_atr_compute_sig_hash(union ngbe_atr_hash_dword input,
+ union ngbe_atr_hash_dword common);
+
+s32 ngbe_get_link_capabilities(struct ngbe_hw *hw,
+ u32 *speed, bool *autoneg);
+enum ngbe_media_type ngbe_get_media_type(struct ngbe_hw *hw);
+void ngbe_disable_tx_laser_multispeed_fiber(struct ngbe_hw *hw);
+void ngbe_enable_tx_laser_multispeed_fiber(struct ngbe_hw *hw);
+void ngbe_flap_tx_laser_multispeed_fiber(struct ngbe_hw *hw);
+void ngbe_set_hard_rate_select_speed(struct ngbe_hw *hw,
+ u32 speed);
+s32 ngbe_setup_mac_link(struct ngbe_hw *hw, u32 speed,
+ bool autoneg_wait_to_complete);
+void ngbe_init_mac_link_ops(struct ngbe_hw *hw);
+s32 ngbe_reset_hw(struct ngbe_hw *hw);
+s32 ngbe_identify_phy(struct ngbe_hw *hw);
+s32 ngbe_init_ops_common(struct ngbe_hw *hw);
+s32 ngbe_enable_rx_dma(struct ngbe_hw *hw, u32 regval);
+s32 ngbe_init_ops(struct ngbe_hw *hw);
+s32 ngbe_setup_eee(struct ngbe_hw *hw, bool enable_eee);
+
+s32 ngbe_init_flash_params(struct ngbe_hw *hw);
+s32 ngbe_read_flash_buffer(struct ngbe_hw *hw, u32 offset,
+ u32 dwords, u32 *data);
+s32 ngbe_write_flash_buffer(struct ngbe_hw *hw, u32 offset,
+ u32 dwords, u32 *data);
+
+s32 ngbe_read_eeprom(struct ngbe_hw *hw,
+ u16 offset, u16 *data);
+s32 ngbe_read_eeprom_buffer(struct ngbe_hw *hw, u16 offset,
+ u16 words, u16 *data);
+s32 ngbe_init_eeprom_params(struct ngbe_hw *hw);
+s32 ngbe_update_eeprom_checksum(struct ngbe_hw *hw);
+s32 ngbe_calc_eeprom_checksum(struct ngbe_hw *hw);
+s32 ngbe_validate_eeprom_checksum(struct ngbe_hw *hw,
+ u16 *checksum_val);
+s32 ngbe_upgrade_flash(struct ngbe_hw *hw, u32 region,
+ const u8 *data, u32 size);
+s32 ngbe_write_ee_hostif_buffer(struct ngbe_hw *hw,
+ u16 offset, u16 words, u16 *data);
+s32 ngbe_write_ee_hostif(struct ngbe_hw *hw, u16 offset,
+ u16 data);
+s32 ngbe_write_ee_hostif32(struct ngbe_hw *hw, u16 offset,
+ u32 data);
+
+s32 ngbe_read_ee_hostif_buffer(struct ngbe_hw *hw,
+ u16 offset, u16 words, u16 *data);
+s32 ngbe_read_ee_hostif(struct ngbe_hw *hw, u16 offset, u16 *data);
+
+s32 ngbe_read_ee_hostif32(struct ngbe_hw *hw, u16 offset, u32 *data);
+
+u32 ngbe_rd32_epcs(struct ngbe_hw *hw, u32 addr);
+void ngbe_wr32_epcs(struct ngbe_hw *hw, u32 addr, u32 data);
+void ngbe_wr32_ephy(struct ngbe_hw *hw, u32 addr, u32 data);
+s32 ngbe_upgrade_flash_hostif(struct ngbe_hw *hw, u32 region,
+ const u8 *data, u32 size);
+
+s32 ngbe_check_mac_link_zte(struct ngbe_hw *hw,
+ u32 *speed,
+ bool *link_up,
+ bool link_up_wait_to_complete);
+
+s32 ngbe_eepromcheck_cap(struct ngbe_hw *hw, u16 offset,
+ u32 *data);
+s32 ngbe_phy_signal_set(struct ngbe_hw *hw);
+
+#endif /* _NGBE_HW_H_ */
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_lib.c b/drivers/net/ethernet/netswift/ngbe/ngbe_lib.c
new file mode 100644
index 0000000000000..200fc34e9fa89
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_lib.c
@@ -0,0 +1,701 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include "ngbe.h"
+#include "ngbe_sriov.h"
+
+/**
+ * ngbe_cache_ring_vmdq - Descriptor ring to register mapping for VMDq
+ * @adapter: board private structure to initialize
+ *
+ * Cache the descriptor ring offsets for VMDq to the assigned rings. It
+ * will also try to cache the proper offsets if RSS/FCoE/SRIOV are enabled along
+ * with VMDq.
+ *
+ **/
+static bool ngbe_cache_ring_vmdq(struct ngbe_adapter *adapter)
+{
+ struct ngbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
+ int i;
+ u16 reg_idx;
+
+ /* only proceed if VMDq is enabled */
+ if (!(adapter->flags & NGBE_FLAG_VMDQ_ENABLED))
+ return false;
+
+ /* start at VMDq register offset for SR-IOV enabled setups */
+ reg_idx = vmdq->offset;
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+		/* map each Rx ring to consecutive registers from the pool offset */
+ adapter->rx_ring[i]->reg_idx = reg_idx + i;
+ }
+
+ reg_idx = vmdq->offset;
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+		/* map each Tx ring to consecutive registers from the pool offset */
+ adapter->tx_ring[i]->reg_idx = reg_idx + i;
+ }
+
+ return true;
+}
+
+/**
+ * ngbe_cache_ring_rss - Descriptor ring to register mapping for RSS
+ * @adapter: board private structure to initialize
+ *
+ * Cache the descriptor ring offsets for RSS; ring i maps to register index i.
+ *
+ **/
+static bool ngbe_cache_ring_rss(struct ngbe_adapter *adapter)
+{
+ u16 i;
+
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ adapter->rx_ring[i]->reg_idx = i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ adapter->tx_ring[i]->reg_idx = i;
+
+ return true;
+}
+
+/**
+ * ngbe_cache_ring_register - Descriptor ring to register mapping
+ * @adapter: board private structure to initialize
+ *
+ * Once we know the feature-set enabled for the device, we'll cache
+ * the register offset the descriptor ring is assigned to.
+ *
+ * Note, the order the various feature calls is important. It must start with
+ * the "most" features enabled at the same time, then trickle down to the
+ * least amount of features turned on at once.
+ **/
+static void ngbe_cache_ring_register(struct ngbe_adapter *adapter)
+{
+ if (ngbe_cache_ring_vmdq(adapter))
+ return;
+
+ ngbe_cache_ring_rss(adapter);
+}
+
+#define NGBE_RSS_64Q_MASK 0x3F
+#define NGBE_RSS_16Q_MASK 0xF
+#define NGBE_RSS_8Q_MASK 0x7
+#define NGBE_RSS_4Q_MASK 0x3
+#define NGBE_RSS_2Q_MASK 0x1
+#define NGBE_RSS_DISABLED_MASK 0x0
+
+/**
+ * ngbe_set_vmdq_queues: Allocate queues for VMDq devices
+ * @adapter: board private structure to initialize
+ *
+ * When VMDq (Virtual Machine Device Queues) is enabled, allocate queues
+ * and VM pools where appropriate. If RSS is available, then also try and
+ * enable RSS and map accordingly.
+ *
+ **/
+static bool ngbe_set_vmdq_queues(struct ngbe_adapter *adapter)
+{
+ u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit;
+ u16 vmdq_m = 0;
+ u16 rss_i = adapter->ring_feature[RING_F_RSS].limit;
+ u16 rss_m = NGBE_RSS_DISABLED_MASK;
+
+ /* only proceed if VMDq is enabled */
+ if (!(adapter->flags & NGBE_FLAG_VMDQ_ENABLED))
+ return false;
+
+ /* Add starting offset to total pool count */
+ vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset;
+
+ /* double check we are limited to maximum pools */
+ vmdq_i = min_t(u16, NGBE_MAX_VMDQ_INDICES, vmdq_i);
+
+	/* when VMDq is enabled, disable RSS */
+ rss_i = 1;
+
+ /* remove the starting offset from the pool count */
+ vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset;
+
+ /* save features for later use */
+ adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i;
+ adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;
+
+ /* limit RSS based on user input and save for later use */
+ adapter->ring_feature[RING_F_RSS].indices = rss_i;
+ adapter->ring_feature[RING_F_RSS].mask = rss_m;
+
+ adapter->queues_per_pool = rss_i;
+ adapter->num_rx_queues = vmdq_i * rss_i;
+ adapter->num_tx_queues = vmdq_i * rss_i;
+
+ return true;
+}
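+
+/*
+ * Worked example (illustrative): with a VMDq limit of 8 pools at offset 0,
+ * RSS is forced to one queue per pool above, so this sets
+ * num_rx_queues = num_tx_queues = 8 * 1 = 8, one queue pair per pool.
+ */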
+
+/**
+ * ngbe_set_rss_queues: Allocate queues for RSS
+ * @adapter: board private structure to initialize
+ *
+ * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try
+ * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
+ *
+ **/
+static bool ngbe_set_rss_queues(struct ngbe_adapter *adapter)
+{
+ struct ngbe_ring_feature *f;
+ u16 rss_i;
+
+	/* set mask for 8 queue limit of RSS */
+ f = &adapter->ring_feature[RING_F_RSS];
+ rss_i = f->limit;
+
+ f->indices = rss_i;
+ f->mask = NGBE_RSS_8Q_MASK;
+
+ adapter->num_rx_queues = rss_i;
+ adapter->num_tx_queues = rss_i;
+
+ return true;
+}
+
+/**
+ * ngbe_set_num_queues: Allocate queues for device, feature dependent
+ * @adapter: board private structure to initialize
+ *
+ * This is the top level queue allocation routine. The order here is very
+ * important, starting with the "most" number of features turned on at once,
+ * and ending with the smallest set of features. This way large combinations
+ * can be allocated if they're turned on, and smaller combinations are the
+ * fallthrough conditions.
+ *
+ **/
+static void ngbe_set_num_queues(struct ngbe_adapter *adapter)
+{
+ /* Start with base case */
+ adapter->num_rx_queues = 1;
+ adapter->num_tx_queues = 1;
+ adapter->queues_per_pool = 1;
+
+ if (ngbe_set_vmdq_queues(adapter))
+ return;
+
+ ngbe_set_rss_queues(adapter);
+}
+
+/**
+ * ngbe_acquire_msix_vectors - acquire MSI-X vectors
+ * @adapter: board private structure
+ *
+ * Attempts to acquire a suitable range of MSI-X vector interrupts. Will
+ * return a negative error code if unable to acquire MSI-X vectors for any
+ * reason.
+ */
+static int ngbe_acquire_msix_vectors(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int i, vectors, vector_threshold;
+
+ if (!(adapter->flags & NGBE_FLAG_MSIX_CAPABLE))
+ return -EOPNOTSUPP;
+
+ /* We start by asking for one vector per queue pair */
+ vectors = max(adapter->num_rx_queues, adapter->num_tx_queues);
+
+ /* It is easy to be greedy for MSI-X vectors. However, it really
+ * doesn't do much good if we have a lot more vectors than CPUs. We'll
+ * be somewhat conservative and only ask for (roughly) the same number
+ * of vectors as there are CPUs.
+ */
+ vectors = min_t(int, vectors, num_online_cpus());
+
+ /* Some vectors are necessary for non-queue interrupts */
+ vectors += NON_Q_VECTORS;
+
+	/* Hardware can only support a maximum of hw->mac.max_msix_vectors.
+ * With features such as RSS and VMDq, we can easily surpass the
+ * number of Rx and Tx descriptor queues supported by our device.
+ * Thus, we cap the maximum in the rare cases where the CPU count also
+ * exceeds our vector limit
+ */
+ vectors = min_t(int, vectors, hw->mac.max_msix_vectors);
+
+ /* We want a minimum of two MSI-X vectors for (1) a TxQ[0] + RxQ[0]
+ * handler, and (2) an Other (Link Status Change, etc.) handler.
+ */
+ vector_threshold = MIN_MSIX_COUNT;
+
+	/* we need to alloc (7 VFs + 1 PF + 1 misc) or (8 VFs + 1 misc) MSI-X entries */
+ if (adapter->flags2 & NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP) {
+ vectors += adapter->ring_feature[RING_F_VMDQ].offset;
+ }
+
+ adapter->msix_entries = kcalloc(vectors,
+ sizeof(struct msix_entry),
+ GFP_KERNEL);
+ if (!adapter->msix_entries)
+ return -ENOMEM;
+
+ for (i = 0; i < vectors; i++)
+ adapter->msix_entries[i].entry = i;
+
+ vectors = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
+ vector_threshold, vectors);
+ if (vectors < 0) {
+ /* A negative count of allocated vectors indicates an error in
+ * acquiring within the specified range of MSI-X vectors */
+ e_dev_warn("Failed to allocate MSI-X interrupts. Err: %d\n",
+ vectors);
+
+ adapter->flags &= ~NGBE_FLAG_MSIX_ENABLED;
+ kfree(adapter->msix_entries);
+ adapter->msix_entries = NULL;
+
+ return vectors;
+ }
+
+ if (adapter->flags2 & NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP) {
+ if (vectors < 9) {
+ adapter->flags2 &= ~NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP;
+			e_dev_warn("Fewer than 9 MSI-X vectors available. Disabling misc IRQ remap.\n");
+ } else
+ vectors -= adapter->ring_feature[RING_F_VMDQ].offset;
+ }
+
+ /* we successfully allocated some number of vectors within our
+ * requested range.
+ */
+ adapter->flags |= NGBE_FLAG_MSIX_ENABLED;
+
+ /* Adjust for only the vectors we'll use, which is minimum
+ * of max_q_vectors, or the number of vectors we were allocated.
+ */
+ vectors -= NON_Q_VECTORS;
+ adapter->num_q_vectors = min_t(int, vectors, adapter->max_q_vectors);
+
+ return 0;
+}
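+
+/*
+ * Worked example (illustrative): on a 4-CPU system with 8 Rx and 8 Tx
+ * queues, the request is min(max(8, 8), 4) + NON_Q_VECTORS vectors, further
+ * capped at hw->mac.max_msix_vectors; pci_enable_msix_range() may then
+ * grant anything between MIN_MSIX_COUNT and that request.
+ */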
+
+static void ngbe_add_ring(struct ngbe_ring *ring,
+ struct ngbe_ring_container *head)
+{
+ ring->next = head->ring;
+ head->ring = ring;
+ head->count++;
+}
+
+/**
+ * ngbe_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_count: q_vectors allocated on adapter, used for ring interleaving
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: total number of Tx rings to allocate
+ * @txr_idx: index of first Tx ring to allocate
+ * @rxr_count: total number of Rx rings to allocate
+ * @rxr_idx: index of first Rx ring to allocate
+ *
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
+ **/
+static int ngbe_alloc_q_vector(struct ngbe_adapter *adapter,
+ unsigned int v_count, unsigned int v_idx,
+ unsigned int txr_count, unsigned int txr_idx,
+ unsigned int rxr_count, unsigned int rxr_idx)
+{
+ struct ngbe_q_vector *q_vector;
+ struct ngbe_ring *ring;
+ int node = -1;
+ int cpu = -1;
+ u8 tcs = netdev_get_num_tc(adapter->netdev);
+
+ int ring_count, size;
+
+ /* note this will allocate space for the ring structure as well! */
+ ring_count = txr_count + rxr_count;
+ size = sizeof(struct ngbe_q_vector) +
+ (sizeof(struct ngbe_ring) * ring_count);
+
+ /* customize cpu for Flow Director mapping */
+ if ((tcs <= 1) && !(adapter->flags & NGBE_FLAG_VMDQ_ENABLED)) {
+ u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
+ if (rss_i > 1 && adapter->atr_sample_rate) {
+ if (cpu_online(v_idx)) {
+ cpu = v_idx;
+ node = cpu_to_node(cpu);
+ }
+ }
+ }
+
+ /* allocate q_vector and rings */
+ q_vector = kzalloc_node(size, GFP_KERNEL, node);
+ if (!q_vector)
+ q_vector = kzalloc(size, GFP_KERNEL);
+ if (!q_vector)
+ return -ENOMEM;
+
+ /* setup affinity mask and node */
+ if (cpu != -1)
+ cpumask_set_cpu(cpu, &q_vector->affinity_mask);
+ q_vector->numa_node = node;
+
+ /* initialize CPU for DCA */
+ q_vector->cpu = -1;
+
+ /* initialize NAPI */
+ netif_napi_add(adapter->netdev, &q_vector->napi,
+ ngbe_poll, 64);
+
+ /* tie q_vector and adapter together */
+ adapter->q_vector[v_idx] = q_vector;
+ q_vector->adapter = adapter;
+ q_vector->v_idx = v_idx;
+
+ /* initialize work limits */
+ q_vector->tx.work_limit = adapter->tx_work_limit;
+ q_vector->rx.work_limit = adapter->rx_work_limit;
+
+ /* initialize pointer to rings */
+ ring = q_vector->ring;
+
+	/* initialize ITR */
+ if (txr_count && !rxr_count) {
+ /* tx only vector */
+ if (adapter->tx_itr_setting == 1)
+ q_vector->itr = NGBE_7K_ITR;
+ else
+ q_vector->itr = adapter->tx_itr_setting;
+ } else {
+ /* rx or rx/tx vector */
+ if (adapter->rx_itr_setting == 1)
+ q_vector->itr = NGBE_7K_ITR;
+ else
+ q_vector->itr = adapter->rx_itr_setting;
+ }
+
+ while (txr_count) {
+ /* assign generic ring traits */
+ ring->dev = pci_dev_to_dev(adapter->pdev);
+ ring->netdev = adapter->netdev;
+
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Tx values */
+ ngbe_add_ring(ring, &q_vector->tx);
+
+ /* apply Tx specific ring traits */
+ ring->count = adapter->tx_ring_count;
+ if (adapter->num_vmdqs > 1)
+ ring->queue_index =
+ txr_idx % adapter->queues_per_pool;
+ else
+ ring->queue_index = txr_idx;
+
+ /* assign ring to adapter */
+ adapter->tx_ring[txr_idx] = ring;
+
+ /* update count and index */
+ txr_count--;
+ txr_idx += v_count;
+
+ /* push pointer to next ring */
+ ring++;
+ }
+
+ while (rxr_count) {
+ /* assign generic ring traits */
+ ring->dev = pci_dev_to_dev(adapter->pdev);
+ ring->netdev = adapter->netdev;
+
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Rx values */
+ ngbe_add_ring(ring, &q_vector->rx);
+
+ /* apply Rx specific ring traits */
+ ring->count = adapter->rx_ring_count;
+ if (adapter->num_vmdqs > 1)
+ ring->queue_index =
+ rxr_idx % adapter->queues_per_pool;
+ else
+ ring->queue_index = rxr_idx;
+
+ /* assign ring to adapter */
+ adapter->rx_ring[rxr_idx] = ring;
+
+ /* update count and index */
+ rxr_count--;
+ rxr_idx += v_count;
+
+ /* push pointer to next ring */
+ ring++;
+ }
+
+ return 0;
+}
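+
+/*
+ * Interleaving sketch (illustrative): rings are strided across vectors by
+ * v_count. With v_count = 4 and 8 Tx rings, vector 0 owns rings 0 and 4
+ * (txr_idx starts at 0 and advances by v_count), vector 1 owns rings 1 and
+ * 5, and so on.
+ */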
+
+/**
+ * ngbe_free_q_vector - Free memory allocated for specific interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: Index of vector to be freed
+ *
+ * This function frees the memory allocated to the q_vector. In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void ngbe_free_q_vector(struct ngbe_adapter *adapter, int v_idx)
+{
+ struct ngbe_q_vector *q_vector = adapter->q_vector[v_idx];
+ struct ngbe_ring *ring;
+
+ ngbe_for_each_ring(ring, q_vector->tx)
+ adapter->tx_ring[ring->queue_index] = NULL;
+
+ ngbe_for_each_ring(ring, q_vector->rx)
+ adapter->rx_ring[ring->queue_index] = NULL;
+
+ adapter->q_vector[v_idx] = NULL;
+ netif_napi_del(&q_vector->napi);
+ kfree_rcu(q_vector, rcu);
+}
+
+/**
+ * ngbe_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ **/
+static int ngbe_alloc_q_vectors(struct ngbe_adapter *adapter)
+{
+ unsigned int q_vectors = adapter->num_q_vectors;
+ unsigned int rxr_remaining = adapter->num_rx_queues;
+ unsigned int txr_remaining = adapter->num_tx_queues;
+ unsigned int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+ int err;
+
+ if (q_vectors >= (rxr_remaining + txr_remaining)) {
+ for (; rxr_remaining; v_idx++) {
+ err = ngbe_alloc_q_vector(adapter, q_vectors, v_idx,
+ 0, 0, 1, rxr_idx);
+ if (err)
+ goto err_out;
+
+ /* update counts and index */
+ rxr_remaining--;
+ rxr_idx++;
+ }
+ }
+
+ for (; v_idx < q_vectors; v_idx++) {
+ int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
+ int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
+ err = ngbe_alloc_q_vector(adapter, q_vectors, v_idx,
+ tqpv, txr_idx,
+ rqpv, rxr_idx);
+
+ if (err)
+ goto err_out;
+
+ /* update counts and index */
+ rxr_remaining -= rqpv;
+ txr_remaining -= tqpv;
+ rxr_idx++;
+ txr_idx++;
+ }
+
+ return 0;
+
+err_out:
+ adapter->num_tx_queues = 0;
+ adapter->num_rx_queues = 0;
+ adapter->num_q_vectors = 0;
+
+ while (v_idx--)
+ ngbe_free_q_vector(adapter, v_idx);
+
+ return -ENOMEM;
+}
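+
+/*
+ * Distribution sketch (illustrative): with 3 vectors and 8 Rx/8 Tx rings,
+ * vector 0 gets DIV_ROUND_UP(8, 3) = 3 ring pairs, vector 1 gets
+ * DIV_ROUND_UP(5, 2) = 3, and vector 2 the remaining 2, so every ring ends
+ * up owned by exactly one q_vector.
+ */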
+
+/**
+ * ngbe_free_q_vectors - Free memory allocated for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors. In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void ngbe_free_q_vectors(struct ngbe_adapter *adapter)
+{
+ int v_idx = adapter->num_q_vectors;
+
+ adapter->num_tx_queues = 0;
+ adapter->num_rx_queues = 0;
+ adapter->num_q_vectors = 0;
+
+ while (v_idx--)
+ ngbe_free_q_vector(adapter, v_idx);
+}
+
+void ngbe_reset_interrupt_capability(struct ngbe_adapter *adapter)
+{
+ if (adapter->flags & NGBE_FLAG_MSIX_ENABLED) {
+ adapter->flags &= ~NGBE_FLAG_MSIX_ENABLED;
+ pci_disable_msix(adapter->pdev);
+ kfree(adapter->msix_entries);
+ adapter->msix_entries = NULL;
+ } else if (adapter->flags & NGBE_FLAG_MSI_ENABLED) {
+ adapter->flags &= ~NGBE_FLAG_MSI_ENABLED;
+ pci_disable_msi(adapter->pdev);
+ }
+}
+
+/**
+ * ngbe_set_interrupt_capability - set MSI-X or MSI if supported
+ * @adapter: board private structure to initialize
+ *
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
+ **/
+void ngbe_set_interrupt_capability(struct ngbe_adapter *adapter)
+{
+ int err;
+
+ /* We will try to get MSI-X interrupts first */
+ if (!ngbe_acquire_msix_vectors(adapter))
+ return;
+
+ /* At this point, we do not have MSI-X capabilities. We need to
+ * reconfigure or disable various features which require MSI-X
+ * capability.
+ */
+ /* Disable VMDq support */
+	e_dev_warn("Disabling VMDq support\n");
+ adapter->flags &= ~NGBE_FLAG_VMDQ_ENABLED;
+
+#ifdef CONFIG_PCI_IOV
+ /* Disable SR-IOV support */
+ e_dev_warn("Disabling SR-IOV support\n");
+ ngbe_disable_sriov(adapter);
+ if (adapter->flags2 & NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP)
+ adapter->flags2 &= ~NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP;
+#endif /* CONFIG_PCI_IOV */
+
+ /* Disable RSS */
+ e_dev_warn("Disabling RSS support\n");
+ adapter->ring_feature[RING_F_RSS].limit = 1;
+
+ /* recalculate number of queues now that many features have been
+ * changed or disabled.
+ */
+ ngbe_set_num_queues(adapter);
+ adapter->num_q_vectors = 1;
+
+ if (!(adapter->flags & NGBE_FLAG_MSI_CAPABLE))
+ return;
+
+ err = pci_enable_msi(adapter->pdev);
+ if (err)
+ e_dev_warn("Failed to allocate MSI interrupt, falling back to "
+ "legacy. Error: %d\n",
+ err);
+ else
+ adapter->flags |= NGBE_FLAG_MSI_ENABLED;
+}
+
+/**
+ * ngbe_init_interrupt_scheme - Determine proper interrupt scheme
+ * @adapter: board private structure to initialize
+ *
+ * We determine which interrupt scheme to use based on...
+ * - Kernel support (MSI, MSI-X)
+ * - which can be user-defined (via MODULE_PARAM)
+ * - Hardware queue count (num_*_queues)
+ * - defined by miscellaneous hardware support/features (RSS, etc.)
+ **/
+int ngbe_init_interrupt_scheme(struct ngbe_adapter *adapter)
+{
+ int err;
+
+	/* If assigned VFs >= 7, the PF queue IRQs remain at seq 0 and the
+	 * misc IRQ moves from seq 1 to seq 8, which needs extra handling.
+	 */
+ if (adapter->num_vfs >= NGBE_MAX_VF_FUNCTIONS - 1) {
+ adapter->flags2 |= NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP;
+ }
+
+ /* Number of supported queues */
+ ngbe_set_num_queues(adapter);
+
+ /* Set interrupt mode */
+ ngbe_set_interrupt_capability(adapter);
+
+ /* Allocate memory for queues */
+ err = ngbe_alloc_q_vectors(adapter);
+ if (err) {
+ e_err(probe, "Unable to allocate memory for queue vectors\n");
+ ngbe_reset_interrupt_capability(adapter);
+ return err;
+ }
+
+ ngbe_cache_ring_register(adapter);
+
+ set_bit(__NGBE_DOWN, &adapter->state);
+
+ return 0;
+}
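+
+/*
+ * Call-order sketch (illustrative): probe and reset paths are expected to
+ * pair this with ngbe_clear_interrupt_scheme(), e.g.:
+ *
+ *	err = ngbe_init_interrupt_scheme(adapter);
+ *	if (err)
+ *		goto err_free;	(err_free is a hypothetical label)
+ *	...
+ *	ngbe_clear_interrupt_scheme(adapter);
+ */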
+
+/**
+ * ngbe_clear_interrupt_scheme - Clear the current interrupt scheme settings
+ * @adapter: board private structure to clear interrupt scheme on
+ *
+ * We go through and clear interrupt specific resources and reset the structure
+ * to pre-load conditions
+ **/
+void ngbe_clear_interrupt_scheme(struct ngbe_adapter *adapter)
+{
+ ngbe_free_q_vectors(adapter);
+ ngbe_reset_interrupt_capability(adapter);
+
+	/* clear this flag */
+ if (adapter->flags2 & NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP) {
+ adapter->flags2 &= ~NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP;
+ }
+}
+
+void ngbe_tx_ctxtdesc(struct ngbe_ring *tx_ring, u32 vlan_macip_lens,
+ u32 fcoe_sof_eof, u32 type_tucmd, u32 mss_l4len_idx)
+{
+ struct ngbe_tx_context_desc *context_desc;
+ u16 i = tx_ring->next_to_use;
+
+ context_desc = NGBE_TX_CTXTDESC(tx_ring, i);
+
+ i++;
+ tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+ /* set bits to identify this as an advanced context descriptor */
+ type_tucmd |= NGBE_TXD_DTYP_CTXT;
+ context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
+ context_desc->seqnum_seed = cpu_to_le32(fcoe_sof_eof);
+ context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
+ context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
+}
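+
+/*
+ * Index-wrap sketch (illustrative): next_to_use advances by one per context
+ * descriptor and wraps at tx_ring->count, so with count = 512 and
+ * next_to_use = 511 the descriptor lands in slot 511 and next_to_use
+ * returns to 0.
+ */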
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_main.c b/drivers/net/ethernet/netswift/ngbe/ngbe_main.c
new file mode 100644
index 0000000000000..9a3f4450d38e9
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_main.c
@@ -0,0 +1,7119 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/pkt_sched.h>
+#include <linux/ipv6.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <linux/if_macvlan.h>
+#include <linux/ethtool.h>
+#include <linux/if_bridge.h>
+#include <net/vxlan.h>
+
+#include "ngbe.h"
+#include "ngbe_sriov.h"
+#include "ngbe_hw.h"
+#include "ngbe_phy.h"
+#include "ngbe_pcierr.h"
+
+char ngbe_driver_name[32] = NGBE_NAME;
+static const char ngbe_driver_string[] =
+ "WangXun Gigabit PCI Express Network Driver";
+
+#define DRV_VERSION __stringify(1.1.0oe)
+
+const char ngbe_driver_version[32] = DRV_VERSION;
+static const char ngbe_copyright[] =
+ "Copyright (c) 2018 -2019 Beijing WangXun Technology Co., Ltd";
+static const char ngbe_overheat_msg[] =
+ "Network adapter has been stopped because it has over heated. "
+ "If the problem persists, restart the computer, or "
+ "power off the system and replace the adapter";
+static const char ngbe_underheat_msg[] =
+ "Network adapter has been started again since the temperature "
+ "has been back to normal state";
+
+/* ngbe_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ * Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id ngbe_pci_tbl[] = {
+ { PCI_VDEVICE(TRUSTNETIC, NGBE_DEV_ID_EM_TEST), 0},
+ { PCI_VDEVICE(TRUSTNETIC, NGBE_DEV_ID_EM_WX1860A2), 0},
+ { PCI_VDEVICE(TRUSTNETIC, NGBE_DEV_ID_EM_WX1860A2S), 0},
+ { PCI_VDEVICE(TRUSTNETIC, NGBE_DEV_ID_EM_WX1860A4), 0},
+ { PCI_VDEVICE(TRUSTNETIC, NGBE_DEV_ID_EM_WX1860A4S), 0},
+ { PCI_VDEVICE(TRUSTNETIC, NGBE_DEV_ID_EM_WX1860AL2), 0},
+ { PCI_VDEVICE(TRUSTNETIC, NGBE_DEV_ID_EM_WX1860AL2S), 0},
+ { PCI_VDEVICE(TRUSTNETIC, NGBE_DEV_ID_EM_WX1860AL4), 0},
+ { PCI_VDEVICE(TRUSTNETIC, NGBE_DEV_ID_EM_WX1860AL4S), 0},
+ { PCI_VDEVICE(TRUSTNETIC, NGBE_DEV_ID_EM_WX1860AL_W), 0},
+ { PCI_VDEVICE(TRUSTNETIC, NGBE_DEV_ID_EM_WX1860NCSI), 0},
+ { PCI_VDEVICE(TRUSTNETIC, NGBE_DEV_ID_EM_WX1860AL1), 0},
+ { PCI_VDEVICE(TRUSTNETIC, NGBE_DEV_ID_EM_WX1860A1), 0},
+ { PCI_VDEVICE(TRUSTNETIC, 0x10c), 0},
+ /* required last entry */
+ { .device = 0 }
+};
+MODULE_DEVICE_TABLE(pci, ngbe_pci_tbl);
+
+MODULE_AUTHOR("Beijing WangXun Technology Co., Ltd, <linux.nic(a)trustnetic.com>");
+MODULE_DESCRIPTION("WangXun(R) Gigabit PCI Express Network Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+#define DEFAULT_DEBUG_LEVEL_SHIFT 3
+
+static struct workqueue_struct *ngbe_wq;
+
+static bool ngbe_check_cfg_remove(struct ngbe_hw *hw, struct pci_dev *pdev);
+static void ngbe_clean_rx_ring(struct ngbe_ring *rx_ring);
+static void ngbe_clean_tx_ring(struct ngbe_ring *tx_ring);
+
+extern ngbe_dptype ngbe_ptype_lookup[256];
+
+static inline ngbe_dptype ngbe_decode_ptype(const u8 ptype)
+{
+ return ngbe_ptype_lookup[ptype];
+}
+
+static inline ngbe_dptype
+decode_rx_desc_ptype(const union ngbe_rx_desc *rx_desc)
+{
+ return ngbe_decode_ptype(NGBE_RXD_PKTTYPE(rx_desc));
+}
+
+static void ngbe_check_minimum_link(struct ngbe_adapter *adapter,
+ int expected_gts)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ struct pci_dev *pdev;
+
+ /* Some devices are not connected over PCIe and thus do not negotiate
+ * speed. These devices do not have valid bus info, and thus any report
+ * we generate may not be correct.
+ */
+ if (hw->bus.type == ngbe_bus_type_internal)
+ return;
+
+ pdev = adapter->pdev;
+
+ pcie_print_link_status(pdev);
+}
+
+/**
+ * ngbe_enumerate_functions - Get the number of ports this device has
+ * @adapter: adapter structure
+ *
+ * This function enumerates the physical functions co-located on a single slot,
+ * in order to determine how many ports a device has. This is most useful in
+ * determining the required GT/s of PCIe bandwidth necessary for optimal
+ * performance.
+ **/
+static inline int ngbe_enumerate_functions(struct ngbe_adapter *adapter)
+{
+ struct pci_dev *entry, *pdev = adapter->pdev;
+ int physfns = 0;
+
+ list_for_each_entry(entry, &pdev->bus->devices, bus_list) {
+#ifdef CONFIG_PCI_IOV
+ /* don't count virtual functions */
+ if (entry->is_virtfn)
+ continue;
+#endif
+
+ /* When the devices on the bus don't all match our device ID,
+ * we can't reliably determine the correct number of
+ * functions. This can occur if a function has been direct
+ * attached to a virtual machine using VT-d, for example. In
+ * this case, simply return -1 to indicate this.
+ */
+ if ((entry->vendor != pdev->vendor) ||
+ (entry->device != pdev->device))
+ return -1;
+
+ physfns++;
+ }
+
+ return physfns;
+}
+
+void ngbe_service_event_schedule(struct ngbe_adapter *adapter)
+{
+ if (!test_bit(__NGBE_DOWN, &adapter->state) &&
+ !test_bit(__NGBE_REMOVING, &adapter->state) &&
+ !test_and_set_bit(__NGBE_SERVICE_SCHED, &adapter->state))
+ queue_work(ngbe_wq, &adapter->service_task);
+}
+
+static void ngbe_service_event_complete(struct ngbe_adapter *adapter)
+{
+ BUG_ON(!test_bit(__NGBE_SERVICE_SCHED, &adapter->state));
+
+ /* flush memory to make sure state is correct before next watchdog */
+ smp_mb__before_atomic();
+ clear_bit(__NGBE_SERVICE_SCHED, &adapter->state);
+}
+
+static void ngbe_remove_adapter(struct ngbe_hw *hw)
+{
+ struct ngbe_adapter *adapter = hw->back;
+
+ if (!hw->hw_addr)
+ return;
+ hw->hw_addr = NULL;
+ e_dev_err("Adapter removed\n");
+ if (test_bit(__NGBE_SERVICE_INITED, &adapter->state))
+ ngbe_service_event_schedule(adapter);
+}
+
+static void ngbe_check_remove(struct ngbe_hw *hw, u32 reg)
+{
+ u32 value;
+
+ /* The following check not only optimizes a bit by not
+ * performing a read on the status register when the
+ * register just read was a status register read that
+ * returned NGBE_FAILED_READ_REG. It also blocks any
+ * potential recursion.
+ */
+ if (reg == NGBE_CFG_PORT_ST) {
+ ngbe_remove_adapter(hw);
+ return;
+ }
+ value = rd32(hw, NGBE_CFG_PORT_ST);
+ if (value == NGBE_FAILED_READ_REG)
+ ngbe_remove_adapter(hw);
+}
+
+static u32 ngbe_validate_register_read(struct ngbe_hw *hw, u32 reg, bool quiet)
+{
+ int i;
+ u32 value;
+ u8 __iomem *reg_addr;
+ struct ngbe_adapter *adapter = hw->back;
+
+ reg_addr = READ_ONCE(hw->hw_addr);
+ if (NGBE_REMOVED(reg_addr))
+ return NGBE_FAILED_READ_REG;
+ for (i = 0; i < NGBE_DEAD_READ_RETRIES; ++i) {
+ value = ngbe_rd32(reg_addr + reg);
+ if (value != NGBE_DEAD_READ_REG)
+ break;
+ }
+ if (quiet)
+ return value;
+ if (value == NGBE_DEAD_READ_REG)
+ e_err(drv, "%s: register %x read unchanged\n", __func__, reg);
+ else
+ e_warn(hw, "%s: register %x read recovered after %d retries\n",
+ __func__, reg, i + 1);
+ return value;
+}
+
+/**
+ * ngbe_read_reg - Read from device register
+ * @hw: hw specific details
+ * @reg: offset of register to read
+ *
+ * Returns: value read or NGBE_FAILED_READ_REG if removed
+ *
+ * This function is used to read device registers. It checks for device
+ * removal by confirming any read that returns all ones by checking the
+ * status register value for all ones. This function avoids reading from
+ * the hardware if a removal was previously detected in which case it
+ * returns NGBE_FAILED_READ_REG (all ones).
+ */
+u32 ngbe_read_reg(struct ngbe_hw *hw, u32 reg, bool quiet)
+{
+ u32 value;
+ u8 __iomem *reg_addr;
+
+ reg_addr = READ_ONCE(hw->hw_addr);
+ if (NGBE_REMOVED(reg_addr))
+ return NGBE_FAILED_READ_REG;
+ value = ngbe_rd32(reg_addr + reg);
+ if (unlikely(value == NGBE_FAILED_READ_REG))
+ ngbe_check_remove(hw, reg);
+ if (unlikely(value == NGBE_DEAD_READ_REG))
+ value = ngbe_validate_register_read(hw, reg, quiet);
+ return value;
+}
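+
+/*
+ * Usage sketch (illustrative): callers should treat an all-ones return as a
+ * possible surprise removal rather than a valid register value:
+ *
+ *	u32 val = ngbe_read_reg(hw, NGBE_CFG_PORT_ST, false);
+ *
+ *	if (val == NGBE_FAILED_READ_REG)
+ *		return;	(device is gone; hw->hw_addr has been cleared)
+ */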
+
+static void ngbe_release_hw_control(struct ngbe_adapter *adapter)
+{
+ /* Let firmware take over control of h/w */
+ wr32m(&adapter->hw, NGBE_CFG_PORT_CTL,
+ NGBE_CFG_PORT_CTL_DRV_LOAD, 0);
+}
+
+static void ngbe_get_hw_control(struct ngbe_adapter *adapter)
+{
+ /* Let firmware know the driver has taken over */
+ wr32m(&adapter->hw, NGBE_CFG_PORT_CTL,
+ NGBE_CFG_PORT_CTL_DRV_LOAD, NGBE_CFG_PORT_CTL_DRV_LOAD);
+}
+
+/**
+ * ngbe_set_ivar - set the IVAR registers, mapping interrupt causes to vectors
+ * @adapter: pointer to adapter struct
+ * @direction: 0 for Rx, 1 for Tx, -1 for other causes
+ * @queue: queue to map the corresponding interrupt to
+ * @msix_vector: the vector to map to the corresponding queue
+ *
+ **/
+static void ngbe_set_ivar(struct ngbe_adapter *adapter, s8 direction,
+ u16 queue, u16 msix_vector)
+{
+ u32 ivar, index;
+ struct ngbe_hw *hw = &adapter->hw;
+
+ if (direction == -1) {
+ /* other causes */
+ msix_vector |= NGBE_PX_IVAR_ALLOC_VAL;
+ index = 0;
+ ivar = rd32(&adapter->hw, NGBE_PX_MISC_IVAR);
+ ivar &= ~(0xFF << index);
+ ivar |= (msix_vector << index);
+ /* if assigned VFs >= 7, the pf misc irq shall be remapped to 0x88. */
+ if (adapter->flags2 & NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP)
+ ivar = msix_vector;
+ wr32(&adapter->hw, NGBE_PX_MISC_IVAR, ivar);
+ } else {
+ /* tx or rx causes */
+ msix_vector |= NGBE_PX_IVAR_ALLOC_VAL;
+ index = ((16 * (queue & 1)) + (8 * direction));
+ ivar = rd32(hw, NGBE_PX_IVAR(queue >> 1));
+ ivar &= ~(0xFF << index);
+ ivar |= (msix_vector << index);
+ wr32(hw, NGBE_PX_IVAR(queue >> 1), ivar);
+ }
+}
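+
+/*
+ * IVAR layout sketch (illustrative): each NGBE_PX_IVAR register carries the
+ * vector bytes for one queue pair. For Rx queue 3 (direction 0) the shift
+ * is 16 * (3 & 1) + 8 * 0 = 16, i.e. bits 23:16 of NGBE_PX_IVAR(1).
+ */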
+
+void ngbe_unmap_and_free_tx_resource(struct ngbe_ring *ring,
+ struct ngbe_tx_buffer *tx_buffer)
+{
+ if (tx_buffer->skb) {
+ dev_kfree_skb_any(tx_buffer->skb);
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_single(ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ } else if (dma_unmap_len(tx_buffer, len)) {
+ dma_unmap_page(ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ }
+ tx_buffer->next_to_watch = NULL;
+ tx_buffer->skb = NULL;
+ dma_unmap_len_set(tx_buffer, len, 0);
+ /* tx_buffer must be completely set up in the transmit path */
+}
+
+static void ngbe_update_xoff_rx_lfc(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ struct ngbe_hw_stats *hwstats = &adapter->stats;
+ int i;
+ u32 data;
+
+ if ((hw->fc.current_mode != ngbe_fc_full) &&
+ (hw->fc.current_mode != ngbe_fc_rx_pause))
+ return;
+
+ data = rd32(hw, NGBE_MAC_LXOFFRXC);
+
+ hwstats->lxoffrxc += data;
+
+ /* refill credits (no tx hang) if we received xoff */
+ if (!data)
+ return;
+
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ clear_bit(__NGBE_HANG_CHECK_ARMED,
+ &adapter->tx_ring[i]->state);
+}
+
+static u64 ngbe_get_tx_completed(struct ngbe_ring *ring)
+{
+ return ring->stats.packets;
+}
+
+static u64 ngbe_get_tx_pending(struct ngbe_ring *ring)
+{
+ struct ngbe_adapter *adapter;
+ struct ngbe_hw *hw;
+ u32 head, tail;
+
+ if (ring->accel)
+ adapter = ring->accel->adapter;
+ else
+ adapter = ring->q_vector->adapter;
+
+ hw = &adapter->hw;
+ head = rd32(hw, NGBE_PX_TR_RP(ring->reg_idx));
+ tail = rd32(hw, NGBE_PX_TR_WP(ring->reg_idx));
+
+ return ((head <= tail) ? tail : tail + ring->count) - head;
+}
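+
+/*
+ * Pending-count sketch (illustrative): with count = 512, a hardware read
+ * pointer (head) of 500 and a software write pointer (tail) of 10, the ring
+ * has wrapped, so pending = (10 + 512) - 500 = 22 descriptors remain.
+ */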
+
+static inline bool ngbe_check_tx_hang(struct ngbe_ring *tx_ring)
+{
+ u64 tx_done = ngbe_get_tx_completed(tx_ring);
+ u64 tx_done_old = tx_ring->tx_stats.tx_done_old;
+ u64 tx_pending = ngbe_get_tx_pending(tx_ring);
+
+ clear_check_for_tx_hang(tx_ring);
+
+ /*
+ * Check for a hung queue, but be thorough. This verifies
+ * that a transmit has been completed since the previous
+ * check AND there is at least one packet pending. The
+ * ARMED bit is set to indicate a potential hang. The
+ * bit is cleared if a pause frame is received to remove
+ * false hang detection due to PFC or 802.3x frames. By
+ * requiring this to fail twice we avoid races with
+ * pfc clearing the ARMED bit and conditions where we
+ * run the check_tx_hang logic with a transmit completion
+ * pending but without time to complete it yet.
+ */
+ if (tx_done_old == tx_done && tx_pending) {
+
+ /* make sure it is true for two checks in a row */
+ return test_and_set_bit(__NGBE_HANG_CHECK_ARMED,
+ &tx_ring->state);
+ }
+ /* update completed stats and continue */
+ tx_ring->tx_stats.tx_done_old = tx_done;
+ /* reset the countdown */
+ clear_bit(__NGBE_HANG_CHECK_ARMED, &tx_ring->state);
+
+ return false;
+}
+
+/**
+ * ngbe_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ **/
+static void ngbe_tx_timeout(struct net_device *netdev)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ bool real_tx_hang = false;
+ int i;
+ u16 value = 0;
+ u32 value2 = 0;
+ u32 head, tail;
+
+#define TX_TIMEO_LIMIT 16000
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct ngbe_ring *tx_ring = adapter->tx_ring[i];
+ if (check_for_tx_hang(tx_ring) && ngbe_check_tx_hang(tx_ring)) {
+ real_tx_hang = true;
+ e_info(drv, "&&ngbe_tx_timeout:i=%d&&", i);
+ }
+ }
+
+ pci_read_config_word(adapter->pdev, PCI_VENDOR_ID, &value);
+ ERROR_REPORT1(NGBE_ERROR_POLLING, "pci vendor id is 0x%x\n", value);
+
+ pci_read_config_word(adapter->pdev, PCI_COMMAND, &value);
+ ERROR_REPORT1(NGBE_ERROR_POLLING, "pci command reg is 0x%x.\n", value);
+
+ value2 = rd32(&adapter->hw, 0x10000);
+ ERROR_REPORT1(NGBE_ERROR_POLLING, "reg 0x10000 value is 0x%08x\n", value2);
+ value2 = rd32(&adapter->hw, 0x180d0);
+ ERROR_REPORT1(NGBE_ERROR_POLLING, "reg 0x180d0 value is 0x%08x\n", value2);
+ value2 = rd32(&adapter->hw, 0x180d4);
+ ERROR_REPORT1(NGBE_ERROR_POLLING, "reg 0x180d4 value is 0x%08x\n", value2);
+ value2 = rd32(&adapter->hw, 0x180d8);
+ ERROR_REPORT1(NGBE_ERROR_POLLING, "reg 0x180d8 value is 0x%08x\n", value2);
+ value2 = rd32(&adapter->hw, 0x180dc);
+ ERROR_REPORT1(NGBE_ERROR_POLLING, "reg 0x180dc value is 0x%08x\n", value2);
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ head = rd32(&adapter->hw, NGBE_PX_TR_RP(adapter->tx_ring[i]->reg_idx));
+ tail = rd32(&adapter->hw, NGBE_PX_TR_WP(adapter->tx_ring[i]->reg_idx));
+
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "tx ring %d next_to_use is %d, next_to_clean is %d\n",
+ i, adapter->tx_ring[i]->next_to_use, adapter->tx_ring[i]->next_to_clean);
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "tx ring %d hw rp is 0x%x, wp is 0x%x\n", i, head, tail);
+ }
+
+ value2 = rd32(&adapter->hw, NGBE_PX_IMS);
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "PX_IMS value is 0x%08x\n", value2);
+
+ if (value2) {
+ ERROR_REPORT1(NGBE_ERROR_POLLING, "clear interrupt mask.\n");
+ wr32(&adapter->hw, NGBE_PX_ICS, value2);
+ wr32(&adapter->hw, NGBE_PX_IMC, value2);
+ }
+
+ if (adapter->hw.bus.lan_id == 0) {
+ ERROR_REPORT1(NGBE_ERROR_POLLING, "tx timeout. do pcie recovery.\n");
+ adapter->flags2 |= NGBE_FLAG2_PCIE_NEED_RECOVER;
+ ngbe_service_event_schedule(adapter);
+ } else
+ wr32(&adapter->hw, NGBE_MIS_PF_SM, 1);
+}
+
+/**
+ * ngbe_clean_tx_irq - Reclaim resources after transmit completes
+ * @q_vector: structure containing interrupt and ring information
+ * @tx_ring: tx ring to clean
+ **/
+static bool ngbe_clean_tx_irq(struct ngbe_q_vector *q_vector,
+ struct ngbe_ring *tx_ring)
+{
+ struct ngbe_adapter *adapter = q_vector->adapter;
+ struct ngbe_tx_buffer *tx_buffer;
+ union ngbe_tx_desc *tx_desc;
+ unsigned int total_bytes = 0, total_packets = 0;
+ unsigned int budget = q_vector->tx.work_limit;
+ unsigned int i = tx_ring->next_to_clean;
+
+ if (test_bit(__NGBE_DOWN, &adapter->state))
+ return true;
+
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+ tx_desc = NGBE_TX_DESC(tx_ring, i);
+ i -= tx_ring->count;
+
+ do {
+ union ngbe_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+ /* if next_to_watch is not set then there is no work pending */
+ if (!eop_desc)
+ break;
+
+ /* prevent any other reads prior to eop_desc */
+ read_barrier_depends();
+
+ /* if DD is not set pending work has not been completed */
+ if (!(eop_desc->wb.status & cpu_to_le32(NGBE_TXD_STAT_DD)))
+ break;
+
+ /* clear next_to_watch to prevent false hangs */
+ tx_buffer->next_to_watch = NULL;
+
+ /* update the statistics for this packet */
+ total_bytes += tx_buffer->bytecount;
+ total_packets += tx_buffer->gso_segs;
+
+ /* free the skb */
+ dev_consume_skb_any(tx_buffer->skb);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+
+ /* clear tx_buffer data */
+ tx_buffer->skb = NULL;
+ dma_unmap_len_set(tx_buffer, len, 0);
+
+ /* unmap remaining buffers */
+ while (tx_desc != eop_desc) {
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = NGBE_TX_DESC(tx_ring, 0);
+ }
+
+ /* unmap any remaining paged data */
+ if (dma_unmap_len(tx_buffer, len)) {
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+ }
+ }
+
+ /* move us one more past the eop_desc for start of next pkt */
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = NGBE_TX_DESC(tx_ring, 0);
+ }
+
+ /* issue prefetch for next Tx descriptor */
+ prefetch(tx_desc);
+
+ /* update budget accounting */
+ budget--;
+ } while (likely(budget));
+
+ i += tx_ring->count;
+ tx_ring->next_to_clean = i;
+ u64_stats_update_begin(&tx_ring->syncp);
+ tx_ring->stats.bytes += total_bytes;
+ tx_ring->stats.packets += total_packets;
+ u64_stats_update_end(&tx_ring->syncp);
+ q_vector->tx.total_bytes += total_bytes;
+ q_vector->tx.total_packets += total_packets;
+
+ if (check_for_tx_hang(tx_ring)) {
+ if (!ngbe_check_tx_hang(tx_ring)) {
+ adapter->hang_cnt = 0;
+ } else
+ adapter->hang_cnt++;
+
+ if (adapter->hang_cnt >= 5) {
+ /* schedule immediate reset if we believe we hung */
+ struct ngbe_hw *hw = &adapter->hw;
+ u16 value = 0;
+
+ e_err(drv, "Detected Tx Unit Hang\n"
+ " Tx Queue <%d>\n"
+ " TDH, TDT <%x>, <%x>\n"
+ " next_to_use <%x>\n"
+ " next_to_clean <%x>\n"
+ "tx_buffer_info[next_to_clean]\n"
+ " time_stamp <%lx>\n"
+ " jiffies <%lx>\n",
+ tx_ring->queue_index,
+ rd32(hw, NGBE_PX_TR_RP(tx_ring->reg_idx)),
+ rd32(hw, NGBE_PX_TR_WP(tx_ring->reg_idx)),
+ tx_ring->next_to_use, i,
+ tx_ring->tx_buffer_info[i].time_stamp, jiffies);
+
+ pci_read_config_word(adapter->pdev, PCI_VENDOR_ID, &value);
+ if (value == NGBE_FAILED_READ_CFG_WORD) {
+ e_info(hw, "pcie link has been lost.\n");
+ }
+
+ netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+
+ e_info(probe,
+ "tx hang %d detected on queue %d, resetting adapter\n",
+ adapter->tx_timeout_count + 1, tx_ring->queue_index);
+
+ /* schedule immediate reset if we believe we hung */
+ e_info(hw, "real tx hang. do pcie recovery.\n");
+ adapter->flags2 |= NGBE_FLAG2_PCIE_NEED_RECOVER;
+ ngbe_service_event_schedule(adapter);
+
+ /* the adapter is about to reset, no point in enabling stuff */
+ return true;
+ }
+ }
+
+ netdev_tx_completed_queue(txring_txq(tx_ring),
+ total_packets, total_bytes);
+
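+/* wake the queue only when at least twice the worst-case descriptor
+ * count is free, to avoid bouncing between stopped and running
+ */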
+#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+ if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
+ (ngbe_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
+ /* Make sure that anybody stopping the queue after this
+ * sees the new next_to_clean.
+ */
+ smp_mb();
+
+ if (__netif_subqueue_stopped(tx_ring->netdev,
+ tx_ring->queue_index)
+ && !test_bit(__NGBE_DOWN, &adapter->state)) {
+ netif_wake_subqueue(tx_ring->netdev,
+ tx_ring->queue_index);
+ ++tx_ring->tx_stats.restart_queue;
+ }
+ }
+
+ return !!budget;
+}
+
+#define NGBE_RSS_L4_TYPES_MASK \
+ ((1ul << NGBE_RXD_RSSTYPE_IPV4_TCP) | \
+ (1ul << NGBE_RXD_RSSTYPE_IPV4_UDP) | \
+ (1ul << NGBE_RXD_RSSTYPE_IPV4_SCTP) | \
+ (1ul << NGBE_RXD_RSSTYPE_IPV6_TCP) | \
+ (1ul << NGBE_RXD_RSSTYPE_IPV6_UDP) | \
+ (1ul << NGBE_RXD_RSSTYPE_IPV6_SCTP))
+
+static inline void ngbe_rx_hash(struct ngbe_ring *ring,
+ union ngbe_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ u16 rss_type;
+
+ if (!(ring->netdev->features & NETIF_F_RXHASH))
+ return;
+
+ rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) &
+ NGBE_RXD_RSSTYPE_MASK;
+
+ if (!rss_type)
+ return;
+
+ skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
+ (NGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ?
+ PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
+}
+
+/**
+ * ngbe_rx_checksum - indicate in skb if hw indicated a good cksum
+ * @ring: structure containing ring specific data
+ * @rx_desc: current Rx descriptor being processed
+ * @skb: skb currently being received and modified
+ **/
+static inline void ngbe_rx_checksum(struct ngbe_ring *ring,
+ union ngbe_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ ngbe_dptype dptype = decode_rx_desc_ptype(rx_desc);
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ skb_checksum_none_assert(skb);
+
+ /* Rx csum disabled */
+ if (!(ring->netdev->features & NETIF_F_RXCSUM))
+ return;
+
+ /* if IPv4 header checksum error */
+ if ((ngbe_test_staterr(rx_desc, NGBE_RXD_STAT_IPCS) &&
+ ngbe_test_staterr(rx_desc, NGBE_RXD_ERR_IPE)) ||
+ (ngbe_test_staterr(rx_desc, NGBE_RXD_STAT_OUTERIPCS) &&
+ ngbe_test_staterr(rx_desc, NGBE_RXD_ERR_OUTERIPER))) {
+ ring->rx_stats.csum_err++;
+ return;
+ }
+
+	/* L4 checksum offload flag must be set for the below code to work */
+ if (!ngbe_test_staterr(rx_desc, NGBE_RXD_STAT_L4CS))
+ return;
+
+	/* likely incorrect csum if IPv6 Dest Header found */
+ if (dptype.prot != NGBE_DEC_PTYPE_PROT_SCTP && NGBE_RXD_IPV6EX(rx_desc))
+ return;
+
+ /* if L4 checksum error */
+ if (ngbe_test_staterr(rx_desc, NGBE_RXD_ERR_TCPE)) {
+ ring->rx_stats.csum_err++;
+ return;
+ }
+ /* If there is an outer header present that might contain a checksum
+ * we need to bump the checksum level by 1 to reflect the fact that
+ * we are indicating we validated the inner checksum.
+ */
+	if (dptype.etype >= NGBE_DEC_PTYPE_ETYPE_IG)
+		skb->csum_level = 1;
+
+ /* It must be a TCP or UDP or SCTP packet with a valid checksum */
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ ring->rx_stats.csum_good_cnt++;
+}
+
+static bool ngbe_alloc_mapped_skb(struct ngbe_ring *rx_ring,
+ struct ngbe_rx_buffer *bi)
+{
+ struct sk_buff *skb = bi->skb;
+ dma_addr_t dma = bi->dma;
+
+ if (unlikely(dma))
+ return true;
+
+ if (likely(!skb)) {
+ skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+ rx_ring->rx_buf_len);
+ if (unlikely(!skb)) {
+ rx_ring->rx_stats.alloc_rx_buff_failed++;
+ return false;
+ }
+
+ bi->skb = skb;
+ }
+
+ dma = dma_map_single(rx_ring->dev, skb->data,
+ rx_ring->rx_buf_len, DMA_FROM_DEVICE);
+
+ /*
+ * if mapping failed free memory back to system since
+ * there isn't much point in holding memory we can't use
+ */
+ if (dma_mapping_error(rx_ring->dev, dma)) {
+ dev_kfree_skb_any(skb);
+ bi->skb = NULL;
+
+ rx_ring->rx_stats.alloc_rx_buff_failed++;
+ return false;
+ }
+
+ bi->dma = dma;
+ return true;
+}
+
+static bool ngbe_alloc_mapped_page(struct ngbe_ring *rx_ring,
+ struct ngbe_rx_buffer *bi)
+{
+ struct page *page = bi->page;
+ dma_addr_t dma;
+
+ /* since we are recycling buffers we should seldom need to alloc */
+ if (likely(page))
+ return true;
+
+ /* alloc new page for storage */
+ page = dev_alloc_pages(ngbe_rx_pg_order(rx_ring));
+ if (unlikely(!page)) {
+ rx_ring->rx_stats.alloc_rx_page_failed++;
+ return false;
+ }
+
+ /* map page for use */
+ dma = dma_map_page(rx_ring->dev, page, 0,
+ ngbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE);
+
+ /*
+ * if mapping failed free memory back to system since
+ * there isn't much point in holding memory we can't use
+ */
+ if (dma_mapping_error(rx_ring->dev, dma)) {
+ __free_pages(page, ngbe_rx_pg_order(rx_ring));
+
+ rx_ring->rx_stats.alloc_rx_page_failed++;
+ return false;
+ }
+
+ bi->page_dma = dma;
+ bi->page = page;
+ bi->page_offset = 0;
+
+ return true;
+}
+
+/**
+ * ngbe_alloc_rx_buffers - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ **/
+void ngbe_alloc_rx_buffers(struct ngbe_ring *rx_ring, u16 cleaned_count)
+{
+ union ngbe_rx_desc *rx_desc;
+ struct ngbe_rx_buffer *bi;
+ u16 i = rx_ring->next_to_use;
+
+ /* nothing to do */
+ if (!cleaned_count)
+ return;
+
+ rx_desc = NGBE_RX_DESC(rx_ring, i);
+ bi = &rx_ring->rx_buffer_info[i];
+ i -= rx_ring->count;
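+	/* like the Tx clean path, i is biased negative by the ring size
+	 * so that i == 0 marks the wrap point
+	 */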
+
+ do {
+ if (ring_is_hs_enabled(rx_ring)) {
+ if (!ngbe_alloc_mapped_skb(rx_ring, bi))
+ break;
+ rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
+ }
+
+ if (!ngbe_alloc_mapped_page(rx_ring, bi))
+ break;
+ rx_desc->read.pkt_addr =
+ cpu_to_le64(bi->page_dma + bi->page_offset);
+
+ rx_desc++;
+ bi++;
+ i++;
+ if (unlikely(!i)) {
+ rx_desc = NGBE_RX_DESC(rx_ring, 0);
+ bi = rx_ring->rx_buffer_info;
+ i -= rx_ring->count;
+ }
+
+ /* clear the status bits for the next_to_use descriptor */
+ rx_desc->wb.upper.status_error = 0;
+
+ cleaned_count--;
+ } while (cleaned_count);
+
+ i += rx_ring->count;
+
+ if (rx_ring->next_to_use != i) {
+ rx_ring->next_to_use = i;
+
+ /* update next to alloc since we have filled the ring */
+ rx_ring->next_to_alloc = i;
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+ writel(i, rx_ring->tail);
+ }
+}
+
+static inline u16 ngbe_get_hlen(struct ngbe_ring *rx_ring,
+ union ngbe_rx_desc *rx_desc)
+{
+ __le16 hdr_info = rx_desc->wb.lower.lo_dword.hs_rss.hdr_info;
+ u16 hlen = le16_to_cpu(hdr_info) & NGBE_RXD_HDRBUFLEN_MASK;
+
+ UNREFERENCED_PARAMETER(rx_ring);
+
+ if (hlen > (NGBE_RX_HDR_SIZE << NGBE_RXD_HDRBUFLEN_SHIFT))
+ hlen = 0;
+ else
+ hlen >>= NGBE_RXD_HDRBUFLEN_SHIFT;
+
+ return hlen;
+}
+
+static void ngbe_rx_vlan(struct ngbe_ring *ring,
+ union ngbe_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ u8 idx = 0;
+ u16 ethertype;
+
+ if ((ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+ ngbe_test_staterr(rx_desc, NGBE_RXD_STAT_VP)) {
+ idx = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) &
+ NGBE_RXD_TPID_MASK) >> NGBE_RXD_TPID_SHIFT;
+ ethertype = ring->q_vector->adapter->hw.tpid[idx];
+ __vlan_hwaccel_put_tag(skb,
+ htons(ethertype),
+ le16_to_cpu(rx_desc->wb.upper.vlan));
+ }
+}
+
+/**
+ * ngbe_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, timestamp, protocol, and
+ * other fields within the skb.
+ **/
+static void ngbe_process_skb_fields(struct ngbe_ring *rx_ring,
+ union ngbe_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ u32 flags = rx_ring->q_vector->adapter->flags;
+
+ ngbe_rx_hash(rx_ring, rx_desc, skb);
+ ngbe_rx_checksum(rx_ring, rx_desc, skb);
+
+ if (unlikely(flags & NGBE_FLAG_RX_HWTSTAMP_ENABLED) &&
+ unlikely(ngbe_test_staterr(rx_desc, NGBE_RXD_STAT_TS))) {
+ ngbe_ptp_rx_hwtstamp(rx_ring->q_vector->adapter, skb);
+ rx_ring->last_rx_timestamp = jiffies;
+ }
+
+ ngbe_rx_vlan(rx_ring, rx_desc, skb);
+ skb_record_rx_queue(skb, rx_ring->queue_index);
+ skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+}
+
+static void ngbe_rx_skb(struct ngbe_q_vector *q_vector,
+ struct ngbe_ring *rx_ring,
+ union ngbe_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ napi_gro_receive(&q_vector->napi, skb);
+}
+
+/**
+ * ngbe_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ * @skb: Current socket buffer containing buffer in progress
+ *
+ * This function updates next to clean. If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ **/
+static bool ngbe_is_non_eop(struct ngbe_ring *rx_ring,
+ union ngbe_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct ngbe_rx_buffer *rx_buffer =
+ &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ /* fetch, update, and store next to clean */
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+
+ prefetch(NGBE_RX_DESC(rx_ring, ntc));
+
+ /* if we are the last buffer then there is nothing else to do */
+ if (likely(ngbe_test_staterr(rx_desc, NGBE_RXD_STAT_EOP)))
+ return false;
+
+ /* place skb in next buffer to be received */
+ if (ring_is_hs_enabled(rx_ring)) {
+ rx_buffer->skb = rx_ring->rx_buffer_info[ntc].skb;
+ rx_buffer->dma = rx_ring->rx_buffer_info[ntc].dma;
+ rx_ring->rx_buffer_info[ntc].dma = 0;
+ }
+ rx_ring->rx_buffer_info[ntc].skb = skb;
+
+ rx_ring->rx_stats.non_eop_descs++;
+
+ return true;
+}
+
+/**
+ * ngbe_pull_tail - ngbe specific version of skb_pull_tail
+ * @skb: pointer to current skb being adjusted
+ *
+ * This function is an ngbe specific version of __pskb_pull_tail. The
+ * main difference between this version and the original function is that
+ * this function can make several assumptions about the state of things
+ * that allow for significant optimizations versus the standard function.
+ * As a result we can do things like drop a frag and maintain an accurate
+ * truesize for the skb.
+ */
+static void ngbe_pull_tail(struct sk_buff *skb)
+{
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+ unsigned char *va;
+ unsigned int pull_len;
+
+ /*
+ * it is valid to use page_address instead of kmap since we are
+ * working with pages allocated out of the lomem pool per
+ * alloc_page(GFP_ATOMIC)
+ */
+ va = skb_frag_address(frag);
+
+ /*
+ * we need the header to contain the greater of either ETH_HLEN or
+ * 60 bytes if the skb->len is less than 60 for skb_pad.
+ */
+ pull_len = eth_get_headlen(va, NGBE_RX_HDR_SIZE);
+
+ /* align pull length to size of long to optimize memcpy performance */
+ skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+ /* update all of the pointers */
+ skb_frag_size_sub(frag, pull_len);
+ frag->page_offset += pull_len;
+ skb->data_len -= pull_len;
+ skb->tail += pull_len;
+}
+
+/**
+ * ngbe_dma_sync_frag - perform DMA sync for first frag of SKB
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being updated
+ *
+ * This function provides a basic DMA sync up for the first fragment of an
+ * skb. The reason for doing this is that the first fragment cannot be
+ * unmapped until we have reached the end of packet descriptor for a buffer
+ * chain.
+ */
+static void ngbe_dma_sync_frag(struct ngbe_ring *rx_ring,
+ struct sk_buff *skb)
+{
+ if (ring_uses_build_skb(rx_ring)) {
+ unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK;
+
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ NGBE_CB(skb)->dma,
+ offset,
+ skb_headlen(skb),
+ DMA_FROM_DEVICE);
+ } else {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ NGBE_CB(skb)->dma,
+ frag->page_offset,
+ skb_frag_size(frag),
+ DMA_FROM_DEVICE);
+ }
+
+ /* if the page was released unmap it */
+ if (unlikely(NGBE_CB(skb)->page_released)) {
+ dma_unmap_page_attrs(rx_ring->dev, NGBE_CB(skb)->dma,
+ ngbe_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE,
+ NGBE_RX_DMA_ATTR);
+ }
+}
+
+/**
+ * ngbe_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Check for corrupted packet headers caused by senders on the local L2
+ * embedded NIC switch not setting up their Tx Descriptors right. These
+ * should be very rare.
+ *
+ * Also address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool ngbe_cleanup_headers(struct ngbe_ring *rx_ring,
+ union ngbe_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct net_device *netdev = rx_ring->netdev;
+
+ /* verify that the packet does not have any known errors */
+ if (unlikely(ngbe_test_staterr(rx_desc,
+ NGBE_RXD_ERR_FRAME_ERR_MASK) &&
+ !(netdev->features & NETIF_F_RXALL))) {
+ dev_kfree_skb_any(skb);
+ return true;
+ }
+
+ /* place header in linear portion of buffer */
+ if (skb_is_nonlinear(skb) && !skb_headlen(skb))
+ ngbe_pull_tail(skb);
+
+ /* if eth_skb_pad returns an error the skb was freed */
+ if (eth_skb_pad(skb))
+ return true;
+
+ return false;
+}
+
+/**
+ * ngbe_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void ngbe_reuse_rx_page(struct ngbe_ring *rx_ring,
+ struct ngbe_rx_buffer *old_buff)
+{
+ struct ngbe_rx_buffer *new_buff;
+ u16 nta = rx_ring->next_to_alloc;
+
+ new_buff = &rx_ring->rx_buffer_info[nta];
+
+ /* update, and store next to alloc */
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ /* transfer page from old buffer to new buffer */
+ new_buff->page_dma = old_buff->page_dma;
+ new_buff->page = old_buff->page;
+ new_buff->page_offset = old_buff->page_offset;
+
+ /* sync the buffer for use by the device */
+ dma_sync_single_range_for_device(rx_ring->dev, new_buff->page_dma,
+ new_buff->page_offset,
+ ngbe_rx_bufsz(rx_ring),
+ DMA_FROM_DEVICE);
+}
+
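+/* pages from a remote NUMA node or from the emergency (pfmemalloc)
+ * reserves must not be recycled back into the Rx ring
+ */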
+static inline bool ngbe_page_is_reserved(struct page *page)
+{
+ return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
+}
+
+/**
+ * ngbe_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @rx_desc: descriptor containing length of buffer written by hardware
+ * @skb: sk_buff to place the data into
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the adapter.
+ **/
+static bool ngbe_add_rx_frag(struct ngbe_ring *rx_ring,
+ struct ngbe_rx_buffer *rx_buffer,
+ union ngbe_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct page *page = rx_buffer->page;
+ unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ngbe_rx_bufsz(rx_ring);
+#else
+ unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+ unsigned int last_offset = ngbe_rx_pg_size(rx_ring) -
+ ngbe_rx_bufsz(rx_ring);
+#endif
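+	/* on 4K-page systems each buffer is typically half a page, so reuse
+	 * flips page_offset between the two halves; on larger pages the
+	 * offset instead walks forward through the page in truesize steps
+	 */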
+
+ if ((size <= NGBE_RX_HDR_SIZE) && !skb_is_nonlinear(skb) &&
+ !ring_is_hs_enabled(rx_ring)) {
+ unsigned char *va = page_address(page) + rx_buffer->page_offset;
+
+ memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+ /* page is not reserved, we can reuse buffer as-is */
+ if (likely(!ngbe_page_is_reserved(page)))
+ return true;
+
+ /* this page cannot be reused so discard it */
+ __free_pages(page, ngbe_rx_pg_order(rx_ring));
+ return false;
+ }
+
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+ rx_buffer->page_offset, size, truesize);
+
+ /* avoid re-using remote pages */
+ if (unlikely(ngbe_page_is_reserved(page)))
+ return false;
+
+#if (PAGE_SIZE < 8192)
+ /* if we are only owner of page we can reuse it */
+ if (unlikely(page_count(page) != 1))
+ return false;
+
+ /* flip page offset to other buffer */
+ rx_buffer->page_offset ^= truesize;
+#else
+ /* move offset up to the next cache line */
+ rx_buffer->page_offset += truesize;
+
+ if (rx_buffer->page_offset > last_offset)
+ return false;
+#endif
+
+ /* Even if we own the page, we are not allowed to use atomic_set()
+ * This would break get_page_unless_zero() users.
+ */
+ page_ref_inc(page);
+
+ return true;
+}
+
+static struct sk_buff *ngbe_fetch_rx_buffer(struct ngbe_ring *rx_ring,
+ union ngbe_rx_desc *rx_desc)
+{
+ struct ngbe_rx_buffer *rx_buffer;
+ struct sk_buff *skb;
+ struct page *page;
+
+ rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+ page = rx_buffer->page;
+ prefetchw(page);
+
+ skb = rx_buffer->skb;
+
+ if (likely(!skb)) {
+ void *page_addr = page_address(page) +
+ rx_buffer->page_offset;
+
+ /* prefetch first cache line of first page */
+ prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+ prefetch(page_addr + L1_CACHE_BYTES);
+#endif
+
+ /* allocate a skb to store the frags */
+ skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+ NGBE_RX_HDR_SIZE);
+ if (unlikely(!skb)) {
+ rx_ring->rx_stats.alloc_rx_buff_failed++;
+ return NULL;
+ }
+
+ /*
+ * we will be copying header into skb->data in
+ * pskb_may_pull so it is in our interest to prefetch
+ * it now to avoid a possible cache miss
+ */
+ prefetchw(skb->data);
+
+ /*
+ * Delay unmapping of the first packet. It carries the
+ * header information, HW may still access the header
+ * after the writeback. Only unmap it when EOP is
+ * reached
+ */
+ if (likely(ngbe_test_staterr(rx_desc, NGBE_RXD_STAT_EOP)))
+ goto dma_sync;
+
+ NGBE_CB(skb)->dma = rx_buffer->page_dma;
+ } else {
+ if (ngbe_test_staterr(rx_desc, NGBE_RXD_STAT_EOP))
+ ngbe_dma_sync_frag(rx_ring, skb);
+
+dma_sync:
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->page_dma,
+ rx_buffer->page_offset,
+ ngbe_rx_bufsz(rx_ring),
+ DMA_FROM_DEVICE);
+
+ rx_buffer->skb = NULL;
+ }
+
+ /* pull page into skb */
+ if (ngbe_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+ /* hand second half of page back to the ring */
+ ngbe_reuse_rx_page(rx_ring, rx_buffer);
+ } else if (NGBE_CB(skb)->dma == rx_buffer->page_dma) {
+ /* the page has been released from the ring */
+ NGBE_CB(skb)->page_released = true;
+ } else {
+ /* we are not reusing the buffer so unmap it */
+ dma_unmap_page(rx_ring->dev, rx_buffer->page_dma,
+ ngbe_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE);
+ }
+
+ /* clear contents of buffer_info */
+ rx_buffer->page = NULL;
+
+ return skb;
+}
+
+static struct sk_buff *ngbe_fetch_rx_buffer_hs(struct ngbe_ring *rx_ring,
+ union ngbe_rx_desc *rx_desc)
+{
+ struct ngbe_rx_buffer *rx_buffer;
+ struct sk_buff *skb;
+ struct page *page;
+ int hdr_len = 0;
+
+ rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+ page = rx_buffer->page;
+ prefetchw(page);
+
+ skb = rx_buffer->skb;
+ rx_buffer->skb = NULL;
+ prefetchw(skb->data);
+
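+	/* first buffer of a header-split frame: if hardware reported a
+	 * header length, pull the header into the linear area and keep the
+	 * header buffer mapped until EOP; otherwise the header buffer went
+	 * unused and can be unmapped right away
+	 */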
+ if (!skb_is_nonlinear(skb)) {
+ hdr_len = ngbe_get_hlen(rx_ring, rx_desc);
+ if (hdr_len > 0) {
+ __skb_put(skb, hdr_len);
+ NGBE_CB(skb)->dma_released = true;
+ NGBE_CB(skb)->dma = rx_buffer->dma;
+ rx_buffer->dma = 0;
+ } else {
+ dma_unmap_single(rx_ring->dev,
+ rx_buffer->dma,
+ rx_ring->rx_buf_len,
+ DMA_FROM_DEVICE);
+ rx_buffer->dma = 0;
+ if (likely(ngbe_test_staterr(rx_desc, NGBE_RXD_STAT_EOP)))
+ goto dma_sync;
+ NGBE_CB(skb)->dma = rx_buffer->page_dma;
+ goto add_frag;
+ }
+ }
+
+ if (ngbe_test_staterr(rx_desc, NGBE_RXD_STAT_EOP)) {
+ if (skb_headlen(skb)) {
+			if (NGBE_CB(skb)->dma_released) {
+ dma_unmap_single(rx_ring->dev,
+ NGBE_CB(skb)->dma,
+ rx_ring->rx_buf_len,
+ DMA_FROM_DEVICE);
+ NGBE_CB(skb)->dma = 0;
+ NGBE_CB(skb)->dma_released = false;
+ }
+		} else {
+			ngbe_dma_sync_frag(rx_ring, skb);
+		}
+	}
+
+dma_sync:
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->page_dma,
+ rx_buffer->page_offset,
+ ngbe_rx_bufsz(rx_ring),
+ DMA_FROM_DEVICE);
+add_frag:
+ /* pull page into skb */
+ if (ngbe_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+ /* hand second half of page back to the ring */
+ ngbe_reuse_rx_page(rx_ring, rx_buffer);
+ } else if (NGBE_CB(skb)->dma == rx_buffer->page_dma) {
+ /* the page has been released from the ring */
+ NGBE_CB(skb)->page_released = true;
+ } else {
+ /* we are not reusing the buffer so unmap it */
+ dma_unmap_page(rx_ring->dev, rx_buffer->page_dma,
+ ngbe_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE);
+ }
+
+ /* clear contents of buffer_info */
+ rx_buffer->page = NULL;
+
+ return skb;
+}
+
+/**
+ * ngbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @q_vector: structure containing interrupt and ring information
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @budget: Total limit on number of packets to process
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing. The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed.
+ **/
+static int ngbe_clean_rx_irq(struct ngbe_q_vector *q_vector,
+ struct ngbe_ring *rx_ring,
+ int budget)
+{
+ unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+ u16 cleaned_count = ngbe_desc_unused(rx_ring);
+
+ do {
+ union ngbe_rx_desc *rx_desc;
+ struct sk_buff *skb;
+
+ /* return some buffers to hardware, one at a time is too slow */
+ if (cleaned_count >= NGBE_RX_BUFFER_WRITE) {
+ ngbe_alloc_rx_buffers(rx_ring, cleaned_count);
+ cleaned_count = 0;
+ }
+
+ rx_desc = NGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+ if (!ngbe_test_staterr(rx_desc, NGBE_RXD_STAT_DD))
+ break;
+
+ /* This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc until we know the
+ * descriptor has been written back
+ */
+ dma_rmb();
+
+ /* retrieve a buffer from the ring */
+ if (ring_is_hs_enabled(rx_ring))
+ skb = ngbe_fetch_rx_buffer_hs(rx_ring, rx_desc);
+ else
+ skb = ngbe_fetch_rx_buffer(rx_ring, rx_desc);
+
+ /* exit if we failed to retrieve a buffer */
+ if (!skb)
+ break;
+
+ cleaned_count++;
+
+ /* place incomplete frames back on ring for completion */
+ if (ngbe_is_non_eop(rx_ring, rx_desc, skb))
+ continue;
+
+ /* verify the packet layout is correct */
+ if (ngbe_cleanup_headers(rx_ring, rx_desc, skb))
+ continue;
+
+ /* probably a little skewed due to removing CRC */
+ total_rx_bytes += skb->len;
+
+ /* populate checksum, timestamp, VLAN, and protocol */
+ ngbe_process_skb_fields(rx_ring, rx_desc, skb);
+
+ ngbe_rx_skb(q_vector, rx_ring, rx_desc, skb);
+
+ /* update budget accounting */
+ total_rx_packets++;
+ } while (likely(total_rx_packets < budget));
+
+ u64_stats_update_begin(&rx_ring->syncp);
+ rx_ring->stats.packets += total_rx_packets;
+ rx_ring->stats.bytes += total_rx_bytes;
+ u64_stats_update_end(&rx_ring->syncp);
+ q_vector->rx.total_packets += total_rx_packets;
+ q_vector->rx.total_bytes += total_rx_bytes;
+
+ return total_rx_packets;
+}
+
+/**
+ * ngbe_configure_msix - Configure MSI-X hardware
+ * @adapter: board private structure
+ *
+ * ngbe_configure_msix sets up the hardware to properly generate MSI-X
+ * interrupts.
+ **/
+static void ngbe_configure_msix(struct ngbe_adapter *adapter)
+{
+ u16 v_idx;
+ u32 i;
+ u32 eitrsel = 0;
+
+ /* Populate MSIX to EITR Select */
+	if (!(adapter->flags & NGBE_FLAG_VMDQ_ENABLED)) {
+		wr32(&adapter->hw, NGBE_PX_ITRSEL, eitrsel);
+	} else {
+		for (i = 0; i < adapter->num_vfs; i++)
+			eitrsel |= 1 << i;
+		wr32(&adapter->hw, NGBE_PX_ITRSEL, eitrsel);
+	}
+
+ /*
+ * Populate the IVAR table and set the ITR values to the
+ * corresponding register.
+ */
+ for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
+ struct ngbe_q_vector *q_vector = adapter->q_vector[v_idx];
+ struct ngbe_ring *ring;
+
+ ngbe_for_each_ring(ring, q_vector->rx)
+ ngbe_set_ivar(adapter, 0, ring->reg_idx, v_idx);
+
+ ngbe_for_each_ring(ring, q_vector->tx)
+ ngbe_set_ivar(adapter, 1, ring->reg_idx, v_idx);
+
+ ngbe_write_eitr(q_vector);
+ }
+
+ /* misc ivar from seq 1 to seq 8 */
+ if (adapter->flags2 & NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP)
+ v_idx += adapter->ring_feature[RING_F_VMDQ].offset;
+
+ ngbe_set_ivar(adapter, -1, 0, v_idx);
+ wr32(&adapter->hw, NGBE_PX_ITR(v_idx), 1950);
+}
+
+enum latency_range {
+ lowest_latency = 0,
+ low_latency = 1,
+ bulk_latency = 2,
+ latency_invalid = 255
+};
+
+/**
+ * ngbe_write_eitr - write EITR register in hardware specific way
+ * @q_vector: structure containing interrupt and ring information
+ *
+ * This function is made to be called by ethtool and by the driver
+ * when it needs to update EITR registers at runtime. Hardware
+ * specific quirks/differences are taken care of here.
+ */
+void ngbe_write_eitr(struct ngbe_q_vector *q_vector)
+{
+ struct ngbe_adapter *adapter = q_vector->adapter;
+ struct ngbe_hw *hw = &adapter->hw;
+ int v_idx = q_vector->v_idx;
+ u32 itr_reg = q_vector->itr & NGBE_MAX_EITR;
+
+ itr_reg |= NGBE_PX_ITR_CNT_WDIS;
+
+ wr32(hw, NGBE_PX_ITR(v_idx), itr_reg);
+}
+
+/**
+ * ngbe_check_overtemp_subtask - check for over temperature
+ * @adapter: pointer to adapter
+ **/
+static void ngbe_check_overtemp_subtask(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 eicr = adapter->interrupt_event;
+ s32 temp_state;
+
+ if (test_bit(__NGBE_DOWN, &adapter->state))
+ return;
+ if (!(adapter->flags2 & NGBE_FLAG2_TEMP_SENSOR_CAPABLE))
+ return;
+ if (!(adapter->flags2 & NGBE_FLAG2_TEMP_SENSOR_EVENT))
+ return;
+
+ adapter->flags2 &= ~NGBE_FLAG2_TEMP_SENSOR_EVENT;
+
+	/*
+	 * The warning interrupt is shared by both ports, so it may not
+	 * have been meant for this port, and we may have missed the
+	 * event entirely; always re-check the over-heat status below.
+	 */
+ if (!(eicr & NGBE_PX_MISC_IC_OVER_HEAT))
+ return;
+
+ temp_state = ngbe_phy_check_overtemp(hw);
+ if (!temp_state || temp_state == NGBE_NOT_IMPLEMENTED)
+ return;
+
+ if (temp_state == NGBE_ERR_UNDERTEMP &&
+ test_bit(__NGBE_HANGING, &adapter->state)) {
+ e_crit(drv, "%s\n", ngbe_underheat_msg);
+ wr32m(&adapter->hw, NGBE_RDB_PB_CTL,
+ NGBE_RDB_PB_CTL_PBEN, NGBE_RDB_PB_CTL_PBEN);
+ netif_carrier_on(adapter->netdev);
+ clear_bit(__NGBE_HANGING, &adapter->state);
+ } else if (temp_state == NGBE_ERR_OVERTEMP &&
+ !test_and_set_bit(__NGBE_HANGING, &adapter->state)) {
+ e_crit(drv, "%s\n", ngbe_overheat_msg);
+ netif_carrier_off(adapter->netdev);
+ wr32m(&adapter->hw, NGBE_RDB_PB_CTL,
+ NGBE_RDB_PB_CTL_PBEN, 0);
+ }
+
+ adapter->interrupt_event = 0;
+}
+
+static void ngbe_check_overtemp_event(struct ngbe_adapter *adapter, u32 eicr)
+{
+ if (!(adapter->flags2 & NGBE_FLAG2_TEMP_SENSOR_CAPABLE))
+ return;
+
+ if (!(eicr & NGBE_PX_MISC_IC_OVER_HEAT))
+ return;
+ if (!test_bit(__NGBE_DOWN, &adapter->state)) {
+ adapter->interrupt_event = eicr;
+ adapter->flags2 |= NGBE_FLAG2_TEMP_SENSOR_EVENT;
+ ngbe_service_event_schedule(adapter);
+ }
+}
+
+static void ngbe_handle_phy_event(struct ngbe_hw *hw)
+{
+ struct ngbe_adapter *adapter = hw->back;
+ u32 reg;
+
+ reg = rd32(hw, NGBE_GPIO_INTSTATUS);
+ wr32(hw, NGBE_GPIO_EOI, reg);
+ TCALL(hw, phy.ops.check_event);
+ adapter->lsc_int++;
+ adapter->link_check_timeout = jiffies;
+ if (!test_bit(__NGBE_DOWN, &adapter->state)) {
+ ngbe_service_event_schedule(adapter);
+ }
+}
+
+/**
+ * ngbe_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ **/
+void ngbe_irq_enable(struct ngbe_adapter *adapter, bool queues, bool flush)
+{
+ u32 mask = 0;
+
+ /* enable misc interrupt */
+ mask = NGBE_PX_MISC_IEN_MASK;
+
+ if (adapter->flags2 & NGBE_FLAG2_TEMP_SENSOR_CAPABLE)
+ mask |= NGBE_PX_MISC_IEN_OVER_HEAT;
+
+ mask |= NGBE_PX_MISC_IEN_TIMESYNC;
+
+ wr32(&adapter->hw, NGBE_GPIO_DDR, 0x1);
+ wr32(&adapter->hw, NGBE_GPIO_INTEN, 0x3);
+ wr32(&adapter->hw, NGBE_GPIO_INTTYPE_LEVEL, 0x0);
+ if (adapter->hw.phy.type == ngbe_phy_yt8521s_sfi)
+ wr32(&adapter->hw, NGBE_GPIO_POLARITY, 0x0);
+ else
+ wr32(&adapter->hw, NGBE_GPIO_POLARITY, 0x3);
+
+ if (adapter->hw.phy.type == ngbe_phy_yt8521s_sfi)
+ mask |= NGBE_PX_MISC_IEN_GPIO;
+
+ wr32(&adapter->hw, NGBE_PX_MISC_IEN, mask);
+
+ /* unmask interrupt */
+ if (queues)
+ ngbe_intr_enable(&adapter->hw, NGBE_INTR_ALL);
+ else {
+ if (!(adapter->flags2 & NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP))
+ ngbe_intr_enable(&adapter->hw, NGBE_INTR_MISC(adapter));
+ else
+ ngbe_intr_enable(&adapter->hw, NGBE_INTR_MISC_VMDQ(adapter));
+ }
+
+ /* flush configuration */
+ if (flush)
+ NGBE_WRITE_FLUSH(&adapter->hw);
+}
+
+static irqreturn_t ngbe_msix_other(int __always_unused irq, void *data)
+{
+ struct ngbe_adapter *adapter = data;
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 eicr;
+ u32 ecc;
+ u16 pci_val = 0;
+
+ eicr = ngbe_misc_isb(adapter, NGBE_ISB_MISC);
+ if (eicr & (NGBE_PX_MISC_IC_PHY | NGBE_PX_MISC_IC_GPIO))
+ ngbe_handle_phy_event(hw);
+
+ if (eicr & NGBE_PX_MISC_IC_VF_MBOX)
+ ngbe_msg_task(adapter);
+
+ if (eicr & NGBE_PX_MISC_IC_PCIE_REQ_ERR) {
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+			"lan id %d, PCIe request error found.\n", hw->bus.lan_id);
+
+ pci_read_config_word(adapter->pdev, PCI_VENDOR_ID, &pci_val);
+ ERROR_REPORT1(NGBE_ERROR_POLLING, "pci vendor id is 0x%x\n", pci_val);
+
+ pci_read_config_word(adapter->pdev, PCI_COMMAND, &pci_val);
+ ERROR_REPORT1(NGBE_ERROR_POLLING, "pci command reg is 0x%x.\n", pci_val);
+
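+		/* only function 0 drives the PCIe recovery; the other
+		 * function just flags the error via NGBE_MIS_PF_SM
+		 */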
+ if (hw->bus.lan_id == 0) {
+ adapter->flags2 |= NGBE_FLAG2_PCIE_NEED_RECOVER;
+ ngbe_service_event_schedule(adapter);
+ } else
+ wr32(&adapter->hw, NGBE_MIS_PF_SM, 1);
+ }
+
+ if (eicr & NGBE_PX_MISC_IC_INT_ERR) {
+		e_info(link, "Received unrecoverable ECC Err, "
+			"initiating reset.\n");
+ ecc = rd32(hw, NGBE_MIS_ST);
+ e_info(link, "ecc error status is 0x%08x\n", ecc);
+ if (((ecc & NGBE_MIS_ST_LAN0_ECC) && (hw->bus.lan_id == 0)) ||
+ ((ecc & NGBE_MIS_ST_LAN1_ECC) && (hw->bus.lan_id == 1)))
+ adapter->flags2 |= NGBE_FLAG2_DEV_RESET_REQUESTED;
+
+ ngbe_service_event_schedule(adapter);
+ }
+ if (eicr & NGBE_PX_MISC_IC_DEV_RST) {
+ adapter->flags2 |= NGBE_FLAG2_RESET_INTR_RECEIVED;
+ ngbe_service_event_schedule(adapter);
+ }
+ if ((eicr & NGBE_PX_MISC_IC_STALL) ||
+ (eicr & NGBE_PX_MISC_IC_ETH_EVENT)) {
+ adapter->flags2 |= NGBE_FLAG2_PF_RESET_REQUESTED;
+ ngbe_service_event_schedule(adapter);
+ }
+
+ ngbe_check_overtemp_event(adapter, eicr);
+
+ if (unlikely(eicr & NGBE_PX_MISC_IC_TIMESYNC))
+ ngbe_ptp_check_pps_event(adapter);
+
+ /* re-enable the original interrupt state, no lsc, no queues */
+ if (!test_bit(__NGBE_DOWN, &adapter->state))
+ ngbe_irq_enable(adapter, false, false);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ngbe_msix_clean_rings(int __always_unused irq, void *data)
+{
+ struct ngbe_q_vector *q_vector = data;
+
+ /* EIAM disabled interrupts (on this vector) for us */
+
+ if (q_vector->rx.ring || q_vector->tx.ring)
+ napi_schedule_irqoff(&q_vector->napi);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * ngbe_poll - NAPI polling RX/TX cleanup routine
+ * @napi: napi struct with our devices info in it
+ * @budget: amount of work driver is allowed to do this pass, in packets
+ *
+ * This function will clean all queues associated with a q_vector.
+ **/
+int ngbe_poll(struct napi_struct *napi, int budget)
+{
+ struct ngbe_q_vector *q_vector =
+ container_of(napi, struct ngbe_q_vector, napi);
+ struct ngbe_adapter *adapter = q_vector->adapter;
+ struct ngbe_ring *ring;
+ int per_ring_budget;
+ bool clean_complete = true;
+
+ ngbe_for_each_ring(ring, q_vector->tx) {
+ if (!ngbe_clean_tx_irq(q_vector, ring))
+ clean_complete = false;
+ }
+
+ /* Exit if we are called by netpoll */
+ if (budget <= 0)
+ return budget;
+
+ /* attempt to distribute budget to each queue fairly, but don't allow
+ * the budget to go below 1 because we'll exit polling */
+ if (q_vector->rx.count > 1)
+ per_ring_budget = max(budget/q_vector->rx.count, 1);
+ else
+ per_ring_budget = budget;
+
+ ngbe_for_each_ring(ring, q_vector->rx) {
+ int cleaned = ngbe_clean_rx_irq(q_vector, ring,
+ per_ring_budget);
+
+ if (cleaned >= per_ring_budget)
+ clean_complete = false;
+ }
+
+ /* If all work not completed, return budget and keep polling */
+ if (!clean_complete)
+ return budget;
+
+ /* all work done, exit the polling mode */
+ napi_complete(napi);
+ if (!test_bit(__NGBE_DOWN, &adapter->state))
+ ngbe_intr_enable(&adapter->hw,
+ NGBE_INTR_Q(q_vector->v_idx));
+
+ return 0;
+}
+
+/**
+ * ngbe_request_msix_irqs - Initialize MSI-X interrupts
+ * @adapter: board private structure
+ *
+ * ngbe_request_msix_irqs allocates MSI-X vectors and requests
+ * interrupts from the kernel.
+ **/
+static int ngbe_request_msix_irqs(struct ngbe_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ int vector, err;
+ int ri = 0, ti = 0;
+
+ for (vector = 0; vector < adapter->num_q_vectors; vector++) {
+ struct ngbe_q_vector *q_vector = adapter->q_vector[vector];
+ struct msix_entry *entry = &adapter->msix_entries[vector];
+
+ if (q_vector->tx.ring && q_vector->rx.ring) {
+ snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+ "%s-TxRx-%d", netdev->name, ri++);
+ ti++;
+ } else if (q_vector->rx.ring) {
+ snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+ "%s-rx-%d", netdev->name, ri++);
+ } else if (q_vector->tx.ring) {
+ snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+ "%s-tx-%d", netdev->name, ti++);
+ } else {
+ /* skip this unused q_vector */
+ continue;
+ }
+ err = request_irq(entry->vector, &ngbe_msix_clean_rings, 0,
+ q_vector->name, q_vector);
+ if (err) {
+ e_err(probe, "request_irq failed for MSIX interrupt"
+ " '%s' Error: %d\n", q_vector->name, err);
+ goto free_queue_irqs;
+ }
+ }
+
+ if (adapter->flags2 & NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP)
+ vector += adapter->ring_feature[RING_F_VMDQ].offset;
+
+ err = request_irq(adapter->msix_entries[vector].vector,
+ ngbe_msix_other, 0, netdev->name, adapter);
+
+ if (adapter->flags2 & NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP)
+ vector -= adapter->ring_feature[RING_F_VMDQ].offset;
+
+ if (err) {
+ e_err(probe, "request_irq for msix_other failed: %d\n", err);
+ goto free_queue_irqs;
+ }
+
+ return 0;
+
+free_queue_irqs:
+ while (vector) {
+ vector--;
+
+ irq_set_affinity_hint(adapter->msix_entries[vector].vector,
+ NULL);
+
+ free_irq(adapter->msix_entries[vector].vector,
+ adapter->q_vector[vector]);
+ }
+ adapter->flags &= ~NGBE_FLAG_MSIX_ENABLED;
+ pci_disable_msix(adapter->pdev);
+ kfree(adapter->msix_entries);
+ adapter->msix_entries = NULL;
+ return err;
+}
+
+/**
+ * ngbe_intr - legacy mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t ngbe_intr(int __always_unused irq, void *data)
+{
+ struct ngbe_adapter *adapter = data;
+ struct ngbe_hw *hw = &adapter->hw;
+ struct ngbe_q_vector *q_vector = adapter->q_vector[0];
+ u32 eicr;
+ u32 eicr_misc;
+ u32 ecc = 0;
+
+ eicr = ngbe_misc_isb(adapter, NGBE_ISB_VEC0);
+ if (!eicr) {
+		/*
+		 * shared interrupt alert!
+		 * nothing is pending for us; re-enable the interrupts
+		 * that were auto-masked before the ISB read and bail.
+		 */
+ if (!test_bit(__NGBE_DOWN, &adapter->state))
+ ngbe_irq_enable(adapter, true, true);
+ return IRQ_NONE; /* Not our interrupt */
+ }
+ adapter->isb_mem[NGBE_ISB_VEC0] = 0;
+ if (!(adapter->flags & NGBE_FLAG_MSI_ENABLED))
+ wr32(&(adapter->hw), NGBE_PX_INTA, 1);
+
+ eicr_misc = ngbe_misc_isb(adapter, NGBE_ISB_MISC);
+ if (eicr_misc & (NGBE_PX_MISC_IC_PHY | NGBE_PX_MISC_IC_GPIO))
+ ngbe_handle_phy_event(hw);
+
+ if (eicr_misc & NGBE_PX_MISC_IC_INT_ERR) {
+		e_info(link, "Received unrecoverable ECC Err, "
+			"initiating reset.\n");
+ ecc = rd32(hw, NGBE_MIS_ST);
+ e_info(link, "ecc error status is 0x%08x\n", ecc);
+ adapter->flags2 |= NGBE_FLAG2_DEV_RESET_REQUESTED;
+ ngbe_service_event_schedule(adapter);
+ }
+
+ if (eicr_misc & NGBE_PX_MISC_IC_DEV_RST) {
+ adapter->flags2 |= NGBE_FLAG2_RESET_INTR_RECEIVED;
+ ngbe_service_event_schedule(adapter);
+ }
+ ngbe_check_overtemp_event(adapter, eicr_misc);
+
+ if (unlikely(eicr_misc & NGBE_PX_MISC_IC_TIMESYNC))
+ ngbe_ptp_check_pps_event(adapter);
+
+ adapter->isb_mem[NGBE_ISB_MISC] = 0;
+ /* would disable interrupts here but it is auto disabled */
+ napi_schedule_irqoff(&q_vector->napi);
+
+ /*
+ * re-enable link(maybe) and non-queue interrupts, no flush.
+ * ngbe_poll will re-enable the queue interrupts
+ */
+ if (!test_bit(__NGBE_DOWN, &adapter->state))
+ ngbe_irq_enable(adapter, false, false);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * ngbe_request_irq - initialize interrupts
+ * @adapter: board private structure
+ *
+ * Attempts to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static int ngbe_request_irq(struct ngbe_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ int err;
+
+ if (adapter->flags & NGBE_FLAG_MSIX_ENABLED)
+ err = ngbe_request_msix_irqs(adapter);
+ else if (adapter->flags & NGBE_FLAG_MSI_ENABLED)
+ err = request_irq(adapter->pdev->irq, &ngbe_intr, 0,
+ netdev->name, adapter);
+ else
+ err = request_irq(adapter->pdev->irq, &ngbe_intr, IRQF_SHARED,
+ netdev->name, adapter);
+
+ if (err)
+ e_err(probe, "request_irq failed, Error %d\n", err);
+
+ return err;
+}
+
+static void ngbe_free_irq(struct ngbe_adapter *adapter)
+{
+ int vector;
+
+ if (!(adapter->flags & NGBE_FLAG_MSIX_ENABLED)) {
+ free_irq(adapter->pdev->irq, adapter);
+ return;
+ }
+
+ for (vector = 0; vector < adapter->num_q_vectors; vector++) {
+ struct ngbe_q_vector *q_vector = adapter->q_vector[vector];
+ struct msix_entry *entry = &adapter->msix_entries[vector];
+
+ /* free only the irqs that were actually requested */
+ if (!q_vector->rx.ring && !q_vector->tx.ring)
+ continue;
+
+ /* clear the affinity_mask in the IRQ descriptor */
+ irq_set_affinity_hint(entry->vector, NULL);
+
+ free_irq(entry->vector, q_vector);
+ }
+
+ if (adapter->flags2 & NGBE_FLAG2_SRIOV_MISC_IRQ_REMAP) {
+ free_irq(
+ adapter->msix_entries[vector + adapter->ring_feature[RING_F_VMDQ].offset].vector,
+ adapter);
+ } else
+ free_irq(adapter->msix_entries[vector++].vector, adapter);
+}
+
+/**
+ * ngbe_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ **/
+void ngbe_irq_disable(struct ngbe_adapter *adapter)
+{
+ wr32(&adapter->hw, NGBE_PX_MISC_IEN, 0);
+ ngbe_intr_disable(&adapter->hw, NGBE_INTR_ALL);
+
+ NGBE_WRITE_FLUSH(&adapter->hw);
+ if (adapter->flags & NGBE_FLAG_MSIX_ENABLED) {
+ int vector;
+
+ for (vector = 0; vector < adapter->num_q_vectors; vector++)
+ synchronize_irq(adapter->msix_entries[vector].vector);
+
+ synchronize_irq(adapter->msix_entries[vector++].vector);
+ } else {
+ synchronize_irq(adapter->pdev->irq);
+ }
+}
+
+/**
+ * ngbe_configure_msi_and_legacy - Initialize PIN (INTA...) and MSI interrupts
+ * @adapter: board private structure
+ **/
+static void ngbe_configure_msi_and_legacy(struct ngbe_adapter *adapter)
+{
+ struct ngbe_q_vector *q_vector = adapter->q_vector[0];
+ struct ngbe_ring *ring;
+
+ ngbe_write_eitr(q_vector);
+
+ ngbe_for_each_ring(ring, q_vector->rx)
+ ngbe_set_ivar(adapter, 0, ring->reg_idx, 0);
+
+ ngbe_for_each_ring(ring, q_vector->tx)
+ ngbe_set_ivar(adapter, 1, ring->reg_idx, 0);
+
+ ngbe_set_ivar(adapter, -1, 0, 1);
+
+ e_info(hw, "Legacy interrupt IVAR setup done\n");
+}
+
+/**
+ * ngbe_configure_tx_ring - Configure Tx ring after Reset
+ * @adapter: board private structure
+ * @ring: structure containing ring specific data
+ *
+ * Configure the Tx descriptor ring after a reset.
+ **/
+void ngbe_configure_tx_ring(struct ngbe_adapter *adapter,
+ struct ngbe_ring *ring)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u64 tdba = ring->dma;
+ int wait_loop = 10;
+ u32 txdctl = NGBE_PX_TR_CFG_ENABLE;
+ u8 reg_idx = ring->reg_idx;
+
+ /* disable queue to avoid issues while updating state */
+ wr32(hw, NGBE_PX_TR_CFG(reg_idx), NGBE_PX_TR_CFG_SWFLSH);
+ NGBE_WRITE_FLUSH(hw);
+
+ wr32(hw, NGBE_PX_TR_BAL(reg_idx), tdba & DMA_BIT_MASK(32));
+ wr32(hw, NGBE_PX_TR_BAH(reg_idx), tdba >> 32);
+
+ /* reset head and tail pointers */
+ wr32(hw, NGBE_PX_TR_RP(reg_idx), 0);
+ wr32(hw, NGBE_PX_TR_WP(reg_idx), 0);
+ ring->tail = adapter->io_addr + NGBE_PX_TR_WP(reg_idx);
+
+	/* reset ntu and ntc to place SW in sync with hardware */
+ ring->next_to_clean = 0;
+ ring->next_to_use = 0;
+
+ txdctl |= NGBE_RING_SIZE(ring) << NGBE_PX_TR_CFG_TR_SIZE_SHIFT;
+
+ /*
+ * set WTHRESH to encourage burst writeback, it should not be set
+ * higher than 1 when:
+ * - ITR is 0 as it could cause false TX hangs
+ * - ITR is set to > 100k int/sec and BQL is enabled
+ *
+ * In order to avoid issues WTHRESH + PTHRESH should always be equal
+ * to or less than the number of on chip descriptors, which is
+ * currently 40.
+ */
+ txdctl |= 0x20 << NGBE_PX_TR_CFG_WTHRESH_SHIFT;
+ /*
+ * Setting PTHRESH to 32 both improves performance
+ * and avoids a TX hang with DFP enabled
+ */
+
+ /* initialize XPS */
+ if (!test_and_set_bit(__NGBE_TX_XPS_INIT_DONE, &ring->state)) {
+ struct ngbe_q_vector *q_vector = ring->q_vector;
+
+ if (q_vector)
+ netif_set_xps_queue(adapter->netdev,
+ &q_vector->affinity_mask,
+ ring->queue_index);
+ }
+
+ clear_bit(__NGBE_HANG_CHECK_ARMED, &ring->state);
+
+ /* enable queue */
+ wr32(hw, NGBE_PX_TR_CFG(reg_idx), txdctl);
+
+ /* poll to verify queue is enabled */
+ do {
+ msleep(1);
+ txdctl = rd32(hw, NGBE_PX_TR_CFG(reg_idx));
+ } while (--wait_loop && !(txdctl & NGBE_PX_TR_CFG_ENABLE));
+ if (!wait_loop)
+ e_err(drv, "Could not enable Tx Queue %d\n", reg_idx);
+}
+
+/**
+ * ngbe_configure_tx - Configure Transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+static void ngbe_configure_tx(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 i;
+
+	/* TDM_CTL.TE must be set before Tx queues are enabled */
+ wr32m(hw, NGBE_TDM_CTL,
+ NGBE_TDM_CTL_TE, NGBE_TDM_CTL_TE);
+
+ /* Setup the HW Tx Head and Tail descriptor pointers */
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ ngbe_configure_tx_ring(adapter, adapter->tx_ring[i]);
+
+ wr32m(hw, NGBE_TSEC_BUF_AE, 0x3FF, 0x10);
+ wr32m(hw, NGBE_TSEC_CTL, 0x2, 0);
+
+ wr32m(hw, NGBE_TSEC_CTL, 0x1, 1);
+
+ /* enable mac transmitter */
+ wr32m(hw, NGBE_MAC_TX_CFG,
+ NGBE_MAC_TX_CFG_TE, NGBE_MAC_TX_CFG_TE);
+}
+
+static void ngbe_enable_rx_drop(struct ngbe_adapter *adapter,
+ struct ngbe_ring *ring)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u16 reg_idx = ring->reg_idx;
+
+ u32 srrctl = rd32(hw, NGBE_PX_RR_CFG(reg_idx));
+
+ srrctl |= NGBE_PX_RR_CFG_DROP_EN;
+
+ wr32(hw, NGBE_PX_RR_CFG(reg_idx), srrctl);
+}
+
+static void ngbe_disable_rx_drop(struct ngbe_adapter *adapter,
+ struct ngbe_ring *ring)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u16 reg_idx = ring->reg_idx;
+
+ u32 srrctl = rd32(hw, NGBE_PX_RR_CFG(reg_idx));
+
+ srrctl &= ~NGBE_PX_RR_CFG_DROP_EN;
+
+ wr32(hw, NGBE_PX_RR_CFG(reg_idx), srrctl);
+}
+
+void ngbe_set_rx_drop_en(struct ngbe_adapter *adapter)
+{
+ int i;
+
+ /*
+ * We should set the drop enable bit if:
+ * SR-IOV is enabled
+ * or
+ * Number of Rx queues > 1 and flow control is disabled
+ *
+ * This allows us to avoid head of line blocking for security
+ * and performance reasons.
+ */
+ if (adapter->num_vfs || (adapter->num_rx_queues > 1 &&
+ !(adapter->hw.fc.current_mode & ngbe_fc_tx_pause))) {
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ ngbe_enable_rx_drop(adapter, adapter->rx_ring[i]);
+ } else {
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ ngbe_disable_rx_drop(adapter, adapter->rx_ring[i]);
+ }
+}
+
+static void ngbe_configure_srrctl(struct ngbe_adapter *adapter,
+ struct ngbe_ring *rx_ring)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 srrctl;
+ u16 reg_idx = rx_ring->reg_idx;
+
+ srrctl = rd32m(hw, NGBE_PX_RR_CFG(reg_idx),
+ ~(NGBE_PX_RR_CFG_RR_HDR_SZ |
+ NGBE_PX_RR_CFG_RR_BUF_SZ |
+ NGBE_PX_RR_CFG_SPLIT_MODE));
+
+ /* configure header buffer length, needed for RSC */
+ srrctl |= NGBE_RX_HDR_SIZE << NGBE_PX_RR_CFG_BSIZEHDRSIZE_SHIFT;
+
+ /* configure the packet buffer length */
+ srrctl |= ngbe_rx_bufsz(rx_ring) >> NGBE_PX_RR_CFG_BSIZEPKT_SHIFT;
+ if (ring_is_hs_enabled(rx_ring))
+ srrctl |= NGBE_PX_RR_CFG_SPLIT_MODE;
+
+ wr32(hw, NGBE_PX_RR_CFG(reg_idx), srrctl);
+}
+
+/**
+ * ngbe_rss_indir_tbl_entries - Return the number of entries in the RSS
+ * indirection table
+ * @adapter: device handle
+ */
+u32 ngbe_rss_indir_tbl_entries(struct ngbe_adapter *adapter)
+{
+ if (adapter->flags & NGBE_FLAG_SRIOV_ENABLED)
+ return 64;
+ else
+ return 128;
+}
+
+/**
+ * ngbe_store_reta - Write the RETA table to HW
+ * @adapter: device handle
+ *
+ * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW.
+ */
+void ngbe_store_reta(struct ngbe_adapter *adapter)
+{
+ u32 i, reta_entries = ngbe_rss_indir_tbl_entries(adapter);
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 reta = 0;
+ u8 *indir_tbl = adapter->rss_indir_tbl;
+
+ /* Write redirection table to HW */
+ for (i = 0; i < reta_entries; i++) {
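+		/* each 32-bit RSSTBL register packs four 8-bit entries;
+		 * flush the accumulator after every fourth entry
+		 */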
+ reta |= indir_tbl[i] << (i & 0x3) * 8;
+ if ((i & 3) == 3) {
+ wr32(hw, NGBE_RDB_RSSTBL(i >> 2), reta);
+ reta = 0;
+ }
+ }
+}
+
+static void ngbe_setup_reta(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 i, j;
+ u32 reta_entries = ngbe_rss_indir_tbl_entries(adapter);
+ u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
+
+ /*
+ * Program table for at least 2 queues w/ SR-IOV so that VFs can
+ * make full use of any rings they may have. We will use the
+ * PSRTYPE register to control how many rings we use within the PF.
+ */
+ if ((adapter->flags & NGBE_FLAG_SRIOV_ENABLED) && (rss_i < 2))
+ rss_i = 1;
+
+ /* Fill out hash function seeds */
+ for (i = 0; i < 10; i++)
+ wr32(hw, NGBE_RDB_RSSRK(i), adapter->rss_key[i]);
+
+ /* Fill out redirection table */
+ memset(adapter->rss_indir_tbl, 0, sizeof(adapter->rss_indir_tbl));
+
+ for (i = 0, j = 0; i < reta_entries; i++, j++) {
+ if (j == rss_i)
+ j = 0;
+
+ adapter->rss_indir_tbl[i] = j;
+ }
+
+ ngbe_store_reta(adapter);
+}
+
+static void ngbe_setup_mrqc(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 rss_field = 0;
+
+	/* VT and RSS do not coexist at the same time */
+ if (adapter->flags & NGBE_FLAG_VMDQ_ENABLED) {
+ return;
+ }
+
+ /* Disable indicating checksum in descriptor, enables RSS hash */
+ wr32m(hw, NGBE_PSR_CTL,
+ NGBE_PSR_CTL_PCSD, NGBE_PSR_CTL_PCSD);
+
+ /* Perform hash on these packet types */
+ rss_field = NGBE_RDB_RA_CTL_RSS_IPV4 |
+ NGBE_RDB_RA_CTL_RSS_IPV4_TCP |
+ NGBE_RDB_RA_CTL_RSS_IPV6 |
+ NGBE_RDB_RA_CTL_RSS_IPV6_TCP;
+
+ if (adapter->flags2 & NGBE_FLAG2_RSS_FIELD_IPV4_UDP)
+ rss_field |= NGBE_RDB_RA_CTL_RSS_IPV4_UDP;
+ if (adapter->flags2 & NGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+ rss_field |= NGBE_RDB_RA_CTL_RSS_IPV6_UDP;
+
+ netdev_rss_key_fill(adapter->rss_key, sizeof(adapter->rss_key));
+
+ ngbe_setup_reta(adapter);
+
+ if (adapter->flags2 & NGBE_FLAG2_RSS_ENABLED)
+ rss_field |= NGBE_RDB_RA_CTL_RSS_EN;
+ wr32(hw, NGBE_RDB_RA_CTL, rss_field);
+}
+
+static void ngbe_rx_desc_queue_enable(struct ngbe_adapter *adapter,
+ struct ngbe_ring *ring)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int wait_loop = NGBE_MAX_RX_DESC_POLL;
+ u32 rxdctl;
+ u8 reg_idx = ring->reg_idx;
+
+ if (NGBE_REMOVED(hw->hw_addr))
+ return;
+
+ do {
+ msleep(1);
+ rxdctl = rd32(hw, NGBE_PX_RR_CFG(reg_idx));
+ } while (--wait_loop && !(rxdctl & NGBE_PX_RR_CFG_RR_EN));
+
+ if (!wait_loop) {
+ e_err(drv, "RXDCTL.ENABLE on Rx queue %d "
+ "not set within the polling period\n", reg_idx);
+ }
+}
+
+/* disable the specified rx ring/queue */
+void ngbe_disable_rx_queue(struct ngbe_adapter *adapter,
+ struct ngbe_ring *ring)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int wait_loop = NGBE_MAX_RX_DESC_POLL;
+ u32 rxdctl;
+ u8 reg_idx = ring->reg_idx;
+
+ if (NGBE_REMOVED(hw->hw_addr))
+ return;
+
+ /* write value back with RXDCTL.ENABLE bit cleared */
+ wr32m(hw, NGBE_PX_RR_CFG(reg_idx),
+ NGBE_PX_RR_CFG_RR_EN, 0);
+
+ /* hardware may take up to 100us to actually disable rx queue */
+ do {
+ udelay(10);
+ rxdctl = rd32(hw, NGBE_PX_RR_CFG(reg_idx));
+ } while (--wait_loop && (rxdctl & NGBE_PX_RR_CFG_RR_EN));
+
+ if (!wait_loop) {
+ e_err(drv, "RXDCTL.ENABLE on Rx queue %d not cleared within "
+ "the polling period\n", reg_idx);
+ }
+}
+
+void ngbe_configure_rx_ring(struct ngbe_adapter *adapter,
+ struct ngbe_ring *ring)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u64 rdba = ring->dma;
+ u32 rxdctl;
+ u16 reg_idx = ring->reg_idx;
+
+ /* disable queue to avoid issues while updating state */
+ rxdctl = rd32(hw, NGBE_PX_RR_CFG(reg_idx));
+ ngbe_disable_rx_queue(adapter, ring);
+
+ wr32(hw, NGBE_PX_RR_BAL(reg_idx), rdba & DMA_BIT_MASK(32));
+ wr32(hw, NGBE_PX_RR_BAH(reg_idx), rdba >> 32);
+
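+	/* the RR_SIZE field encodes the ring length in units of 128
+	 * descriptors; the maximum ring size wraps around to 0
+	 */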
+ if (ring->count == NGBE_MAX_RXD)
+ rxdctl |= 0 << NGBE_PX_RR_CFG_RR_SIZE_SHIFT;
+ else
+ rxdctl |= (ring->count / 128) << NGBE_PX_RR_CFG_RR_SIZE_SHIFT;
+
+ rxdctl |= 0x1 << NGBE_PX_RR_CFG_RR_THER_SHIFT;
+ wr32(hw, NGBE_PX_RR_CFG(reg_idx), rxdctl);
+
+ /* reset head and tail pointers */
+ wr32(hw, NGBE_PX_RR_RP(reg_idx), 0);
+ wr32(hw, NGBE_PX_RR_WP(reg_idx), 0);
+ ring->tail = adapter->io_addr + NGBE_PX_RR_WP(reg_idx);
+
+	/* reset ntu and ntc to place SW in sync with hardware */
+ ring->next_to_clean = 0;
+ ring->next_to_use = 0;
+ ring->next_to_alloc = 0;
+
+ ngbe_configure_srrctl(adapter, ring);
+
+ /* enable receive descriptor ring */
+ wr32m(hw, NGBE_PX_RR_CFG(reg_idx),
+ NGBE_PX_RR_CFG_RR_EN, NGBE_PX_RR_CFG_RR_EN);
+
+ ngbe_rx_desc_queue_enable(adapter, ring);
+ ngbe_alloc_rx_buffers(ring, ngbe_desc_unused(ring));
+}
+
+static void ngbe_setup_psrtype(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int pool;
+
+ /* PSRTYPE must be initialized in adapters */
+ u32 psrtype = NGBE_RDB_PL_CFG_L4HDR |
+ NGBE_RDB_PL_CFG_L3HDR |
+ NGBE_RDB_PL_CFG_L2HDR |
+ NGBE_RDB_PL_CFG_TUN_OUTER_L2HDR |
+ NGBE_RDB_PL_CFG_TUN_TUNHDR;
+
+ for_each_set_bit(pool, &adapter->fwd_bitmask, NGBE_MAX_MACVLANS) {
+ wr32(hw, NGBE_RDB_PL_CFG(VMDQ_P(pool)), psrtype);
+ }
+}
+
+/**
+ * ngbe_configure_bridge_mode - common settings for configuring bridge mode
+ * @adapter: the private structure
+ *
+ * This function's purpose is to remove code duplication and configure some
+ * settings required to switch bridge modes.
+ **/
+static void ngbe_configure_bridge_mode(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+
+ if (adapter->flags & NGBE_FLAG_SRIOV_VEPA_BRIDGE_MODE) {
+ /* disable Tx loopback, rely on switch hairpin mode */
+ wr32m(hw, NGBE_PSR_CTL,
+ NGBE_PSR_CTL_SW_EN, 0);
+ } else {
+ /* enable Tx loopback for internal VF/PF communication */
+ wr32m(hw, NGBE_PSR_CTL,
+ NGBE_PSR_CTL_SW_EN, NGBE_PSR_CTL_SW_EN);
+ }
+}
+
+static void ngbe_configure_virtualization(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 i;
+ u8 vfe = 0;
+
+ if (!(adapter->flags & NGBE_FLAG_VMDQ_ENABLED))
+ return;
+
+ wr32m(hw, NGBE_PSR_VM_CTL,
+ NGBE_PSR_VM_CTL_POOL_MASK |
+ NGBE_PSR_VM_CTL_REPLEN,
+ VMDQ_P(0) << NGBE_PSR_VM_CTL_POOL_SHIFT |
+ NGBE_PSR_VM_CTL_REPLEN);
+
+ for_each_set_bit(i, &adapter->fwd_bitmask, NGBE_MAX_MACVLANS) {
+ /* accept untagged packets until a vlan tag is
+ * specifically set for the VMDQ queue/pool
+ */
+ wr32m(hw, NGBE_PSR_VM_L2CTL(i),
+ NGBE_PSR_VM_L2CTL_AUPE, NGBE_PSR_VM_L2CTL_AUPE);
+ }
+
+ vfe = 1 << (VMDQ_P(0));
+ /* Enable only the PF pools for Tx/Rx */
+ wr32(hw, NGBE_RDM_POOL_RE, vfe);
+ wr32(hw, NGBE_TDM_POOL_TE, vfe);
+
+ if (!(adapter->flags & NGBE_FLAG_SRIOV_ENABLED))
+ return;
+
+ /* configure default bridge settings */
+ ngbe_configure_bridge_mode(adapter);
+
+	/* Ensure LLDP and FC filters are set for Ethertype Antispoofing if we
+	 * will be calling set_ethertype_anti_spoofing for each VF in the loop
+	 * below.
+	 */
+ if (hw->mac.ops.set_ethertype_anti_spoofing) {
+ wr32(hw,
+ NGBE_PSR_ETYPE_SWC(NGBE_PSR_ETYPE_SWC_FILTER_LLDP),
+ (NGBE_PSR_ETYPE_SWC_FILTER_EN | /* enable filter */
+ NGBE_PSR_ETYPE_SWC_TX_ANTISPOOF |
+			NGBE_ETH_P_LLDP)); /* LLDP eth protocol type */
+
+ wr32(hw,
+ NGBE_PSR_ETYPE_SWC(NGBE_PSR_ETYPE_SWC_FILTER_FC),
+ (NGBE_PSR_ETYPE_SWC_FILTER_EN |
+ NGBE_PSR_ETYPE_SWC_TX_ANTISPOOF |
+ ETH_P_PAUSE));
+ }
+
+ for (i = 0; i < adapter->num_vfs; i++) {
+ if (!adapter->vfinfo[i].spoofchk_enabled)
+ ngbe_ndo_set_vf_spoofchk(adapter->netdev, i, false);
+ /* enable ethertype anti spoofing if hw supports it */
+ TCALL(hw, mac.ops.set_ethertype_anti_spoofing, true, i);
+ }
+}
+
+static void ngbe_set_rx_buffer_len(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ struct net_device *netdev = adapter->netdev;
+ u32 max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+ struct ngbe_ring *rx_ring;
+ int i;
+ u32 mhadd;
+
+ /* adjust max frame to be at least the size of a standard frame */
+ if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
+ max_frame = (ETH_FRAME_LEN + ETH_FCS_LEN);
+
+ mhadd = rd32(hw, NGBE_PSR_MAX_SZ);
+ if (max_frame != mhadd) {
+ wr32(hw, NGBE_PSR_MAX_SZ, max_frame);
+ }
+
+	/*
+	 * Set the per-queue Rx buffer length and header-split state
+	 */
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ rx_ring = adapter->rx_ring[i];
+
+ if (adapter->flags & NGBE_FLAG_RX_HS_ENABLED) {
+ rx_ring->rx_buf_len = NGBE_RX_HDR_SIZE;
+ set_ring_hs_enabled(rx_ring);
+ } else
+ clear_ring_hs_enabled(rx_ring);
+ }
+}
+
+/**
+ * ngbe_configure_rx - Configure Receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+static void ngbe_configure_rx(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int i;
+ u32 rxctrl;
+
+ /* disable receives while setting up the descriptors */
+ TCALL(hw, mac.ops.disable_rx);
+
+ ngbe_setup_psrtype(adapter);
+
+ /* enable hw crc stripping */
+ wr32m(hw, NGBE_RSEC_CTL,
+ NGBE_RSEC_CTL_CRC_STRIP, NGBE_RSEC_CTL_CRC_STRIP);
+
+ /* Program registers for the distribution of queues */
+ ngbe_setup_mrqc(adapter);
+
+ /* set_rx_buffer_len must be called before ring initialization */
+ ngbe_set_rx_buffer_len(adapter);
+
+ /*
+ * Setup the HW Rx Head and Tail Descriptor Pointers and
+ * the Base and Length of the Rx Descriptor Ring
+ */
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ ngbe_configure_rx_ring(adapter, adapter->rx_ring[i]);
+
+ rxctrl = rd32(hw, NGBE_RDB_PB_CTL);
+
+ /* enable all receives */
+ rxctrl |= NGBE_RDB_PB_CTL_PBEN;
+ TCALL(hw, mac.ops.enable_rx_dma, rxctrl);
+}
+
+static int ngbe_vlan_rx_add_vid(struct net_device *netdev,
+ __always_unused __be16 proto, u16 vid)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ int pool_ndx = VMDQ_P(0);
+
+ /* add VID to filter table */
+ if (hw->mac.ops.set_vfta) {
+ if (vid < VLAN_N_VID)
+ set_bit(vid, adapter->active_vlans);
+
+ TCALL(hw, mac.ops.set_vfta, vid, pool_ndx, true);
+ if (adapter->flags & NGBE_FLAG_VMDQ_ENABLED) {
+ int i;
+ /* enable vlan id for all pools */
+ for_each_set_bit(i, &adapter->fwd_bitmask,
+ NGBE_MAX_MACVLANS)
+ TCALL(hw, mac.ops.set_vfta, vid,
+ VMDQ_P(i), true);
+ }
+ }
+
+ return 0;
+}
+
+static int ngbe_vlan_rx_kill_vid(struct net_device *netdev,
+ __always_unused __be16 proto, u16 vid)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ int pool_ndx = VMDQ_P(0);
+
+ /* User is not allowed to remove vlan ID 0 */
+ if (!vid)
+ return 0;
+
+ /* remove VID from filter table */
+ if (hw->mac.ops.set_vfta) {
+ TCALL(hw, mac.ops.set_vfta, vid, pool_ndx, false);
+ if (adapter->flags & NGBE_FLAG_VMDQ_ENABLED) {
+ int i;
+ /* remove vlan id from all pools */
+ for_each_set_bit(i, &adapter->fwd_bitmask,
+ NGBE_MAX_MACVLANS)
+ TCALL(hw, mac.ops.set_vfta, vid,
+ VMDQ_P(i), false);
+ }
+ }
+
+ clear_bit(vid, adapter->active_vlans);
+ return 0;
+}
+
+/**
+ * ngbe_vlan_strip_disable - helper to disable vlan tag stripping
+ * @adapter: driver data
+ */
+void ngbe_vlan_strip_disable(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int i, j;
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct ngbe_ring *ring = adapter->rx_ring[i];
+ if (ring->accel)
+ continue;
+ j = ring->reg_idx;
+ wr32m(hw, NGBE_PX_RR_CFG(j),
+ NGBE_PX_RR_CFG_VLAN, 0);
+ }
+}
+
+/**
+ * ngbe_vlan_strip_enable - helper to enable vlan tag stripping
+ * @adapter: driver data
+ */
+void ngbe_vlan_strip_enable(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int i, j;
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct ngbe_ring *ring = adapter->rx_ring[i];
+ if (ring->accel)
+ continue;
+ j = ring->reg_idx;
+ wr32m(hw, NGBE_PX_RR_CFG(j),
+ NGBE_PX_RR_CFG_VLAN, NGBE_PX_RR_CFG_VLAN);
+ }
+}
+
+void ngbe_vlan_mode(struct net_device *netdev, u32 features)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ bool enable;
+
+ enable = !!(features & (NETIF_F_HW_VLAN_CTAG_RX));
+ if (enable)
+ /* enable VLAN tag insert/strip */
+ ngbe_vlan_strip_enable(adapter);
+ else
+ /* disable VLAN tag insert/strip */
+ ngbe_vlan_strip_disable(adapter);
+}
+
+static void ngbe_restore_vlan(struct ngbe_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ u16 vid;
+
+ ngbe_vlan_mode(netdev, netdev->features);
+
+ for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
+ ngbe_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid);
+}
+
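+/*
+ * ngbe_addr_list_itr - iterator handed to mac.ops.update_mc_addr_list.
+ *
+ * Returns the current multicast address and advances *mc_addr_ptr to the
+ * next entry in the netdev list (NULL once the list is exhausted), so the
+ * MAC code can walk the software list without knowing its layout.
+ */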
+static u8 *ngbe_addr_list_itr(struct ngbe_hw *hw,
+ u8 **mc_addr_ptr, u32 *vmdq)
+{
+ struct netdev_hw_addr *mc_ptr;
+ u8 *addr = *mc_addr_ptr;
+ struct ngbe_adapter *adapter = hw->back;
+
+	/* VMDQ_P implicitly uses the adapter struct when CONFIG_PCI_IOV is
+ * defined, so we have to wrap the pointer above correctly to prevent
+ * a warning.
+ */
+ *vmdq = VMDQ_P(0);
+
+ mc_ptr = container_of(addr, struct netdev_hw_addr, addr[0]);
+ if (mc_ptr->list.next) {
+ struct netdev_hw_addr *ha;
+
+ ha = list_entry(mc_ptr->list.next, struct netdev_hw_addr, list);
+ *mc_addr_ptr = ha->addr;
+	} else {
+		*mc_addr_ptr = NULL;
+	}
+
+ return addr;
+}
+
+/**
+ * ngbe_write_mc_addr_list - write multicast addresses to MTA
+ * @netdev: network interface device structure
+ *
+ * Writes multicast address list to the MTA hash table.
+ * Returns: -ENOMEM on failure
+ * 0 on no addresses written
+ * X on writing X addresses to MTA
+ **/
+int ngbe_write_mc_addr_list(struct net_device *netdev)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ struct netdev_hw_addr *ha;
+ u8 *addr_list = NULL;
+ int addr_count = 0;
+
+ if (!hw->mac.ops.update_mc_addr_list)
+ return -ENOMEM;
+
+ if (!netif_running(netdev))
+ return 0;
+
+ if (netdev_mc_empty(netdev)) {
+ TCALL(hw, mac.ops.update_mc_addr_list, NULL, 0,
+ ngbe_addr_list_itr, true);
+ } else {
+ ha = list_first_entry(&netdev->mc.list,
+ struct netdev_hw_addr, list);
+ addr_list = ha->addr;
+ addr_count = netdev_mc_count(netdev);
+ TCALL(hw, mac.ops.update_mc_addr_list, addr_list, addr_count,
+ ngbe_addr_list_itr, true);
+ }
+
+#ifdef CONFIG_PCI_IOV
+ ngbe_restore_vf_multicasts(adapter);
+#endif
+ return addr_count;
+}
+
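+/*
+ * mac_table bookkeeping: NGBE_MAC_STATE_IN_USE marks entries that must be
+ * programmed into a receive address register (RAR); NGBE_MAC_STATE_MODIFIED
+ * marks entries whose hardware copy is stale. ngbe_sync_mac_table() pushes
+ * only the modified entries, while ngbe_full_sync_mac_table() rewrites
+ * every slot.
+ */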
+void ngbe_full_sync_mac_table(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int i;
+ for (i = 0; i < hw->mac.num_rar_entries; i++) {
+ if (adapter->mac_table[i].state & NGBE_MAC_STATE_IN_USE) {
+ TCALL(hw, mac.ops.set_rar, i,
+ adapter->mac_table[i].addr,
+ adapter->mac_table[i].pools,
+ NGBE_PSR_MAC_SWC_AD_H_AV);
+ } else {
+ TCALL(hw, mac.ops.clear_rar, i);
+ }
+ adapter->mac_table[i].state &= ~(NGBE_MAC_STATE_MODIFIED);
+ }
+}
+
+static void ngbe_sync_mac_table(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int i;
+ for (i = 0; i < hw->mac.num_rar_entries; i++) {
+ if (adapter->mac_table[i].state & NGBE_MAC_STATE_MODIFIED) {
+ if (adapter->mac_table[i].state &
+ NGBE_MAC_STATE_IN_USE) {
+ TCALL(hw, mac.ops.set_rar, i,
+ adapter->mac_table[i].addr,
+ adapter->mac_table[i].pools,
+ NGBE_PSR_MAC_SWC_AD_H_AV);
+ } else {
+ TCALL(hw, mac.ops.clear_rar, i);
+ }
+ adapter->mac_table[i].state &=
+ ~(NGBE_MAC_STATE_MODIFIED);
+ }
+ }
+}
+
+int ngbe_available_rars(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 i, count = 0;
+
+ for (i = 0; i < hw->mac.num_rar_entries; i++) {
+ if (adapter->mac_table[i].state == 0)
+ count++;
+ }
+ return count;
+}
+
+/* this function destroys the first RAR entry */
+static void ngbe_mac_set_default_filter(struct ngbe_adapter *adapter,
+ u8 *addr)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+
+ memcpy(&adapter->mac_table[0].addr, addr, ETH_ALEN);
+ adapter->mac_table[0].pools = 1ULL << VMDQ_P(0);
+ adapter->mac_table[0].state = (NGBE_MAC_STATE_DEFAULT |
+ NGBE_MAC_STATE_IN_USE);
+ TCALL(hw, mac.ops.set_rar, 0, adapter->mac_table[0].addr,
+ adapter->mac_table[0].pools,
+ NGBE_PSR_MAC_SWC_AD_H_AV);
+}
+
+int ngbe_add_mac_filter(struct ngbe_adapter *adapter, u8 *addr, u16 pool)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 i;
+
+ if (is_zero_ether_addr(addr))
+ return -EINVAL;
+
+ for (i = 0; i < hw->mac.num_rar_entries; i++) {
+		if (adapter->mac_table[i].state & NGBE_MAC_STATE_IN_USE)
+			continue;
+ adapter->mac_table[i].state |= (NGBE_MAC_STATE_MODIFIED |
+ NGBE_MAC_STATE_IN_USE);
+ memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN);
+ adapter->mac_table[i].pools = (1ULL << pool);
+ ngbe_sync_mac_table(adapter);
+ return i;
+ }
+ return -ENOMEM;
+}
+
+static void ngbe_flush_sw_mac_table(struct ngbe_adapter *adapter)
+{
+ u32 i;
+ struct ngbe_hw *hw = &adapter->hw;
+
+ for (i = 0; i < hw->mac.num_rar_entries; i++) {
+ adapter->mac_table[i].state |= NGBE_MAC_STATE_MODIFIED;
+ adapter->mac_table[i].state &= ~NGBE_MAC_STATE_IN_USE;
+ memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+ adapter->mac_table[i].pools = 0;
+ }
+ ngbe_sync_mac_table(adapter);
+}
+
+int ngbe_del_mac_filter(struct ngbe_adapter *adapter, u8 *addr, u16 pool)
+{
+ /* search table for addr, if found, set to 0 and sync */
+ u32 i;
+ struct ngbe_hw *hw = &adapter->hw;
+
+ if (is_zero_ether_addr(addr))
+ return -EINVAL;
+
+ for (i = 0; i < hw->mac.num_rar_entries; i++) {
+		if (ether_addr_equal(addr, adapter->mac_table[i].addr) &&
+		    (adapter->mac_table[i].pools & (1ULL << pool))) {
+ adapter->mac_table[i].state |= NGBE_MAC_STATE_MODIFIED;
+ adapter->mac_table[i].state &= ~NGBE_MAC_STATE_IN_USE;
+ memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+ adapter->mac_table[i].pools = 0;
+ ngbe_sync_mac_table(adapter);
+ return 0;
+ }
+ }
+ return -ENOMEM;
+}
+
+/**
+ * ngbe_write_uc_addr_list - write unicast addresses to RAR table
+ * @netdev: network interface device structure
+ *
+ * Writes unicast address list to the RAR table.
+ * Returns: -ENOMEM on failure/insufficient address space
+ * 0 on no addresses written
+ * X on writing X addresses to the RAR table
+ **/
+int ngbe_write_uc_addr_list(struct net_device *netdev, int pool)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ int count = 0;
+
+ /* return ENOMEM indicating insufficient memory for addresses */
+ if (netdev_uc_count(netdev) > ngbe_available_rars(adapter))
+ return -ENOMEM;
+
+ if (!netdev_uc_empty(netdev)) {
+ struct netdev_hw_addr *ha;
+ netdev_for_each_uc_addr(ha, netdev) {
+ ngbe_del_mac_filter(adapter, ha->addr, pool);
+ ngbe_add_mac_filter(adapter, ha->addr, pool);
+ count++;
+ }
+ }
+ return count;
+}
+
+/**
+ * ngbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_rx_method entry point is called whenever the unicast/multicast
+ * address list or the network interface flags are updated. This routine is
+ * responsible for configuring the hardware for proper unicast, multicast and
+ * promiscuous mode.
+ **/
+void ngbe_set_rx_mode(struct net_device *netdev)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 fctrl, vmolr, vlnctrl;
+ int count;
+
+ /* Check for Promiscuous and All Multicast modes */
+ fctrl = rd32m(hw, NGBE_PSR_CTL,
+ ~(NGBE_PSR_CTL_UPE | NGBE_PSR_CTL_MPE));
+ vmolr = rd32m(hw, NGBE_PSR_VM_L2CTL(VMDQ_P(0)),
+ ~(NGBE_PSR_VM_L2CTL_UPE |
+ NGBE_PSR_VM_L2CTL_MPE |
+ NGBE_PSR_VM_L2CTL_ROPE |
+ NGBE_PSR_VM_L2CTL_ROMPE));
+ vlnctrl = rd32m(hw, NGBE_PSR_VLAN_CTL,
+ ~(NGBE_PSR_VLAN_CTL_VFE |
+ NGBE_PSR_VLAN_CTL_CFIEN));
+
+ /* set all bits that we expect to always be set */
+ fctrl |= NGBE_PSR_CTL_BAM | NGBE_PSR_CTL_MFE;
+ vmolr |= NGBE_PSR_VM_L2CTL_BAM |
+ NGBE_PSR_VM_L2CTL_AUPE |
+ NGBE_PSR_VM_L2CTL_VACC;
+ vlnctrl |= NGBE_PSR_VLAN_CTL_VFE;
+
+ hw->addr_ctrl.user_set_promisc = false;
+ if (netdev->flags & IFF_PROMISC) {
+ hw->addr_ctrl.user_set_promisc = true;
+ fctrl |= (NGBE_PSR_CTL_UPE | NGBE_PSR_CTL_MPE);
+		/* the pf doesn't want packets routed to the vf, so clear UPE */
+ vmolr |= NGBE_PSR_VM_L2CTL_MPE;
+ vlnctrl &= ~NGBE_PSR_VLAN_CTL_VFE;
+ }
+
+ if (netdev->flags & IFF_ALLMULTI) {
+ fctrl |= NGBE_PSR_CTL_MPE;
+ vmolr |= NGBE_PSR_VM_L2CTL_MPE;
+ }
+
+ /* This is useful for sniffing bad packets. */
+ if (netdev->features & NETIF_F_RXALL) {
+ vmolr |= (NGBE_PSR_VM_L2CTL_UPE | NGBE_PSR_VM_L2CTL_MPE);
+ vlnctrl &= ~NGBE_PSR_VLAN_CTL_VFE;
+ /* receive bad packets */
+ wr32m(hw, NGBE_RSEC_CTL,
+ NGBE_RSEC_CTL_SAVE_MAC_ERR,
+ NGBE_RSEC_CTL_SAVE_MAC_ERR);
+ } else {
+ vmolr |= NGBE_PSR_VM_L2CTL_ROPE | NGBE_PSR_VM_L2CTL_ROMPE;
+ }
+
+ /*
+ * Write addresses to available RAR registers, if there is not
+ * sufficient space to store all the addresses then enable
+ * unicast promiscuous mode
+ */
+ count = ngbe_write_uc_addr_list(netdev, VMDQ_P(0));
+ if (count < 0) {
+ vmolr &= ~NGBE_PSR_VM_L2CTL_ROPE;
+ vmolr |= NGBE_PSR_VM_L2CTL_UPE;
+ }
+
+ /*
+ * Write addresses to the MTA, if the attempt fails
+ * then we should just turn on promiscuous mode so
+ * that we can at least receive multicast traffic
+ */
+ count = ngbe_write_mc_addr_list(netdev);
+ if (count < 0) {
+ vmolr &= ~NGBE_PSR_VM_L2CTL_ROMPE;
+ vmolr |= NGBE_PSR_VM_L2CTL_MPE;
+ }
+
+ wr32(hw, NGBE_PSR_VLAN_CTL, vlnctrl);
+ wr32(hw, NGBE_PSR_CTL, fctrl);
+ wr32(hw, NGBE_PSR_VM_L2CTL(VMDQ_P(0)), vmolr);
+
+ if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+ ngbe_vlan_strip_enable(adapter);
+ else
+ ngbe_vlan_strip_disable(adapter);
+}
+
+static void ngbe_napi_enable_all(struct ngbe_adapter *adapter)
+{
+ struct ngbe_q_vector *q_vector;
+ int q_idx;
+
+ for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++) {
+ q_vector = adapter->q_vector[q_idx];
+ napi_enable(&q_vector->napi);
+ }
+}
+
+static void ngbe_napi_disable_all(struct ngbe_adapter *adapter)
+{
+ struct ngbe_q_vector *q_vector;
+ int q_idx;
+
+ for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++) {
+ q_vector = adapter->q_vector[q_idx];
+ napi_disable(&q_vector->napi);
+ }
+}
+
+/* NETIF_F_GSO_IPXIP4/6 may not be defined in all distributions */
+#define NGBE_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
+ NETIF_F_GSO_GRE_CSUM | \
+ NETIF_F_GSO_IPXIP4 | \
+ NETIF_F_GSO_IPXIP6 | \
+ NETIF_F_GSO_UDP_TUNNEL | \
+ NETIF_F_GSO_UDP_TUNNEL_CSUM)
+
+static inline unsigned long ngbe_tso_features(void)
+{
+ unsigned long features = 0;
+
+ features |= NETIF_F_TSO;
+ features |= NETIF_F_TSO6;
+ features |= NETIF_F_GSO_PARTIAL | NGBE_GSO_PARTIAL_FEATURES;
+
+ return features;
+}
+
+#ifndef CONFIG_NGBE_NO_LLI
+static void ngbe_configure_lli(struct ngbe_adapter *adapter)
+{
+ /* lli should only be enabled with MSI-X and MSI */
+ if (!(adapter->flags & NGBE_FLAG_MSI_ENABLED) &&
+ !(adapter->flags & NGBE_FLAG_MSIX_ENABLED))
+ return;
+
+ if (adapter->lli_etype) {
+ wr32(&adapter->hw, NGBE_RDB_5T_CTL1(0),
+ (NGBE_RDB_5T_CTL1_LLI |
+ NGBE_RDB_5T_CTL1_SIZE_BP));
+ wr32(&adapter->hw, NGBE_RDB_ETYPE_CLS(0),
+ NGBE_RDB_ETYPE_CLS_LLI);
+ wr32(&adapter->hw, NGBE_PSR_ETYPE_SWC(0),
+ (adapter->lli_etype |
+ NGBE_PSR_ETYPE_SWC_FILTER_EN));
+ }
+
+ if (adapter->lli_port) {
+ wr32(&adapter->hw, NGBE_RDB_5T_CTL1(0),
+ (NGBE_RDB_5T_CTL1_LLI |
+ NGBE_RDB_5T_CTL1_SIZE_BP));
+
+ wr32(&adapter->hw, NGBE_RDB_5T_CTL0(0),
+ (NGBE_RDB_5T_CTL0_POOL_MASK_EN |
+ (NGBE_RDB_5T_CTL0_PRIORITY_MASK <<
+ NGBE_RDB_5T_CTL0_PRIORITY_SHIFT) |
+ (NGBE_RDB_5T_CTL0_DEST_PORT_MASK <<
+ NGBE_RDB_5T_CTL0_5TUPLE_MASK_SHIFT)));
+
+ wr32(&adapter->hw, NGBE_RDB_5T_SDP(0),
+ (adapter->lli_port << 16));
+ }
+
+ if (adapter->lli_size) {
+ wr32(&adapter->hw, NGBE_RDB_5T_CTL1(0),
+ NGBE_RDB_5T_CTL1_LLI);
+ wr32m(&adapter->hw, NGBE_RDB_LLI_THRE,
+ NGBE_RDB_LLI_THRE_SZ(~0), adapter->lli_size);
+ wr32(&adapter->hw, NGBE_RDB_5T_CTL0(0),
+ (NGBE_RDB_5T_CTL0_POOL_MASK_EN |
+ (NGBE_RDB_5T_CTL0_PRIORITY_MASK <<
+ NGBE_RDB_5T_CTL0_PRIORITY_SHIFT) |
+ (NGBE_RDB_5T_CTL0_5TUPLE_MASK_MASK <<
+ NGBE_RDB_5T_CTL0_5TUPLE_MASK_SHIFT)));
+ }
+
+ if (adapter->lli_vlan_pri) {
+ wr32m(&adapter->hw, NGBE_RDB_LLI_THRE,
+ NGBE_RDB_LLI_THRE_PRIORITY_EN |
+ NGBE_RDB_LLI_THRE_UP(~0),
+ NGBE_RDB_LLI_THRE_PRIORITY_EN |
+ (adapter->lli_vlan_pri << NGBE_RDB_LLI_THRE_UP_SHIFT));
+ }
+}
+
+#endif /* CONFIG_NGBE_NO_LLI */
+/* Additional bittime to account for NGBE framing */
+#define NGBE_ETH_FRAMING 20
+
+/*
+ * ngbe_hpbthresh - calculate high water mark for flow control
+ *
+ * @adapter: board private structure to calculate for
+ */
+static int ngbe_hpbthresh(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ struct net_device *dev = adapter->netdev;
+ int link, tc, kb, marker;
+ u32 dv_id, rx_pba;
+
+ /* Calculate max LAN frame size */
+ tc = link = dev->mtu + ETH_HLEN + ETH_FCS_LEN + NGBE_ETH_FRAMING;
+
+ /* Calculate delay value for device */
+ dv_id = NGBE_DV(link, tc);
+
+ /* Loopback switch introduces additional latency */
+ if (adapter->flags & NGBE_FLAG_SRIOV_ENABLED)
+ dv_id += NGBE_B2BT(tc);
+
+	/* Delay value is calculated in bit times; convert to KB */
+ kb = NGBE_BT2KB(dv_id);
+ rx_pba = rd32(hw, NGBE_RDB_PB_SZ)
+ >> NGBE_RDB_PB_SZ_SHIFT;
+
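+	/*
+	 * Worked example (illustrative numbers only): a 1500 byte MTU gives
+	 * link = tc = 1500 + 14 (ETH_HLEN) + 4 (ETH_FCS_LEN) +
+	 * 20 (NGBE_ETH_FRAMING) = 1538 bytes; NGBE_DV() turns that into a
+	 * worst-case delay in bit times, and NGBE_BT2KB() rounds it up to
+	 * the KB of packet-buffer headroom that must stay free.
+	 */
+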
+ marker = rx_pba - kb;
+
+	/* It is possible that the packet buffer is not large enough to
+	 * provide the required headroom. In that case warn the user and
+	 * do the best we can.
+	 */
+ if (marker < 0) {
+		e_warn(drv, "Packet Buffer can not provide enough "
+		       "headroom to support flow control. Decrease MTU "
+		       "or number of traffic classes\n");
+ marker = tc + 1;
+ }
+
+ return marker;
+}
+
+/*
+ * ngbe_lpbthresh - calculate low water mark for flow control
+ *
+ * @adapter: board private structure to calculate for
+ */
+static int ngbe_lpbthresh(struct ngbe_adapter *adapter)
+{
+ struct net_device *dev = adapter->netdev;
+ int tc;
+ u32 dv_id;
+
+ /* Calculate max LAN frame size */
+ tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+ /* Calculate delay value for device */
+ dv_id = NGBE_LOW_DV(tc);
+
+	/* Delay value is calculated in bit times; convert to KB */
+ return NGBE_BT2KB(dv_id);
+}
+
+/*
+ * ngbe_pbthresh_setup - calculate and set up the high and low water marks
+ */
+static void ngbe_pbthresh_setup(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int num_tc = netdev_get_num_tc(adapter->netdev);
+
+ if (!num_tc)
+ num_tc = 1;
+
+ hw->fc.high_water = ngbe_hpbthresh(adapter);
+ hw->fc.low_water = ngbe_lpbthresh(adapter);
+
+ /* Low water marks must not be larger than high water marks */
+ if (hw->fc.low_water > hw->fc.high_water)
+ hw->fc.low_water = 0;
+}
+
+static void ngbe_configure_pb(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int hdrm = 0;
+ int tc = netdev_get_num_tc(adapter->netdev);
+
+ TCALL(hw, mac.ops.setup_rxpba, tc, hdrm, PBA_STRATEGY_EQUAL);
+ ngbe_pbthresh_setup(adapter);
+}
+
+void ngbe_configure_isb(struct ngbe_adapter *adapter)
+{
+ /* set ISB Address */
+ struct ngbe_hw *hw = &adapter->hw;
+
+ wr32(hw, NGBE_PX_ISB_ADDR_L,
+ adapter->isb_dma & DMA_BIT_MASK(32));
+ wr32(hw, NGBE_PX_ISB_ADDR_H, adapter->isb_dma >> 32);
+}
+
+void ngbe_configure_port(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 value, i;
+
+	if (adapter->num_vfs == 0)
+		value = NGBE_CFG_PORT_CTL_NUM_VT_NONE;
+	else
+		value = NGBE_CFG_PORT_CTL_NUM_VT_8;
+
+ /* enable double vlan and qinq, NONE VT at default */
+ value |= NGBE_CFG_PORT_CTL_D_VLAN |
+ NGBE_CFG_PORT_CTL_QINQ;
+ wr32m(hw, NGBE_CFG_PORT_CTL,
+ NGBE_CFG_PORT_CTL_D_VLAN |
+ NGBE_CFG_PORT_CTL_QINQ |
+ NGBE_CFG_PORT_CTL_NUM_VT_MASK,
+ value);
+
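+	/*
+	 * Each NGBE_CFG_TAG_TPID register holds two 16-bit TPIDs: slot 0
+	 * carries 802.1Q/802.1AD for double VLAN, and the remaining slots
+	 * default to 802.1Q.
+	 */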
+ wr32(hw, NGBE_CFG_TAG_TPID(0),
+ ETH_P_8021Q | ETH_P_8021AD << 16);
+ adapter->hw.tpid[0] = ETH_P_8021Q;
+ adapter->hw.tpid[1] = ETH_P_8021AD;
+ for (i = 1; i < 4; i++)
+ wr32(hw, NGBE_CFG_TAG_TPID(i),
+ ETH_P_8021Q | ETH_P_8021Q << 16);
+ for (i = 2; i < 8; i++)
+ adapter->hw.tpid[i] = ETH_P_8021Q;
+}
+
+static void ngbe_configure(struct ngbe_adapter *adapter)
+{
+ ngbe_configure_pb(adapter);
+
+ /*
+ * We must restore virtualization before VLANs or else
+ * the VLVF registers will not be populated
+ */
+ ngbe_configure_virtualization(adapter);
+ /* configure Double Vlan */
+ ngbe_configure_port(adapter);
+
+ ngbe_set_rx_mode(adapter->netdev);
+ ngbe_restore_vlan(adapter);
+
+ ngbe_configure_tx(adapter);
+ ngbe_configure_rx(adapter);
+ ngbe_configure_isb(adapter);
+}
+
+/**
+ * ngbe_non_sfp_link_config - set up non-SFP+ link
+ * @hw: pointer to private hardware struct
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int ngbe_non_sfp_link_config(struct ngbe_hw *hw)
+{
+ u32 speed;
+ bool autoneg, link_up = false;
+ u32 ret = NGBE_ERR_LINK_SETUP;
+
+ ret = TCALL(hw, mac.ops.check_link, &speed, &link_up, false);
+
+ speed = hw->phy.autoneg_advertised;
+ if (!speed)
+ ret = TCALL(hw, mac.ops.get_link_capabilities, &speed,
+ &autoneg);
+
+	/* OCP and NCSI cards skip the extra PHY bring-up */
+	if ((hw->subsystem_device_id & OEM_MASK) != OCP_CARD &&
+	    (hw->subsystem_device_id & NCSI_SUP_MASK) != NCSI_SUP) {
+		msleep(50);
+		if (hw->phy.type == ngbe_phy_internal) {
+			TCALL(hw, eeprom.ops.phy_signal_set);
+			TCALL(hw, phy.ops.setup_once);
+		}
+	}
+
+ ret = TCALL(hw, mac.ops.setup_link, speed, false);
+
+ return ret;
+}
+
+static void ngbe_setup_gpie(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 gpie = 0;
+
+ if (adapter->flags & NGBE_FLAG_MSIX_ENABLED) {
+ gpie = NGBE_PX_GPIE_MODEL;
+ /*
+ * use EIAM to auto-mask when MSI-X interrupt is asserted
+ * this saves a register write for every interrupt
+ */
+ } else {
+ /* legacy interrupts, use EIAM to auto-mask when reading EICR,
+ * specifically only auto mask tx and rx interrupts */
+ }
+
+ wr32(hw, NGBE_PX_GPIE, gpie);
+}
+
+static void ngbe_up_complete(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int err;
+
+ ngbe_get_hw_control(adapter);
+ ngbe_setup_gpie(adapter);
+
+ if (adapter->flags & NGBE_FLAG_MSIX_ENABLED)
+ ngbe_configure_msix(adapter);
+ else
+ ngbe_configure_msi_and_legacy(adapter);
+
+ smp_mb__before_atomic();
+ clear_bit(__NGBE_DOWN, &adapter->state);
+ ngbe_napi_enable_all(adapter);
+#ifndef CONFIG_NGBE_NO_LLI
+ ngbe_configure_lli(adapter);
+#endif
+
+ err = ngbe_non_sfp_link_config(hw);
+ if (err)
+ e_err(probe, "link_config FAILED %d\n", err);
+
+	/* select GMII */
+ wr32(hw, NGBE_MAC_TX_CFG,
+ (rd32(hw, NGBE_MAC_TX_CFG) & ~NGBE_MAC_TX_CFG_SPEED_MASK) |
+ NGBE_MAC_TX_CFG_SPEED_1G);
+
+ /* clear any pending interrupts, may auto mask */
+ rd32(hw, NGBE_PX_IC);
+ rd32(hw, NGBE_PX_MISC_IC);
+ ngbe_irq_enable(adapter, true, true);
+
+ if (((hw->subsystem_device_id & OEM_MASK) == LY_M88E1512_SFP) ||
+ (hw->subsystem_device_id & OEM_MASK) == LY_YT8521S_SFP)
+		/* gpio0 is used for power on/off control */
+ wr32(hw, NGBE_GPIO_DR, 0);
+
+ /* enable transmits */
+ netif_tx_start_all_queues(adapter->netdev);
+
+ /* bring the link up in the watchdog, this could race with our first
+ * link up interrupt but shouldn't be a problem */
+ adapter->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
+ adapter->link_check_timeout = jiffies;
+#ifdef CONFIG_NGBE_POLL_LINK_STATUS
+ mod_timer(&adapter->link_check_timer, jiffies);
+#endif
+ mod_timer(&adapter->service_timer, jiffies);
+ /* ngbe_clear_vf_stats_counters(adapter); */
+
+ /* Set PF Reset Done bit so PF/VF Mail Ops can work */
+ wr32m(hw, NGBE_CFG_PORT_CTL,
+ NGBE_CFG_PORT_CTL_PFRSTD, NGBE_CFG_PORT_CTL_PFRSTD);
+}
+
+void ngbe_reinit_locked(struct ngbe_adapter *adapter)
+{
+ WARN_ON(in_interrupt());
+ /* put off any impending NetWatchDogTimeout */
+ netif_trans_update(adapter->netdev);
+
+ while (test_and_set_bit(__NGBE_RESETTING, &adapter->state))
+ usleep_range(1000, 2000);
+ ngbe_down(adapter);
+ /*
+ * If SR-IOV enabled then wait a bit before bringing the adapter
+ * back up to give the VFs time to respond to the reset. The
+ * two second wait is based upon the watchdog timer cycle in
+ * the VF driver.
+ */
+ if (adapter->flags & NGBE_FLAG_SRIOV_ENABLED)
+ msleep(2000);
+ ngbe_up(adapter);
+ clear_bit(__NGBE_RESETTING, &adapter->state);
+}
+
+void ngbe_up(struct ngbe_adapter *adapter)
+{
+ /* hardware has been reset, we need to reload some things */
+ ngbe_configure(adapter);
+ ngbe_up_complete(adapter);
+}
+
+void ngbe_reset(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ struct net_device *netdev = adapter->netdev;
+ int err;
+ u8 old_addr[ETH_ALEN];
+
+ if (NGBE_REMOVED(hw->hw_addr))
+ return;
+
+ err = TCALL(hw, mac.ops.init_hw);
+ switch (err) {
+ case 0:
+ break;
+ case NGBE_ERR_MASTER_REQUESTS_PENDING:
+ e_dev_err("master disable timed out\n");
+ break;
+ case NGBE_ERR_EEPROM_VERSION:
+ /* We are running on a pre-production device, log a warning */
+ e_dev_warn("This device is a pre-production adapter/LOM. "
+ "Please be aware there may be issues associated "
+ "with your hardware. If you are experiencing "
+ "problems please contact your hardware "
+ "representative who provided you with this "
+ "hardware.\n");
+ break;
+ default:
+ e_dev_err("Hardware Error: %d\n", err);
+ }
+
+ /* do not flush user set addresses */
+ memcpy(old_addr, &adapter->mac_table[0].addr, netdev->addr_len);
+ ngbe_flush_sw_mac_table(adapter);
+ ngbe_mac_set_default_filter(adapter, old_addr);
+
+ /* update SAN MAC vmdq pool selection */
+ TCALL(hw, mac.ops.set_vmdq_san_mac, VMDQ_P(0));
+
+ /* Clear saved DMA coalescing values except for watchdog_timer */
+ hw->mac.dmac_config.fcoe_en = false;
+ hw->mac.dmac_config.link_speed = 0;
+ hw->mac.dmac_config.fcoe_tc = 0;
+ hw->mac.dmac_config.num_tcs = 0;
+
+ if (test_bit(__NGBE_PTP_RUNNING, &adapter->state))
+ ngbe_ptp_reset(adapter);
+}
+
+/**
+ * ngbe_clean_rx_ring - Free Rx Buffers per Queue
+ * @rx_ring: ring to free buffers from
+ **/
+static void ngbe_clean_rx_ring(struct ngbe_ring *rx_ring)
+{
+ struct device *dev = rx_ring->dev;
+ unsigned long size;
+ u16 i;
+
+ /* ring already cleared, nothing to do */
+ if (!rx_ring->rx_buffer_info)
+ return;
+
+ /* Free all the Rx ring sk_buffs */
+ for (i = 0; i < rx_ring->count; i++) {
+ struct ngbe_rx_buffer *rx_buffer = &rx_ring->rx_buffer_info[i];
+ if (rx_buffer->dma) {
+ dma_unmap_single(dev,
+ rx_buffer->dma,
+ rx_ring->rx_buf_len,
+ DMA_FROM_DEVICE);
+ rx_buffer->dma = 0;
+ }
+
+ if (rx_buffer->skb) {
+ struct sk_buff *skb = rx_buffer->skb;
+ if (NGBE_CB(skb)->dma_released) {
+ dma_unmap_single(dev,
+ NGBE_CB(skb)->dma,
+ rx_ring->rx_buf_len,
+ DMA_FROM_DEVICE);
+ NGBE_CB(skb)->dma = 0;
+ NGBE_CB(skb)->dma_released = false;
+ }
+
+ if (NGBE_CB(skb)->page_released)
+ dma_unmap_page(dev,
+ NGBE_CB(skb)->dma,
+ ngbe_rx_bufsz(rx_ring),
+ DMA_FROM_DEVICE);
+ dev_kfree_skb(skb);
+ rx_buffer->skb = NULL;
+ }
+
+ if (!rx_buffer->page)
+ continue;
+
+ dma_unmap_page(dev, rx_buffer->page_dma,
+ ngbe_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE);
+
+ __free_pages(rx_buffer->page,
+ ngbe_rx_pg_order(rx_ring));
+ rx_buffer->page = NULL;
+ }
+
+ size = sizeof(struct ngbe_rx_buffer) * rx_ring->count;
+ memset(rx_ring->rx_buffer_info, 0, size);
+
+ /* Zero out the descriptor ring */
+ memset(rx_ring->desc, 0, rx_ring->size);
+
+ rx_ring->next_to_alloc = 0;
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
+}
+
+/**
+ * ngbe_clean_tx_ring - Free Tx Buffers
+ * @tx_ring: ring to be cleaned
+ **/
+static void ngbe_clean_tx_ring(struct ngbe_ring *tx_ring)
+{
+ struct ngbe_tx_buffer *tx_buffer_info;
+ unsigned long size;
+ u16 i;
+
+ /* ring already cleared, nothing to do */
+ if (!tx_ring->tx_buffer_info)
+ return;
+
+ /* Free all the Tx ring sk_buffs */
+ for (i = 0; i < tx_ring->count; i++) {
+ tx_buffer_info = &tx_ring->tx_buffer_info[i];
+ ngbe_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
+ }
+
+ netdev_tx_reset_queue(txring_txq(tx_ring));
+
+ size = sizeof(struct ngbe_tx_buffer) * tx_ring->count;
+ memset(tx_ring->tx_buffer_info, 0, size);
+
+ /* Zero out the descriptor ring */
+ memset(tx_ring->desc, 0, tx_ring->size);
+}
+
+/**
+ * ngbe_clean_all_rx_rings - Free Rx Buffers for all queues
+ * @adapter: board private structure
+ **/
+static void ngbe_clean_all_rx_rings(struct ngbe_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ ngbe_clean_rx_ring(adapter->rx_ring[i]);
+}
+
+/**
+ * ngbe_clean_all_tx_rings - Free Tx Buffers for all queues
+ * @adapter: board private structure
+ **/
+static void ngbe_clean_all_tx_rings(struct ngbe_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ ngbe_clean_tx_ring(adapter->tx_ring[i]);
+}
+
+void ngbe_disable_device(struct ngbe_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 i;
+
+ /* signal that we are down to the interrupt handler */
+ if (test_and_set_bit(__NGBE_DOWN, &adapter->state))
+ return; /* do nothing if already down */
+
+ ngbe_disable_pcie_master(hw);
+ /* disable receives */
+ TCALL(hw, mac.ops.disable_rx);
+
+ /* disable all enabled rx queues */
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ /* this call also flushes the previous write */
+ ngbe_disable_rx_queue(adapter, adapter->rx_ring[i]);
+
+ netif_tx_stop_all_queues(netdev);
+
+ /* call carrier off first to avoid false dev_watchdog timeouts */
+ netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
+
+ if ((hw->subsystem_device_id & OEM_MASK) == LY_M88E1512_SFP ||
+ (hw->subsystem_device_id & OEM_MASK) == LY_YT8521S_SFP)
+		/* gpio0 is used for power on/off control */
+ wr32(hw, NGBE_GPIO_DR, NGBE_GPIO_DR_0);
+
+ ngbe_irq_disable(adapter);
+
+ ngbe_napi_disable_all(adapter);
+
+ adapter->flags2 &= ~(NGBE_FLAG2_PF_RESET_REQUESTED |
+ NGBE_FLAG2_DEV_RESET_REQUESTED |
+ NGBE_FLAG2_GLOBAL_RESET_REQUESTED);
+ adapter->flags &= ~NGBE_FLAG_NEED_LINK_UPDATE;
+
+ del_timer_sync(&adapter->service_timer);
+#ifdef CONFIG_NGBE_POLL_LINK_STATUS
+ del_timer_sync(&adapter->link_check_timer);
+#endif
+ if (adapter->num_vfs) {
+ /* Clear EITR Select mapping */
+ wr32(&adapter->hw, NGBE_PX_ITRSEL, 0);
+
+ /* Mark all the VFs as inactive */
+ for (i = 0 ; i < adapter->num_vfs; i++)
+ adapter->vfinfo[i].clear_to_send = 0;
+
+ /* ping all the active vfs to let them know we are going down */
+ ngbe_ping_all_vfs(adapter);
+
+ /* Disable all VFTE/VFRE TX/RX */
+ ngbe_disable_tx_rx(adapter);
+ }
+
+	/* OCP/NCSI management needs the MAC transmitter kept enabled */
+	if (!(((hw->subsystem_device_id & OEM_MASK) == OCP_CARD) ||
+	    ((hw->subsystem_device_id & WOL_SUP_MASK) == WOL_SUP) ||
+	    ((hw->subsystem_device_id & NCSI_SUP_MASK) == NCSI_SUP))) {
+		/* disable mac transmitter */
+		wr32m(hw, NGBE_MAC_TX_CFG, NGBE_MAC_TX_CFG_TE, 0);
+	}
+
+ /* disable transmits in the hardware now that interrupts are off */
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ u8 reg_idx = adapter->tx_ring[i]->reg_idx;
+ wr32(hw, NGBE_PX_TR_CFG(reg_idx),
+ NGBE_PX_TR_CFG_SWFLSH);
+ }
+
+ /* Disable the Tx DMA engine */
+ wr32m(hw, NGBE_TDM_CTL, NGBE_TDM_CTL_TE, 0);
+}
+
+void ngbe_down(struct ngbe_adapter *adapter)
+{
+ ngbe_disable_device(adapter);
+
+ if (!pci_channel_offline(adapter->pdev))
+ ngbe_reset(adapter);
+
+ ngbe_clean_all_tx_rings(adapter);
+ ngbe_clean_all_rx_rings(adapter);
+}
+
+/**
+ * ngbe_init_shared_code - Initialize the shared code
+ * @hw: pointer to hardware structure
+ *
+ * This will assign function pointers and assign the MAC type and PHY code.
+ * Does not touch the hardware. This function must be called prior to any
+ * other function in the shared code. The ngbe_hw structure should be
+ * memset to 0 prior to calling this function. The following fields in
+ * hw structure should be filled in prior to calling this function:
+ * hw_addr, back, device_id, vendor_id, subsystem_device_id,
+ * subsystem_vendor_id, and revision_id
+ **/
+s32 ngbe_init_shared_code(struct ngbe_hw *hw)
+{
+ DEBUGFUNC("\n");
+
+ if ((hw->subsystem_device_id & INTERNAL_SFP_MASK) == INTERNAL_SFP ||
+ (hw->subsystem_device_id & OEM_MASK) == LY_M88E1512_SFP)
+ hw->phy.type = ngbe_phy_m88e1512_sfi;
+ else if (hw->subsystem_device_id == NGBE_WX1860AL_M88E1512_RJ45)
+ hw->phy.type = ngbe_phy_m88e1512;
+ else if ((hw->subsystem_device_id & OEM_MASK) == YT8521S_SFP ||
+ (hw->subsystem_device_id & OEM_MASK) == LY_YT8521S_SFP)
+ hw->phy.type = ngbe_phy_yt8521s_sfi;
+ else
+ hw->phy.type = ngbe_phy_internal;
+
+	/* select MDIO clause 22 for PHY access */
+	wr32(hw, NGBE_MDIO_CLAUSE_SELECT, 0xF);
+
+ return ngbe_init_ops(hw);
+}
+
+/**
+ * ngbe_sw_init - Initialize general software structures (struct ngbe_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * ngbe_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ **/
+static const u32 def_rss_key[10] = {
+ 0xE291D73D, 0x1805EC6C, 0x2A94B30D,
+ 0xA54F2BEC, 0xEA49AF7C, 0xE214AD3D, 0xB855AABE,
+ 0x6A3E67EA, 0x14364D17, 0x3BED200D
+};
+
+static int ngbe_sw_init(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ struct pci_dev *pdev = adapter->pdev;
+ int err;
+
+ /* PCI config space info */
+ hw->vendor_id = pdev->vendor;
+ hw->device_id = pdev->device;
+ pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+ if (hw->revision_id == NGBE_FAILED_READ_CFG_BYTE &&
+ ngbe_check_cfg_remove(hw, pdev)) {
+ e_err(probe, "read of revision id failed\n");
+ err = -ENODEV;
+ goto out;
+ }
+ hw->subsystem_vendor_id = pdev->subsystem_vendor;
+ hw->subsystem_device_id = pdev->subsystem_device;
+
+ /* phy type, phy ops, mac ops */
+ err = ngbe_init_shared_code(hw);
+ if (err) {
+ e_err(probe, "init_shared_code failed: %d\n", err);
+ goto out;
+ }
+
+ adapter->mac_table = kzalloc(sizeof(struct ngbe_mac_addr) *
+ hw->mac.num_rar_entries,
+ GFP_ATOMIC);
+ if (!adapter->mac_table) {
+ err = NGBE_ERR_OUT_OF_MEM;
+ e_err(probe, "mac_table allocation failed: %d\n", err);
+ goto out;
+ }
+
+ memcpy(adapter->rss_key, def_rss_key, sizeof(def_rss_key));
+
+ /* Set common capability flags and settings */
+ adapter->max_q_vectors = NGBE_MAX_MSIX_Q_VECTORS_EMERALD;
+
+ /* Set MAC specific capability flags and exceptions */
+ adapter->flags |= NGBE_FLAGS_SP_INIT;
+ adapter->flags2 |= NGBE_FLAG2_TEMP_SENSOR_CAPABLE;
+ adapter->flags2 |= NGBE_FLAG2_EEE_CAPABLE;
+
+ /* init mailbox params */
+ TCALL(hw, mbx.ops.init_params);
+
+ /* default flow control settings */
+ hw->fc.requested_mode = ngbe_fc_full;
+ hw->fc.current_mode = ngbe_fc_full; /* init for ethtool output */
+
+ adapter->last_lfc_mode = hw->fc.current_mode;
+ hw->fc.pause_time = NGBE_DEFAULT_FCPAUSE;
+ hw->fc.send_xon = true;
+ hw->fc.disable_fc_autoneg = false;
+
+ /* set default ring sizes */
+ adapter->tx_ring_count = NGBE_DEFAULT_TXD;
+ adapter->rx_ring_count = NGBE_DEFAULT_RXD;
+
+ /* set default work limits */
+ adapter->tx_work_limit = NGBE_DEFAULT_TX_WORK;
+ adapter->rx_work_limit = NGBE_DEFAULT_RX_WORK;
+
+ adapter->tx_timeout_recovery_level = 0;
+
+ /* PF holds first pool slot */
+ adapter->num_vmdqs = 1;
+ set_bit(0, &adapter->fwd_bitmask);
+ set_bit(__NGBE_DOWN, &adapter->state);
+out:
+ return err;
+}
+
+/**
+ * ngbe_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring: tx descriptor ring (for a specific queue) to setup
+ *
+ * Return 0 on success, negative on failure
+ **/
+int ngbe_setup_tx_resources(struct ngbe_ring *tx_ring)
+{
+ struct device *dev = tx_ring->dev;
+ int orig_node = dev_to_node(dev);
+ int numa_node = -1;
+ int size;
+
+ size = sizeof(struct ngbe_tx_buffer) * tx_ring->count;
+
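+	/*
+	 * Prefer memory on the NUMA node of the queue's interrupt vector;
+	 * fall back to any node if the local allocation fails.
+	 */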
+ if (tx_ring->q_vector)
+ numa_node = tx_ring->q_vector->numa_node;
+
+ tx_ring->tx_buffer_info = vzalloc_node(size, numa_node);
+ if (!tx_ring->tx_buffer_info)
+ tx_ring->tx_buffer_info = vzalloc(size);
+ if (!tx_ring->tx_buffer_info)
+ goto err;
+
+ /* round up to nearest 4K */
+ tx_ring->size = tx_ring->count * sizeof(union ngbe_tx_desc);
+ tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+ set_dev_node(dev, numa_node);
+ tx_ring->desc = dma_alloc_coherent(dev,
+ tx_ring->size,
+ &tx_ring->dma,
+ GFP_KERNEL);
+ set_dev_node(dev, orig_node);
+ if (!tx_ring->desc)
+ tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+ &tx_ring->dma, GFP_KERNEL);
+ if (!tx_ring->desc)
+ goto err;
+
+ return 0;
+
+err:
+ vfree(tx_ring->tx_buffer_info);
+ tx_ring->tx_buffer_info = NULL;
+ dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
+ return -ENOMEM;
+}
+
+/**
+ * ngbe_setup_all_tx_resources - allocate all queues Tx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not). It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int ngbe_setup_all_tx_resources(struct ngbe_adapter *adapter)
+{
+ int i, err = 0;
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ err = ngbe_setup_tx_resources(adapter->tx_ring[i]);
+ if (!err)
+ continue;
+
+ e_err(probe, "Allocation for Tx Queue %u failed\n", i);
+ goto err_setup_tx;
+ }
+
+ return 0;
+err_setup_tx:
+ /* rewind the index freeing the rings as we go */
+ while (i--)
+ ngbe_free_tx_resources(adapter->tx_ring[i]);
+ return err;
+}
+
+/**
+ * ngbe_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @rx_ring: rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int ngbe_setup_rx_resources(struct ngbe_ring *rx_ring)
+{
+ struct device *dev = rx_ring->dev;
+ int orig_node = dev_to_node(dev);
+ int numa_node = -1;
+ int size;
+
+ size = sizeof(struct ngbe_rx_buffer) * rx_ring->count;
+
+ if (rx_ring->q_vector)
+ numa_node = rx_ring->q_vector->numa_node;
+
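+	/* try the ring's local NUMA node first, then fall back to any node */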
+ rx_ring->rx_buffer_info = vzalloc_node(size, numa_node);
+ if (!rx_ring->rx_buffer_info)
+ rx_ring->rx_buffer_info = vzalloc(size);
+ if (!rx_ring->rx_buffer_info)
+ goto err;
+
+ /* Round up to nearest 4K */
+ rx_ring->size = rx_ring->count * sizeof(union ngbe_rx_desc);
+ rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+ set_dev_node(dev, numa_node);
+ rx_ring->desc = dma_alloc_coherent(dev,
+ rx_ring->size,
+ &rx_ring->dma,
+ GFP_KERNEL);
+ set_dev_node(dev, orig_node);
+ if (!rx_ring->desc)
+ rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+ &rx_ring->dma, GFP_KERNEL);
+ if (!rx_ring->desc)
+ goto err;
+
+ return 0;
+err:
+ vfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+ dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
+ return -ENOMEM;
+}
+
+/**
+ * ngbe_setup_all_rx_resources - allocate all queues Rx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not). It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int ngbe_setup_all_rx_resources(struct ngbe_adapter *adapter)
+{
+ int i, err = 0;
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ err = ngbe_setup_rx_resources(adapter->rx_ring[i]);
+		if (!err)
+			continue;
+
+ e_err(probe, "Allocation for Rx Queue %u failed\n", i);
+ goto err_setup_rx;
+ }
+
+ return 0;
+err_setup_rx:
+ /* rewind the index freeing the rings as we go */
+ while (i--)
+ ngbe_free_rx_resources(adapter->rx_ring[i]);
+ return err;
+}
+
+/**
+ * ngbe_setup_isb_resources - allocate interrupt status resources
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int ngbe_setup_isb_resources(struct ngbe_adapter *adapter)
+{
+ struct device *dev = pci_dev_to_dev(adapter->pdev);
+
+ adapter->isb_mem = dma_alloc_coherent(dev,
+ sizeof(u32) * NGBE_ISB_MAX,
+ &adapter->isb_dma,
+ GFP_KERNEL);
+ if (!adapter->isb_mem) {
+ e_err(probe, "ngbe_setup_isb_resources: alloc isb_mem failed\n");
+ return -ENOMEM;
+ }
+ memset(adapter->isb_mem, 0, sizeof(u32) * NGBE_ISB_MAX);
+ return 0;
+}
+
+/**
+ * ngbe_free_isb_resources - free interrupt status resources
+ * @adapter: board private structure
+ **/
+static void ngbe_free_isb_resources(struct ngbe_adapter *adapter)
+{
+ struct device *dev = pci_dev_to_dev(adapter->pdev);
+
+ dma_free_coherent(dev, sizeof(u32) * NGBE_ISB_MAX,
+ adapter->isb_mem, adapter->isb_dma);
+ adapter->isb_mem = NULL;
+}
+
+/**
+ * ngbe_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+void ngbe_free_tx_resources(struct ngbe_ring *tx_ring)
+{
+ ngbe_clean_tx_ring(tx_ring);
+
+ vfree(tx_ring->tx_buffer_info);
+ tx_ring->tx_buffer_info = NULL;
+
+ /* if not set, then don't free */
+ if (!tx_ring->desc)
+ return;
+
+ dma_free_coherent(tx_ring->dev, tx_ring->size,
+ tx_ring->desc, tx_ring->dma);
+ tx_ring->desc = NULL;
+}
+
+/**
+ * ngbe_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ **/
+static void ngbe_free_all_tx_resources(struct ngbe_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ ngbe_free_tx_resources(adapter->tx_ring[i]);
+}
+
+/**
+ * ngbe_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+void ngbe_free_rx_resources(struct ngbe_ring *rx_ring)
+{
+ ngbe_clean_rx_ring(rx_ring);
+
+ vfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+
+ /* if not set, then don't free */
+ if (!rx_ring->desc)
+ return;
+
+ dma_free_coherent(rx_ring->dev, rx_ring->size,
+ rx_ring->desc, rx_ring->dma);
+ rx_ring->desc = NULL;
+}
+
+/**
+ * ngbe_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ **/
+static void ngbe_free_all_rx_resources(struct ngbe_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ ngbe_free_rx_resources(adapter->rx_ring[i]);
+}
+
+/**
+ * ngbe_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int ngbe_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ if ((new_mtu < 68) || (new_mtu > 9414))
+ return -EINVAL;
+
+ /*
+ * we cannot allow legacy VFs to enable their receive
+ * paths when MTU greater than 1500 is configured. So display a
+ * warning that legacy VFs will be disabled.
+ */
+ if ((adapter->flags & NGBE_FLAG_SRIOV_ENABLED) &&
+ (new_mtu > ETH_DATA_LEN))
+ e_warn(probe, "Setting MTU > 1500 will disable legacy VFs\n");
+
+ e_info(probe, "changing MTU from %d to %d\n", netdev->mtu, new_mtu);
+
+ /* must set new MTU before calling down or up */
+ netdev->mtu = new_mtu;
+
+ if (netif_running(netdev))
+ ngbe_reinit_locked(adapter);
+
+ return 0;
+}
+
+/**
+ * ngbe_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP). At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ **/
+int ngbe_open(struct net_device *netdev)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ int err;
+
+ /* disallow open during test */
+ if (test_bit(__NGBE_TESTING, &adapter->state))
+ return -EBUSY;
+
+ netif_carrier_off(netdev);
+
+ /* allocate transmit descriptors */
+ err = ngbe_setup_all_tx_resources(adapter);
+ if (err)
+ goto err_setup_tx;
+
+ /* allocate receive descriptors */
+ err = ngbe_setup_all_rx_resources(adapter);
+ if (err)
+ goto err_setup_rx;
+
+ err = ngbe_setup_isb_resources(adapter);
+ if (err)
+ goto err_req_isb;
+
+ ngbe_configure(adapter);
+
+ err = ngbe_request_irq(adapter);
+ if (err)
+ goto err_req_irq;
+
+ if (adapter->num_tx_queues) {
+ /* Notify the stack of the actual queue counts. */
+ err = netif_set_real_num_tx_queues(netdev, adapter->num_vmdqs > 1
+ ? adapter->queues_per_pool
+ : adapter->num_tx_queues);
+ if (err)
+ goto err_set_queues;
+ }
+
+ if (adapter->num_rx_queues) {
+ err = netif_set_real_num_rx_queues(netdev, adapter->num_vmdqs > 1
+ ? adapter->queues_per_pool
+ : adapter->num_rx_queues);
+ if (err)
+ goto err_set_queues;
+ }
+
+ ngbe_ptp_init(adapter);
+ ngbe_up_complete(adapter);
+
+ return 0;
+
+err_set_queues:
+ ngbe_free_irq(adapter);
+err_req_irq:
+ ngbe_free_isb_resources(adapter);
+err_req_isb:
+ ngbe_free_all_rx_resources(adapter);
+
+err_setup_rx:
+ ngbe_free_all_tx_resources(adapter);
+err_setup_tx:
+ ngbe_reset(adapter);
+ return err;
+}
+
+/**
+ * ngbe_close_suspend - actions necessary to both suspend and close flows
+ * @adapter: the private adapter struct
+ *
+ * This function should contain the necessary work common to both suspending
+ * and closing of the device.
+ */
+static void ngbe_close_suspend(struct ngbe_adapter *adapter)
+{
+ ngbe_ptp_suspend(adapter);
+ ngbe_disable_device(adapter);
+
+ ngbe_clean_all_tx_rings(adapter);
+ ngbe_clean_all_rx_rings(adapter);
+
+ ngbe_free_irq(adapter);
+
+ ngbe_free_isb_resources(adapter);
+ ngbe_free_all_rx_resources(adapter);
+ ngbe_free_all_tx_resources(adapter);
+}
+
+/**
+ * ngbe_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS. The hardware is still under the drivers control, but
+ * needs to be disabled. A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ **/
+int ngbe_close(struct net_device *netdev)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ ngbe_ptp_stop(adapter);
+ ngbe_down(adapter);
+ ngbe_free_irq(adapter);
+
+ ngbe_free_isb_resources(adapter);
+ ngbe_free_all_rx_resources(adapter);
+ ngbe_free_all_tx_resources(adapter);
+
+ ngbe_release_hw_control(adapter);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int ngbe_resume(struct pci_dev *pdev)
+{
+ struct ngbe_adapter *adapter;
+ struct net_device *netdev;
+	int err;
+
+ adapter = pci_get_drvdata(pdev);
+ netdev = adapter->netdev;
+ adapter->hw.hw_addr = adapter->io_addr;
+ pci_set_power_state(pdev, PCI_D0);
+ pci_restore_state(pdev);
+ /*
+ * pci_restore_state clears dev->state_saved so call
+ * pci_save_state to restore it.
+ */
+ pci_save_state(pdev);
+ wr32(&adapter->hw, NGBE_PSR_WKUP_CTL, adapter->wol);
+
+ err = pci_enable_device_mem(pdev);
+ if (err) {
+ e_dev_err("Cannot enable PCI device from suspend\n");
+ return err;
+ }
+ smp_mb__before_atomic();
+ clear_bit(__NGBE_DISABLED, &adapter->state);
+ pci_set_master(pdev);
+
+ pci_wake_from_d3(pdev, false);
+
+ ngbe_reset(adapter);
+
+ rtnl_lock();
+
+ err = ngbe_init_interrupt_scheme(adapter);
+ if (!err && netif_running(netdev))
+ err = ngbe_open(netdev);
+
+ rtnl_unlock();
+
+ if (err)
+ return err;
+
+ netif_device_attach(netdev);
+
+ return 0;
+}
+#endif /* CONFIG_PM */
+
+/*
+ * __ngbe_shutdown is not used when power management is disabled on
+ * older kernels (<2.6.12); it would then be defined but unused,
+ * causing a compile warning/error.
+ */
+static int __ngbe_shutdown(struct pci_dev *pdev, bool *enable_wake)
+{
+ struct ngbe_adapter *adapter = pci_get_drvdata(pdev);
+ struct net_device *netdev = adapter->netdev;
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 wufc = adapter->wol;
+#ifdef CONFIG_PM
+ int retval = 0;
+#endif
+
+ netif_device_detach(netdev);
+
+ rtnl_lock();
+ if (netif_running(netdev))
+ ngbe_close_suspend(adapter);
+ rtnl_unlock();
+
+ ngbe_clear_interrupt_scheme(adapter);
+
+#ifdef CONFIG_PM
+ retval = pci_save_state(pdev);
+ if (retval)
+ return retval;
+#endif
+
+	/* this won't stop the link if manageability or WoL is enabled */
+ ngbe_stop_mac_link_on_d3(hw);
+
+ if (wufc) {
+ ngbe_set_rx_mode(netdev);
+ ngbe_configure_rx(adapter);
+		/* enable the optics for SFP+ fiber so we can wake on LAN */
+ TCALL(hw, mac.ops.enable_tx_laser);
+
+ /* turn on all-multi mode if wake on multicast is enabled */
+ if (wufc & NGBE_PSR_WKUP_CTL_MC) {
+ wr32m(hw, NGBE_PSR_CTL,
+ NGBE_PSR_CTL_MPE, NGBE_PSR_CTL_MPE);
+ }
+
+ pci_clear_master(adapter->pdev);
+ wr32(hw, NGBE_PSR_WKUP_CTL, wufc);
+ } else {
+ wr32(hw, NGBE_PSR_WKUP_CTL, 0);
+ }
+
+ pci_wake_from_d3(pdev, !!wufc);
+
+ *enable_wake = !!wufc;
+ ngbe_release_hw_control(adapter);
+
+ if (!test_and_set_bit(__NGBE_DISABLED, &adapter->state))
+ pci_disable_device(pdev);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int ngbe_suspend(struct pci_dev *pdev,
+ pm_message_t __always_unused state)
+{
+ int retval;
+ bool wake;
+
+ retval = __ngbe_shutdown(pdev, &wake);
+ if (retval)
+ return retval;
+
+ if (wake) {
+ pci_prepare_to_sleep(pdev);
+ } else {
+ pci_wake_from_d3(pdev, false);
+ pci_set_power_state(pdev, PCI_D3hot);
+ }
+
+ return 0;
+}
+#endif /* CONFIG_PM */
+
+static void ngbe_shutdown(struct pci_dev *pdev)
+{
+ bool wake;
+
+ __ngbe_shutdown(pdev, &wake);
+
+ if (system_state == SYSTEM_POWER_OFF) {
+ pci_wake_from_d3(pdev, wake);
+ pci_set_power_state(pdev, PCI_D3hot);
+ }
+}
+
+/**
+ * ngbe_get_stats64 - Get System Network Statistics
+ * @netdev: network interface device structure
+ * @stats: storage space for 64bit statistics
+ *
+ * Returns 64bit statistics, for use in the ndo_get_stats64 callback. This
+ * function replaces ngbe_get_stats for kernels which support it.
+ */
+static void ngbe_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ int i;
+
+ rcu_read_lock();
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct ngbe_ring *ring = READ_ONCE(adapter->rx_ring[i]);
+ u64 bytes, packets;
+ unsigned int start;
+
+ if (ring) {
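+			/* u64_stats seqcount: retry the read if a writer
+			 * updated the counters while we were fetching them
+			 */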
+ do {
+ start = u64_stats_fetch_begin_irq(&ring->syncp);
+ packets = ring->stats.packets;
+ bytes = ring->stats.bytes;
+ } while (u64_stats_fetch_retry_irq(&ring->syncp,
+ start));
+ stats->rx_packets += packets;
+ stats->rx_bytes += bytes;
+ }
+ }
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct ngbe_ring *ring = READ_ONCE(adapter->tx_ring[i]);
+ u64 bytes, packets;
+ unsigned int start;
+
+ if (ring) {
+ do {
+ start = u64_stats_fetch_begin_irq(&ring->syncp);
+ packets = ring->stats.packets;
+ bytes = ring->stats.bytes;
+ } while (u64_stats_fetch_retry_irq(&ring->syncp,
+ start));
+ stats->tx_packets += packets;
+ stats->tx_bytes += bytes;
+ }
+ }
+ rcu_read_unlock();
+ /* following stats updated by ngbe_watchdog_task() */
+ stats->multicast = netdev->stats.multicast;
+ stats->rx_errors = netdev->stats.rx_errors;
+ stats->rx_length_errors = netdev->stats.rx_length_errors;
+ stats->rx_crc_errors = netdev->stats.rx_crc_errors;
+ stats->rx_missed_errors = netdev->stats.rx_missed_errors;
+}
+
+/**
+ * ngbe_update_stats - Update the board statistics counters.
+ * @adapter: board private structure
+ **/
+void ngbe_update_stats(struct ngbe_adapter *adapter)
+{
+ struct net_device_stats *net_stats = &adapter->netdev->stats;
+ struct ngbe_hw *hw = &adapter->hw;
+ struct ngbe_hw_stats *hwstats = &adapter->stats;
+ u64 total_mpc = 0;
+ u32 i, bprc, lxon, lxoff;
+ u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0;
+ u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
+ u64 bytes = 0, packets = 0, hw_csum_rx_error = 0;
+ u64 hw_csum_rx_good = 0;
+
+ if (test_bit(__NGBE_DOWN, &adapter->state) ||
+ test_bit(__NGBE_RESETTING, &adapter->state))
+ return;
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct ngbe_ring *rx_ring = adapter->rx_ring[i];
+ non_eop_descs += rx_ring->rx_stats.non_eop_descs;
+ alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed;
+ alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed;
+ hw_csum_rx_error += rx_ring->rx_stats.csum_err;
+ hw_csum_rx_good += rx_ring->rx_stats.csum_good_cnt;
+ bytes += rx_ring->stats.bytes;
+ packets += rx_ring->stats.packets;
+ }
+
+ adapter->non_eop_descs = non_eop_descs;
+ adapter->alloc_rx_page_failed = alloc_rx_page_failed;
+ adapter->alloc_rx_buff_failed = alloc_rx_buff_failed;
+ adapter->hw_csum_rx_error = hw_csum_rx_error;
+ adapter->hw_csum_rx_good = hw_csum_rx_good;
+ net_stats->rx_bytes = bytes;
+ net_stats->rx_packets = packets;
+
+ bytes = 0;
+ packets = 0;
+ /* gather some stats to the adapter struct that are per queue */
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct ngbe_ring *tx_ring = adapter->tx_ring[i];
+ restart_queue += tx_ring->tx_stats.restart_queue;
+ tx_busy += tx_ring->tx_stats.tx_busy;
+ bytes += tx_ring->stats.bytes;
+ packets += tx_ring->stats.packets;
+ }
+ adapter->restart_queue = restart_queue;
+ adapter->tx_busy = tx_busy;
+ net_stats->tx_bytes = bytes;
+ net_stats->tx_packets = packets;
+
+ hwstats->crcerrs += rd32(hw, NGBE_RX_CRC_ERROR_FRAMES_LOW);
+
+ hwstats->gprc += rd32(hw, NGBE_PX_GPRC);
+
+ ngbe_update_xoff_rx_lfc(adapter);
+
+ hwstats->o2bgptc += rd32(hw, NGBE_TDM_OS2BMC_CNT);
+ if (ngbe_check_mng_access(&adapter->hw)) {
+ hwstats->o2bspc += rd32(hw, NGBE_MNG_OS2BMC_CNT);
+ hwstats->b2ospc += rd32(hw, NGBE_MNG_BMC2OS_CNT);
+ }
+ hwstats->b2ogprc += rd32(hw, NGBE_RDM_BMC2OS_CNT);
+ hwstats->gorc += rd32(hw, NGBE_PX_GORC_LSB);
+ hwstats->gorc += (u64)rd32(hw, NGBE_PX_GORC_MSB) << 32;
+
+ hwstats->gotc += rd32(hw, NGBE_PX_GOTC_LSB);
+ hwstats->gotc += (u64)rd32(hw, NGBE_PX_GOTC_MSB) << 32;
+
+ adapter->hw_rx_no_dma_resources +=
+ rd32(hw, NGBE_RDM_DRP_PKT);
+ bprc = rd32(hw, NGBE_RX_BC_FRAMES_GOOD_LOW);
+ hwstats->bprc += bprc;
+ hwstats->mprc = 0;
+
+ for (i = 0; i < 8; i++)
+ hwstats->mprc += rd32(hw, NGBE_PX_MPRC(i));
+
+ hwstats->roc += rd32(hw, NGBE_RX_OVERSIZE_FRAMES_GOOD);
+ hwstats->rlec += rd32(hw, NGBE_RX_LEN_ERROR_FRAMES_LOW);
+ lxon = rd32(hw, NGBE_RDB_LXONTXC);
+ hwstats->lxontxc += lxon;
+ lxoff = rd32(hw, NGBE_RDB_LXOFFTXC);
+ hwstats->lxofftxc += lxoff;
+
+ hwstats->gptc += rd32(hw, NGBE_PX_GPTC);
+ hwstats->mptc += rd32(hw, NGBE_TX_MC_FRAMES_GOOD_LOW);
+ hwstats->ruc += rd32(hw, NGBE_RX_UNDERSIZE_FRAMES_GOOD);
+ hwstats->tpr += rd32(hw, NGBE_RX_FRAME_CNT_GOOD_BAD_LOW);
+ hwstats->bptc += rd32(hw, NGBE_TX_BC_FRAMES_GOOD_LOW);
+ /* Fill out the OS statistics structure */
+ net_stats->multicast = hwstats->mprc;
+
+ /* Rx Errors */
+ net_stats->rx_errors = hwstats->crcerrs +
+ hwstats->rlec;
+ net_stats->rx_dropped = 0;
+ net_stats->rx_length_errors = hwstats->rlec;
+ net_stats->rx_crc_errors = hwstats->crcerrs;
+ total_mpc = rd32(hw, NGBE_RDB_MPCNT);
+ net_stats->rx_missed_errors = total_mpc;
+
+ /*
+ * VF Stats Collection - skip while resetting because these
+ * are not clear on read and otherwise you'll sometimes get
+ * crazy values.
+ */
+ if (!test_bit(__NGBE_RESETTING, &adapter->state)) {
+		for (i = 0; i < adapter->num_vfs; i++) {
+			UPDATE_VF_COUNTER_32bit(NGBE_VX_GPRC,
+				adapter->vfinfo[i].last_vfstats.gprc,
+				adapter->vfinfo[i].vfstats.gprc);
+			UPDATE_VF_COUNTER_32bit(NGBE_VX_GPTC,
+				adapter->vfinfo[i].last_vfstats.gptc,
+				adapter->vfinfo[i].vfstats.gptc);
+			UPDATE_VF_COUNTER_36bit(NGBE_VX_GORC_LSB,
+				NGBE_VX_GORC_MSB,
+				adapter->vfinfo[i].last_vfstats.gorc,
+				adapter->vfinfo[i].vfstats.gorc);
+			UPDATE_VF_COUNTER_36bit(NGBE_VX_GOTC_LSB,
+				NGBE_VX_GOTC_MSB,
+				adapter->vfinfo[i].last_vfstats.gotc,
+				adapter->vfinfo[i].vfstats.gotc);
+			UPDATE_VF_COUNTER_32bit(NGBE_VX_MPRC,
+				adapter->vfinfo[i].last_vfstats.mprc,
+				adapter->vfinfo[i].vfstats.mprc);
+		}
+ }
+}
+
+/**
+ * ngbe_check_hang_subtask - check for hung queues and dropped interrupts
+ * @adapter: pointer to the device adapter structure
+ *
+ * This function serves two purposes. First it strobes the interrupt lines
+ * in order to make certain interrupts are occurring. Secondly it sets the
+ * bits needed to check for TX hangs. As a result we should immediately
+ * determine if a hang has occurred.
+ */
+static void ngbe_check_hang_subtask(struct ngbe_adapter *adapter)
+{
+ int i;
+
+ /* If we're down or resetting, just bail */
+ if (test_bit(__NGBE_DOWN, &adapter->state) ||
+ test_bit(__NGBE_REMOVING, &adapter->state) ||
+ test_bit(__NGBE_RESETTING, &adapter->state))
+ return;
+
+ /* Force detection of hung controller */
+ if (netif_carrier_ok(adapter->netdev)) {
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ set_check_for_tx_hang(adapter->tx_ring[i]);
+ }
+}
+
+static void ngbe_watchdog_an_complete(struct ngbe_adapter *adapter)
+{
+ u32 link_speed = 0;
+ u32 lan_speed = 0;
+ bool link_up = true;
+ struct ngbe_hw *hw = &adapter->hw;
+
+ if (!(adapter->flags & NGBE_FLAG_NEED_ANC_CHECK))
+ return;
+
+ TCALL(hw, mac.ops.check_link, &link_speed, &link_up, false);
+
+ adapter->link_speed = link_speed;
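+	/* NGBE_CFG_LAN_SPEED field encoding: 0 = 10M, 1 = 100M, 2 = 1G */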
+ switch (link_speed) {
+ case NGBE_LINK_SPEED_100_FULL:
+ lan_speed = 1;
+ break;
+ case NGBE_LINK_SPEED_1GB_FULL:
+ lan_speed = 2;
+ break;
+ case NGBE_LINK_SPEED_10_FULL:
+ lan_speed = 0;
+ break;
+ default:
+ break;
+ }
+ wr32m(hw, NGBE_CFG_LAN_SPEED,
+ 0x3, lan_speed);
+
+ if (link_speed & (NGBE_LINK_SPEED_1GB_FULL |
+ NGBE_LINK_SPEED_100_FULL | NGBE_LINK_SPEED_10_FULL)) {
+ wr32(hw, NGBE_MAC_TX_CFG,
+ (rd32(hw, NGBE_MAC_TX_CFG) &
+ ~NGBE_MAC_TX_CFG_SPEED_MASK) | NGBE_MAC_TX_CFG_TE |
+ NGBE_MAC_TX_CFG_SPEED_1G);
+ }
+
+ adapter->flags &= ~NGBE_FLAG_NEED_ANC_CHECK;
+ adapter->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
+}
+
+/**
+ * ngbe_watchdog_update_link_status - update the link status
+ * @adapter: pointer to the device adapter structure
+ **/
+static void ngbe_watchdog_update_link_status(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 link_speed = adapter->link_speed;
+ bool link_up = adapter->link_up;
+ u32 lan_speed = 0;
+ u32 reg;
+
+#ifndef CONFIG_NGBE_POLL_LINK_STATUS
+ if (!(adapter->flags & NGBE_FLAG_NEED_LINK_UPDATE))
+ return;
+#endif
+ link_speed = NGBE_LINK_SPEED_1GB_FULL;
+ link_up = true;
+
+ TCALL(hw, mac.ops.check_link, &link_speed, &link_up, false);
+#ifndef CONFIG_NGBE_POLL_LINK_STATUS
+ if (link_up || time_after(jiffies, (adapter->link_check_timeout +
+ NGBE_TRY_LINK_TIMEOUT))) {
+ adapter->flags &= ~NGBE_FLAG_NEED_LINK_UPDATE;
+ }
+#else
+ if (adapter->link_up == link_up &&
+ adapter->link_speed == link_speed)
+ return;
+#endif
+
+ adapter->link_speed = link_speed;
+ switch (link_speed) {
+ case NGBE_LINK_SPEED_100_FULL:
+ lan_speed = 1;
+ break;
+ case NGBE_LINK_SPEED_1GB_FULL:
+ lan_speed = 2;
+ break;
+ case NGBE_LINK_SPEED_10_FULL:
+ lan_speed = 0;
+ break;
+ default:
+ break;
+ }
+ wr32m(hw, NGBE_CFG_LAN_SPEED,
+ 0x3, lan_speed);
+
+ if (link_up) {
+ TCALL(hw, mac.ops.fc_enable);
+ ngbe_set_rx_drop_en(adapter);
+
+ adapter->last_rx_ptp_check = jiffies;
+
+ if (test_bit(__NGBE_PTP_RUNNING, &adapter->state))
+ ngbe_ptp_start_cyclecounter(adapter);
+
+ if (link_speed & (NGBE_LINK_SPEED_1GB_FULL |
+ NGBE_LINK_SPEED_100_FULL | NGBE_LINK_SPEED_10_FULL)) {
+ wr32(hw, NGBE_MAC_TX_CFG,
+ (rd32(hw, NGBE_MAC_TX_CFG) &
+ ~NGBE_MAC_TX_CFG_SPEED_MASK) | NGBE_MAC_TX_CFG_TE |
+ NGBE_MAC_TX_CFG_SPEED_1G);
+ }
+
+ /* Re configure MAC RX */
+ reg = rd32(hw, NGBE_MAC_RX_CFG);
+ wr32(hw, NGBE_MAC_RX_CFG, reg);
+ wr32(hw, NGBE_MAC_PKT_FLT, NGBE_MAC_PKT_FLT_PR);
+ reg = rd32(hw, NGBE_MAC_WDG_TIMEOUT);
+ wr32(hw, NGBE_MAC_WDG_TIMEOUT, reg);
+ }
+
+ adapter->link_up = link_up;
+ if (hw->mac.ops.dmac_config && hw->mac.dmac_config.watchdog_timer) {
+ u8 num_tcs = netdev_get_num_tc(adapter->netdev);
+
+ if (hw->mac.dmac_config.link_speed != link_speed ||
+ hw->mac.dmac_config.num_tcs != num_tcs) {
+ hw->mac.dmac_config.link_speed = link_speed;
+ hw->mac.dmac_config.num_tcs = num_tcs;
+ TCALL(hw, mac.ops.dmac_config);
+ }
+ }
+}
+
+static void ngbe_update_default_up(struct ngbe_adapter *adapter)
+{
+ u8 up = 0;
+ adapter->default_up = up;
+}
+
+/**
+ * ngbe_watchdog_link_is_up - update netif_carrier status and
+ * print link up message
+ * @adapter: pointer to the device adapter structure
+ **/
+static void ngbe_watchdog_link_is_up(struct ngbe_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 link_speed = adapter->link_speed;
+ bool flow_rx, flow_tx;
+
+ /* only continue if link was previously down */
+ if (netif_carrier_ok(netdev))
+ return;
+
+ adapter->flags2 &= ~NGBE_FLAG2_SEARCH_FOR_SFP;
+
+ /* flow_rx, flow_tx report link flow control status */
+ flow_rx = (rd32(hw, NGBE_MAC_RX_FLOW_CTRL) & 0x101) == 0x1;
+ flow_tx = !!(NGBE_RDB_RFCC_RFCE_802_3X &
+ rd32(hw, NGBE_RDB_RFCC));
+
+ e_info(drv, "NIC Link is Up %s, Flow Control: %s\n",
+ (link_speed == NGBE_LINK_SPEED_1GB_FULL ?
+ "1 Gbps" :
+ (link_speed == NGBE_LINK_SPEED_100_FULL ?
+ "100 Mbps" :
+ (link_speed == NGBE_LINK_SPEED_10_FULL ?
+ "10 Mbps" :
+ "unknown speed"))),
+ ((flow_rx && flow_tx) ? "RX/TX" :
+ (flow_rx ? "RX" :
+ (flow_tx ? "TX" : "None"))));
+
+ netif_carrier_on(netdev);
+ netif_tx_wake_all_queues(netdev);
+
+ /* update the default user priority for VFs */
+ ngbe_update_default_up(adapter);
+
+ /* ping all the active vfs to let them know link has changed */
+ ngbe_ping_all_vfs(adapter);
+}
+
+/**
+ * ngbe_watchdog_link_is_down - update netif_carrier status and
+ *                              print link down message
+ * @adapter: pointer to the adapter structure
+ **/
+static void ngbe_watchdog_link_is_down(struct ngbe_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ adapter->link_up = false;
+ adapter->link_speed = 0;
+
+ /* only continue if link was up previously */
+ if (!netif_carrier_ok(netdev))
+ return;
+
+ if (test_bit(__NGBE_PTP_RUNNING, &adapter->state))
+ ngbe_ptp_start_cyclecounter(adapter);
+
+ e_info(drv, "NIC Link is Down\n");
+ netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(netdev);
+
+ /* ping all the active vfs to let them know link has changed */
+ ngbe_ping_all_vfs(adapter);
+}
+
+static bool ngbe_ring_tx_pending(struct ngbe_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct ngbe_ring *tx_ring = adapter->tx_ring[i];
+
+ if (tx_ring->next_to_use != tx_ring->next_to_clean)
+ return true;
+ }
+
+ return false;
+}
+
+static bool ngbe_vf_tx_pending(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+	u32 q_per_pool = 1;
+	u32 i, j;
+
+ if (!adapter->num_vfs)
+ return false;
+
+ for (i = 0; i < adapter->num_vfs; i++) {
+ for (j = 0; j < q_per_pool; j++) {
+ u32 h, t;
+
+ h = rd32(hw,
+ NGBE_PX_TR_RPn(q_per_pool, i, j));
+ t = rd32(hw,
+ NGBE_PX_TR_WPn(q_per_pool, i, j));
+
+ if (h != t)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/**
+ * ngbe_watchdog_flush_tx - flush queues on link down
+ * @adapter: pointer to the device adapter structure
+ **/
+static void ngbe_watchdog_flush_tx(struct ngbe_adapter *adapter)
+{
+ if (!netif_carrier_ok(adapter->netdev)) {
+ if (ngbe_ring_tx_pending(adapter) ||
+ ngbe_vf_tx_pending(adapter)) {
+ /* We've lost link, so the controller stops DMA,
+ * but we've got queued Tx work that's never going
+ * to get done, so reset controller to flush Tx.
+ * (Do the reset outside of interrupt context).
+ */
+			e_warn(drv, "initiating reset due to lost link with pending Tx work\n");
+ adapter->flags2 |= NGBE_FLAG2_PF_RESET_REQUESTED;
+ }
+ }
+}
+
+#ifdef CONFIG_PCI_IOV
+static inline void ngbe_issue_vf_flr(struct ngbe_adapter *adapter,
+ struct pci_dev *vfdev)
+{
+ int pos, i;
+ u16 status;
+
+ /* wait for pending transactions on the bus */
+ for (i = 0; i < 4; i++) {
+ if (i)
+ msleep((1 << (i - 1)) * 100);
+
+ pcie_capability_read_word(vfdev, PCI_EXP_DEVSTA, &status);
+ if (!(status & PCI_EXP_DEVSTA_TRPND))
+ goto clear;
+ }
+
+ e_dev_warn("Issuing VFLR with pending transactions\n");
+
+clear:
+ pos = pci_find_capability(vfdev, PCI_CAP_ID_EXP);
+ if (!pos)
+ return;
+
+ e_dev_err("Issuing VFLR for VF %s\n", pci_name(vfdev));
+ pci_write_config_word(vfdev, pos + PCI_EXP_DEVCTL,
+ PCI_EXP_DEVCTL_BCR_FLR);
+ msleep(100);
+}
+
+
+static void ngbe_spoof_check(struct ngbe_adapter *adapter)
+{
+ u32 ssvpc;
+
+ /* Do not perform spoof check if in non-IOV mode */
+ if (adapter->num_vfs == 0)
+ return;
+ ssvpc = rd32(&adapter->hw, NGBE_TDM_SEC_DRP);
+
+	/*
+	 * The ssvpc register is cleared on read; if it reads zero, no
+	 * spoofed packets were seen in the last interval.
+	 */
+ if (!ssvpc)
+ return;
+
+ e_warn(drv, "%d Spoofed packets detected\n", ssvpc);
+}
+#endif /* CONFIG_PCI_IOV */
+
+/**
+ * ngbe_watchdog_subtask - check and bring link up
+ * @adapter: pointer to the device adapter structure
+ **/
+static void ngbe_watchdog_subtask(struct ngbe_adapter *adapter)
+{
+ /* if interface is down do nothing */
+ if (test_bit(__NGBE_DOWN, &adapter->state) ||
+ test_bit(__NGBE_REMOVING, &adapter->state) ||
+ test_bit(__NGBE_RESETTING, &adapter->state))
+ return;
+
+ ngbe_watchdog_an_complete(adapter);
+#ifndef CONFIG_NGBE_POLL_LINK_STATUS
+ ngbe_watchdog_update_link_status(adapter);
+
+ if (adapter->link_up)
+ ngbe_watchdog_link_is_up(adapter);
+ else
+ ngbe_watchdog_link_is_down(adapter);
+#endif
+#ifdef CONFIG_PCI_IOV
+ ngbe_spoof_check(adapter);
+#endif /* CONFIG_PCI_IOV */
+
+ ngbe_update_stats(adapter);
+ ngbe_watchdog_flush_tx(adapter);
+}
+
+/**
+ * ngbe_service_timer - Timer Call-back
+ * @t: pointer to the service timer embedded in the adapter structure
+ **/
+static void ngbe_service_timer(struct timer_list *t)
+{
+ struct ngbe_adapter *adapter = from_timer(adapter, t, service_timer);
+ unsigned long next_event_offset;
+ struct ngbe_hw *hw = &adapter->hw;
+
+ /* poll faster when waiting for link */
+ if ((adapter->flags & NGBE_FLAG_NEED_LINK_UPDATE) ||
+ (adapter->flags & NGBE_FLAG_NEED_ANC_CHECK))
+ next_event_offset = HZ / 10;
+ else
+ next_event_offset = HZ * 2;
+
+	if ((rd32(&adapter->hw, NGBE_MIS_PF_SM) == 1) && hw->bus.lan_id)
+		adapter->flags2 |= NGBE_FLAG2_PCIE_NEED_RECOVER;
+
+ /* Reset the timer */
+ mod_timer(&adapter->service_timer, next_event_offset + jiffies);
+
+ ngbe_service_event_schedule(adapter);
+}
+
+#ifdef CONFIG_NGBE_POLL_LINK_STATUS
+static void ngbe_link_check_timer(struct timer_list *t)
+{
+ struct ngbe_adapter *adapter = from_timer(adapter, t, link_check_timer);
+ unsigned long next_event_offset = HZ / 1000;
+
+ mod_timer(&adapter->link_check_timer, next_event_offset + jiffies);
+ /* if interface is down do nothing */
+ if (test_bit(__NGBE_DOWN, &adapter->state) ||
+ test_bit(__NGBE_REMOVING, &adapter->state) ||
+ test_bit(__NGBE_RESETTING, &adapter->state))
+ return;
+
+ ngbe_watchdog_update_link_status(adapter);
+
+ if (adapter->link_up)
+ ngbe_watchdog_link_is_up(adapter);
+ else
+ ngbe_watchdog_link_is_down(adapter);
+}
+#endif
+
+static void ngbe_reset_subtask(struct ngbe_adapter *adapter)
+{
+ u32 reset_flag = 0;
+ u32 value = 0;
+
+ if (!(adapter->flags2 & (NGBE_FLAG2_PF_RESET_REQUESTED |
+ NGBE_FLAG2_DEV_RESET_REQUESTED |
+ NGBE_FLAG2_GLOBAL_RESET_REQUESTED |
+ NGBE_FLAG2_RESET_INTR_RECEIVED)))
+ return;
+
+ /* If we're already down, just bail */
+ if (test_bit(__NGBE_DOWN, &adapter->state) ||
+ test_bit(__NGBE_REMOVING, &adapter->state))
+ return;
+
+ netdev_err(adapter->netdev, "Reset adapter\n");
+ adapter->tx_timeout_count++;
+
+ rtnl_lock();
+ if (adapter->flags2 & NGBE_FLAG2_GLOBAL_RESET_REQUESTED) {
+ reset_flag |= NGBE_FLAG2_GLOBAL_RESET_REQUESTED;
+ adapter->flags2 &= ~NGBE_FLAG2_GLOBAL_RESET_REQUESTED;
+ }
+ if (adapter->flags2 & NGBE_FLAG2_DEV_RESET_REQUESTED) {
+ reset_flag |= NGBE_FLAG2_DEV_RESET_REQUESTED;
+ adapter->flags2 &= ~NGBE_FLAG2_DEV_RESET_REQUESTED;
+ }
+ if (adapter->flags2 & NGBE_FLAG2_PF_RESET_REQUESTED) {
+ reset_flag |= NGBE_FLAG2_PF_RESET_REQUESTED;
+ adapter->flags2 &= ~NGBE_FLAG2_PF_RESET_REQUESTED;
+ }
+
+ if (adapter->flags2 & NGBE_FLAG2_RESET_INTR_RECEIVED) {
+		/* If there's a recovery already waiting, it takes
+		 * precedence over starting a new reset sequence.
+		 */
+ adapter->flags2 &= ~NGBE_FLAG2_RESET_INTR_RECEIVED;
+ value = rd32m(&adapter->hw, NGBE_MIS_RST_ST,
+ NGBE_MIS_RST_ST_DEV_RST_TYPE_MASK) >>
+ NGBE_MIS_RST_ST_DEV_RST_TYPE_SHIFT;
+		if (value == NGBE_MIS_RST_ST_DEV_RST_TYPE_SW_RST)
+			adapter->hw.reset_type = NGBE_SW_RESET;
+		else if (value == NGBE_MIS_RST_ST_DEV_RST_TYPE_GLOBAL_RST)
+			adapter->hw.reset_type = NGBE_GLOBAL_RESET;
+ adapter->hw.force_full_reset = true;
+ ngbe_reinit_locked(adapter);
+ adapter->hw.force_full_reset = false;
+ goto unlock;
+ }
+
+ if (reset_flag & NGBE_FLAG2_DEV_RESET_REQUESTED) {
+ /* Request a Device Reset
+ *
+ * This will start the chip's countdown to the actual full
+ * chip reset event, and a warning interrupt to be sent
+ * to all PFs, including the requestor. Our handler
+ * for the warning interrupt will deal with the shutdown
+ * and recovery of the switch setup.
+ */
+		/* ngbe_dump(adapter); uncomment to dump state for debugging */
+
+ wr32m(&adapter->hw, NGBE_MIS_RST,
+ NGBE_MIS_RST_SW_RST, NGBE_MIS_RST_SW_RST);
+ e_info(drv, "ngbe_reset_subtask: sw reset\n");
+
+ } else if (reset_flag & NGBE_FLAG2_PF_RESET_REQUESTED) {
+		/* ngbe_dump(adapter); uncomment to dump state for debugging */
+ ngbe_reinit_locked(adapter);
+ } else if (reset_flag & NGBE_FLAG2_GLOBAL_RESET_REQUESTED) {
+ /* Request a Global Reset
+ *
+ * This will start the chip's countdown to the actual full
+ * chip reset event, and a warning interrupt to be sent
+ * to all PFs, including the requestor. Our handler
+ * for the warning interrupt will deal with the shutdown
+ * and recovery of the switch setup.
+ */
+		/* ngbe_dump(adapter); uncomment to dump state for debugging */
+ pci_save_state(adapter->pdev);
+ if (ngbe_mng_present(&adapter->hw)) {
+ ngbe_reset_hostif(&adapter->hw);
+ e_info(drv, "ngbe_reset_subtask: lan reset\n");
+
+ } else {
+ wr32m(&adapter->hw, NGBE_MIS_RST,
+ NGBE_MIS_RST_GLOBAL_RST,
+ NGBE_MIS_RST_GLOBAL_RST);
+ e_info(drv, "ngbe_reset_subtask: global reset\n");
+ }
+ }
+
+unlock:
+ rtnl_unlock();
+}
+
+static void ngbe_check_pcie_subtask(struct ngbe_adapter *adapter)
+{
+ if (!(adapter->flags2 & NGBE_FLAG2_PCIE_NEED_RECOVER))
+ return;
+
+ e_info(probe, "do recovery\n");
+ ngbe_pcie_do_recovery(adapter->pdev);
+ wr32m(&adapter->hw, NGBE_MIS_PF_SM,
+ NGBE_MIS_PF_SM_SM, 0);
+ adapter->flags2 &= ~NGBE_FLAG2_PCIE_NEED_RECOVER;
+}
+
+
+/**
+ * ngbe_service_task - manages and runs subtasks
+ * @work: pointer to work_struct containing our data
+ **/
+static void ngbe_service_task(struct work_struct *work)
+{
+ struct ngbe_adapter *adapter = container_of(work,
+ struct ngbe_adapter,
+ service_task);
+ if (NGBE_REMOVED(adapter->hw.hw_addr)) {
+ if (!test_bit(__NGBE_DOWN, &adapter->state)) {
+ rtnl_lock();
+ ngbe_down(adapter);
+ rtnl_unlock();
+ }
+ ngbe_service_event_complete(adapter);
+ return;
+ }
+
+ ngbe_check_pcie_subtask(adapter);
+ ngbe_reset_subtask(adapter);
+ ngbe_check_overtemp_subtask(adapter);
+ ngbe_watchdog_subtask(adapter);
+ ngbe_check_hang_subtask(adapter);
+
+ if (test_bit(__NGBE_PTP_RUNNING, &adapter->state)) {
+ ngbe_ptp_overflow_check(adapter);
+ if (unlikely(adapter->flags &
+ NGBE_FLAG_RX_HWTSTAMP_IN_REGISTER))
+ ngbe_ptp_rx_hang(adapter);
+ }
+
+ ngbe_service_event_complete(adapter);
+}
+
+static u8 get_ipv6_proto(struct sk_buff *skb, int offset)
+{
+ struct ipv6hdr *hdr = (struct ipv6hdr *)(skb->data + offset);
+ u8 nexthdr = hdr->nexthdr;
+
+ offset += sizeof(struct ipv6hdr);
+
+ while (ipv6_ext_hdr(nexthdr)) {
+ struct ipv6_opt_hdr _hdr, *hp;
+
+ if (nexthdr == NEXTHDR_NONE)
+ break;
+
+ hp = skb_header_pointer(skb, offset, sizeof(_hdr), &_hdr);
+ if (!hp)
+ break;
+
+ if (nexthdr == NEXTHDR_FRAGMENT) {
+ break;
+ } else if (nexthdr == NEXTHDR_AUTH) {
+ offset += ipv6_authlen(hp);
+ } else {
+ offset += ipv6_optlen(hp);
+ }
+
+ nexthdr = hp->nexthdr;
+ }
+
+ return nexthdr;
+}
+
+union network_header {
+ struct iphdr *ipv4;
+ struct ipv6hdr *ipv6;
+ void *raw;
+};
+
+static ngbe_dptype encode_tx_desc_ptype(const struct ngbe_tx_buffer *first)
+{
+ struct sk_buff *skb = first->skb;
+ u8 tun_prot = 0;
+ u8 l4_prot = 0;
+ u8 ptype = 0;
+
+ if (skb->encapsulation) {
+ union network_header hdr;
+
+ switch (first->protocol) {
+ case __constant_htons(ETH_P_IP):
+ tun_prot = ip_hdr(skb)->protocol;
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))
+ goto encap_frag;
+ ptype = NGBE_PTYPE_TUN_IPV4;
+ break;
+ case __constant_htons(ETH_P_IPV6):
+ tun_prot = get_ipv6_proto(skb, skb_network_offset(skb));
+ if (tun_prot == NEXTHDR_FRAGMENT)
+ goto encap_frag;
+ ptype = NGBE_PTYPE_TUN_IPV6;
+ break;
+ default:
+ goto exit;
+ }
+
+ if (tun_prot == IPPROTO_IPIP) {
+ hdr.raw = (void *)inner_ip_hdr(skb);
+ ptype |= NGBE_PTYPE_PKT_IPIP;
+ } else if (tun_prot == IPPROTO_UDP) {
+ hdr.raw = (void *)inner_ip_hdr(skb);
+ } else {
+ goto exit;
+ }
+
+ switch (hdr.ipv4->version) {
+ case IPVERSION:
+ l4_prot = hdr.ipv4->protocol;
+ if (hdr.ipv4->frag_off & htons(IP_MF | IP_OFFSET)) {
+ ptype |= NGBE_PTYPE_TYP_IPFRAG;
+ goto exit;
+ }
+ break;
+ case 6:
+ l4_prot = get_ipv6_proto(skb,
+ skb_inner_network_offset(skb));
+ ptype |= NGBE_PTYPE_PKT_IPV6;
+ if (l4_prot == NEXTHDR_FRAGMENT) {
+ ptype |= NGBE_PTYPE_TYP_IPFRAG;
+ goto exit;
+ }
+ break;
+ default:
+ goto exit;
+ }
+ } else {
+encap_frag:
+ switch (first->protocol) {
+ case __constant_htons(ETH_P_IP):
+ l4_prot = ip_hdr(skb)->protocol;
+ ptype = NGBE_PTYPE_PKT_IP;
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+ ptype |= NGBE_PTYPE_TYP_IPFRAG;
+ goto exit;
+ }
+ break;
+#ifdef NETIF_F_IPV6_CSUM
+ case __constant_htons(ETH_P_IPV6):
+ l4_prot = get_ipv6_proto(skb, skb_network_offset(skb));
+ ptype = NGBE_PTYPE_PKT_IP | NGBE_PTYPE_PKT_IPV6;
+ if (l4_prot == NEXTHDR_FRAGMENT) {
+ ptype |= NGBE_PTYPE_TYP_IPFRAG;
+ goto exit;
+ }
+ break;
+#endif /* NETIF_F_IPV6_CSUM */
+ case __constant_htons(ETH_P_1588):
+ ptype = NGBE_PTYPE_L2_TS;
+ goto exit;
+ case __constant_htons(ETH_P_FIP):
+ ptype = NGBE_PTYPE_L2_FIP;
+ goto exit;
+ case __constant_htons(NGBE_ETH_P_LLDP):
+ ptype = NGBE_PTYPE_L2_LLDP;
+ goto exit;
+ case __constant_htons(NGBE_ETH_P_CNM):
+ ptype = NGBE_PTYPE_L2_CNM;
+ goto exit;
+ case __constant_htons(ETH_P_PAE):
+ ptype = NGBE_PTYPE_L2_EAPOL;
+ goto exit;
+ case __constant_htons(ETH_P_ARP):
+ ptype = NGBE_PTYPE_L2_ARP;
+ goto exit;
+ default:
+ ptype = NGBE_PTYPE_L2_MAC;
+ goto exit;
+ }
+ }
+
+ switch (l4_prot) {
+ case IPPROTO_TCP:
+ ptype |= NGBE_PTYPE_TYP_TCP;
+ break;
+ case IPPROTO_UDP:
+ ptype |= NGBE_PTYPE_TYP_UDP;
+ break;
+ case IPPROTO_SCTP:
+ ptype |= NGBE_PTYPE_TYP_SCTP;
+ break;
+ default:
+ ptype |= NGBE_PTYPE_TYP_IP;
+ break;
+ }
+
+exit:
+ return ngbe_decode_ptype(ptype);
+}
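Tracing the encoder for the most common case helps read the bit layout: an
untunneled IPv4/TCP frame takes the encap_frag ETH_P_IP case and then the
IPPROTO_TCP case, so the value handed to the decoder is simply the OR of
the two class macros:

	/* untunneled IPv4/TCP, traced through encode_tx_desc_ptype() */
	u8 ptype = NGBE_PTYPE_PKT_IP | NGBE_PTYPE_TYP_TCP;
	ngbe_dptype dptype = ngbe_decode_ptype(ptype);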
+
+static int ngbe_tso(struct ngbe_ring *tx_ring,
+ struct ngbe_tx_buffer *first,
+ u8 *hdr_len, ngbe_dptype dptype)
+{
+ struct sk_buff *skb = first->skb;
+ u32 vlan_macip_lens, type_tucmd;
+ u32 mss_l4len_idx, l4len;
+ struct tcphdr *tcph;
+ struct iphdr *iph;
+	u32 tunhdr_eiplen_tunlen = 0;
+	u8 tun_prot = 0;
+	bool enc = skb->encapsulation;
+	struct ipv6hdr *ipv6h;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return 0;
+
+ if (!skb_is_gso(skb))
+ return 0;
+
+	if (skb_header_cloned(skb)) {
+		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+
+		if (err)
+			return err;
+	}
+
+ iph = enc ? inner_ip_hdr(skb) : ip_hdr(skb);
+ if (iph->version == 4) {
+ tcph = enc ? inner_tcp_hdr(skb) : tcp_hdr(skb);
+ iph->tot_len = 0;
+ iph->check = 0;
+ tcph->check = ~csum_tcpudp_magic(iph->saddr,
+ iph->daddr, 0,
+ IPPROTO_TCP,
+ 0);
+ first->tx_flags |= NGBE_TX_FLAGS_TSO |
+ NGBE_TX_FLAGS_CSUM |
+ NGBE_TX_FLAGS_IPV4 |
+ NGBE_TX_FLAGS_CC;
+ } else if (iph->version == 6 && skb_is_gso_v6(skb)) {
+ ipv6h = enc ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
+ tcph = enc ? inner_tcp_hdr(skb) : tcp_hdr(skb);
+ ipv6h->payload_len = 0;
+ tcph->check =
+ ~csum_ipv6_magic(&ipv6h->saddr,
+ &ipv6h->daddr,
+ 0, IPPROTO_TCP, 0);
+ first->tx_flags |= NGBE_TX_FLAGS_TSO |
+ NGBE_TX_FLAGS_CSUM |
+				   NGBE_TX_FLAGS_CC;
+	}
+
+ /* compute header lengths */
+ l4len = enc ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
+ *hdr_len = enc ? (skb_inner_transport_header(skb) - skb->data)
+ : skb_transport_offset(skb);
+ *hdr_len += l4len;
+
+ /* update gso size and bytecount with header size */
+ first->gso_segs = skb_shinfo(skb)->gso_segs;
+ first->bytecount += (first->gso_segs - 1) * *hdr_len;
+
+ /* mss_l4len_id: use 0 as index for TSO */
+ mss_l4len_idx = l4len << NGBE_TXD_L4LEN_SHIFT;
+ mss_l4len_idx |= skb_shinfo(skb)->gso_size << NGBE_TXD_MSS_SHIFT;
+
+ /* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */
+ if (enc) {
+ switch (first->protocol) {
+ case __constant_htons(ETH_P_IP):
+ tun_prot = ip_hdr(skb)->protocol;
+ first->tx_flags |= NGBE_TX_FLAGS_OUTER_IPV4;
+ break;
+ case __constant_htons(ETH_P_IPV6):
+ tun_prot = ipv6_hdr(skb)->nexthdr;
+ break;
+ default:
+ break;
+ }
+ switch (tun_prot) {
+ case IPPROTO_UDP:
+ tunhdr_eiplen_tunlen = NGBE_TXD_TUNNEL_UDP;
+ tunhdr_eiplen_tunlen |=
+ ((skb_network_header_len(skb) >> 2) <<
+ NGBE_TXD_OUTER_IPLEN_SHIFT) |
+ (((skb_inner_mac_header(skb) -
+ skb_transport_header(skb)) >> 1) <<
+ NGBE_TXD_TUNNEL_LEN_SHIFT);
+ break;
+ case IPPROTO_GRE:
+ tunhdr_eiplen_tunlen = NGBE_TXD_TUNNEL_GRE;
+ tunhdr_eiplen_tunlen |=
+ ((skb_network_header_len(skb) >> 2) <<
+ NGBE_TXD_OUTER_IPLEN_SHIFT) |
+ (((skb_inner_mac_header(skb) -
+ skb_transport_header(skb)) >> 1) <<
+ NGBE_TXD_TUNNEL_LEN_SHIFT);
+ break;
+ case IPPROTO_IPIP:
+ tunhdr_eiplen_tunlen = (((char *)inner_ip_hdr(skb)-
+ (char *)ip_hdr(skb)) >> 2) <<
+ NGBE_TXD_OUTER_IPLEN_SHIFT;
+ break;
+ default:
+ break;
+ }
+
+ vlan_macip_lens = skb_inner_network_header_len(skb) >> 1;
+	} else {
+		vlan_macip_lens = skb_network_header_len(skb) >> 1;
+	}
+
+ vlan_macip_lens |= skb_network_offset(skb) << NGBE_TXD_MACLEN_SHIFT;
+ vlan_macip_lens |= first->tx_flags & NGBE_TX_FLAGS_VLAN_MASK;
+
+ type_tucmd = dptype.ptype << 24;
+ ngbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, tunhdr_eiplen_tunlen,
+ type_tucmd, mss_l4len_idx);
+
+ return 1;
+}
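The context descriptor built above packs the L4 header length and the MSS
into one 32-bit word. A hedged worked example (the shift constants live in
the driver's headers and are used only symbolically here): for a plain
20-byte TCP header and a typical 1448-byte MSS,

	u32 l4len = 20, mss = 1448;
	u32 mss_l4len_idx = (l4len << NGBE_TXD_L4LEN_SHIFT) |
			    (mss << NGBE_TXD_MSS_SHIFT);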
+
+static void ngbe_tx_csum(struct ngbe_ring *tx_ring,
+ struct ngbe_tx_buffer *first, ngbe_dptype dptype)
+{
+ struct sk_buff *skb = first->skb;
+ u32 vlan_macip_lens = 0;
+ u32 mss_l4len_idx = 0;
+ u32 tunhdr_eiplen_tunlen = 0;
+ u8 tun_prot = 0;
+ u32 type_tucmd;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ if (!(first->tx_flags & NGBE_TX_FLAGS_HW_VLAN) &&
+ !(first->tx_flags & NGBE_TX_FLAGS_CC))
+ return;
+ vlan_macip_lens = skb_network_offset(skb) <<
+ NGBE_TXD_MACLEN_SHIFT;
+ } else {
+ u8 l4_prot = 0;
+ union {
+ struct iphdr *ipv4;
+ struct ipv6hdr *ipv6;
+ u8 *raw;
+ } network_hdr;
+ union {
+ struct tcphdr *tcphdr;
+ u8 *raw;
+ } transport_hdr;
+
+ if (skb->encapsulation) {
+ network_hdr.raw = skb_inner_network_header(skb);
+ transport_hdr.raw = skb_inner_transport_header(skb);
+ vlan_macip_lens = skb_network_offset(skb) <<
+ NGBE_TXD_MACLEN_SHIFT;
+ switch (first->protocol) {
+ case __constant_htons(ETH_P_IP):
+ tun_prot = ip_hdr(skb)->protocol;
+ break;
+ case __constant_htons(ETH_P_IPV6):
+ tun_prot = ipv6_hdr(skb)->nexthdr;
+ break;
+ default:
+ if (unlikely(net_ratelimit())) {
+ dev_warn(tx_ring->dev,
+ "partial checksum but version=%d\n",
+ network_hdr.ipv4->version);
+ }
+ return;
+ }
+ switch (tun_prot) {
+ case IPPROTO_UDP:
+ tunhdr_eiplen_tunlen = NGBE_TXD_TUNNEL_UDP;
+ tunhdr_eiplen_tunlen |=
+ ((skb_network_header_len(skb) >> 2) <<
+ NGBE_TXD_OUTER_IPLEN_SHIFT) |
+ (((skb_inner_mac_header(skb) -
+ skb_transport_header(skb)) >> 1) <<
+ NGBE_TXD_TUNNEL_LEN_SHIFT);
+ break;
+ case IPPROTO_GRE:
+ tunhdr_eiplen_tunlen = NGBE_TXD_TUNNEL_GRE;
+ tunhdr_eiplen_tunlen |=
+ ((skb_network_header_len(skb) >> 2) <<
+ NGBE_TXD_OUTER_IPLEN_SHIFT) |
+ (((skb_inner_mac_header(skb) -
+ skb_transport_header(skb)) >> 1) <<
+ NGBE_TXD_TUNNEL_LEN_SHIFT);
+ break;
+ case IPPROTO_IPIP:
+ tunhdr_eiplen_tunlen =
+ (((char *)inner_ip_hdr(skb)-
+ (char *)ip_hdr(skb)) >> 2) <<
+ NGBE_TXD_OUTER_IPLEN_SHIFT;
+ break;
+ default:
+ break;
+ }
+
+ } else {
+ network_hdr.raw = skb_network_header(skb);
+ transport_hdr.raw = skb_transport_header(skb);
+ vlan_macip_lens = skb_network_offset(skb) <<
+ NGBE_TXD_MACLEN_SHIFT;
+ }
+
+ switch (network_hdr.ipv4->version) {
+ case IPVERSION:
+ vlan_macip_lens |=
+ (transport_hdr.raw - network_hdr.raw) >> 1;
+ l4_prot = network_hdr.ipv4->protocol;
+ break;
+ case 6:
+ vlan_macip_lens |=
+ (transport_hdr.raw - network_hdr.raw) >> 1;
+ l4_prot = network_hdr.ipv6->nexthdr;
+ break;
+ default:
+ break;
+ }
+
+ switch (l4_prot) {
+ case IPPROTO_TCP:
+ mss_l4len_idx = (transport_hdr.tcphdr->doff * 4) <<
+ NGBE_TXD_L4LEN_SHIFT;
+ break;
+ case IPPROTO_SCTP:
+ mss_l4len_idx = sizeof(struct sctphdr) <<
+ NGBE_TXD_L4LEN_SHIFT;
+ break;
+ case IPPROTO_UDP:
+ mss_l4len_idx = sizeof(struct udphdr) <<
+ NGBE_TXD_L4LEN_SHIFT;
+ break;
+ default:
+ break;
+ }
+
+ /* update TX checksum flag */
+ first->tx_flags |= NGBE_TX_FLAGS_CSUM;
+ }
+ first->tx_flags |= NGBE_TX_FLAGS_CC;
+ /* vlan_macip_lens: MACLEN, VLAN tag */
+ vlan_macip_lens |= first->tx_flags & NGBE_TX_FLAGS_VLAN_MASK;
+
+ type_tucmd = dptype.ptype << 24;
+ ngbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, tunhdr_eiplen_tunlen,
+ type_tucmd, mss_l4len_idx);
+}
+
+static u32 ngbe_tx_cmd_type(u32 tx_flags)
+{
+ /* set type for advanced descriptor with frame checksum insertion */
+ u32 cmd_type = NGBE_TXD_DTYP_DATA |
+ NGBE_TXD_IFCS;
+
+ /* set HW vlan bit if vlan is present */
+ cmd_type |= NGBE_SET_FLAG(tx_flags, NGBE_TX_FLAGS_HW_VLAN,
+ NGBE_TXD_VLE);
+
+ /* set segmentation enable bits for TSO/FSO */
+ cmd_type |= NGBE_SET_FLAG(tx_flags, NGBE_TX_FLAGS_TSO,
+ NGBE_TXD_TSE);
+
+ /* set timestamp bit if present */
+ cmd_type |= NGBE_SET_FLAG(tx_flags, NGBE_TX_FLAGS_TSTAMP,
+ NGBE_TXD_MAC_TSTAMP);
+
+ cmd_type |= NGBE_SET_FLAG(tx_flags, NGBE_TX_FLAGS_LINKSEC,
+ NGBE_TXD_LINKSEC);
+
+ return cmd_type;
+}
+
+static void ngbe_tx_olinfo_status(union ngbe_tx_desc *tx_desc,
+ u32 tx_flags, unsigned int paylen)
+{
+ u32 olinfo_status = paylen << NGBE_TXD_PAYLEN_SHIFT;
+
+ /* enable L4 checksum for TSO and TX checksum offload */
+ olinfo_status |= NGBE_SET_FLAG(tx_flags,
+ NGBE_TX_FLAGS_CSUM,
+ NGBE_TXD_L4CS);
+
+	/* enable IPv4 checksum for TSO */
+ olinfo_status |= NGBE_SET_FLAG(tx_flags,
+ NGBE_TX_FLAGS_IPV4,
+ NGBE_TXD_IIPCS);
+ /* enable outer IPv4 checksum for TSO */
+ olinfo_status |= NGBE_SET_FLAG(tx_flags,
+ NGBE_TX_FLAGS_OUTER_IPV4,
+ NGBE_TXD_EIPCS);
+	/*
+	 * Check Context must be set if Tx switch is enabled, which it
+	 * always is when virtual functions are running.
+	 */
+ olinfo_status |= NGBE_SET_FLAG(tx_flags,
+ NGBE_TX_FLAGS_CC,
+ NGBE_TXD_CC);
+
+ olinfo_status |= NGBE_SET_FLAG(tx_flags,
+ NGBE_TX_FLAGS_IPSEC,
+ NGBE_TXD_IPSEC);
+
+ tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+}
+
+static int __ngbe_maybe_stop_tx(struct ngbe_ring *tx_ring, u16 size)
+{
+ netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+
+ /* Herbert's original patch had:
+ * smp_mb__after_netif_stop_queue();
+ * but since that doesn't exist yet, just open code it.
+ */
+ smp_mb();
+
+ /* We need to check again in a case another CPU has just
+ * made room available.
+ */
+ if (likely(ngbe_desc_unused(tx_ring) < size))
+ return -EBUSY;
+
+ /* A reprieve! - use start_queue because it doesn't call schedule */
+ netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
+ ++tx_ring->tx_stats.restart_queue;
+ return 0;
+}
+
+static inline int ngbe_maybe_stop_tx(struct ngbe_ring *tx_ring, u16 size)
+{
+ if (likely(ngbe_desc_unused(tx_ring) >= size))
+ return 0;
+
+ return __ngbe_maybe_stop_tx(tx_ring, size);
+}
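The smp_mb() in __ngbe_maybe_stop_tx() pairs with a re-check on the
completion side: after descriptors are cleaned, a stopped queue with enough
free slots can be restarted. That wake-up lives in the TX clean path, which
is not part of this hunk; a minimal sketch of the assumed pattern, using
the same helpers:

	if (netif_tx_queue_stopped(txring_txq(tx_ring)) &&
	    ngbe_desc_unused(tx_ring) >= DESC_NEEDED)
		netif_tx_wake_queue(txring_txq(tx_ring));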
+
+#define NGBE_TXD_CMD (NGBE_TXD_EOP | \
+ NGBE_TXD_RS)
+
+static int ngbe_tx_map(struct ngbe_ring *tx_ring,
+ struct ngbe_tx_buffer *first,
+ const u8 hdr_len)
+{
+ struct sk_buff *skb = first->skb;
+ struct ngbe_tx_buffer *tx_buffer;
+ union ngbe_tx_desc *tx_desc;
+ skb_frag_t *frag;
+ dma_addr_t dma;
+ unsigned int data_len, size;
+ u32 tx_flags = first->tx_flags;
+ u32 cmd_type = ngbe_tx_cmd_type(tx_flags);
+ u16 i = tx_ring->next_to_use;
+
+ tx_desc = NGBE_TX_DESC(tx_ring, i);
+
+ ngbe_tx_olinfo_status(tx_desc, tx_flags, skb->len - hdr_len);
+
+ size = skb_headlen(skb);
+ data_len = skb->data_len;
+
+ dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+ tx_buffer = first;
+
+ for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+ if (dma_mapping_error(tx_ring->dev, dma))
+ goto dma_error;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buffer, len, size);
+ dma_unmap_addr_set(tx_buffer, dma, dma);
+
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
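+		/* The length bits of cmd_type are still clear at this
+		 * point, so XOR-ing NGBE_MAX_DATA_PER_TXD into it below
+		 * fills in the buffer length field directly; mappings
+		 * larger than one descriptor can carry are split across
+		 * several descriptors.
+		 */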
+ while (unlikely(size > NGBE_MAX_DATA_PER_TXD)) {
+ tx_desc->read.cmd_type_len =
+ cpu_to_le32(cmd_type ^ NGBE_MAX_DATA_PER_TXD);
+
+ i++;
+ tx_desc++;
+ if (i == tx_ring->count) {
+ tx_desc = NGBE_TX_DESC(tx_ring, 0);
+ i = 0;
+ }
+ tx_desc->read.olinfo_status = 0;
+
+ dma += NGBE_MAX_DATA_PER_TXD;
+ size -= NGBE_MAX_DATA_PER_TXD;
+
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
+ }
+
+ if (likely(!data_len))
+ break;
+
+ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
+
+ i++;
+ tx_desc++;
+ if (i == tx_ring->count) {
+ tx_desc = NGBE_TX_DESC(tx_ring, 0);
+ i = 0;
+ }
+ tx_desc->read.olinfo_status = 0;
+
+ size = skb_frag_size(frag);
+
+ data_len -= size;
+
+ dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+ DMA_TO_DEVICE);
+
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+ }
+
+ /* write last descriptor with RS and EOP bits */
+ cmd_type |= size | NGBE_TXD_CMD;
+ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+
+ netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
+ /* set the timestamp */
+ first->time_stamp = jiffies;
+
+ /*
+ * Force memory writes to complete before letting h/w know there
+ * are new descriptors to fetch. (Only applicable for weak-ordered
+ * memory model archs, such as IA-64).
+ *
+ * We also need this memory barrier to make certain all of the
+ * status bits have been updated before next_to_watch is written.
+ */
+ wmb();
+
+ /* set next_to_watch value indicating a packet is present */
+ first->next_to_watch = tx_desc;
+
+ i++;
+ if (i == tx_ring->count)
+ i = 0;
+
+ tx_ring->next_to_use = i;
+
+ ngbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+ skb_tx_timestamp(skb);
+
+ if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
+ writel(i, tx_ring->tail);
+ /* The following mmiowb() is required on certain
+		 * architectures (IA64/Altix in particular) in order to
+ * synchronize the I/O calls with respect to a spin lock. This
+ * is because the wmb() on those architectures does not
+ * guarantee anything for posted I/O writes.
+ *
+ * Note that the associated spin_unlock() is not within the
+ * driver code, but in the networking core stack.
+ */
+ mmiowb();
+ }
+
+ return 0;
+dma_error:
+ dev_err(tx_ring->dev, "TX DMA map failed\n");
+
+ /* clear dma mappings for failed tx_buffer_info map */
+ for (;;) {
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+ if (tx_buffer == first)
+ break;
+ if (i == 0)
+ i += tx_ring->count;
+ i--;
+ }
+
+ dev_kfree_skb_any(first->skb);
+ first->skb = NULL;
+
+ tx_ring->next_to_use = i;
+
+ return -1;
+}
+
+/**
+ * ngbe_skb_pad_nonzero - pad the tail of an skb with 0x1 bytes
+ * @skb: buffer to pad
+ * @pad: space to pad
+ *
+ * Ensure that a buffer is followed by a padding area filled with the
+ * non-zero byte 0x1. Used by network drivers which may DMA or transfer
+ * data beyond the buffer end onto the wire.
+ *
+ * May return error in out of memory cases. The skb is freed on error.
+ */
+
+int ngbe_skb_pad_nonzero(struct sk_buff *skb, int pad)
+{
+ int err;
+ int ntail;
+
+	/* If the skbuff is non-linear, tailroom is always zero. */
+ if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
+ memset(skb->data+skb->len, 0x1, pad);
+ return 0;
+ }
+
+ ntail = skb->data_len + pad - (skb->end - skb->tail);
+ if (likely(skb_cloned(skb) || ntail > 0)) {
+ err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
+ if (unlikely(err))
+ goto free_skb;
+ }
+
+ /* FIXME: The use of this function with non-linear skb's really needs
+ * to be audited.
+ */
+ err = skb_linearize(skb);
+ if (unlikely(err))
+ goto free_skb;
+
+ memset(skb->data + skb->len, 0x1, pad);
+ return 0;
+
+free_skb:
+ kfree_skb(skb);
+ return err;
+}
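A hypothetical call site, for illustration only (this hunk itself pads via
skb_put_padto() in ngbe_xmit_frame() below): pad a short frame to the
60-byte Ethernet minimum and treat failure as a drop, since the helper
frees the skb on error.

	if (skb->len < ETH_ZLEN &&
	    ngbe_skb_pad_nonzero(skb, ETH_ZLEN - skb->len))
		return NETDEV_TX_OK;	/* skb already freed on error */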
+
+netdev_tx_t ngbe_xmit_frame_ring(struct sk_buff *skb,
+ struct ngbe_adapter *adapter,
+ struct ngbe_ring *tx_ring)
+{
+ struct ngbe_tx_buffer *first;
+ int tso;
+ u32 tx_flags = 0;
+ unsigned short f;
+ u16 count = TXD_USE_COUNT(skb_headlen(skb));
+ __be16 protocol = skb->protocol;
+ u8 hdr_len = 0;
+ ngbe_dptype dptype;
+
+ /*
+ * need: 1 descriptor per page * PAGE_SIZE/NGBE_MAX_DATA_PER_TXD,
+ * + 1 desc for skb_headlen/NGBE_MAX_DATA_PER_TXD,
+ * + 2 desc gap to keep tail from touching head,
+ * + 1 desc for context descriptor,
+ * otherwise try next time
+ */
+ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+ count += TXD_USE_COUNT(skb_frag_size(&skb_shinfo(skb)->
+ frags[f]));
+
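+	/* Example: a linear 256-byte head plus three 4 KB frags needs
+	 * count = 1 + 3 data descriptors (assuming NGBE_MAX_DATA_PER_TXD
+	 * is at least 4 KB), so the check below reserves 4 + 3 = 7 slots:
+	 * one context descriptor plus the two-slot head/tail gap.
+	 */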
+ if (ngbe_maybe_stop_tx(tx_ring, count + 3)) {
+ tx_ring->tx_stats.tx_busy++;
+ return NETDEV_TX_BUSY;
+ }
+
+ /* record the location of the first descriptor for this packet */
+ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+ first->skb = skb;
+ first->bytecount = skb->len;
+ first->gso_segs = 1;
+
+ /* if we have a HW VLAN tag being added default to the HW one */
+ if (skb_vlan_tag_present(skb)) {
+ tx_flags |= skb_vlan_tag_get(skb) << NGBE_TX_FLAGS_VLAN_SHIFT;
+ tx_flags |= NGBE_TX_FLAGS_HW_VLAN;
+ /* else if it is a SW VLAN check the next protocol and store the tag */
+ } else if (protocol == htons(ETH_P_8021Q)) {
+ struct vlan_hdr *vhdr, _vhdr;
+ vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
+ if (!vhdr)
+ goto out_drop;
+
+ protocol = vhdr->h_vlan_encapsulated_proto;
+ tx_flags |= ntohs(vhdr->h_vlan_TCI) <<
+ NGBE_TX_FLAGS_VLAN_SHIFT;
+ tx_flags |= NGBE_TX_FLAGS_SW_VLAN;
+ }
+
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+ adapter->ptp_clock) {
+ if (!test_and_set_bit_lock(__NGBE_PTP_TX_IN_PROGRESS,
+ &adapter->state)) {
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+ tx_flags |= NGBE_TX_FLAGS_TSTAMP;
+
+ /* schedule check for Tx timestamp */
+ adapter->ptp_tx_skb = skb_get(skb);
+ adapter->ptp_tx_start = jiffies;
+ schedule_work(&adapter->ptp_tx_work);
+ } else {
+ adapter->tx_hwtstamp_skipped++;
+ }
+ }
+
+#ifdef CONFIG_PCI_IOV
+ /*
+ * Use the l2switch_enable flag - would be false if the DMA
+ * Tx switch had been disabled.
+ */
+ if (adapter->flags & NGBE_FLAG_SRIOV_L2SWITCH_ENABLE)
+ tx_flags |= NGBE_TX_FLAGS_CC;
+
+#endif
+ /* record initial flags and protocol */
+ first->tx_flags = tx_flags;
+ first->protocol = protocol;
+
+ dptype = encode_tx_desc_ptype(first);
+
+ tso = ngbe_tso(tx_ring, first, &hdr_len, dptype);
+ if (tso < 0)
+ goto out_drop;
+ else if (!tso)
+ ngbe_tx_csum(tx_ring, first, dptype);
+
+ if (ngbe_tx_map(tx_ring, first, hdr_len))
+ goto cleanup_tx_tstamp;
+
+ return NETDEV_TX_OK;
+
+out_drop:
+ dev_kfree_skb_any(first->skb);
+ first->skb = NULL;
+
+cleanup_tx_tstamp:
+ if (unlikely(tx_flags & NGBE_TX_FLAGS_TSTAMP)) {
+ dev_kfree_skb_any(adapter->ptp_tx_skb);
+ adapter->ptp_tx_skb = NULL;
+ cancel_work_sync(&adapter->ptp_tx_work);
+ clear_bit_unlock(__NGBE_PTP_TX_IN_PROGRESS, &adapter->state);
+ }
+
+ return NETDEV_TX_OK;
+}
+
+static netdev_tx_t ngbe_xmit_frame(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_ring *tx_ring;
+ unsigned int r_idx = skb->queue_mapping;
+
+ if (!netif_carrier_ok(netdev)) {
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+	/*
+	 * The minimum packet size for olinfo paylen is 17, so pad the skb
+	 * to meet this minimum size requirement.
+	 */
+ if (skb_put_padto(skb, 17))
+ return NETDEV_TX_OK;
+
+ if (r_idx >= adapter->num_tx_queues)
+ r_idx = r_idx % adapter->num_tx_queues;
+ tx_ring = adapter->tx_ring[r_idx];
+
+ return ngbe_xmit_frame_ring(skb, adapter, tx_ring);
+}
+
+/**
+ * ngbe_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int ngbe_set_mac(struct net_device *netdev, void *p)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ struct sockaddr *addr = p;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ ngbe_del_mac_filter(adapter, hw->mac.addr, VMDQ_P(0));
+ memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+ memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
+
+ ngbe_mac_set_default_filter(adapter, hw->mac.addr);
+ e_info(drv, "The mac has been set to %02X:%02X:%02X:%02X:%02X:%02X\n",
+ hw->mac.addr[0], hw->mac.addr[1], hw->mac.addr[2],
+ hw->mac.addr[3], hw->mac.addr[4], hw->mac.addr[5]);
+
+ return 0;
+}
+
+static int ngbe_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
+ int cmd)
+{
+ struct mii_ioctl_data *mii = (struct mii_ioctl_data *) &ifr->ifr_data;
+ int prtad, devad, ret = 0;
+
+ prtad = (mii->phy_id & MDIO_PHY_ID_PRTAD) >> 5;
+ devad = (mii->phy_id & MDIO_PHY_ID_DEVAD);
+
+ return ret;
+}
+
+static int ngbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ switch (cmd) {
+ case SIOCGHWTSTAMP:
+ return ngbe_ptp_get_ts_config(adapter, ifr);
+ case SIOCSHWTSTAMP:
+ return ngbe_ptp_set_ts_config(adapter, ifr);
+ case SIOCGMIIREG:
+ case SIOCSMIIREG:
+ return ngbe_mii_ioctl(netdev, ifr, cmd);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * ngbe_setup_tc - configure net_device for multiple traffic classes
+ * @dev: net device to configure
+ * @tc: number of traffic classes to enable
+ */
+int ngbe_setup_tc(struct net_device *dev, u8 tc)
+{
+ struct ngbe_adapter *adapter = netdev_priv(dev);
+
+ /* Hardware has to reinitialize queues and interrupts to
+ * match packet buffer alignment. Unfortunately, the
+ * hardware is not flexible enough to do this dynamically.
+ */
+ if (netif_running(dev))
+ ngbe_close(dev);
+ else
+ ngbe_reset(adapter);
+
+ ngbe_clear_interrupt_scheme(adapter);
+
+	if (tc)
+		netdev_set_num_tc(dev, tc);
+	else
+		netdev_reset_tc(dev);
+
+ ngbe_init_interrupt_scheme(adapter);
+ if (netif_running(dev))
+ ngbe_open(dev);
+
+ return 0;
+}
+
+#ifdef CONFIG_PCI_IOV
+void ngbe_sriov_reinit(struct ngbe_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ rtnl_lock();
+ ngbe_setup_tc(netdev, netdev_get_num_tc(netdev));
+ rtnl_unlock();
+}
+#endif
+
+void ngbe_do_reset(struct net_device *netdev)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ if (netif_running(netdev))
+ ngbe_reinit_locked(adapter);
+ else
+ ngbe_reset(adapter);
+}
+
+static netdev_features_t ngbe_fix_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ /* If Rx checksum is disabled, then RSC/LRO should also be disabled */
+ if (!(features & NETIF_F_RXCSUM))
+ features &= ~NETIF_F_LRO;
+
+ /* Turn off LRO if not RSC capable */
+ features &= ~NETIF_F_LRO;
+
+ return features;
+}
+
+static int ngbe_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ bool need_reset = false;
+
+ if (features & NETIF_F_HW_VLAN_CTAG_RX)
+ ngbe_vlan_strip_enable(adapter);
+ else
+ ngbe_vlan_strip_disable(adapter);
+
+ if (features & NETIF_F_RXHASH) {
+ if (!(adapter->flags2 & NGBE_FLAG2_RSS_ENABLED)) {
+ wr32m(&adapter->hw, NGBE_RDB_RA_CTL,
+ NGBE_RDB_RA_CTL_RSS_EN, NGBE_RDB_RA_CTL_RSS_EN);
+ adapter->flags2 |= NGBE_FLAG2_RSS_ENABLED;
+ }
+ } else {
+ if (adapter->flags2 & NGBE_FLAG2_RSS_ENABLED) {
+ wr32m(&adapter->hw, NGBE_RDB_RA_CTL,
+ NGBE_RDB_RA_CTL_RSS_EN, ~NGBE_RDB_RA_CTL_RSS_EN);
+ adapter->flags2 &= ~NGBE_FLAG2_RSS_ENABLED;
+ }
+ }
+
+ if (need_reset)
+ ngbe_do_reset(netdev);
+
+ return 0;
+}
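The RSS toggle above leans on the read-modify-write helper wr32m(); its
assumed semantics (a sketch, not the driver's actual definition) make it
clear why passing ~NGBE_RDB_RA_CTL_RSS_EN as the value clears the bit:
only the bits selected by the mask are taken from the value argument.

	/* assumed shape of the driver's wr32m() helper */
	static void wr32m(struct ngbe_hw *hw, u32 reg, u32 mask, u32 field)
	{
		u32 val = rd32(hw, reg);

		val = (val & ~mask) | (field & mask);
		wr32(hw, reg, val);
	}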
+
+static int ngbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr,
+ u16 vid,
+ u16 flags)
+{
+ /* guarantee we can provide a unique filter for the unicast address */
+ if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
+ if (NGBE_MAX_PF_MACVLANS <= netdev_uc_count(dev))
+ return -ENOMEM;
+ }
+
+ return ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, flags);
+}
+
+static int ngbe_ndo_bridge_setlink(struct net_device *dev,
+ struct nlmsghdr *nlh,
+ __always_unused u16 flags)
+{
+ struct ngbe_adapter *adapter = netdev_priv(dev);
+ struct nlattr *attr, *br_spec;
+ int rem;
+
+ if (!(adapter->flags & NGBE_FLAG_SRIOV_ENABLED))
+ return -EOPNOTSUPP;
+
+ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+
+ nla_for_each_nested(attr, br_spec, rem) {
+ __u16 mode;
+
+ if (nla_type(attr) != IFLA_BRIDGE_MODE)
+ continue;
+
+ mode = nla_get_u16(attr);
+ if (mode == BRIDGE_MODE_VEPA) {
+ adapter->flags |= NGBE_FLAG_SRIOV_VEPA_BRIDGE_MODE;
+ } else if (mode == BRIDGE_MODE_VEB) {
+ adapter->flags &= ~NGBE_FLAG_SRIOV_VEPA_BRIDGE_MODE;
+ } else {
+ return -EINVAL;
+ }
+
+ adapter->bridge_mode = mode;
+
+ /* re-configure settings related to bridge mode */
+ ngbe_configure_bridge_mode(adapter);
+
+ e_info(drv, "enabling bridge mode: %s\n",
+ mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
+ }
+
+ return 0;
+}
+
+static int ngbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct net_device *dev,
+ u32 __maybe_unused filter_mask,
+ int nlflags)
+{
+ struct ngbe_adapter *adapter = netdev_priv(dev);
+ u16 mode;
+
+ if (!(adapter->flags & NGBE_FLAG_SRIOV_ENABLED))
+ return 0;
+
+ mode = adapter->bridge_mode;
+ return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0, nlflags,
+ filter_mask, NULL);
+}
+
+#define NGBE_MAX_TUNNEL_HDR_LEN 80
+static netdev_features_t ngbe_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ u32 vlan_num = 0;
+ u16 vlan_depth = skb->mac_len;
+ __be16 type = skb->protocol;
+ struct vlan_hdr *vh;
+
+	if (skb_vlan_tag_present(skb))
+		vlan_num++;
+
+	if (vlan_depth)
+		vlan_depth -= VLAN_HLEN;
+	else
+		vlan_depth = ETH_HLEN;
+
+ while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
+ vlan_num++;
+ vh = (struct vlan_hdr *)(skb->data + vlan_depth);
+ type = vh->h_vlan_encapsulated_proto;
+ vlan_depth += VLAN_HLEN;
+	}
+
+ if (vlan_num > 2)
+ features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX);
+
+ if (skb->encapsulation) {
+ if (unlikely(skb_inner_mac_header(skb) -
+ skb_transport_header(skb) >
+ NGBE_MAX_TUNNEL_HDR_LEN))
+ return features & ~NETIF_F_CSUM_MASK;
+ }
+ return features;
+}
+
+static const struct net_device_ops ngbe_netdev_ops = {
+ .ndo_open = ngbe_open,
+ .ndo_stop = ngbe_close,
+ .ndo_start_xmit = ngbe_xmit_frame,
+ .ndo_set_rx_mode = ngbe_set_rx_mode,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_set_mac_address = ngbe_set_mac,
+ .ndo_change_mtu = ngbe_change_mtu,
+ .ndo_tx_timeout = ngbe_tx_timeout,
+ .ndo_vlan_rx_add_vid = ngbe_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = ngbe_vlan_rx_kill_vid,
+ .ndo_do_ioctl = ngbe_ioctl,
+
+ .ndo_set_vf_mac = ngbe_ndo_set_vf_mac,
+ .ndo_set_vf_vlan = ngbe_ndo_set_vf_vlan,
+	/* set_vf_rate is not supported by emerald */
+ .ndo_set_vf_rate = ngbe_ndo_set_vf_bw,
+ .ndo_set_vf_spoofchk = ngbe_ndo_set_vf_spoofchk,
+ .ndo_set_vf_trust = ngbe_ndo_set_vf_trust,
+ .ndo_get_vf_config = ngbe_ndo_get_vf_config,
+ .ndo_get_stats64 = ngbe_get_stats64,
+
+ .ndo_fdb_add = ngbe_ndo_fdb_add,
+
+ .ndo_bridge_setlink = ngbe_ndo_bridge_setlink,
+ .ndo_bridge_getlink = ngbe_ndo_bridge_getlink,
+
+ .ndo_features_check = ngbe_features_check,
+ .ndo_set_features = ngbe_set_features,
+ .ndo_fix_features = ngbe_fix_features,
+};
+
+void ngbe_assign_netdev_ops(struct net_device *dev)
+{
+ dev->netdev_ops = &ngbe_netdev_ops;
+ ngbe_set_ethtool_ops(dev);
+ dev->watchdog_timeo = 5 * HZ;
+}
+
+/**
+ * ngbe_wol_supported - Check whether device supports WoL
+ * @adapter: the adapter private structure
+ *
+ * This function is used by probe and ethtool to determine
+ * which devices have WoL support
+ **/
+int ngbe_wol_supported(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+
+	/* WoL is reported as supported on PCI functions 0 through 3 */
+	return (hw->bus.func == 0) || (hw->bus.func == 1) ||
+	       (hw->bus.func == 2) || (hw->bus.func == 3);
+}
+
+
+/**
+ * ngbe_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ngbe_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * ngbe_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+static int ngbe_probe(struct pci_dev *pdev,
+ const struct pci_device_id __always_unused *ent)
+{
+ struct net_device *netdev;
+ struct ngbe_adapter *adapter = NULL;
+ struct ngbe_hw *hw = NULL;
+ static int cards_found;
+ int err, pci_using_dac, expected_gts;
+ u32 eeprom_verl = 0;
+ u32 etrack_id = 0;
+ char *info_string, *i_s_var;
+ u32 eeprom_cksum_devcap = 0;
+ u32 saved_version = 0;
+ u32 devcap;
+
+ bool disable_dev = false;
+
+ netdev_features_t hw_features;
+
+ err = pci_enable_device_mem(pdev);
+ if (err)
+ return err;
+
+ if (!dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)) &&
+ !dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64))) {
+ pci_using_dac = 1;
+ } else {
+ err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
+ if (err) {
+ err = dma_set_coherent_mask(pci_dev_to_dev(pdev),
+ DMA_BIT_MASK(32));
+ if (err) {
+				dev_err(pci_dev_to_dev(pdev),
+					"No usable DMA configuration, aborting\n");
+ goto err_dma;
+ }
+ }
+ pci_using_dac = 0;
+ }
+
+ err = pci_request_selected_regions(pdev,
+ pci_select_bars(pdev, IORESOURCE_MEM),
+ ngbe_driver_name);
+ if (err) {
+ dev_err(pci_dev_to_dev(pdev),
+ "pci_request_selected_regions failed 0x%x\n", err);
+ goto err_pci_reg;
+ }
+
+ pci_enable_pcie_error_reporting(pdev);
+ pci_set_master(pdev);
+
+ /* errata 16 */
+ pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL,
+ PCI_EXP_DEVCTL_READRQ,
+ 0x1000);
+
+ netdev = alloc_etherdev_mq(sizeof(struct ngbe_adapter), NGBE_MAX_TX_QUEUES);
+ if (!netdev) {
+ err = -ENOMEM;
+ goto err_alloc_etherdev;
+ }
+
+ SET_NETDEV_DEV(netdev, pci_dev_to_dev(pdev));
+
+ adapter = netdev_priv(netdev);
+ adapter->netdev = netdev;
+ adapter->pdev = pdev;
+ hw = &adapter->hw;
+ hw->back = adapter;
+ adapter->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1;
+
+ hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+ pci_resource_len(pdev, 0));
+
+ adapter->io_addr = hw->hw_addr;
+ if (!hw->hw_addr) {
+ err = -EIO;
+ goto err_ioremap;
+ }
+
+ /* autoneg default on */
+ hw->mac.autoneg = true;
+
+ /* assign netdev ops and ethtool ops */
+ ngbe_assign_netdev_ops(netdev);
+
+ strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
+
+ adapter->bd_number = cards_found;
+
+ /* setup the private structure */
+ err = ngbe_sw_init(adapter);
+ if (err)
+ goto err_sw_init;
+
+ /*
+ * check_options must be called before setup_link to set up
+ * hw->fc completely
+ */
+ ngbe_check_options(adapter);
+
+ TCALL(hw, mac.ops.set_lan_id);
+
+ /* check if flash load is done after hw power up */
+ err = ngbe_check_flash_load(hw, NGBE_SPI_ILDR_STATUS_PERST);
+ if (err)
+ goto err_sw_init;
+ err = ngbe_check_flash_load(hw, NGBE_SPI_ILDR_STATUS_PWRRST);
+ if (err)
+ goto err_sw_init;
+
+ /* reset_hw fills in the perm_addr as well */
+
+ hw->phy.reset_if_overtemp = true;
+ err = TCALL(hw, mac.ops.reset_hw);
+ hw->phy.reset_if_overtemp = false;
+ if (err) {
+ e_dev_err("HW reset failed: %d\n", err);
+ goto err_sw_init;
+ }
+
+#ifdef CONFIG_PCI_IOV
+ if (adapter->num_vfs > 0) {
+		e_dev_warn("Enabling SR-IOV VFs using the max_vfs module parameter is deprecated.\n");
+		e_dev_warn("Please use the pci sysfs interface instead. Ex:\n");
+		e_dev_warn("echo '%d' > /sys/bus/pci/devices/%04x:%02x:%02x.%1x/sriov_numvfs\n",
+ adapter->num_vfs,
+ pci_domain_nr(pdev->bus),
+ pdev->bus->number,
+ PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn));
+ }
+
+ if (adapter->flags & NGBE_FLAG_SRIOV_CAPABLE) {
+ pci_sriov_set_totalvfs(pdev, NGBE_MAX_VFS_DRV_LIMIT);
+ ngbe_enable_sriov(adapter);
+ }
+#endif /* CONFIG_PCI_IOV */
+
+ netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+
+#ifdef NETIF_F_IPV6_CSUM
+ netdev->features |= NETIF_F_IPV6_CSUM;
+#endif
+
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_CTAG_RX;
+
+ netdev->features |= ngbe_tso_features();
+
+ if (adapter->flags2 & NGBE_FLAG2_RSS_ENABLED)
+ netdev->features |= NETIF_F_RXHASH;
+
+ netdev->features |= NETIF_F_RXCSUM;
+
+ /* copy netdev features into list of user selectable features */
+ hw_features = netdev->hw_features;
+ hw_features |= netdev->features;
+
+ /* set this bit last since it cannot be part of hw_features */
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ netdev->features |= NETIF_F_NTUPLE;
+
+ hw_features |= NETIF_F_NTUPLE;
+ netdev->hw_features = hw_features;
+
+ netdev->vlan_features |= NETIF_F_SG |
+ NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM |
+ NETIF_F_TSO |
+ NETIF_F_TSO6;
+
+ netdev->hw_enc_features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+ netdev->priv_flags |= IFF_SUPP_NOFCS;
+
+ /* MTU range: 68 - 9414 */
+ netdev->min_mtu = ETH_MIN_MTU;
+ netdev->max_mtu = NGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);
+
+ if (pci_using_dac) {
+ netdev->features |= NETIF_F_HIGHDMA;
+
+ netdev->vlan_features |= NETIF_F_HIGHDMA;
+
+ }
+
+ if (hw->bus.lan_id == 0) {
+ wr32(hw, NGBE_CALSUM_CAP_STATUS, 0x0);
+ wr32(hw, NGBE_EEPROM_VERSION_STORE_REG, 0x0);
+ } else {
+ eeprom_cksum_devcap = rd32(hw, NGBE_CALSUM_CAP_STATUS);
+ saved_version = rd32(hw, NGBE_EEPROM_VERSION_STORE_REG);
+ }
+
+ TCALL(hw, eeprom.ops.init_params);
+ TCALL(hw, mac.ops.release_swfw_sync, NGBE_MNG_SWFW_SYNC_SW_MB);
+ if (hw->bus.lan_id == 0 || eeprom_cksum_devcap == 0) {
+ /* make sure the EEPROM is good */
+ if (TCALL(hw, eeprom.ops.eeprom_chksum_cap_st, NGBE_CALSUM_COMMAND, &devcap)) {
+ e_dev_err("The EEPROM Checksum Is Not Valid\n");
+ err = -EIO;
+ goto err_sw_init;
+ }
+ }
+
+ memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len);
+
+ if (!is_valid_ether_addr(netdev->dev_addr)) {
+ e_dev_err("invalid MAC address\n");
+ err = -EIO;
+ goto err_sw_init;
+ }
+
+ ngbe_mac_set_default_filter(adapter, hw->mac.perm_addr);
+
+ timer_setup(&adapter->service_timer, ngbe_service_timer, 0);
+#ifdef CONFIG_NGBE_POLL_LINK_STATUS
+ timer_setup(&adapter->link_check_timer, ngbe_link_check_timer, 0);
+#endif
+ if (NGBE_REMOVED(hw->hw_addr)) {
+ err = -EIO;
+ goto err_sw_init;
+ }
+ INIT_WORK(&adapter->service_task, ngbe_service_task);
+ set_bit(__NGBE_SERVICE_INITED, &adapter->state);
+ clear_bit(__NGBE_SERVICE_SCHED, &adapter->state);
+
+ err = ngbe_init_interrupt_scheme(adapter);
+ if (err)
+ goto err_sw_init;
+
+ /* WOL not supported for all devices */
+ adapter->wol = 0;
+ if (hw->bus.lan_id == 0 || eeprom_cksum_devcap == 0) {
+ TCALL(hw, eeprom.ops.read,
+ hw->eeprom.sw_region_offset + NGBE_DEVICE_CAPS,
+ &adapter->eeprom_cap);
+		/* only supported on LAN0 */
+ adapter->eeprom_cap = NGBE_DEVICE_CAPS_WOL_PORT0;
+ } else {
+ adapter->eeprom_cap = eeprom_cksum_devcap & 0xffff;
+ }
+ if (ngbe_wol_supported(adapter))
+ adapter->wol = NGBE_PSR_WKUP_CTL_MAG;
+ if ((hw->subsystem_device_id & WOL_SUP_MASK) == WOL_SUP) {
+		/* enable WoL first in shadow RAM */
+ ngbe_write_ee_hostif(hw, 0x7FE, 0xa50F);
+ ngbe_write_ee_hostif(hw, 0x7FF, 0x5a5a);
+ }
+ hw->wol_enabled = !!(adapter->wol);
+ wr32(hw, NGBE_PSR_WKUP_CTL, adapter->wol);
+
+ device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev), adapter->wol);
+
+	/*
+	 * Save off the EEPROM version number and Option ROM version, which
+	 * together make a unique identifier for the EEPROM.
+	 */
+ if (hw->bus.lan_id == 0 || saved_version == 0) {
+ TCALL(hw, eeprom.ops.read32,
+ hw->eeprom.sw_region_offset + NGBE_EEPROM_VERSION_L,
+ &eeprom_verl);
+ etrack_id = eeprom_verl;
+ wr32(hw, NGBE_EEPROM_VERSION_STORE_REG, etrack_id);
+ wr32(hw, NGBE_CALSUM_CAP_STATUS, 0x10000 | (u32)adapter->eeprom_cap);
+ } else if (eeprom_cksum_devcap) {
+ etrack_id = saved_version;
+ } else {
+ TCALL(hw, eeprom.ops.read32,
+ hw->eeprom.sw_region_offset + NGBE_EEPROM_VERSION_L,
+ &eeprom_verl);
+ etrack_id = eeprom_verl;
+ }
+
+	/* save off the EEPROM version string for later reporting */
+ snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
+ "0x%08x", etrack_id);
+
+ /* reset the hardware with the new settings */
+ err = TCALL(hw, mac.ops.start_hw);
+ if (err == NGBE_ERR_EEPROM_VERSION) {
+ /* We are running on a pre-production device, log a warning */
+		e_dev_warn("This device is a pre-production adapter/LOM. Please be aware there may be issues associated with your hardware. If you are experiencing problems please contact your hardware representative who provided you with this hardware.\n");
+ } else if (err) {
+ e_dev_err("HW init failed, err = %d\n", err);
+ goto err_register;
+ }
+
+ /* pick up the PCI bus settings for reporting later */
+ TCALL(hw, mac.ops.get_bus_info);
+
+ strcpy(netdev->name, "eth%d");
+ err = register_netdev(netdev);
+ if (err)
+ goto err_register;
+
+ pci_set_drvdata(pdev, adapter);
+ adapter->netdev_registered = true;
+
+ /*
+ * call save state here in standalone driver because it relies on
+ * adapter struct to exist, and needs to call netdev_priv
+ */
+ pci_save_state(pdev);
+
+ /* carrier off reporting is important to ethtool even BEFORE open */
+ netif_carrier_off(netdev);
+ /* keep stopping all the transmit queues for older kernels */
+ netif_tx_stop_all_queues(netdev);
+
+ /* print all messages at the end so that we use our eth%d name */
+
+	/* calculate the expected PCIe bandwidth required for optimal
+	 * performance. Note that some older parts will never have enough
+	 * bandwidth due to being older generation PCIe parts. We clamp these
+	 * parts to ensure that no warning is displayed, as this could confuse
+	 * users otherwise.
+	 */
+
+ expected_gts = ngbe_enumerate_functions(adapter) * 10;
+
+ /* don't check link if we failed to enumerate functions */
+ if (expected_gts > 0)
+ ngbe_check_minimum_link(adapter, expected_gts);
+
+ TCALL(hw, mac.ops.set_fw_drv_ver, 0xFF, 0xFF, 0xFF, 0xFF);
+
+	if (((hw->subsystem_device_id & NCSI_SUP_MASK) == NCSI_SUP) ||
+	    ((hw->subsystem_device_id & OEM_MASK) == OCP_CARD))
+		e_info(probe, "NCSI: supported\n");
+	else
+		e_info(probe, "NCSI: not supported\n");
+
+	e_info(probe, "PHY: %s, PBA No: Wang Xun GbE Family Controller\n",
+	       hw->phy.type == ngbe_phy_internal ? "Internal" : "External");
+
+ e_info(probe, "%02x:%02x:%02x:%02x:%02x:%02x\n",
+ netdev->dev_addr[0], netdev->dev_addr[1],
+ netdev->dev_addr[2], netdev->dev_addr[3],
+ netdev->dev_addr[4], netdev->dev_addr[5]);
+
+#define INFO_STRING_LEN 255
+ info_string = kzalloc(INFO_STRING_LEN, GFP_KERNEL);
+ if (!info_string) {
+ e_err(probe, "allocation for info string failed\n");
+ goto no_info_string;
+ }
+ i_s_var = info_string;
+ i_s_var += sprintf(info_string, "Enabled Features: ");
+ i_s_var += sprintf(i_s_var, "RxQ: %d TxQ: %d ",
+ adapter->num_rx_queues, adapter->num_tx_queues);
+ if (adapter->flags & NGBE_FLAG_TPH_ENABLED)
+ i_s_var += sprintf(i_s_var, "TPH ");
+
+ BUG_ON(i_s_var > (info_string + INFO_STRING_LEN));
+ /* end features printing */
+ e_info(probe, "%s\n", info_string);
+ kfree(info_string);
+no_info_string:
+
+#ifdef CONFIG_PCI_IOV
+ if (adapter->flags & NGBE_FLAG_SRIOV_ENABLED) {
+ int i;
+ for (i = 0; i < adapter->num_vfs; i++)
+ ngbe_vf_configuration(pdev, (i | 0x10000000));
+ }
+#endif
+
+ e_info(probe, "WangXun(R) Gigabit Network Connection\n");
+ cards_found++;
+
+#ifdef CONFIG_NGBE_SYSFS
+ if (ngbe_sysfs_init(adapter))
+ e_err(probe, "failed to allocate sysfs resources\n");
+#else
+#ifdef CONFIG_NGBE_PROCFS
+ if (ngbe_procfs_init(adapter))
+ e_err(probe, "failed to allocate procfs resources\n");
+#endif /* CONFIG_NGBE_PROCFS */
+#endif /* CONFIG_NGBE_SYSFS */
+
+
+#ifdef CONFIG_NGBE_DEBUG_FS
+ ngbe_dbg_adapter_init(adapter);
+#endif /* CONFIG_NGBE_DEBUG_FS */
+
+ return 0;
+
+err_register:
+ ngbe_clear_interrupt_scheme(adapter);
+ ngbe_release_hw_control(adapter);
+err_sw_init:
+#ifdef CONFIG_PCI_IOV
+ ngbe_disable_sriov(adapter);
+#endif /* CONFIG_PCI_IOV */
+ adapter->flags2 &= ~NGBE_FLAG2_SEARCH_FOR_SFP;
+ kfree(adapter->mac_table);
+ iounmap(adapter->io_addr);
+err_ioremap:
+ disable_dev = !test_and_set_bit(__NGBE_DISABLED, &adapter->state);
+ free_netdev(netdev);
+err_alloc_etherdev:
+ pci_release_selected_regions(pdev,
+ pci_select_bars(pdev, IORESOURCE_MEM));
+err_pci_reg:
+err_dma:
+ if (!adapter || disable_dev)
+ pci_disable_device(pdev);
+
+ return err;
+}
+
+/**
+ * ngbe_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * ngbe_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device. This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void ngbe_remove(struct pci_dev *pdev)
+{
+ struct ngbe_adapter *adapter = pci_get_drvdata(pdev);
+ struct net_device *netdev;
+ bool disable_dev;
+
+ /* if !adapter then we already cleaned up in probe */
+ if (!adapter)
+ return;
+
+ netdev = adapter->netdev;
+#ifdef CONFIG_NGBE_DEBUG_FS
+ ngbe_dbg_adapter_exit(adapter);
+#endif
+
+ set_bit(__NGBE_REMOVING, &adapter->state);
+ cancel_work_sync(&adapter->service_task);
+
+#ifdef CONFIG_NGBE_SYSFS
+ ngbe_sysfs_exit(adapter);
+#else
+#ifdef CONFIG_NGBE_PROCFS
+ ngbe_procfs_exit(adapter);
+#endif
+#endif /* CONFIG_NGBE_SYSFS */
+ if (adapter->netdev_registered) {
+ unregister_netdev(netdev);
+ adapter->netdev_registered = false;
+ }
+
+#ifdef CONFIG_PCI_IOV
+ ngbe_disable_sriov(adapter);
+#endif
+
+ ngbe_clear_interrupt_scheme(adapter);
+ ngbe_release_hw_control(adapter);
+
+ iounmap(adapter->io_addr);
+ pci_release_selected_regions(pdev,
+ pci_select_bars(pdev, IORESOURCE_MEM));
+
+ kfree(adapter->mac_table);
+ disable_dev = !test_and_set_bit(__NGBE_DISABLED, &adapter->state);
+ free_netdev(netdev);
+
+ pci_disable_pcie_error_reporting(pdev);
+
+ if (disable_dev)
+ pci_disable_device(pdev);
+}
+
+static bool ngbe_check_cfg_remove(struct ngbe_hw *hw, struct pci_dev *pdev)
+{
+ u16 value;
+
+ pci_read_config_word(pdev, PCI_VENDOR_ID, &value);
+ if (value == NGBE_FAILED_READ_CFG_WORD) {
+ ngbe_remove_adapter(hw);
+ return true;
+ }
+ return false;
+}
+
+u16 ngbe_read_pci_cfg_word(struct ngbe_hw *hw, u32 reg)
+{
+ struct ngbe_adapter *adapter = hw->back;
+ u16 value;
+
+ if (NGBE_REMOVED(hw->hw_addr))
+ return NGBE_FAILED_READ_CFG_WORD;
+ pci_read_config_word(adapter->pdev, reg, &value);
+ if (value == NGBE_FAILED_READ_CFG_WORD &&
+ ngbe_check_cfg_remove(hw, adapter->pdev))
+ return NGBE_FAILED_READ_CFG_WORD;
+ return value;
+}
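ngbe_read_pci_cfg_word() doubles as a surprise-removal probe: a config read
of all ones means the device has dropped off the bus, and
ngbe_check_cfg_remove() then marks the adapter removed so later MMIO is
short-circuited. A minimal caller sketch (illustrative only):

	u16 vendor = ngbe_read_pci_cfg_word(hw, PCI_VENDOR_ID);

	if (vendor == NGBE_FAILED_READ_CFG_WORD)
		return;	/* device gone; NGBE_REMOVED(hw->hw_addr) now true */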
+
+#ifdef CONFIG_PCI_IOV
+static u32 ngbe_read_pci_cfg_dword(struct ngbe_hw *hw, u32 reg)
+{
+ struct ngbe_adapter *adapter = hw->back;
+ u32 value;
+
+ if (NGBE_REMOVED(hw->hw_addr))
+ return NGBE_FAILED_READ_CFG_DWORD;
+ pci_read_config_dword(adapter->pdev, reg, &value);
+ if (value == NGBE_FAILED_READ_CFG_DWORD &&
+ ngbe_check_cfg_remove(hw, adapter->pdev))
+ return NGBE_FAILED_READ_CFG_DWORD;
+ return value;
+}
+#endif /* CONFIG_PCI_IOV */
+
+void ngbe_write_pci_cfg_word(struct ngbe_hw *hw, u32 reg, u16 value)
+{
+ struct ngbe_adapter *adapter = hw->back;
+
+ if (NGBE_REMOVED(hw->hw_addr))
+ return;
+ pci_write_config_word(adapter->pdev, reg, value);
+}
+
+/**
+ * ngbe_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t ngbe_io_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct ngbe_adapter *adapter = pci_get_drvdata(pdev);
+ struct net_device *netdev = adapter->netdev;
+
+#ifdef CONFIG_PCI_IOV
+ struct ngbe_hw *hw = &adapter->hw;
+ struct pci_dev *bdev, *vfdev;
+ u32 dw0, dw1, dw2, dw3;
+ int vf, pos;
+ u16 req_id, pf_func;
+
+ if (adapter->num_vfs == 0)
+ goto skip_bad_vf_detection;
+
+ bdev = pdev->bus->self;
+ while (bdev && (pci_pcie_type(bdev) != PCI_EXP_TYPE_ROOT_PORT))
+ bdev = bdev->bus->self;
+
+ if (!bdev)
+ goto skip_bad_vf_detection;
+
+ pos = pci_find_ext_capability(bdev, PCI_EXT_CAP_ID_ERR);
+ if (!pos)
+ goto skip_bad_vf_detection;
+
+ dw0 = ngbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG);
+ dw1 = ngbe_read_pci_cfg_dword(hw,
+ pos + PCI_ERR_HEADER_LOG + 4);
+ dw2 = ngbe_read_pci_cfg_dword(hw,
+ pos + PCI_ERR_HEADER_LOG + 8);
+ dw3 = ngbe_read_pci_cfg_dword(hw,
+ pos + PCI_ERR_HEADER_LOG + 12);
+ if (NGBE_REMOVED(hw->hw_addr))
+ goto skip_bad_vf_detection;
+
+ req_id = dw1 >> 16;
+ /* if bit 7 of the requestor ID is set then it's a VF */
+ if (!(req_id & 0x0080))
+ goto skip_bad_vf_detection;
+
+ pf_func = req_id & 0x01;
+ if ((pf_func & 1) == (pdev->devfn & 1)) {
+ vf = (req_id & 0x7F) >> 1;
+ e_dev_err("VF %d has caused a PCIe error\n", vf);
+ e_dev_err("TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: "
+ "%8.8x\tdw3: %8.8x\n",
+ dw0, dw1, dw2, dw3);
+
+ /* Find the pci device of the offending VF */
+ vfdev = pci_get_device(PCI_VENDOR_ID_TRUSTNETIC,
+ NGBE_VF_DEVICE_ID, NULL);
+ while (vfdev) {
+ if (vfdev->devfn == (req_id & 0xFF))
+ break;
+ vfdev = pci_get_device(PCI_VENDOR_ID_TRUSTNETIC,
+ NGBE_VF_DEVICE_ID, vfdev);
+ }
+ /*
+ * There's a slim chance the VF could have been hot
+ * plugged, so if it is no longer present we don't need
+		 * to issue the VFLR. Just clean up the AER in that case.
+ */
+ if (vfdev) {
+ ngbe_issue_vf_flr(adapter, vfdev);
+ /* Free device reference count */
+ pci_dev_put(vfdev);
+ }
+
+ pci_cleanup_aer_uncorrect_error_status(pdev);
+ }
+
+ /*
+ * Even though the error may have occurred on the other port
+ * we still need to increment the vf error reference count for
+ * both ports because the I/O resume function will be called
+ * for both of them.
+ */
+ adapter->vferr_refcount++;
+
+ return PCI_ERS_RESULT_RECOVERED;
+
+ skip_bad_vf_detection:
+#endif /* CONFIG_PCI_IOV */
+
+ if (!test_bit(__NGBE_SERVICE_INITED, &adapter->state))
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ rtnl_lock();
+ netif_device_detach(netdev);
+
+ if (state == pci_channel_io_perm_failure) {
+ rtnl_unlock();
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ if (netif_running(netdev))
+ ngbe_close(netdev);
+
+ if (!test_and_set_bit(__NGBE_DISABLED, &adapter->state))
+ pci_disable_device(pdev);
+ rtnl_unlock();
+
+ /* Request a slot reset. */
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * ngbe_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot.
+ */
+static pci_ers_result_t ngbe_io_slot_reset(struct pci_dev *pdev)
+{
+ struct ngbe_adapter *adapter = pci_get_drvdata(pdev);
+ pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;
+
+ if (pci_enable_device_mem(pdev)) {
+ e_err(probe, "Cannot re-enable PCI device after reset.\n");
+ result = PCI_ERS_RESULT_DISCONNECT;
+ } else {
+ smp_mb__before_atomic();
+ clear_bit(__NGBE_DISABLED, &adapter->state);
+ adapter->hw.hw_addr = adapter->io_addr;
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+ /*
+ * After second error pci->state_saved is false, this
+ * resets it so EEH doesn't break.
+ */
+ pci_save_state(pdev);
+
+ pci_wake_from_d3(pdev, false);
+
+ ngbe_reset(adapter);
+
+ result = PCI_ERS_RESULT_RECOVERED;
+ }
+
+ pci_cleanup_aer_uncorrect_error_status(pdev);
+
+ return result;
+}
+
+/**
+ * ngbe_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells us that
+ * it's OK to resume normal operation.
+ */
+static void ngbe_io_resume(struct pci_dev *pdev)
+{
+ struct ngbe_adapter *adapter = pci_get_drvdata(pdev);
+ struct net_device *netdev = adapter->netdev;
+
+#ifdef CONFIG_PCI_IOV
+ if (adapter->vferr_refcount) {
+ e_info(drv, "Resuming after VF err\n");
+ adapter->vferr_refcount--;
+ return;
+ }
+#endif
+ rtnl_lock();
+ if (netif_running(netdev))
+ ngbe_open(netdev);
+
+ netif_device_attach(netdev);
+ rtnl_unlock();
+}
+
+static const struct pci_error_handlers ngbe_err_handler = {
+ .error_detected = ngbe_io_error_detected,
+ .slot_reset = ngbe_io_slot_reset,
+ .resume = ngbe_io_resume,
+};
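+
+/*
+ * Illustrative sketch (not part of this patch): the sequence in which the
+ * PCI core drives the handlers registered above during AER recovery. The
+ * function name is hypothetical and exists only to document the contract.
+ */
+static void __maybe_unused ngbe_aer_flow_example(struct pci_dev *pdev)
+{
+	/* 1. Error reported: detach the netdev and vote on recoverability. */
+	if (ngbe_io_error_detected(pdev, pci_channel_io_frozen) ==
+	    PCI_ERS_RESULT_DISCONNECT)
+		return;
+
+	/* 2. Slot was reset: re-enable the device and reset the hardware. */
+	if (ngbe_io_slot_reset(pdev) != PCI_ERS_RESULT_RECOVERED)
+		return;
+
+	/* 3. Recovery done: reopen the interface and reattach. */
+	ngbe_io_resume(pdev);
+}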
+
+struct net_device *ngbe_hw_to_netdev(const struct ngbe_hw *hw)
+{
+ return ((struct ngbe_adapter *)hw->back)->netdev;
+}
+
+struct ngbe_msg *ngbe_hw_to_msg(const struct ngbe_hw *hw)
+{
+ struct ngbe_adapter *adapter =
+ container_of(hw, struct ngbe_adapter, hw);
+ return (struct ngbe_msg *)&adapter->msg_enable;
+}
+
+static struct pci_driver ngbe_driver = {
+ .name = ngbe_driver_name,
+ .id_table = ngbe_pci_tbl,
+ .probe = ngbe_probe,
+ .remove = ngbe_remove,
+#ifdef CONFIG_PM
+ .suspend = ngbe_suspend,
+ .resume = ngbe_resume,
+#endif
+ .shutdown = ngbe_shutdown,
+ .sriov_configure = ngbe_pci_sriov_configure,
+ .err_handler = &ngbe_err_handler
+};
+
+/**
+ * ngbe_init_module - Driver Registration Routine
+ *
+ * ngbe_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ **/
+static int __init ngbe_init_module(void)
+{
+	int ret;
+
+	pr_info("%s - version %s\n", ngbe_driver_string, ngbe_driver_version);
+ pr_info("%s\n", ngbe_copyright);
+
+ ngbe_wq = create_singlethread_workqueue(ngbe_driver_name);
+ if (!ngbe_wq) {
+ pr_err("%s: Failed to create workqueue\n", ngbe_driver_name);
+ return -ENOMEM;
+ }
+
+#ifdef CONFIG_NGBE_PROCFS
+ if (ngbe_procfs_topdir_init())
+ pr_info("Procfs failed to initialize topdir\n");
+#endif
+
+#ifdef CONFIG_NGBE_DEBUG_FS
+ ngbe_dbg_init();
+#endif
+
+	ret = pci_register_driver(&ngbe_driver);
+	if (ret) {
+#ifdef CONFIG_NGBE_DEBUG_FS
+		ngbe_dbg_exit();
+#endif
+#ifdef CONFIG_NGBE_PROCFS
+		ngbe_procfs_topdir_exit();
+#endif
+		destroy_workqueue(ngbe_wq);
+	}
+	return ret;
+}
+
+module_init(ngbe_init_module);
+
+/**
+ * ngbe_exit_module - Driver Exit Cleanup Routine
+ *
+ * ngbe_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+static void __exit ngbe_exit_module(void)
+{
+ pci_unregister_driver(&ngbe_driver);
+#ifdef CONFIG_NGBE_PROCFS
+ ngbe_procfs_topdir_exit();
+#endif
+ destroy_workqueue(ngbe_wq);
+#ifdef CONFIG_NGBE_DEBUG_FS
+ ngbe_dbg_exit();
+#endif /* CONFIG_NGBE_DEBUG_FS */
+}
+
+module_exit(ngbe_exit_module);
+
+/* ngbe_main.c */
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_mbx.c b/drivers/net/ethernet/netswift/ngbe/ngbe_mbx.c
new file mode 100644
index 0000000000000..34167f78c207f
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_mbx.c
@@ -0,0 +1,687 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include "ngbe_type.h"
+#include "ngbe.h"
+#include "ngbe_mbx.h"
+
+
+/**
+ * ngbe_read_mbx - Reads a message from the mailbox
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @mbx_id: id of mailbox to read
+ *
+ * returns SUCCESS if it successfully read a message from the buffer
+ **/
+int ngbe_read_mbx(struct ngbe_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+{
+ struct ngbe_mbx_info *mbx = &hw->mbx;
+ int err = NGBE_ERR_MBX;
+
+ /* limit read to size of mailbox */
+ if (size > mbx->size)
+ size = mbx->size;
+
+ err = TCALL(hw, mbx.ops.read, msg, size, mbx_id);
+
+ return err;
+}
+
+/**
+ * ngbe_write_mbx - Write a message to the mailbox
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @mbx_id: id of mailbox to write
+ *
+ * returns SUCCESS if it successfully copied message into the buffer
+ **/
+int ngbe_write_mbx(struct ngbe_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+{
+ struct ngbe_mbx_info *mbx = &hw->mbx;
+ int err = 0;
+
+ if (size > mbx->size) {
+ err = NGBE_ERR_MBX;
+ ERROR_REPORT2(NGBE_ERROR_ARGUMENT,
+ "Invalid mailbox message size %d", size);
+ } else
+ err = TCALL(hw, mbx.ops.write, msg, size, mbx_id);
+
+ return err;
+}
+
+/**
+ * ngbe_check_for_msg - checks to see if someone sent us mail
+ * @hw: pointer to the HW structure
+ * @mbx_id: id of mailbox to check
+ *
+ * returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+int ngbe_check_for_msg(struct ngbe_hw *hw, u16 mbx_id)
+{
+ int err = NGBE_ERR_MBX;
+
+ err = TCALL(hw, mbx.ops.check_for_msg, mbx_id);
+
+ return err;
+}
+
+/**
+ * ngbe_check_for_ack - checks to see if someone sent us ACK
+ * @hw: pointer to the HW structure
+ * @mbx_id: id of mailbox to check
+ *
+ * returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+int ngbe_check_for_ack(struct ngbe_hw *hw, u16 mbx_id)
+{
+ int err = NGBE_ERR_MBX;
+
+ err = TCALL(hw, mbx.ops.check_for_ack, mbx_id);
+
+ return err;
+}
+
+/**
+ * ngbe_check_for_rst - checks to see if other side has reset
+ * @hw: pointer to the HW structure
+ * @mbx_id: id of mailbox to check
+ *
+ * returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+int ngbe_check_for_rst(struct ngbe_hw *hw, u16 mbx_id)
+{
+ struct ngbe_mbx_info *mbx = &hw->mbx;
+ int err = NGBE_ERR_MBX;
+
+ if (mbx->ops.check_for_rst)
+ err = mbx->ops.check_for_rst(hw, mbx_id);
+
+ return err;
+}
+
+/**
+ * ngbe_poll_for_msg - Wait for message notification
+ * @hw: pointer to the HW structure
+ * @mbx_id: id of mailbox to poll
+ *
+ * returns SUCCESS if it successfully received a message notification
+ **/
+int ngbe_poll_for_msg(struct ngbe_hw *hw, u16 mbx_id)
+{
+ struct ngbe_mbx_info *mbx = &hw->mbx;
+ int countdown = mbx->timeout;
+
+ if (!countdown || !mbx->ops.check_for_msg)
+ goto out;
+
+ while (countdown && TCALL(hw, mbx.ops.check_for_msg, mbx_id)) {
+ countdown--;
+ if (!countdown)
+ break;
+ udelay(mbx->udelay);
+ }
+
+ if (countdown == 0)
+ ERROR_REPORT2(NGBE_ERROR_POLLING,
+ "Polling for VF%d mailbox message timedout", mbx_id);
+
+out:
+ return countdown ? 0 : NGBE_ERR_MBX;
+}
+
+/**
+ * ngbe_poll_for_ack - Wait for message acknowledgement
+ * @hw: pointer to the HW structure
+ * @mbx_id: id of mailbox to poll
+ *
+ * returns SUCCESS if it successfully received a message acknowledgement
+ **/
+int ngbe_poll_for_ack(struct ngbe_hw *hw, u16 mbx_id)
+{
+ struct ngbe_mbx_info *mbx = &hw->mbx;
+ int countdown = mbx->timeout;
+
+ if (!countdown || !mbx->ops.check_for_ack)
+ goto out;
+
+ while (countdown && TCALL(hw, mbx.ops.check_for_ack, mbx_id)) {
+ countdown--;
+ if (!countdown)
+ break;
+ udelay(mbx->udelay);
+ }
+
+ if (countdown == 0)
+ ERROR_REPORT2(NGBE_ERROR_POLLING,
+ "Polling for VF%d mailbox ack timedout", mbx_id);
+
+out:
+ return countdown ? 0 : NGBE_ERR_MBX;
+}
+
+/**
+ * ngbe_read_posted_mbx - Wait for message notification and receive message
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @mbx_id: id of mailbox to write
+ *
+ * returns SUCCESS if it successfully received a message notification and
+ * copied it into the receive buffer.
+ **/
+int ngbe_read_posted_mbx(struct ngbe_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+{
+ struct ngbe_mbx_info *mbx = &hw->mbx;
+ int err = NGBE_ERR_MBX;
+
+ if (!mbx->ops.read)
+ goto out;
+
+ err = ngbe_poll_for_msg(hw, mbx_id);
+
+	/* if a message notification arrived, read it; otherwise we timed out */
+ if (!err)
+ err = TCALL(hw, mbx.ops.read, msg, size, mbx_id);
+out:
+ return err;
+}
+
+/**
+ * ngbe_write_posted_mbx - Write a message to the mailbox, wait for ack
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @mbx_id: id of mailbox to write
+ *
+ * returns SUCCESS if it successfully copied message into the buffer and
+ * received an ack to that message within delay * timeout period
+ **/
+int ngbe_write_posted_mbx(struct ngbe_hw *hw, u32 *msg, u16 size,
+ u16 mbx_id)
+{
+ struct ngbe_mbx_info *mbx = &hw->mbx;
+ int err;
+
+ /* exit if either we can't write or there isn't a defined timeout */
+ if (!mbx->timeout)
+ return NGBE_ERR_MBX;
+
+ /* send msg */
+ err = TCALL(hw, mbx.ops.write, msg, size, mbx_id);
+
+ /* if msg sent wait until we receive an ack */
+ if (!err)
+ err = ngbe_poll_for_ack(hw, mbx_id);
+
+ return err;
+}
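+
+/*
+ * Illustrative sketch (not part of this patch): a typical VF-side posted
+ * exchange built on the helpers above, negotiating mailbox API 1.3. The
+ * wrapper name is hypothetical; message IDs come from ngbe_mbx.h.
+ */
+static int __maybe_unused ngbe_negotiate_api_example(struct ngbe_hw *hw)
+{
+	u32 msg[2];
+	int err;
+
+	msg[0] = NGBE_VF_API_NEGOTIATE;
+	msg[1] = ngbe_mbox_api_13;
+
+	/* write the request and poll until the PF acks receipt */
+	err = ngbe_write_posted_mbx(hw, msg, 2, 0);
+	if (err)
+		return err;
+
+	/* poll for the PF's reply and read it back */
+	err = ngbe_read_posted_mbx(hw, msg, 2, 0);
+	if (err)
+		return err;
+
+	/* the PF echoes the request OR'd with ACK (success) or NACK */
+	if (msg[0] == (NGBE_VF_API_NEGOTIATE | NGBE_VT_MSGTYPE_ACK))
+		return 0;
+	return NGBE_ERR_MBX;
+}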
+
+/**
+ * ngbe_init_mbx_ops - Initialize MB function pointers
+ * @hw: pointer to the HW structure
+ *
+ * Sets up the mailbox read and write message function pointers
+ **/
+void ngbe_init_mbx_ops(struct ngbe_hw *hw)
+{
+ struct ngbe_mbx_info *mbx = &hw->mbx;
+
+ mbx->ops.read_posted = ngbe_read_posted_mbx;
+ mbx->ops.write_posted = ngbe_write_posted_mbx;
+}
+
+/**
+ * ngbe_read_v2p_mailbox - read v2p mailbox
+ * @hw: pointer to the HW structure
+ *
+ * This function is used to read the v2p mailbox without losing the read to
+ * clear status bits.
+ **/
+u32 ngbe_read_v2p_mailbox(struct ngbe_hw *hw)
+{
+ u32 v2p_mailbox = rd32(hw, NGBE_VXMAILBOX);
+
+ v2p_mailbox |= hw->mbx.v2p_mailbox;
+ hw->mbx.v2p_mailbox |= v2p_mailbox & NGBE_VXMAILBOX_R2C_BITS;
+
+ return v2p_mailbox;
+}
+
+/**
+ * ngbe_check_for_bit_vf - Determine if a status bit was set
+ * @hw: pointer to the HW structure
+ * @mask: bitmask for bits to be tested and cleared
+ *
+ * This function is used to check for the read to clear bits within
+ * the V2P mailbox.
+ **/
+int ngbe_check_for_bit_vf(struct ngbe_hw *hw, u32 mask)
+{
+ u32 mailbox = ngbe_read_v2p_mailbox(hw);
+
+ hw->mbx.v2p_mailbox &= ~mask;
+
+ return (mailbox & mask ? 0 : NGBE_ERR_MBX);
+}
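+
+/*
+ * Illustrative sketch (not part of this patch): why the caching above
+ * matters. One register read clears all read-to-clear (R2C) bits in
+ * hardware, so ngbe_read_v2p_mailbox() latches them in hw->mbx.v2p_mailbox
+ * and each ngbe_check_for_bit_vf() call consumes exactly one of them.
+ * The function name is hypothetical.
+ */
+static void __maybe_unused ngbe_r2c_example(struct ngbe_hw *hw)
+{
+	/* this read may clear every R2C bit in the register... */
+	bool msg_pending = !ngbe_check_for_bit_vf(hw, NGBE_VXMAILBOX_PFSTS);
+	/* ...but a latched PFACK is still visible to this later check */
+	bool ack_pending = !ngbe_check_for_bit_vf(hw, NGBE_VXMAILBOX_PFACK);
+
+	(void)msg_pending;
+	(void)ack_pending;
+}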
+
+/**
+ * ngbe_check_for_msg_vf - checks to see if the PF has sent mail
+ * @hw: pointer to the HW structure
+ * @mbx_id: id of mailbox to check
+ *
+ * returns SUCCESS if the PF has set the Status bit or else ERR_MBX
+ **/
+int ngbe_check_for_msg_vf(struct ngbe_hw *hw, u16 mbx_id)
+{
+ int err = NGBE_ERR_MBX;
+
+ UNREFERENCED_PARAMETER(mbx_id);
+
+ /* read clear the pf sts bit */
+ if (!ngbe_check_for_bit_vf(hw, NGBE_VXMAILBOX_PFSTS)) {
+ err = 0;
+ hw->mbx.stats.reqs++;
+ }
+
+ return err;
+}
+
+/**
+ * ngbe_check_for_ack_vf - checks to see if the PF has ACK'd
+ * @hw: pointer to the HW structure
+ * @mbx_id: id of mailbox to check
+ *
+ * returns SUCCESS if the PF has set the ACK bit or else ERR_MBX
+ **/
+int ngbe_check_for_ack_vf(struct ngbe_hw *hw, u16 mbx_id)
+{
+ int err = NGBE_ERR_MBX;
+
+ UNREFERENCED_PARAMETER(mbx_id);
+
+ /* read clear the pf ack bit */
+ if (!ngbe_check_for_bit_vf(hw, NGBE_VXMAILBOX_PFACK)) {
+ err = 0;
+ hw->mbx.stats.acks++;
+ }
+
+ return err;
+}
+
+/**
+ * ngbe_check_for_rst_vf - checks to see if the PF has reset
+ * @hw: pointer to the HW structure
+ * @mbx_id: id of mailbox to check
+ *
+ * returns true if the PF has set the reset done bit or else false
+ **/
+int ngbe_check_for_rst_vf(struct ngbe_hw *hw, u16 mbx_id)
+{
+ int err = NGBE_ERR_MBX;
+
+ UNREFERENCED_PARAMETER(mbx_id);
+ if (!ngbe_check_for_bit_vf(hw, (NGBE_VXMAILBOX_RSTD |
+ NGBE_VXMAILBOX_RSTI))) {
+ err = 0;
+ hw->mbx.stats.rsts++;
+ }
+
+ return err;
+}
+
+/**
+ * ngbe_obtain_mbx_lock_vf - obtain mailbox lock
+ * @hw: pointer to the HW structure
+ *
+ * return SUCCESS if we obtained the mailbox lock
+ **/
+int ngbe_obtain_mbx_lock_vf(struct ngbe_hw *hw)
+{
+ int err = NGBE_ERR_MBX;
+ u32 mailbox;
+
+ /* Take ownership of the buffer */
+ wr32(hw, NGBE_VXMAILBOX, NGBE_VXMAILBOX_VFU);
+
+ /* reserve mailbox for vf use */
+ mailbox = ngbe_read_v2p_mailbox(hw);
+ if (mailbox & NGBE_VXMAILBOX_VFU)
+ err = 0;
+ else
+ ERROR_REPORT2(NGBE_ERROR_POLLING,
+ "Failed to obtain mailbox lock for VF");
+
+ return err;
+}
+
+/**
+ * ngbe_write_mbx_vf - Write a message to the mailbox
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @mbx_id: id of mailbox to write
+ *
+ * returns SUCCESS if it successfully copied message into the buffer
+ **/
+int ngbe_write_mbx_vf(struct ngbe_hw *hw, u32 *msg, u16 size,
+ u16 mbx_id)
+{
+ int err;
+ u16 i;
+
+ UNREFERENCED_PARAMETER(mbx_id);
+
+ /* lock the mailbox to prevent pf/vf race condition */
+ err = ngbe_obtain_mbx_lock_vf(hw);
+ if (err)
+ goto out_no_write;
+
+ /* flush msg and acks as we are overwriting the message buffer */
+ ngbe_check_for_msg_vf(hw, 0);
+ ngbe_check_for_ack_vf(hw, 0);
+
+ /* copy the caller specified message to the mailbox memory buffer */
+ for (i = 0; i < size; i++)
+ wr32a(hw, NGBE_VXMBMEM, i, msg[i]);
+
+ /* update stats */
+ hw->mbx.stats.msgs_tx++;
+
+ /* Drop VFU and interrupt the PF to tell it a message has been sent */
+ wr32(hw, NGBE_VXMAILBOX, NGBE_VXMAILBOX_REQ);
+
+out_no_write:
+ return err;
+}
+
+/**
+ * ngbe_read_mbx_vf - Reads a message from the inbox intended for vf
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @mbx_id: id of mailbox to read
+ *
+ * returns SUCCESS if it successfully read a message from the buffer
+ **/
+int ngbe_read_mbx_vf(struct ngbe_hw *hw, u32 *msg, u16 size,
+ u16 mbx_id)
+{
+ int err = 0;
+ u16 i;
+ UNREFERENCED_PARAMETER(mbx_id);
+
+ /* lock the mailbox to prevent pf/vf race condition */
+ err = ngbe_obtain_mbx_lock_vf(hw);
+ if (err)
+ goto out_no_read;
+
+ /* copy the message from the mailbox memory buffer */
+ for (i = 0; i < size; i++)
+ msg[i] = rd32a(hw, NGBE_VXMBMEM, i);
+
+ /* Acknowledge receipt and release mailbox, then we're done */
+ wr32(hw, NGBE_VXMAILBOX, NGBE_VXMAILBOX_ACK);
+
+ /* update stats */
+ hw->mbx.stats.msgs_rx++;
+
+out_no_read:
+ return err;
+}
+
+/**
+ * ngbe_init_mbx_params_vf - set initial values for vf mailbox
+ * @hw: pointer to the HW structure
+ *
+ * Initializes the hw->mbx struct to correct values for vf mailbox
+ */
+void ngbe_init_mbx_params_vf(struct ngbe_hw *hw)
+{
+ struct ngbe_mbx_info *mbx = &hw->mbx;
+
+ /* start mailbox as timed out and let the reset_hw call set the timeout
+ * value to begin communications */
+ mbx->timeout = 0;
+ mbx->udelay = NGBE_VF_MBX_INIT_DELAY;
+
+ mbx->size = NGBE_VXMAILBOX_SIZE;
+
+ mbx->ops.read = ngbe_read_mbx_vf;
+ mbx->ops.write = ngbe_write_mbx_vf;
+ mbx->ops.read_posted = ngbe_read_posted_mbx;
+ mbx->ops.write_posted = ngbe_write_posted_mbx;
+ mbx->ops.check_for_msg = ngbe_check_for_msg_vf;
+ mbx->ops.check_for_ack = ngbe_check_for_ack_vf;
+ mbx->ops.check_for_rst = ngbe_check_for_rst_vf;
+
+ mbx->stats.msgs_tx = 0;
+ mbx->stats.msgs_rx = 0;
+ mbx->stats.reqs = 0;
+ mbx->stats.acks = 0;
+ mbx->stats.rsts = 0;
+}
+
+int ngbe_check_for_bit_pf(struct ngbe_hw *hw, u32 mask)
+{
+ u32 mbvficr = rd32(hw, NGBE_MBVFICR);
+ int err = NGBE_ERR_MBX;
+
+ if (mbvficr & mask) {
+ err = 0;
+ wr32(hw, NGBE_MBVFICR, mask);
+ }
+
+ return err;
+}
+
+/**
+ * ngbe_check_for_msg_pf - checks to see if the VF has sent mail
+ * @hw: pointer to the HW structure
+ * @vf: the VF index
+ *
+ * returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+int ngbe_check_for_msg_pf(struct ngbe_hw *hw, u16 vf)
+{
+ int err = NGBE_ERR_MBX;
+ u32 vf_bit = vf;
+
+ if (!ngbe_check_for_bit_pf(hw, NGBE_MBVFICR_VFREQ_VF1 << vf_bit)) {
+ err = 0;
+ hw->mbx.stats.reqs++;
+ }
+
+ return err;
+}
+
+/**
+ * ngbe_check_for_ack_pf - checks to see if the VF has ACKed
+ * @hw: pointer to the HW structure
+ * @vf: the VF index
+ *
+ * returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+int ngbe_check_for_ack_pf(struct ngbe_hw *hw, u16 vf)
+{
+ int err = NGBE_ERR_MBX;
+ u32 vf_bit = vf;
+
+ if (!ngbe_check_for_bit_pf(hw, NGBE_MBVFICR_VFACK_VF1 << vf_bit)) {
+ err = 0;
+ hw->mbx.stats.acks++;
+ }
+
+ return err;
+}
+
+/**
+ * ngbe_check_for_rst_pf - checks to see if the VF has reset
+ * @hw: pointer to the HW structure
+ * @vf: the VF index
+ *
+ * returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+int ngbe_check_for_rst_pf(struct ngbe_hw *hw, u16 vf)
+{
+ u32 vflre = 0;
+ int err = NGBE_ERR_MBX;
+
+ vflre = rd32(hw, NGBE_VFLRE);
+
+ if (vflre & (1 << vf)) {
+ err = 0;
+ wr32(hw, NGBE_VFLREC, (1 << vf));
+ hw->mbx.stats.rsts++;
+ }
+
+ return err;
+}
+
+/**
+ * ngbe_obtain_mbx_lock_pf - obtain mailbox lock
+ * @hw: pointer to the HW structure
+ * @vf: the VF index
+ *
+ * return SUCCESS if we obtained the mailbox lock
+ **/
+int ngbe_obtain_mbx_lock_pf(struct ngbe_hw *hw, u16 vf)
+{
+ int err = NGBE_ERR_MBX;
+ u32 mailbox;
+
+ /* Take ownership of the buffer */
+ wr32(hw, NGBE_PXMAILBOX(vf), NGBE_PXMAILBOX_PFU);
+
+ /* reserve mailbox for vf use */
+ mailbox = rd32(hw, NGBE_PXMAILBOX(vf));
+ if (mailbox & NGBE_PXMAILBOX_PFU)
+ err = 0;
+ else
+ ERROR_REPORT2(NGBE_ERROR_POLLING,
+ "Failed to obtain mailbox lock for PF%d", vf);
+
+ return err;
+}
+
+/**
+ * ngbe_write_mbx_pf - Places a message in the mailbox
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @vf: the VF index
+ *
+ * returns SUCCESS if it successfully copied message into the buffer
+ **/
+int ngbe_write_mbx_pf(struct ngbe_hw *hw, u32 *msg, u16 size,
+ u16 vf)
+{
+ int err;
+ u16 i;
+
+ /* lock the mailbox to prevent pf/vf race condition */
+ err = ngbe_obtain_mbx_lock_pf(hw, vf);
+ if (err)
+ goto out_no_write;
+
+ /* flush msg and acks as we are overwriting the message buffer */
+ ngbe_check_for_msg_pf(hw, vf);
+ ngbe_check_for_ack_pf(hw, vf);
+
+ /* copy the caller specified message to the mailbox memory buffer */
+ for (i = 0; i < size; i++)
+ wr32a(hw, NGBE_PXMBMEM(vf), i, msg[i]);
+
+	/* Interrupt VF to tell it a message has been sent and release buffer */
+ wr32(hw, NGBE_PXMAILBOX(vf), NGBE_PXMAILBOX_STS);
+
+ /* update stats */
+ hw->mbx.stats.msgs_tx++;
+
+out_no_write:
+ return err;
+}
+
+/**
+ * ngbe_read_mbx_pf - Read a message from the mailbox
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @vf: the VF index
+ *
+ * This function copies a message from the mailbox buffer to the caller's
+ * memory buffer. The presumption is that the caller knows that there was
+ * a message due to a VF request so no polling for message is needed.
+ **/
+int ngbe_read_mbx_pf(struct ngbe_hw *hw, u32 *msg, u16 size,
+ u16 vf)
+{
+ int err;
+ u16 i;
+
+ /* lock the mailbox to prevent pf/vf race condition */
+ err = ngbe_obtain_mbx_lock_pf(hw, vf);
+ if (err)
+ goto out_no_read;
+
+	/* copy the message from the mailbox memory buffer */
+ for (i = 0; i < size; i++)
+ msg[i] = rd32a(hw, NGBE_PXMBMEM(vf), i);
+
+ /* Acknowledge the message and release buffer */
+ wr32(hw, NGBE_PXMAILBOX(vf), NGBE_PXMAILBOX_ACK);
+
+ /* update stats */
+ hw->mbx.stats.msgs_rx++;
+
+out_no_read:
+ return err;
+}
+
+/**
+ * ngbe_init_mbx_params_pf - set initial values for pf mailbox
+ * @hw: pointer to the HW structure
+ *
+ * Initializes the hw->mbx struct to correct values for pf mailbox
+ */
+void ngbe_init_mbx_params_pf(struct ngbe_hw *hw)
+{
+ struct ngbe_mbx_info *mbx = &hw->mbx;
+
+ mbx->timeout = 0;
+ mbx->udelay = 0;
+
+ mbx->size = NGBE_VXMAILBOX_SIZE;
+
+ mbx->ops.read = ngbe_read_mbx_pf;
+ mbx->ops.write = ngbe_write_mbx_pf;
+ mbx->ops.read_posted = ngbe_read_posted_mbx;
+ mbx->ops.write_posted = ngbe_write_posted_mbx;
+ mbx->ops.check_for_msg = ngbe_check_for_msg_pf;
+ mbx->ops.check_for_ack = ngbe_check_for_ack_pf;
+ mbx->ops.check_for_rst = ngbe_check_for_rst_pf;
+
+ mbx->stats.msgs_tx = 0;
+ mbx->stats.msgs_rx = 0;
+ mbx->stats.reqs = 0;
+ mbx->stats.acks = 0;
+ mbx->stats.rsts = 0;
+}
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_mbx.h b/drivers/net/ethernet/netswift/ngbe/ngbe_mbx.h
new file mode 100644
index 0000000000000..5e89fa180f968
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_mbx.h
@@ -0,0 +1,167 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+#ifndef _NGBE_MBX_H_
+#define _NGBE_MBX_H_
+
+#define NGBE_VXMAILBOX_SIZE (16)
+
+/**
+ * VF Registers
+ **/
+#define NGBE_VXMAILBOX 0x00600
+#define NGBE_VXMAILBOX_REQ ((0x1) << 0) /* Request for PF Ready bit */
+#define NGBE_VXMAILBOX_ACK ((0x1) << 1) /* Ack PF message received */
+#define NGBE_VXMAILBOX_VFU ((0x1) << 2) /* VF owns the mailbox buffer */
+#define NGBE_VXMAILBOX_PFU ((0x1) << 3) /* PF owns the mailbox buffer */
+#define NGBE_VXMAILBOX_PFSTS ((0x1) << 4) /* PF wrote a message in the MB */
+#define NGBE_VXMAILBOX_PFACK ((0x1) << 5) /* PF ack the previous VF msg */
+#define NGBE_VXMAILBOX_RSTI ((0x1) << 6) /* PF has reset indication */
+#define NGBE_VXMAILBOX_RSTD ((0x1) << 7) /* PF has indicated reset done */
+#define NGBE_VXMAILBOX_R2C_BITS (NGBE_VXMAILBOX_RSTD | \
+ NGBE_VXMAILBOX_PFSTS | NGBE_VXMAILBOX_PFACK)
+
+#define NGBE_VXMBMEM 0x00C00 /* 16*4B */
+
+/**
+ * PF Registers
+ **/
+#define NGBE_PXMAILBOX(i) (0x00600 + (4 * (i))) /* i=[0,7] */
+#define NGBE_PXMAILBOX_STS ((0x1) << 0) /* Initiate message send to VF */
+#define NGBE_PXMAILBOX_ACK ((0x1) << 1) /* Ack message recv'd from VF */
+#define NGBE_PXMAILBOX_VFU ((0x1) << 2) /* VF owns the mailbox buffer */
+#define NGBE_PXMAILBOX_PFU ((0x1) << 3) /* PF owns the mailbox buffer */
+#define NGBE_PXMAILBOX_RVFU ((0x1) << 4) /* Reset VFU - used when VF stuck */
+
+#define NGBE_PXMBMEM(i) (0x5000 + (64 * (i))) /* i=[0,7] */
+
+#define NGBE_VFLRP(i) (0x00490 + (4 * (i))) /* i=[0,1] */
+#define NGBE_VFLRE 0x004A0
+#define NGBE_VFLREC 0x004A8
+
+/* SR-IOV specific macros */
+#define NGBE_MBVFICR 0x00480
+#define NGBE_MBVFICR_INDEX(vf) ((vf) >> 4)
+#define NGBE_MBVFICR_VFREQ_MASK (0x0000FFFF) /* bits for VF messages */
+#define NGBE_MBVFICR_VFREQ_VF1 (0x00000001) /* bit for VF 1 message */
+#define NGBE_MBVFICR_VFACK_MASK (0xFFFF0000) /* bits for VF acks */
+#define NGBE_MBVFICR_VFACK_VF1 (0x00010000) /* bit for VF 1 ack */
+
+/**
+ * Messages
+ **/
+/* If it's a NGBE_VF_* msg then it originates in the VF and is sent to the
+ * PF. The reverse is true if it is NGBE_PF_*.
+ * Message ACKs are the original message value OR'd with NGBE_VT_MSGTYPE_ACK
+ * (NACKs use NGBE_VT_MSGTYPE_NACK).
+ */
+#define NGBE_VT_MSGTYPE_ACK 0x80000000 /* Messages below or'd with
+ * this are the ACK */
+#define NGBE_VT_MSGTYPE_NACK 0x40000000 /* Messages below or'd with
+ * this are the NACK */
+#define NGBE_VT_MSGTYPE_CTS 0x20000000 /* Indicates that VF is still
+ * clear to send requests */
+#define NGBE_VT_MSGINFO_SHIFT 16
+/* bits 23:16 are used for extra info for certain messages */
+#define NGBE_VT_MSGINFO_MASK (0xFF << NGBE_VT_MSGINFO_SHIFT)
+
+/* definitions to support mailbox API version negotiation */
+
+/*
+ * each element denotes a version of the API; existing numbers may not
+ * change; any additions must go at the end
+ */
+enum ngbe_pfvf_api_rev {
+ ngbe_mbox_api_null,
+ ngbe_mbox_api_10, /* API version 1.0, linux/freebsd VF driver */
+ ngbe_mbox_api_11, /* API version 1.1, linux/freebsd VF driver */
+ ngbe_mbox_api_12, /* API version 1.2, linux/freebsd VF driver */
+ ngbe_mbox_api_13, /* API version 1.3, linux/freebsd VF driver */
+ ngbe_mbox_api_20, /* API version 2.0, solaris Phase1 VF driver */
+ ngbe_mbox_api_unknown, /* indicates that API version is not known */
+};
+
+/* mailbox API, legacy requests */
+#define NGBE_VF_RESET 0x01 /* VF requests reset */
+#define NGBE_VF_SET_MAC_ADDR 0x02 /* VF requests PF to set MAC addr */
+#define NGBE_VF_SET_MULTICAST 0x03 /* VF requests PF to set MC addr */
+#define NGBE_VF_SET_VLAN 0x04 /* VF requests PF to set VLAN */
+
+/* mailbox API, version 1.0 VF requests */
+#define NGBE_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */
+#define NGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */
+#define NGBE_VF_API_NEGOTIATE 0x08 /* negotiate API version */
+
+/* mailbox API, version 1.1 VF requests */
+#define NGBE_VF_GET_QUEUES 0x09 /* get queue configuration */
+
+/* mailbox API, version 1.2 VF requests */
+#define NGBE_VF_GET_RETA 0x0a /* VF request for RETA */
+#define NGBE_VF_GET_RSS_KEY 0x0b /* get RSS key */
+#define NGBE_VF_UPDATE_XCAST_MODE 0x0c
+#define NGBE_VF_BACKUP 0x8001 /* VF requests backup */
+
+#define NGBE_VF_GET_LINK_STATUS 0x20 /* VF get link status from PF */
+
+/* mode choices for NGBE_VF_UPDATE_XCAST_MODE */
+enum ngbevf_xcast_modes {
+ NGBEVF_XCAST_MODE_NONE = 0,
+ NGBEVF_XCAST_MODE_MULTI,
+ NGBEVF_XCAST_MODE_ALLMULTI,
+ NGBEVF_XCAST_MODE_PROMISC,
+};
+
+/* GET_QUEUES return data indices within the mailbox */
+#define NGBE_VF_TX_QUEUES 1 /* number of Tx queues supported */
+#define NGBE_VF_RX_QUEUES 2 /* number of Rx queues supported */
+#define NGBE_VF_TRANS_VLAN 3 /* Indication of port vlan */
+#define NGBE_VF_DEF_QUEUE 4 /* Default queue offset */
+
+/* length of permanent address message returned from PF */
+#define NGBE_VF_PERMADDR_MSG_LEN 4
+/* word in permanent address message with the current multicast type */
+#define NGBE_VF_MC_TYPE_WORD 3
+
+#define NGBE_PF_CONTROL_MSG 0x0100 /* PF control message */
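+
+/*
+ * Illustrative sketch (not part of this patch): how a request word packs a
+ * message ID with extra info in bits 23:16, and how a VF classifies the
+ * PF's reply. The helper names are hypothetical.
+ */
+static inline u32 ngbe_mbx_pack_example(void)
+{
+	/* NGBE_VF_SET_MACVLAN for unicast filter index 2 */
+	return NGBE_VF_SET_MACVLAN | (2 << NGBE_VT_MSGINFO_SHIFT);
+}
+
+static inline bool ngbe_mbx_reply_nacked_example(u32 reply)
+{
+	return !!(reply & NGBE_VT_MSGTYPE_NACK);
+}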
+
+/* mailbox API, version 2.0 VF requests */
+#define NGBE_VF_API_NEGOTIATE 0x08 /* negotiate API version */
+#define NGBE_VF_GET_QUEUES 0x09 /* get queue configuration */
+#define NGBE_VF_ENABLE_MACADDR 0x0A /* enable MAC address */
+#define NGBE_VF_DISABLE_MACADDR 0x0B /* disable MAC address */
+#define NGBE_VF_GET_MACADDRS 0x0C /* get all configured MAC addrs */
+#define NGBE_VF_SET_MCAST_PROMISC 0x0D /* enable multicast promiscuous */
+#define NGBE_VF_GET_MTU 0x0E /* get bounds on MTU */
+#define NGBE_VF_SET_MTU 0x0F /* set a specific MTU */
+
+/* mailbox API, version 2.0 PF requests */
+#define NGBE_PF_TRANSPARENT_VLAN 0x0101 /* enable transparent vlan */
+
+#define NGBE_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */
+#define NGBE_VF_MBX_INIT_DELAY 500 /* microseconds between retries */
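+
+/*
+ * With these defaults the posted-mailbox helpers poll for at most
+ * NGBE_VF_MBX_INIT_TIMEOUT * NGBE_VF_MBX_INIT_DELAY = 2000 * 500 us = 1 s
+ * before reporting NGBE_ERR_MBX.
+ */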
+
+int ngbe_read_mbx(struct ngbe_hw *, u32 *, u16, u16);
+int ngbe_write_mbx(struct ngbe_hw *, u32 *, u16, u16);
+int ngbe_read_posted_mbx(struct ngbe_hw *, u32 *, u16, u16);
+int ngbe_write_posted_mbx(struct ngbe_hw *, u32 *, u16, u16);
+int ngbe_check_for_msg(struct ngbe_hw *, u16);
+int ngbe_check_for_ack(struct ngbe_hw *, u16);
+int ngbe_check_for_rst(struct ngbe_hw *, u16);
+void ngbe_init_mbx_ops(struct ngbe_hw *hw);
+void ngbe_init_mbx_params_vf(struct ngbe_hw *);
+void ngbe_init_mbx_params_pf(struct ngbe_hw *);
+
+#endif /* _NGBE_MBX_H_ */
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_param.c b/drivers/net/ethernet/netswift/ngbe/ngbe_param.c
new file mode 100644
index 0000000000000..92f0dd0f32734
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_param.c
@@ -0,0 +1,839 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+
+#include <linux/types.h>
+#include <linux/module.h>
+
+#include "ngbe.h"
+
+/* This is the only thing that needs to be changed to adjust the
+ * maximum number of ports that the driver can manage.
+ */
+#define NGBE_MAX_NIC 32
+#define OPTION_UNSET -1
+#define OPTION_DISABLED 0
+#define OPTION_ENABLED 1
+
+#define STRINGIFY(foo) #foo /* magic for getting defines into strings */
+#define XSTRINGIFY(bar) STRINGIFY(bar)
+
+/* All parameters are treated the same, as an integer array of values.
+ * This macro just reduces the need to repeat the same declaration code
+ * over and over (plus this helps to avoid typo bugs).
+ */
+
+#define NGBE_PARAM_INIT { [0 ... NGBE_MAX_NIC] = OPTION_UNSET }
+
+#define NGBE_PARAM(X, desc) \
+ static int X[NGBE_MAX_NIC + 1] = NGBE_PARAM_INIT; \
+ static unsigned int num_##X; \
+ module_param_array(X, int, &num_##X, 0); \
+ MODULE_PARM_DESC(X, desc);
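+
+/*
+ * Illustrative sketch (not part of this patch): NGBE_PARAM(MQ, "...")
+ * below expands to roughly the following, so each option is a per-board
+ * array plus a count of how many boards the user supplied values for.
+ */
+#if 0
+static int MQ[NGBE_MAX_NIC + 1] = { [0 ... NGBE_MAX_NIC] = OPTION_UNSET };
+static unsigned int num_MQ;
+module_param_array(MQ, int, &num_MQ, 0);
+MODULE_PARM_DESC(MQ, "...");
+#endif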
+
+/* IntMode (Interrupt Mode)
+ *
+ * Valid Range: 0-2
+ * - 0 - Legacy Interrupt
+ * - 1 - MSI Interrupt
+ * - 2 - MSI-X Interrupt(s)
+ *
+ * Default Value: 2
+ */
+NGBE_PARAM(InterruptType, "Change Interrupt Mode (0=Legacy, 1=MSI, 2=MSI-X), "
+ "default IntMode (deprecated)");
+NGBE_PARAM(IntMode, "Change Interrupt Mode (0=Legacy, 1=MSI, 2=MSI-X), "
+ "default 2");
+#define NGBE_INT_LEGACY 0
+#define NGBE_INT_MSI 1
+#define NGBE_INT_MSIX 2
+#define NGBE_DEFAULT_INT NGBE_INT_MSIX
+
+/* MQ - Multiple Queue enable/disable
+ *
+ * Valid Range: 0, 1
+ * - 0 - disables MQ
+ * - 1 - enables MQ
+ *
+ * Default Value: 1
+ */
+
+NGBE_PARAM(MQ, "Disable or enable Multiple Queues, default 1");
+
+/* RSS - Receive-Side Scaling (RSS) Descriptor Queues
+ *
+ * Valid Range: 0-64
+ * - 0 - enables RSS and sets the Desc. Q's to min(64, num_online_cpus()).
+ * - 1-64 - enables RSS and sets the Desc. Q's to the specified value.
+ *
+ * Default Value: 0
+ */
+
+NGBE_PARAM(RSS, "Number of Receive-Side Scaling Descriptor Queues, "
+ "default 0=number of cpus");
+
+/* VMDQ - Virtual Machine Device Queues (VMDQ)
+ *
+ * Valid Range: 1-16
+ * - 1 Disables VMDQ by allocating only a single queue.
+ * - 2-16 - enables VMDQ and sets the Desc. Q's to the specified value.
+ *
+ * Default Value: 1
+ */
+
+#define NGBE_DEFAULT_NUM_VMDQ 8
+
+NGBE_PARAM(VMDQ, "Number of Virtual Machine Device Queues: 0/1 = disable, "
+ "2-16 enable (default=" XSTRINGIFY(NGBE_DEFAULT_NUM_VMDQ) ")");
+
+#ifdef CONFIG_PCI_IOV
+/* max_vfs - SR I/O Virtualization
+ *
+ * Valid Range: 0-63
+ * - 0 Disables SR-IOV
+ * - 1-63 - enables SR-IOV and sets the number of VFs enabled
+ *
+ * Default Value: 0
+ */
+
+#define MAX_SRIOV_VFS 8
+
+NGBE_PARAM(max_vfs, "Number of Virtual Functions: 0 = disable (default), "
+ "1-" XSTRINGIFY(MAX_SRIOV_VFS) " = enable "
+ "this many VFs");
+
+/* VEPA - Set internal bridge to VEPA mode
+ *
+ * Valid Range: 0-1
+ * - 0 Set bridge to VEB mode
+ * - 1 Set bridge to VEPA mode
+ *
+ * Default Value: 0
+ */
+/*
+ * Note:
+ * =====
+ * This provides the ability to ensure VEPA mode on the internal bridge even
+ * if the kernel does not support the netdev bridge setting operations.
+ */
+NGBE_PARAM(VEPA, "VEPA Bridge Mode: 0 = VEB (default), 1 = VEPA");
+#endif
+
+/* Interrupt Throttle Rate (interrupts/sec)
+ *
+ * Valid Range: 980-500000 (0=off, 1=dynamic)
+ *
+ * Default Value: 1
+ */
+#define DEFAULT_ITR 1
+NGBE_PARAM(InterruptThrottleRate, "Maximum interrupts per second, per vector, "
+ "(0,1,980-500000), default 1");
+#define MAX_ITR NGBE_MAX_INT_RATE
+#define MIN_ITR NGBE_MIN_INT_RATE
+
+#ifndef CONFIG_NGBE_NO_LLI
+
+/* LLIPort (Low Latency Interrupt TCP Port)
+ *
+ * Valid Range: 0 - 65535
+ *
+ * Default Value: 0 (disabled)
+ */
+NGBE_PARAM(LLIPort, "Low Latency Interrupt TCP Port (0-65535)");
+
+#define DEFAULT_LLIPORT 0
+#define MAX_LLIPORT 0xFFFF
+#define MIN_LLIPORT 0
+
+
+/* LLISize (Low Latency Interrupt on Packet Size)
+ *
+ * Valid Range: 0 - 1500
+ *
+ * Default Value: 0 (disabled)
+ */
+NGBE_PARAM(LLISize, "Low Latency Interrupt on Packet Size (0-1500)");
+
+#define DEFAULT_LLISIZE 0
+#define MAX_LLISIZE 1500
+#define MIN_LLISIZE 0
+
+/* LLIEType (Low Latency Interrupt Ethernet Type)
+ *
+ * Valid Range: 0 - 0x8fff
+ *
+ * Default Value: 0 (disabled)
+ */
+NGBE_PARAM(LLIEType, "Low Latency Interrupt Ethernet Protocol Type");
+
+#define DEFAULT_LLIETYPE 0
+#define MAX_LLIETYPE 0x8fff
+#define MIN_LLIETYPE 0
+
+/* LLIVLANP (Low Latency Interrupt on VLAN priority threshold)
+ *
+ * Valid Range: 0 - 7
+ *
+ * Default Value: 0 (disabled)
+ */
+NGBE_PARAM(LLIVLANP, "Low Latency Interrupt on VLAN priority threshold");
+
+#define DEFAULT_LLIVLANP 0
+#define MAX_LLIVLANP 7
+#define MIN_LLIVLANP 0
+
+#endif /* CONFIG_NGBE_NO_LLI */
+
+/* Software ATR packet sample rate
+ *
+ * Valid Range: 0-255 (0 = off, 1-255 = rate of Tx packet inspection)
+ *
+ * Default Value: 20
+ */
+NGBE_PARAM(AtrSampleRate, "Software ATR Tx packet sample rate");
+
+#define NGBE_MAX_ATR_SAMPLE_RATE 255
+#define NGBE_MIN_ATR_SAMPLE_RATE 1
+#define NGBE_ATR_SAMPLE_RATE_OFF 0
+#define NGBE_DEFAULT_ATR_SAMPLE_RATE 20
+
+/* Enable/disable Large Receive Offload
+ *
+ * Valid Values: 0(off), 1(on)
+ *
+ * Default Value: 1
+ */
+NGBE_PARAM(LRO, "Large Receive Offload (0,1), default 1 = on");
+
+/* Enable/disable support for DMA coalescing
+ *
+ * Valid Values: 0(off), 41 - 10000(on)
+ *
+ * Default Value: 0
+ */
+NGBE_PARAM(dmac_watchdog,
+	   "DMA coalescing watchdog in microseconds (0,41-10000), "
+	   "default 0 = off");
+
+/* Rx buffer mode
+ *
+ * Valid Range: 0-1 0 = no header split, 1 = hdr split
+ *
+ * Default Value: 0
+ */
+NGBE_PARAM(RxBufferMode, "0=(default)no header split\n"
+ "\t\t\t1=hdr split for recognized packet\n");
+
+#define NGBE_RXBUFMODE_NO_HEADER_SPLIT 0
+#define NGBE_RXBUFMODE_HEADER_SPLIT 1
+#define NGBE_DEFAULT_RXBUFMODE NGBE_RXBUFMODE_NO_HEADER_SPLIT
+
+struct ngbe_option {
+ enum { enable_option, range_option, list_option } type;
+ const char *name;
+ const char *err;
+ const char *msg;
+ int def;
+ union {
+ struct { /* range_option info */
+ int min;
+ int max;
+ } r;
+ struct { /* list_option info */
+ int nr;
+ const struct ngbe_opt_list {
+ int i;
+ char *str;
+ } *p;
+ } l;
+ } arg;
+};
+
+static int ngbe_validate_option(u32 *value,
+ struct ngbe_option *opt)
+{
+ int val = (int)*value;
+
+	if (val == OPTION_UNSET) {
+		/* unset means "use the default", not an invalid value */
+		*value = (u32)opt->def;
+		return 0;
+	}
+
+ switch (opt->type) {
+ case enable_option:
+ switch (val) {
+ case OPTION_ENABLED:
+ ngbe_info("ngbe: %s Enabled\n", opt->name);
+ return 0;
+ case OPTION_DISABLED:
+ ngbe_info("ngbe: %s Disabled\n", opt->name);
+ return 0;
+ }
+ break;
+ case range_option:
+ if ((val >= opt->arg.r.min && val <= opt->arg.r.max) ||
+ val == opt->def) {
+ if (opt->msg)
+ ngbe_info("ngbe: %s set to %d, %s\n",
+ opt->name, val, opt->msg);
+ else
+ ngbe_info("ngbe: %s set to %d\n",
+ opt->name, val);
+ return 0;
+ }
+ break;
+ case list_option: {
+ int i;
+ const struct ngbe_opt_list *ent;
+
+ for (i = 0; i < opt->arg.l.nr; i++) {
+ ent = &opt->arg.l.p[i];
+ if (val == ent->i) {
+ if (ent->str[0] != '\0')
+ ngbe_info("%s\n", ent->str);
+ return 0;
+ }
+ }
+ }
+ break;
+ default:
+		BUG();
+ }
+
+ ngbe_info("ngbe: Invalid %s specified (%d), %s\n",
+ opt->name, val, opt->err);
+ *value = (u32)opt->def;
+ return -1;
+}
+
+/**
+ * ngbe_check_options - Range Checking for Command Line Parameters
+ * @adapter: board private structure
+ *
+ * This routine checks all command line parameters for valid user
+ * input. If an invalid value is given, or if no user specified
+ * value exists, a default value is used. The final value is stored
+ * in a variable in the adapter structure.
+ **/
+void ngbe_check_options(struct ngbe_adapter *adapter)
+{
+ u32 bd = adapter->bd_number;
+ u32 *aflags = &adapter->flags;
+ struct ngbe_ring_feature *feature = adapter->ring_feature;
+ u32 vmdq;
+
+ if (bd >= NGBE_MAX_NIC) {
+ ngbe_notice("Warning: no configuration for board #%d\n", bd);
+ ngbe_notice("Using defaults for all values\n");
+ }
+
+ { /* Interrupt Mode */
+ u32 int_mode;
+ static struct ngbe_option opt = {
+ .type = range_option,
+ .name = "Interrupt Mode",
+ .err =
+ "using default of "__MODULE_STRING(NGBE_DEFAULT_INT),
+ .def = NGBE_DEFAULT_INT,
+ .arg = { .r = { .min = NGBE_INT_LEGACY,
+ .max = NGBE_INT_MSIX} }
+ };
+
+ if (num_IntMode > bd || num_InterruptType > bd) {
+ int_mode = IntMode[bd];
+ if (int_mode == OPTION_UNSET)
+ int_mode = InterruptType[bd];
+ ngbe_validate_option(&int_mode, &opt);
+ switch (int_mode) {
+ case NGBE_INT_MSIX:
+ if (!(*aflags & NGBE_FLAG_MSIX_CAPABLE))
+ ngbe_info(
+ "Ignoring MSI-X setting; "
+ "support unavailable\n");
+ break;
+ case NGBE_INT_MSI:
+ if (!(*aflags & NGBE_FLAG_MSI_CAPABLE)) {
+ ngbe_info(
+ "Ignoring MSI setting; "
+ "support unavailable\n");
+ } else {
+ *aflags &= ~NGBE_FLAG_MSIX_CAPABLE;
+ }
+ break;
+ case NGBE_INT_LEGACY:
+ default:
+ *aflags &= ~NGBE_FLAG_MSIX_CAPABLE;
+ *aflags &= ~NGBE_FLAG_MSI_CAPABLE;
+ break;
+ }
+ } else {
+ /* default settings */
+ if (opt.def == NGBE_INT_MSIX &&
+ *aflags & NGBE_FLAG_MSIX_CAPABLE) {
+ *aflags |= NGBE_FLAG_MSIX_CAPABLE;
+ *aflags |= NGBE_FLAG_MSI_CAPABLE;
+ } else if (opt.def == NGBE_INT_MSI &&
+ *aflags & NGBE_FLAG_MSI_CAPABLE) {
+ *aflags &= ~NGBE_FLAG_MSIX_CAPABLE;
+ *aflags |= NGBE_FLAG_MSI_CAPABLE;
+ } else {
+ *aflags &= ~NGBE_FLAG_MSIX_CAPABLE;
+ *aflags &= ~NGBE_FLAG_MSI_CAPABLE;
+ }
+ }
+ }
+ { /* Multiple Queue Support */
+ static struct ngbe_option opt = {
+ .type = enable_option,
+ .name = "Multiple Queue Support",
+ .err = "defaulting to Enabled",
+ .def = OPTION_ENABLED
+ };
+
+ if (num_MQ > bd) {
+ u32 mq = MQ[bd];
+ ngbe_validate_option(&mq, &opt);
+ if (mq)
+ *aflags |= NGBE_FLAG_MQ_CAPABLE;
+ else
+ *aflags &= ~NGBE_FLAG_MQ_CAPABLE;
+ } else {
+ if (opt.def == OPTION_ENABLED)
+ *aflags |= NGBE_FLAG_MQ_CAPABLE;
+ else
+ *aflags &= ~NGBE_FLAG_MQ_CAPABLE;
+ }
+ /* Check Interoperability */
+ if ((*aflags & NGBE_FLAG_MQ_CAPABLE) &&
+ !(*aflags & NGBE_FLAG_MSIX_CAPABLE)) {
+ DPRINTK(PROBE, INFO,
+ "Multiple queues are not supported while MSI-X "
+ "is disabled. Disabling Multiple Queues.\n");
+ *aflags &= ~NGBE_FLAG_MQ_CAPABLE;
+ }
+ }
+
+ { /* Receive-Side Scaling (RSS) */
+ static struct ngbe_option opt = {
+ .type = range_option,
+ .name = "Receive-Side Scaling (RSS)",
+ .err = "using default.",
+ .def = 0,
+ .arg = { .r = { .min = 0,
+ .max = 1} }
+ };
+ u32 rss = RSS[bd];
+ /* adjust Max allowed RSS queues based on MAC type */
+ opt.arg.r.max = ngbe_max_rss_indices(adapter);
+
+ if (num_RSS > bd) {
+ ngbe_validate_option(&rss, &opt);
+ /* base it off num_online_cpus() with hardware limit */
+ if (!rss)
+ rss = min_t(int, opt.arg.r.max,
+ num_online_cpus());
+
+ feature[RING_F_RSS].limit = (u16)rss;
+ } else if (opt.def == 0) {
+ rss = min_t(int, ngbe_max_rss_indices(adapter),
+ num_online_cpus());
+ feature[RING_F_RSS].limit = rss;
+ }
+ /* Check Interoperability */
+ if (rss > 1) {
+ if (!(*aflags & NGBE_FLAG_MQ_CAPABLE)) {
+ DPRINTK(PROBE, INFO,
+ "Multiqueue is disabled. "
+ "Limiting RSS.\n");
+ feature[RING_F_RSS].limit = 1;
+ }
+ }
+ adapter->flags2 |= NGBE_FLAG2_RSS_ENABLED;
+ }
+ { /* Virtual Machine Device Queues (VMDQ) */
+ static struct ngbe_option opt = {
+ .type = range_option,
+ .name = "Virtual Machine Device Queues (VMDQ)",
+ .err = "defaulting to Disabled",
+ .def = OPTION_DISABLED,
+ .arg = { .r = { .min = OPTION_DISABLED,
+ .max = NGBE_MAX_VMDQ_INDICES
+ } }
+ };
+
+ if (num_VMDQ > bd) {
+ vmdq = VMDQ[bd];
+
+ ngbe_validate_option(&vmdq, &opt);
+
+			/* zero or one both mean disabled from our driver's
+			 * perspective
+			 */
+			if (vmdq > 1)
+				*aflags |= NGBE_FLAG_VMDQ_ENABLED;
+			else
+				*aflags &= ~NGBE_FLAG_VMDQ_ENABLED;
+
+ feature[RING_F_VMDQ].limit = (u16)vmdq;
+ } else {
+ if (opt.def == OPTION_DISABLED)
+ *aflags &= ~NGBE_FLAG_VMDQ_ENABLED;
+ else
+ *aflags |= NGBE_FLAG_VMDQ_ENABLED;
+
+ feature[RING_F_VMDQ].limit = opt.def;
+ }
+
+ /* Check Interoperability */
+ if (*aflags & NGBE_FLAG_VMDQ_ENABLED) {
+ if (!(*aflags & NGBE_FLAG_MQ_CAPABLE)) {
+ DPRINTK(PROBE, INFO,
+ "VMDQ is not supported while multiple "
+ "queues are disabled. "
+ "Disabling VMDQ.\n");
+ *aflags &= ~NGBE_FLAG_VMDQ_ENABLED;
+ feature[RING_F_VMDQ].limit = 0;
+ }
+ }
+ }
+#ifdef CONFIG_PCI_IOV
+ { /* Single Root I/O Virtualization (SR-IOV) */
+ static struct ngbe_option opt = {
+ .type = range_option,
+ .name = "I/O Virtualization (IOV)",
+ .err = "defaulting to Disabled",
+ .def = OPTION_DISABLED,
+ .arg = { .r = { .min = OPTION_DISABLED,
+ .max = MAX_SRIOV_VFS} }
+ };
+
+ if (num_max_vfs > bd) {
+ u32 vfs = max_vfs[bd];
+ if (ngbe_validate_option(&vfs, &opt)) {
+ vfs = 0;
+				DPRINTK(PROBE, INFO,
+					"max_vfs out of range. "
+					"Disabling SR-IOV.\n");
+ }
+
+ adapter->num_vfs = vfs;
+
+ if (vfs)
+ *aflags |= NGBE_FLAG_SRIOV_ENABLED;
+ else
+ *aflags &= ~NGBE_FLAG_SRIOV_ENABLED;
+ } else {
+ if (opt.def == OPTION_DISABLED) {
+ adapter->num_vfs = 0;
+ *aflags &= ~NGBE_FLAG_SRIOV_ENABLED;
+ } else {
+ adapter->num_vfs = opt.def;
+ *aflags |= NGBE_FLAG_SRIOV_ENABLED;
+ }
+ }
+
+ /* Check Interoperability */
+ if (*aflags & NGBE_FLAG_SRIOV_ENABLED) {
+ if (!(*aflags & NGBE_FLAG_SRIOV_CAPABLE)) {
+ DPRINTK(PROBE, INFO,
+ "IOV is not supported on this "
+ "hardware. Disabling IOV.\n");
+ *aflags &= ~NGBE_FLAG_SRIOV_ENABLED;
+ adapter->num_vfs = 0;
+ } else if (!(*aflags & NGBE_FLAG_MQ_CAPABLE)) {
+ DPRINTK(PROBE, INFO,
+ "IOV is not supported while multiple "
+ "queues are disabled. "
+ "Disabling IOV.\n");
+ *aflags &= ~NGBE_FLAG_SRIOV_ENABLED;
+ adapter->num_vfs = 0;
+ }
+ }
+ }
+ { /* VEPA Bridge Mode enable for SR-IOV mode */
+ static struct ngbe_option opt = {
+ .type = range_option,
+ .name = "VEPA Bridge Mode Enable",
+ .err = "defaulting to disabled",
+ .def = OPTION_DISABLED,
+ .arg = { .r = { .min = OPTION_DISABLED,
+ .max = OPTION_ENABLED} }
+ };
+
+ if (num_VEPA > bd) {
+ u32 vepa = VEPA[bd];
+ ngbe_validate_option(&vepa, &opt);
+ if (vepa)
+ adapter->flags |=
+ NGBE_FLAG_SRIOV_VEPA_BRIDGE_MODE;
+ } else {
+ if (opt.def == OPTION_ENABLED)
+ adapter->flags |=
+ NGBE_FLAG_SRIOV_VEPA_BRIDGE_MODE;
+ }
+ }
+#endif /* CONFIG_PCI_IOV */
+ { /* Interrupt Throttling Rate */
+ static struct ngbe_option opt = {
+ .type = range_option,
+ .name = "Interrupt Throttling Rate (ints/sec)",
+ .err = "using default of "__MODULE_STRING(DEFAULT_ITR),
+ .def = DEFAULT_ITR,
+ .arg = { .r = { .min = MIN_ITR,
+ .max = MAX_ITR } }
+ };
+
+ if (num_InterruptThrottleRate > bd) {
+ u32 itr = InterruptThrottleRate[bd];
+ switch (itr) {
+ case 0:
+ DPRINTK(PROBE, INFO, "%s turned off\n",
+ opt.name);
+ adapter->rx_itr_setting = 0;
+ break;
+ case 1:
+ DPRINTK(PROBE, INFO, "dynamic interrupt "
+ "throttling enabled\n");
+ adapter->rx_itr_setting = 1;
+ break;
+ default:
+ ngbe_validate_option(&itr, &opt);
+				/* values 0 and 1 are reserved as control
+				 * flags (off/dynamic), so store the interval
+				 * in usecs (1000000/itr) shifted left by 2
+				 */
+				adapter->rx_itr_setting = (u16)((1000000/itr) << 2);
+ break;
+ }
+ adapter->tx_itr_setting = adapter->rx_itr_setting;
+ } else {
+ adapter->rx_itr_setting = opt.def;
+ adapter->tx_itr_setting = opt.def;
+ }
+ }
+#ifndef CONFIG_NGBE_NO_LLI
+ { /* Low Latency Interrupt TCP Port*/
+ static struct ngbe_option opt = {
+ .type = range_option,
+ .name = "Low Latency Interrupt TCP Port",
+ .err = "using default of "
+ __MODULE_STRING(DEFAULT_LLIPORT),
+ .def = DEFAULT_LLIPORT,
+ .arg = { .r = { .min = MIN_LLIPORT,
+ .max = MAX_LLIPORT } }
+ };
+
+ if (num_LLIPort > bd) {
+ adapter->lli_port = LLIPort[bd];
+ if (adapter->lli_port) {
+ ngbe_validate_option(&adapter->lli_port, &opt);
+ } else {
+ DPRINTK(PROBE, INFO, "%s turned off\n",
+ opt.name);
+ }
+ } else {
+ adapter->lli_port = opt.def;
+ }
+ }
+ { /* Low Latency Interrupt on Packet Size */
+ static struct ngbe_option opt = {
+ .type = range_option,
+ .name = "Low Latency Interrupt on Packet Size",
+ .err = "using default of "
+ __MODULE_STRING(DEFAULT_LLISIZE),
+ .def = DEFAULT_LLISIZE,
+ .arg = { .r = { .min = MIN_LLISIZE,
+ .max = MAX_LLISIZE } }
+ };
+
+ if (num_LLISize > bd) {
+ adapter->lli_size = LLISize[bd];
+ if (adapter->lli_size) {
+ ngbe_validate_option(&adapter->lli_size, &opt);
+ } else {
+ DPRINTK(PROBE, INFO, "%s turned off\n",
+ opt.name);
+ }
+ } else {
+ adapter->lli_size = opt.def;
+ }
+ }
+ { /* Low Latency Interrupt EtherType*/
+ static struct ngbe_option opt = {
+ .type = range_option,
+ .name = "Low Latency Interrupt on Ethernet Protocol "
+ "Type",
+ .err = "using default of "
+ __MODULE_STRING(DEFAULT_LLIETYPE),
+ .def = DEFAULT_LLIETYPE,
+ .arg = { .r = { .min = MIN_LLIETYPE,
+ .max = MAX_LLIETYPE } }
+ };
+
+ if (num_LLIEType > bd) {
+ adapter->lli_etype = LLIEType[bd];
+ if (adapter->lli_etype) {
+ ngbe_validate_option(&adapter->lli_etype,
+ &opt);
+ } else {
+ DPRINTK(PROBE, INFO, "%s turned off\n",
+ opt.name);
+ }
+ } else {
+ adapter->lli_etype = opt.def;
+ }
+ }
+ { /* LLI VLAN Priority */
+ static struct ngbe_option opt = {
+ .type = range_option,
+ .name = "Low Latency Interrupt on VLAN priority "
+ "threshold",
+ .err = "using default of "
+ __MODULE_STRING(DEFAULT_LLIVLANP),
+ .def = DEFAULT_LLIVLANP,
+ .arg = { .r = { .min = MIN_LLIVLANP,
+ .max = MAX_LLIVLANP } }
+ };
+
+ if (num_LLIVLANP > bd) {
+ adapter->lli_vlan_pri = LLIVLANP[bd];
+ if (adapter->lli_vlan_pri) {
+ ngbe_validate_option(&adapter->lli_vlan_pri,
+ &opt);
+ } else {
+ DPRINTK(PROBE, INFO, "%s turned off\n",
+ opt.name);
+ }
+ } else {
+ adapter->lli_vlan_pri = opt.def;
+ }
+ }
+#endif /* CONFIG_NGBE_NO_LLI */
+
+ { /* Flow Director ATR Tx sample packet rate */
+ static struct ngbe_option opt = {
+ .type = range_option,
+ .name = "Software ATR Tx packet sample rate",
+ .err = "using default of "
+ __MODULE_STRING(NGBE_DEFAULT_ATR_SAMPLE_RATE),
+ .def = NGBE_DEFAULT_ATR_SAMPLE_RATE,
+ .arg = {.r = {.min = NGBE_ATR_SAMPLE_RATE_OFF,
+ .max = NGBE_MAX_ATR_SAMPLE_RATE} }
+ };
+ static const char atr_string[] =
+ "ATR Tx Packet sample rate set to";
+
+ if (num_AtrSampleRate > bd) {
+ adapter->atr_sample_rate = AtrSampleRate[bd];
+
+ if (adapter->atr_sample_rate) {
+ ngbe_validate_option(&adapter->atr_sample_rate,
+ &opt);
+ DPRINTK(PROBE, INFO, "%s %d\n", atr_string,
+ adapter->atr_sample_rate);
+ }
+ } else {
+ adapter->atr_sample_rate = opt.def;
+ }
+ }
+
+ { /* LRO - Set Large Receive Offload */
+ struct ngbe_option opt = {
+ .type = enable_option,
+ .name = "LRO - Large Receive Offload",
+ .err = "defaulting to Disabled",
+ .def = OPTION_DISABLED
+ };
+ struct net_device *netdev = adapter->netdev;
+
+ if (num_LRO > bd) {
+ u32 lro = LRO[bd];
+ ngbe_validate_option(&lro, &opt);
+ if (lro)
+ netdev->features |= NETIF_F_LRO;
+ else
+ netdev->features &= ~NETIF_F_LRO;
+ } else if (opt.def == OPTION_ENABLED) {
+ netdev->features |= NETIF_F_LRO;
+ } else {
+ netdev->features &= ~NETIF_F_LRO;
+ }
+
+ if ((netdev->features & NETIF_F_LRO)) {
+ DPRINTK(PROBE, INFO,
+ "RSC is not supported on this "
+ "hardware. Disabling RSC.\n");
+ netdev->features &= ~NETIF_F_LRO;
+ }
+ }
+ { /* DMA Coalescing */
+ struct ngbe_option opt = {
+ .type = range_option,
+ .name = "dmac_watchdog",
+ .err = "defaulting to 0 (disabled)",
+ .def = 0,
+ .arg = { .r = { .min = 41, .max = 10000 } },
+ };
+ const char *cmsg = "DMA coalescing not supported on this "
+ "hardware";
+
+ opt.err = cmsg;
+ opt.msg = cmsg;
+ opt.arg.r.min = 0;
+ opt.arg.r.max = 0;
+
+ if (num_dmac_watchdog > bd) {
+ u32 dmac_wd = dmac_watchdog[bd];
+
+ ngbe_validate_option(&dmac_wd, &opt);
+ adapter->hw.mac.dmac_config.watchdog_timer = (u16)dmac_wd;
+ } else {
+ adapter->hw.mac.dmac_config.watchdog_timer = opt.def;
+ }
+ }
+
+ { /* Rx buffer mode */
+ u32 rx_buf_mode;
+ static struct ngbe_option opt = {
+ .type = range_option,
+ .name = "Rx buffer mode",
+ .err = "using default of "
+ __MODULE_STRING(NGBE_DEFAULT_RXBUFMODE),
+ .def = NGBE_DEFAULT_RXBUFMODE,
+ .arg = {.r = {.min = NGBE_RXBUFMODE_NO_HEADER_SPLIT,
+ .max = NGBE_RXBUFMODE_HEADER_SPLIT} }
+
+ };
+
+ if (num_RxBufferMode > bd) {
+ rx_buf_mode = RxBufferMode[bd];
+ ngbe_validate_option(&rx_buf_mode, &opt);
+ switch (rx_buf_mode) {
+ case NGBE_RXBUFMODE_NO_HEADER_SPLIT:
+ *aflags &= ~NGBE_FLAG_RX_HS_ENABLED;
+ break;
+ case NGBE_RXBUFMODE_HEADER_SPLIT:
+ *aflags |= NGBE_FLAG_RX_HS_ENABLED;
+ break;
+ default:
+ break;
+ }
+ } else {
+ *aflags &= ~NGBE_FLAG_RX_HS_ENABLED;
+ }
+ }
+}
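+
+/*
+ * Usage note (not part of this patch): every option above is a per-board
+ * array, so "modprobe ngbe IntMode=2,1 RSS=4,0" asks for MSI-X with four
+ * RSS queues on board 0 and MSI with one queue per online CPU on board 1.
+ */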
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_pcierr.c b/drivers/net/ethernet/netswift/ngbe/ngbe_pcierr.c
new file mode 100644
index 0000000000000..8d47bfabd6ad8
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_pcierr.c
@@ -0,0 +1,257 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include "ngbe_pcierr.h"
+#include "ngbe.h"
+
+#define NGBE_ROOT_PORT_INTR_ON_MESG_MASK (PCI_ERR_ROOT_CMD_COR_EN | \
+					  PCI_ERR_ROOT_CMD_NONFATAL_EN | \
+					  PCI_ERR_ROOT_CMD_FATAL_EN)
+
+#ifndef PCI_ERS_RESULT_NO_AER_DRIVER
+/* No AER capabilities registered for the driver */
+#define PCI_ERS_RESULT_NO_AER_DRIVER ((__force pci_ers_result_t) 6)
+#endif
+
+static pci_ers_result_t merge_result(enum pci_ers_result orig,
+ enum pci_ers_result new)
+{
+ if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
+ return PCI_ERS_RESULT_NO_AER_DRIVER;
+ if (new == PCI_ERS_RESULT_NONE)
+ return orig;
+ switch (orig) {
+ case PCI_ERS_RESULT_CAN_RECOVER:
+ case PCI_ERS_RESULT_RECOVERED:
+ orig = new;
+ break;
+ case PCI_ERS_RESULT_DISCONNECT:
+ if (new == PCI_ERS_RESULT_NEED_RESET)
+ orig = PCI_ERS_RESULT_NEED_RESET;
+ break;
+ default:
+ break;
+ }
+ return orig;
+}
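+
+/*
+ * Illustrative sketch (not part of this patch): how votes combine for a
+ * two-device subtree. NEED_RESET from the second device overrides an
+ * earlier CAN_RECOVER. The function name is hypothetical.
+ */
+static void __maybe_unused ngbe_merge_result_example(void)
+{
+	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
+
+	status = merge_result(status, PCI_ERS_RESULT_CAN_RECOVER);
+	/* still PCI_ERS_RESULT_CAN_RECOVER */
+	status = merge_result(status, PCI_ERS_RESULT_NEED_RESET);
+	/* now PCI_ERS_RESULT_NEED_RESET: the whole subtree needs a reset */
+	(void)status;
+}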
+
+static int ngbe_report_error_detected(struct pci_dev *dev,
+ enum pci_channel_state state,
+ enum pci_ers_result *result)
+{
+ pci_ers_result_t vote;
+ const struct pci_error_handlers *err_handler;
+
+ device_lock(&dev->dev);
+	if (!dev->driver ||
+	    !dev->driver->err_handler ||
+	    !dev->driver->err_handler->error_detected) {
+ /*
+ * If any device in the subtree does not have an error_detected
+ * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
+ * error callbacks of "any" device in the subtree, and will
+ * exit in the disconnected error state.
+ */
+ if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
+ vote = PCI_ERS_RESULT_NO_AER_DRIVER;
+ else
+ vote = PCI_ERS_RESULT_NONE;
+ } else {
+ err_handler = dev->driver->err_handler;
+ vote = err_handler->error_detected(dev, state);
+ }
+
+ *result = merge_result(*result, vote);
+ device_unlock(&dev->dev);
+ return 0;
+}
+
+static int ngbe_report_frozen_detected(struct pci_dev *dev, void *data)
+{
+ return ngbe_report_error_detected(dev, pci_channel_io_frozen, data);
+}
+
+static int ngbe_report_mmio_enabled(struct pci_dev *dev, void *data)
+{
+ pci_ers_result_t vote, *result = data;
+ const struct pci_error_handlers *err_handler;
+
+ device_lock(&dev->dev);
+ if (!dev->driver ||
+ !dev->driver->err_handler ||
+ !dev->driver->err_handler->mmio_enabled)
+ goto out;
+
+ err_handler = dev->driver->err_handler;
+ vote = err_handler->mmio_enabled(dev);
+ *result = merge_result(*result, vote);
+out:
+ device_unlock(&dev->dev);
+ return 0;
+}
+
+static int ngbe_report_slot_reset(struct pci_dev *dev, void *data)
+{
+ pci_ers_result_t vote, *result = data;
+ const struct pci_error_handlers *err_handler;
+
+ device_lock(&dev->dev);
+ if (!dev->driver ||
+ !dev->driver->err_handler ||
+ !dev->driver->err_handler->slot_reset)
+ goto out;
+
+ err_handler = dev->driver->err_handler;
+ vote = err_handler->slot_reset(dev);
+ *result = merge_result(*result, vote);
+out:
+ device_unlock(&dev->dev);
+ return 0;
+}
+
+static int ngbe_report_resume(struct pci_dev *dev, void *data)
+{
+ const struct pci_error_handlers *err_handler;
+
+ device_lock(&dev->dev);
+ dev->error_state = pci_channel_io_normal;
+	if (!dev->driver ||
+	    !dev->driver->err_handler ||
+	    !dev->driver->err_handler->resume)
+ goto out;
+
+ err_handler = dev->driver->err_handler;
+ err_handler->resume(dev);
+out:
+ device_unlock(&dev->dev);
+ return 0;
+}
+
+void ngbe_pcie_do_recovery(struct pci_dev *dev)
+{
+ pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
+ struct pci_bus *bus;
+ u32 reg32;
+ int pos;
+ int delay = 1;
+ u32 id;
+ u16 ctrl;
+ /*
+ * Error recovery runs on all subordinates of the first downstream port.
+ * If the downstream port detected the error, it is cleared at the end.
+ */
+ if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
+ pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM))
+ dev = dev->bus->self;
+ bus = dev->subordinate;
+
+ pci_walk_bus(bus, ngbe_report_frozen_detected, &status);
+ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+ if (pos) {
+ /* Disable Root's interrupt in response to error messages */
+ pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32);
+ reg32 &= ~NGBE_ROOT_PORT_INTR_ON_MESG_MASK;
+ pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
+ }
+
+ pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+ ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+ pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+
+	/*
+	 * PCI spec v3.0 7.6.4.2 requires a minimum Trst of 1 ms. Double
+	 * this to 2 ms to ensure that we meet the minimum requirement.
+	 */
+
+ msleep(2);
+ ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+ pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+
+	/*
+	 * Trhfa for conventional PCI is 2^25 clock cycles.
+	 * Assuming a minimum 33MHz clock this results in a 1s
+	 * delay before we can consider subordinate devices to
+	 * be re-initialized. PCIe has some ways to shorten this,
+	 * but we don't make use of them yet.
+	 */
+ ssleep(1);
+
+ pci_read_config_dword(dev, PCI_COMMAND, &id);
+ while (id == ~0) {
+ if (delay > 60000) {
+ pci_warn(dev, "not ready %dms after %s; giving up\n",
+ delay - 1, "bus_reset");
+ return;
+ }
+
+ if (delay > 1000)
+ pci_info(dev, "not ready %dms after %s; waiting\n",
+ delay - 1, "bus_reset");
+
+ msleep(delay);
+ delay *= 2;
+ pci_read_config_dword(dev, PCI_COMMAND, &id);
+ }
+
+ if (delay > 1000)
+ pci_info(dev, "ready %dms after %s\n", delay - 1,
+ "bus_reset");
+
+ pci_info(dev, "Root Port link has been reset\n");
+
+ if (pos) {
+ /* Clear Root Error Status */
+ pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &reg32);
+ pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, reg32);
+
+ /* Enable Root Port's interrupt in response to error messages */
+ pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
+ reg32 |= NGBE_ROOT_PORT_INTR_ON_MESG_MASK;
+ pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
+ }
+
+ if (status == PCI_ERS_RESULT_CAN_RECOVER) {
+ status = PCI_ERS_RESULT_RECOVERED;
+ pci_dbg(dev, "broadcast mmio_enabled message\n");
+ pci_walk_bus(bus, ngbe_report_mmio_enabled, &status);
+ }
+
+ if (status == PCI_ERS_RESULT_NEED_RESET) {
+ /*
+ * TODO: Should call platform-specific
+ * functions to reset slot before calling
+ * drivers' slot_reset callbacks?
+ */
+ status = PCI_ERS_RESULT_RECOVERED;
+ pci_dbg(dev, "broadcast slot_reset message\n");
+ pci_walk_bus(bus, ngbe_report_slot_reset, &status);
+ }
+
+ if (status != PCI_ERS_RESULT_RECOVERED)
+ goto failed;
+
+ pci_dbg(dev, "broadcast resume message\n");
+ pci_walk_bus(bus, ngbe_report_resume, &status);
+
+failed:
+ return;
+}
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_pcierr.h b/drivers/net/ethernet/netswift/ngbe/ngbe_pcierr.h
new file mode 100644
index 0000000000000..f92def4d21667
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_pcierr.h
@@ -0,0 +1,23 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#ifndef _NGBE_PCIERR_H_
+#define _NGBE_PCIERR_H_
+
+void ngbe_pcie_do_recovery(struct pci_dev *dev);
+#endif /* _NGBE_PCIERR_H_ */
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_phy.c b/drivers/net/ethernet/netswift/ngbe/ngbe_phy.c
new file mode 100644
index 0000000000000..2f9013c291a11
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_phy.c
@@ -0,0 +1,1243 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include "ngbe_phy.h"
+
+/**
+ * ngbe_check_reset_blocked - check status of MNG FW veto bit
+ * @hw: pointer to the hardware structure
+ *
+ * This function checks the MMNGC.MNG_VETO bit to see if there are
+ * any constraints on link from manageability. For MACs that don't
+ * have this bit, just return false since the link cannot be blocked
+ * via this method.
+ **/
+bool ngbe_check_reset_blocked(struct ngbe_hw *hw)
+{
+ u32 mmngc;
+
+ DEBUGFUNC("ngbe_check_reset_blocked");
+
+ mmngc = rd32(hw, NGBE_MIS_ST);
+ if (mmngc & NGBE_MIS_ST_MNG_VETO) {
+ ERROR_REPORT1(NGBE_ERROR_SOFTWARE,
+ "MNG_VETO bit detected.\n");
+ return true;
+ }
+
+ return false;
+}
+
+/* For internal phy only */
+s32 ngbe_phy_read_reg(struct ngbe_hw *hw,
+ u32 reg_offset,
+ u32 page,
+ u16 *phy_data)
+{
+ /* clear input */
+ *phy_data = 0;
+
+ wr32(hw, NGBE_PHY_CONFIG(NGBE_INTERNAL_PHY_PAGE_SELECT_OFFSET),
+ page);
+
+ if (reg_offset >= NGBE_INTERNAL_PHY_OFFSET_MAX) {
+ ERROR_REPORT1(NGBE_ERROR_UNSUPPORTED,
+ "input reg offset %d exceed maximum 31.\n", reg_offset);
+ return NGBE_ERR_INVALID_ARGUMENT;
+ }
+
+ *phy_data = 0xFFFF & rd32(hw, NGBE_PHY_CONFIG(reg_offset));
+
+ return NGBE_OK;
+}
+
+/* For internal phy only */
+s32 ngbe_phy_write_reg(struct ngbe_hw *hw,
+ u32 reg_offset,
+ u32 page,
+ u16 phy_data)
+{
+ wr32(hw, NGBE_PHY_CONFIG(NGBE_INTERNAL_PHY_PAGE_SELECT_OFFSET),
+ page);
+
+ if (reg_offset >= NGBE_INTERNAL_PHY_OFFSET_MAX) {
+ ERROR_REPORT1(NGBE_ERROR_UNSUPPORTED,
+ "input reg offset %d exceed maximum 31.\n", reg_offset);
+ return NGBE_ERR_INVALID_ARGUMENT;
+ }
+ wr32(hw, NGBE_PHY_CONFIG(reg_offset), phy_data);
+
+ return NGBE_OK;
+}
+
+s32 ngbe_check_internal_phy_id(struct ngbe_hw *hw)
+{
+ u16 phy_id_high = 0;
+ u16 phy_id_low = 0;
+ u16 phy_id = 0;
+
+ DEBUGFUNC("ngbe_check_internal_phy_id");
+
+ ngbe_phy_read_reg(hw, NGBE_MDI_PHY_ID1_OFFSET, 0, &phy_id_high);
+ phy_id = phy_id_high << 6;
+ ngbe_phy_read_reg(hw, NGBE_MDI_PHY_ID2_OFFSET, 0, &phy_id_low);
+ phy_id |= (phy_id_low & NGBE_MDI_PHY_ID_MASK) >> 10;
+
+ if (NGBE_INTERNAL_PHY_ID != phy_id) {
+ ERROR_REPORT1(NGBE_ERROR_UNSUPPORTED,
+ "internal phy id 0x%x not supported.\n", phy_id);
+ return NGBE_ERR_DEVICE_NOT_SUPPORTED;
+ }
+ hw->phy.id = (u32)phy_id;
+
+ return NGBE_OK;
+}
+
+/**
+ * ngbe_phy_read_reg_mdi - Reads a value from a specified PHY register without
+ * the SWFW lock
+ * @hw: pointer to hardware structure
+ * @reg_addr: 32 bit address of PHY register to read
+ * @device_type: 5 bit device type
+ * @phy_data: Pointer to read data from PHY register
+ **/
+s32 ngbe_phy_read_reg_mdi(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 *phy_data)
+{
+ u32 command;
+ s32 status = 0;
+
+ /* setup and write the address cycle command */
+ command = NGBE_MSCA_RA(reg_addr) |
+ NGBE_MSCA_PA(hw->phy.addr) |
+ NGBE_MSCA_DA(device_type);
+ wr32(hw, NGBE_MSCA, command);
+
+ command = NGBE_MSCC_CMD(NGBE_MSCA_CMD_READ) |
+ NGBE_MSCC_BUSY |
+ NGBE_MDIO_CLK(6);
+ wr32(hw, NGBE_MSCC, command);
+
+ /* wait to complete */
+ status = po32m(hw, NGBE_MSCC,
+ NGBE_MSCC_BUSY, ~NGBE_MSCC_BUSY,
+ NGBE_MDIO_TIMEOUT, 10);
+ if (status != 0) {
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "PHY address command did not complete.\n");
+ return NGBE_ERR_PHY;
+ }
+
+ /* read data from MSCC */
+ *phy_data = 0xFFFF & rd32(hw, NGBE_MSCC);
+
+ return 0;
+}
+
+/**
+ * ngbe_phy_write_reg_mdi - Writes a value to a specified PHY register
+ * without SWFW lock
+ * @hw: pointer to hardware structure
+ * @reg_addr: 32 bit PHY register to write
+ * @device_type: 5 bit device type
+ * @phy_data: Data to write to the PHY register
+ **/
+s32 ngbe_phy_write_reg_mdi(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 phy_data)
+{
+ u32 command;
+ s32 status = 0;
+
+ /* setup and write the address cycle command */
+ command = NGBE_MSCA_RA(reg_addr) |
+ NGBE_MSCA_PA(hw->phy.addr) |
+ NGBE_MSCA_DA(device_type);
+ wr32(hw, NGBE_MSCA, command);
+
+ command = phy_data | NGBE_MSCC_CMD(NGBE_MSCA_CMD_WRITE) |
+ NGBE_MSCC_BUSY | NGBE_MDIO_CLK(6);
+ wr32(hw, NGBE_MSCC, command);
+
+ /* wait to complete */
+ status = po32m(hw, NGBE_MSCC,
+ NGBE_MSCC_BUSY, ~NGBE_MSCC_BUSY,
+ NGBE_MDIO_TIMEOUT, 10);
+ if (status != 0) {
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "PHY address command did not complete.\n");
+ return NGBE_ERR_PHY;
+ }
+
+ return 0;
+}
+
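+/*
+ * Extended (debug) register access for the YT8521S PHY. These helpers
+ * assume the usual Motorcomm scheme: MII register 0x1e holds the
+ * extended-register address and 0x1f holds its data.
+ */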
+s32 ngbe_phy_read_reg_ext_yt8521s(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 *phy_data)
+{
+ s32 status = 0;
+ status = ngbe_phy_write_reg_mdi(hw, 0x1e, device_type, reg_addr);
+ if (!status)
+ status = ngbe_phy_read_reg_mdi(hw, 0x1f, device_type, phy_data);
+ return status;
+}
+
+s32 ngbe_phy_write_reg_ext_yt8521s(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 phy_data)
+{
+ s32 status = 0;
+ status = ngbe_phy_write_reg_mdi(hw, 0x1e, device_type, reg_addr);
+ if (!status)
+ status = ngbe_phy_write_reg_mdi(hw, 0x1f, device_type, phy_data);
+ return status;
+}
+
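+/*
+ * The SDS variants below first write 0x02 to extended register 0xa000,
+ * which appears to switch the PHY to its SerDes (fiber) register space,
+ * and restore 0x00 (UTP space) when done.
+ */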
+s32 ngbe_phy_read_reg_sds_ext_yt8521s(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 *phy_data)
+{
+ s32 status = 0;
+ status = ngbe_phy_write_reg_ext_yt8521s(hw, 0xa000, device_type, 0x02);
+ if (!status)
+ status = ngbe_phy_read_reg_ext_yt8521s(hw, reg_addr, device_type, phy_data);
+ ngbe_phy_write_reg_ext_yt8521s(hw, 0xa000, device_type, 0x00);
+ return status;
+}
+
+s32 ngbe_phy_write_reg_sds_ext_yt8521s(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 phy_data)
+{
+ s32 status = 0;
+ status = ngbe_phy_write_reg_ext_yt8521s(hw, 0xa000, device_type, 0x02);
+ if (!status)
+ status = ngbe_phy_write_reg_ext_yt8521s(hw, reg_addr, device_type, phy_data);
+ ngbe_phy_write_reg_ext_yt8521s(hw, 0xa000, device_type, 0x00);
+ return status;
+}
+
+s32 ngbe_phy_read_reg_sds_mii_yt8521s(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 *phy_data)
+{
+ s32 status = 0;
+ status = ngbe_phy_write_reg_ext_yt8521s(hw, 0xa000, device_type, 0x02);
+ if (!status)
+ status = ngbe_phy_read_reg_mdi(hw, reg_addr, device_type, phy_data);
+ ngbe_phy_write_reg_ext_yt8521s(hw, 0xa000, device_type, 0x00);
+ return status;
+}
+
+s32 ngbe_phy_write_reg_sds_mii_yt8521s(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 phy_data)
+{
+ s32 status = 0;
+ status = ngbe_phy_write_reg_ext_yt8521s(hw, 0xa000, device_type, 0x02);
+ if (!status)
+ status = ngbe_phy_write_reg_mdi(hw, reg_addr, device_type, phy_data);
+ ngbe_phy_write_reg_ext_yt8521s(hw, 0xa000, device_type, 0x00);
+ return status;
+}
+
+s32 ngbe_check_mdi_phy_id(struct ngbe_hw *hw)
+{
+ u16 phy_id_high = 0;
+ u16 phy_id_low = 0;
+ u32 phy_id = 0;
+
+ DEBUGFUNC("ngbe_check_mdi_phy_id");
+
+ if (hw->phy.type == ngbe_phy_m88e1512) {
+ /* select page 0 */
+ ngbe_phy_write_reg_mdi(hw, 22, 0, 0);
+ } else {
+ /* select page 1 */
+ ngbe_phy_write_reg_mdi(hw, 22, 0, 1);
+ }
+
+ ngbe_phy_read_reg_mdi(hw, NGBE_MDI_PHY_ID1_OFFSET, 0, &phy_id_high);
+ phy_id = phy_id_high << 6;
+ ngbe_phy_read_reg_mdi(hw, NGBE_MDI_PHY_ID2_OFFSET, 0, &phy_id_low);
+ phy_id |= (phy_id_low & NGBE_MDI_PHY_ID_MASK) >> 10;
+
+ if (NGBE_M88E1512_PHY_ID != phy_id) {
+ ERROR_REPORT1(NGBE_ERROR_UNSUPPORTED,
+ "MDI phy id 0x%x not supported.\n", phy_id);
+ return NGBE_ERR_DEVICE_NOT_SUPPORTED;
+ }
+ hw->phy.id = phy_id;
+
+ return NGBE_OK;
+}
+
+bool ngbe_validate_phy_addr(struct ngbe_hw *hw, u32 phy_addr)
+{
+ u16 phy_id = 0;
+ bool valid = false;
+
+ DEBUGFUNC("ngbe_validate_phy_addr");
+
+ hw->phy.addr = phy_addr;
+
+ ngbe_phy_read_reg_sds_mii_yt8521s(hw, 0x3, 0, &phy_id);
+ if (phy_id != 0xFFFF && phy_id != 0x0)
+ valid = true;
+
+ return valid;
+}
+
+s32 ngbe_check_yt_phy_id(struct ngbe_hw *hw)
+{
+ u16 phy_id = 0;
+ bool valid = false;
+ u32 phy_addr;
+ DEBUGFUNC("ngbe_check_yt_phy_id");
+
+ for (phy_addr = 0; phy_addr < 32; phy_addr++) {
+ valid = ngbe_validate_phy_addr(hw, phy_addr);
+ if (valid) {
+ hw->phy.addr = phy_addr;
+ printk("valid phy addr is 0x%x\n", phy_addr);
+ break;
+ }
+ }
+ if (!valid) {
+ printk("cannnot find valid phy address.\n");
+ return NGBE_ERR_DEVICE_NOT_SUPPORTED;
+ }
+ ngbe_phy_read_reg_sds_mii_yt8521s(hw, 0x3, 0, &phy_id);
+ if (NGBE_YT8521S_PHY_ID != phy_id) {
+ ERROR_REPORT1(NGBE_ERROR_UNSUPPORTED,
+ "MDI phy id 0x%x not supported.\n", phy_id);
+ printk("phy id is 0x%x\n", phy_id);
+ return NGBE_ERR_DEVICE_NOT_SUPPORTED;
+ }
+ hw->phy.id = phy_id;
+ return NGBE_OK;
+}
+
+s32 ngbe_check_zte_phy_id(struct ngbe_hw *hw)
+{
+ u16 phy_id_high = 0;
+ u16 phy_id_low = 0;
+ u16 phy_id = 0;
+
+ DEBUGFUNC("ngbe_check_zte_phy_id");
+
+ ngbe_phy_read_reg_mdi(hw, NGBE_MDI_PHY_ID1_OFFSET, 0, &phy_id_high);
+ phy_id = phy_id_high << 6;
+ ngbe_phy_read_reg_mdi(hw, NGBE_MDI_PHY_ID2_OFFSET, 0, &phy_id_low);
+ phy_id |= (phy_id_low & NGBE_MDI_PHY_ID_MASK) >> 10;
+
+ if (NGBE_INTERNAL_PHY_ID != phy_id) {
+ ERROR_REPORT1(NGBE_ERROR_UNSUPPORTED,
+ "MDI phy id 0x%x not supported.\n", phy_id);
+ return NGBE_ERR_DEVICE_NOT_SUPPORTED;
+ }
+ hw->phy.id = (u32)phy_id;
+
+ return NGBE_OK;
+}
+
+/**
+ * ngbe_phy_init - PHY/SFP specific init
+ * @hw: pointer to hardware structure
+ *
+ * Initialize any function pointers that were not able to be
+ * set during init_shared_code because the PHY/SFP type was
+ * not known. Perform the SFP init if necessary.
+ *
+**/
+s32 ngbe_phy_init(struct ngbe_hw *hw)
+{
+ s32 ret_val = 0;
+ u16 value = 0;
+ int i;
+
+ DEBUGFUNC("ngbe_phy_init");
+
+ /* set fwsw semaphore mask for phy first */
+ if (!hw->phy.phy_semaphore_mask) {
+ hw->phy.phy_semaphore_mask = NGBE_MNG_SWFW_SYNC_SW_PHY;
+ }
+
+ /* init phy.addr according to HW design */
+
+ hw->phy.addr = 0;
+
+ /* Identify the PHY or SFP module */
+ ret_val = TCALL(hw, phy.ops.identify);
+ if (ret_val == NGBE_ERR_SFP_NOT_SUPPORTED)
+ return ret_val;
+
+ /* enable interrupts, only link status change and AN complete are allowed */
+ if (hw->phy.type == ngbe_phy_internal) {
+ value = NGBE_INTPHY_INT_LSC | NGBE_INTPHY_INT_ANC;
+ TCALL(hw, phy.ops.write_reg, 0x12, 0xa42, value);
+ } else if (hw->phy.type == ngbe_phy_m88e1512 ||
+ hw->phy.type == ngbe_phy_m88e1512_sfi) {
+ TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 2);
+ TCALL(hw, phy.ops.read_reg_mdi, 21, 0, &value);
+ value &= ~NGBE_M88E1512_RGM_TTC;
+ value |= NGBE_M88E1512_RGM_RTC;
+ TCALL(hw, phy.ops.write_reg_mdi, 21, 0, value);
+ if (hw->phy.type == ngbe_phy_m88e1512)
+ TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 0);
+ else
+ TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 1);
+
+ TCALL(hw, phy.ops.write_reg_mdi, 0, 0, NGBE_MDI_PHY_RESET);
+ for (i = 0; i < 15; i++) {
+ TCALL(hw, phy.ops.read_reg_mdi, 0, 0, &value);
+ if (value & NGBE_MDI_PHY_RESET)
+ msleep(1);
+ else
+ break;
+ }
+
+ if (i == 15) {
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "phy reset exceeds maximum waiting period.\n");
+ return NGBE_ERR_PHY_TIMEOUT;
+ }
+
+ ret_val = TCALL(hw, phy.ops.reset);
+ if (ret_val) {
+ return ret_val;
+ }
+
+ /* set LED2 to interrupt output and INTn active low */
+ TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 3);
+ TCALL(hw, phy.ops.read_reg_mdi, 18, 0, &value);
+ value |= NGBE_M88E1512_INT_EN;
+ value &= ~(NGBE_M88E1512_INT_POL);
+ TCALL(hw, phy.ops.write_reg_mdi, 18, 0, value);
+
+ if (hw->phy.type == ngbe_phy_m88e1512_sfi) {
+ TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 1);
+ TCALL(hw, phy.ops.read_reg_mdi, 16, 0, &value);
+ value &= ~0x4;
+ TCALL(hw, phy.ops.write_reg_mdi, 16, 0, value);
+ }
+
+ /* enable link status change and AN complete interrupts */
+ value = NGBE_M88E1512_INT_ANC | NGBE_M88E1512_INT_LSC;
+ if (hw->phy.type == ngbe_phy_m88e1512)
+ TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 0);
+ else
+ TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 1);
+ TCALL(hw, phy.ops.write_reg_mdi, 18, 0, value);
+
+ /* LED control */
+ TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 3);
+ TCALL(hw, phy.ops.read_reg_mdi, 16, 0, &value);
+ value &= ~0x00FF;
+ value |= (NGBE_M88E1512_LED1_CONF << 4) | NGBE_M88E1512_LED0_CONF;
+ TCALL(hw, phy.ops.write_reg_mdi, 16, 0, value);
+ TCALL(hw, phy.ops.read_reg_mdi, 17, 0, &value);
+ value &= ~0x000F;
+
+ TCALL(hw, phy.ops.write_reg_mdi, 17, 0, value);
+ } else if (hw->phy.type == ngbe_phy_yt8521s_sfi) {
+ /* enable yt8521s interrupt */
+ /* select sds area register */
+ ngbe_phy_write_reg_ext_yt8521s(hw, 0xa000, 0, 0x00);
+
+ /* enable interrupt */
+ value = 0x000C;
+ TCALL(hw, phy.ops.write_reg_mdi, 0x12, 0, value);
+
+ /* select fiber_to_rgmii first */
+ ngbe_phy_read_reg_ext_yt8521s(hw, 0xa006, 0, &value);
+ value &= ~0x100;
+ ngbe_phy_write_reg_ext_yt8521s(hw, 0xa006, 0, value);
+
+ ngbe_phy_read_reg_sds_mii_yt8521s(hw, 0x0, 0, &value);
+ value |= 0x800;
+ ngbe_phy_write_reg_sds_mii_yt8521s(hw, 0x0, 0, value);
+ }
+
+ return ret_val;
+}
+
+/**
+ * ngbe_phy_identify - Identifies PHY type
+ * @hw: pointer to hardware structure
+ *
+ * Determines HW type and calls appropriate function.
+ **/
+s32 ngbe_phy_identify(struct ngbe_hw *hw)
+{
+ s32 status = 0;
+
+ DEBUGFUNC("ngbe_phy_identify");
+
+ switch (hw->phy.type) {
+ case ngbe_phy_internal:
+ status = ngbe_check_internal_phy_id(hw);
+ break;
+ case ngbe_phy_m88e1512:
+ case ngbe_phy_m88e1512_sfi:
+ status = ngbe_check_mdi_phy_id(hw);
+ break;
+ case ngbe_phy_zte:
+ status = ngbe_check_zte_phy_id(hw);
+ break;
+ case ngbe_phy_yt8521s_sfi:
+ status = ngbe_check_yt_phy_id(hw);
+ break;
+ default:
+ status = NGBE_ERR_PHY_TYPE;
+ }
+
+ return status;
+}
+
+s32 ngbe_phy_reset(struct ngbe_hw *hw)
+{
+ s32 status = 0;
+
+ u16 value = 0;
+ int i;
+
+ DEBUGFUNC("ngbe_phy_reset");
+
+ /* only support internal phy */
+ if (hw->phy.type != ngbe_phy_internal)
+ return NGBE_ERR_PHY_TYPE;
+
+ /* Don't reset PHY if it's shut down due to overtemp. */
+ if (!hw->phy.reset_if_overtemp &&
+ (NGBE_ERR_OVERTEMP == TCALL(hw, phy.ops.check_overtemp))) {
+ ERROR_REPORT1(NGBE_ERROR_CAUTION,
+ "OVERTEMP! Skip PHY reset.\n");
+ return NGBE_ERR_OVERTEMP;
+ }
+
+ /* Blocked by MNG FW so bail */
+ if (ngbe_check_reset_blocked(hw))
+ return status;
+
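+ /* the MII control-register reset bit self-clears; poll until it does */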
+ value |= NGBE_MDI_PHY_RESET;
+ status = TCALL(hw, phy.ops.write_reg, 0, 0, value);
+ for (i = 0; i < NGBE_PHY_RST_WAIT_PERIOD; i++) {
+ status = TCALL(hw, phy.ops.read_reg, 0, 0, &value);
+ if (!(value & NGBE_MDI_PHY_RESET))
+ break;
+ msleep(1);
+ }
+
+ if (i == NGBE_PHY_RST_WAIT_PERIOD) {
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "PHY MODE RESET did not complete.\n");
+ return NGBE_ERR_RESET_FAILED;
+ }
+
+ return status;
+}
+
+u32 ngbe_phy_setup_link(struct ngbe_hw *hw,
+ u32 speed,
+ bool need_restart_AN)
+{
+ u16 value = 0;
+
+ DEBUGFUNC("ngbe_phy_setup_link");
+
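+ /*
+ * Registers 4 and 9 are the standard MII advertisement registers:
+ * mask 0xA0 in reg 4 covers the 100/10M half-duplex bits, 0x100/0x40
+ * the 100/10M full-duplex bits, and 0x200 in reg 9 is 1000BASE-T full.
+ */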
+ /* disable 10/100M Half Duplex */
+ TCALL(hw, phy.ops.read_reg, 4, 0, &value);
+ value &= 0xFF5F;
+ TCALL(hw, phy.ops.write_reg, 4, 0, value);
+
+ /* set advertise enable according to input speed */
+ if (!(speed & NGBE_LINK_SPEED_1GB_FULL)) {
+ TCALL(hw, phy.ops.read_reg, 9, 0, &value);
+ value &= 0xFDFF;
+ TCALL(hw, phy.ops.write_reg, 9, 0, value);
+ } else {
+ TCALL(hw, phy.ops.read_reg, 9, 0, &value);
+ value |= 0x200;
+ TCALL(hw, phy.ops.write_reg, 9, 0, value);
+ }
+
+ if (!(speed & NGBE_LINK_SPEED_100_FULL)) {
+ TCALL(hw, phy.ops.read_reg, 4, 0, &value);
+ value &= 0xFEFF;
+ TCALL(hw, phy.ops.write_reg, 4, 0, value);
+ } else {
+ TCALL(hw, phy.ops.read_reg, 4, 0, &value);
+ value |= 0x100;
+ TCALL(hw, phy.ops.write_reg, 4, 0, value);
+ }
+
+ if (!(speed & NGBE_LINK_SPEED_10_FULL)) {
+ TCALL(hw, phy.ops.read_reg, 4, 0, &value);
+ value &= 0xFFBF;
+ TCALL(hw, phy.ops.write_reg, 4, 0, value);
+ } else {
+ TCALL(hw, phy.ops.read_reg, 4, 0, &value);
+ value |= 0x40;
+ TCALL(hw, phy.ops.write_reg, 4, 0, value);
+ }
+
+ /* restart AN and wait for the AN done interrupt */
+ if (((hw->subsystem_device_id & NCSI_SUP_MASK) == NCSI_SUP) ||
+ ((hw->subsystem_device_id & OEM_MASK) == OCP_CARD)) {
+ if (need_restart_AN)
+ value = NGBE_MDI_PHY_RESTART_AN | NGBE_MDI_PHY_ANE;
+ else
+ value = NGBE_MDI_PHY_ANE;
+ } else {
+ value = NGBE_MDI_PHY_RESTART_AN | NGBE_MDI_PHY_ANE;
+ }
+ TCALL(hw, phy.ops.write_reg, 0, 0, value);
+
+ value = 0x205B;
+ TCALL(hw, phy.ops.write_reg, 16, 0xd04, value);
+ TCALL(hw, phy.ops.write_reg, 17, 0xd04, 0);
+
+ TCALL(hw, phy.ops.read_reg, 18, 0xd04, &value);
+
+ value = value & 0xFF8C;
+ /* act led blinking mode set to 60ms */
+ value |= 0x2;
+ TCALL(hw, phy.ops.write_reg, 18, 0xd04, value);
+
+ TCALL(hw, phy.ops.check_event);
+
+ return NGBE_OK;
+}
+
+s32 ngbe_phy_reset_m88e1512(struct ngbe_hw *hw)
+{
+ s32 status = 0;
+
+ u16 value = 0;
+ int i;
+
+ DEBUGFUNC("ngbe_phy_reset_m88e1512");
+
+ if (hw->phy.type != ngbe_phy_m88e1512 &&
+ hw->phy.type != ngbe_phy_m88e1512_sfi)
+ return NGBE_ERR_PHY_TYPE;
+
+ /* Don't reset PHY if it's shut down due to overtemp. */
+ if (!hw->phy.reset_if_overtemp &&
+ (NGBE_ERR_OVERTEMP == TCALL(hw, phy.ops.check_overtemp))) {
+ ERROR_REPORT1(NGBE_ERROR_CAUTION,
+ "OVERTEMP! Skip PHY reset.\n");
+ return NGBE_ERR_OVERTEMP;
+ }
+
+ /* Blocked by MNG FW so bail */
+ if (ngbe_check_reset_blocked(hw))
+ return status;
+
+ /* select page 18 reg 20 */
+ status = TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 18);
+
+ if (hw->phy.type == ngbe_phy_m88e1512)
+ /* mode select to RGMII-to-copper */
+ value = 0;
+ else
+ /* mode select to RGMII-to-sfi */
+ value = 2;
+ status = TCALL(hw, phy.ops.write_reg_mdi, 20, 0, value);
+ /* mode reset */
+ value |= NGBE_MDI_PHY_RESET;
+ status = TCALL(hw, phy.ops.write_reg_mdi, 20, 0, value);
+
+ for (i = 0; i < NGBE_PHY_RST_WAIT_PERIOD; i++) {
+ status = TCALL(hw, phy.ops.read_reg_mdi, 20, 0, &value);
+ if (!(value & NGBE_MDI_PHY_RESET))
+ break;
+ msleep(1);
+ }
+
+ if (i == NGBE_PHY_RST_WAIT_PERIOD) {
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "M88E1512 MODE RESET did not complete.\n");
+ return NGBE_ERR_RESET_FAILED;
+ }
+
+ return status;
+}
+
+s32 ngbe_phy_reset_yt8521s(struct ngbe_hw *hw)
+{
+ s32 status = 0;
+
+ u16 value = 0;
+ int i;
+
+ DEBUGFUNC("ngbe_phy_reset_yt8521s");
+
+ if (hw->phy.type != ngbe_phy_yt8521s_sfi)
+ return NGBE_ERR_PHY_TYPE;
+
+ /* Don't reset PHY if it's shut down due to overtemp. */
+ if (!hw->phy.reset_if_overtemp &&
+ (NGBE_ERR_OVERTEMP == TCALL(hw, phy.ops.check_overtemp))) {
+ ERROR_REPORT1(NGBE_ERROR_CAUTION,
+ "OVERTEMP! Skip PHY reset.\n");
+ return NGBE_ERR_OVERTEMP;
+ }
+
+ /* Blocked by MNG FW so bail */
+ if (ngbe_check_reset_blocked(hw))
+ return status;
+
+ status = ngbe_phy_read_reg_sds_mii_yt8521s(hw, 0, 0, &value);
+ /* sds software reset */
+ value |= 0x8000;
+ status = ngbe_phy_write_reg_sds_mii_yt8521s(hw, 0, 0, value);
+
+ for (i = 0; i < NGBE_PHY_RST_WAIT_PERIOD; i++) {
+ status = ngbe_phy_read_reg_sds_mii_yt8521s(hw, 0, 0, &value);
+ if (!(value & 0x8000))
+ break;
+ msleep(1);
+ }
+
+ if (i == NGBE_PHY_RST_WAIT_PERIOD) {
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "YT8521S Software RESET did not complete.\n");
+ return NGBE_ERR_RESET_FAILED;
+ }
+
+ return status;
+}
+
+u32 ngbe_phy_setup_link_m88e1512(struct ngbe_hw *hw,
+ u32 speed,
+ bool autoneg_wait_to_complete)
+{
+ u16 value_r4 = 0;
+ u16 value_r9 = 0;
+ u16 value;
+
+ DEBUGFUNC("ngbe_phy_setup_link_m88e1512");
+ UNREFERENCED_PARAMETER(autoneg_wait_to_complete);
+
+ hw->phy.autoneg_advertised = 0;
+ if (hw->phy.type == ngbe_phy_m88e1512) {
+ if (speed & NGBE_LINK_SPEED_1GB_FULL) {
+ value_r9 |= NGBE_M88E1512_1000BASET_FULL;
+ hw->phy.autoneg_advertised |= NGBE_LINK_SPEED_1GB_FULL;
+ }
+
+ if (speed & NGBE_LINK_SPEED_100_FULL) {
+ value_r4 |= NGBE_M88E1512_100BASET_FULL;
+ hw->phy.autoneg_advertised |= NGBE_LINK_SPEED_100_FULL;
+ }
+
+ if (speed & NGBE_LINK_SPEED_10_FULL) {
+ value_r4 |= NGBE_M88E1512_10BASET_FULL;
+ hw->phy.autoneg_advertised |= NGBE_LINK_SPEED_10_FULL;
+ }
+
+ TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 0);
+ TCALL(hw, phy.ops.read_reg_mdi, 4, 0, &value);
+ value &= ~(NGBE_M88E1512_100BASET_FULL |
+ NGBE_M88E1512_100BASET_HALF |
+ NGBE_M88E1512_10BASET_FULL |
+ NGBE_M88E1512_10BASET_HALF);
+ value_r4 |= value;
+ TCALL(hw, phy.ops.write_reg_mdi, 4, 0, value_r4);
+
+ TCALL(hw, phy.ops.read_reg_mdi, 9, 0, &value);
+ value &= ~(NGBE_M88E1512_1000BASET_FULL |
+ NGBE_M88E1512_1000BASET_HALF);
+ value_r9 |= value;
+ TCALL(hw, phy.ops.write_reg_mdi, 9, 0, value_r9);
+
+ value = NGBE_MDI_PHY_RESTART_AN | NGBE_MDI_PHY_ANE;
+ TCALL(hw, phy.ops.write_reg_mdi, 0, 0, value);
+ } else {
+ hw->phy.autoneg_advertised |= NGBE_LINK_SPEED_1GB_FULL;
+ TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 1);
+ TCALL(hw, phy.ops.read_reg_mdi, 4, 0, &value);
+ value &= ~0x60;
+ value |= 0x20;
+ TCALL(hw, phy.ops.write_reg_mdi, 4, 0, value);
+
+ value = NGBE_MDI_PHY_RESTART_AN | NGBE_MDI_PHY_ANE;
+ TCALL(hw, phy.ops.write_reg_mdi, 0, 0, value);
+ }
+
+ TCALL(hw, phy.ops.check_event);
+
+ return NGBE_OK;
+}
+
+u32 ngbe_phy_setup_link_yt8521s(struct ngbe_hw *hw,
+ u32 speed,
+ bool autoneg_wait_to_complete)
+{
+ s32 ret_val = 0;
+ u16 value;
+ u16 value_r4 = 0;
+ u16 value_r9 = 0;
+
+ DEBUGFUNC("ngbe_phy_setup_link_yt8521s");
+ UNREFERENCED_PARAMETER(autoneg_wait_to_complete);
+
+ hw->phy.autoneg_advertised = 0;
+
+ if (hw->phy.type == ngbe_phy_yt8521s) {
+ value_r4 = 0x140;
+ value_r9 = 0x200;
+ /* disable 100/10base-T auto-negotiation advertisement */
+ ngbe_phy_read_reg_mdi(hw, 0x4, 0, &value);
+ value &= ~value_r4;
+ ngbe_phy_write_reg_mdi(hw, 0x4, 0, value);
+
+ /* disable 1000base-T auto-negotiation advertisement */
+ ngbe_phy_read_reg_mdi(hw, 0x9, 0, &value);
+ value &= ~value_r9;
+ ngbe_phy_write_reg_mdi(hw, 0x9, 0, value);
+
+ value_r4 = 0x0;
+ value_r9 = 0x0;
+
+ if (speed & NGBE_LINK_SPEED_1GB_FULL) {
+ hw->phy.autoneg_advertised |= NGBE_LINK_SPEED_1GB_FULL;
+ value_r9 |= 0x200;
+ }
+ if (speed & NGBE_LINK_SPEED_100_FULL) {
+ hw->phy.autoneg_advertised |= NGBE_LINK_SPEED_100_FULL;
+ value_r4 |= 0x100;
+ }
+ if (speed & NGBE_LINK_SPEED_10_FULL) {
+ hw->phy.autoneg_advertised |= NGBE_LINK_SPEED_10_FULL;
+ value_r4 |= 0x40;
+ }
+
+ /* enable 1000base-T auto-negotiation advertisement */
+ ngbe_phy_read_reg_mdi(hw, 0x9, 0, &value);
+ value |= value_r9;
+ ngbe_phy_write_reg_mdi(hw, 0x9, 0, value);
+
+ /* enable 100/10base-T auto-negotiation advertisement */
+ ngbe_phy_read_reg_mdi(hw, 0x4, 0, &value);
+ value |= value_r4;
+ ngbe_phy_write_reg_mdi(hw, 0x4, 0, value);
+
+ /* software reset to make the above configuration take effect */
+ ngbe_phy_read_reg_mdi(hw, 0x0, 0, &value);
+ value |= 0x8000;
+ ngbe_phy_write_reg_mdi(hw, 0x0, 0, value);
+ } else {
+ hw->phy.autoneg_advertised |= NGBE_LINK_SPEED_1GB_FULL;
+
+ /* RGMII_Config1 : Config rx and tx training delay */
+ ngbe_phy_write_reg_ext_yt8521s(hw, 0xA003, 0, 0x3cf1);
+ ngbe_phy_write_reg_ext_yt8521s(hw, 0xA001, 0, 0x8041);
+
+ /* software reset */
+ ngbe_phy_write_reg_sds_ext_yt8521s(hw, 0x0, 0, 0x9140);
+
+ /* power on phy */
+ ngbe_phy_read_reg_sds_mii_yt8521s(hw, 0x0, 0, &value);
+ value &= ~0x800;
+ ngbe_phy_write_reg_sds_mii_yt8521s(hw, 0x0, 0, value);
+ }
+
+ TCALL(hw, phy.ops.check_event);
+
+ return ret_val;
+}
+
+s32 ngbe_phy_reset_zte(struct ngbe_hw *hw)
+{
+ s32 status = 0;
+ u16 value = 0;
+ int i;
+
+ DEBUGFUNC("ngbe_phy_reset_zte");
+
+ if (hw->phy.type != ngbe_phy_zte)
+ return NGBE_ERR_PHY_TYPE;
+
+ /* Don't reset PHY if it's shut down due to overtemp. */
+ if (!hw->phy.reset_if_overtemp &&
+ (NGBE_ERR_OVERTEMP == TCALL(hw, phy.ops.check_overtemp))) {
+ ERROR_REPORT1(NGBE_ERROR_CAUTION,
+ "OVERTEMP! Skip PHY reset.\n");
+ return NGBE_ERR_OVERTEMP;
+ }
+
+ /* Blocked by MNG FW so bail */
+ if (ngbe_check_reset_blocked(hw))
+ return status;
+
+ /* zte phy */
+ /* set control register[0x0] to reset mode */
+ value = 1;
+ /* mode reset */
+ value |= NGBE_MDI_PHY_RESET;
+ status = TCALL(hw, phy.ops.write_reg_mdi, 0, 0, value);
+
+ for (i = 0; i < NGBE_PHY_RST_WAIT_PERIOD; i++) {
+ status = TCALL(hw, phy.ops.read_reg_mdi, 0, 0, &value);
+ if (!(value & NGBE_MDI_PHY_RESET))
+ break;
+ msleep(1);
+ }
+
+ if (i == NGBE_PHY_RST_WAIT_PERIOD) {
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "ZTE MODE RESET did not complete.\n");
+ return NGBE_ERR_RESET_FAILED;
+ }
+
+ return status;
+}
+
+u32 ngbe_phy_setup_link_zte(struct ngbe_hw *hw,
+ u32 speed,
+ bool autoneg_wait_to_complete)
+{
+ u16 ngbe_phy_ccr = 0;
+
+ DEBUGFUNC("ngbe_phy_setup_link_zte");
+ UNREFERENCED_PARAMETER(autoneg_wait_to_complete);
+ /*
+ * Clear autoneg_advertised and set new values based on input link
+ * speed.
+ */
+ hw->phy.autoneg_advertised = 0;
+ TCALL(hw, phy.ops.read_reg_mdi, 0, 0, &ngbe_phy_ccr);
+
+ if (speed & NGBE_LINK_SPEED_1GB_FULL) {
+ hw->phy.autoneg_advertised |= NGBE_LINK_SPEED_1GB_FULL;
+ ngbe_phy_ccr |= NGBE_MDI_PHY_SPEED_SELECT1; /* bit 6 */
+ } else if (speed & NGBE_LINK_SPEED_100_FULL) {
+ hw->phy.autoneg_advertised |= NGBE_LINK_SPEED_100_FULL;
+ ngbe_phy_ccr |= NGBE_MDI_PHY_SPEED_SELECT0; /* bit 13 */
+ } else if (speed & NGBE_LINK_SPEED_10_FULL)
+ hw->phy.autoneg_advertised |= NGBE_LINK_SPEED_10_FULL;
+ else
+ return NGBE_LINK_SPEED_UNKNOWN;
+
+ ngbe_phy_ccr |= NGBE_MDI_PHY_DUPLEX; /* restart autonegotiation */
+ TCALL(hw, phy.ops.write_reg_mdi, 0, 0, ngbe_phy_ccr);
+
+ return speed;
+}
+
+/**
+ * ngbe_phy_check_overtemp - Checks if an overtemp occurred.
+ * @hw: pointer to hardware structure
+ *
+ * Checks if the LASI temp alarm status was triggered due to overtemp
+ **/
+s32 ngbe_phy_check_overtemp(struct ngbe_hw *hw)
+{
+ s32 status = 0;
+ u32 ts_state;
+
+ DEBUGFUNC("ngbe_phy_check_overtemp");
+
+ /* Check that the LASI temp alarm status was triggered */
+ ts_state = rd32(hw, NGBE_TS_ALARM_ST);
+
+ if (ts_state & NGBE_TS_ALARM_ST_DALARM)
+ status = NGBE_ERR_UNDERTEMP;
+ else if (ts_state & NGBE_TS_ALARM_ST_ALARM)
+ status = NGBE_ERR_OVERTEMP;
+
+ return status;
+}
+
+s32 ngbe_phy_check_event(struct ngbe_hw *hw)
+{
+ u16 value = 0;
+ struct ngbe_adapter *adapter = hw->back;
+
+ TCALL(hw, phy.ops.read_reg, 0x1d, 0xa43, &value);
+ adapter->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
+ if (value & 0x10) {
+ adapter->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
+ } else if (value & 0x08) {
+ adapter->flags |= NGBE_FLAG_NEED_ANC_CHECK;
+ }
+
+ return NGBE_OK;
+}
+
+s32 ngbe_phy_check_event_m88e1512(struct ngbe_hw *hw)
+{
+ u16 value = 0;
+ struct ngbe_adapter *adapter = hw->back;
+
+ if (hw->phy.type == ngbe_phy_m88e1512)
+ TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 0);
+ else
+ TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 1);
+ TCALL(hw, phy.ops.read_reg_mdi, 19, 0, &value);
+
+ if (value & NGBE_M88E1512_LSC) {
+ adapter->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
+ }
+
+ if (value & NGBE_M88E1512_ANC) {
+ adapter->flags |= NGBE_FLAG_NEED_ANC_CHECK;
+ }
+
+ return NGBE_OK;
+}
+
+s32 ngbe_phy_check_event_yt8521s(struct ngbe_hw *hw)
+{
+ u16 value = 0;
+ struct ngbe_adapter *adapter = hw->back;
+
+ ngbe_phy_write_reg_ext_yt8521s(hw, 0xa000, 0, 0x0);
+ TCALL(hw, phy.ops.read_reg_mdi, 0x13, 0, &value);
+
+ if (value & (NGBE_YT8521S_SDS_LINK_UP | NGBE_YT8521S_SDS_LINK_DOWN)) {
+ adapter->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
+ }
+
+ return NGBE_OK;
+}
+
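+/*
+ * Pause advertisement helpers: bits 10..11 of MII advertisement
+ * register 4 are the standard 802.3 Pause/Asym-Pause bits, matching
+ * the NGBE_TAF_SYM_PAUSE/NGBE_TAF_ASM_PAUSE encoding.
+ */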
+s32 ngbe_phy_get_advertised_pause(struct ngbe_hw *hw, u8 *pause_bit)
+{
+ u16 value;
+ s32 status = 0;
+
+ status = TCALL(hw, phy.ops.read_reg, 4, 0, &value);
+ *pause_bit = (u8)((value >> 10) & 0x3);
+ return status;
+}
+
+s32 ngbe_phy_get_advertised_pause_m88e1512(struct ngbe_hw *hw, u8 *pause_bit)
+{
+ u16 value;
+ s32 status = 0;
+
+ if (hw->phy.type == ngbe_phy_m88e1512) {
+ status = TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 0);
+ status = TCALL(hw, phy.ops.read_reg_mdi, 4, 0, &value);
+ *pause_bit = (u8)((value >> 10) & 0x3);
+ } else {
+ status = TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 1);
+ status = TCALL(hw, phy.ops.read_reg_mdi, 4, 0, &value);
+ *pause_bit = (u8)((value >> 7) & 0x3);
+ }
+ return status;
+}
+
+s32 ngbe_phy_get_advertised_pause_yt8521s(struct ngbe_hw *hw, u8 *pause_bit)
+{
+ u16 value;
+ s32 status = 0;
+
+ status = ngbe_phy_read_reg_sds_mii_yt8521s(hw, 0x04, 0, &value);
+ *pause_bit = (u8)((value >> 7) & 0x3);
+ return status;
+}
+
+s32 ngbe_phy_get_lp_advertised_pause(struct ngbe_hw *hw, u8 *pause_bit)
+{
+ u16 value;
+ s32 status = 0;
+
+ status = TCALL(hw, phy.ops.read_reg, 0x1d, 0xa43, &value);
+
+ status = TCALL(hw, phy.ops.read_reg, 0x1, 0, &value);
+ value = (value >> 5) & 0x1;
+
+ /* if AN complete then check lp adv pause */
+ status = TCALL(hw, phy.ops.read_reg, 5, 0, &value);
+ *pause_bit = (u8)((value >> 10) & 0x3);
+ return status;
+}
+
+s32 ngbe_phy_get_lp_advertised_pause_m88e1512(struct ngbe_hw *hw,
+ u8 *pause_bit)
+{
+ u16 value;
+ s32 status = 0;
+
+ if (hw->phy.type == ngbe_phy_m88e1512) {
+ status = TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 0);
+ status = TCALL(hw, phy.ops.read_reg_mdi, 5, 0, &value);
+ *pause_bit = (u8)((value >> 10) & 0x3);
+ } else {
+ status = TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 1);
+ status = TCALL(hw, phy.ops.read_reg_mdi, 5, 0, &value);
+ *pause_bit = (u8)((value >> 7) & 0x3);
+ }
+ return status;
+}
+
+s32 ngbe_phy_get_lp_advertised_pause_yt8521s(struct ngbe_hw *hw,
+ u8 *pause_bit)
+{
+ u16 value;
+ s32 status = 0;
+
+ status = ngbe_phy_read_reg_sds_mii_yt8521s(hw, 0x05, 0, &value);
+ *pause_bit = (u8)((value >> 7) & 0x3);
+ return status;
+}
+
+s32 ngbe_phy_set_pause_advertisement(struct ngbe_hw *hw, u16 pause_bit)
+{
+ u16 value;
+ s32 status = 0;
+
+ status = TCALL(hw, phy.ops.read_reg, 4, 0, &value);
+ value &= ~0xC00;
+ value |= pause_bit;
+ status = TCALL(hw, phy.ops.write_reg, 4, 0, value);
+ return status;
+}
+
+s32 ngbe_phy_set_pause_advertisement_m88e1512(struct ngbe_hw *hw,
+ u16 pause_bit)
+{
+ u16 value;
+ s32 status = 0;
+ if (hw->phy.type == ngbe_phy_m88e1512) {
+ status = TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 0);
+ status = TCALL(hw, phy.ops.read_reg_mdi, 4, 0, &value);
+ value &= ~0xC00;
+ value |= pause_bit;
+ status = TCALL(hw, phy.ops.write_reg_mdi, 4, 0, value);
+ } else {
+ status = TCALL(hw, phy.ops.write_reg_mdi, 22, 0, 1);
+ status = TCALL(hw, phy.ops.read_reg_mdi, 4, 0, &value);
+ value &= ~0x180;
+ value |= pause_bit;
+ status = TCALL(hw, phy.ops.write_reg_mdi, 4, 0, value);
+ }
+
+ return status;
+}
+
+s32 ngbe_phy_set_pause_advertisement_yt8521s(struct ngbe_hw *hw,
+ u16 pause_bit)
+{
+ u16 value;
+ s32 status = 0;
+
+ status = ngbe_phy_read_reg_sds_mii_yt8521s(hw, 0x04, 0, &value);
+ value &= ~0x180;
+ value |= pause_bit;
+ status = ngbe_phy_write_reg_sds_mii_yt8521s(hw, 0x04, 0, value);
+
+ return status;
+}
+
+s32 ngbe_phy_setup(struct ngbe_hw *hw)
+{
+ int i;
+ u16 value = 0;
+
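+ /*
+ * Internal GPHY bring-up: wait for the PHY to leave reset, then step
+ * what appears to be its init state machine through register 20 on
+ * page 0xa46; register 16 on page 0xa42 reports the PHY state, where
+ * 3 means ready.
+ */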
+ for (i = 0; i < 15; i++) {
+ if (!rd32m(hw, NGBE_MIS_ST, NGBE_MIS_ST_GPHY_IN_RST(hw->bus.lan_id))) {
+ break;
+ }
+ msleep(1);
+ }
+
+ if (i == 15) {
+ ERROR_REPORT1(NGBE_ERROR_POLLING,
+ "GPhy reset exceeds maximum times.\n");
+ return NGBE_ERR_PHY_TIMEOUT;
+ }
+
+ for (i = 0; i < 1000; i++) {
+ TCALL(hw, phy.ops.read_reg, 29, 0xa43, &value);
+ if (value & 0x20)
+ break;
+ }
+
+ TCALL(hw, phy.ops.write_reg, 20, 0xa46, 1);
+ for (i = 0; i < 1000; i++) {
+ TCALL(hw, phy.ops.read_reg, 29, 0xa43, &value);
+ if (value & 0x20)
+ break;
+ }
+ if (i == 1000) {
+ return NGBE_ERR_PHY_TIMEOUT;
+ }
+
+ TCALL(hw, phy.ops.write_reg, 20, 0xa46, 2);
+ for (i = 0; i < 1000; i++) {
+ TCALL(hw, phy.ops.read_reg, 29, 0xa43, &value);
+ if (value & 0x20)
+ break;
+ }
+
+ if (i == 1000) {
+ return NGBE_ERR_PHY_TIMEOUT;
+ }
+
+ for (i = 0; i < 1000; i++) {
+ TCALL(hw, phy.ops.read_reg, 16, 0xa42, &value);
+ if ((value & 0x7) == 3)
+ break;
+ }
+
+ if (i == 1000) {
+ return NGBE_ERR_PHY_TIMEOUT;
+ }
+
+ return NGBE_OK;
+}
+
+s32 ngbe_init_phy_ops_common(struct ngbe_hw *hw)
+{
+ struct ngbe_phy_info *phy = &hw->phy;
+
+ phy->ops.reset = ngbe_phy_reset;
+ phy->ops.read_reg = ngbe_phy_read_reg;
+ phy->ops.write_reg = ngbe_phy_write_reg;
+ phy->ops.setup_link = ngbe_phy_setup_link;
+ phy->ops.check_overtemp = ngbe_phy_check_overtemp;
+ phy->ops.identify = ngbe_phy_identify;
+ phy->ops.init = ngbe_phy_init;
+ phy->ops.check_event = ngbe_phy_check_event;
+ phy->ops.get_adv_pause = ngbe_phy_get_advertised_pause;
+ phy->ops.get_lp_adv_pause = ngbe_phy_get_lp_advertised_pause;
+ phy->ops.set_adv_pause = ngbe_phy_set_pause_advertisement;
+ phy->ops.setup_once = ngbe_phy_setup;
+
+ return NGBE_OK;
+}
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_phy.h b/drivers/net/ethernet/netswift/ngbe/ngbe_phy.h
new file mode 100644
index 0000000000000..c6568018b20c7
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_phy.h
@@ -0,0 +1,201 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+#ifndef _NGBE_PHY_H_
+#define _NGBE_PHY_H_
+
+#include "ngbe_type.h"
+#include "ngbe.h"
+
+/* EEPROM byte offsets */
+#define NGBE_SFF_IDENTIFIER 0x0
+#define NGBE_SFF_IDENTIFIER_SFP 0x3
+#define NGBE_SFF_VENDOR_OUI_BYTE0 0x25
+#define NGBE_SFF_VENDOR_OUI_BYTE1 0x26
+#define NGBE_SFF_VENDOR_OUI_BYTE2 0x27
+#define NGBE_SFF_1GBE_COMP_CODES 0x6
+#define NGBE_SFF_10GBE_COMP_CODES 0x3
+#define NGBE_SFF_CABLE_TECHNOLOGY 0x8
+#define NGBE_SFF_CABLE_SPEC_COMP 0x3C
+#define NGBE_SFF_SFF_8472_SWAP 0x5C
+#define NGBE_SFF_SFF_8472_COMP 0x5E
+#define NGBE_SFF_SFF_8472_OSCB 0x6E
+#define NGBE_SFF_SFF_8472_ESCB 0x76
+#define NGBE_SFF_IDENTIFIER_QSFP_PLUS 0xD
+#define NGBE_SFF_QSFP_VENDOR_OUI_BYTE0 0xA5
+#define NGBE_SFF_QSFP_VENDOR_OUI_BYTE1 0xA6
+#define NGBE_SFF_QSFP_VENDOR_OUI_BYTE2 0xA7
+#define NGBE_SFF_QSFP_CONNECTOR 0x82
+#define NGBE_SFF_QSFP_10GBE_COMP 0x83
+#define NGBE_SFF_QSFP_1GBE_COMP 0x86
+#define NGBE_SFF_QSFP_CABLE_LENGTH 0x92
+#define NGBE_SFF_QSFP_DEVICE_TECH 0x93
+
+/* Bitmasks */
+#define NGBE_SFF_DA_PASSIVE_CABLE 0x4
+#define NGBE_SFF_DA_ACTIVE_CABLE 0x8
+#define NGBE_SFF_DA_SPEC_ACTIVE_LIMITING 0x4
+#define NGBE_SFF_1GBASESX_CAPABLE 0x1
+#define NGBE_SFF_1GBASELX_CAPABLE 0x2
+#define NGBE_SFF_1GBASET_CAPABLE 0x8
+#define NGBE_SFF_10GBASESR_CAPABLE 0x10
+#define NGBE_SFF_10GBASELR_CAPABLE 0x20
+#define NGBE_SFF_SOFT_RS_SELECT_MASK 0x8
+#define NGBE_SFF_SOFT_RS_SELECT_10G 0x8
+#define NGBE_SFF_SOFT_RS_SELECT_1G 0x0
+#define NGBE_SFF_ADDRESSING_MODE 0x4
+#define NGBE_SFF_QSFP_DA_ACTIVE_CABLE 0x1
+#define NGBE_SFF_QSFP_DA_PASSIVE_CABLE 0x8
+#define NGBE_SFF_QSFP_CONNECTOR_NOT_SEPARABLE 0x23
+#define NGBE_SFF_QSFP_TRANSMITER_850NM_VCSEL 0x0
+#define NGBE_I2C_EEPROM_READ_MASK 0x100
+#define NGBE_I2C_EEPROM_STATUS_MASK 0x3
+#define NGBE_I2C_EEPROM_STATUS_NO_OPERATION 0x0
+#define NGBE_I2C_EEPROM_STATUS_PASS 0x1
+#define NGBE_I2C_EEPROM_STATUS_FAIL 0x2
+#define NGBE_I2C_EEPROM_STATUS_IN_PROGRESS 0x3
+
+#define NGBE_CS4227 0xBE /* CS4227 address */
+#define NGBE_CS4227_GLOBAL_ID_LSB 0
+#define NGBE_CS4227_SCRATCH 2
+#define NGBE_CS4227_GLOBAL_ID_VALUE 0x03E5
+#define NGBE_CS4227_SCRATCH_VALUE 0x5aa5
+#define NGBE_CS4227_RETRIES 5
+#define NGBE_CS4227_LINE_SPARE22_MSB 0x12AD /* Reg to program speed */
+#define NGBE_CS4227_LINE_SPARE24_LSB 0x12B0 /* Reg to program EDC */
+#define NGBE_CS4227_HOST_SPARE22_MSB 0x1AAD /* Reg to program speed */
+#define NGBE_CS4227_HOST_SPARE24_LSB 0x1AB0 /* Reg to program EDC */
+#define NGBE_CS4227_EDC_MODE_CX1 0x0002
+#define NGBE_CS4227_EDC_MODE_SR 0x0004
+#define NGBE_CS4227_RESET_HOLD 500 /* microseconds */
+#define NGBE_CS4227_RESET_DELAY 500 /* milliseconds */
+#define NGBE_CS4227_CHECK_DELAY 30 /* milliseconds */
+#define NGBE_PE 0xE0 /* Port expander address */
+#define NGBE_PE_OUTPUT 1 /* Output register offset */
+#define NGBE_PE_CONFIG 3 /* Config register offset */
+#define NGBE_PE_BIT1 (1 << 1)
+
+/* Flow control defines */
+#define NGBE_TAF_SYM_PAUSE (0x1)
+#define NGBE_TAF_ASM_PAUSE (0x2)
+
+/* Bit-shift macros */
+#define NGBE_SFF_VENDOR_OUI_BYTE0_SHIFT 24
+#define NGBE_SFF_VENDOR_OUI_BYTE1_SHIFT 16
+#define NGBE_SFF_VENDOR_OUI_BYTE2_SHIFT 8
+
+/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */
+#define NGBE_SFF_VENDOR_OUI_TYCO 0x00407600
+#define NGBE_SFF_VENDOR_OUI_FTL 0x00906500
+#define NGBE_SFF_VENDOR_OUI_AVAGO 0x00176A00
+#define NGBE_SFF_VENDOR_OUI_INTEL 0x001B2100
+
+/* I2C SDA and SCL timing parameters for standard mode */
+#define NGBE_I2C_T_HD_STA 4
+#define NGBE_I2C_T_LOW 5
+#define NGBE_I2C_T_HIGH 4
+#define NGBE_I2C_T_SU_STA 5
+#define NGBE_I2C_T_HD_DATA 5
+#define NGBE_I2C_T_SU_DATA 1
+#define NGBE_I2C_T_RISE 1
+#define NGBE_I2C_T_FALL 1
+#define NGBE_I2C_T_SU_STO 4
+#define NGBE_I2C_T_BUF 5
+
+#ifndef NGBE_SFP_DETECT_RETRIES
+#define NGBE_SFP_DETECT_RETRIES 10
+#endif /* NGBE_SFP_DETECT_RETRIES */
+
+/* SFP+ SFF-8472 Compliance */
+#define NGBE_SFF_SFF_8472_UNSUP 0x00
+
+enum ngbe_phy_type ngbe_get_phy_type_from_id(struct ngbe_hw *hw);
+s32 ngbe_init_phy_ops_common(struct ngbe_hw *hw);
+s32 ngbe_phy_read_reg_mdi(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 *phy_data);
+s32 ngbe_phy_write_reg_mdi(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 phy_data);
+
+s32 ngbe_phy_read_reg_sds_mii_yt8521s(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 *phy_data);
+s32 ngbe_phy_write_reg_sds_mii_yt8521s(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 phy_data);
+
+s32 ngbe_phy_read_reg_ext_yt8521s(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 *phy_data);
+s32 ngbe_phy_write_reg_ext_yt8521s(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 phy_data);
+
+s32 ngbe_phy_read_reg_sds_ext_yt8521s(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 *phy_data);
+s32 ngbe_phy_write_reg_sds_ext_yt8521s(struct ngbe_hw *hw,
+ u32 reg_addr,
+ u32 device_type,
+ u16 phy_data);
+
+s32 ngbe_phy_init(struct ngbe_hw *hw);
+s32 ngbe_phy_identify(struct ngbe_hw *hw);
+s32 ngbe_phy_reset(struct ngbe_hw *hw);
+u32 ngbe_phy_setup_link(struct ngbe_hw *hw,
+ u32 speed,
+ bool need_restart_AN);
+s32 ngbe_phy_reset_m88e1512(struct ngbe_hw *hw);
+u32 ngbe_phy_setup_link_m88e1512(struct ngbe_hw *hw,
+ u32 speed,
+ bool autoneg_wait_to_complete);
+s32 ngbe_phy_check_overtemp(struct ngbe_hw *hw);
+
+s32 ngbe_check_zte_phy_id(struct ngbe_hw *hw);
+s32 ngbe_phy_reset_zte(struct ngbe_hw *hw);
+u32 ngbe_phy_setup_link_zte(struct ngbe_hw *hw,
+ u32 speed,
+ bool autoneg_wait_to_complete);
+s32 ngbe_phy_check_event(struct ngbe_hw *hw);
+s32 ngbe_phy_check_event_m88e1512(struct ngbe_hw *hw);
+s32 ngbe_phy_get_advertised_pause_m88e1512(struct ngbe_hw *hw, u8 *pause_bit);
+s32 ngbe_phy_get_lp_advertised_pause_m88e1512(struct ngbe_hw *hw,
+ u8 *pause_bit);
+s32 ngbe_phy_set_pause_advertisement_m88e1512(struct ngbe_hw *hw,
+ u16 pause_bit);
+
+s32 ngbe_phy_reset_yt8521s(struct ngbe_hw *hw);
+u32 ngbe_phy_setup_link_yt8521s(struct ngbe_hw *hw,
+ u32 speed,
+ bool autoneg_wait_to_complete);
+
+s32 ngbe_phy_check_event_yt8521s(struct ngbe_hw *hw);
+s32 ngbe_phy_get_advertised_pause_yt8521s(struct ngbe_hw *hw, u8 *pause_bit);
+s32 ngbe_phy_get_lp_advertised_pause_yt8521s(struct ngbe_hw *hw,
+ u8 *pause_bit);
+s32 ngbe_phy_set_pause_advertisement_yt8521s(struct ngbe_hw *hw,
+ u16 pause_bit);
+
+#endif /* _NGBE_PHY_H_ */
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_procfs.c b/drivers/net/ethernet/netswift/ngbe/ngbe_procfs.c
new file mode 100644
index 0000000000000..f7ef1da9fd4ef
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_procfs.c
@@ -0,0 +1,908 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include "ngbe.h"
+#include "ngbe_hw.h"
+#include "ngbe_type.h"
+
+#ifdef CONFIG_NGBE_PROCFS
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+
+static struct proc_dir_entry *ngbe_top_dir;
+
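+/*
+ * Each read handler below follows the same pattern: validate the
+ * adapter (and, where needed, hw/netdev) pointer passed as private
+ * data, then snprintf a single value into the page buffer. The
+ * entries are presumably registered with /proc later in this file.
+ */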
+static struct net_device_stats *procfs_get_stats(struct net_device *netdev)
+{
+ if (netdev == NULL)
+ return NULL;
+
+ /* only return the current stats */
+ return &netdev->stats;
+}
+
+static int ngbe_fwbanner(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ return snprintf(page, count, "%s\n", adapter->eeprom_id);
+}
+
+static int ngbe_porttype(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+ return snprintf(page, count, "%d\n",
+ test_bit(__NGBE_DOWN, &adapter->state));
+}
+
+static int ngbe_portspeed(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ int speed = 0;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ switch (adapter->link_speed) {
+ case NGBE_LINK_SPEED_100_FULL:
+ speed = 1;
+ break;
+ case NGBE_LINK_SPEED_1GB_FULL:
+ speed = 10;
+ break;
+ case NGBE_LINK_SPEED_10GB_FULL:
+ speed = 100;
+ break;
+ default:
+ break;
+ }
+ return snprintf(page, count, "%d\n", speed);
+}
+
+static int ngbe_wqlflag(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ return snprintf(page, count, "%d\n", adapter->wol);
+}
+
+static int ngbe_xflowctl(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ struct ngbe_hw *hw;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+
+ return snprintf(page, count, "%d\n", hw->fc.current_mode);
+}
+
+static int ngbe_rxdrops(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ struct net_device_stats *net_stats;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+ net_stats = procfs_get_stats(adapter->netdev);
+ if (net_stats == NULL)
+ return snprintf(page, count, "error: no net stats\n");
+
+ return snprintf(page, count, "%lu\n",
+ net_stats->rx_dropped);
+}
+
+static int ngbe_rxerrors(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ struct net_device_stats *net_stats;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+ net_stats = procfs_get_stats(adapter->netdev);
+ if (net_stats == NULL)
+ return snprintf(page, count, "error: no net stats\n");
+
+ return snprintf(page, count, "%lu\n", net_stats->rx_errors);
+}
+
+static int ngbe_rxupacks(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_hw *hw;
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+
+ return snprintf(page, count, "%d\n", rd32(hw, NGBE_TPR));
+}
+
+static int ngbe_rxmpacks(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_hw *hw;
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ int i, mprc = 0;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+ for (i = 0; i < 8; i++)
+ mprc += rd32(hw, NGBE_PX_MPRC(i));
+ return snprintf(page, count, "%d\n", mprc);
+}
+
+static int ngbe_rxbpacks(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_hw *hw;
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+
+ return snprintf(page, count, "%d\n",
+ rd32(hw, NGBE_RX_BC_FRAMES_GOOD_LOW));
+}
+
+static int ngbe_txupacks(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_hw *hw;
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+
+ return snprintf(page, count, "%d\n",
+ rd32(hw, NGBE_TX_FRAME_CNT_GOOD_BAD_LOW));
+}
+
+static int ngbe_txmpacks(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_hw *hw;
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+
+ return snprintf(page, count, "%d\n",
+ rd32(hw, NGBE_TX_MC_FRAMES_GOOD_LOW));
+}
+
+static int ngbe_txbpacks(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_hw *hw;
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+
+ return snprintf(page, count, "%d\n",
+ rd32(hw, NGBE_TX_BC_FRAMES_GOOD_LOW));
+}
+
+static int ngbe_txerrors(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ struct net_device_stats *net_stats;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+ net_stats = procfs_get_stats(adapter->netdev);
+ if (net_stats == NULL)
+ return snprintf(page, count, "error: no net stats\n");
+
+ return snprintf(page, count, "%lu\n",
+ net_stats->tx_errors);
+}
+
+static int ngbe_txdrops(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ struct net_device_stats *net_stats;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+ net_stats = procfs_get_stats(adapter->netdev);
+ if (net_stats == NULL)
+ return snprintf(page, count, "error: no net stats\n");
+
+ return snprintf(page, count, "%lu\n",
+ net_stats->tx_dropped);
+}
+
+static int ngbe_rxframes(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ struct net_device_stats *net_stats;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+ net_stats = procfs_get_stats(adapter->netdev);
+ if (net_stats == NULL)
+ return snprintf(page, count, "error: no net stats\n");
+
+ return snprintf(page, count, "%lu\n",
+ net_stats->rx_packets);
+}
+
+static int ngbe_rxbytes(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ struct net_device_stats *net_stats;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+ net_stats = procfs_get_stats(adapter->netdev);
+ if (net_stats == NULL)
+ return snprintf(page, count, "error: no net stats\n");
+
+ return snprintf(page, count, "%lu\n",
+ net_stats->rx_bytes);
+}
+
+static int ngbe_txframes(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ struct net_device_stats *net_stats;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+ net_stats = procfs_get_stats(adapter->netdev);
+ if (net_stats == NULL)
+ return snprintf(page, count, "error: no net stats\n");
+
+ return snprintf(page, count, "%lu\n",
+ net_stats->tx_packets);
+}
+
+static int ngbe_txbytes(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ struct net_device_stats *net_stats;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+ net_stats = procfs_get_stats(adapter->netdev);
+ if (net_stats == NULL)
+ return snprintf(page, count, "error: no net stats\n");
+
+ return snprintf(page, count, "%lu\n",
+ net_stats->tx_bytes);
+}
+
+static int ngbe_linkstat(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_hw *hw;
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ int bitmask = 0;
+ u32 link_speed;
+ bool link_up = false;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+
+ if (!test_bit(__NGBE_DOWN, &adapter->state))
+ bitmask |= 1;
+
+ /* always assume link is up, as there is no link check function */
+ link_up = true;
+ if (link_up)
+ bitmask |= 2;
+
+ if (adapter->old_lsc != adapter->lsc_int) {
+ bitmask |= 4;
+ adapter->old_lsc = adapter->lsc_int;
+ }
+
+ return snprintf(page, count, "0x%X\n", bitmask);
+}
+
+static int ngbe_funcid(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ struct ngbe_hw *hw;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+
+ return snprintf(page, count, "0x%X\n", hw->bus.func);
+}
+
+static int ngbe_funcvers(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void __always_unused *data)
+{
+ return snprintf(page, count, "%s\n", ngbe_driver_version);
+}
+
+static int ngbe_macburn(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_hw *hw;
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+
+ return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n",
+ (unsigned int)hw->mac.perm_addr[0],
+ (unsigned int)hw->mac.perm_addr[1],
+ (unsigned int)hw->mac.perm_addr[2],
+ (unsigned int)hw->mac.perm_addr[3],
+ (unsigned int)hw->mac.perm_addr[4],
+ (unsigned int)hw->mac.perm_addr[5]);
+}
+
+static int ngbe_macadmn(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_hw *hw;
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+
+ return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n",
+ (unsigned int)hw->mac.addr[0],
+ (unsigned int)hw->mac.addr[1],
+ (unsigned int)hw->mac.addr[2],
+ (unsigned int)hw->mac.addr[3],
+ (unsigned int)hw->mac.addr[4],
+ (unsigned int)hw->mac.addr[5]);
+}
+
+static int ngbe_maclla1(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ struct ngbe_hw *hw;
+ int rc;
+ u16 eeprom_buff[6];
+ u16 first_word = 0x37;
+ const u16 word_count = ARRAY_SIZE(eeprom_buff);
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+
+ rc = TCALL(hw, eeprom.ops.read_buffer, first_word, 1, &first_word);
+ if (rc != 0)
+ return snprintf(page, count,
+ "error: reading pointer to the EEPROM\n");
+
+ if (first_word != 0x0000 && first_word != 0xFFFF) {
+ rc = TCALL(hw, eeprom.ops.read_buffer, first_word, word_count,
+ eeprom_buff);
+ if (rc != 0)
+ return snprintf(page, count, "error: reading buffer\n");
+ } else {
+ memset(eeprom_buff, 0, sizeof(eeprom_buff));
+ }
+
+ switch (hw->bus.func) {
+ case 0:
+ return snprintf(page, count, "0x%04X%04X%04X\n",
+ eeprom_buff[0],
+ eeprom_buff[1],
+ eeprom_buff[2]);
+ case 1:
+ return snprintf(page, count, "0x%04X%04X%04X\n",
+ eeprom_buff[3],
+ eeprom_buff[4],
+ eeprom_buff[5]);
+ default:
+ return snprintf(page, count, "unexpected port %d\n", hw->bus.func);
+ }
+}
+
+static int ngbe_mtusize(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ struct net_device *netdev;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+ netdev = adapter->netdev;
+ if (netdev == NULL)
+ return snprintf(page, count, "error: no net device\n");
+
+ return snprintf(page, count, "%d\n", netdev->mtu);
+}
+
+static int ngbe_featflag(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ int bitmask = 0;
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ struct net_device *netdev;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+ netdev = adapter->netdev;
+ if (netdev == NULL)
+ return snprintf(page, count, "error: no net device\n");
+ if (adapter->netdev->features & NETIF_F_RXCSUM)
+ bitmask |= 1;
+ return snprintf(page, count, "%d\n", bitmask);
+}
+
+static int ngbe_lsominct(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void __always_unused *data)
+{
+ return snprintf(page, count, "%d\n", 1);
+}
+
+static int ngbe_prommode(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ struct net_device *netdev;
+
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+ netdev = adapter->netdev;
+ if (netdev == NULL)
+ return snprintf(page, count, "error: no net device\n");
+
+ return snprintf(page, count, "%d\n",
+ netdev->flags & IFF_PROMISC);
+}
+
+static int ngbe_txdscqsz(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ return snprintf(page, count, "%d\n", adapter->tx_ring[0]->count);
+}
+
+static int ngbe_rxdscqsz(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ return snprintf(page, count, "%d\n", adapter->rx_ring[0]->count);
+}
+
+static int ngbe_rxqavg(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ int index;
+ int diff = 0;
+ u16 ntc;
+ u16 ntu;
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ for (index = 0; index < adapter->num_rx_queues; index++) {
+ ntc = adapter->rx_ring[index]->next_to_clean;
+ ntu = adapter->rx_ring[index]->next_to_use;
+
+ if (ntc >= ntu)
+ diff += (ntc - ntu);
+ else
+ diff += (adapter->rx_ring[index]->count - ntu + ntc);
+ }
+ if (adapter->num_rx_queues <= 0)
+ return snprintf(page, count,
+ "can't calculate, number of queues %d\n",
+ adapter->num_rx_queues);
+ return snprintf(page, count, "%d\n", diff/adapter->num_rx_queues);
+}
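+
+/*
+ * For illustration, each per-ring term above is the distance from
+ * next_to_use forward to next_to_clean, taken modulo the ring size.
+ * With hypothetical values count = 512, ntu = 500 and ntc = 10, the
+ * else-branch contributes 512 - 500 + 10 = 22 to diff.
+ */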
+
+static int ngbe_txqavg(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ int index;
+ int diff = 0;
+ u16 ntc;
+ u16 ntu;
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ for (index = 0; index < adapter->num_tx_queues; index++) {
+ ntc = adapter->tx_ring[index]->next_to_clean;
+ ntu = adapter->tx_ring[index]->next_to_use;
+
+ if (ntc >= ntu)
+ diff += (ntc - ntu);
+ else
+ diff += (adapter->tx_ring[index]->count - ntu + ntc);
+ }
+ if (adapter->num_tx_queues <= 0)
+ return snprintf(page, count,
+ "can't calculate, number of queues %d\n",
+ adapter->num_tx_queues);
+ return snprintf(page, count, "%d\n",
+ diff/adapter->num_tx_queues);
+}
+
+static int ngbe_iovotype(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void __always_unused *data)
+{
+ return snprintf(page, count, "2\n");
+}
+
+static int ngbe_funcnbr(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ return snprintf(page, count, "%d\n", adapter->num_vfs);
+}
+
+static int ngbe_pciebnbr(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_adapter *adapter = (struct ngbe_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ return snprintf(page, count, "%d\n", adapter->pdev->bus->number);
+}
+
+static int ngbe_therm_dealarmthresh(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_therm_proc_data *therm_data =
+ (struct ngbe_therm_proc_data *)data;
+
+ if (therm_data == NULL)
+ return snprintf(page, count, "error: no therm_data\n");
+
+ return snprintf(page, count, "%d\n",
+ therm_data->sensor_data->dalarm_thresh);
+}
+
+static int ngbe_therm_alarmthresh(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ struct ngbe_therm_proc_data *therm_data =
+ (struct ngbe_therm_proc_data *)data;
+
+ if (therm_data == NULL)
+ return snprintf(page, count, "error: no therm_data\n");
+
+ return snprintf(page, count, "%d\n",
+ therm_data->sensor_data->alarm_thresh);
+}
+
+static int ngbe_therm_temp(char *page, char __always_unused **start,
+ off_t __always_unused off, int count,
+ int __always_unused *eof, void *data)
+{
+ s32 status;
+ struct ngbe_therm_proc_data *therm_data =
+ (struct ngbe_therm_proc_data *)data;
+
+ if (therm_data == NULL)
+ return snprintf(page, count, "error: no therm_data\n");
+
+ status = ngbe_get_thermal_sensor_data(therm_data->hw);
+	if (status != 0)
+		return snprintf(page, count, "error: status %d returned\n",
+				status);
+
+ return snprintf(page, count, "%d\n", therm_data->sensor_data->temp);
+}
+
+struct ngbe_proc_type {
+ char name[32];
+ int (*read)(char*, char**, off_t, int, int*, void*);
+};
+
+struct ngbe_proc_type ngbe_proc_entries[] = {
+ {"fwbanner", &ngbe_fwbanner},
+ {"porttype", &ngbe_porttype},
+ {"portspeed", &ngbe_portspeed},
+ {"wqlflag", &ngbe_wqlflag},
+ {"xflowctl", &ngbe_xflowctl},
+ {"rxdrops", &ngbe_rxdrops},
+ {"rxerrors", &ngbe_rxerrors},
+ {"rxupacks", &ngbe_rxupacks},
+ {"rxmpacks", &ngbe_rxmpacks},
+ {"rxbpacks", &ngbe_rxbpacks},
+ {"txdrops", &ngbe_txdrops},
+ {"txerrors", &ngbe_txerrors},
+ {"txupacks", &ngbe_txupacks},
+ {"txmpacks", &ngbe_txmpacks},
+ {"txbpacks", &ngbe_txbpacks},
+ {"rxframes", &ngbe_rxframes},
+ {"rxbytes", &ngbe_rxbytes},
+ {"txframes", &ngbe_txframes},
+ {"txbytes", &ngbe_txbytes},
+ {"linkstat", &ngbe_linkstat},
+ {"funcid", &ngbe_funcid},
+ {"funcvers", &ngbe_funcvers},
+ {"macburn", &ngbe_macburn},
+ {"macadmn", &ngbe_macadmn},
+ {"maclla1", &ngbe_maclla1},
+ {"mtusize", &ngbe_mtusize},
+ {"featflag", &ngbe_featflag},
+ {"lsominct", &ngbe_lsominct},
+ {"prommode", &ngbe_prommode},
+ {"txdscqsz", &ngbe_txdscqsz},
+ {"rxdscqsz", &ngbe_rxdscqsz},
+ {"txqavg", &ngbe_txqavg},
+ {"rxqavg", &ngbe_rxqavg},
+ {"iovotype", &ngbe_iovotype},
+ {"funcnbr", &ngbe_funcnbr},
+ {"pciebnbr", &ngbe_pciebnbr},
+ {"", NULL}
+};
+
+struct ngbe_proc_type ngbe_internal_entries[] = {
+ {"temp", &ngbe_therm_temp},
+ {"alarmthresh", &ngbe_therm_alarmthresh},
+ {"dealarmthresh", &ngbe_therm_dealarmthresh},
+ {"", NULL}
+};
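+
+/*
+ * With the tables above, ngbe_procfs_init() below builds a per-device tree
+ * such as (the PCI address shown is only an example):
+ *
+ *   /proc/driver/ngbe/0000:01:00.0/info/linkstat
+ *   /proc/driver/ngbe/0000:01:00.0/info/maclla1
+ *   /proc/driver/ngbe/0000:01:00.0/info/sensor/temp
+ *
+ * where each file is backed by the matching read callback registered via
+ * create_proc_read_entry().
+ */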
+
+void ngbe_del_proc_entries(struct ngbe_adapter *adapter)
+{
+ int index;
+ int i;
+ char buf[16]; /* much larger than the sensor number will ever be */
+
+ if (ngbe_top_dir == NULL)
+ return;
+
+ for (i = 0; i < NGBE_MAX_SENSORS; i++) {
+ if (adapter->therm_dir[i] == NULL)
+ continue;
+
+ for (index = 0; ; index++) {
+ if (ngbe_internal_entries[index].read == NULL)
+ break;
+
+ remove_proc_entry(ngbe_internal_entries[index].name,
+ adapter->therm_dir[i]);
+ }
+ snprintf(buf, sizeof(buf), "sensor_%d", i);
+ remove_proc_entry(buf, adapter->info_dir);
+ }
+
+ if (adapter->info_dir != NULL) {
+ for (index = 0; ; index++) {
+ if (ngbe_proc_entries[index].read == NULL)
+ break;
+ remove_proc_entry(ngbe_proc_entries[index].name,
+ adapter->info_dir);
+ }
+ remove_proc_entry("info", adapter->eth_dir);
+ }
+
+ if (adapter->eth_dir != NULL)
+ remove_proc_entry(pci_name(adapter->pdev), ngbe_top_dir);
+}
+
+/* called from ngbe_main.c */
+void ngbe_procfs_exit(struct ngbe_adapter *adapter)
+{
+ ngbe_del_proc_entries(adapter);
+}
+
+int ngbe_procfs_topdir_init(void)
+{
+ ngbe_top_dir = proc_mkdir("driver/ngbe", NULL);
+ if (ngbe_top_dir == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void ngbe_procfs_topdir_exit(void)
+{
+ remove_proc_entry("driver/ngbe", NULL);
+}
+
+/* called from ngbe_main.c */
+int ngbe_procfs_init(struct ngbe_adapter *adapter)
+{
+ int rc = 0;
+ int index;
+ int i;
+ char buf[16]; /* much larger than the sensor number will ever be */
+
+ adapter->eth_dir = NULL;
+ adapter->info_dir = NULL;
+ adapter->therm_dir = NULL;
+
+ if (ngbe_top_dir == NULL) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ adapter->eth_dir = proc_mkdir(pci_name(adapter->pdev), ngbe_top_dir);
+ if (adapter->eth_dir == NULL) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ adapter->info_dir = proc_mkdir("info", adapter->eth_dir);
+ if (adapter->info_dir == NULL) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ for (index = 0; ; index++) {
+ if (ngbe_proc_entries[index].read == NULL)
+ break;
+ if (!(create_proc_read_entry(ngbe_proc_entries[index].name,
+ 0444,
+ adapter->info_dir,
+ ngbe_proc_entries[index].read,
+ adapter))) {
+
+ rc = -ENOMEM;
+ goto fail;
+ }
+ }
+ if (!TCALL(&(adapter->hw), ops.init_thermal_sensor_thresh))
+ goto exit;
+
+ snprintf(buf, sizeof(buf), "sensor");
+ adapter->therm_dir = proc_mkdir(buf, adapter->info_dir);
+ if (adapter->therm_dir == NULL) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ for (index = 0; ; index++) {
+ if (ngbe_internal_entries[index].read == NULL)
+ break;
+ /*
+ * therm_data struct contains pointer the read func
+ * will be needing
+ */
+ adapter->therm_data.hw = &adapter->hw;
+ adapter->therm_data.sensor_data =
+ &adapter->hw.mac.thermal_sensor_data.sensor;
+
+ if (!(create_proc_read_entry(
+ ngbe_internal_entries[index].name,
+ 0444,
+ adapter->therm_dir,
+ ngbe_internal_entries[index].read,
+ &adapter->therm_data))) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ goto exit;
+
+fail:
+ ngbe_del_proc_entries(adapter);
+exit:
+ return rc;
+}
+
+#endif /* CONFIG_NGBE_PROCFS */
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_ptp.c b/drivers/net/ethernet/netswift/ngbe/ngbe_ptp.c
new file mode 100644
index 0000000000000..87e7d5dc11a43
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_ptp.c
@@ -0,0 +1,858 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+#include "ngbe.h"
+#include <linux/ptp_classify.h>
+
+/*
+ * SYSTIME is defined by a fixed point system which allows the user to
+ * define the scale counter increment value at every level change of
+ * the oscillator driving SYSTIME value. The time unit is determined by
+ * the clock frequency of the oscillator and TIMINCA register.
+ * The cyclecounter and timecounter structures are used to convert
+ * the scale counter into nanoseconds. SYSTIME registers need to be converted
+ * to ns values by use of only a right shift.
+ * The following math determines the largest incvalue that will fit into
+ * the available bits in the TIMINCA register:
+ * Period * [ 2 ^ ( MaxWidth - PeriodWidth ) ]
+ * PeriodWidth: Number of bits to store the clock period
+ * MaxWidth: The maximum width value of the TIMINCA register
+ * Period: The clock period for the oscillator, which changes based on the link
+ * speed:
+ * At 10Gb link or no link, the period is 6.4 ns.
+ * At 1Gb link, the period is multiplied by 10. (64ns)
+ * At 100Mb link, the period is multiplied by 100. (640ns)
+ * round(): discard the fractional portion of the calculation
+ *
+ * The calculated value allows us to right shift the SYSTIME register
+ * value in order to quickly convert it into a nanosecond clock,
+ * while allowing for the maximum possible adjustment value.
+ *
+ * LinkSpeed ClockFreq ClockPeriod TIMINCA:IV
+ * 10000Mbps 156.25MHz 6.4*10^-9 0xCCCCCC(0xFFFFF/ns)
+ * 1000 Mbps 62.5 MHz 16 *10^-9 0x800000(0x7FFFF/ns)
+ * 100 Mbps 6.25 MHz 160*10^-9 0xA00000(0xFFFF/ns)
+ * 10 Mbps 0.625 MHz 1600*10^-9 0xC7F380(0xFFF/ns)
+ * FPGA 31.25 MHz 32 *10^-9 0x800000(0x3FFFF/ns)
+ *
+ * These diagrams are only for the 10Gb link period
+ *
+ * +--------------+ +--------------+
+ * | 32 | | 8 | 3 | 20 |
+ * *--------------+ +--------------+
+ * \________ 43 bits ______/ fract
+ *
+ * The 43 bit SYSTIME overflows every
+ * 2^43 * 10^-9 / 3600 = 2.4 hours
+ */
+#define NGBE_INCVAL_10GB 0xCCCCCC
+#define NGBE_INCVAL_1GB 0x2000000 /* in Emerald all speeds are the same */
+#define NGBE_INCVAL_100 0xA00000
+#define NGBE_INCVAL_10 0xC7F380
+#define NGBE_INCVAL_FPGA 0x800000
+
+#define NGBE_INCVAL_SHIFT_10GB 20
+#define NGBE_INCVAL_SHIFT_1GB 22 /* in Emerald all speeds are the same */
+#define NGBE_INCVAL_SHIFT_100 15
+#define NGBE_INCVAL_SHIFT_10 12
+#define NGBE_INCVAL_SHIFT_FPGA 17
+
+#define NGBE_OVERFLOW_PERIOD (HZ * 30)
+#define NGBE_PTP_TX_TIMEOUT (HZ)
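+
+/*
+ * A minimal sketch of the resulting conversion, assuming the 1Gb values
+ * above are in effect (ngbe_ptp_start_cyclecounter() below sets
+ * cc.mult = 1 and takes cc.shift/incval from ngbe_ptp_link_speed_adjust()):
+ *
+ *   u64 stamp = ngbe_ptp_read(&adapter->hw_cc);
+ *   u64 ns = stamp >> NGBE_INCVAL_SHIFT_1GB;
+ *
+ * i.e. SYSTIME counts in units of 2^-22 ns and the timecounter core does
+ * the multiply-and-shift (plus overflow accounting) for us.
+ */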
+
+/**
+ * ngbe_ptp_read - read raw cycle counter (to be used by time counter)
+ * @hw_cc: the cyclecounter structure
+ *
+ * this function reads the cyclecounter registers and is called by the
+ * cyclecounter structure used to construct a ns counter from the
+ * arbitrary fixed point registers
+ */
+static u64 ngbe_ptp_read(const struct cyclecounter *hw_cc)
+{
+ struct ngbe_adapter *adapter =
+ container_of(hw_cc, struct ngbe_adapter, hw_cc);
+ struct ngbe_hw *hw = &adapter->hw;
+ u64 stamp = 0;
+
+ stamp |= (u64)rd32(hw, NGBE_TSEC_1588_SYSTIML);
+ stamp |= (u64)rd32(hw, NGBE_TSEC_1588_SYSTIMH) << 32;
+
+ return stamp;
+}
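+
+/*
+ * E.g. with hypothetical register contents SYSTIML = 0x89ABCDEF and
+ * SYSTIMH = 0x01234567, the assembled stamp is 0x0123456789ABCDEF.
+ */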
+
+/**
+ * ngbe_ptp_convert_to_hwtstamp - convert register value to hw timestamp
+ * @adapter: private adapter structure
+ * @hwtstamp: stack timestamp structure
+ * @systim: unsigned 64bit system time value
+ *
+ * We need to convert the adapter's RX/TXSTMP registers into a hwtstamp value
+ * which can be used by the stack's ptp functions.
+ *
+ * The lock is used to protect consistency of the cyclecounter and the SYSTIME
+ * registers. However, it does not need to protect against the Rx or Tx
+ * timestamp registers, as there can't be a new timestamp until the old one is
+ * unlatched by reading.
+ *
+ * In addition to the timestamp in hardware, some controllers need a software
+ * overflow cyclecounter, and this function takes this into account as well.
+ **/
+static void ngbe_ptp_convert_to_hwtstamp(struct ngbe_adapter *adapter,
+ struct skb_shared_hwtstamps *hwtstamp,
+ u64 timestamp)
+{
+ unsigned long flags;
+ u64 ns;
+
+ memset(hwtstamp, 0, sizeof(*hwtstamp));
+
+ spin_lock_irqsave(&adapter->tmreg_lock, flags);
+ ns = timecounter_cyc2time(&adapter->hw_tc, timestamp);
+ spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+ hwtstamp->hwtstamp = ns_to_ktime(ns);
+}
+
+/**
+ * ngbe_ptp_adjfreq
+ * @ptp: the ptp clock structure
+ * @ppb: parts per billion adjustment from base
+ *
+ * adjust the frequency of the ptp cycle counter by the
+ * indicated ppb from the base frequency.
+ */
+static int ngbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+ struct ngbe_adapter *adapter =
+ container_of(ptp, struct ngbe_adapter, ptp_caps);
+ struct ngbe_hw *hw = &adapter->hw;
+ u64 freq, incval;
+ u32 diff;
+ int neg_adj = 0;
+
+ if (ppb < 0) {
+ neg_adj = 1;
+ ppb = -ppb;
+ }
+
+ smp_mb();
+ incval = READ_ONCE(adapter->base_incval);
+
+ freq = incval;
+ freq *= ppb;
+ diff = div_u64(freq, 1000000000ULL);
+
+ incval = neg_adj ? (incval - diff) : (incval + diff);
+	/* temp setting */
+
+ if (incval > NGBE_TSEC_1588_INC_IV(~0))
+ e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n");
+ wr32(hw, NGBE_TSEC_1588_INC, NGBE_TSEC_1588_INC_IV(incval));
+
+ return 0;
+}
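+
+/*
+ * A worked example of the scaling above, for a hypothetical ppb = 100 with
+ * the 1Gb base incval 0x2000000 (33554432):
+ *
+ *   diff   = div_u64(33554432ULL * 100, 1000000000) = 3
+ *   incval = 33554432 + 3 = 33554435
+ *
+ * so a 100 ppb speed-up nudges the per-cycle increment by three fractional
+ * units.
+ */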
+
+/**
+ * ngbe_ptp_adjtime
+ * @ptp: the ptp clock structure
+ * @delta: offset to adjust the cycle counter by ns
+ *
+ * adjust the timer by resetting the timecounter structure.
+ */
+static int ngbe_ptp_adjtime(struct ptp_clock_info *ptp,
+ s64 delta)
+{
+ struct ngbe_adapter *adapter =
+ container_of(ptp, struct ngbe_adapter, ptp_caps);
+ unsigned long flags;
+
+ spin_lock_irqsave(&adapter->tmreg_lock, flags);
+ timecounter_adjtime(&adapter->hw_tc, delta);
+ spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+ return 0;
+}
+
+/**
+ * ngbe_ptp_gettime64
+ * @ptp: the ptp clock structure
+ * @ts: timespec64 structure to hold the current time value
+ *
+ * read the timecounter and return the correct value on ns,
+ * after converting it into a struct timespec64.
+ */
+static int ngbe_ptp_gettime64(struct ptp_clock_info *ptp,
+ struct timespec64 *ts)
+{
+ struct ngbe_adapter *adapter =
+ container_of(ptp, struct ngbe_adapter, ptp_caps);
+ unsigned long flags;
+ u64 ns;
+
+ spin_lock_irqsave(&adapter->tmreg_lock, flags);
+ ns = timecounter_read(&adapter->hw_tc);
+ spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+ *ts = ns_to_timespec64(ns);
+
+ return 0;
+}
+
+/**
+ * ngbe_ptp_settime64
+ * @ptp: the ptp clock structure
+ * @ts: the timespec64 containing the new time for the cycle counter
+ *
+ * reset the timecounter to use a new base value instead of the kernel
+ * wall timer value.
+ */
+static int ngbe_ptp_settime64(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct ngbe_adapter *adapter =
+ container_of(ptp, struct ngbe_adapter, ptp_caps);
+ u64 ns;
+ unsigned long flags;
+
+ ns = timespec64_to_ns(ts);
+
+ /* reset the timecounter */
+ spin_lock_irqsave(&adapter->tmreg_lock, flags);
+ timecounter_init(&adapter->hw_tc, &adapter->hw_cc, ns);
+ spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+ return 0;
+}
+
+/**
+ * ngbe_ptp_feature_enable
+ * @ptp: the ptp clock structure
+ * @rq: the requested feature to change
+ * @on: whether to enable or disable the feature
+ *
+ * enable (or disable) ancillary features of the phc subsystem.
+ * our driver only supports the PPS feature on the X540
+ */
+static int ngbe_ptp_feature_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq, int on)
+{
+ return -ENOTSUPP;
+}
+
+/**
+ * ngbe_ptp_check_pps_event
+ * @adapter: the private adapter structure
+ *
+ * This function is called by the interrupt routine when checking for
+ * interrupts. It will check and handle a pps event.
+ */
+void ngbe_ptp_check_pps_event(struct ngbe_adapter *adapter)
+{
+ struct ptp_clock_event event;
+
+ event.type = PTP_CLOCK_PPS;
+
+ /* this check is necessary in case the interrupt was enabled via some
+	 * alternative means (e.g. debugfs). Better to check here than
+ * everywhere that calls this function.
+ */
+ if (!adapter->ptp_clock)
+ return;
+
+ /* we don't config PPS on SDP yet, so just return.
+ * ptp_clock_event(adapter->ptp_clock, &event);
+ */
+}
+
+/**
+ * ngbe_ptp_overflow_check - watchdog task to detect SYSTIME overflow
+ * @adapter: private adapter struct
+ *
+ * this watchdog task periodically reads the timecounter
+ * in order to prevent missing when the system time registers wrap
+ * around. This needs to be run approximately twice a minute for the fastest
+ * overflowing hardware. We run it for all hardware since it shouldn't have a
+ * large impact.
+ */
+void ngbe_ptp_overflow_check(struct ngbe_adapter *adapter)
+{
+ bool timeout = time_is_before_jiffies(adapter->last_overflow_check +
+ NGBE_OVERFLOW_PERIOD);
+ struct timespec64 ts;
+
+ if (timeout) {
+ ngbe_ptp_gettime64(&adapter->ptp_caps, &ts);
+ adapter->last_overflow_check = jiffies;
+ }
+}
+
+/**
+ * ngbe_ptp_rx_hang - detect error case when Rx timestamp registers latched
+ * @adapter: private network adapter structure
+ *
+ * this watchdog task is scheduled to detect error case where hardware has
+ * dropped an Rx packet that was timestamped when the ring is full. The
+ * particular error is rare but leaves the device in a state unable to timestamp
+ * any future packets.
+ */
+void ngbe_ptp_rx_hang(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ struct ngbe_ring *rx_ring;
+ u32 tsyncrxctl = rd32(hw, NGBE_PSR_1588_CTL);
+ unsigned long rx_event;
+ int n;
+
+ /* if we don't have a valid timestamp in the registers, just update the
+ * timeout counter and exit
+ */
+ if (!(tsyncrxctl & NGBE_PSR_1588_CTL_VALID)) {
+ adapter->last_rx_ptp_check = jiffies;
+ return;
+ }
+
+ /* determine the most recent watchdog or rx_timestamp event */
+ rx_event = adapter->last_rx_ptp_check;
+ for (n = 0; n < adapter->num_rx_queues; n++) {
+ rx_ring = adapter->rx_ring[n];
+ if (time_after(rx_ring->last_rx_timestamp, rx_event))
+ rx_event = rx_ring->last_rx_timestamp;
+ }
+
+ /* only need to read the high RXSTMP register to clear the lock */
+ if (time_is_before_jiffies(rx_event + 5 * HZ)) {
+ rd32(hw, NGBE_PSR_1588_STMPH);
+ adapter->last_rx_ptp_check = jiffies;
+
+ adapter->rx_hwtstamp_cleared++;
+ e_warn(drv, "clearing RX Timestamp hang");
+ }
+}
+
+/**
+ * ngbe_ptp_clear_tx_timestamp - utility function to clear Tx timestamp state
+ * @adapter: the private adapter structure
+ *
+ * This function should be called whenever the state related to a Tx timestamp
+ * needs to be cleared. This helps ensure that all related bits are reset for
+ * the next Tx timestamp event.
+ */
+static void ngbe_ptp_clear_tx_timestamp(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+
+ rd32(hw, NGBE_TSEC_1588_STMPH);
+ if (adapter->ptp_tx_skb) {
+ dev_kfree_skb_any(adapter->ptp_tx_skb);
+ adapter->ptp_tx_skb = NULL;
+ }
+ clear_bit_unlock(__NGBE_PTP_TX_IN_PROGRESS, &adapter->state);
+}
+
+/**
+ * ngbe_ptp_tx_hwtstamp - utility function which checks for TX time stamp
+ * @adapter: the private adapter struct
+ *
+ * if the timestamp is valid, we convert it into the timecounter ns
+ * value, then store that result into the shhwtstamps structure which
+ * is passed up the network stack
+ */
+static void ngbe_ptp_tx_hwtstamp(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ struct skb_shared_hwtstamps shhwtstamps;
+ u64 regval = 0;
+
+ regval |= (u64)rd32(hw, NGBE_TSEC_1588_STMPL);
+ regval |= (u64)rd32(hw, NGBE_TSEC_1588_STMPH) << 32;
+
+ ngbe_ptp_convert_to_hwtstamp(adapter, &shhwtstamps, regval);
+ skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
+
+ ngbe_ptp_clear_tx_timestamp(adapter);
+}
+
+/**
+ * ngbe_ptp_tx_hwtstamp_work
+ * @work: pointer to the work struct
+ *
+ * This work item polls TSYNCTXCTL valid bit to determine when a Tx hardware
+ * timestamp has been taken for the current skb. It is necessary because the
+ * descriptor's "done" bit does not correlate with the timestamp event.
+ */
+static void ngbe_ptp_tx_hwtstamp_work(struct work_struct *work)
+{
+ struct ngbe_adapter *adapter = container_of(work, struct ngbe_adapter,
+ ptp_tx_work);
+ struct ngbe_hw *hw = &adapter->hw;
+ bool timeout = time_is_before_jiffies(adapter->ptp_tx_start +
+ NGBE_PTP_TX_TIMEOUT);
+ u32 tsynctxctl;
+
+ /* we have to have a valid skb to poll for a timestamp */
+ if (!adapter->ptp_tx_skb) {
+ ngbe_ptp_clear_tx_timestamp(adapter);
+ return;
+ }
+
+ /* stop polling once we have a valid timestamp */
+ tsynctxctl = rd32(hw, NGBE_TSEC_1588_CTL);
+ if (tsynctxctl & NGBE_TSEC_1588_CTL_VALID) {
+ ngbe_ptp_tx_hwtstamp(adapter);
+ return;
+ }
+
+ /* check timeout last in case timestamp event just occurred */
+ if (timeout) {
+ ngbe_ptp_clear_tx_timestamp(adapter);
+ adapter->tx_hwtstamp_timeouts++;
+ e_warn(drv, "clearing Tx Timestamp hang");
+ } else {
+ /* reschedule to keep checking until we timeout */
+ schedule_work(&adapter->ptp_tx_work);
+ }
+}
+
+/**
+ * ngbe_ptp_rx_hwtstamp - utility function which checks for RX time stamp
+ * @adapter: the private adapter structure
+ * @skb: particular skb to send timestamp with
+ *
+ * if the timestamp is valid, we convert it into the timecounter ns
+ * value, then store that result into the shhwtstamps structure which
+ * is passed up the network stack
+ */
+void ngbe_ptp_rx_hwtstamp(struct ngbe_adapter *adapter, struct sk_buff *skb)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u64 regval = 0;
+ u32 tsyncrxctl;
+
+ /*
+ * Read the tsyncrxctl register afterwards in order to prevent taking an
+ * I/O hit on every packet.
+ */
+ tsyncrxctl = rd32(hw, NGBE_PSR_1588_CTL);
+ if (!(tsyncrxctl & NGBE_PSR_1588_CTL_VALID))
+ return;
+
+ regval |= (u64)rd32(hw, NGBE_PSR_1588_STMPL);
+ regval |= (u64)rd32(hw, NGBE_PSR_1588_STMPH) << 32;
+
+ ngbe_ptp_convert_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
+}
+
+/**
+ * ngbe_ptp_get_ts_config - get current hardware timestamping configuration
+ * @adapter: pointer to adapter structure
+ * @ifreq: ioctl data
+ *
+ * This function returns the current timestamping settings. Rather than
+ * attempt to deconstruct registers to fill in the values, simply keep a copy
+ * of the old settings around, and return a copy when requested.
+ */
+int ngbe_ptp_get_ts_config(struct ngbe_adapter *adapter, struct ifreq *ifr)
+{
+ struct hwtstamp_config *config = &adapter->tstamp_config;
+
+ return copy_to_user(ifr->ifr_data, config,
+ sizeof(*config)) ? -EFAULT : 0;
+}
+
+/**
+ * ngbe_ptp_set_timestamp_mode - setup the hardware for the requested mode
+ * @adapter: the private ngbe adapter structure
+ * @config: the hwtstamp configuration requested
+ *
+ * Outgoing time stamping can be enabled and disabled. Play nice and
+ * disable it when requested, although it shouldn't cause any overhead
+ * when no packet needs it. At most one packet in the queue may be
+ * marked for time stamping, otherwise it would be impossible to tell
+ * for sure to which packet the hardware time stamp belongs.
+ *
+ * Incoming time stamping has to be configured via the hardware
+ * filters. Not all combinations are supported, in particular event
+ * type has to be specified. Matching the kind of event packet is
+ * not supported, with the exception of "all V2 events regardless of
+ * level 2 or 4".
+ *
+ * Since hardware always timestamps Path delay packets when timestamping V2
+ * packets, regardless of the type specified in the register, only use V2
+ * Event mode. This more accurately tells the user what the hardware is going
+ * to do anyways.
+ *
+ * Note: this may modify the hwtstamp configuration towards a more general
+ * mode, if required to support the specifically requested mode.
+ */
+static int ngbe_ptp_set_timestamp_mode(struct ngbe_adapter *adapter,
+ struct hwtstamp_config *config)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 tsync_tx_ctl = NGBE_TSEC_1588_CTL_ENABLED;
+ u32 tsync_rx_ctl = NGBE_PSR_1588_CTL_ENABLED;
+ u32 tsync_rx_mtrl = PTP_EV_PORT << 16;
+ bool is_l2 = false;
+ u32 regval;
+
+ /* reserved for future extensions */
+ if (config->flags)
+ return -EINVAL;
+
+ switch (config->tx_type) {
+	case HWTSTAMP_TX_OFF:
+		tsync_tx_ctl = 0;
+		/* fall through */
+ case HWTSTAMP_TX_ON:
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ switch (config->rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ tsync_rx_ctl = 0;
+ tsync_rx_mtrl = 0;
+ adapter->flags &= ~(NGBE_FLAG_RX_HWTSTAMP_ENABLED |
+ NGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ tsync_rx_ctl |= NGBE_PSR_1588_CTL_TYPE_L4_V1;
+ tsync_rx_mtrl |= NGBE_PSR_1588_MSGTYPE_V1_SYNC_MSG;
+ adapter->flags |= (NGBE_FLAG_RX_HWTSTAMP_ENABLED |
+ NGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ tsync_rx_ctl |= NGBE_PSR_1588_CTL_TYPE_L4_V1;
+ tsync_rx_mtrl |= NGBE_PSR_1588_MSGTYPE_V1_DELAY_REQ_MSG;
+ adapter->flags |= (NGBE_FLAG_RX_HWTSTAMP_ENABLED |
+ NGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+ break;
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ tsync_rx_ctl |= NGBE_PSR_1588_CTL_TYPE_EVENT_V2;
+ is_l2 = true;
+ config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+ adapter->flags |= (NGBE_FLAG_RX_HWTSTAMP_ENABLED |
+ NGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_ALL:
+ default:
+ /* register RXMTRL must be set in order to do V1 packets,
+ * therefore it is not possible to time stamp both V1 Sync and
+ * Delay_Req messages unless hardware supports timestamping all
+ * packets => return error
+ */
+ adapter->flags &= ~(NGBE_FLAG_RX_HWTSTAMP_ENABLED |
+ NGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+ config->rx_filter = HWTSTAMP_FILTER_NONE;
+ return -ERANGE;
+ }
+
+ /* define ethertype filter for timestamping L2 packets */
+ if (is_l2)
+ wr32(hw,
+ NGBE_PSR_ETYPE_SWC(NGBE_PSR_ETYPE_SWC_FILTER_1588),
+ (NGBE_PSR_ETYPE_SWC_FILTER_EN | /* enable filter */
+ NGBE_PSR_ETYPE_SWC_1588 | /* enable timestamping */
+ ETH_P_1588)); /* 1588 eth protocol type */
+ else
+ wr32(hw,
+ NGBE_PSR_ETYPE_SWC(NGBE_PSR_ETYPE_SWC_FILTER_1588),
+ 0);
+
+ /* enable/disable TX */
+ regval = rd32(hw, NGBE_TSEC_1588_CTL);
+ regval &= ~NGBE_TSEC_1588_CTL_ENABLED;
+ regval |= tsync_tx_ctl;
+ wr32(hw, NGBE_TSEC_1588_CTL, regval);
+
+ /* enable/disable RX */
+ regval = rd32(hw, NGBE_PSR_1588_CTL);
+ regval &= ~(NGBE_PSR_1588_CTL_ENABLED | NGBE_PSR_1588_CTL_TYPE_MASK);
+ regval |= tsync_rx_ctl;
+ wr32(hw, NGBE_PSR_1588_CTL, regval);
+
+ /* define which PTP packets are time stamped */
+ wr32(hw, NGBE_PSR_1588_MSGTYPE, tsync_rx_mtrl);
+
+ NGBE_WRITE_FLUSH(hw);
+
+ /* clear TX/RX timestamp state, just to be sure */
+ ngbe_ptp_clear_tx_timestamp(adapter);
+ rd32(hw, NGBE_PSR_1588_STMPH);
+
+ return 0;
+}
+
+/**
+ * ngbe_ptp_set_ts_config - user entry point for timestamp mode
+ * @adapter: pointer to adapter struct
+ * @ifreq: ioctl data
+ *
+ * Set hardware to requested mode. If unsupported, return an error with no
+ * changes. Otherwise, store the mode for future reference.
+ */
+int ngbe_ptp_set_ts_config(struct ngbe_adapter *adapter, struct ifreq *ifr)
+{
+ struct hwtstamp_config config;
+ int err;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ err = ngbe_ptp_set_timestamp_mode(adapter, &config);
+ if (err)
+ return err;
+
+ /* save these settings for future reference */
+ memcpy(&adapter->tstamp_config, &config,
+ sizeof(adapter->tstamp_config));
+
+ return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+ -EFAULT : 0;
+}
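+
+/*
+ * A minimal userspace sketch of driving this entry point via the standard
+ * SIOCSHWTSTAMP ioctl ("eth0" and the already-opened socket fd are
+ * assumptions):
+ *
+ *   struct hwtstamp_config cfg = {
+ *           .tx_type   = HWTSTAMP_TX_ON,
+ *           .rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
+ *   };
+ *   struct ifreq ifr = { 0 };
+ *
+ *   strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
+ *   ifr.ifr_data = (void *)&cfg;
+ *   ioctl(sock, SIOCSHWTSTAMP, &ifr);
+ *
+ * On return, cfg holds the mode actually programmed (for instance, any V2
+ * filter is coerced to HWTSTAMP_FILTER_PTP_V2_EVENT above).
+ */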
+
+static void ngbe_ptp_link_speed_adjust(struct ngbe_adapter *adapter,
+ u32 *shift, u32 *incval)
+{
+	/*
+ * Scale the NIC cycle counter by a large factor so that
+ * relatively small corrections to the frequency can be added
+ * or subtracted. The drawbacks of a large factor include
+ * (a) the clock register overflows more quickly, (b) the cycle
+ * counter structure must be able to convert the systime value
+ * to nanoseconds using only a multiplier and a right-shift,
+ * and (c) the value must fit within the timinca register space
+ * => math based on internal DMA clock rate and available bits
+ *
+	 * Note that when there is no link, the internal DMA clock runs at the
+	 * same rate as at 10Gb link speed. Set the registers correctly even
+	 * when the link is down to preserve the clock setting.
+ */
+
+ *shift = NGBE_INCVAL_SHIFT_1GB;
+ *incval = NGBE_INCVAL_1GB;
+}
+
+/**
+ * ngbe_ptp_start_cyclecounter - create the cycle counter from hw
+ * @adapter: pointer to the adapter structure
+ *
+ * This function should be called to set the proper values for the TIMINCA
+ * register and tell the cyclecounter structure what the tick rate of SYSTIME
+ * is. It does not directly modify SYSTIME registers or the timecounter
+ * structure. It should be called whenever a new TIMINCA value is necessary,
+ * such as during initialization or when the link speed changes.
+ */
+void ngbe_ptp_start_cyclecounter(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ unsigned long flags;
+ struct cyclecounter cc;
+ u32 incval = 0;
+
+ /* For some of the boards below this mask is technically incorrect.
+ * The timestamp mask overflows at approximately 61bits. However the
+ * particular hardware does not overflow on an even bitmask value.
+ * Instead, it overflows due to conversion of upper 32bits billions of
+ * cycles. Timecounters are not really intended for this purpose so
+ * they do not properly function if the overflow point isn't 2^N-1.
+ * However, the actual SYSTIME values in question take ~138 years to
+ * overflow. In practice this means they won't actually overflow. A
+ * proper fix to this problem would require modification of the
+ * timecounter delta calculations.
+ */
+ cc.mask = CLOCKSOURCE_MASK(64);
+ cc.mult = 1;
+ cc.shift = 0;
+
+ cc.read = ngbe_ptp_read;
+ ngbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval);
+ wr32(hw, NGBE_TSEC_1588_INC, NGBE_TSEC_1588_INC_IV(incval));
+
+ /* update the base incval used to calculate frequency adjustment */
+ WRITE_ONCE(adapter->base_incval, incval);
+ smp_mb();
+
+ /* need lock to prevent incorrect read while modifying cyclecounter */
+ spin_lock_irqsave(&adapter->tmreg_lock, flags);
+ memcpy(&adapter->hw_cc, &cc, sizeof(adapter->hw_cc));
+ spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+}
+
+/**
+ * ngbe_ptp_reset
+ * @adapter: the ngbe private board structure
+ *
+ * When the MAC resets, all of the hardware configuration for timesync is
+ * reset. This function should be called to re-enable the device for PTP,
+ * using the last known settings. However, we do lose the current clock time,
+ * so we fallback to resetting it based on the kernel's realtime clock.
+ *
+ * This function will maintain the hwtstamp_config settings, and it retriggers
+ * the SDP output if it's enabled.
+ */
+void ngbe_ptp_reset(struct ngbe_adapter *adapter)
+{
+ unsigned long flags;
+
+ /* reset the hardware timestamping mode */
+ ngbe_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config);
+ ngbe_ptp_start_cyclecounter(adapter);
+
+ spin_lock_irqsave(&adapter->tmreg_lock, flags);
+ timecounter_init(&adapter->hw_tc, &adapter->hw_cc,
+ ktime_to_ns(ktime_get_real()));
+ spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+ adapter->last_overflow_check = jiffies;
+}
+
+/**
+ * ngbe_ptp_create_clock
+ * @adapter: the ngbe private adapter structure
+ *
+ * This function performs setup of the user entry point function table and
+ * initializes the PTP clock device used by userspace to access the clock-like
+ * features of the PTP core. It will be called by ngbe_ptp_init, and may
+ * re-use a previously initialized clock (such as during a suspend/resume
+ * cycle).
+ */
+static long ngbe_ptp_create_clock(struct ngbe_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ long err;
+
+ /* do nothing if we already have a clock device */
+ if (!IS_ERR_OR_NULL(adapter->ptp_clock))
+ return 0;
+
+ snprintf(adapter->ptp_caps.name, sizeof(adapter->ptp_caps.name),
+ "%s", netdev->name);
+ adapter->ptp_caps.owner = THIS_MODULE;
+ adapter->ptp_caps.max_adj = 500000000; /* 10^-9s */
+ adapter->ptp_caps.n_alarm = 0;
+ adapter->ptp_caps.n_ext_ts = 0;
+ adapter->ptp_caps.n_per_out = 0;
+ adapter->ptp_caps.pps = 0;
+ adapter->ptp_caps.adjfreq = ngbe_ptp_adjfreq;
+ adapter->ptp_caps.adjtime = ngbe_ptp_adjtime;
+ adapter->ptp_caps.gettime64 = ngbe_ptp_gettime64;
+ adapter->ptp_caps.settime64 = ngbe_ptp_settime64;
+ adapter->ptp_caps.enable = ngbe_ptp_feature_enable;
+
+ adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
+ pci_dev_to_dev(adapter->pdev));
+ if (IS_ERR(adapter->ptp_clock)) {
+ err = PTR_ERR(adapter->ptp_clock);
+ adapter->ptp_clock = NULL;
+ e_dev_err("ptp_clock_register failed\n");
+ return err;
+	}
+	e_dev_info("registered PHC device on %s\n", netdev->name);
+
+ /* Set the default timestamp mode to disabled here. We do this in
+ * create_clock instead of initialization, because we don't want to
+ * override the previous settings during a suspend/resume cycle.
+ */
+ adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+ adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+
+ return 0;
+}
+
+/**
+ * ngbe_ptp_init
+ * @adapter: the ngbe private adapter structure
+ *
+ * This function performs the required steps for enabling ptp
+ * support. If ptp support has already been loaded it simply calls the
+ * cyclecounter init routine and exits.
+ */
+void ngbe_ptp_init(struct ngbe_adapter *adapter)
+{
+ /* initialize the spin lock first, since the user might call the clock
+ * functions any time after we've initialized the ptp clock device.
+ */
+ spin_lock_init(&adapter->tmreg_lock);
+
+ /* obtain a ptp clock device, or re-use an existing device */
+ if (ngbe_ptp_create_clock(adapter))
+ return;
+
+	/* we have a clock, so we can initialize work for timestamps now */
+ INIT_WORK(&adapter->ptp_tx_work, ngbe_ptp_tx_hwtstamp_work);
+
+ /* reset the ptp related hardware bits */
+ ngbe_ptp_reset(adapter);
+
+ /* enter the NGBE_PTP_RUNNING state */
+ set_bit(__NGBE_PTP_RUNNING, &adapter->state);
+}
+
+/**
+ * ngbe_ptp_suspend - stop ptp work items
+ * @adapter: pointer to adapter struct
+ *
+ * This function suspends ptp activity, and prevents more work from being
+ * generated, but does not destroy the clock device.
+ */
+void ngbe_ptp_suspend(struct ngbe_adapter *adapter)
+{
+ /* leave the NGBE_PTP_RUNNING STATE */
+ if (!test_and_clear_bit(__NGBE_PTP_RUNNING, &adapter->state))
+ return;
+
+ adapter->flags2 &= ~NGBE_FLAG2_PTP_PPS_ENABLED;
+
+ cancel_work_sync(&adapter->ptp_tx_work);
+ ngbe_ptp_clear_tx_timestamp(adapter);
+}
+
+/**
+ * ngbe_ptp_stop - destroy the ptp_clock device
+ * @adapter: pointer to adapter struct
+ *
+ * Completely destroy the ptp_clock device, and disable all PTP related
+ * features. Intended to be run when the device is being closed.
+ */
+void ngbe_ptp_stop(struct ngbe_adapter *adapter)
+{
+ /* first, suspend ptp activity */
+ ngbe_ptp_suspend(adapter);
+
+ /* now destroy the ptp clock device */
+ if (adapter->ptp_clock) {
+ ptp_clock_unregister(adapter->ptp_clock);
+ adapter->ptp_clock = NULL;
+ e_dev_info("removed PHC on %s\n",
+ adapter->netdev->name);
+ }
+}
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_sriov.c b/drivers/net/ethernet/netswift/ngbe/ngbe_sriov.c
new file mode 100644
index 0000000000000..785e25287ad39
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_sriov.c
@@ -0,0 +1,1461 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/ipv6.h>
+
+#include "ngbe.h"
+#include "ngbe_type.h"
+#include "ngbe_sriov.h"
+
+#ifdef CONFIG_PCI_IOV
+static int __ngbe_enable_sriov(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int num_vf_macvlans, i;
+ struct vf_macvlans *mv_list;
+
+ adapter->flags |= NGBE_FLAG_SRIOV_ENABLED;
+ e_dev_info("SR-IOV enabled with %d VFs\n", adapter->num_vfs);
+
+ /* Enable VMDq flag so device will be set in VM mode */
+ adapter->flags |= NGBE_FLAG_VMDQ_ENABLED;
+ if (!adapter->ring_feature[RING_F_VMDQ].limit)
+ adapter->ring_feature[RING_F_VMDQ].limit = 1;
+ adapter->ring_feature[RING_F_VMDQ].offset = adapter->num_vfs;
+
+ num_vf_macvlans = hw->mac.num_rar_entries -
+ (NGBE_MAX_PF_MACVLANS + 1 + adapter->num_vfs);
+
+ adapter->mv_list = mv_list = kcalloc(num_vf_macvlans,
+ sizeof(struct vf_macvlans),
+ GFP_KERNEL);
+ if (mv_list) {
+ /* Initialize list of VF macvlans */
+ INIT_LIST_HEAD(&adapter->vf_mvs.l);
+ for (i = 0; i < num_vf_macvlans; i++) {
+ mv_list->vf = -1;
+ mv_list->free = true;
+ list_add(&mv_list->l, &adapter->vf_mvs.l);
+ mv_list++;
+ }
+ }
+
+ /* Initialize default switching mode VEB */
+ wr32m(hw, NGBE_PSR_CTL,
+ NGBE_PSR_CTL_SW_EN, NGBE_PSR_CTL_SW_EN);
+
+ /* If call to enable VFs succeeded then allocate memory
+ * for per VF control structures.
+ */
+ adapter->vfinfo = kcalloc(adapter->num_vfs,
+ sizeof(struct vf_data_storage), GFP_KERNEL);
+ if (!adapter->vfinfo) {
+ adapter->num_vfs = 0;
+ e_dev_info("failed to allocate memory for VF Data Storage\n");
+ return -ENOMEM;
+ }
+
+ /* enable L2 switch and replication */
+ adapter->flags |= NGBE_FLAG_SRIOV_L2SWITCH_ENABLE |
+ NGBE_FLAG_SRIOV_REPLICATION_ENABLE;
+
+ /* We do not support RSS w/ SR-IOV */
+ adapter->ring_feature[RING_F_RSS].limit = 1;
+
+ /* enable spoof checking for all VFs */
+ for (i = 0; i < adapter->num_vfs; i++) {
+ /* enable spoof checking for all VFs */
+ adapter->vfinfo[i].spoofchk_enabled = true;
+
+ /* Untrust all VFs */
+ adapter->vfinfo[i].trusted = false;
+
+ /* set the default xcast mode */
+ adapter->vfinfo[i].xcast_mode = NGBEVF_XCAST_MODE_NONE;
+ }
+
+ wr32m(hw, NGBE_CFG_PORT_CTL,
+ NGBE_CFG_PORT_CTL_NUM_VT_MASK, NGBE_CFG_PORT_CTL_NUM_VT_8);
+
+ return 0;
+}
+
+#define NGBE_BA4_ADDR(vfinfo, reg) \
+ ((u8 __iomem *)((u8 *)(vfinfo)->b4_addr + (reg)))
+
+/**
+ * ngbe_get_vfs - Find and take references to all vf devices
+ * @adapter: Pointer to adapter struct
+ */
+static void ngbe_get_vfs(struct ngbe_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ u16 vendor = pdev->vendor;
+ struct pci_dev *vfdev;
+ int vf = 0;
+ u16 vf_id;
+ int pos;
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+ if (!pos)
+ return;
+ pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id);
+
+ vfdev = pci_get_device(vendor, vf_id, NULL);
+ for (; vfdev; vfdev = pci_get_device(vendor, vf_id, vfdev)) {
+ struct vf_data_storage *vfinfo;
+ if (!vfdev->is_virtfn)
+ continue;
+ if (vfdev->physfn != pdev)
+ continue;
+ if (vf >= adapter->num_vfs)
+ continue;
+
+ /*pci_dev_get(vfdev);*/
+ vfinfo = &adapter->vfinfo[vf];
+ vfinfo->vfdev = vfdev;
+ vfinfo->b4_addr = ioremap(pci_resource_start(vfdev, 4), 64);
+
+ ++vf;
+ }
+}
+
+/**
+ * ngbe_put_vfs - Release references to all vf devices
+ * @adapter: Pointer to adapter struct
+ */
+static void ngbe_put_vfs(struct ngbe_adapter *adapter)
+{
+ unsigned int num_vfs = adapter->num_vfs, vf;
+
+ /* put the reference to all of the vf devices */
+ for (vf = 0; vf < num_vfs; ++vf) {
+ struct vf_data_storage *vfinfo;
+ struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev;
+
+ if (!vfdev)
+ continue;
+
+ vfinfo = &adapter->vfinfo[vf];
+ iounmap(vfinfo->b4_addr);
+ vfinfo->b4_addr = NULL;
+ vfinfo->vfdev = NULL;
+ /*pci_dev_put(vfdev);*/
+ }
+}
+
+/* Note this function is called when the user wants to enable SR-IOV
+ * VFs using the now deprecated module parameter
+ */
+void ngbe_enable_sriov(struct ngbe_adapter *adapter)
+{
+ int pre_existing_vfs = 0;
+
+ pre_existing_vfs = pci_num_vf(adapter->pdev);
+ if (!pre_existing_vfs && !adapter->num_vfs)
+ return;
+
+ /* If there are pre-existing VFs then we have to force
+	 * use of that many - override any module parameter value.
+ * This may result from the user unloading the PF driver
+ * while VFs were assigned to guest VMs or because the VFs
+ * have been created via the new PCI SR-IOV sysfs interface.
+ */
+ if (pre_existing_vfs) {
+ adapter->num_vfs = pre_existing_vfs;
+ dev_warn(&adapter->pdev->dev,
+ "Virtual Functions already enabled for this device -"
+ "Please reload all VF drivers to avoid spoofed packet "
+ "errors\n");
+ } else {
+ int err;
+ /*
+ * The sapphire supports up to 64 VFs per physical function
+ * but this implementation limits allocation to 63 so that
+ * basic networking resources are still available to the
+		 * physical function. If the user requests greater than
+ * 63 VFs then it is an error - reset to default of zero.
+ */
+ adapter->num_vfs = min_t(unsigned int, adapter->num_vfs,
+ NGBE_MAX_VFS_DRV_LIMIT);
+
+ err = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
+ if (err) {
+ e_err(probe, "Failed to enable PCI sriov: %d\n", err);
+ adapter->num_vfs = 0;
+ return;
+ }
+ }
+
+ if (!__ngbe_enable_sriov(adapter)) {
+ ngbe_get_vfs(adapter);
+ return;
+ }
+
+ /* If we have gotten to this point then there is no memory available
+ * to manage the VF devices - print message and bail.
+ */
+ e_err(probe, "Unable to allocate memory for VF Data Storage - "
+ "SRIOV disabled\n");
+ ngbe_disable_sriov(adapter);
+}
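+
+/*
+ * The non-deprecated path is the PCI sysfs interface mentioned above, e.g.
+ * (the device address is an assumption):
+ *
+ *   echo 4 > /sys/bus/pci/devices/0000:01:00.0/sriov_numvfs
+ *
+ * which lets the PCI core call back into the driver instead of relying on
+ * the module parameter.
+ */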
+#endif /* CONFIG_PCI_IOV */
+
+int ngbe_disable_sriov(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+
+ /* set num VFs to 0 to prevent access to vfinfo */
+ adapter->num_vfs = 0;
+
+ /* put the reference to all of the vf devices */
+#ifdef CONFIG_PCI_IOV
+ ngbe_put_vfs(adapter);
+#endif
+ /* free VF control structures */
+ kfree(adapter->vfinfo);
+ adapter->vfinfo = NULL;
+
+ /* free macvlan list */
+ kfree(adapter->mv_list);
+ adapter->mv_list = NULL;
+
+ /* if SR-IOV is already disabled then there is nothing to do */
+ if (!(adapter->flags & NGBE_FLAG_SRIOV_ENABLED))
+ return 0;
+
+#ifdef CONFIG_PCI_IOV
+ /*
+ * If our VFs are assigned we cannot shut down SR-IOV
+ * without causing issues, so just leave the hardware
+ * available but disabled
+ */
+ if (pci_vfs_assigned(adapter->pdev)) {
+ e_dev_warn("Unloading driver while VFs are assigned -"
+ "VFs will not be deallocated\n");
+ return -EPERM;
+ }
+ /* disable iov and allow time for transactions to clear */
+ pci_disable_sriov(adapter->pdev);
+#endif
+
+ /* set default pool back to 0 */
+ wr32m(hw, NGBE_PSR_VM_CTL,
+ NGBE_PSR_VM_CTL_POOL_MASK, 0);
+ NGBE_WRITE_FLUSH(hw);
+
+ adapter->ring_feature[RING_F_VMDQ].offset = 0;
+
+ /* take a breather then clean up driver data */
+ msleep(100);
+
+ adapter->flags &= ~NGBE_FLAG_SRIOV_ENABLED;
+
+ /* Disable VMDq flag so device will be set in VM mode */
+ if (adapter->ring_feature[RING_F_VMDQ].limit == 1) {
+ adapter->flags &= ~NGBE_FLAG_VMDQ_ENABLED;
+ }
+
+ return 0;
+}
+
+static int ngbe_set_vf_multicasts(struct ngbe_adapter *adapter,
+ u32 *msgbuf, u32 vf)
+{
+ u16 entries = (msgbuf[0] & NGBE_VT_MSGINFO_MASK)
+ >> NGBE_VT_MSGINFO_SHIFT;
+ u16 *hash_list = (u16 *)&msgbuf[1];
+ struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
+ struct ngbe_hw *hw = &adapter->hw;
+ int i;
+ u32 vector_bit;
+ u32 vector_reg;
+ u32 mta_reg;
+ u32 vmolr = rd32(hw, NGBE_PSR_VM_L2CTL(vf));
+
+ /* only so many hash values supported */
+ entries = min(entries, (u16)NGBE_MAX_VF_MC_ENTRIES);
+
+ /* salt away the number of multi cast addresses assigned
+ * to this VF for later use to restore when the PF multi cast
+ * list changes
+ */
+ vfinfo->num_vf_mc_hashes = entries;
+
+ /* VFs are limited to using the MTA hash table for their multicast
+ * addresses */
+ for (i = 0; i < entries; i++)
+ vfinfo->vf_mc_hashes[i] = hash_list[i];
+
+ for (i = 0; i < vfinfo->num_vf_mc_hashes; i++) {
+ vector_reg = (vfinfo->vf_mc_hashes[i] >> 5) & 0x7F;
+ vector_bit = vfinfo->vf_mc_hashes[i] & 0x1F;
+ /* errata 5: maintain a copy of the register table conf */
+ mta_reg = hw->mac.mta_shadow[vector_reg];
+ mta_reg |= (1 << vector_bit);
+ hw->mac.mta_shadow[vector_reg] = mta_reg;
+ wr32(hw, NGBE_PSR_MC_TBL(vector_reg), mta_reg);
+ }
+ vmolr |= NGBE_PSR_VM_L2CTL_ROMPE;
+ wr32(hw, NGBE_PSR_VM_L2CTL(vf), vmolr);
+
+ return 0;
+}
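+
+/*
+ * For illustration, a hypothetical hash value 0x0ABC selects
+ *
+ *   vector_reg = (0x0ABC >> 5) & 0x7F = 85
+ *   vector_bit =  0x0ABC       & 0x1F = 28
+ *
+ * i.e. bit 28 of NGBE_PSR_MC_TBL(85), mirrored into mta_shadow per errata 5.
+ */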
+
+void ngbe_restore_vf_multicasts(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ struct vf_data_storage *vfinfo;
+ u32 i, j;
+ u32 vector_bit;
+ u32 vector_reg;
+
+ for (i = 0; i < adapter->num_vfs; i++) {
+ u32 vmolr = rd32(hw, NGBE_PSR_VM_L2CTL(i));
+ vfinfo = &adapter->vfinfo[i];
+ for (j = 0; j < vfinfo->num_vf_mc_hashes; j++) {
+ hw->addr_ctrl.mta_in_use++;
+ vector_reg = (vfinfo->vf_mc_hashes[j] >> 5) & 0x7F;
+ vector_bit = vfinfo->vf_mc_hashes[j] & 0x1F;
+ wr32m(hw, NGBE_PSR_MC_TBL(vector_reg),
+ 1 << vector_bit, 1 << vector_bit);
+ /* errata 5: maintain a copy of the reg table conf */
+ hw->mac.mta_shadow[vector_reg] |= (1 << vector_bit);
+ }
+ if (vfinfo->num_vf_mc_hashes)
+ vmolr |= NGBE_PSR_VM_L2CTL_ROMPE;
+ else
+ vmolr &= ~NGBE_PSR_VM_L2CTL_ROMPE;
+ wr32(hw, NGBE_PSR_VM_L2CTL(i), vmolr);
+ }
+
+ /* Restore any VF macvlans */
+ ngbe_full_sync_mac_table(adapter);
+}
+
+int ngbe_set_vf_vlan(struct ngbe_adapter *adapter, int add, int vid, u16 vf)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+
+ /* VLAN 0 is a special case, don't allow it to be removed */
+ if (!vid && !add)
+ return 0;
+
+ return TCALL(hw, mac.ops.set_vfta, vid, vf, (bool)add);
+}
+
+static int ngbe_set_vf_lpe(struct ngbe_adapter *adapter, u32 max_frame,
+ u32 vf)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 max_frs, reg_val;
+
+ /*
+ * For sapphire we have to keep all PFs and VFs operating with
+ * the same max_frame value in order to avoid sending an oversize
+ * frame to a VF. In order to guarantee this is handled correctly
+ * for all cases we have several special exceptions to take into
+ * account before we can enable the VF for receive
+ */
+ struct net_device *dev = adapter->netdev;
+ int pf_max_frame = dev->mtu + ETH_HLEN;
+ u32 vf_shift, vfre;
+ s32 err = 0;
+
+ switch (adapter->vfinfo[vf].vf_api) {
+ case ngbe_mbox_api_11:
+ case ngbe_mbox_api_12:
+ case ngbe_mbox_api_13:
+ /*
+ * Version 1.1 supports jumbo frames on VFs if PF has
+ * jumbo frames enabled which means legacy VFs are
+ * disabled
+ */
+ if (pf_max_frame > ETH_FRAME_LEN)
+ break;
+ /* fall through */
+ default:
+ /*
+ * If the PF or VF are running w/ jumbo frames enabled
+ * we need to shut down the VF Rx path as we cannot
+ * support jumbo frames on legacy VFs
+ */
+ if ((pf_max_frame > ETH_FRAME_LEN) ||
+ (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)))
+ err = -EINVAL;
+ break;
+ }
+
+ /* determine VF receive enable location */
+ vf_shift = vf;
+
+ /* enable or disable receive depending on error */
+ vfre = rd32(hw, NGBE_RDM_POOL_RE);
+ if (err)
+ vfre &= ~(1 << vf_shift);
+ else
+ vfre |= 1 << vf_shift;
+ wr32(hw, NGBE_RDM_POOL_RE, vfre);
+
+ if (err) {
+ e_err(drv, "VF max_frame %d out of range\n", max_frame);
+ return err;
+ }
+
+ /* pull current max frame size from hardware */
+ max_frs = DIV_ROUND_UP(max_frame, 1024);
+ reg_val = rd32(hw, NGBE_MAC_WDG_TIMEOUT) &
+ NGBE_MAC_WDG_TIMEOUT_WTO_MASK;
+ if (max_frs > (reg_val + NGBE_MAC_WDG_TIMEOUT_WTO_DELTA)) {
+ wr32(hw, NGBE_MAC_WDG_TIMEOUT,
+ max_frs - NGBE_MAC_WDG_TIMEOUT_WTO_DELTA);
+ }
+
+ e_info(hw, "VF requests change max MTU to %d\n", max_frame);
+
+ return 0;
+}
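+
+/*
+ * E.g. for a hypothetical VF request of max_frame = 1518 (1500-byte payload
+ * plus Ethernet header and FCS), max_frs = DIV_ROUND_UP(1518, 1024) = 2
+ * units of 1024 bytes in the comparison above.
+ */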
+
+void ngbe_set_vmolr(struct ngbe_hw *hw, u16 vf, bool aupe)
+{
+ u32 vmolr = rd32(hw, NGBE_PSR_VM_L2CTL(vf));
+ vmolr |= NGBE_PSR_VM_L2CTL_BAM;
+ if (aupe)
+ vmolr |= NGBE_PSR_VM_L2CTL_AUPE;
+ else
+ vmolr &= ~NGBE_PSR_VM_L2CTL_AUPE;
+ wr32(hw, NGBE_PSR_VM_L2CTL(vf), vmolr);
+}
+
+static void ngbe_set_vmvir(struct ngbe_adapter *adapter,
+ u16 vid, u16 qos, u16 vf)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 vmvir = vid | (qos << VLAN_PRIO_SHIFT) |
+ NGBE_TDM_VLAN_INS_VLANA_DEFAULT;
+
+ wr32(hw, NGBE_TDM_VLAN_INS(vf), vmvir);
+}
+
+static void ngbe_clear_vmvir(struct ngbe_adapter *adapter, u32 vf)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+
+ wr32(hw, NGBE_TDM_VLAN_INS(vf), 0);
+}
+
+static inline void ngbe_vf_reset_event(struct ngbe_adapter *adapter, u16 vf)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
+ u8 num_tcs = netdev_get_num_tc(adapter->netdev);
+
+ /* add PF assigned VLAN or VLAN 0 */
+ ngbe_set_vf_vlan(adapter, true, vfinfo->pf_vlan, vf);
+
+ /* reset offloads to defaults */
+ ngbe_set_vmolr(hw, vf, !vfinfo->pf_vlan);
+
+ /* set outgoing tags for VFs */
+ if (!vfinfo->pf_vlan && !vfinfo->pf_qos && !num_tcs) {
+ ngbe_clear_vmvir(adapter, vf);
+ } else {
+ if (vfinfo->pf_qos || !num_tcs)
+ ngbe_set_vmvir(adapter, vfinfo->pf_vlan,
+ vfinfo->pf_qos, vf);
+ else
+ ngbe_set_vmvir(adapter, vfinfo->pf_vlan,
+ adapter->default_up, vf);
+
+ if (vfinfo->spoofchk_enabled)
+ TCALL(hw, mac.ops.set_vlan_anti_spoofing, true, vf);
+ }
+
+ /* reset multicast table array for vf */
+ adapter->vfinfo[vf].num_vf_mc_hashes = 0;
+
+ /* Flush and reset the mta with the new values */
+ ngbe_set_rx_mode(adapter->netdev);
+
+ ngbe_del_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf);
+
+ /* reset VF api back to unknown */
+ adapter->vfinfo[vf].vf_api = ngbe_mbox_api_10;
+}
+
+int ngbe_set_vf_mac(struct ngbe_adapter *adapter,
+ u16 vf, unsigned char *mac_addr)
+{
+ s32 retval = 0;
+ ngbe_del_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf);
+ retval = ngbe_add_mac_filter(adapter, mac_addr, vf);
+ if (retval >= 0)
+ memcpy(adapter->vfinfo[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
+ else
+ memset(adapter->vfinfo[vf].vf_mac_addresses, 0, ETH_ALEN);
+
+ return retval;
+}
+
+static int ngbe_negotiate_vf_api(struct ngbe_adapter *adapter,
+ u32 *msgbuf, u32 vf)
+{
+ int api = msgbuf[1];
+
+ switch (api) {
+ case ngbe_mbox_api_10:
+ case ngbe_mbox_api_11:
+ case ngbe_mbox_api_12:
+ case ngbe_mbox_api_13:
+ adapter->vfinfo[vf].vf_api = api;
+ return 0;
+ default:
+ break;
+ }
+
+ e_info(drv, "VF %d requested invalid api version %u\n", vf, api);
+
+ return -1;
+}
+
+static int ngbe_get_vf_queues(struct ngbe_adapter *adapter,
+ u32 *msgbuf, u32 vf)
+{
+ struct net_device *dev = adapter->netdev;
+ unsigned int default_tc = 0;
+ u8 num_tcs = netdev_get_num_tc(dev);
+
+ /* verify the PF is supporting the correct APIs */
+ switch (adapter->vfinfo[vf].vf_api) {
+ case ngbe_mbox_api_20:
+ case ngbe_mbox_api_11:
+ break;
+ default:
+ return -1;
+ }
+
+ /* only allow 1 Tx queue for bandwidth limiting */
+ msgbuf[NGBE_VF_TX_QUEUES] = 1;
+ msgbuf[NGBE_VF_RX_QUEUES] = 1;
+
+ /* notify VF of need for VLAN tag stripping, and correct queue */
+ if (num_tcs)
+ msgbuf[NGBE_VF_TRANS_VLAN] = num_tcs;
+ else if (adapter->vfinfo[vf].pf_vlan || adapter->vfinfo[vf].pf_qos)
+ msgbuf[NGBE_VF_TRANS_VLAN] = 1;
+ else
+ msgbuf[NGBE_VF_TRANS_VLAN] = 0;
+
+ /* notify VF of default queue */
+ msgbuf[NGBE_VF_DEF_QUEUE] = default_tc;
+
+ return 0;
+}
+
+static int ngbe_get_vf_link_status(struct ngbe_adapter *adapter,
+ u32 *msgbuf, u32 vf)
+{
+ /* verify the PF is supporting the correct APIs */
+ switch (adapter->vfinfo[vf].vf_api) {
+ case ngbe_mbox_api_11:
+ case ngbe_mbox_api_12:
+ case ngbe_mbox_api_13:
+ break;
+ default:
+ return -1;
+ }
+
+ if (adapter->link_up)
+ msgbuf[1] = NGBE_VF_STATUS_LINKUP;
+ else
+ msgbuf[1] = 0;
+
+ return 0;
+}
+
+static int ngbe_set_vf_macvlan(struct ngbe_adapter *adapter,
+ u16 vf, int index, unsigned char *mac_addr)
+{
+ struct list_head *pos;
+ struct vf_macvlans *entry;
+ s32 retval = 0;
+
+ if (index <= 1) {
+ list_for_each(pos, &adapter->vf_mvs.l) {
+ entry = list_entry(pos, struct vf_macvlans, l);
+ if (entry->vf == vf) {
+ entry->vf = -1;
+ entry->free = true;
+ entry->is_macvlan = false;
+ ngbe_del_mac_filter(adapter,
+ entry->vf_macvlan, vf);
+ }
+ }
+ }
+
+ /*
+ * If index was zero then we were asked to clear the uc list
+ * for the VF. We're done.
+ */
+ if (!index)
+ return 0;
+
+ entry = NULL;
+
+ list_for_each(pos, &adapter->vf_mvs.l) {
+ entry = list_entry(pos, struct vf_macvlans, l);
+ if (entry->free)
+ break;
+ }
+
+ /*
+ * If we traversed the entire list and didn't find a free entry
+ * then we're out of space on the RAR table. Also entry may
+ * be NULL because the original memory allocation for the list
+ * failed, which is not fatal but does mean we can't support
+ * VF requests for MACVLAN because we couldn't allocate
+	 * memory for the list management required.
+ */
+ if (!entry || !entry->free)
+ return -ENOSPC;
+
+ retval = ngbe_add_mac_filter(adapter, mac_addr, vf);
+ if (retval >= 0) {
+ entry->free = false;
+ entry->is_macvlan = true;
+ entry->vf = vf;
+ memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);
+ }
+
+ return retval;
+}
+
+#ifdef CONFIG_PCI_IOV
+int ngbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask)
+{
+	unsigned char vf_mac_addr[ETH_ALEN];
+ struct ngbe_adapter *adapter = pci_get_drvdata(pdev);
+ unsigned int vfn = (event_mask & 0x7);
+ bool enable = ((event_mask & 0x10000000U) != 0);
+
+ if (enable) {
+ memset(vf_mac_addr, 0, ETH_ALEN);
+		memcpy(adapter->vfinfo[vfn].vf_mac_addresses, vf_mac_addr,
+		       ETH_ALEN);
+ }
+
+ return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
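+/*
+ * With a single queue per pool the queue index equals the VF index, so
+ * shifting the drop-enable bit by 'vf' targets that VF's one Rx queue.
+ */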
+static inline void ngbe_write_qde(struct ngbe_adapter *adapter, u32 vf,
+ u32 qde)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 q_per_pool = 1;
+ u32 reg = 0;
+ u32 i = vf * q_per_pool;
+
+ reg = rd32(hw, NGBE_RDM_PF_QDE);
+ reg |= qde << i;
+
+	wr32(hw, NGBE_RDM_PF_QDE, reg);
+}
+
+static inline void ngbe_write_hide_vlan(struct ngbe_adapter *adapter, u32 vf,
+ u32 hide_vlan)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 q_per_pool = 1;
+ u32 reg = 0;
+	u32 i = vf * q_per_pool;
+
+	reg = rd32(hw, NGBE_RDM_PF_HIDE);
+
+	/* set or clear only this VF's bit; ANDing the whole register with
+	 * (hide_vlan << i) when hide_vlan is 0 would wipe every other VF's
+	 * setting
+	 */
+	if (hide_vlan == 1)
+		reg |= 1 << i;
+	else
+		reg &= ~(1 << i);
+
+ wr32(hw, NGBE_RDM_PF_HIDE, reg);
+}
+
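+/*
+ * Handle a reset request from a VF: restore default filters and offloads,
+ * re-enable the pool's Tx/Rx, and reply with the VF's permanent MAC address
+ * (plus the multicast filter type) piggybacked on the ACK.
+ */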
+static int ngbe_vf_reset_msg(struct ngbe_adapter *adapter, u16 vf)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ unsigned char *vf_mac = adapter->vfinfo[vf].vf_mac_addresses;
+ u32 reg, vf_shift;
+ u32 msgbuf[4] = {0, 0, 0, 0};
+ u8 *addr = (u8 *)(&msgbuf[1]);
+ struct net_device *dev = adapter->netdev;
+ int pf_max_frame;
+
+ e_info(probe, "VF Reset msg received from vf %d\n", vf);
+
+ /* reset the filters for the device */
+ ngbe_vf_reset_event(adapter, vf);
+
+ /* set vf mac address */
+ if (!is_zero_ether_addr(vf_mac))
+ ngbe_set_vf_mac(adapter, vf, vf_mac);
+
+ vf_shift = vf;
+
+ /* enable transmit for vf */
+ wr32m(hw, NGBE_TDM_POOL_TE,
+ 1 << vf, 1 << vf);
+
+ /* force drop enable for all VF Rx queues */
+ ngbe_write_qde(adapter, vf, 1);
+
+ /* enable receive for vf */
+ reg = rd32(hw, NGBE_RDM_POOL_RE);
+ reg |= 1 << vf_shift;
+
+ pf_max_frame = dev->mtu + ETH_HLEN;
+
+ if (pf_max_frame > ETH_FRAME_LEN)
+ reg &= ~(1 << vf_shift);
+ wr32(hw, NGBE_RDM_POOL_RE, reg);
+
+ /* enable VF mailbox for further messages */
+ adapter->vfinfo[vf].clear_to_send = true;
+
+ /* reply to reset with ack and vf mac address */
+ msgbuf[0] = NGBE_VF_RESET;
+ if (!is_zero_ether_addr(vf_mac)) {
+ msgbuf[0] |= NGBE_VT_MSGTYPE_ACK;
+ memcpy(addr, vf_mac, ETH_ALEN);
+ } else {
+ msgbuf[0] |= NGBE_VT_MSGTYPE_NACK;
+ dev_warn(pci_dev_to_dev(adapter->pdev),
+ "VF %d has no MAC address assigned, you may have to "
+ "assign one manually\n", vf);
+ }
+
+ /*
+ * Piggyback the multicast filter type so VF can compute the
+ * correct vectors
+ */
+ msgbuf[3] = hw->mac.mc_filter_type;
+ ngbe_write_mbx(hw, msgbuf, NGBE_VF_PERMADDR_MSG_LEN, vf);
+
+ return 0;
+}
+
+static int ngbe_set_vf_mac_addr(struct ngbe_adapter *adapter,
+ u32 *msgbuf, u16 vf)
+{
+ u8 *new_mac = ((u8 *)(&msgbuf[1]));
+
+ if (!is_valid_ether_addr(new_mac)) {
+ e_warn(drv, "VF %d attempted to set invalid mac\n", vf);
+ return -1;
+ }
+
+ if (adapter->vfinfo[vf].pf_set_mac &&
+ memcmp(adapter->vfinfo[vf].vf_mac_addresses, new_mac,
+ ETH_ALEN)) {
+ u8 *pm = adapter->vfinfo[vf].vf_mac_addresses;
+ e_warn(drv,
+ "VF %d attempted to set a new MAC address but it already "
+ "has an administratively set MAC address "
+ "%2.2X:%2.2X:%2.2X:%2.2X:%2.2X:%2.2X\n",
+ vf, pm[0], pm[1], pm[2], pm[3], pm[4], pm[5]);
+ e_warn(drv, "Check the VF driver and if it is not using the "
+ "correct MAC address you may need to reload the VF "
+ "driver\n");
+ return -1;
+ }
+ return ngbe_set_vf_mac(adapter, vf, new_mac) < 0;
+}
+
+#ifdef CONFIG_PCI_IOV
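+/*
+ * Walk the VLAN switch control (VLVF) table by selecting each index via
+ * PSR_VLAN_SWC_IDX and reading PSR_VLAN_SWC back; index 0 is
+ * short-circuited for VLAN 0, so the search starts at entry 1.
+ */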
+static int ngbe_find_vlvf_entry(struct ngbe_hw *hw, u32 vlan)
+{
+ u32 vlvf;
+ s32 regindex;
+
+ /* short cut the special case */
+ if (vlan == 0)
+ return 0;
+
+ /* Search for the vlan id in the VLVF entries */
+ for (regindex = 1; regindex < NGBE_PSR_VLAN_SWC_ENTRIES; regindex++) {
+ wr32(hw, NGBE_PSR_VLAN_SWC_IDX, regindex);
+ vlvf = rd32(hw, NGBE_PSR_VLAN_SWC);
+ if ((vlvf & VLAN_VID_MASK) == vlan)
+ break;
+ }
+
+ /* Return a negative value if not found */
+ if (regindex >= NGBE_PSR_VLAN_SWC_ENTRIES)
+ regindex = -1;
+
+ return regindex;
+}
+#endif /* CONFIG_PCI_IOV */
+
+static int ngbe_set_vf_vlan_msg(struct ngbe_adapter *adapter,
+ u32 *msgbuf, u16 vf)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int add = (msgbuf[0] & NGBE_VT_MSGINFO_MASK) >> NGBE_VT_MSGINFO_SHIFT;
+ int vid = (msgbuf[1] & NGBE_PSR_VLAN_SWC_VLANID_MASK);
+ int err;
+ u8 tcs = netdev_get_num_tc(adapter->netdev);
+
+ if (adapter->vfinfo[vf].pf_vlan || tcs) {
+ e_warn(drv,
+ "VF %d attempted to override administratively set VLAN "
+ "configuration\n"
+ "Reload the VF driver to resume operations\n",
+ vf);
+ return -1;
+ }
+
+ if (add)
+ adapter->vfinfo[vf].vlan_count++;
+ else if (adapter->vfinfo[vf].vlan_count)
+ adapter->vfinfo[vf].vlan_count--;
+
+ /* in case of promiscuous mode any VLAN filter set for a VF must
+ * also have the PF pool added to it.
+ */
+ if (add && adapter->netdev->flags & IFF_PROMISC)
+ err = ngbe_set_vf_vlan(adapter, add, vid, VMDQ_P(0));
+
+ err = ngbe_set_vf_vlan(adapter, add, vid, vf);
+ if (!err && adapter->vfinfo[vf].spoofchk_enabled)
+ TCALL(hw, mac.ops.set_vlan_anti_spoofing, true, vf);
+
+#ifdef CONFIG_PCI_IOV
+ /* Go through all the checks to see if the VLAN filter should
+ * be wiped completely.
+ */
+ if (!add && adapter->netdev->flags & IFF_PROMISC) {
+ u32 bits = 0, vlvf;
+ s32 reg_ndx;
+
+ reg_ndx = ngbe_find_vlvf_entry(hw, vid);
+ if (reg_ndx < 0)
+ goto out;
+ wr32(hw, NGBE_PSR_VLAN_SWC_IDX, reg_ndx);
+ vlvf = rd32(hw, NGBE_PSR_VLAN_SWC);
+ /* See if any other pools are set for this VLAN filter
+ * entry other than the PF.
+ */
+ if (VMDQ_P(0) < 32) {
+ bits = rd32(hw, NGBE_PSR_VLAN_SWC_VM_L);
+ bits &= ~(1 << VMDQ_P(0));
+		} else {
+			bits = rd32(hw, NGBE_PSR_VLAN_SWC_VM_L);
+			bits &= ~(1 << (VMDQ_P(0) - 32));
+		}
+
+ /* If the filter was removed then ensure PF pool bit
+ * is cleared if the PF only added itself to the pool
+ * because the PF is in promiscuous mode.
+ */
+ if ((vlvf & VLAN_VID_MASK) == vid &&
+ !test_bit(vid, adapter->active_vlans) &&
+ !bits)
+ ngbe_set_vf_vlan(adapter, add, vid, VMDQ_P(0));
+ }
+
+out:
+#endif
+ return err;
+}
+
+static int ngbe_set_vf_macvlan_msg(struct ngbe_adapter *adapter,
+ u32 *msgbuf, u16 vf)
+{
+ u8 *new_mac = ((u8 *)(&msgbuf[1]));
+ int index = (msgbuf[0] & NGBE_VT_MSGINFO_MASK) >>
+ NGBE_VT_MSGINFO_SHIFT;
+ int err;
+
+ if (adapter->vfinfo[vf].pf_set_mac && index > 0) {
+ e_warn(drv,
+ "VF %d requested MACVLAN filter but is administratively denied\n",
+ vf);
+ return -1;
+ }
+
+	/* A non-zero index indicates the VF is setting a filter */
+ if (index) {
+ if (!is_valid_ether_addr(new_mac)) {
+ e_warn(drv, "VF %d attempted to set invalid mac\n", vf);
+ return -1;
+ }
+
+ /*
+ * If the VF is allowed to set MAC filters then turn off
+ * anti-spoofing to avoid false positives.
+ */
+ if (adapter->vfinfo[vf].spoofchk_enabled)
+ ngbe_ndo_set_vf_spoofchk(adapter->netdev, vf, false);
+ }
+
+ err = ngbe_set_vf_macvlan(adapter, vf, index, new_mac);
+ if (err == -ENOSPC)
+ e_warn(drv,
+ "VF %d has requested a MACVLAN filter but there is no "
+ "space for it\n",
+ vf);
+
+ return err < 0;
+}
+
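+/*
+ * Translate the VF's requested receive mode into L2 control bits: each mode
+ * builds a disable mask and an enable mask that are applied in a single
+ * read-modify-write of the per-VF PSR_VM_L2CTL register, so switching modes
+ * cannot leave stale promiscuous bits behind.
+ */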
+static int ngbe_update_vf_xcast_mode(struct ngbe_adapter *adapter,
+ u32 *msgbuf, u32 vf)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int xcast_mode = msgbuf[1];
+ u32 vmolr, fctrl, disable, enable;
+
+ /* verify the PF is supporting the correct APIs */
+ switch (adapter->vfinfo[vf].vf_api) {
+ case ngbe_mbox_api_12:
+		/* promisc mode was introduced in API 1.3 */
+		if (xcast_mode == NGBEVF_XCAST_MODE_PROMISC)
+			return -EOPNOTSUPP;
+		/* fall through */
+ case ngbe_mbox_api_13:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (adapter->vfinfo[vf].xcast_mode == xcast_mode)
+ goto out;
+
+ switch (xcast_mode) {
+ case NGBEVF_XCAST_MODE_NONE:
+ disable = NGBE_PSR_VM_L2CTL_BAM |
+ NGBE_PSR_VM_L2CTL_ROMPE |
+ NGBE_PSR_VM_L2CTL_MPE |
+ NGBE_PSR_VM_L2CTL_UPE |
+ NGBE_PSR_VM_L2CTL_VPE;
+ enable = 0;
+ break;
+ case NGBEVF_XCAST_MODE_MULTI:
+ disable = NGBE_PSR_VM_L2CTL_MPE |
+ NGBE_PSR_VM_L2CTL_UPE |
+ NGBE_PSR_VM_L2CTL_VPE;
+ enable = NGBE_PSR_VM_L2CTL_BAM |
+ NGBE_PSR_VM_L2CTL_ROMPE;
+ break;
+ case NGBEVF_XCAST_MODE_ALLMULTI:
+ disable = NGBE_PSR_VM_L2CTL_UPE |
+ NGBE_PSR_VM_L2CTL_VPE;
+ enable = NGBE_PSR_VM_L2CTL_BAM |
+ NGBE_PSR_VM_L2CTL_ROMPE |
+ NGBE_PSR_VM_L2CTL_MPE;
+ break;
+ case NGBEVF_XCAST_MODE_PROMISC:
+ fctrl = rd32(hw, NGBE_PSR_CTL);
+ if (!(fctrl & NGBE_PSR_CTL_UPE)) {
+ /* VF promisc requires PF in promisc */
+ e_warn(drv,
+ "Enabling VF promisc requires PF in promisc\n");
+ return -EPERM;
+ }
+ disable = 0;
+ enable = NGBE_PSR_VM_L2CTL_BAM |
+ NGBE_PSR_VM_L2CTL_ROMPE |
+ NGBE_PSR_VM_L2CTL_MPE |
+ NGBE_PSR_VM_L2CTL_UPE |
+ NGBE_PSR_VM_L2CTL_VPE;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ vmolr = rd32(hw, NGBE_PSR_VM_L2CTL(vf));
+ vmolr &= ~disable;
+ vmolr |= enable;
+ wr32(hw, NGBE_PSR_VM_L2CTL(vf), vmolr);
+
+ adapter->vfinfo[vf].xcast_mode = xcast_mode;
+
+out:
+ msgbuf[1] = xcast_mode;
+
+ return 0;
+}
+
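+/*
+ * Central mailbox dispatcher: every VF request funnels through here. The
+ * low 16 bits of word 0 select the operation, and the PF answers by setting
+ * the ACK or NACK type bit (plus CTS) before writing the reply back.
+ */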
+static int ngbe_rcv_msg_from_vf(struct ngbe_adapter *adapter, u16 vf)
+{
+ u16 mbx_size = NGBE_VXMAILBOX_SIZE;
+ u32 msgbuf[NGBE_VXMAILBOX_SIZE];
+ struct ngbe_hw *hw = &adapter->hw;
+ s32 retval;
+
+ retval = ngbe_read_mbx(hw, msgbuf, mbx_size, vf);
+
+ if (retval) {
+ pr_err("Error receiving message from VF\n");
+ return retval;
+ }
+
+ /* this is a message we already processed, do nothing */
+ if (msgbuf[0] & (NGBE_VT_MSGTYPE_ACK | NGBE_VT_MSGTYPE_NACK))
+ return retval;
+
+ /* flush the ack before we write any messages back */
+ NGBE_WRITE_FLUSH(hw);
+
+ if (msgbuf[0] == NGBE_VF_RESET)
+ return ngbe_vf_reset_msg(adapter, vf);
+
+	/*
+	 * Until the VF completes a virtual function reset it should not be
+	 * allowed to start any configuration.
+	 */
+	if (!adapter->vfinfo[vf].clear_to_send) {
+ msgbuf[0] |= NGBE_VT_MSGTYPE_NACK;
+ ngbe_write_mbx(hw, msgbuf, 1, vf);
+ return retval;
+ }
+
+ switch ((msgbuf[0] & 0xFFFF)) {
+ case NGBE_VF_SET_MAC_ADDR:
+ retval = ngbe_set_vf_mac_addr(adapter, msgbuf, vf);
+ break;
+ case NGBE_VF_SET_MULTICAST:
+ retval = ngbe_set_vf_multicasts(adapter, msgbuf, vf);
+ break;
+ case NGBE_VF_SET_VLAN:
+ retval = ngbe_set_vf_vlan_msg(adapter, msgbuf, vf);
+ break;
+ case NGBE_VF_SET_LPE:
+ if (msgbuf[1] > NGBE_MAX_JUMBO_FRAME_SIZE) {
+ e_err(drv, "VF max_frame %d exceed MAX_JUMBO_FRAME_SIZE\n", msgbuf[1]);
+ return -EINVAL;
+ }
+ retval = ngbe_set_vf_lpe(adapter, msgbuf[1], vf);
+ break;
+ case NGBE_VF_SET_MACVLAN:
+ retval = ngbe_set_vf_macvlan_msg(adapter, msgbuf, vf);
+ break;
+ case NGBE_VF_API_NEGOTIATE:
+ retval = ngbe_negotiate_vf_api(adapter, msgbuf, vf);
+ break;
+ case NGBE_VF_GET_QUEUES:
+ retval = ngbe_get_vf_queues(adapter, msgbuf, vf);
+ break;
+ case NGBE_VF_UPDATE_XCAST_MODE:
+ retval = ngbe_update_vf_xcast_mode(adapter, msgbuf, vf);
+ break;
+ case NGBE_VF_GET_LINK_STATUS:
+ retval = ngbe_get_vf_link_status(adapter, msgbuf, vf);
+ break;
+ case NGBE_VF_BACKUP:
+ break;
+ default:
+ e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]);
+ retval = NGBE_ERR_MBX;
+ break;
+ }
+
+ /* notify the VF of the results of what it sent us */
+ if (retval)
+ msgbuf[0] |= NGBE_VT_MSGTYPE_NACK;
+ else
+ msgbuf[0] |= NGBE_VT_MSGTYPE_ACK;
+
+ msgbuf[0] |= NGBE_VT_MSGTYPE_CTS;
+
+ ngbe_write_mbx(hw, msgbuf, mbx_size, vf);
+
+ return retval;
+}
+
+static void ngbe_rcv_ack_from_vf(struct ngbe_adapter *adapter, u16 vf)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 msg = NGBE_VT_MSGTYPE_NACK;
+
+ /* if device isn't clear to send it shouldn't be reading either */
+ if (!adapter->vfinfo[vf].clear_to_send)
+ ngbe_write_mbx(hw, &msg, 1, vf);
+}
+
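+/*
+ * Walk every VF and service, in order, any pending reset request, mailbox
+ * message, and ack. Resets are handled first so a rebooting VF is quiesced
+ * before its stale messages are read.
+ */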
+void ngbe_msg_task(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u16 vf;
+
+ for (vf = 0; vf < adapter->num_vfs; vf++) {
+ /* process any reset requests */
+ if (!ngbe_check_for_rst(hw, vf))
+ ngbe_vf_reset_event(adapter, vf);
+
+ /* process any messages pending */
+ if (!ngbe_check_for_msg(hw, vf))
+ ngbe_rcv_msg_from_vf(adapter, vf);
+
+ /* process any acks */
+ if (!ngbe_check_for_ack(hw, vf))
+ ngbe_rcv_ack_from_vf(adapter, vf);
+ }
+}
+
+void ngbe_disable_tx_rx(struct ngbe_adapter *adapter)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+
+ /* disable transmit and receive for all vfs */
+ wr32(hw, NGBE_TDM_POOL_TE, 0);
+ wr32(hw, NGBE_RDM_POOL_RE, 0);
+}
+
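+/* Poke a single VF with a control message; CTS is only advertised once the
+ * VF has completed its reset handshake.
+ */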
+static inline void ngbe_ping_vf(struct ngbe_adapter *adapter, int vf)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 ping;
+
+ ping = NGBE_PF_CONTROL_MSG;
+ if (adapter->vfinfo[vf].clear_to_send)
+ ping |= NGBE_VT_MSGTYPE_CTS;
+ ngbe_write_mbx(hw, &ping, 1, vf);
+}
+
+void ngbe_ping_all_vfs(struct ngbe_adapter *adapter)
+{
+	u16 i;
+
+	for (i = 0; i < adapter->num_vfs; i++)
+		ngbe_ping_vf(adapter, i);
+}
+
+int ngbe_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ if (vf >= adapter->num_vfs)
+ return -EINVAL;
+
+ /* nothing to do */
+ if (adapter->vfinfo[vf].trusted == setting)
+ return 0;
+
+ adapter->vfinfo[vf].trusted = setting;
+
+ /* reset VF to reconfigure features */
+ adapter->vfinfo[vf].clear_to_send = false;
+ ngbe_ping_vf(adapter, vf);
+
+ e_info(drv, "VF %u is %strusted\n", vf, setting ? "" : "not ");
+
+ return 0;
+}
+
+#ifdef CONFIG_PCI_IOV
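+/*
+ * Entered via the PCI core's sriov_configure callback, typically when the
+ * administrator writes a VF count to the sriov_numvfs sysfs attribute.
+ * Returns the number of VFs actually enabled, or a negative errno.
+ */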
+static int ngbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs)
+{
+ struct ngbe_adapter *adapter = pci_get_drvdata(dev);
+ int err = 0;
+ int i;
+ int pre_existing_vfs = pci_num_vf(dev);
+
+ if (!(adapter->flags & NGBE_FLAG_SRIOV_CAPABLE)) {
+ e_dev_warn("SRIOV not supported on this device\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (pre_existing_vfs && pre_existing_vfs != num_vfs)
+ err = ngbe_disable_sriov(adapter);
+ else if (pre_existing_vfs && pre_existing_vfs == num_vfs)
+ goto out;
+
+ if (err)
+ goto err_out;
+
+	/* While the SR-IOV capability structure reports total VFs to be
+	 * 8, we limit the actual number that can be allocated to 7 so
+	 * that some transmit/receive resources can be reserved for the
+	 * PF. The PCI bus driver already checks for other values out of
+	 * range.
+	 */
+ if ((num_vfs + adapter->num_vmdqs) > NGBE_MAX_VF_FUNCTIONS) {
+ err = -EPERM;
+ goto err_out;
+ }
+
+ adapter->num_vfs = num_vfs;
+
+ err = __ngbe_enable_sriov(adapter);
+ if (err)
+ goto err_out;
+
+ for (i = 0; i < adapter->num_vfs; i++)
+ ngbe_vf_configuration(dev, (i | 0x10000000));
+
+ err = pci_enable_sriov(dev, num_vfs);
+ if (err) {
+ e_dev_warn("Failed to enable PCI sriov: %d\n", err);
+ goto err_out;
+ }
+ ngbe_get_vfs(adapter);
+ msleep(100);
+ ngbe_sriov_reinit(adapter);
+out:
+ return num_vfs;
+err_out:
+ return err;
+}
+
+static int ngbe_pci_sriov_disable(struct pci_dev *dev)
+{
+ struct ngbe_adapter *adapter = pci_get_drvdata(dev);
+ int err;
+ u32 current_flags = adapter->flags;
+
+ err = ngbe_disable_sriov(adapter);
+
+ /* Only reinit if no error and state changed */
+ if (!err && current_flags != adapter->flags)
+ ngbe_sriov_reinit(adapter);
+
+ return err;
+}
+#endif
+
+int ngbe_pci_sriov_configure(struct pci_dev __maybe_unused *dev,
+ int __maybe_unused num_vfs)
+{
+#ifdef CONFIG_PCI_IOV
+ if (num_vfs == 0)
+ return ngbe_pci_sriov_disable(dev);
+ else
+ return ngbe_pci_sriov_enable(dev, num_vfs);
+#endif
+ return 0;
+}
+
+int ngbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
+{
+ s32 retval = 0;
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ if (!is_valid_ether_addr(mac) || (vf >= adapter->num_vfs))
+ return -EINVAL;
+
+ dev_info(pci_dev_to_dev(adapter->pdev),
+ "setting MAC %pM on VF %d\n", mac, vf);
+ dev_info(pci_dev_to_dev(adapter->pdev),
+ "Reload the VF driver to make this change effective.\n");
+ retval = ngbe_set_vf_mac(adapter, vf, mac);
+ if (retval >= 0) {
+ adapter->vfinfo[vf].pf_set_mac = true;
+ if (test_bit(__NGBE_DOWN, &adapter->state)) {
+ dev_warn(pci_dev_to_dev(adapter->pdev),
+ "The VF MAC address has been set, but the PF "
+ "device is not up.\n");
+ dev_warn(pci_dev_to_dev(adapter->pdev),
+ "Bring the PF device up before attempting to "
+ "use the VF device.\n");
+ }
+ } else {
+ dev_warn(pci_dev_to_dev(adapter->pdev),
+ "The VF MAC address was NOT set due to invalid or "
+ "duplicate MAC address.\n");
+ }
+
+ return retval;
+}
+
+static int ngbe_enable_port_vlan(struct ngbe_adapter *adapter,
+ int vf, u16 vlan, u8 qos)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int err;
+
+ err = ngbe_set_vf_vlan(adapter, true, vlan, vf);
+ if (err)
+ goto out;
+ ngbe_set_vmvir(adapter, vlan, qos, vf);
+ ngbe_set_vmolr(hw, vf, false);
+ if (adapter->vfinfo[vf].spoofchk_enabled)
+ TCALL(hw, mac.ops.set_vlan_anti_spoofing, true, vf);
+ adapter->vfinfo[vf].vlan_count++;
+ /* enable hide vlan */
+ ngbe_write_qde(adapter, vf, 1);
+ ngbe_write_hide_vlan(adapter, vf, 1);
+ adapter->vfinfo[vf].pf_vlan = vlan;
+ adapter->vfinfo[vf].pf_qos = qos;
+ dev_info(pci_dev_to_dev(adapter->pdev),
+ "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
+ if (test_bit(__NGBE_DOWN, &adapter->state)) {
+ dev_warn(pci_dev_to_dev(adapter->pdev),
+ "The VF VLAN has been set, but the PF device is not "
+ "up.\n");
+ dev_warn(pci_dev_to_dev(adapter->pdev),
+ "Bring the PF device up before attempting to use the VF "
+ "device.\n");
+ }
+
+out:
+ return err;
+}
+
+static int ngbe_disable_port_vlan(struct ngbe_adapter *adapter, int vf)
+{
+ struct ngbe_hw *hw = &adapter->hw;
+ int err;
+
+ err = ngbe_set_vf_vlan(adapter, false,
+ adapter->vfinfo[vf].pf_vlan, vf);
+ ngbe_clear_vmvir(adapter, vf);
+ ngbe_set_vmolr(hw, vf, true);
+ TCALL(hw, mac.ops.set_vlan_anti_spoofing, false, vf);
+ if (adapter->vfinfo[vf].vlan_count)
+ adapter->vfinfo[vf].vlan_count--;
+ /* disable hide vlan */
+ ngbe_write_hide_vlan(adapter, vf, 0);
+ adapter->vfinfo[vf].pf_vlan = 0;
+ adapter->vfinfo[vf].pf_qos = 0;
+
+ return err;
+}
+
+int ngbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
+ u8 qos, __be16 vlan_proto)
+{
+ int err = 0;
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ /* VLAN IDs accepted range 0-4094 */
+	if ((vf >= adapter->num_vfs) || (vlan > VLAN_VID_MASK - 1) || (qos > 7))
+ return -EINVAL;
+
+ if (vlan_proto != htons(ETH_P_8021Q))
+ return -EPROTONOSUPPORT;
+
+ if (vlan || qos) {
+ /*
+ * Check if there is already a port VLAN set, if so
+ * we have to delete the old one first before we
+ * can set the new one. The usage model had
+ * previously assumed the user would delete the
+ * old port VLAN before setting a new one but this
+ * is not necessarily the case.
+ */
+ if (adapter->vfinfo[vf].pf_vlan)
+ err = ngbe_disable_port_vlan(adapter, vf);
+ if (err)
+ goto out;
+ err = ngbe_enable_port_vlan(adapter, vf, vlan, qos);
+
+ } else {
+ err = ngbe_disable_port_vlan(adapter, vf);
+ }
+out:
+ return err;
+}
+
+/* Rate limits are not programmed into the hardware here; the values are
+ * only stored so ndo_get_vf_config can report them back.
+ */
+int ngbe_ndo_set_vf_bw(struct net_device *netdev,
+ int vf,
+ int min_tx_rate,
+ int max_tx_rate)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+ /* verify VF is active */
+ if (vf >= adapter->num_vfs)
+ return -EINVAL;
+
+ /* verify link is up */
+ if (!adapter->link_up)
+ return -EINVAL;
+
+	/* verify we are linked at 1 Gbps */
+ if (adapter->link_speed < NGBE_LINK_SPEED_1GB_FULL)
+ return -EINVAL;
+
+ /* store values */
+ adapter->vfinfo[vf].min_tx_rate = min_tx_rate;
+ adapter->vfinfo[vf].max_tx_rate = max_tx_rate;
+
+ return 0;
+}
+
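+/*
+ * MAC anti-spoofing is tracked per VF in TDM_MAC_AS_L; the matching VLAN
+ * anti-spoof bit is only programmed when the VF actually has VLANs
+ * configured.
+ */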
+int ngbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting)
+{
+ struct ngbe_adapter *adapter = netdev_priv(netdev);
+ struct ngbe_hw *hw = &adapter->hw;
+ u32 regval;
+
+ if (vf >= adapter->num_vfs)
+ return -EINVAL;
+
+ adapter->vfinfo[vf].spoofchk_enabled = setting;
+
+ if (vf < 32) {
+ regval = (setting << vf);
+ wr32m(hw, NGBE_TDM_MAC_AS_L,
+ regval | (1 << vf), regval);
+
+ if (adapter->vfinfo[vf].vlan_count) {
+ wr32m(hw, NGBE_TDM_VLAN_AS_L,
+ regval | (1 << vf), regval);
+ }
+ }
+
+ return 0;
+}
+
+int ngbe_ndo_get_vf_config(struct net_device *netdev,
+ int vf, struct ifla_vf_info *ivi)
+{
+	struct ngbe_adapter *adapter = netdev_priv(netdev);
+
+	if (vf >= adapter->num_vfs)
+		return -EINVAL;
+
+	ivi->vf = vf;
+ memcpy(&ivi->mac, adapter->vfinfo[vf].vf_mac_addresses, ETH_ALEN);
+
+ ivi->max_tx_rate = adapter->vfinfo[vf].max_tx_rate;
+ ivi->min_tx_rate = adapter->vfinfo[vf].min_tx_rate;
+
+ ivi->vlan = adapter->vfinfo[vf].pf_vlan;
+ ivi->qos = adapter->vfinfo[vf].pf_qos;
+
+ ivi->spoofchk = adapter->vfinfo[vf].spoofchk_enabled;
+ ivi->trusted = adapter->vfinfo[vf].trusted;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_sriov.h b/drivers/net/ethernet/netswift/ngbe/ngbe_sriov.h
new file mode 100644
index 0000000000000..958c5303f72ad
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_sriov.h
@@ -0,0 +1,63 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+
+#ifndef _NGBE_SRIOV_H_
+#define _NGBE_SRIOV_H_
+
+/* The ngbe driver limits the max number of VFs that can be enabled
+ * to 7 (NGBE_MAX_VF_FUNCTIONS - 1)
+ */
+#define NGBE_MAX_VFS_DRV_LIMIT (NGBE_MAX_VF_FUNCTIONS - 1)
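+/* the SR-IOV enable path additionally reserves pools for VMDq, rejecting
+ * requests where num_vfs + num_vmdqs would exceed NGBE_MAX_VF_FUNCTIONS
+ */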
+
+void ngbe_restore_vf_multicasts(struct ngbe_adapter *adapter);
+int ngbe_set_vf_vlan(struct ngbe_adapter *adapter, int add, int vid, u16 vf);
+void ngbe_set_vmolr(struct ngbe_hw *hw, u16 vf, bool aupe);
+void ngbe_msg_task(struct ngbe_adapter *adapter);
+int ngbe_set_vf_mac(struct ngbe_adapter *adapter,
+ u16 vf, unsigned char *mac_addr);
+void ngbe_disable_tx_rx(struct ngbe_adapter *adapter);
+void ngbe_ping_all_vfs(struct ngbe_adapter *adapter);
+
+int ngbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac);
+
+int ngbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan,
+ u8 qos, __be16 vlan_proto);
+
+int ngbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
+ int max_tx_rate);
+
+int ngbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
+int ngbe_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting);
+int ngbe_ndo_get_vf_config(struct net_device *netdev,
+ int vf, struct ifla_vf_info *ivi);
+
+int ngbe_disable_sriov(struct ngbe_adapter *adapter);
+#ifdef CONFIG_PCI_IOV
+int ngbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask);
+void ngbe_enable_sriov(struct ngbe_adapter *adapter);
+#endif
+int ngbe_pci_sriov_configure(struct pci_dev *dev, int num_vfs);
+
+#define NGBE_VF_STATUS_LINKUP 0x1
+
+#endif /* _NGBE_SRIOV_H_ */
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_sysfs.c b/drivers/net/ethernet/netswift/ngbe/ngbe_sysfs.c
new file mode 100644
index 0000000000000..559d02b2feeb2
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_sysfs.c
@@ -0,0 +1,222 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include "ngbe.h"
+#include "ngbe_hw.h"
+#include "ngbe_type.h"
+
+#ifdef CONFIG_NGBE_SYSFS
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sysfs.h>
+#include <linux/kobject.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/time.h>
+#ifdef CONFIG_NGBE_HWMON
+#include <linux/hwmon.h>
+#endif
+
+#ifdef CONFIG_NGBE_HWMON
+/* hwmon callback functions */
+static ssize_t ngbe_hwmon_show_temp(struct device __always_unused *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct hwmon_attr *ngbe_attr = container_of(attr, struct hwmon_attr,
+ dev_attr);
+ unsigned int value;
+
+ /* reset the temp field */
+ TCALL(ngbe_attr->hw, mac.ops.get_thermal_sensor_data);
+
+ value = ngbe_attr->sensor->temp;
+
+ /* display millidegree */
+ value *= 1000;
+
+ return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t ngbe_hwmon_show_alarmthresh(struct device __always_unused *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct hwmon_attr *ngbe_attr = container_of(attr, struct hwmon_attr,
+ dev_attr);
+ unsigned int value = ngbe_attr->sensor->alarm_thresh;
+
+ /* display millidegree */
+ value *= 1000;
+
+ return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t ngbe_hwmon_show_dalarmthresh(struct device __always_unused *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct hwmon_attr *ngbe_attr = container_of(attr, struct hwmon_attr,
+ dev_attr);
+ unsigned int value = ngbe_attr->sensor->dalarm_thresh;
+
+ /* display millidegree */
+ value *= 1000;
+
+ return sprintf(buf, "%u\n", value);
+}
+
+/**
+ * ngbe_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file.
+ * @adapter: pointer to the adapter structure
+ * @type: type of sensor data to display
+ *
+ * For each file we want in hwmon's sysfs interface we need a device_attribute
+ * This is included in our hwmon_attr struct that contains the references to
+ * the data structures we need to get the data to display.
+ */
+static int ngbe_add_hwmon_attr(struct ngbe_adapter *adapter, int type)
+{
+ int rc;
+ unsigned int n_attr;
+ struct hwmon_attr *ngbe_attr;
+
+ n_attr = adapter->ngbe_hwmon_buff.n_hwmon;
+ ngbe_attr = &adapter->ngbe_hwmon_buff.hwmon_list[n_attr];
+
+ switch (type) {
+ case NGBE_HWMON_TYPE_TEMP:
+ ngbe_attr->dev_attr.show = ngbe_hwmon_show_temp;
+ snprintf(ngbe_attr->name, sizeof(ngbe_attr->name),
+ "temp%u_input", 0);
+ break;
+ case NGBE_HWMON_TYPE_ALARMTHRESH:
+ ngbe_attr->dev_attr.show = ngbe_hwmon_show_alarmthresh;
+ snprintf(ngbe_attr->name, sizeof(ngbe_attr->name),
+ "temp%u_alarmthresh", 0);
+ break;
+ case NGBE_HWMON_TYPE_DALARMTHRESH:
+ ngbe_attr->dev_attr.show = ngbe_hwmon_show_dalarmthresh;
+ snprintf(ngbe_attr->name, sizeof(ngbe_attr->name),
+ "temp%u_dalarmthresh", 0);
+ break;
+ default:
+ rc = -EPERM;
+ return rc;
+ }
+
+	/* These are always the same regardless of type */
+ ngbe_attr->sensor =
+ &adapter->hw.mac.thermal_sensor_data.sensor;
+ ngbe_attr->hw = &adapter->hw;
+ ngbe_attr->dev_attr.store = NULL;
+ ngbe_attr->dev_attr.attr.mode = S_IRUGO;
+ ngbe_attr->dev_attr.attr.name = ngbe_attr->name;
+
+ rc = device_create_file(pci_dev_to_dev(adapter->pdev),
+ &ngbe_attr->dev_attr);
+
+ if (rc == 0)
+ ++adapter->ngbe_hwmon_buff.n_hwmon;
+
+ return rc;
+}
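+
+/*
+ * The attributes created above surface as temp0_input, temp0_alarmthresh
+ * and temp0_dalarmthresh under the adapter's PCI device sysfs directory,
+ * each reporting millidegrees as hwmon convention expects.
+ */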
+#endif /* CONFIG_NGBE_HWMON */
+
+static void ngbe_sysfs_del_adapter(
+ struct ngbe_adapter __maybe_unused *adapter)
+{
+#ifdef CONFIG_NGBE_HWMON
+ int i;
+
+ if (adapter == NULL)
+ return;
+
+ for (i = 0; i < adapter->ngbe_hwmon_buff.n_hwmon; i++) {
+ device_remove_file(pci_dev_to_dev(adapter->pdev),
+ &adapter->ngbe_hwmon_buff.hwmon_list[i].dev_attr);
+ }
+
+ kfree(adapter->ngbe_hwmon_buff.hwmon_list);
+
+ if (adapter->ngbe_hwmon_buff.device)
+ hwmon_device_unregister(adapter->ngbe_hwmon_buff.device);
+#endif /* CONFIG_NGBE_HWMON */
+}
+
+/* called from ngbe_main.c */
+void ngbe_sysfs_exit(struct ngbe_adapter *adapter)
+{
+ ngbe_sysfs_del_adapter(adapter);
+}
+
+/* called from ngbe_main.c */
+int ngbe_sysfs_init(struct ngbe_adapter *adapter)
+{
+ int rc = 0;
+#ifdef CONFIG_NGBE_HWMON
+ struct hwmon_buff *ngbe_hwmon = &adapter->ngbe_hwmon_buff;
+ int n_attrs;
+#endif /* CONFIG_NGBE_HWMON */
+
+	if (adapter == NULL)
+ goto err;
+
+#ifdef CONFIG_NGBE_HWMON
+
+ /* Don't create thermal hwmon interface if no sensors present */
+ if (TCALL(&adapter->hw, mac.ops.init_thermal_sensor_thresh))
+ goto no_thermal;
+
+	/*
+	 * Allocate space for the maximum number of attributes:
+	 * max num sensors * values (temp, alarmthresh, dalarmthresh)
+	 */
+ n_attrs = 3;
+ ngbe_hwmon->hwmon_list = kcalloc(n_attrs, sizeof(struct hwmon_attr),
+ GFP_KERNEL);
+ if (!ngbe_hwmon->hwmon_list) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ ngbe_hwmon->device =
+ hwmon_device_register(pci_dev_to_dev(adapter->pdev));
+ if (IS_ERR(ngbe_hwmon->device)) {
+ rc = PTR_ERR(ngbe_hwmon->device);
+ goto err;
+ }
+
+ /* Bail if any hwmon attr struct fails to initialize */
+ rc = ngbe_add_hwmon_attr(adapter, NGBE_HWMON_TYPE_TEMP);
+ rc |= ngbe_add_hwmon_attr(adapter, NGBE_HWMON_TYPE_ALARMTHRESH);
+ rc |= ngbe_add_hwmon_attr(adapter, NGBE_HWMON_TYPE_DALARMTHRESH);
+ if (rc)
+ goto err;
+
+no_thermal:
+#endif /* CONFIG_NGBE_HWMON */
+ goto exit;
+
+err:
+ ngbe_sysfs_del_adapter(adapter);
+exit:
+ return rc;
+}
+#endif /* CONFIG_NGBE_SYSFS */
diff --git a/drivers/net/ethernet/netswift/ngbe/ngbe_type.h b/drivers/net/ethernet/netswift/ngbe/ngbe_type.h
new file mode 100644
index 0000000000000..4e7f627edbbce
--- /dev/null
+++ b/drivers/net/ethernet/netswift/ngbe/ngbe_type.h
@@ -0,0 +1,2941 @@
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#ifndef _NGBE_TYPE_H_
+#define _NGBE_TYPE_H_
+
+#include <linux/types.h>
+#include <linux/mdio.h>
+#include <linux/netdevice.h>
+
+/*
+ * The following is a brief description of the error categories used by the
+ * ERROR_REPORT* macros.
+ *
+ * - NGBE_ERROR_INVALID_STATE
+ * This category is for errors which represent a serious failure state that is
+ * unexpected, and could be potentially harmful to device operation. It should
+ * not be used for errors relating to issues that can be worked around or
+ * ignored.
+ *
+ * - NGBE_ERROR_POLLING
+ * This category is for errors related to polling/timeout issues and should be
+ * used in any case where a timeout occurred, a lock could not be obtained, or
+ * data was not received within the time limit.
+ *
+ * - NGBE_ERROR_CAUTION
+ * This category should be used for reporting issues that may be the cause of
+ * other errors, such as temperature warnings. It should indicate an event which
+ * could be serious, but hasn't necessarily caused problems yet.
+ *
+ * - NGBE_ERROR_SOFTWARE
+ * This category is intended for errors due to software state preventing
+ * something. The category is not intended for errors due to bad arguments, or
+ * due to unsupported features. It should be used when a state occurs which
+ * prevents action but is not a serious issue.
+ *
+ * - NGBE_ERROR_ARGUMENT
+ * This category is for when a bad or invalid argument is passed. It should be
+ * used whenever a function is called and error checking has detected the
+ * argument is wrong or incorrect.
+ *
+ * - NGBE_ERROR_UNSUPPORTED
+ * This category is for errors which are due to unsupported circumstances or
+ * configuration issues. It should not be used when the issue is due to an
+ * invalid argument, but for when something has occurred that is unsupported
+ * (Ex: Flow control autonegotiation or an unsupported SFP+ module.)
+ */
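+/*
+ * Illustrative only -- the reporting macro itself lives in the driver's
+ * debug headers rather than here, so the exact name and signature below are
+ * assumptions. A polling failure would be tagged roughly like:
+ *
+ *	ERROR_REPORT1(NGBE_ERROR_POLLING,
+ *		      "SPI command did not complete within the time limit");
+ */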
+
+/* Little Endian defines */
+#ifndef __le16
+#define __le16 u16
+#endif
+#ifndef __le32
+#define __le32 u32
+#endif
+#ifndef __le64
+#define __le64 u64
+#endif
+
+/* Big Endian defines */
+#ifndef __be16
+#define __be16 u16
+#define __be32 u32
+#define __be64 u64
+#endif
+
+/************ ngbe_register.h ************/
+/* Vendor ID */
+#ifndef PCI_VENDOR_ID_TRUSTNETIC
+#define PCI_VENDOR_ID_TRUSTNETIC 0x8088
+#endif
+
+/* Device IDs */
+/* copper */
+#define NGBE_DEV_ID_EM_TEST 0x0000
+#define NGBE_DEV_ID_EM_WX1860AL_W 0x0100
+#define NGBE_DEV_ID_EM_WX1860A2 0x0101
+#define NGBE_DEV_ID_EM_WX1860A2S 0x0102
+#define NGBE_DEV_ID_EM_WX1860A4 0x0103
+#define NGBE_DEV_ID_EM_WX1860A4S 0x0104
+#define NGBE_DEV_ID_EM_WX1860AL2 0x0105
+#define NGBE_DEV_ID_EM_WX1860AL2S 0x0106
+#define NGBE_DEV_ID_EM_WX1860AL4 0x0107
+#define NGBE_DEV_ID_EM_WX1860AL4S 0x0108
+#define NGBE_DEV_ID_EM_WX1860NCSI 0x0109
+#define NGBE_DEV_ID_EM_WX1860A1 0x010a
+#define NGBE_DEV_ID_EM_WX1860AL1 0x010b
+
+/* transfer units */
+#define NGBE_KB_TO_B 1024
+
+/* Subsystem ID */
+#define NGBE_WX1860AL_INTERNAL 0x0410
+#define NGBE_WX1860AL_ZTE5201_RJ45 0x0100
+#define NGBE_WX1860AL_M88E1512_RJ45 0x0200
+#define NGBE_WX1860AL_M88E1512_SFP 0x0403
+#define NGBE_WX1860AL_YT8521S_SFP 0x0460
+
+#define NGBE_SUBSYSTEM_ID_EM_SF100F_LP 0x0103
+#define NGBE_SUBSYSTEM_ID_EM_SF100HF_LP 0x0103
+#define NGBE_SUBSYSTEM_ID_EM_SF200T 0x0201
+#define NGBE_SUBSYSTEM_ID_EM_SF200T_S 0x0210
+#define NGBE_SUBSYSTEM_ID_EM_SF400T 0x0401
+#define NGBE_SUBSYSTEM_ID_EM_SF400T_S 0x0410
+#define NGBE_SUBSYSTEM_ID_EM_SF200HT 0x0202
+#define NGBE_SUBSYSTEM_ID_EM_SF200HT_S 0x0220
+#define NGBE_SUBSYSTEM_ID_EM_SF400HT 0x0402
+#define NGBE_SUBSYSTEM_ID_EM_SF400HT_S 0x0420
+#define NGBE_SUBSYSTEM_ID_EM_SF200HXT 0x0230
+#define NGBE_SUBSYSTEM_ID_EM_SF400HXT 0x0430
+#define NGBE_SUBSYSTEM_ID_EM_SF400_OCP 0x0440
+#define NGBE_SUBSYSTEM_ID_EM_SF400_LY 0x0450
+#define NGBE_SUBSYSTEM_ID_EM_SF400_LY_YT 0x0470
+
+#define INTERNAL_SFP 0x0003
+#define OCP_CARD 0x0040
+#define LY_M88E1512_SFP 0x0050
+#define YT8521S_SFP 0x0060
+#define LY_YT8521S_SFP 0x0070
+
+#define OEM_MASK 0x00F0
+#define INTERNAL_SFP_MASK 0x000F
+
+#define NCSI_SUP 0x8000
+#define NCSI_SUP_MASK 0x8000
+
+#define WOL_SUP 0x4000
+#define WOL_SUP_MASK 0x4000
+
+/* MDIO Manageable Devices (MMDs). */
+#define NGBE_MDIO_PMA_PMD_DEV_TYPE 0x1 /* PMA and PMD */
+#define NGBE_MDIO_PCS_DEV_TYPE 0x3 /* Physical Coding Sublayer*/
+#define NGBE_MDIO_PHY_XS_DEV_TYPE 0x4 /* PHY Extender Sublayer */
+#define NGBE_MDIO_AUTO_NEG_DEV_TYPE 0x7 /* Auto-Negotiation */
+#define NGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE 0x1E /* Vendor specific 1 */
+
+/* phy register definitions */
+/* VENDOR_SPECIFIC_1_DEV regs */
+#define NGBE_MDIO_VENDOR_SPECIFIC_1_STATUS 0x1 /* VS1 Status Reg */
+#define NGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS 0x0008 /* 1 = Link Up */
+#define NGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS 0x0010 /* 0-10G, 1-1G */
+
+/* AUTO_NEG_DEV regs */
+#define NGBE_MDIO_AUTO_NEG_CONTROL 0x0 /* AUTO_NEG Control Reg */
+#define NGBE_MDIO_AUTO_NEG_ADVT 0x10 /* AUTO_NEG Advt Reg */
+#define NGBE_MDIO_AUTO_NEG_LP 0x13 /* AUTO_NEG LP Reg */
+#define NGBE_MDIO_AUTO_NEG_LP_STATUS 0xE820 /* AUTO NEG RX LP Status Reg */
+#define NGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG 0xC400 /* 1G Provisioning 1 */
+#define NGBE_MII_AUTONEG_XNP_TX_REG 0x17 /* 1G XNP Transmit */
+#define NGBE_MII_AUTONEG_ADVERTISE_REG 0x10 /* 100M Advertisement */
+
+#define NGBE_MDIO_AUTO_NEG_1000BASE_EEE_ADVT 0x4
+#define NGBE_MDIO_AUTO_NEG_100BASE_EEE_ADVT 0x2
+#define NGBE_MDIO_AUTO_NEG_LP_1000BASE_CAP 0x8000
+
+#define NGBE_MII_1GBASE_T_ADVERTISE_XNP_TX 0x4000 /* full duplex, bit:14*/
+#define NGBE_MII_1GBASE_T_ADVERTISE 0x8000 /* full duplex, bit:15*/
+#define NGBE_MII_100BASE_T_ADVERTISE 0x0100 /* full duplex, bit:8 */
+#define NGBE_MII_100BASE_T_ADVERTISE_HALF 0x0080 /* half duplex, bit:7 */
+#define NGBE_MII_RESTART 0x200
+#define NGBE_MII_AUTONEG_COMPLETE 0x20
+#define NGBE_MII_AUTONEG_LINK_UP 0x04
+#define NGBE_MII_AUTONEG_REG 0x0
+
+/* PHY_XS_DEV regs */
+#define NGBE_MDIO_PHY_XS_CONTROL 0x0 /* PHY_XS Control Reg */
+#define NGBE_MDIO_PHY_XS_RESET 0x8000 /* PHY_XS Reset */
+
+/* Media-dependent registers. */
+#define NGBE_MDIO_PHY_ID_HIGH 0x2 /* PHY ID High Reg*/
+#define NGBE_MDIO_PHY_ID_LOW 0x3 /* PHY ID Low Reg*/
+#define NGBE_MDIO_PHY_SPEED_ABILITY 0x4 /* Speed Ability Reg */
+#define NGBE_MDIO_PHY_EXT_ABILITY 0xB /* Ext Ability Reg */
+
+#define NGBE_MDIO_PHY_SPEED_1G 0x0010 /* 1G capable */
+#define NGBE_MDIO_PHY_SPEED_100M 0x0020 /* 100M capable */
+#define NGBE_MDIO_PHY_SPEED_10M 0x0040 /* 10M capable */
+
+#define NGBE_MDIO_PHY_1000BASET_ABILITY 0x0020 /* 1000BaseT capable */
+#define NGBE_MDIO_PHY_100BASETX_ABILITY 0x0080 /* 100BaseTX capable */
+
+#define NGBE_PHY_REVISION_MASK 0xFFFFFFF0U
+#define NGBE_MAX_PHY_ADDR 32
+
+#define NGBE_MDIO_CLAUSE_SELECT 0x11220
+
+/* INTERNAL PHY CONTROL */
+#define NGBE_INTERNAL_PHY_PAGE_SELECT_OFFSET 31
+#define NGBE_INTERNAL_PHY_OFFSET_MAX 32
+#define NGBE_INTERNAL_PHY_ID 0x000732
+
+#define NGBE_INTPHY_LED0 0x0010
+#define NGBE_INTPHY_LED1 0x0040
+#define NGBE_INTPHY_LED2 0x2000
+
+#define NGBE_INTPHY_INT_LSC 0x0010
+#define NGBE_INTPHY_INT_ANC 0x0008
+
+/* PHY MDI STANDARD CONFIG */
+#define NGBE_MDI_PHY_ID1_OFFSET 2
+#define NGBE_MDI_PHY_ID2_OFFSET 3
+#define NGBE_MDI_PHY_ID_MASK 0xFFFFFC00U
+#define NGBE_MDI_PHY_SPEED_SELECT1 0x0040
+#define NGBE_MDI_PHY_DUPLEX 0x0100
+#define NGBE_MDI_PHY_RESTART_AN 0x0200
+#define NGBE_MDI_PHY_ANE 0x1000
+#define NGBE_MDI_PHY_SPEED_SELECT0 0x2000
+#define NGBE_MDI_PHY_RESET 0x8000
+
+#define NGBE_PHY_RST_WAIT_PERIOD 5
+
+#define NGBE_M88E1512_PHY_ID 0x005043
+/* reg 18_0 */
+#define NGBE_M88E1512_INT_LSC 0x0400
+#define NGBE_M88E1512_INT_ANC 0x0800
+/* reg 18_3 */
+#define NGBE_M88E1512_INT_EN 0x0080
+#define NGBE_M88E1512_INT_POL 0x0800
+
+/* reg 21_2 */
+#define NGBE_M88E1512_RGM_TTC 0x0010
+#define NGBE_M88E1512_RGM_RTC 0x0020
+
+/* LED control */
+#define NGBE_M88E1512_LED1_CONF 0x6
+#define NGBE_M88E1512_LED0_CONF 0x1
+
+/* LED polarity */
+#define NGBE_M88E1512_LED1_POL 0x1
+#define NGBE_M88E1512_LED0_POL 0x1
+
+/* reg 4_0 ADV REG*/
+#define NGBE_M88E1512_10BASET_HALF 0x0020
+#define NGBE_M88E1512_10BASET_FULL 0x0040
+#define NGBE_M88E1512_100BASET_HALF 0x0080
+#define NGBE_M88E1512_100BASET_FULL 0x0100
+
+/* reg 9_0 ADV REG*/
+#define NGBE_M88E1512_1000BASET_HALF 0x0100
+#define NGBE_M88E1512_1000BASET_FULL 0x0200
+
+/* reg 19_0 INT status*/
+#define NGBE_M88E1512_ANC 0x0800
+#define NGBE_M88E1512_LSC 0x0400
+
+/* yt8521s reg */
+#define NGBE_YT8521S_PHY_ID 0x011a
+
+#define NGBE_YT8521S_SDS_LINK_UP 0x4
+#define NGBE_YT8521S_SDS_LINK_DOWN 0x8
+
+/* PHY IDs*/
+#define TN1010_PHY_ID 0x00A19410U
+#define QT2022_PHY_ID 0x0043A400U
+#define ATH_PHY_ID 0x03429050U
+/* PHY FW revision */
+#define TNX_FW_REV 0xB
+#define AQ_FW_REV 0x20
+
+/* ETH PHY Registers */
+#define NGBE_SR_XS_PCS_MMD_STATUS1 0x30001
+#define NGBE_SR_PCS_CTL2 0x30007
+#define NGBE_SR_PMA_MMD_CTL1 0x10000
+#define NGBE_SR_MII_MMD_CTL 0x1F0000
+#define NGBE_SR_MII_MMD_DIGI_CTL 0x1F8000
+#define NGBE_SR_MII_MMD_AN_CTL 0x1F8001
+#define NGBE_SR_MII_MMD_AN_ADV 0x1F0004
+#define NGBE_SR_MII_MMD_AN_ADV_PAUSE(_v) ((0x3 & (_v)) << 7)
+#define NGBE_SR_MII_MMD_LP_BABL 0x1F0005
+#define NGBE_SR_AN_MMD_CTL 0x70000
+#define NGBE_SR_AN_MMD_ADV_REG1 0x70010
+#define NGBE_SR_AN_MMD_ADV_REG1_PAUSE(_v) ((0x3 & (_v)) << 10)
+#define NGBE_SR_AN_MMD_ADV_REG1_PAUSE_SYM 0x400
+#define NGBE_SR_AN_MMD_ADV_REG1_PAUSE_ASM 0x800
+#define NGBE_SR_AN_MMD_ADV_REG2 0x70011
+#define NGBE_SR_AN_MMD_LP_ABL1 0x70013
+#define NGBE_VR_AN_KR_MODE_CL 0x78003
+#define NGBE_VR_XS_OR_PCS_MMD_DIGI_CTL1 0x38000
+#define NGBE_VR_XS_OR_PCS_MMD_DIGI_STATUS 0x38010
+
+#define NGBE_PHY_MPLLA_CTL0 0x18071
+#define NGBE_PHY_MPLLA_CTL3 0x18077
+#define NGBE_PHY_MISC_CTL0 0x18090
+#define NGBE_PHY_VCO_CAL_LD0 0x18092
+#define NGBE_PHY_VCO_CAL_LD1 0x18093
+#define NGBE_PHY_VCO_CAL_LD2 0x18094
+#define NGBE_PHY_VCO_CAL_LD3 0x18095
+#define NGBE_PHY_VCO_CAL_REF0 0x18096
+#define NGBE_PHY_VCO_CAL_REF1 0x18097
+#define NGBE_PHY_RX_AD_ACK 0x18098
+#define NGBE_PHY_AFE_DFE_ENABLE 0x1805D
+#define NGBE_PHY_DFE_TAP_CTL0 0x1805E
+#define NGBE_PHY_RX_EQ_ATT_LVL0 0x18057
+#define NGBE_PHY_RX_EQ_CTL0 0x18058
+#define NGBE_PHY_RX_EQ_CTL 0x1805C
+#define NGBE_PHY_TX_EQ_CTL0 0x18036
+#define NGBE_PHY_TX_EQ_CTL1 0x18037
+#define NGBE_PHY_TX_RATE_CTL 0x18034
+#define NGBE_PHY_RX_RATE_CTL 0x18054
+#define NGBE_PHY_TX_GEN_CTL2 0x18032
+#define NGBE_PHY_RX_GEN_CTL2 0x18052
+#define NGBE_PHY_RX_GEN_CTL3 0x18053
+#define NGBE_PHY_MPLLA_CTL2 0x18073
+#define NGBE_PHY_RX_POWER_ST_CTL 0x18055
+#define NGBE_PHY_TX_POWER_ST_CTL 0x18035
+#define NGBE_PHY_TX_GENCTRL1 0x18031
+
+#define NGBE_SR_PCS_CTL2_PCS_TYPE_SEL_R 0x0
+#define NGBE_SR_PCS_CTL2_PCS_TYPE_SEL_X 0x1
+#define NGBE_SR_PCS_CTL2_PCS_TYPE_SEL_MASK 0x3
+#define NGBE_SR_PMA_MMD_CTL1_SPEED_SEL_1G 0x0
+#define NGBE_SR_PMA_MMD_CTL1_SPEED_SEL_MASK 0x2000
+#define NGBE_SR_PMA_MMD_CTL1_LB_EN 0x1
+#define NGBE_SR_MII_MMD_CTL_AN_EN 0x1000
+#define NGBE_SR_MII_MMD_CTL_RESTART_AN 0x0200
+#define NGBE_SR_AN_MMD_CTL_RESTART_AN 0x0200
+#define NGBE_SR_AN_MMD_CTL_ENABLE 0x1000
+#define NGBE_SR_AN_MMD_ADV_REG2_BP_TYPE_KX4 0x40
+#define NGBE_SR_AN_MMD_ADV_REG2_BP_TYPE_KX 0x20
+#define NGBE_SR_AN_MMD_ADV_REG2_BP_TYPE_KR 0x80
+#define NGBE_SR_AN_MMD_ADV_REG2_BP_TYPE_MASK 0xFFFF
+#define NGBE_VR_XS_OR_PCS_MMD_DIGI_CTL1_ENABLE 0x1000
+#define NGBE_VR_XS_OR_PCS_MMD_DIGI_CTL1_VR_RST 0x8000
+#define NGBE_VR_XS_OR_PCS_MMD_DIGI_STATUS_PSEQ_MASK 0x1C
+#define NGBE_VR_XS_OR_PCS_MMD_DIGI_STATUS_PSEQ_POWER_GOOD 0x10
+
+#define NGBE_PHY_MPLLA_CTL0_MULTIPLIER_1GBASEX_KX 32
+#define NGBE_PHY_MPLLA_CTL0_MULTIPLIER_OTHER 40
+#define NGBE_PHY_MPLLA_CTL0_MULTIPLIER_MASK 0xFF
+#define NGBE_PHY_MPLLA_CTL3_MULTIPLIER_BW_1GBASEX_KX 0x46
+#define NGBE_PHY_MPLLA_CTL3_MULTIPLIER_BW_OTHER 0x56
+#define NGBE_PHY_MPLLA_CTL3_MULTIPLIER_BW_MASK 0x7FF
+#define NGBE_PHY_MISC_CTL0_TX2RX_LB_EN_0 0x1
+#define NGBE_PHY_MISC_CTL0_TX2RX_LB_EN_3_1 0xE
+#define NGBE_PHY_MISC_CTL0_RX_VREF_CTRL 0x1F00
+#define NGBE_PHY_VCO_CAL_LD0_1GBASEX_KX 1344
+#define NGBE_PHY_VCO_CAL_LD0_OTHER 1360
+#define NGBE_PHY_VCO_CAL_LD0_MASK 0x1000
+#define NGBE_PHY_VCO_CAL_REF0_LD0_1GBASEX_KX 42
+#define NGBE_PHY_VCO_CAL_REF0_LD0_OTHER 34
+#define NGBE_PHY_VCO_CAL_REF0_LD0_MASK 0x3F
+#define NGBE_PHY_AFE_DFE_ENABLE_DFE_EN0 0x10
+#define NGBE_PHY_AFE_DFE_ENABLE_AFE_EN0 0x1
+#define NGBE_PHY_AFE_DFE_ENABLE_MASK 0xFF
+#define NGBE_PHY_RX_EQ_CTL_CONT_ADAPT0 0x1
+#define NGBE_PHY_RX_EQ_CTL_CONT_ADAPT_MASK 0xF
+#define NGBE_PHY_TX_RATE_CTL_TX0_RATE_RXAUI 0x1
+#define NGBE_PHY_TX_RATE_CTL_TX0_RATE_1GBASEX_KX 0x3
+#define NGBE_PHY_TX_RATE_CTL_TX0_RATE_OTHER 0x2
+#define NGBE_PHY_TX_RATE_CTL_TX1_RATE_OTHER 0x20
+#define NGBE_PHY_TX_RATE_CTL_TX2_RATE_OTHER 0x200
+#define NGBE_PHY_TX_RATE_CTL_TX3_RATE_OTHER 0x2000
+#define NGBE_PHY_TX_RATE_CTL_TX0_RATE_MASK 0x7
+#define NGBE_PHY_TX_RATE_CTL_TX1_RATE_MASK 0x70
+#define NGBE_PHY_TX_RATE_CTL_TX2_RATE_MASK 0x700
+#define NGBE_PHY_TX_RATE_CTL_TX3_RATE_MASK 0x7000
+#define NGBE_PHY_RX_RATE_CTL_RX0_RATE_RXAUI 0x1
+#define NGBE_PHY_RX_RATE_CTL_RX0_RATE_1GBASEX_KX 0x3
+#define NGBE_PHY_RX_RATE_CTL_RX0_RATE_OTHER 0x2
+#define NGBE_PHY_RX_RATE_CTL_RX1_RATE_OTHER 0x20
+#define NGBE_PHY_RX_RATE_CTL_RX2_RATE_OTHER 0x200
+#define NGBE_PHY_RX_RATE_CTL_RX3_RATE_OTHER 0x2000
+#define NGBE_PHY_RX_RATE_CTL_RX0_RATE_MASK 0x7
+#define NGBE_PHY_RX_RATE_CTL_RX1_RATE_MASK 0x70
+#define NGBE_PHY_RX_RATE_CTL_RX2_RATE_MASK 0x700
+#define NGBE_PHY_RX_RATE_CTL_RX3_RATE_MASK 0x7000
+#define NGBE_PHY_TX_GEN_CTL2_TX0_WIDTH_OTHER 0x100
+#define NGBE_PHY_TX_GEN_CTL2_TX0_WIDTH_MASK 0x300
+#define NGBE_PHY_TX_GEN_CTL2_TX1_WIDTH_OTHER 0x400
+#define NGBE_PHY_TX_GEN_CTL2_TX1_WIDTH_MASK 0xC00
+#define NGBE_PHY_TX_GEN_CTL2_TX2_WIDTH_OTHER 0x1000
+#define NGBE_PHY_TX_GEN_CTL2_TX2_WIDTH_MASK 0x3000
+#define NGBE_PHY_TX_GEN_CTL2_TX3_WIDTH_OTHER 0x4000
+#define NGBE_PHY_TX_GEN_CTL2_TX3_WIDTH_MASK 0xC000
+#define NGBE_PHY_RX_GEN_CTL2_RX0_WIDTH_OTHER 0x100
+#define NGBE_PHY_RX_GEN_CTL2_RX0_WIDTH_MASK 0x300
+#define NGBE_PHY_RX_GEN_CTL2_RX1_WIDTH_OTHER 0x400
+#define NGBE_PHY_RX_GEN_CTL2_RX1_WIDTH_MASK 0xC00
+#define NGBE_PHY_RX_GEN_CTL2_RX2_WIDTH_OTHER 0x1000
+#define NGBE_PHY_RX_GEN_CTL2_RX2_WIDTH_MASK 0x3000
+#define NGBE_PHY_RX_GEN_CTL2_RX3_WIDTH_OTHER 0x4000
+#define NGBE_PHY_RX_GEN_CTL2_RX3_WIDTH_MASK 0xC000
+
+#define NGBE_PHY_MPLLA_CTL2_DIV_CLK_EN_8 0x100
+#define NGBE_PHY_MPLLA_CTL2_DIV_CLK_EN_10 0x200
+#define NGBE_PHY_MPLLA_CTL2_DIV_CLK_EN_16P5 0x400
+#define NGBE_PHY_MPLLA_CTL2_DIV_CLK_EN_MASK 0x700
+
+#define NGBE_XPCS_POWER_GOOD_MAX_POLLING_TIME 100
+#define NGBE_PHY_INIT_DONE_POLLING_TIME 100
+
+/**************** Global Registers ****************************/
+/* chip control Registers */
+#define NGBE_MIS_RST 0x1000C
+#define NGBE_MIS_PWR 0x10000
+#define NGBE_MIS_CTL 0x10004
+#define NGBE_MIS_PF_SM 0x10008
+#define NGBE_MIS_ST 0x10028
+#define NGBE_MIS_SWSM 0x1002C
+#define NGBE_MIS_RST_ST 0x10030
+
+#define NGBE_MIS_RST_SW_RST 0x00000001U
+#define NGBE_MIS_RST_LAN0_RST 0x00000002U
+#define NGBE_MIS_RST_LAN1_RST 0x00000004U
+#define NGBE_MIS_RST_LAN2_RST 0x00000008U
+#define NGBE_MIS_RST_LAN3_RST 0x00000010U
+#define NGBE_MIS_RST_FW_RST 0x00000020U
+
+#define NGBE_MIS_RST_LAN0_CHG_ETH_MODE 0x20000000U
+#define NGBE_MIS_RST_LAN1_CHG_ETH_MODE 0x40000000U
+#define NGBE_MIS_RST_GLOBAL_RST 0x80000000U
+
+#define NGBE_MIS_PWR_LAN_ID(_r) ((0xF0000000U & (_r)) >> 28)
+#define NGBE_MIS_PWR_LAN_ID_0 (1)
+#define NGBE_MIS_PWR_LAN_ID_1 (2)
+#define NGBE_MIS_PWR_LAN_ID_2 (3)
+#define NGBE_MIS_PWR_LAN_ID_3 (4)
+
+#define NGBE_MIS_ST_MNG_INIT_DN 0x00000001U
+#define NGBE_MIS_ST_MNG_VETO 0x00000100U
+#define NGBE_MIS_ST_LAN0_ECC 0x00010000U
+#define NGBE_MIS_ST_LAN1_ECC 0x00020000U
+#define NGBE_MIS_ST_LAN2_ECC 0x00040000U
+#define NGBE_MIS_ST_LAN3_ECC 0x00080000U
+#define NGBE_MIS_ST_MNG_ECC 0x00100000U
+#define NGBE_MIS_ST_PCORE_ECC 0x00200000U
+#define NGBE_MIS_ST_PCIWRP_ECC 0x00400000U
+#define NGBE_MIS_ST_PCIEPHY_ECC 0x00800000U
+#define NGBE_MIS_ST_FMGR_ECC 0x01000000U
+#define NGBE_MIS_ST_GPHY_IN_RST(_r) (0x00000200U << (_r))
+
+#define NGBE_MIS_SWSM_SMBI 1
+#define NGBE_MIS_RST_ST_DEV_RST_ST_DONE 0x00000000U
+#define NGBE_MIS_RST_ST_DEV_RST_ST_REQ 0x00080000U
+#define NGBE_MIS_RST_ST_DEV_RST_ST_INPROGRESS 0x00100000U
+#define NGBE_MIS_RST_ST_DEV_RST_ST_MASK 0x00180000U
+#define NGBE_MIS_RST_ST_DEV_RST_TYPE_MASK 0x00070000U
+#define NGBE_MIS_RST_ST_DEV_RST_TYPE_SHIFT 16
+#define NGBE_MIS_RST_ST_DEV_RST_TYPE_SW_RST 0x3
+#define NGBE_MIS_RST_ST_DEV_RST_TYPE_GLOBAL_RST 0x5
+#define NGBE_MIS_RST_ST_RST_INIT 0x0000FF00U
+#define NGBE_MIS_RST_ST_RST_INI_SHIFT 8
+#define NGBE_MIS_RST_ST_RST_TIM 0x000000FFU
+#define NGBE_MIS_PF_SM_SM 1
+
+/* Sensors for PVT(Process Voltage Temperature) */
+#define NGBE_TS_CTL 0x10300
+#define NGBE_TS_EN 0x10304
+#define NGBE_TS_ST 0x10308
+#define NGBE_TS_ALARM_THRE 0x1030C
+#define NGBE_TS_DALARM_THRE 0x10310
+#define NGBE_TS_INT_EN 0x10314
+#define NGBE_TS_ALARM_ST 0x10318
+#define NGBE_TS_ALARM_ST_DALARM 0x00000002U
+#define NGBE_TS_ALARM_ST_ALARM 0x00000001U
+
+#define NGBE_EFUSE_WDATA0 0x10320
+#define NGBE_EFUSE_WDATA1 0x10324
+#define NGBE_EFUSE_RDATA0 0x10328
+#define NGBE_EFUSE_RDATA1 0x1032C
+#define NGBE_EFUSE_STATUS 0x10330
+
+#define NGBE_TS_CTL_CALI_DONE 0x80000000U
+#define NGBE_TS_EN_ENA 0x00000001U
+#define NGBE_TS_ST_DATA_OUT_MASK 0x000003FFU
+#define NGBE_TS_ALARM_THRE_MASK 0x000003FFU
+#define NGBE_TS_DALARM_THRE_MASK 0x000003FFU
+#define NGBE_TS_INT_EN_DALARM_INT_EN 0x00000002U
+#define NGBE_TS_INT_EN_ALARM_INT_EN 0x00000001U
+
+struct ngbe_thermal_diode_data {
+ s16 temp;
+ s16 alarm_thresh;
+ s16 dalarm_thresh;
+};
+
+struct ngbe_thermal_sensor_data {
+ struct ngbe_thermal_diode_data sensor;
+};
+
+/* FMGR Registers */
+#define NGBE_SPI_ILDR_STATUS 0x10120
+#define NGBE_SPI_ILDR_STATUS_PERST 0x00000001U /* PCIE_PERST is done */
+#define NGBE_SPI_ILDR_STATUS_PWRRST 0x00000002U /* Power on reset done */
+#define NGBE_SPI_ILDR_STATUS_SW_RESET 0x00000800U /* software reset done */
+#define NGBE_SPI_ILDR_STATUS_LAN0_SW_RST 0x00002000U /* lan0 soft reset done */
+#define NGBE_SPI_ILDR_STATUS_LAN1_SW_RST 0x00004000U /* lan1 soft reset done */
+#define NGBE_SPI_ILDR_STATUS_LAN2_SW_RST 0x00008000U /* lan2 soft reset done */
+#define NGBE_SPI_ILDR_STATUS_LAN3_SW_RST 0x00010000U /* lan3 soft reset done */
+
+#define NGBE_MAX_FLASH_LOAD_POLL_TIME 10
+
+#define NGBE_SPI_CMD 0x10104
+#define NGBE_SPI_CMD_CMD(_v) (((_v) & 0x7) << 28)
+#define NGBE_SPI_CMD_CLK(_v) (((_v) & 0x7) << 25)
+#define NGBE_SPI_CMD_ADDR(_v) (((_v) & 0x7FFFFF))
+
+#define NGBE_SPI_DATA 0x10108
+#define NGBE_SPI_DATA_BYPASS ((0x1) << 31)
+#define NGBE_SPI_DATA_STATUS(_v) (((_v) & 0xFF) << 16)
+#define NGBE_SPI_DATA_OP_DONE ((0x1))
+
+#define NGBE_SPI_STATUS 0x1010C
+#define NGBE_SPI_STATUS_OPDONE ((0x1))
+#define NGBE_SPI_STATUS_FLASH_BYPASS ((0x1) << 31)
+
+#define NGBE_SPI_USR_CMD 0x10110
+#define NGBE_SPI_CMDCFG0 0x10114
+#define NGBE_SPI_CMDCFG1 0x10118
+#define NGBE_SPI_ILDR_SWPTR 0x10124
+
+/************************* Port Registers ************************************/
+
+/* port cfg Registers */
+#define NGBE_CFG_PORT_CTL 0x14400
+#define NGBE_CFG_PORT_ST 0x14404
+#define NGBE_CFG_EX_VTYPE 0x14408
+#define NGBE_CFG_LED_CTL 0x14424
+
+/* internal phy reg_offset [0,31] */
+#define NGBE_PHY_CONFIG(reg_offset) (0x14000 + ((reg_offset) * 4))
+
+#define NGBE_CFG_TCP_TIME 0x14420
+#define NGBE_CFG_TAG_TPID(_i) (0x14430 + ((_i) * 4)) /* [0,3] */
+#define NGBE_CFG_LAN_SPEED 0x14440
+
+/* port cfg bit */
+#define NGBE_CFG_PORT_CTL_PFRSTD 0x00004000U /* Phy Function Reset Done */
+#define NGBE_CFG_PORT_CTL_D_VLAN 0x00000001U /* double vlan*/
+#define NGBE_CFG_PORT_CTL_ETAG_ETYPE_VLD 0x00000002U
+#define NGBE_CFG_PORT_CTL_QINQ 0x00000004U
+#define NGBE_CFG_PORT_CTL_DRV_LOAD 0x00000008U
+#define NGBE_CFG_PORT_CTL_NUM_VT_MASK 0x00001000U /* number of TVs */
+#define NGBE_CFG_PORT_CTL_NUM_VT_NONE 0x00000000U
+#define NGBE_CFG_PORT_CTL_NUM_VT_8 0x00001000U
+/* Status Bit */
+#define NGBE_CFG_PORT_ST_LINK_1000M 0x00000002U
+#define NGBE_CFG_PORT_ST_LINK_100M 0x00000004U
+#define NGBE_CFG_PORT_ST_LINK_10M 0x00000008U
+#define NGBE_CFG_PORT_ST_LAN_ID(_r) ((0x00000300U & (_r)) >> 8)
+#define NGBE_LINK_UP_TIME 90
+
+/* LED CTL Bit */
+
+#define NGBE_CFG_LED_CTL_LINK_10M_SEL 0x00000008U
+#define NGBE_CFG_LED_CTL_LINK_100M_SEL 0x00000004U
+#define NGBE_CFG_LED_CTL_LINK_1G_SEL 0x00000002U
+#define NGBE_CFG_LED_CTL_LINK_OD_SHIFT 16
+/* LED modes */
+#define NGBE_LED_LINK_10M NGBE_CFG_LED_CTL_LINK_10M_SEL
+#define NGBE_LED_LINK_1G NGBE_CFG_LED_CTL_LINK_1G_SEL
+#define NGBE_LED_LINK_100M NGBE_CFG_LED_CTL_LINK_100M_SEL
+
+/* GPIO Registers */
+#define NGBE_GPIO_DR 0x14800
+#define NGBE_GPIO_DDR 0x14804
+#define NGBE_GPIO_CTL 0x14808
+#define NGBE_GPIO_INTEN 0x14830
+#define NGBE_GPIO_INTMASK 0x14834
+#define NGBE_GPIO_INTTYPE_LEVEL 0x14838
+#define NGBE_GPIO_POLARITY 0x1483C
+#define NGBE_GPIO_INTSTATUS 0x14840
+#define NGBE_GPIO_EOI 0x1484C
+/*GPIO bit */
+#define NGBE_GPIO_DR_0 0x00000001U /* SDP0 Data Value */
+#define NGBE_GPIO_DR_1 0x00000002U /* SDP1 Data Value */
+#define NGBE_GPIO_DDR_0 0x00000001U /* SDP0 IO direction */
+#define NGBE_GPIO_DDR_1 0x00000002U /* SDP1 IO direction */
+#define NGBE_GPIO_CTL_SW_MODE 0x00000000U /* SDP software mode */
+#define NGBE_GPIO_INTEN_1 0x00000002U /* SDP1 interrupt enable */
+#define NGBE_GPIO_INTEN_2 0x00000004U /* SDP2 interrupt enable */
+#define NGBE_GPIO_INTEN_3 0x00000008U /* SDP3 interrupt enable */
+#define NGBE_GPIO_INTEN_5 0x00000020U /* SDP5 interrupt enable */
+#define NGBE_GPIO_INTEN_6 0x00000040U /* SDP6 interrupt enable */
+#define NGBE_GPIO_INTTYPE_LEVEL_2 0x00000004U /* SDP2 interrupt type level */
+#define NGBE_GPIO_INTTYPE_LEVEL_3 0x00000008U /* SDP3 interrupt type level */
+#define NGBE_GPIO_INTTYPE_LEVEL_5 0x00000020U /* SDP5 interrupt type level */
+#define NGBE_GPIO_INTTYPE_LEVEL_6 0x00000040U /* SDP6 interrupt type level */
+#define NGBE_GPIO_INTSTATUS_1 0x00000002U /* SDP1 interrupt status */
+#define NGBE_GPIO_INTSTATUS_2 0x00000004U /* SDP2 interrupt status */
+#define NGBE_GPIO_INTSTATUS_3 0x00000008U /* SDP3 interrupt status */
+#define NGBE_GPIO_INTSTATUS_5 0x00000020U /* SDP5 interrupt status */
+#define NGBE_GPIO_INTSTATUS_6 0x00000040U /* SDP6 interrupt status */
+#define NGBE_GPIO_EOI_2 0x00000004U /* SDP2 interrupt clear */
+#define NGBE_GPIO_EOI_3 0x00000008U /* SDP3 interrupt clear */
+#define NGBE_GPIO_EOI_5 0x00000020U /* SDP5 interrupt clear */
+#define NGBE_GPIO_EOI_6 0x00000040U /* SDP6 interrupt clear */
+
+/* TPH registers */
+#define NGBE_CFG_TPH_TDESC 0x14F00 /* TPH conf for Tx desc write back */
+#define NGBE_CFG_TPH_RDESC 0x14F04 /* TPH conf for Rx desc write back */
+#define NGBE_CFG_TPH_RHDR 0x14F08 /* TPH conf for writing Rx pkt header */
+#define NGBE_CFG_TPH_RPL 0x14F0C /* TPH conf for payload write access */
+/* TPH bit */
+#define NGBE_CFG_TPH_TDESC_EN 0x80000000U
+#define NGBE_CFG_TPH_TDESC_PH_SHIFT 29
+#define NGBE_CFG_TPH_TDESC_ST_SHIFT 16
+#define NGBE_CFG_TPH_RDESC_EN 0x80000000U
+#define NGBE_CFG_TPH_RDESC_PH_SHIFT 29
+#define NGBE_CFG_TPH_RDESC_ST_SHIFT 16
+#define NGBE_CFG_TPH_RHDR_EN 0x00008000U
+#define NGBE_CFG_TPH_RHDR_PH_SHIFT 13
+#define NGBE_CFG_TPH_RHDR_ST_SHIFT 0
+#define NGBE_CFG_TPH_RPL_EN 0x80000000U
+#define NGBE_CFG_TPH_RPL_PH_SHIFT 29
+#define NGBE_CFG_TPH_RPL_ST_SHIFT 16
+
+/*********************** Transmit DMA registers **************************/
+/* transmit global control */
+#define NGBE_TDM_CTL 0x18000
+#define NGBE_TDM_POOL_TE 0x18004
+#define NGBE_TDM_PB_THRE 0x18020
+
+#define NGBE_TDM_LLQ 0x18040
+#define NGBE_TDM_ETYPE_LB_L 0x18050
+
+#define NGBE_TDM_ETYPE_AS_L 0x18058
+#define NGBE_TDM_MAC_AS_L 0x18060
+
+#define NGBE_TDM_VLAN_AS_L 0x18070
+
+#define NGBE_TDM_TCP_FLG_L 0x18078
+#define NGBE_TDM_TCP_FLG_H 0x1807C
+#define NGBE_TDM_VLAN_INS(_i) (0x18100 + ((_i) * 4)) /* 8 of these 0 - 7 */
+/* TDM CTL BIT */
+#define NGBE_TDM_CTL_TE 0x1 /* Transmit Enable */
+#define NGBE_TDM_CTL_PADDING 0x2 /* Padding byte number for ipsec ESP */
+#define NGBE_TDM_CTL_VT_SHIFT 16 /* VLAN EtherType */
+/* Per VF Port VLAN insertion rules */
+#define NGBE_TDM_VLAN_INS_VLANA_DEFAULT 0x40000000U /*Always use default VLAN*/
+#define NGBE_TDM_VLAN_INS_VLANA_NEVER 0x80000000U /* Never insert VLAN tag */
+
+#define NGBE_TDM_RP_CTL_RST ((0x1) << 0)
+#define NGBE_TDM_RP_CTL_RPEN ((0x1) << 2)
+#define NGBE_TDM_RP_CTL_RLEN ((0x1) << 3)
+#define NGBE_TDM_RP_RATE_MIN(v) ((0x3FFF & (v)))
+#define NGBE_TDM_RP_RATE_MAX(v) ((0x3FFF & (v)) << 16)
+
+/* qos */
+#define NGBE_TDM_PBWARB_CTL 0x18200
+#define NGBE_TDM_VM_CREDIT_VAL(v) (0x3FF & (v))
+
+/* etag */
+#define NGBE_TDM_ETAG_INS(_i) (0x18700 + ((_i) * 4)) /* 8 of these 0 - 7 */
+/* statistic */
+#define NGBE_TDM_DRP_CNT 0x18300
+#define NGBE_TDM_SEC_DRP 0x18304
+#define NGBE_TDM_PKT_CNT 0x18308
+#define NGBE_TDM_BYTE_CNT_L 0x1830C
+#define NGBE_TDM_BYTE_CNT_H 0x18310
+#define NGBE_TDM_OS2BMC_CNT 0x18314
+
+/**************************** Receive DMA registers **************************/
+/* receive control */
+#define NGBE_RDM_ARB_CTL 0x12000
+#define NGBE_RDM_POOL_RE 0x12004
+
+#define NGBE_RDM_PF_QDE 0x12080
+#define NGBE_RDM_PF_HIDE 0x12090
+/* VFRE bitmask */
+#define NGBE_RDM_POOL_RE_ENABLE_ALL 0xFFFFFFFFU
+
+/* statistic */
+#define NGBE_RDM_DRP_PKT 0x12500
+#define NGBE_RDM_PKT_CNT 0x12504
+#define NGBE_RDM_BYTE_CNT_L 0x12508
+#define NGBE_RDM_BYTE_CNT_H 0x1250C
+#define NGBE_RDM_BMC2OS_CNT 0x12510
+
+/***************************** RDB registers *********************************/
+/* Flow Control Registers */
+#define NGBE_RDB_RFCV 0x19200
+#define NGBE_RDB_RFCL 0x19220
+#define NGBE_RDB_RFCH 0x19260
+#define NGBE_RDB_RFCRT 0x192A0
+#define NGBE_RDB_RFCC 0x192A4
+/* receive packet buffer */
+#define NGBE_RDB_PB_WRAP 0x19004
+#define NGBE_RDB_PB_SZ 0x19020
+
+#define NGBE_RDB_PB_CTL 0x19000
+#define NGBE_RDB_PB_SZ_SHIFT 10
+#define NGBE_RDB_PB_SZ_MASK 0x000FFC00U
+/* lli interrupt */
+#define NGBE_RDB_LLI_THRE 0x19080
+#define NGBE_RDB_LLI_THRE_SZ(_v) ((0xFFF & (_v)))
+#define NGBE_RDB_LLI_THRE_UP(_v) ((0x7 & (_v)) << 16)
+#define NGBE_RDB_LLI_THRE_UP_SHIFT 16
+
+/* ring assignment */
+#define NGBE_RDB_PL_CFG(_i) (0x19300 + ((_i) * 4)) /* [0,7] */
+#define NGBE_RDB_RSSTBL(_i) (0x19400 + ((_i) * 4)) /* [0,31] */
+#define NGBE_RDB_RSSRK(_i) (0x19480 + ((_i) * 4)) /* [0,9] */
+#define NGBE_RDB_RA_CTL 0x194F4
+#define NGBE_RDB_5T_SDP(_i) (0x19A00 + ((_i) * 4)) /*Src Dst Addr Q Filter*/
+#define NGBE_RDB_5T_CTL0(_i) (0x19C00 + ((_i) * 4)) /* Five Tuple Q Filter */
+#define NGBE_RDB_ETYPE_CLS(_i) (0x19100 + ((_i) * 4)) /* EType Q Select */
+#define NGBE_RDB_SYN_CLS 0x19130
+#define NGBE_RDB_5T_CTL1(_i) (0x19E00 + ((_i) * 4)) /*8 of these (0-7)*/
+/* VM RSS */
+#define NGBE_RDB_VMRSSRK(_i, _p) (0x1A000 + ((_i) * 4) + ((_p) * 0x40))
+#define NGBE_RDB_VMRSSTBL(_i, _p) (0x1B000 + ((_i) * 4) + ((_p) * 0x40))
+/* statistic */
+#define NGBE_RDB_MPCNT 0x19040
+#define NGBE_RDB_PKT_CNT 0x19060
+#define NGBE_RDB_REPLI_CNT 0x19064
+#define NGBE_RDB_DRP_CNT 0x19068
+#define NGBE_RDB_LXONTXC 0x1921C
+#define NGBE_RDB_LXOFFTXC 0x19218
+#define NGBE_RDB_PFCMACDAL 0x19210
+#define NGBE_RDB_PFCMACDAH 0x19214
+#define NGBE_RDB_TXSWERR 0x1906C
+#define NGBE_RDB_TXSWERR_TB_FREE 0x3FF
+/* rdb_pl_cfg reg mask */
+#define NGBE_RDB_PL_CFG_L4HDR 0x2
+#define NGBE_RDB_PL_CFG_L3HDR 0x4
+#define NGBE_RDB_PL_CFG_L2HDR 0x8
+#define NGBE_RDB_PL_CFG_TUN_OUTER_L2HDR 0x20
+#define NGBE_RDB_PL_CFG_TUN_TUNHDR 0x10
+/* RQTC Bit Masks and Shifts */
+#define NGBE_RDB_RSS_TC_SHIFT_TC(_i) ((_i) * 4)
+#define NGBE_RDB_RSS_TC_TC0_MASK (0x7 << 0)
+#define NGBE_RDB_RSS_TC_TC1_MASK (0x7 << 4)
+#define NGBE_RDB_RSS_TC_TC2_MASK (0x7 << 8)
+#define NGBE_RDB_RSS_TC_TC3_MASK (0x7 << 12)
+#define NGBE_RDB_RSS_TC_TC4_MASK (0x7 << 16)
+#define NGBE_RDB_RSS_TC_TC5_MASK (0x7 << 20)
+#define NGBE_RDB_RSS_TC_TC6_MASK (0x7 << 24)
+#define NGBE_RDB_RSS_TC_TC7_MASK (0x7 << 28)
+/* Packet Buffer Initialization */
+#define NGBE_MAX_PACKET_BUFFERS 8
+#define NGBE_RDB_PB_SZ_48KB 0x00000030U /* 48KB Packet Buffer */
+#define NGBE_RDB_PB_SZ_64KB 0x00000040U /* 64KB Packet Buffer */
+#define NGBE_RDB_PB_SZ_80KB 0x00000050U /* 80KB Packet Buffer */
+#define NGBE_RDB_PB_SZ_128KB 0x00000080U /* 128KB Packet Buffer */
+#define NGBE_RDB_PB_SZ_MAX 0x00000200U /* 512KB Packet Buffer */
+
+/* Packet buffer allocation strategies */
+enum {
+ PBA_STRATEGY_EQUAL = 0, /* Distribute PB space equally */
+#define PBA_STRATEGY_EQUAL PBA_STRATEGY_EQUAL
+ PBA_STRATEGY_WEIGHTED = 1, /* Weight front half of TCs */
+#define PBA_STRATEGY_WEIGHTED PBA_STRATEGY_WEIGHTED
+};
+
+/* FCRTL Bit Masks */
+#define NGBE_RDB_RFCL_XONE 0x80000000U /* XON enable */
+#define NGBE_RDB_RFCH_XOFFE 0x80000000U /* Packet buffer fc enable */
+/* FCCFG Bit Masks */
+#define NGBE_RDB_RFCC_RFCE_802_3X 0x00000008U /* Tx link FC enable */
+
+/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */
+#define NGBE_RDB_5T_CTL1_SIZE_BP 0x00001000U /* Packet size bypass */
+#define NGBE_RDB_5T_CTL1_LLI 0x00100000U /* Enables low latency Int */
+#define NGBE_RDB_LLI_THRE_PRIORITY_MASK 0x00070000U /* VLAN priority mask */
+#define NGBE_RDB_LLI_THRE_PRIORITY_EN 0x00080000U /* VLAN priority enable */
+
+#define NGBE_MAX_RDB_5T_CTL0_FILTERS 128
+#define NGBE_RDB_5T_CTL0_PROTOCOL_MASK 0x00000003U
+#define NGBE_RDB_5T_CTL0_PROTOCOL_TCP 0x00000000U
+#define NGBE_RDB_5T_CTL0_PROTOCOL_UDP 0x00000001U
+#define NGBE_RDB_5T_CTL0_PROTOCOL_SCTP 0x00000002U
+#define NGBE_RDB_5T_CTL0_PRIORITY_MASK 0x00000007U
+#define NGBE_RDB_5T_CTL0_PRIORITY_SHIFT 2
+#define NGBE_RDB_5T_CTL0_POOL_MASK 0x0000003FU
+#define NGBE_RDB_5T_CTL0_POOL_SHIFT 8
+#define NGBE_RDB_5T_CTL0_5TUPLE_MASK_MASK 0x00000007U
+#define NGBE_RDB_5T_CTL0_5TUPLE_MASK_SHIFT 27
+#define NGBE_RDB_5T_CTL0_SOURCE_PORT_MASK 0x1B
+#define NGBE_RDB_5T_CTL0_DEST_PORT_MASK 0x05
+#define NGBE_RDB_5T_CTL0_PROTOCOL_COMP_MASK 0x0F
+#define NGBE_RDB_5T_CTL0_POOL_MASK_EN 0x40000000U
+#define NGBE_RDB_5T_CTL0_QUEUE_ENABLE 0x80000000U
+
+#define NGBE_RDB_ETYPE_CLS_RX_QUEUE 0x007F0000U /* bits 22:16 */
+#define NGBE_RDB_ETYPE_CLS_RX_QUEUE_SHIFT 16
+#define NGBE_RDB_ETYPE_CLS_LLI 0x20000000U /* bit 29 */
+#define NGBE_RDB_ETYPE_CLS_QUEUE_EN 0x80000000U /* bit 31 */
+
+/* Receive Config masks */
+#define NGBE_RDB_PB_CTL_PBEN (0x80000000) /* Enable Receiver */
+#define NGBE_RDB_PB_CTL_DISABLED 0x1
+
+#define NGBE_RDB_RA_CTL_RSS_EN 0x00000004U /* RSS Enable */
+#define NGBE_RDB_RA_CTL_RSS_MASK 0xFFFF0000U
+#define NGBE_RDB_RA_CTL_RSS_IPV4_TCP 0x00010000U
+#define NGBE_RDB_RA_CTL_RSS_IPV4 0x00020000U
+#define NGBE_RDB_RA_CTL_RSS_IPV6 0x00100000U
+#define NGBE_RDB_RA_CTL_RSS_IPV6_TCP 0x00200000U
+#define NGBE_RDB_RA_CTL_RSS_IPV4_UDP 0x00400000U
+#define NGBE_RDB_RA_CTL_RSS_IPV6_UDP 0x00800000U
+
+/******************************* PSR Registers *******************************/
+/* psr control */
+#define NGBE_PSR_CTL 0x15000
+#define NGBE_PSR_VLAN_CTL 0x15088
+#define NGBE_PSR_VM_CTL 0x151B0
+#define NGBE_PSR_PKT_CNT 0x151B8
+#define NGBE_PSR_MNG_PKT_CNT 0x151BC
+#define NGBE_PSR_DBG_DOP_CNT 0x151C0
+#define NGBE_PSR_MNG_DOP_CNT 0x151C4
+#define NGBE_PSR_VM_FLP_L 0x151C8
+
+/* Header split receive */
+#define NGBE_PSR_CTL_SW_EN 0x00040000U
+#define NGBE_PSR_CTL_PCSD 0x00002000U
+#define NGBE_PSR_CTL_IPPCSE 0x00001000U
+#define NGBE_PSR_CTL_BAM 0x00000400U
+#define NGBE_PSR_CTL_UPE 0x00000200U
+#define NGBE_PSR_CTL_MPE 0x00000100U
+#define NGBE_PSR_CTL_MFE 0x00000080U
+#define NGBE_PSR_CTL_MO 0x00000060U
+#define NGBE_PSR_CTL_TPE 0x00000010U
+#define NGBE_PSR_CTL_MO_SHIFT 5
+/* VT_CTL bitmasks */
+#define NGBE_PSR_VM_CTL_DIS_DEFPL 0x20000000U /* disable default pool */
+#define NGBE_PSR_VM_CTL_REPLEN 0x40000000U /* replication enabled */
+#define NGBE_PSR_VM_CTL_POOL_SHIFT 7
+#define NGBE_PSR_VM_CTL_POOL_MASK (0x7 << NGBE_PSR_VM_CTL_POOL_SHIFT)
+/* VLAN Control Bit Masks */
+#define NGBE_PSR_VLAN_CTL_VET 0x0000FFFFU /* bits 0-15 */
+#define NGBE_PSR_VLAN_CTL_CFI 0x10000000U /* bit 28 */
+#define NGBE_PSR_VLAN_CTL_CFIEN 0x20000000U /* bit 29 */
+#define NGBE_PSR_VLAN_CTL_VFE 0x40000000U /* bit 30 */
+
+/* vm L2 control */
+#define NGBE_PSR_VM_L2CTL(_i) (0x15600 + ((_i) * 4))
+/* VMOLR bitmasks */
+#define NGBE_PSR_VM_L2CTL_LBDIS 0x00000002U /* disable loopback */
+#define NGBE_PSR_VM_L2CTL_LLB 0x00000004U /* local pool loopback */
+#define NGBE_PSR_VM_L2CTL_UPE 0x00000010U /* unicast promiscuous */
+#define NGBE_PSR_VM_L2CTL_TPE 0x00000020U /* ETAG promiscuous */
+#define NGBE_PSR_VM_L2CTL_VACC 0x00000040U /* accept nomatched vlan */
+#define NGBE_PSR_VM_L2CTL_VPE 0x00000080U /* vlan promiscuous mode */
+#define NGBE_PSR_VM_L2CTL_AUPE 0x00000100U /* accept untagged packets */
+#define NGBE_PSR_VM_L2CTL_ROMPE 0x00000200U /*accept packets in MTA tbl*/
+#define NGBE_PSR_VM_L2CTL_ROPE 0x00000400U /* accept packets in UC tbl*/
+#define NGBE_PSR_VM_L2CTL_BAM 0x00000800U /* accept broadcast packets*/
+#define NGBE_PSR_VM_L2CTL_MPE 0x00001000U /* multicast promiscuous */
+
+/* etype switcher 1st stage */
+#define NGBE_PSR_ETYPE_SWC(_i) (0x15128 + ((_i) * 4)) /* EType Queue Filter */
+/* ETYPE Queue Filter/Select Bit Masks */
+#define NGBE_MAX_PSR_ETYPE_SWC_FILTERS 8
+#define NGBE_PSR_ETYPE_SWC_FCOE 0x08000000U /* bit 27 */
+#define NGBE_PSR_ETYPE_SWC_TX_ANTISPOOF 0x20000000U /* bit 29 */
+#define NGBE_PSR_ETYPE_SWC_1588 0x40000000U /* bit 30 */
+#define NGBE_PSR_ETYPE_SWC_FILTER_EN 0x80000000U /* bit 31 */
+#define NGBE_PSR_ETYPE_SWC_POOL_ENABLE (1 << 26) /* bit 26 */
+#define NGBE_PSR_ETYPE_SWC_POOL_SHIFT 20
+/*
+ * ETQF filter list: one static filter per filter consumer. This is
+ * to avoid filter collisions later. Add new filters here.
+ *
+ * Current filters:
+ * EAPOL 802.1x (0x888e): Filter 0
+ * FCoE (0x8906): Filter 2
+ * 1588 (0x88f7): Filter 3
+ * FIP (0x8914): Filter 4
+ * LLDP (0x88CC): Filter 5
+ * LACP (0x8809): Filter 6
+ * FC (0x8808): Filter 7
+ */
+#define NGBE_PSR_ETYPE_SWC_FILTER_EAPOL 0
+#define NGBE_PSR_ETYPE_SWC_FILTER_FCOE 2
+#define NGBE_PSR_ETYPE_SWC_FILTER_1588 3
+#define NGBE_PSR_ETYPE_SWC_FILTER_FIP 4
+#define NGBE_PSR_ETYPE_SWC_FILTER_LLDP 5
+#define NGBE_PSR_ETYPE_SWC_FILTER_LACP 6
+#define NGBE_PSR_ETYPE_SWC_FILTER_FC 7
+
+/* mcast/ucast overflow tbl */
+#define NGBE_PSR_MC_TBL(_i) (0x15200 + ((_i) * 4))
+#define NGBE_PSR_UC_TBL(_i) (0x15400 + ((_i) * 4))
+
+/* vlan tbl */
+#define NGBE_PSR_VLAN_TBL(_i) (0x16000 + ((_i) * 4))
+
+/* mac switcher */
+#define NGBE_PSR_MAC_SWC_AD_L 0x16200
+#define NGBE_PSR_MAC_SWC_AD_H 0x16204
+#define NGBE_PSR_MAC_SWC_VM 0x16208
+#define NGBE_PSR_MAC_SWC_IDX 0x16210
+/* RAH */
+#define NGBE_PSR_MAC_SWC_AD_H_AD(v) (((v) & 0xFFFF))
+#define NGBE_PSR_MAC_SWC_AD_H_ADTYPE(v) (((v) & 0x1) << 30)
+#define NGBE_PSR_MAC_SWC_AD_H_AV 0x80000000U
+#define NGBE_CLEAR_VMDQ_ALL 0xFFFFFFFFU
+
+/* vlan switch */
+#define NGBE_PSR_VLAN_SWC 0x16220
+#define NGBE_PSR_VLAN_SWC_VM_L 0x16224
+#define NGBE_PSR_VLAN_SWC_IDX 0x16230 /* 32 vlan entries */
+/* VLAN pool filtering masks */
+#define NGBE_PSR_VLAN_SWC_VIEN 0x80000000U /* filter is valid */
+#define NGBE_PSR_VLAN_SWC_ENTRIES 32
+#define NGBE_PSR_VLAN_SWC_VLANID_MASK 0x00000FFFU
+#define NGBE_ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.1q protocol */
+
+/* Management */
+#define NGBE_PSR_MNG_FIT_CTL 0x15820
+/* Management Bit Fields and Masks */
+#define NGBE_PSR_MNG_FIT_CTL_MPROXYE 0x40000000U /* Management Proxy Enable */
+#define NGBE_PSR_MNG_FIT_CTL_RCV_TCO_EN 0x00020000U /* Rcv TCO packet enable */
+#define NGBE_PSR_MNG_FIT_CTL_EN_BMC2OS 0x10000000U /* Ena BMC2OS and OS2BMC
+ * traffic */
+#define NGBE_PSR_MNG_FIT_CTL_EN_BMC2OS_SHIFT 28
+
+#define NGBE_PSR_MNG_FLEX_SEL 0x1582C
+#define NGBE_PSR_MNG_FLEX_DW_L(_i) (0x15A00 + ((_i) * 16)) /* [0,15] */
+#define NGBE_PSR_MNG_FLEX_DW_H(_i) (0x15A04 + ((_i) * 16))
+#define NGBE_PSR_MNG_FLEX_MSK(_i) (0x15A08 + ((_i) * 16))
+
+/* mirror */
+#define NGBE_PSR_MR_CTL(_i) (0x15B00 + ((_i) * 4)) /* [0,3] */
+#define NGBE_PSR_MR_VLAN_L(_i) (0x15B10 + ((_i) * 8))
+#define NGBE_PSR_MR_VM_L(_i) (0x15B30 + ((_i) * 8))
+
+/* 1588 */
+#define NGBE_PSR_1588_CTL 0x15188 /* Rx Time Sync Control register - RW */
+#define NGBE_PSR_1588_STMPL 0x151E8 /* Rx timestamp Low - RO */
+#define NGBE_PSR_1588_STMPH 0x151A4 /* Rx timestamp High - RO */
+#define NGBE_PSR_1588_ATTRL 0x151A0 /* Rx timestamp attribute low - RO */
+#define NGBE_PSR_1588_ATTRH 0x151A8 /* Rx timestamp attribute high - RO */
+#define NGBE_PSR_1588_MSGTYPE 0x15120 /* RX message type register low - RW */
+/* 1588 CTL Bit */
+#define NGBE_PSR_1588_CTL_VALID 0x00000001U /* Rx timestamp valid */
+#define NGBE_PSR_1588_CTL_TYPE_MASK 0x0000000EU /* Rx type mask */
+#define NGBE_PSR_1588_CTL_TYPE_L2_V2 0x00
+#define NGBE_PSR_1588_CTL_TYPE_L4_V1 0x02
+#define NGBE_PSR_1588_CTL_TYPE_L2_L4_V2 0x04
+#define NGBE_PSR_1588_CTL_TYPE_EVENT_V2 0x0A
+#define NGBE_PSR_1588_CTL_ENABLED 0x00000010U /* Rx Timestamp enabled*/
+/* 1588 msg type bit */
+#define NGBE_PSR_1588_MSGTYPE_V1_CTRLT_MASK 0x000000FFU
+#define NGBE_PSR_1588_MSGTYPE_V1_SYNC_MSG 0x00
+#define NGBE_PSR_1588_MSGTYPE_V1_DELAY_REQ_MSG 0x01
+#define NGBE_PSR_1588_MSGTYPE_V1_FOLLOWUP_MSG 0x02
+#define NGBE_PSR_1588_MSGTYPE_V1_DELAY_RESP_MSG 0x03
+#define NGBE_PSR_1588_MSGTYPE_V1_MGMT_MSG 0x04
+#define NGBE_PSR_1588_MSGTYPE_V2_MSGID_MASK 0x0000FF00U
+#define NGBE_PSR_1588_MSGTYPE_V2_SYNC_MSG 0x0000
+#define NGBE_PSR_1588_MSGTYPE_V2_DELAY_REQ_MSG 0x0100
+#define NGBE_PSR_1588_MSGTYPE_V2_PDELAY_REQ_MSG 0x0200
+#define NGBE_PSR_1588_MSGTYPE_V2_PDELAY_RESP_MSG 0x0300
+#define NGBE_PSR_1588_MSGTYPE_V2_FOLLOWUP_MSG 0x0800
+#define NGBE_PSR_1588_MSGTYPE_V2_DELAY_RESP_MSG 0x0900
+#define NGBE_PSR_1588_MSGTYPE_V2_PDELAY_FOLLOWUP_MSG 0x0A00
+#define NGBE_PSR_1588_MSGTYPE_V2_ANNOUNCE_MSG 0x0B00
+#define NGBE_PSR_1588_MSGTYPE_V2_SIGNALLING_MSG 0x0C00
+#define NGBE_PSR_1588_MSGTYPE_V2_MGMT_MSG 0x0D00
+
+/* Wake up registers */
+#define NGBE_PSR_WKUP_CTL 0x15B80
+#define NGBE_PSR_WKUP_IPV 0x15B84
+#define NGBE_PSR_LAN_FLEX_SEL 0x15B8C
+#define NGBE_PSR_WKUP_IP4TBL(_i) (0x15BC0 + ((_i) * 4)) /* [0,3] */
+#define NGBE_PSR_WKUP_IP6TBL(_i) (0x15BE0 + ((_i) * 4))
+#define NGBE_PSR_LAN_FLEX_DW_L(_i) (0x15C00 + ((_i) * 16)) /* [0,15] */
+#define NGBE_PSR_LAN_FLEX_DW_H(_i) (0x15C04 + ((_i) * 16))
+#define NGBE_PSR_LAN_FLEX_MSK(_i) (0x15C08 + ((_i) * 16))
+#define NGBE_PSR_LAN_FLEX_CTL 0x15CFC
+/* Wake Up Filter Control Bit */
+#define NGBE_PSR_WKUP_CTL_LNKC 0x00000001U /* Link Status Change Wakeup Enable*/
+#define NGBE_PSR_WKUP_CTL_MAG 0x00000002U /* Magic Packet Wakeup Enable */
+#define NGBE_PSR_WKUP_CTL_EX 0x00000004U /* Directed Exact Wakeup Enable */
+#define NGBE_PSR_WKUP_CTL_MC 0x00000008U /* Directed Multicast Wakeup Enable*/
+#define NGBE_PSR_WKUP_CTL_BC 0x00000010U /* Broadcast Wakeup Enable */
+#define NGBE_PSR_WKUP_CTL_ARP 0x00000020U /* ARP Request Packet Wakeup Enable*/
+#define NGBE_PSR_WKUP_CTL_IPV4 0x00000040U /* Directed IPv4 Pkt Wakeup Enable */
+#define NGBE_PSR_WKUP_CTL_IPV6 0x00000080U /* Directed IPv6 Pkt Wakeup Enable */
+#define NGBE_PSR_WKUP_CTL_IGNORE_TCO 0x00008000U /* Ignore WakeOn TCO pkts */
+#define NGBE_PSR_WKUP_CTL_FLX0 0x00010000U /* Flexible Filter 0 Ena */
+#define NGBE_PSR_WKUP_CTL_FLX1 0x00020000U /* Flexible Filter 1 Ena */
+#define NGBE_PSR_WKUP_CTL_FLX2 0x00040000U /* Flexible Filter 2 Ena */
+#define NGBE_PSR_WKUP_CTL_FLX3 0x00080000U /* Flexible Filter 3 Ena */
+#define NGBE_PSR_WKUP_CTL_FLX4 0x00100000U /* Flexible Filter 4 Ena */
+#define NGBE_PSR_WKUP_CTL_FLX5 0x00200000U /* Flexible Filter 5 Ena */
+#define NGBE_PSR_WKUP_CTL_FLX_FILTERS 0x000F0000U /* Mask for 4 flex filters */
+#define NGBE_PSR_WKUP_CTL_FLX_FILTERS_6 0x003F0000U /* Mask for 6 flex filters*/
+#define NGBE_PSR_WKUP_CTL_FLX_FILTERS_8 0x00FF0000U /* Mask for 8 flex filters*/
+#define NGBE_PSR_WKUP_CTL_FW_RST_WK 0x80000000U /* Ena wake on FW reset
+ * assertion */
+/* Mask for Ext. flex filters */
+#define NGBE_PSR_WKUP_CTL_EXT_FLX_FILTERS 0x00300000U
+#define NGBE_PSR_WKUP_CTL_ALL_FILTERS 0x000F00FFU /* Mask all 4 flex filters*/
+#define NGBE_PSR_WKUP_CTL_ALL_FILTERS_6 0x003F00FFU /* Mask all 6 flex filters*/
+#define NGBE_PSR_WKUP_CTL_ALL_FILTERS_8 0x00FF00FFU /* Mask all 8 flex filters*/
+#define NGBE_PSR_WKUP_CTL_FLX_OFFSET 16 /* Offset to the Flex Filters bits*/
+
+#define NGBE_PSR_MAX_SZ 0x15020
+
+/****************************** TDB ******************************************/
+#define NGBE_TDB_RFCS 0x1CE00
+#define NGBE_TDB_PB_SZ 0x1CC00
+
+#define NGBE_TDB_PRB_CTL 0x17010
+#define NGBE_TDB_PBRARB_CTL 0x1CD00
+
+#define NGBE_TDB_PB_SZ_MAX 0x00005000U /* 20KB Packet Buffer */
+#define NGBE_TXPKT_SIZE_MAX 0xA /* Max Tx Packet size */
+#define NGBE_MAX_PB 8
+/* statistic */
+#define NGBE_TDB_OUT_PKT_CNT 0x1CF00
+#define NGBE_TDB_MNG_PKT_CNT 0x1CF04
+#define NGBE_TDB_LB_PKT_CNT 0x1CF08
+#define NGBE_TDB_MNG_LARGE_DOP_CNT 0x1CF0C
+
+/****************************** TSEC *****************************************/
+/* Security Control Registers */
+#define NGBE_TSEC_CTL 0x1D000
+#define NGBE_TSEC_ST 0x1D004
+#define NGBE_TSEC_BUF_AF 0x1D008
+#define NGBE_TSEC_BUF_AE 0x1D00C
+#define NGBE_TSEC_MIN_IFG 0x1D020
+
+/* 1588 */
+#define NGBE_TSEC_1588_CTL 0x11F00 /* Tx Time Sync Control reg */
+#define NGBE_TSEC_1588_STMPL 0x11F04 /* Tx timestamp value Low */
+#define NGBE_TSEC_1588_STMPH 0x11F08 /* Tx timestamp value High */
+#define NGBE_TSEC_1588_SYSTIML 0x11F0C /* System time register Low */
+#define NGBE_TSEC_1588_SYSTIMH 0x11F10 /* System time register High */
+#define NGBE_TSEC_1588_INC 0x11F14 /* Increment attributes reg */
+#define NGBE_TSEC_1588_INC_IV(v) ((v) & 0x7FFFFFF)
+
+#define NGBE_TSEC_1588_ADJL 0x11F18 /* Time Adjustment Offset reg Low */
+#define NGBE_TSEC_1588_ADJH 0x11F1C /* Time Adjustment Offset reg High*/
+
+#define NGBE_TSEC_1588_INT_ST 0x11F20
+#define NGBE_TSEC_1588_INT_EN 0x11F24
+
+/* 1588 fields */
+#define NGBE_TSEC_1588_CTL_VALID 0x00000001U /* Tx timestamp valid */
+#define NGBE_TSEC_1588_CTL_ENABLED 0x00000010U /* Tx timestamping enabled */
+
+#define NGBE_TSEC_1588_AUX_CTL 0x11F28
+#define NGBE_TSEC_1588_TRGT_L(i) (0x11F2C + ((i) * 8)) /* [0,1] */
+#define NGBE_TSEC_1588_TRGT_H(i) (0x11F30 + ((i) * 8)) /* [0,1] */
+#define NGBE_TSEC_1588_FREQ_CLK_L(i) (0x11F3C + ((i) * 8)) /* [0,1] */
+#define NGBE_TSEC_1588_FREQ_CLK_H(i) (0x11F40 + ((i) * 8)) /* [0,1] */
+#define NGBE_TSEC_1588_AUX_STMP_L(i) (0x11F4C + ((i) * 8)) /* [0,1] */
+#define NGBE_TSEC_1588_AUX_STMP_H(i) (0x11F50 + ((i) * 8)) /* [0,1] */
+#define NGBE_TSEC_1588_SDP(n) (0x11F5C + ((n) * 4)) /* [0,3] */
+
+/********************************* RSEC **************************************/
+/* general rsec */
+#define NGBE_RSEC_CTL 0x17000
+#define NGBE_RSEC_ST 0x17004
+/* general rsec fields */
+#define NGBE_RSEC_CTL_SECRX_DIS 0x00000001U
+#define NGBE_RSEC_CTL_RX_DIS 0x00000002U
+#define NGBE_RSEC_CTL_CRC_STRIP 0x00000004U
+#define NGBE_RSEC_CTL_SAVE_MAC_ERR 0x00000040U
+#define NGBE_RSEC_ST_RSEC_RDY 0x00000001U
+#define NGBE_RSEC_ST_RSEC_OFLD_DIS 0x00000002U
+#define NGBE_RSEC_ST_ECC_RXERR 0x00000004U
+
+/* link sec */
+#define NGBE_RSEC_LSEC_CAP 0x17200
+#define NGBE_RSEC_LSEC_CTL 0x17204
+#define NGBE_RSEC_LSEC_SCI_L 0x17208
+#define NGBE_RSEC_LSEC_SCI_H 0x1720C
+#define NGBE_RSEC_LSEC_SA0 0x17210
+#define NGBE_RSEC_LSEC_SA1 0x17214
+#define NGBE_RSEC_LSEC_PKNUM0 0x17218
+#define NGBE_RSEC_LSEC_PKNUM1 0x1721C
+#define NGBE_RSEC_LSEC_KEY0(_n) 0x17220
+#define NGBE_RSEC_LSEC_KEY1(_n) 0x17230
+#define NGBE_RSEC_LSEC_UNTAG_PKT 0x17240
+#define NGBE_RSEC_LSEC_DEC_OCTET 0x17244
+#define NGBE_RSEC_LSEC_VLD_OCTET 0x17248
+#define NGBE_RSEC_LSEC_BAD_PKT 0x1724C
+#define NGBE_RSEC_LSEC_NOSCI_PKT 0x17250
+#define NGBE_RSEC_LSEC_UNSCI_PKT 0x17254
+#define NGBE_RSEC_LSEC_UNCHK_PKT 0x17258
+#define NGBE_RSEC_LSEC_DLY_PKT 0x1725C
+#define NGBE_RSEC_LSEC_LATE_PKT 0x17260
+#define NGBE_RSEC_LSEC_OK_PKT(_n) 0x17264
+#define NGBE_RSEC_LSEC_INV_PKT(_n) 0x17274
+#define NGBE_RSEC_LSEC_BADSA_PKT 0x1727C
+#define NGBE_RSEC_LSEC_INVSA_PKT 0x17280
+
+/* ipsec */
+#define NGBE_RSEC_IPS_IDX 0x17100
+#define NGBE_RSEC_IPS_IDX_WT 0x80000000U
+#define NGBE_RSEC_IPS_IDX_RD 0x40000000U
+#define NGBE_RSEC_IPS_IDX_TB_IDX 0x0U
+#define NGBE_RSEC_IPS_IDX_TB_IP 0x00000002U
+#define NGBE_RSEC_IPS_IDX_TB_SPI 0x00000004U
+#define NGBE_RSEC_IPS_IDX_TB_KEY 0x00000006U
+#define NGBE_RSEC_IPS_IDX_EN 0x00000001U
+#define NGBE_RSEC_IPS_IP(i) (0x17104 + ((i) * 4))
+#define NGBE_RSEC_IPS_SPI 0x17114
+#define NGBE_RSEC_IPS_IP_IDX 0x17118
+#define NGBE_RSEC_IPS_KEY(i) (0x1711C + ((i) * 4))
+#define NGBE_RSEC_IPS_SALT 0x1712C
+#define NGBE_RSEC_IPS_MODE 0x17130
+#define NGBE_RSEC_IPS_MODE_IPV6 0x00000010
+#define NGBE_RSEC_IPS_MODE_DEC 0x00000008
+#define NGBE_RSEC_IPS_MODE_ESP 0x00000004
+#define NGBE_RSEC_IPS_MODE_AH 0x00000002
+#define NGBE_RSEC_IPS_MODE_VALID 0x00000001
+
+/************************************** ETH PHY ******************************/
+#define NGBE_XPCS_IDA_ADDR 0x13000
+#define NGBE_XPCS_IDA_DATA 0x13004
+#define NGBE_ETHPHY_IDA_ADDR 0x13008
+#define NGBE_ETHPHY_IDA_DATA 0x1300C
+
+/************************************** MNG ********************************/
+#define NGBE_MNG_FW_SM 0x1E000
+#define NGBE_MNG_SW_SM 0x1E004
+#define NGBE_MNG_SWFW_SYNC 0x1E008
+#define NGBE_MNG_MBOX 0x1E100
+#define NGBE_MNG_MBOX_CTL 0x1E044
+
+#define NGBE_MNG_OS2BMC_CNT 0x1E094
+#define NGBE_MNG_BMC2OS_CNT 0x1E090
+
+/* Firmware Semaphore Register */
+#define NGBE_MNG_FW_SM_MODE_MASK 0xE
+#define NGBE_MNG_FW_SM_TS_ENABLED 0x1
+/* SW Semaphore Register bitmasks */
+#define NGBE_MNG_SW_SM_SM 0x00000001U /* software Semaphore */
+
+/* SW_FW_SYNC definitions */
+#define NGBE_MNG_SWFW_SYNC_SW_PHY 0x0001
+#define NGBE_MNG_SWFW_SYNC_SW_FLASH 0x0008
+#define NGBE_MNG_SWFW_SYNC_SW_MB 0x0004
+
+#define NGBE_MNG_MBOX_CTL_SWRDY 0x1
+#define NGBE_MNG_MBOX_CTL_SWACK 0x2
+#define NGBE_MNG_MBOX_CTL_FWRDY 0x4
+#define NGBE_MNG_MBOX_CTL_FWACK 0x8
+
+/************************************* ETH MAC *****************************/
+#define NGBE_MAC_TX_CFG 0x11000
+#define NGBE_MAC_RX_CFG 0x11004
+#define NGBE_MAC_PKT_FLT 0x11008
+#define NGBE_MAC_PKT_FLT_PR (0x1) /* promiscuous mode */
+#define NGBE_MAC_PKT_FLT_RA (0x80000000) /* receive all */
+#define NGBE_MAC_WDG_TIMEOUT 0x1100C
+#define NGBE_MAC_TX_FLOW_CTRL 0x11070
+#define NGBE_MAC_RX_FLOW_CTRL 0x11090
+#define NGBE_MAC_INT_ST 0x110B0
+#define NGBE_MAC_INT_EN 0x110B4
+#define NGBE_MAC_ADDRESS0_HIGH 0x11300
+#define NGBE_MAC_ADDRESS0_LOW 0x11304
+
+#define NGBE_MAC_TX_CFG_TE 0x00000001U
+#define NGBE_MAC_TX_CFG_SPEED_MASK 0x60000000U
+#define NGBE_MAC_TX_CFG_SPEED_1G 0x60000000U
+#define NGBE_MAC_RX_CFG_RE 0x00000001U
+#define NGBE_MAC_RX_CFG_JE 0x00000100U
+#define NGBE_MAC_RX_CFG_LM 0x00000400U
+#define NGBE_MAC_WDG_TIMEOUT_PWE 0x00000100U
+#define NGBE_MAC_WDG_TIMEOUT_WTO_MASK 0x0000000FU
+#define NGBE_MAC_WDG_TIMEOUT_WTO_DELTA 2
+
+#define NGBE_MAC_RX_FLOW_CTRL_RFE 0x00000001U /* receive fc enable */
+
+#define NGBE_MSCA 0x11200
+#define NGBE_MSCA_RA(v) ((0xFFFF & (v)))
+#define NGBE_MSCA_PA(v) ((0x1F & (v)) << 16)
+#define NGBE_MSCA_DA(v) ((0x1F & (v)) << 21)
+#define NGBE_MSCC 0x11204
+#define NGBE_MSCC_DATA(v) ((0xFFFF & (v)))
+#define NGBE_MSCC_CMD(v) ((0x3 & (v)) << 16)
+enum NGBE_MSCA_CMD_value {
+ NGBE_MSCA_CMD_RSV = 0,
+ NGBE_MSCA_CMD_WRITE,
+ NGBE_MSCA_CMD_POST_READ,
+ NGBE_MSCA_CMD_READ,
+};
+#define NGBE_MSCC_SADDR ((0x1U) << 18)
+#define NGBE_MSCC_CR(v) ((0x8U & (v)) << 19)
+#define NGBE_MSCC_BUSY ((0x1U) << 22)
+#define NGBE_MDIO_CLK(v) ((0x7 & (v)) << 19)
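+
+/*
+ * Example: a PHY register read through the MSCA/MSCC window above.
+ * Illustrative sketch only: rd32()/wr32() stand in for the driver's
+ * MMIO accessors and the MDIO clock divider value is an assumption.
+ *
+ *	wr32(hw, NGBE_MSCA, NGBE_MSCA_RA(reg_addr) |
+ *			    NGBE_MSCA_PA(phy_addr) |
+ *			    NGBE_MSCA_DA(device_type));
+ *	wr32(hw, NGBE_MSCC, NGBE_MSCC_CMD(NGBE_MSCA_CMD_READ) |
+ *			    NGBE_MDIO_CLK(6));
+ *	do {
+ *		mscc = rd32(hw, NGBE_MSCC);
+ *	} while (mscc & NGBE_MSCC_BUSY);
+ *	data = NGBE_MSCC_DATA(mscc);
+ */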
+
+/* EEE registers */
+
+/* statistic */
+#define NGBE_MAC_LXOFFRXC 0x11988
+#define NGBE_MAC_PXOFFRXC 0x119DC
+#define NGBE_RX_BC_FRAMES_GOOD_LOW 0x11918
+#define NGBE_RX_CRC_ERROR_FRAMES_LOW 0x11928
+#define NGBE_RX_LEN_ERROR_FRAMES_LOW 0x11978
+#define NGBE_RX_UNDERSIZE_FRAMES_GOOD 0x11938
+#define NGBE_RX_OVERSIZE_FRAMES_GOOD 0x1193C
+#define NGBE_RX_FRAME_CNT_GOOD_BAD_LOW 0x11900
+#define NGBE_TX_FRAME_CNT_GOOD_BAD_LOW 0x1181C
+#define NGBE_TX_MC_FRAMES_GOOD_LOW 0x1182C
+#define NGBE_TX_BC_FRAMES_GOOD_LOW 0x11824
+#define NGBE_MMC_CONTROL 0x11800
+#define NGBE_MMC_CONTROL_RSTONRD 0x4 /* reset on read */
+#define NGBE_MMC_CONTROL_UP 0x700
+
+/********************************* BAR registers ***************************/
+/* Interrupt Registers */
+#define NGBE_BME_CTL 0x12020
+#define NGBE_PX_MISC_IC 0x100
+#define NGBE_PX_MISC_ICS 0x104
+#define NGBE_PX_MISC_IEN 0x108
+#define NGBE_PX_MISC_IVAR 0x4FC
+#define NGBE_PX_GPIE 0x118
+#define NGBE_PX_ISB_ADDR_L 0x160
+#define NGBE_PX_ISB_ADDR_H 0x164
+#define NGBE_PX_TCP_TIMER 0x170
+#define NGBE_PX_ITRSEL 0x180
+#define NGBE_PX_IC 0x120
+#define NGBE_PX_ICS 0x130
+#define NGBE_PX_IMS 0x140
+#define NGBE_PX_IMC 0x150
+#define NGBE_PX_IVAR(_i) (0x500 + (_i) * 4) /* [0,3] */
+#define NGBE_PX_ITR(_i) (0x200 + (_i) * 4) /* [0,8] */
+#define NGBE_PX_TRANSACTION_PENDING 0x168
+#define NGBE_PX_INTA 0x110
+
+/* Interrupt register bitmasks */
+/* Extended Interrupt Cause Read */
+#define NGBE_PX_MISC_IC_DEV_RST 0x00000400U /* device reset event */
+#define NGBE_PX_MISC_IC_TIMESYNC 0x00000800U /* time sync */
+#define NGBE_PX_MISC_IC_STALL 0x00001000U /* trans or recv path is
+ * stalled */
+#define NGBE_PX_MISC_IC_LINKSEC 0x00002000U /* Tx LinkSec require key
+ * exchange */
+#define NGBE_PX_MISC_IC_RX_MISS 0x00004000U /* Packet Buffer Overrun */
+#define NGBE_PX_MISC_IC_I2C 0x00010000U /* I2C interrupt */
+#define NGBE_PX_MISC_IC_ETH_EVENT 0x00020000U /* err reported by MAC except
+ * eth link down */
+#define NGBE_PX_MISC_IC_PHY 0x00040000U /* link up */
+#define NGBE_PX_MISC_IC_INT_ERR 0x00100000U /* integrity error */
+#define NGBE_PX_MISC_IC_SPI 0x00200000U /* SPI interface */
+#define NGBE_PX_MISC_IC_VF_MBOX 0x00800000U /* VF-PF message box */
+#define NGBE_PX_MISC_IC_GPIO 0x04000000U /* GPIO interrupt */
+#define NGBE_PX_MISC_IC_PCIE_REQ_ERR 0x08000000U /* pcie request error int */
+#define NGBE_PX_MISC_IC_OVER_HEAT 0x10000000U /* overheat detection */
+#define NGBE_PX_MISC_IC_PROBE_MATCH 0x20000000U /* probe match */
+#define NGBE_PX_MISC_IC_MNG_HOST_MBOX 0x40000000U /* mng mailbox */
+#define NGBE_PX_MISC_IC_TIMER 0x80000000U /* tcp timer */
+
+/* Extended Interrupt Cause Set */
+#define NGBE_PX_MISC_ICS_ETH_LKDN 0x00000100U
+#define NGBE_PX_MISC_ICS_DEV_RST 0x00000400U
+#define NGBE_PX_MISC_ICS_TIMESYNC 0x00000800U
+#define NGBE_PX_MISC_ICS_STALL 0x00001000U
+#define NGBE_PX_MISC_ICS_LINKSEC 0x00002000U
+#define NGBE_PX_MISC_ICS_RX_MISS 0x00004000U
+#define NGBE_PX_MISC_ICS_FLOW_DIR 0x00008000U
+#define NGBE_PX_MISC_ICS_I2C 0x00010000U
+#define NGBE_PX_MISC_ICS_ETH_EVENT 0x00020000U
+#define NGBE_PX_MISC_ICS_ETH_LK 0x00040000U
+#define NGBE_PX_MISC_ICS_ETH_AN 0x00080000U
+#define NGBE_PX_MISC_ICS_INT_ERR 0x00100000U
+#define NGBE_PX_MISC_ICS_SPI 0x00200000U
+#define NGBE_PX_MISC_ICS_VF_MBOX 0x00800000U
+#define NGBE_PX_MISC_ICS_GPIO 0x04000000U
+#define NGBE_PX_MISC_ICS_PCIE_REQ_ERR 0x08000000U
+#define NGBE_PX_MISC_ICS_OVER_HEAT 0x10000000U
+#define NGBE_PX_MISC_ICS_PROBE_MATCH 0x20000000U
+#define NGBE_PX_MISC_ICS_MNG_HOST_MBOX 0x40000000U
+#define NGBE_PX_MISC_ICS_TIMER 0x80000000U
+
+/* Extended Interrupt Enable Set */
+#define NGBE_PX_MISC_IEN_ETH_LKDN 0x00000100U
+#define NGBE_PX_MISC_IEN_DEV_RST 0x00000400U
+#define NGBE_PX_MISC_IEN_TIMESYNC 0x00000800U
+#define NGBE_PX_MISC_IEN_STALL 0x00001000U
+#define NGBE_PX_MISC_IEN_LINKSEC 0x00002000U
+#define NGBE_PX_MISC_IEN_RX_MISS 0x00004000U
+#define NGBE_PX_MISC_IEN_I2C 0x00010000U
+#define NGBE_PX_MISC_IEN_ETH_EVENT 0x00020000U
+#define NGBE_PX_MISC_IEN_ETH_LK 0x00040000U
+#define NGBE_PX_MISC_IEN_ETH_AN 0x00080000U
+#define NGBE_PX_MISC_IEN_INT_ERR 0x00100000U
+#define NGBE_PX_MISC_IEN_SPI 0x00200000U
+#define NGBE_PX_MISC_IEN_VF_MBOX 0x00800000U
+#define NGBE_PX_MISC_IEN_GPIO 0x04000000U
+#define NGBE_PX_MISC_IEN_PCIE_REQ_ERR 0x08000000U
+#define NGBE_PX_MISC_IEN_OVER_HEAT 0x10000000U
+#define NGBE_PX_MISC_IEN_PROBE_MATCH 0x20000000U
+#define NGBE_PX_MISC_IEN_MNG_HOST_MBOX 0x40000000U
+#define NGBE_PX_MISC_IEN_TIMER 0x80000000U
+
+#define NGBE_PX_MISC_IEN_MASK ( \
+ NGBE_PX_MISC_IEN_ETH_LKDN| \
+ NGBE_PX_MISC_IEN_DEV_RST | \
+ NGBE_PX_MISC_IEN_ETH_EVENT | \
+ NGBE_PX_MISC_IEN_ETH_LK | \
+ NGBE_PX_MISC_IEN_ETH_AN | \
+ NGBE_PX_MISC_IEN_INT_ERR | \
+ NGBE_PX_MISC_IEN_VF_MBOX | \
+ NGBE_PX_MISC_IEN_GPIO | \
+ NGBE_PX_MISC_IEN_MNG_HOST_MBOX | \
+ NGBE_PX_MISC_IEN_STALL | \
+ NGBE_PX_MISC_IEN_PCIE_REQ_ERR | \
+ NGBE_PX_MISC_IEN_TIMER)
+
+/* General purpose Interrupt Enable */
+#define NGBE_PX_GPIE_MODEL 0x00000001U
+#define NGBE_PX_GPIE_IMEN 0x00000002U
+#define NGBE_PX_GPIE_LL_INTERVAL 0x000000F0U
+
+/* Interrupt Vector Allocation Registers */
+#define NGBE_PX_IVAR_REG_NUM 64
+#define NGBE_PX_IVAR_ALLOC_VAL 0x80 /* Interrupt Allocation valid */
+
+#define NGBE_MAX_INT_RATE 500000
+#define NGBE_MIN_INT_RATE 980
+#define NGBE_MAX_EITR 0x00007FFCU
+#define NGBE_MIN_EITR 4
+#define NGBE_PX_ITR_ITR_INT_MASK 0x00000FF8U
+#define NGBE_PX_ITR_LLI_CREDIT 0x001f0000U
+#define NGBE_PX_ITR_LLI_MOD 0x00008000U
+#define NGBE_PX_ITR_CNT_WDIS 0x80000000U
+#define NGBE_PX_ITR_ITR_CNT 0x0FE00000U
+
+/* transmit DMA Registers */
+#define NGBE_PX_TR_BAL(_i) (0x03000 + ((_i) * 0x40)) /* [0, 7] */
+#define NGBE_PX_TR_BAH(_i) (0x03004 + ((_i) * 0x40))
+#define NGBE_PX_TR_WP(_i) (0x03008 + ((_i) * 0x40))
+#define NGBE_PX_TR_RP(_i) (0x0300C + ((_i) * 0x40))
+#define NGBE_PX_TR_CFG(_i) (0x03010 + ((_i) * 0x40))
+/* Transmit Config masks */
+#define NGBE_PX_TR_CFG_ENABLE (1) /* Ena specific Tx Queue */
+#define NGBE_PX_TR_CFG_TR_SIZE_SHIFT 1 /* tx desc number per ring */
+#define NGBE_PX_TR_CFG_SWFLSH (1 << 26) /* Tx Desc. wr-bk flushing */
+#define NGBE_PX_TR_CFG_WTHRESH_SHIFT 16 /* shift to WTHRESH bits */
+#define NGBE_PX_TR_CFG_THRE_SHIFT 8
+
+#define NGBE_PX_TR_RPn(q_per_pool, vf_number, vf_q_index) \
+ (NGBE_PX_TR_RP((q_per_pool)*(vf_number) + (vf_q_index)))
+
+#define NGBE_PX_TR_WPn(q_per_pool, vf_number, vf_q_index) \
+ (NGBE_PX_TR_WP((q_per_pool)*(vf_number) + (vf_q_index)))
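+
+/*
+ * Example: with 2 queues per pool, VF 3, queue 1 maps to ring
+ * 2 * 3 + 1 = 7, so NGBE_PX_TR_RPn(2, 3, 1) resolves to
+ * NGBE_PX_TR_RP(7) = 0x0300C + 7 * 0x40 = 0x031CC.
+ */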
+
+/* Receive DMA Registers */
+#define NGBE_PX_RR_BAL(_i) (0x01000 + ((_i) * 0x40)) /* [0, 7] */
+#define NGBE_PX_RR_BAH(_i) (0x01004 + ((_i) * 0x40))
+#define NGBE_PX_RR_WP(_i) (0x01008 + ((_i) * 0x40))
+#define NGBE_PX_RR_RP(_i) (0x0100C + ((_i) * 0x40))
+#define NGBE_PX_RR_CFG(_i) (0x01010 + ((_i) * 0x40))
+/* PX_RR_CFG bit definitions */
+#define NGBE_PX_RR_CFG_RR_SIZE_SHIFT 1
+#define NGBE_PX_RR_CFG_BSIZEPKT_SHIFT 2 /* so many KBs */
+#define NGBE_PX_RR_CFG_BSIZEHDRSIZE_SHIFT 6 /* 64byte resolution (>> 6)
+ * + at bit 12 offset (<< 12)
+ * = (<< 6)
+ */
+#define NGBE_PX_RR_CFG_DROP_EN 0x40000000U
+#define NGBE_PX_RR_CFG_VLAN 0x80000000U
+#define NGBE_PX_RR_CFG_RSC 0x20000000U
+#define NGBE_PX_RR_CFG_CNTAG 0x10000000U
+#define NGBE_PX_RR_CFG_RSC_CNT_MD 0x08000000U
+#define NGBE_PX_RR_CFG_SPLIT_MODE 0x04000000U
+#define NGBE_PX_RR_CFG_STALL 0x02000000U
+#define NGBE_PX_RR_CFG_MAX_RSCBUF_1 0x00000000U
+#define NGBE_PX_RR_CFG_MAX_RSCBUF_4 0x00800000U
+#define NGBE_PX_RR_CFG_MAX_RSCBUF_8 0x01000000U
+#define NGBE_PX_RR_CFG_MAX_RSCBUF_16 0x01800000U
+#define NGBE_PX_RR_CFG_RR_THER 0x00070000U
+#define NGBE_PX_RR_CFG_RR_THER_SHIFT 16
+
+#define NGBE_PX_RR_CFG_RR_HDR_SZ 0x0000F000U
+#define NGBE_PX_RR_CFG_RR_BUF_SZ 0x00000F00U
+#define NGBE_PX_RR_CFG_RR_SZ 0x0000007EU
+#define NGBE_PX_RR_CFG_RR_EN 0x00000001U
+
+/* statistic */
+#define NGBE_PX_MPRC(_i) (0x1020 + ((_i) * 64)) /* [0,7] */
+#define NGBE_PX_BPRC(_i) (0x1024 + ((_i) * 64))
+
+#define NGBE_PX_MPTC(_i) (0x3020 + ((_i) * 64)) /* [0,7] */
+#define NGBE_PX_BPTC(_i) (0x3024 + ((_i) * 64))
+
+#define NGBE_VX_GPRC 0x01014
+#define NGBE_VX_GORC_LSB 0x01018
+#define NGBE_VX_GORC_MSB 0x0101C
+#define NGBE_VX_MPRC 0x01020
+#define NGBE_VX_BPRC 0x01024
+
+#define NGBE_VX_GPTC 0x03014
+#define NGBE_VX_GOTC_LSB 0x03018
+#define NGBE_VX_GOTC_MSB 0x0301C
+#define NGBE_VX_MPTC 0x03020
+#define NGBE_VX_BPTC 0x03024
+
+#define NGBE_PX_GPRC 0x12504
+
+#define NGBE_PX_GPTC 0x18308
+
+#define NGBE_PX_GORC_LSB 0x12508
+#define NGBE_PX_GORC_MSB 0x1250C
+
+#define NGBE_PX_GOTC_LSB 0x1830C
+#define NGBE_PX_GOTC_MSB 0x18310
+
+/*************************** Flash region definition *************************/
+/* EEC Register */
+#define NGBE_EEC_SK 0x00000001U /* EEPROM Clock */
+#define NGBE_EEC_CS 0x00000002U /* EEPROM Chip Select */
+#define NGBE_EEC_DI 0x00000004U /* EEPROM Data In */
+#define NGBE_EEC_DO 0x00000008U /* EEPROM Data Out */
+#define NGBE_EEC_FWE_MASK 0x00000030U /* FLASH Write Enable */
+#define NGBE_EEC_FWE_DIS 0x00000010U /* Disable FLASH writes */
+#define NGBE_EEC_FWE_EN 0x00000020U /* Enable FLASH writes */
+#define NGBE_EEC_FWE_SHIFT 4
+#define NGBE_EEC_REQ 0x00000040U /* EEPROM Access Request */
+#define NGBE_EEC_GNT 0x00000080U /* EEPROM Access Grant */
+#define NGBE_EEC_PRES 0x00000100U /* EEPROM Present */
+#define NGBE_EEC_ARD 0x00000200U /* EEPROM Auto Read Done */
+#define NGBE_EEC_FLUP 0x00800000U /* Flash update command */
+#define NGBE_EEC_SEC1VAL 0x02000000U /* Sector 1 Valid */
+#define NGBE_EEC_FLUDONE 0x04000000U /* Flash update done */
+/* EEPROM Addressing bits based on type (0-small, 1-large) */
+#define NGBE_EEC_ADDR_SIZE 0x00000400U
+#define NGBE_EEC_SIZE 0x00007800U /* EEPROM Size */
+#define NGBE_EERD_MAX_ADDR 0x00003FFFU /* EERD allows 14 bits for addr. */
+
+#define NGBE_EEC_SIZE_SHIFT 11
+#define NGBE_EEPROM_WORD_SIZE_SHIFT 6
+#define NGBE_EEPROM_OPCODE_BITS 8
+
+/* FLA Register */
+#define NGBE_FLA_LOCKED 0x00000040U
+
+/* Part Number String Length */
+#define NGBE_PBANUM_LENGTH 32
+
+/* Checksum and EEPROM pointers */
+#define NGBE_PBANUM_PTR_GUARD 0xFAFA
+#define NGBE_CHECKSUM_CAP_ST_PASS 0x80658383
+#define NGBE_CHECKSUM_CAP_ST_FAIL 0x70657376
+#define NGBE_EEPROM_CHECKSUM 0x2F
+#define NGBE_EEPROM_SUM 0xBABA
+#define NGBE_OPTION_ROM_PTR 0x05
+#define NGBE_SHADOW_RAM_SIZE 0x4000
+#define NGBE_PCIE_CONFIG_SIZE 0x08
+#define NGBE_EEPROM_LAST_WORD 0x800
+#define NGBE_FW_PTR 0x0F
+#define NGBE_SW_REGION_PTR 0x28
+
+#define NGBE_CALSUM_COMMAND 0xE9
+#define NGBE_CALSUM_CAP_STATUS 0x10224
+#define NGBE_EEPROM_VERSION_STORE_REG 0x1022C
+#define NGBE_SAN_MAC_ADDR_PTR 0x18
+#define NGBE_DEVICE_CAPS 0x1C
+#define NGBE_EEPROM_VERSION_L 0x1D
+#define NGBE_EEPROM_VERSION_H 0x1E
+
+#define NGBE_MAX_MSIX_VECTORS_EMERALD 0x09
+
+/* MSI-X capability fields masks */
+#define NGBE_PCIE_MSIX_TBL_SZ_MASK 0x7FF
+
+/* EEPROM Commands - SPI */
+#define NGBE_EEPROM_MAX_RETRY_SPI 5000 /* Max wait 5ms for RDY signal */
+#define NGBE_EEPROM_STATUS_RDY_SPI 0x01
+#define NGBE_EEPROM_READ_OPCODE_SPI 0x03 /* EEPROM read opcode */
+#define NGBE_EEPROM_WRITE_OPCODE_SPI 0x02 /* EEPROM write opcode */
+#define NGBE_EEPROM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = addr bit-8 */
+#define NGBE_EEPROM_WREN_OPCODE_SPI 0x06 /* EEPROM set Write Ena latch */
+/* EEPROM reset Write Enable latch */
+#define NGBE_EEPROM_WRDI_OPCODE_SPI 0x04
+#define NGBE_EEPROM_RDSR_OPCODE_SPI 0x05 /* EEPROM read Status reg */
+#define NGBE_EEPROM_WRSR_OPCODE_SPI 0x01 /* EEPROM write Status reg */
+#define NGBE_EEPROM_ERASE4K_OPCODE_SPI 0x20 /* EEPROM ERASE 4KB */
+#define NGBE_EEPROM_ERASE64K_OPCODE_SPI 0xD8 /* EEPROM ERASE 64KB */
+#define NGBE_EEPROM_ERASE256_OPCODE_SPI 0xDB /* EEPROM ERASE 256B */
+
+/* EEPROM Read Register */
+#define NGBE_EEPROM_RW_REG_DATA 16 /* data offset in EEPROM read reg */
+#define NGBE_EEPROM_RW_REG_DONE 2 /* Offset to READ done bit */
+#define NGBE_EEPROM_RW_REG_START 1 /* First bit to start operation */
+#define NGBE_EEPROM_RW_ADDR_SHIFT 2 /* Shift to the address bits */
+#define NGBE_NVM_POLL_WRITE 1 /* Flag for polling for wr complete */
+#define NGBE_NVM_POLL_READ 0 /* Flag for polling for rd complete */
+
+#define NVM_INIT_CTRL_3 0x38
+#define NVM_INIT_CTRL_3_LPLU 0x8
+
+#define NGBE_ETH_LENGTH_OF_ADDRESS 6
+
+#define NGBE_EEPROM_PAGE_SIZE_MAX 128
+#define NGBE_EEPROM_RD_BUFFER_MAX_COUNT 256 /* words rd in burst */
+#define NGBE_EEPROM_WR_BUFFER_MAX_COUNT 256 /* words wr in burst */
+#define NGBE_EEPROM_CTRL_2 1 /* EEPROM CTRL word 2 */
+#define NGBE_EEPROM_CCD_BIT 2
+
+#ifndef NGBE_EEPROM_GRANT_ATTEMPTS
+#define NGBE_EEPROM_GRANT_ATTEMPTS 1000 /* EEPROM attempts to gain grant */
+#endif
+
+#ifndef NGBE_EERD_EEWR_ATTEMPTS
+/* Number of 5-microsecond intervals we wait for an EERD read or
+ * EEWR write to complete */
+#define NGBE_EERD_EEWR_ATTEMPTS 100000
+#endif
+
+#ifndef NGBE_FLUDONE_ATTEMPTS
+/* Number of attempts we wait for the flash update to complete */
+#define NGBE_FLUDONE_ATTEMPTS 20000
+#endif
+
+#define NGBE_PCIE_CTRL2 0x5 /* PCIe Control 2 Offset */
+#define NGBE_PCIE_CTRL2_DUMMY_ENABLE 0x8 /* Dummy Function Enable */
+#define NGBE_PCIE_CTRL2_LAN_DISABLE 0x2 /* LAN PCI Disable */
+#define NGBE_PCIE_CTRL2_DISABLE_SELECT 0x1 /* LAN Disable Select */
+
+#define NGBE_SAN_MAC_ADDR_PORT0_OFFSET 0x0
+#define NGBE_SAN_MAC_ADDR_PORT1_OFFSET 0x3
+#define NGBE_DEVICE_CAPS_ALLOW_ANY_SFP 0x1
+#define NGBE_DEVICE_CAPS_FCOE_OFFLOADS 0x2
+#define NGBE_FW_LESM_PARAMETERS_PTR 0x2
+#define NGBE_FW_LESM_STATE_1 0x1
+#define NGBE_FW_LESM_STATE_ENABLED 0x8000 /* LESM Enable bit */
+#define NGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR 0x4
+#define NGBE_FW_PATCH_VERSION_4 0x7
+#define NGBE_FCOE_IBA_CAPS_BLK_PTR 0x33 /* iSCSI/FCOE block */
+#define NGBE_FCOE_IBA_CAPS_FCOE 0x20 /* FCOE flags */
+#define NGBE_ISCSI_FCOE_BLK_PTR 0x17 /* iSCSI/FCOE block */
+#define NGBE_ISCSI_FCOE_FLAGS_OFFSET 0x0 /* FCOE flags */
+#define NGBE_ISCSI_FCOE_FLAGS_ENABLE 0x1 /* FCOE flags enable bit */
+#define NGBE_ALT_SAN_MAC_ADDR_BLK_PTR 0x17 /* Alt. SAN MAC block */
+#define NGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET 0x0 /* Alt SAN MAC capability */
+#define NGBE_ALT_SAN_MAC_ADDR_PORT0_OFFSET 0x1 /* Alt SAN MAC 0 offset */
+#define NGBE_ALT_SAN_MAC_ADDR_PORT1_OFFSET 0x4 /* Alt SAN MAC 1 offset */
+#define NGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET 0x7 /* Alt WWNN prefix offset */
+#define NGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET 0x8 /* Alt WWPN prefix offset */
+#define NGBE_ALT_SAN_MAC_ADDR_CAPS_SANMAC 0x0 /* Alt SAN MAC exists */
+#define NGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN 0x1 /* Alt WWN base exists */
+#define NGBE_DEVICE_CAPS_WOL_PORT0_1 0x4 /* WoL supported on ports 0 & 1 */
+#define NGBE_DEVICE_CAPS_WOL_PORT0 0x8 /* WoL supported on port 0 */
+#define NGBE_DEVICE_CAPS_WOL_MASK 0xC /* Mask for WoL capabilities */
+
+/******************************** PCI Bus Info *******************************/
+#define NGBE_PCI_DEVICE_STATUS 0xAA
+#define NGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING 0x0020
+#define NGBE_PCI_LINK_STATUS 0xB2
+#define NGBE_PCI_DEVICE_CONTROL2 0xC8
+#define NGBE_PCI_LINK_WIDTH 0x3F0
+#define NGBE_PCI_LINK_WIDTH_1 0x10
+#define NGBE_PCI_LINK_WIDTH_2 0x20
+#define NGBE_PCI_LINK_WIDTH_4 0x40
+#define NGBE_PCI_LINK_WIDTH_8 0x80
+#define NGBE_PCI_LINK_SPEED 0xF
+#define NGBE_PCI_LINK_SPEED_2500 0x1
+#define NGBE_PCI_LINK_SPEED_5000 0x2
+#define NGBE_PCI_LINK_SPEED_8000 0x3
+#define NGBE_PCI_HEADER_TYPE_REGISTER 0x0E
+#define NGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80
+#define NGBE_PCI_DEVICE_CONTROL2_16ms 0x0005
+
+#define NGBE_PCIDEVCTRL2_RELAX_ORDER_OFFSET 4
+#define NGBE_PCIDEVCTRL2_RELAX_ORDER_MASK \
+ (0x0001 << NGBE_PCIDEVCTRL2_RELAX_ORDER_OFFSET)
+#define NGBE_PCIDEVCTRL2_RELAX_ORDER_ENABLE \
+ (0x01 << NGBE_PCIDEVCTRL2_RELAX_ORDER_OFFSET)
+
+#define NGBE_PCIDEVCTRL2_TIMEO_MASK 0xf
+#define NGBE_PCIDEVCTRL2_16_32ms_def 0x0
+#define NGBE_PCIDEVCTRL2_50_100us 0x1
+#define NGBE_PCIDEVCTRL2_1_2ms 0x2
+#define NGBE_PCIDEVCTRL2_16_32ms 0x5
+#define NGBE_PCIDEVCTRL2_65_130ms 0x6
+#define NGBE_PCIDEVCTRL2_260_520ms 0x9
+#define NGBE_PCIDEVCTRL2_1_2s 0xa
+#define NGBE_PCIDEVCTRL2_4_8s 0xd
+#define NGBE_PCIDEVCTRL2_17_34s 0xe
+
+/******************* Receive Descriptor bit definitions **********************/
+#define NGBE_RXD_IPSEC_STATUS_SECP 0x00020000U
+#define NGBE_RXD_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000U
+#define NGBE_RXD_IPSEC_ERROR_INVALID_LENGTH 0x10000000U
+#define NGBE_RXD_IPSEC_ERROR_AUTH_FAILED 0x18000000U
+#define NGBE_RXD_IPSEC_ERROR_BIT_MASK 0x18000000U
+
+#define NGBE_RXD_NEXTP_MASK 0x000FFFF0U /* Next Descriptor Index */
+#define NGBE_RXD_NEXTP_SHIFT 0x00000004U
+#define NGBE_RXD_STAT_MASK 0x000fffffU /* Stat/NEXTP: bit 0-19 */
+#define NGBE_RXD_STAT_DD 0x00000001U /* Done */
+#define NGBE_RXD_STAT_EOP 0x00000002U /* End of Packet */
+#define NGBE_RXD_STAT_CLASS_ID_MASK 0x0000001CU
+#define NGBE_RXD_STAT_CLASS_ID_TC_RSS 0x00000000U
+#define NGBE_RXD_STAT_CLASS_ID_SYN 0x00000008U
+#define NGBE_RXD_STAT_CLASS_ID_5_TUPLE 0x0000000CU
+#define NGBE_RXD_STAT_CLASS_ID_L2_ETYPE 0x00000010U
+#define NGBE_RXD_STAT_VP 0x00000020U /* IEEE VLAN Pkt */
+#define NGBE_RXD_STAT_UDPCS 0x00000040U /* UDP xsum calculated */
+#define NGBE_RXD_STAT_L4CS 0x00000080U /* L4 xsum calculated */
+#define NGBE_RXD_STAT_IPCS 0x00000100U /* IP xsum calculated */
+#define NGBE_RXD_STAT_PIF 0x00000200U /* passed in-exact filter */
+#define NGBE_RXD_STAT_OUTERIPCS 0x00000400U /* Cloud IP xsum calculated*/
+#define NGBE_RXD_STAT_VEXT 0x00000800U /* 1st VLAN found */
+#define NGBE_RXD_STAT_LLINT 0x00002000U /* Pkt caused Low Latency
+ * Int */
+#define NGBE_RXD_STAT_TS 0x00004000U /* IEEE1588 Time Stamp */
+#define NGBE_RXD_STAT_SECP 0x00008000U /* Security Processing */
+#define NGBE_RXD_STAT_LB 0x00010000U /* Loopback Status */
+#define NGBE_RXD_STAT_FCEOFS 0x00020000U /* FCoE EOF/SOF Stat */
+#define NGBE_RXD_STAT_FCSTAT 0x000C0000U /* FCoE Pkt Stat */
+#define NGBE_RXD_STAT_FCSTAT_NOMTCH 0x00000000U /* 00: No Ctxt Match */
+#define NGBE_RXD_STAT_FCSTAT_NODDP 0x00040000U /* 01: Ctxt w/o DDP */
+#define NGBE_RXD_STAT_FCSTAT_FCPRSP 0x00080000U /* 10: Recv. FCP_RSP */
+#define NGBE_RXD_STAT_FCSTAT_DDP 0x000C0000U /* 11: Ctxt w/ DDP */
+
+#define NGBE_RXD_ERR_MASK 0xfff00000U /* RDESC.ERRORS mask */
+#define NGBE_RXD_ERR_SHIFT 20 /* RDESC.ERRORS shift */
+#define NGBE_RXD_ERR_FCEOFE 0x80000000U /* FCEOFe/IPE */
+#define NGBE_RXD_ERR_HBO 0x00800000U /*Header Buffer Overflow */
+#define NGBE_RXD_ERR_OUTERIPER 0x04000000U /* CRC IP Header error */
+#define NGBE_RXD_ERR_SECERR_MASK 0x18000000U
+#define NGBE_RXD_ERR_RXE 0x20000000U /* Any MAC Error */
+#define NGBE_RXD_ERR_TCPE 0x40000000U /* TCP/UDP Checksum Error */
+#define NGBE_RXD_ERR_IPE 0x80000000U /* IP Checksum Error */
+
+#define NGBE_RXDPS_HDRSTAT_HDRSP 0x00008000U
+#define NGBE_RXDPS_HDRSTAT_HDRLEN_MASK 0x000003FFU
+
+#define NGBE_RXD_RSSTYPE_MASK 0x0000000FU
+#define NGBE_RXD_TPID_MASK 0x000001C0U
+#define NGBE_RXD_TPID_SHIFT 6
+#define NGBE_RXD_HDRBUFLEN_MASK 0x00007FE0U
+#define NGBE_RXD_RSCCNT_MASK 0x001E0000U
+#define NGBE_RXD_RSCCNT_SHIFT 17
+#define NGBE_RXD_HDRBUFLEN_SHIFT 5
+#define NGBE_RXD_SPLITHEADER_EN 0x00001000U
+#define NGBE_RXD_SPH 0x8000
+
+/* RSS Hash results */
+#define NGBE_RXD_RSSTYPE_NONE 0x00000000U
+#define NGBE_RXD_RSSTYPE_IPV4_TCP 0x00000001U
+#define NGBE_RXD_RSSTYPE_IPV4 0x00000002U
+#define NGBE_RXD_RSSTYPE_IPV6_TCP 0x00000003U
+#define NGBE_RXD_RSSTYPE_IPV4_SCTP 0x00000004U
+#define NGBE_RXD_RSSTYPE_IPV6 0x00000005U
+#define NGBE_RXD_RSSTYPE_IPV6_SCTP 0x00000006U
+#define NGBE_RXD_RSSTYPE_IPV4_UDP 0x00000007U
+#define NGBE_RXD_RSSTYPE_IPV6_UDP 0x00000008U
+
+/**
+ * receive packet type
+ * PTYPE:8 = TUN:2 + PKT:2 + TYP:4
+ **/
+/* TUN */
+#define NGBE_PTYPE_TUN_IPV4 (0x80)
+#define NGBE_PTYPE_TUN_IPV6 (0xC0)
+
+/* PKT for TUN */
+#define NGBE_PTYPE_PKT_IPIP (0x00) /* IP+IP */
+#define NGBE_PTYPE_PKT_IG (0x10) /* IP+GRE */
+#define NGBE_PTYPE_PKT_IGM (0x20) /* IP+GRE+MAC */
+#define NGBE_PTYPE_PKT_IGMV (0x30) /* IP+GRE+MAC+VLAN */
+/* PKT for !TUN */
+#define NGBE_PTYPE_PKT_MAC (0x10)
+#define NGBE_PTYPE_PKT_IP (0x20)
+#define NGBE_PTYPE_PKT_FCOE (0x30)
+
+/* TYP for PKT=mac */
+#define NGBE_PTYPE_TYP_MAC (0x01)
+#define NGBE_PTYPE_TYP_TS (0x02) /* time sync */
+#define NGBE_PTYPE_TYP_FIP (0x03)
+#define NGBE_PTYPE_TYP_LLDP (0x04)
+#define NGBE_PTYPE_TYP_CNM (0x05)
+#define NGBE_PTYPE_TYP_EAPOL (0x06)
+#define NGBE_PTYPE_TYP_ARP (0x07)
+/* TYP for PKT=ip */
+#define NGBE_PTYPE_PKT_IPV6 (0x08)
+#define NGBE_PTYPE_TYP_IPFRAG (0x01)
+#define NGBE_PTYPE_TYP_IP (0x02)
+#define NGBE_PTYPE_TYP_UDP (0x03)
+#define NGBE_PTYPE_TYP_TCP (0x04)
+#define NGBE_PTYPE_TYP_SCTP (0x05)
+/* TYP for PKT=fcoe */
+#define NGBE_PTYPE_PKT_VFT (0x08)
+#define NGBE_PTYPE_TYP_FCOE (0x00)
+#define NGBE_PTYPE_TYP_FCDATA (0x01)
+#define NGBE_PTYPE_TYP_FCRDY (0x02)
+#define NGBE_PTYPE_TYP_FCRSP (0x03)
+#define NGBE_PTYPE_TYP_FCOTHER (0x04)
+
+/* Packet type non-ip values */
+enum ngbe_l2_ptypes {
+ NGBE_PTYPE_L2_ABORTED = (NGBE_PTYPE_PKT_MAC),
+ NGBE_PTYPE_L2_MAC = (NGBE_PTYPE_PKT_MAC | NGBE_PTYPE_TYP_MAC),
+ NGBE_PTYPE_L2_TS = (NGBE_PTYPE_PKT_MAC | NGBE_PTYPE_TYP_TS),
+ NGBE_PTYPE_L2_FIP = (NGBE_PTYPE_PKT_MAC | NGBE_PTYPE_TYP_FIP),
+ NGBE_PTYPE_L2_LLDP = (NGBE_PTYPE_PKT_MAC | NGBE_PTYPE_TYP_LLDP),
+ NGBE_PTYPE_L2_CNM = (NGBE_PTYPE_PKT_MAC | NGBE_PTYPE_TYP_CNM),
+ NGBE_PTYPE_L2_EAPOL = (NGBE_PTYPE_PKT_MAC | NGBE_PTYPE_TYP_EAPOL),
+ NGBE_PTYPE_L2_ARP = (NGBE_PTYPE_PKT_MAC | NGBE_PTYPE_TYP_ARP),
+
+ NGBE_PTYPE_L2_IPV4_FRAG = (NGBE_PTYPE_PKT_IP |
+ NGBE_PTYPE_TYP_IPFRAG),
+ NGBE_PTYPE_L2_IPV4 = (NGBE_PTYPE_PKT_IP | NGBE_PTYPE_TYP_IP),
+ NGBE_PTYPE_L2_IPV4_UDP = (NGBE_PTYPE_PKT_IP | NGBE_PTYPE_TYP_UDP),
+ NGBE_PTYPE_L2_IPV4_TCP = (NGBE_PTYPE_PKT_IP | NGBE_PTYPE_TYP_TCP),
+ NGBE_PTYPE_L2_IPV4_SCTP = (NGBE_PTYPE_PKT_IP | NGBE_PTYPE_TYP_SCTP),
+ NGBE_PTYPE_L2_IPV6_FRAG = (NGBE_PTYPE_PKT_IP | NGBE_PTYPE_PKT_IPV6 |
+ NGBE_PTYPE_TYP_IPFRAG),
+ NGBE_PTYPE_L2_IPV6 = (NGBE_PTYPE_PKT_IP | NGBE_PTYPE_PKT_IPV6 |
+ NGBE_PTYPE_TYP_IP),
+ NGBE_PTYPE_L2_IPV6_UDP = (NGBE_PTYPE_PKT_IP | NGBE_PTYPE_PKT_IPV6 |
+ NGBE_PTYPE_TYP_UDP),
+ NGBE_PTYPE_L2_IPV6_TCP = (NGBE_PTYPE_PKT_IP | NGBE_PTYPE_PKT_IPV6 |
+ NGBE_PTYPE_TYP_TCP),
+ NGBE_PTYPE_L2_IPV6_SCTP = (NGBE_PTYPE_PKT_IP | NGBE_PTYPE_PKT_IPV6 |
+ NGBE_PTYPE_TYP_SCTP),
+
+ NGBE_PTYPE_L2_FCOE = (NGBE_PTYPE_PKT_FCOE | NGBE_PTYPE_TYP_FCOE),
+ NGBE_PTYPE_L2_FCOE_FCDATA = (NGBE_PTYPE_PKT_FCOE |
+ NGBE_PTYPE_TYP_FCDATA),
+ NGBE_PTYPE_L2_FCOE_FCRDY = (NGBE_PTYPE_PKT_FCOE |
+ NGBE_PTYPE_TYP_FCRDY),
+ NGBE_PTYPE_L2_FCOE_FCRSP = (NGBE_PTYPE_PKT_FCOE |
+ NGBE_PTYPE_TYP_FCRSP),
+ NGBE_PTYPE_L2_FCOE_FCOTHER = (NGBE_PTYPE_PKT_FCOE |
+ NGBE_PTYPE_TYP_FCOTHER),
+ NGBE_PTYPE_L2_FCOE_VFT = (NGBE_PTYPE_PKT_FCOE | NGBE_PTYPE_PKT_VFT),
+ NGBE_PTYPE_L2_FCOE_VFT_FCDATA = (NGBE_PTYPE_PKT_FCOE |
+ NGBE_PTYPE_PKT_VFT | NGBE_PTYPE_TYP_FCDATA),
+ NGBE_PTYPE_L2_FCOE_VFT_FCRDY = (NGBE_PTYPE_PKT_FCOE |
+ NGBE_PTYPE_PKT_VFT | NGBE_PTYPE_TYP_FCRDY),
+ NGBE_PTYPE_L2_FCOE_VFT_FCRSP = (NGBE_PTYPE_PKT_FCOE |
+ NGBE_PTYPE_PKT_VFT | NGBE_PTYPE_TYP_FCRSP),
+ NGBE_PTYPE_L2_FCOE_VFT_FCOTHER = (NGBE_PTYPE_PKT_FCOE |
+ NGBE_PTYPE_PKT_VFT | NGBE_PTYPE_TYP_FCOTHER),
+
+ NGBE_PTYPE_L2_TUN4_MAC = (NGBE_PTYPE_TUN_IPV4 | NGBE_PTYPE_PKT_IGM),
+ NGBE_PTYPE_L2_TUN6_MAC = (NGBE_PTYPE_TUN_IPV6 | NGBE_PTYPE_PKT_IGM),
+};
+
+#define NGBE_RXD_PKTTYPE(_rxd) \
+ ((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 9) & 0xFF)
+#define NGBE_PTYPE_TUN(_pt) ((_pt) & 0xC0)
+#define NGBE_PTYPE_PKT(_pt) ((_pt) & 0x30)
+#define NGBE_PTYPE_TYP(_pt) ((_pt) & 0x0F)
+#define NGBE_PTYPE_TYPL4(_pt) ((_pt) & 0x07)
+
+#define NGBE_RXD_IPV6EX(_rxd) \
+ ((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 6) & 0x1)
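+
+/*
+ * Example: splitting a packet type with the masks above. For
+ * NGBE_PTYPE_L2_IPV4_TCP (0x20 | 0x04 = 0x24):
+ *
+ *	NGBE_PTYPE_TUN(0x24) == 0x00 (no tunnel header)
+ *	NGBE_PTYPE_PKT(0x24) == NGBE_PTYPE_PKT_IP (0x20)
+ *	NGBE_PTYPE_TYP(0x24) == NGBE_PTYPE_TYP_TCP (0x04)
+ */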
+
+/* Security Processing bit Indication */
+#define NGBE_RXD_LNKSEC_STATUS_SECP 0x00020000U
+#define NGBE_RXD_LNKSEC_ERROR_NO_SA_MATCH 0x08000000U
+#define NGBE_RXD_LNKSEC_ERROR_REPLAY_ERROR 0x10000000U
+#define NGBE_RXD_LNKSEC_ERROR_BIT_MASK 0x18000000U
+#define NGBE_RXD_LNKSEC_ERROR_BAD_SIG 0x18000000U
+
+/* Masks to determine if packets should be dropped due to frame errors */
+#define NGBE_RXD_ERR_FRAME_ERR_MASK NGBE_RXD_ERR_RXE
+
+/*********************** Adv Transmit Descriptor Config Masks ****************/
+#define NGBE_TXD_DTALEN_MASK 0x0000FFFFU /* Data buf length(bytes) */
+#define NGBE_TXD_MAC_LINKSEC 0x00040000U /* Insert LinkSec */
+#define NGBE_TXD_MAC_TSTAMP 0x00080000U /* IEEE1588 time stamp */
+#define NGBE_TXD_IPSEC_SA_INDEX_MASK 0x000003FFU /* IPSec SA index */
+#define NGBE_TXD_IPSEC_ESP_LEN_MASK 0x000001FFU /* IPSec ESP length */
+#define NGBE_TXD_DTYP_MASK 0x00F00000U /* DTYP mask */
+#define NGBE_TXD_DTYP_CTXT 0x00100000U /* Adv Context Desc */
+#define NGBE_TXD_DTYP_DATA 0x00000000U /* Adv Data Descriptor */
+#define NGBE_TXD_EOP 0x01000000U /* End of Packet */
+#define NGBE_TXD_IFCS 0x02000000U /* Insert FCS */
+#define NGBE_TXD_LINKSEC 0x04000000U /* enable linksec */
+#define NGBE_TXD_RS 0x08000000U /* Report Status */
+#define NGBE_TXD_ECU 0x10000000U /* DDP hdr type or iSCSI */
+#define NGBE_TXD_QCN 0x20000000U /* cntag insertion enable */
+#define NGBE_TXD_VLE 0x40000000U /* VLAN pkt enable */
+#define NGBE_TXD_TSE 0x80000000U /* TCP Seg enable */
+#define NGBE_TXD_STAT_DD 0x00000001U /* Descriptor Done */
+#define NGBE_TXD_IDX_SHIFT 4 /* Adv desc Index shift */
+#define NGBE_TXD_CC 0x00000080U /* Check Context */
+#define NGBE_TXD_IPSEC 0x00000100U /* enable ipsec esp */
+#define NGBE_TXD_IIPCS 0x00000400U
+#define NGBE_TXD_EIPCS 0x00000800U
+#define NGBE_TXD_L4CS 0x00000200U
+#define NGBE_TXD_PAYLEN_SHIFT 13 /* Adv desc PAYLEN shift */
+#define NGBE_TXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */
+#define NGBE_TXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */
+#define NGBE_TXD_TAG_TPID_SEL_SHIFT 11
+#define NGBE_TXD_IPSEC_TYPE_SHIFT 14
+#define NGBE_TXD_ENC_SHIFT 15
+
+#define NGBE_TXD_TUCMD_IPSEC_TYPE_ESP 0x00004000U /* IPSec Type ESP */
+#define NGBE_TXD_TUCMD_IPSEC_ENCRYPT_EN 0x00008000U /* ESP Encrypt Enable */
+#define NGBE_TXD_TUCMD_FCOE 0x00010000U /* FCoE Frame Type */
+#define NGBE_TXD_FCOEF_EOF_MASK (0x3 << 10) /* FC EOF index */
+#define NGBE_TXD_FCOEF_SOF ((1 << 2) << 10) /* FC SOF index */
+#define NGBE_TXD_FCOEF_PARINC ((1 << 3) << 10) /* Rel_Off in F_CTL */
+#define NGBE_TXD_FCOEF_ORIE ((1 << 4) << 10) /* Orientation End */
+#define NGBE_TXD_FCOEF_ORIS ((1 << 5) << 10) /* Orientation Start */
+#define NGBE_TXD_FCOEF_EOF_N (0x0 << 10) /* 00: EOFn */
+#define NGBE_TXD_FCOEF_EOF_T (0x1 << 10) /* 01: EOFt */
+#define NGBE_TXD_FCOEF_EOF_NI (0x2 << 10) /* 10: EOFni */
+#define NGBE_TXD_FCOEF_EOF_A (0x3 << 10) /* 11: EOFa */
+#define NGBE_TXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */
+#define NGBE_TXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */
+
+#define NGBE_TXD_OUTER_IPLEN_SHIFT 12 /* Adv ctxt OUTERIPLEN shift */
+#define NGBE_TXD_TUNNEL_LEN_SHIFT 21 /* Adv ctxt TUNNELLEN shift */
+#define NGBE_TXD_TUNNEL_TYPE_SHIFT 11 /* Adv Tx Desc Tunnel Type shift */
+#define NGBE_TXD_TUNNEL_DECTTL_SHIFT 27 /* Adv ctxt DECTTL shift */
+#define NGBE_TXD_TUNNEL_UDP (0x0ULL << NGBE_TXD_TUNNEL_TYPE_SHIFT)
+#define NGBE_TXD_TUNNEL_GRE (0x1ULL << NGBE_TXD_TUNNEL_TYPE_SHIFT)
+
+/************ ngbe_type.h ************/
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define NGBE_REQ_TX_DESCRIPTOR_MULTIPLE 8
+#define NGBE_REQ_RX_DESCRIPTOR_MULTIPLE 8
+#define NGBE_REQ_TX_BUFFER_GRANULARITY 1024
+
+/* Vlan-specific macros */
+#define NGBE_RX_DESC_SPECIAL_VLAN_MASK 0x0FFF /* VLAN ID in lower 12 bits */
+#define NGBE_RX_DESC_SPECIAL_PRI_MASK 0xE000 /* Priority in upper 3 bits */
+#define NGBE_RX_DESC_SPECIAL_PRI_SHIFT 0x000D /* Priority in upper 3 of 16 */
+#define NGBE_TX_DESC_SPECIAL_PRI_SHIFT NGBE_RX_DESC_SPECIAL_PRI_SHIFT
+
+/* Transmit Descriptor */
+union ngbe_tx_desc {
+ struct {
+ __le64 buffer_addr; /* Address of descriptor's data buf */
+ __le32 cmd_type_len;
+ __le32 olinfo_status;
+ } read;
+ struct {
+ __le64 rsvd; /* Reserved */
+ __le32 nxtseq_seed;
+ __le32 status;
+ } wb;
+};
+
+/* Receive Descriptor */
+union ngbe_rx_desc {
+ struct {
+ __le64 pkt_addr; /* Packet buffer address */
+ __le64 hdr_addr; /* Header buffer address */
+ } read;
+ struct {
+ struct {
+ union {
+ __le32 data;
+ struct {
+ __le16 pkt_info; /* RSS, Pkt type */
+ __le16 hdr_info; /* Splithdr, hdrlen */
+ } hs_rss;
+ } lo_dword;
+ union {
+ __le32 rss; /* RSS Hash */
+ struct {
+ __le16 ip_id; /* IP id */
+ __le16 csum; /* Packet Checksum */
+ } csum_ip;
+ } hi_dword;
+ } lower;
+ struct {
+ __le32 status_error; /* ext status/error */
+ __le16 length; /* Packet length */
+ __le16 vlan; /* VLAN tag */
+ } upper;
+ } wb; /* writeback */
+};
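+
+/*
+ * Example: a write-back descriptor is owned by software once the DD
+ * status bit is set; EOP marks the last descriptor of a frame.
+ * Illustrative sketch only (rx_desc points at a union ngbe_rx_desc,
+ * complete_frame() is a hypothetical consumer):
+ *
+ *	u32 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+ *	u16 len = le16_to_cpu(rx_desc->wb.upper.length);
+ *
+ *	if ((staterr & NGBE_RXD_STAT_DD) && (staterr & NGBE_RXD_STAT_EOP))
+ *		complete_frame(len);
+ */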
+
+/* Context descriptors */
+struct ngbe_tx_context_desc {
+ __le32 vlan_macip_lens;
+ __le32 seqnum_seed;
+ __le32 type_tucmd_mlhl;
+ __le32 mss_l4len_idx;
+};
+
+/************************* Flow Director HASH ********************************/
+/* Software ATR hash keys */
+#define NGBE_ATR_BUCKET_HASH_KEY 0x3DAD14E2
+#define NGBE_ATR_SIGNATURE_HASH_KEY 0x174D3614
+
+/* Software ATR input stream values and masks */
+#define NGBE_ATR_HASH_MASK 0x7fff
+#define NGBE_ATR_L4TYPE_MASK 0x3
+#define NGBE_ATR_L4TYPE_UDP 0x1
+#define NGBE_ATR_L4TYPE_TCP 0x2
+#define NGBE_ATR_L4TYPE_SCTP 0x3
+#define NGBE_ATR_L4TYPE_IPV6_MASK 0x4
+#define NGBE_ATR_L4TYPE_TUNNEL_MASK 0x10
+enum ngbe_atr_flow_type {
+ NGBE_ATR_FLOW_TYPE_IPV4 = 0x0,
+ NGBE_ATR_FLOW_TYPE_UDPV4 = 0x1,
+ NGBE_ATR_FLOW_TYPE_TCPV4 = 0x2,
+ NGBE_ATR_FLOW_TYPE_SCTPV4 = 0x3,
+ NGBE_ATR_FLOW_TYPE_IPV6 = 0x4,
+ NGBE_ATR_FLOW_TYPE_UDPV6 = 0x5,
+ NGBE_ATR_FLOW_TYPE_TCPV6 = 0x6,
+ NGBE_ATR_FLOW_TYPE_SCTPV6 = 0x7,
+ NGBE_ATR_FLOW_TYPE_TUNNELED_IPV4 = 0x10,
+ NGBE_ATR_FLOW_TYPE_TUNNELED_UDPV4 = 0x11,
+ NGBE_ATR_FLOW_TYPE_TUNNELED_TCPV4 = 0x12,
+ NGBE_ATR_FLOW_TYPE_TUNNELED_SCTPV4 = 0x13,
+ NGBE_ATR_FLOW_TYPE_TUNNELED_IPV6 = 0x14,
+ NGBE_ATR_FLOW_TYPE_TUNNELED_UDPV6 = 0x15,
+ NGBE_ATR_FLOW_TYPE_TUNNELED_TCPV6 = 0x16,
+ NGBE_ATR_FLOW_TYPE_TUNNELED_SCTPV6 = 0x17,
+};
+
+/* Flow Director ATR input struct. */
+union ngbe_atr_input {
+ /*
+ * Byte layout in order, all values with MSB first:
+ *
+ * vm_pool - 1 byte
+ * flow_type - 1 byte
+ * vlan_id - 2 bytes
+ * dst_ip - 16 bytes
+ * src_ip - 16 bytes
+ * src_port - 2 bytes
+ * dst_port - 2 bytes
+ * flex_bytes - 2 bytes
+ * bkt_hash - 2 bytes
+ */
+ struct {
+ u8 vm_pool;
+ u8 flow_type;
+ __be16 vlan_id;
+ __be32 dst_ip[4];
+ __be32 src_ip[4];
+ __be16 src_port;
+ __be16 dst_port;
+ __be16 flex_bytes;
+ __be16 bkt_hash;
+ } formatted;
+ __be32 dword_stream[11];
+};
+
+/* Flow Director compressed ATR hash input struct */
+union ngbe_atr_hash_dword {
+ struct {
+ u8 vm_pool;
+ u8 flow_type;
+ __be16 vlan_id;
+ } formatted;
+ __be32 ip;
+ struct {
+ __be16 src;
+ __be16 dst;
+ } port;
+ __be16 flex_bytes;
+ __be32 dword;
+};
+
+/****************** Manageability Host Interface defines **********************/
+#define NGBE_HI_MAX_BLOCK_BYTE_LENGTH 256 /* Num of bytes in range */
+#define NGBE_HI_MAX_BLOCK_DWORD_LENGTH 64 /* Num of dwords in range */
+#define NGBE_HI_COMMAND_TIMEOUT 5000 /* Process HI command limit */
+#define NGBE_HI_FLASH_ERASE_TIMEOUT 5000 /* Process Erase command limit */
+#define NGBE_HI_FLASH_UPDATE_TIMEOUT 5000 /* Process Update command limit */
+#define NGBE_HI_FLASH_VERIFY_TIMEOUT 60000 /* Process Apply command limit */
+#define NGBE_HI_PHY_MGMT_REQ_TIMEOUT 2000 /* Wait up to 2 seconds */
+
+/* CEM Support */
+#define FW_CEM_HDR_LEN 0x4
+#define FW_CEM_CMD_DRIVER_INFO 0xDD
+#define FW_CEM_CMD_DRIVER_INFO_LEN 0x5
+#define FW_CEM_CMD_RESERVED 0x0
+#define FW_CEM_UNUSED_VER 0x0
+#define FW_CEM_MAX_RETRIES 3
+#define FW_CEM_RESP_STATUS_SUCCESS 0x1
+#define FW_READ_SHADOW_RAM_CMD 0x31
+#define FW_READ_SHADOW_RAM_LEN 0x6
+#define FW_WRITE_SHADOW_RAM_CMD 0x33
+#define FW_WRITE_SHADOW_RAM_LEN 0xA /* 8 plus 1 WORD to write */
+#define FW_SHADOW_RAM_DUMP_CMD 0x36
+#define FW_SHADOW_RAM_DUMP_LEN 0
+#define FW_DEFAULT_CHECKSUM 0xFF /* checksum always 0xFF */
+#define FW_NVM_DATA_OFFSET 3
+#define FW_MAX_READ_BUFFER_SIZE 244
+#define FW_DISABLE_RXEN_CMD 0xDE
+#define FW_DISABLE_RXEN_LEN 0x1
+#define FW_PHY_MGMT_REQ_CMD 0x20
+#define FW_RESET_CMD 0xDF
+#define FW_RESET_LEN 0x2
+#define FW_SETUP_MAC_LINK_CMD 0xE0
+#define FW_SETUP_MAC_LINK_LEN 0x2
+#define FW_FLASH_UPGRADE_START_CMD 0xE3
+#define FW_FLASH_UPGRADE_START_LEN 0x1
+#define FW_FLASH_UPGRADE_WRITE_CMD 0xE4
+#define FW_FLASH_UPGRADE_VERIFY_CMD 0xE5
+#define FW_FLASH_UPGRADE_VERIFY_LEN 0x4
+#define FW_EEPROM_CHECK_STATUS 0xE9
+#define FW_PHY_SIGNAL 0xF0
+
+/* Host Interface Command Structures */
+struct ngbe_hic_hdr {
+ u8 cmd;
+ u8 buf_len;
+ union {
+ u8 cmd_resv;
+ u8 ret_status;
+ } cmd_or_resp;
+ u8 checksum;
+};
+
+struct ngbe_hic_hdr2_req {
+ u8 cmd;
+ u8 buf_lenh;
+ u8 buf_lenl;
+ u8 checksum;
+};
+
+struct ngbe_hic_hdr2_rsp {
+ u8 cmd;
+ u8 buf_lenl;
+ u8 buf_lenh_status; /* 7-5: high bits of buf_len, 4-0: status */
+ u8 checksum;
+};
+
+union ngbe_hic_hdr2 {
+ struct ngbe_hic_hdr2_req req;
+ struct ngbe_hic_hdr2_rsp rsp;
+};
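+
+/*
+ * Example: per the buf_lenh_status comment above, a response carries
+ * an 11-bit buffer length split across buf_lenl and bits 7-5 of
+ * buf_lenh_status, with the return status in bits 4-0:
+ *
+ *	buf_len = ((rsp->buf_lenh_status & 0xE0) << 3) | rsp->buf_lenl;
+ *	status = rsp->buf_lenh_status & 0x1F;
+ */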
+
+struct ngbe_hic_drv_info {
+ struct ngbe_hic_hdr hdr;
+ u8 port_num;
+ u8 ver_sub;
+ u8 ver_build;
+ u8 ver_min;
+ u8 ver_maj;
+ u8 pad; /* end spacing to ensure length is mult. of dword */
+ u16 pad2; /* end spacing to ensure length is mult. of dword */
+};
+
+/* These need to be dword aligned */
+struct ngbe_hic_read_shadow_ram {
+ union ngbe_hic_hdr2 hdr;
+ u32 address;
+ u16 length;
+ u16 pad2;
+ u16 data;
+ u16 pad3;
+};
+
+struct ngbe_hic_write_shadow_ram {
+ union ngbe_hic_hdr2 hdr;
+ u32 address;
+ u16 length;
+ u16 pad2;
+ u16 data;
+ u16 pad3;
+};
+
+struct ngbe_hic_disable_rxen {
+ struct ngbe_hic_hdr hdr;
+ u8 port_number;
+ u8 pad2;
+ u16 pad3;
+};
+
+struct ngbe_hic_reset {
+ struct ngbe_hic_hdr hdr;
+ u16 lan_id;
+ u16 reset_type;
+};
+
+struct ngbe_hic_phy_cfg {
+ struct ngbe_hic_hdr hdr;
+ u8 lan_id;
+ u8 phy_mode;
+ u16 phy_speed;
+};
+
+enum ngbe_module_id {
+ NGBE_MODULE_EEPROM = 0,
+ NGBE_MODULE_FIRMWARE,
+ NGBE_MODULE_HARDWARE,
+ NGBE_MODULE_PCIE
+};
+
+struct ngbe_hic_upg_start {
+ struct ngbe_hic_hdr hdr;
+ u8 module_id;
+ u8 pad2;
+ u16 pad3;
+};
+
+struct ngbe_hic_upg_write {
+ struct ngbe_hic_hdr hdr;
+ u8 data_len;
+ u8 eof_flag;
+ u16 check_sum;
+ u32 data[62];
+};
+
+enum ngbe_upg_flag {
+ NGBE_RESET_NONE = 0,
+ NGBE_RESET_FIRMWARE,
+ NGBE_RELOAD_EEPROM,
+ NGBE_RESET_LAN
+};
+
+struct ngbe_hic_upg_verify {
+ struct ngbe_hic_hdr hdr;
+ u32 action_flag;
+};
+
+/* Number of 100-microsecond intervals we wait for PCI Express master disable */
+#define NGBE_PCI_MASTER_DISABLE_TIMEOUT 800
+
+/* Check whether an address is multicast. This is a little-endian-specific check. */
+#define NGBE_IS_MULTICAST(Address) \
+ (bool)(((u8 *)(Address))[0] & ((u8)0x01))
+
+/* Check whether an address is broadcast. */
+#define NGBE_IS_BROADCAST(Address) \
+ ((((u8 *)(Address))[0] == ((u8)0xff)) && \
+ (((u8 *)(Address))[1] == ((u8)0xff)))
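+
+/*
+ * Example: 01:00:5e:00:00:01 has bit 0 of its first octet set, so
+ * NGBE_IS_MULTICAST() is true; ff:ff:ff:ff:ff:ff also passes
+ * NGBE_IS_BROADCAST(), which only samples the first two octets.
+ */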
+
+/* DCB registers */
+#define NGBE_DCB_MAX_TRAFFIC_CLASS 8
+
+/* Power Management */
+/* DMA Coalescing configuration */
+struct ngbe_dmac_config {
+ u16 watchdog_timer; /* usec units */
+ bool fcoe_en;
+ u32 link_speed;
+ u8 fcoe_tc;
+ u8 num_tcs;
+};
+
+/* Autonegotiation advertised speeds */
+typedef u32 ngbe_autoneg_advertised;
+/* Link speed */
+#define NGBE_LINK_SPEED_UNKNOWN 0
+#define NGBE_LINK_SPEED_100_FULL 1
+#define NGBE_LINK_SPEED_1GB_FULL 2
+#define NGBE_LINK_SPEED_10_FULL 8
+#define NGBE_LINK_SPEED_AUTONEG (NGBE_LINK_SPEED_100_FULL | \
+ NGBE_LINK_SPEED_1GB_FULL | \
+ NGBE_LINK_SPEED_10_FULL)
+
+/* Physical layer type */
+typedef u32 ngbe_physical_layer;
+#define NGBE_PHYSICAL_LAYER_UNKNOWN 0
+#define NGBE_PHYSICAL_LAYER_1000BASE_T 0x0002
+#define NGBE_PHYSICAL_LAYER_100BASE_TX 0x0004
+#define NGBE_PHYSICAL_LAYER_SFP_PLUS_CU 0x0008
+#define NGBE_PHYSICAL_LAYER_1000BASE_KX 0x0200
+#define NGBE_PHYSICAL_LAYER_1000BASE_BX 0x0400
+#define NGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA 0x2000
+#define NGBE_PHYSICAL_LAYER_1000BASE_SX 0x4000
+
+/* Special PHY Init Routine */
+#define NGBE_PHY_INIT_OFFSET_NL 0x002B
+#define NGBE_PHY_INIT_END_NL 0xFFFF
+#define NGBE_CONTROL_MASK_NL 0xF000
+#define NGBE_DATA_MASK_NL 0x0FFF
+#define NGBE_CONTROL_SHIFT_NL 12
+#define NGBE_DELAY_NL 0
+#define NGBE_DATA_NL 1
+#define NGBE_CONTROL_NL 0x000F
+#define NGBE_CONTROL_EOL_NL 0x0FFF
+#define NGBE_CONTROL_SOL_NL 0x0000
+
+/* ethtool */
+#define SPEED_10 10
+#define SPEED_100 100
+#define SPEED_1000 1000
+
+/* Flow Control Data Sheet defined values
+ * Calculation and defines taken from 802.1bb Annex O
+ */
+
+/* BitTimes (BT) conversion */
+#define NGBE_BT2KB(BT) ((BT + (8 * 1024 - 1)) / (8 * 1024))
+#define NGBE_B2BT(BT) (BT * 8)
+
+/* Calculate Delay to respond to PFC */
+#define NGBE_PFC_D 672
+
+/* Calculate Cable Delay */
+#define NGBE_CABLE_DC 5556 /* Delay Copper */
+#define NGBE_CABLE_DO 5000 /* Delay Optical */
+
+/* Calculate Interface Delay X540 */
+#define NGBE_PHY_DC 25600 /* Delay 10G BASET */
+#define NGBE_MAC_DC 8192 /* Delay Copper XAUI interface */
+#define NGBE_XAUI_DC (2 * 2048) /* Delay Copper Phy */
+
+#define NGBE_ID_X540 (NGBE_MAC_DC + NGBE_XAUI_DC + NGBE_PHY_DC)
+
+/* Calculate Interface Delay */
+#define NGBE_PHY_D 12800
+#define NGBE_MAC_D 4096
+#define NGBE_XAUI_D (2 * 1024)
+
+#define NGBE_ID (NGBE_MAC_D + NGBE_XAUI_D + NGBE_PHY_D)
+
+/* Calculate Delay incurred from higher layer */
+#define NGBE_HD 6144
+
+/* Calculate PCI Bus delay for low thresholds */
+#define NGBE_PCI_DELAY 10000
+
+/* Calculate X540 delay value in bit times */
+#define NGBE_DV_X540(_max_frame_link, _max_frame_tc) \
+ ((36 * \
+ (NGBE_B2BT(_max_frame_link) + \
+ NGBE_PFC_D + \
+ (2 * NGBE_CABLE_DC) + \
+ (2 * NGBE_ID_X540) + \
+ NGBE_HD) / 25 + 1) + \
+ 2 * NGBE_B2BT(_max_frame_tc))
+
+
+/* Calculate delay value in bit times */
+#define NGBE_DV(_max_frame_link, _max_frame_tc) \
+ ((36 * \
+ (NGBE_B2BT(_max_frame_link) + \
+ NGBE_PFC_D + \
+ (2 * NGBE_CABLE_DC) + \
+ (2 * NGBE_ID) + \
+ NGBE_HD) / 25 + 1) + \
+ 2 * NGBE_B2BT(_max_frame_tc))
+
+/* Calculate low threshold delay values */
+#define NGBE_LOW_DV_X540(_max_frame_tc) \
+ (2 * NGBE_B2BT(_max_frame_tc) + \
+ (36 * NGBE_PCI_DELAY / 25) + 1)
+
+#define NGBE_LOW_DV(_max_frame_tc) \
+ (2 * NGBE_LOW_DV_X540(_max_frame_tc))
+
+/*
+ * Unavailable: The FCoE Boot Option ROM is not present in the flash.
+ * Disabled: Present; boot order is not set for any targets on the port.
+ * Enabled: Present; boot order is set for at least one target on the port.
+ */
+enum ngbe_fcoe_boot_status {
+ ngbe_fcoe_bootstatus_disabled = 0,
+ ngbe_fcoe_bootstatus_enabled = 1,
+ ngbe_fcoe_bootstatus_unavailable = 0xFFFF
+};
+
+enum ngbe_eeprom_type {
+ ngbe_eeprom_uninitialized = 0,
+ ngbe_eeprom_spi,
+ ngbe_flash,
+ ngbe_eeprom_none /* No NVM support */
+};
+
+enum ngbe_phy_type {
+ ngbe_phy_unknown = 0,
+ ngbe_phy_none,
+ ngbe_phy_internal,
+ ngbe_phy_m88e1512,
+ ngbe_phy_m88e1512_sfi,
+ ngbe_phy_yt8521s,
+ ngbe_phy_yt8521s_sfi,
+ ngbe_phy_zte,
+ ngbe_phy_sfp_passive_tyco,
+ ngbe_phy_sfp_passive_unknown,
+ ngbe_phy_sfp_active_unknown,
+ ngbe_phy_sfp_avago,
+ ngbe_phy_sfp_ftl,
+ ngbe_phy_sfp_ftl_active,
+ ngbe_phy_sfp_unknown,
+ ngbe_phy_sfp_intel,
+ ngbe_phy_sfp_unsupported, /*Enforce bit set with unsupported module*/
+ ngbe_phy_generic
+};
+
+/*
+ * SFP+ module type IDs:
+ *
+ * ID Module Type
+ * =============
+ * 0 SFP_DA_CU
+ * 1 SFP_SR
+ * 2 SFP_LR
+ * 3 SFP_DA_CU_CORE0
+ * 4 SFP_DA_CU_CORE1
+ * 5 SFP_SR/LR_CORE0
+ * 6 SFP_SR/LR_CORE1
+ */
+enum ngbe_sfp_type {
+ ngbe_sfp_type_da_cu = 0,
+ ngbe_sfp_type_sr = 1,
+ ngbe_sfp_type_lr = 2,
+ ngbe_sfp_type_da_cu_core0 = 3,
+ ngbe_sfp_type_da_cu_core1 = 4,
+ ngbe_sfp_type_srlr_core0 = 5,
+ ngbe_sfp_type_srlr_core1 = 6,
+ ngbe_sfp_type_da_act_lmt_core0 = 7,
+ ngbe_sfp_type_da_act_lmt_core1 = 8,
+ ngbe_sfp_type_1g_cu_core0 = 9,
+ ngbe_sfp_type_1g_cu_core1 = 10,
+ ngbe_sfp_type_1g_sx_core0 = 11,
+ ngbe_sfp_type_1g_sx_core1 = 12,
+ ngbe_sfp_type_1g_lx_core0 = 13,
+ ngbe_sfp_type_1g_lx_core1 = 14,
+ ngbe_sfp_type_not_present = 0xFFFE,
+ ngbe_sfp_type_unknown = 0xFFFF
+};
+
+enum ngbe_media_type {
+ ngbe_media_type_unknown = 0,
+ ngbe_media_type_fiber,
+ ngbe_media_type_copper,
+ ngbe_media_type_backplane,
+ ngbe_media_type_virtual
+};
+
+/* Flow Control Settings */
+enum ngbe_fc_mode {
+ ngbe_fc_none = 0,
+ ngbe_fc_rx_pause,
+ ngbe_fc_tx_pause,
+ ngbe_fc_full,
+ ngbe_fc_default
+};
+
+/* Smart Speed Settings */
+#define NGBE_SMARTSPEED_MAX_RETRIES 3
+enum ngbe_smart_speed {
+ ngbe_smart_speed_auto = 0,
+ ngbe_smart_speed_on,
+ ngbe_smart_speed_off
+};
+
+/* PCI bus types */
+enum ngbe_bus_type {
+ ngbe_bus_type_unknown = 0,
+ ngbe_bus_type_pci,
+ ngbe_bus_type_pcix,
+ ngbe_bus_type_pci_express,
+ ngbe_bus_type_internal,
+ ngbe_bus_type_reserved
+};
+
+/* PCI bus speeds */
+enum ngbe_bus_speed {
+ ngbe_bus_speed_unknown = 0,
+ ngbe_bus_speed_33 = 33,
+ ngbe_bus_speed_66 = 66,
+ ngbe_bus_speed_100 = 100,
+ ngbe_bus_speed_120 = 120,
+ ngbe_bus_speed_133 = 133,
+ ngbe_bus_speed_2500 = 2500,
+ ngbe_bus_speed_5000 = 5000,
+ ngbe_bus_speed_8000 = 8000,
+ ngbe_bus_speed_reserved
+};
+
+/* PCI bus widths */
+enum ngbe_bus_width {
+ ngbe_bus_width_unknown = 0,
+ ngbe_bus_width_pcie_x1 = 1,
+ ngbe_bus_width_pcie_x2 = 2,
+ ngbe_bus_width_pcie_x4 = 4,
+ ngbe_bus_width_pcie_x8 = 8,
+ ngbe_bus_width_32 = 32,
+ ngbe_bus_width_64 = 64,
+ ngbe_bus_width_reserved
+};
+
+struct ngbe_addr_filter_info {
+ u32 num_mc_addrs;
+ u32 rar_used_count;
+ u32 mta_in_use;
+ u32 overflow_promisc;
+ bool user_set_promisc;
+};
+
+/* Bus parameters */
+struct ngbe_bus_info {
+ enum ngbe_bus_speed speed;
+ enum ngbe_bus_width width;
+ enum ngbe_bus_type type;
+
+ u16 func;
+ u16 lan_id;
+};
+
+/* Flow control parameters */
+struct ngbe_fc_info {
+ u32 high_water; /* Flow Ctrl High-water */
+ u32 low_water; /* Flow Ctrl Low-water */
+ u16 pause_time; /* Flow Control Pause timer */
+ bool send_xon; /* Flow control send XON */
+ bool strict_ieee; /* Strict IEEE mode */
+ bool disable_fc_autoneg; /* Do not autonegotiate FC */
+ bool fc_was_autonegged; /* Is current_mode the result of autonegging? */
+ enum ngbe_fc_mode current_mode; /* FC mode in effect */
+ enum ngbe_fc_mode requested_mode; /* FC mode requested by caller */
+};
+
+/* Statistics counters collected by the MAC */
+struct ngbe_hw_stats {
+ u64 crcerrs;
+ u64 illerrc;
+ u64 errbc;
+ u64 mspdc;
+ u64 mpctotal;
+ u64 mpc[8];
+ u64 mlfc;
+ u64 mrfc;
+ u64 rlec;
+ u64 lxontxc;
+ u64 lxonrxc;
+ u64 lxofftxc;
+ u64 lxoffrxc;
+ u64 pxontxc[8];
+ u64 pxonrxc[8];
+ u64 pxofftxc[8];
+ u64 pxoffrxc[8];
+ u64 prc64;
+ u64 prc127;
+ u64 prc255;
+ u64 prc511;
+ u64 prc1023;
+ u64 prc1522;
+ u64 gprc;
+ u64 bprc;
+ u64 mprc;
+ u64 gptc;
+ u64 gorc;
+ u64 gotc;
+ u64 rnbc[8];
+ u64 ruc;
+ u64 rfc;
+ u64 roc;
+ u64 rjc;
+ u64 mngprc;
+ u64 mngpdc;
+ u64 mngptc;
+ u64 tor;
+ u64 tpr;
+ u64 tpt;
+ u64 ptc64;
+ u64 ptc127;
+ u64 ptc255;
+ u64 ptc511;
+ u64 ptc1023;
+ u64 ptc1522;
+ u64 mptc;
+ u64 bptc;
+ u64 xec;
+ u64 qprc[16];
+ u64 qptc[16];
+ u64 qbrc[16];
+ u64 qbtc[16];
+ u64 qprdc[16];
+ u64 pxon2offc[8];
+ u64 fccrc;
+ u64 fclast;
+ u64 fcoerpdc;
+ u64 fcoeprc;
+ u64 fcoeptc;
+ u64 fcoedwrc;
+ u64 fcoedwtc;
+ u64 fcoe_noddp;
+ u64 fcoe_noddp_ext_buff;
+ u64 ldpcec;
+ u64 pcrc8ec;
+ u64 b2ospc;
+ u64 b2ogprc;
+ u64 o2bgptc;
+ u64 o2bspc;
+};
+
+/* forward declaration */
+struct ngbe_hw;
+
+/* iterator type for walking multicast address lists */
+typedef u8* (*ngbe_mc_addr_itr) (struct ngbe_hw *hw, u8 **mc_addr_ptr,
+ u32 *vmdq);
+
+/* Function pointer table */
+struct ngbe_eeprom_operations {
+ s32 (*init_params)(struct ngbe_hw *);
+ s32 (*read)(struct ngbe_hw *, u16, u16 *);
+ s32 (*read_buffer)(struct ngbe_hw *, u16, u16, u16 *);
+ s32 (*read32)(struct ngbe_hw *, u16, u32 *);
+ s32 (*write)(struct ngbe_hw *, u16, u16);
+ s32 (*write_buffer)(struct ngbe_hw *, u16, u16, u16 *);
+ s32 (*validate_checksum)(struct ngbe_hw *, u16 *);
+ s32 (*update_checksum)(struct ngbe_hw *);
+ s32 (*calc_checksum)(struct ngbe_hw *);
+ s32 (*eeprom_chksum_cap_st)(struct ngbe_hw *, u16, u32 *);
+ s32 (*phy_signal_set)(struct ngbe_hw *);
+};
+
+struct ngbe_flash_operations {
+ s32 (*init_params)(struct ngbe_hw *);
+ s32 (*read_buffer)(struct ngbe_hw *, u32, u32, u32 *);
+ s32 (*write_buffer)(struct ngbe_hw *, u32, u32, u32 *);
+};
+
+struct ngbe_mac_operations {
+ s32 (*init_hw)(struct ngbe_hw *);
+ s32 (*reset_hw)(struct ngbe_hw *);
+ s32 (*start_hw)(struct ngbe_hw *);
+ s32 (*clear_hw_cntrs)(struct ngbe_hw *);
+ enum ngbe_media_type (*get_media_type)(struct ngbe_hw *);
+ s32 (*get_mac_addr)(struct ngbe_hw *, u8 *);
+ s32 (*get_device_caps)(struct ngbe_hw *, u16 *);
+ s32 (*stop_adapter)(struct ngbe_hw *);
+ s32 (*get_bus_info)(struct ngbe_hw *);
+ void (*set_lan_id)(struct ngbe_hw *);
+ s32 (*enable_rx_dma)(struct ngbe_hw *, u32);
+ s32 (*disable_sec_rx_path)(struct ngbe_hw *);
+ s32 (*enable_sec_rx_path)(struct ngbe_hw *);
+ s32 (*acquire_swfw_sync)(struct ngbe_hw *, u32);
+ void (*release_swfw_sync)(struct ngbe_hw *, u32);
+
+ /* Link */
+ void (*disable_tx_laser)(struct ngbe_hw *);
+ void (*enable_tx_laser)(struct ngbe_hw *);
+ void (*flap_tx_laser)(struct ngbe_hw *);
+ s32 (*setup_link)(struct ngbe_hw *, u32, bool);
+ s32 (*setup_mac_link)(struct ngbe_hw *, u32, bool);
+ s32 (*check_link)(struct ngbe_hw *, u32 *, bool *, bool);
+ s32 (*get_link_capabilities)(struct ngbe_hw *, u32 *,
+ bool *);
+ void (*set_rate_select_speed)(struct ngbe_hw *, u32);
+
+ /* Packet Buffer manipulation */
+ void (*setup_rxpba)(struct ngbe_hw *, int, u32, int);
+
+ /* LED */
+ s32 (*led_on)(struct ngbe_hw *, u32);
+ s32 (*led_off)(struct ngbe_hw *, u32);
+
+ /* RAR, Multicast, VLAN */
+ s32 (*set_rar)(struct ngbe_hw *, u32, u8 *, u64, u32);
+ s32 (*clear_rar)(struct ngbe_hw *, u32);
+ s32 (*insert_mac_addr)(struct ngbe_hw *, u8 *, u32);
+ s32 (*set_vmdq)(struct ngbe_hw *, u32, u32);
+ s32 (*set_vmdq_san_mac)(struct ngbe_hw *, u32);
+ s32 (*clear_vmdq)(struct ngbe_hw *, u32, u32);
+ s32 (*init_rx_addrs)(struct ngbe_hw *);
+ s32 (*update_uc_addr_list)(struct ngbe_hw *, u8 *, u32,
+ ngbe_mc_addr_itr);
+ s32 (*update_mc_addr_list)(struct ngbe_hw *, u8 *, u32,
+ ngbe_mc_addr_itr, bool clear);
+ s32 (*enable_mc)(struct ngbe_hw *);
+ s32 (*disable_mc)(struct ngbe_hw *);
+ s32 (*clear_vfta)(struct ngbe_hw *);
+ s32 (*set_vfta)(struct ngbe_hw *, u32, u32, bool);
+ s32 (*set_vlvf)(struct ngbe_hw *, u32, u32, bool, bool *);
+ s32 (*init_uta_tables)(struct ngbe_hw *);
+ void (*set_mac_anti_spoofing)(struct ngbe_hw *, bool, int);
+ void (*set_vlan_anti_spoofing)(struct ngbe_hw *, bool, int);
+
+ /* Flow Control */
+ s32 (*fc_enable)(struct ngbe_hw *);
+ s32 (*setup_fc)(struct ngbe_hw *);
+
+ /* Manageability interface */
+ s32 (*set_fw_drv_ver)(struct ngbe_hw *, u8, u8, u8, u8);
+ s32 (*get_thermal_sensor_data)(struct ngbe_hw *);
+ s32 (*init_thermal_sensor_thresh)(struct ngbe_hw *hw);
+ void (*get_rtrup2tc)(struct ngbe_hw *hw, u8 *map);
+ void (*disable_rx)(struct ngbe_hw *hw);
+ void (*enable_rx)(struct ngbe_hw *hw);
+ void (*set_source_address_pruning)(struct ngbe_hw *, bool,
+ unsigned int);
+ void (*set_ethertype_anti_spoofing)(struct ngbe_hw *, bool, int);
+ s32 (*dmac_config)(struct ngbe_hw *hw);
+ s32 (*setup_eee)(struct ngbe_hw *hw, bool enable_eee);
+};
+
+struct ngbe_phy_operations {
+ s32 (*identify)(struct ngbe_hw *);
+ s32 (*identify_sfp)(struct ngbe_hw *);
+ s32 (*init)(struct ngbe_hw *);
+ s32 (*reset)(struct ngbe_hw *);
+ s32 (*read_reg)(struct ngbe_hw *, u32, u32, u16 *);
+ s32 (*write_reg)(struct ngbe_hw *, u32, u32, u16);
+ s32 (*read_reg_mdi)(struct ngbe_hw *, u32, u32, u16 *);
+ s32 (*write_reg_mdi)(struct ngbe_hw *, u32, u32, u16);
+ u32 (*setup_link)(struct ngbe_hw *, u32, bool);
+ s32 (*setup_internal_link)(struct ngbe_hw *);
+ u32 (*setup_link_speed)(struct ngbe_hw *, u32, bool);
+ s32 (*check_link)(struct ngbe_hw *, u32 *, bool *);
+ s32 (*check_overtemp)(struct ngbe_hw *);
+ s32 (*check_event)(struct ngbe_hw *);
+ s32 (*get_adv_pause)(struct ngbe_hw *, u8 *);
+ s32 (*get_lp_adv_pause)(struct ngbe_hw *, u8 *);
+ s32 (*set_adv_pause)(struct ngbe_hw *, u16);
+ s32 (*setup_once)(struct ngbe_hw *);
+};
+
+struct ngbe_eeprom_info {
+ struct ngbe_eeprom_operations ops;
+ enum ngbe_eeprom_type type;
+ u32 semaphore_delay;
+ u16 word_size;
+ u16 address_bits;
+ u16 word_page_size;
+ u16 ctrl_word_3;
+ u16 sw_region_offset;
+};
+
+struct ngbe_flash_info {
+ struct ngbe_flash_operations ops;
+ u32 semaphore_delay;
+ u32 dword_size;
+ u16 address_bits;
+};
+
+#define NGBE_FLAGS_DOUBLE_RESET_REQUIRED 0x01
+struct ngbe_mac_info {
+ struct ngbe_mac_operations ops;
+ u8 addr[NGBE_ETH_LENGTH_OF_ADDRESS];
+ u8 perm_addr[NGBE_ETH_LENGTH_OF_ADDRESS];
+ u8 san_addr[NGBE_ETH_LENGTH_OF_ADDRESS];
+ /* prefix for World Wide Node Name (WWNN) */
+ u16 wwnn_prefix;
+ /* prefix for World Wide Port Name (WWPN) */
+ u16 wwpn_prefix;
+#define NGBE_MAX_MTA 128
+#define NGBE_MAX_VFTA_ENTRIES 128
+ u32 mta_shadow[NGBE_MAX_MTA];
+ s32 mc_filter_type;
+ u32 mcft_size;
+ u32 vft_shadow[NGBE_MAX_VFTA_ENTRIES];
+ u32 vft_size;
+ u32 num_rar_entries;
+ u32 rar_highwater;
+ u32 rx_pb_size;
+ u32 max_tx_queues;
+ u32 max_rx_queues;
+ u32 orig_sr_pcs_ctl2;
+ u32 orig_sr_pma_mmd_ctl1;
+ u32 orig_sr_an_mmd_ctl;
+ u32 orig_sr_an_mmd_adv_reg2;
+ u32 orig_vr_xs_or_pcs_mmd_digi_ctl1;
+ u8 san_mac_rar_index;
+ bool get_link_status;
+ u16 max_msix_vectors;
+ bool arc_subsystem_valid;
+ bool orig_link_settings_stored;
+ bool autotry_restart;
+ u8 flags;
+ struct ngbe_thermal_sensor_data thermal_sensor_data;
+ bool thermal_sensor_enabled;
+ struct ngbe_dmac_config dmac_config;
+ bool set_lben;
+ bool autoneg;
+};
+
+struct ngbe_phy_info {
+ struct ngbe_phy_operations ops;
+ enum ngbe_phy_type type;
+ u32 addr;
+ u32 id;
+ enum ngbe_sfp_type sfp_type;
+ bool sfp_setup_needed;
+ u32 revision;
+ enum ngbe_media_type media_type;
+ u32 phy_semaphore_mask;
+ u8 lan_id; /* to be deleted */
+ ngbe_autoneg_advertised autoneg_advertised;
+ enum ngbe_smart_speed smart_speed;
+ bool smart_speed_active;
+ bool multispeed_fiber;
+ bool reset_if_overtemp;
+ ngbe_physical_layer link_mode;
+};
+
+#include "ngbe_mbx.h"
+
+struct ngbe_mbx_operations {
+ void (*init_params)(struct ngbe_hw *hw);
+ s32 (*read)(struct ngbe_hw *, u32 *, u16, u16);
+ s32 (*write)(struct ngbe_hw *, u32 *, u16, u16);
+ s32 (*read_posted)(struct ngbe_hw *, u32 *, u16, u16);
+ s32 (*write_posted)(struct ngbe_hw *, u32 *, u16, u16);
+ s32 (*check_for_msg)(struct ngbe_hw *, u16);
+ s32 (*check_for_ack)(struct ngbe_hw *, u16);
+ s32 (*check_for_rst)(struct ngbe_hw *, u16);
+};
+
+struct ngbe_mbx_stats {
+ u32 msgs_tx;
+ u32 msgs_rx;
+
+ u32 acks;
+ u32 reqs;
+ u32 rsts;
+};
+
+struct ngbe_mbx_info {
+ struct ngbe_mbx_operations ops;
+ struct ngbe_mbx_stats stats;
+ u32 timeout;
+ u32 udelay;
+ u32 v2p_mailbox;
+ u16 size;
+};
+
+enum ngbe_reset_type {
+ NGBE_LAN_RESET = 0,
+ NGBE_SW_RESET,
+ NGBE_GLOBAL_RESET
+};
+
+enum ngbe_link_status {
+ NGBE_LINK_STATUS_NONE = 0,
+ NGBE_LINK_STATUS_KX,
+ NGBE_LINK_STATUS_KX4
+};
+
+struct ngbe_hw {
+ u8 __iomem *hw_addr;
+ void *back;
+ struct ngbe_mac_info mac;
+ struct ngbe_addr_filter_info addr_ctrl;
+ struct ngbe_fc_info fc;
+ struct ngbe_phy_info phy;
+ struct ngbe_eeprom_info eeprom;
+ struct ngbe_flash_info flash;
+ struct ngbe_bus_info bus;
+ struct ngbe_mbx_info mbx;
+ u16 device_id;
+ u16 vendor_id;
+ u16 subsystem_device_id;
+ u16 subsystem_vendor_id;
+ u8 revision_id;
+ bool adapter_stopped;
+ int api_version;
+ enum ngbe_reset_type reset_type;
+ bool force_full_reset;
+ bool allow_unsupported_sfp;
+ bool wol_enabled;
+ enum ngbe_link_status link_status;
+ u16 tpid[8];
+};
+
+#define TCALL(hw, func, args...) (((hw)->func != NULL) \
+ ? (hw)->func((hw), ##args) : NGBE_NOT_IMPLEMENTED)
+
+/* Error Codes */
+#define NGBE_OK 0
+#define NGBE_ERR 100
+#define NGBE_NOT_IMPLEMENTED 0x7FFFFFFF
+/* (-NGBE_ERR, NGBE_ERR): reserved for non-ngbe defined error code */
+#define NGBE_ERR_NOSUPP -(NGBE_ERR+0)
+#define NGBE_ERR_EEPROM -(NGBE_ERR+1)
+#define NGBE_ERR_EEPROM_CHECKSUM -(NGBE_ERR+2)
+#define NGBE_ERR_PHY -(NGBE_ERR+3)
+#define NGBE_ERR_CONFIG -(NGBE_ERR+4)
+#define NGBE_ERR_PARAM -(NGBE_ERR+5)
+#define NGBE_ERR_MAC_TYPE -(NGBE_ERR+6)
+#define NGBE_ERR_UNKNOWN_PHY -(NGBE_ERR+7)
+#define NGBE_ERR_LINK_SETUP -(NGBE_ERR+8)
+#define NGBE_ERR_ADAPTER_STOPPED -(NGBE_ERR+9)
+#define NGBE_ERR_INVALID_MAC_ADDR -(NGBE_ERR+10)
+#define NGBE_ERR_DEVICE_NOT_SUPPORTED -(NGBE_ERR+11)
+#define NGBE_ERR_MASTER_REQUESTS_PENDING -(NGBE_ERR+12)
+#define NGBE_ERR_INVALID_LINK_SETTINGS -(NGBE_ERR+13)
+#define NGBE_ERR_AUTONEG_NOT_COMPLETE -(NGBE_ERR+14)
+#define NGBE_ERR_RESET_FAILED -(NGBE_ERR+15)
+#define NGBE_ERR_SWFW_SYNC -(NGBE_ERR+16)
+#define NGBE_ERR_PHY_ADDR_INVALID -(NGBE_ERR+17)
+#define NGBE_ERR_I2C -(NGBE_ERR+18)
+#define NGBE_ERR_SFP_NOT_SUPPORTED -(NGBE_ERR+19)
+#define NGBE_ERR_SFP_NOT_PRESENT -(NGBE_ERR+20)
+#define NGBE_ERR_SFP_NO_INIT_SEQ_PRESENT -(NGBE_ERR+21)
+#define NGBE_ERR_NO_SAN_ADDR_PTR -(NGBE_ERR+22)
+#define NGBE_ERR_FDIR_REINIT_FAILED -(NGBE_ERR+23)
+#define NGBE_ERR_EEPROM_VERSION -(NGBE_ERR+24)
+#define NGBE_ERR_NO_SPACE -(NGBE_ERR+25)
+#define NGBE_ERR_OVERTEMP -(NGBE_ERR+26)
+#define NGBE_ERR_UNDERTEMP -(NGBE_ERR+27)
+#define NGBE_ERR_FC_NOT_NEGOTIATED -(NGBE_ERR+28)
+#define NGBE_ERR_FC_NOT_SUPPORTED -(NGBE_ERR+29)
+#define NGBE_ERR_SFP_SETUP_NOT_COMPLETE -(NGBE_ERR+30)
+#define NGBE_ERR_PBA_SECTION -(NGBE_ERR+31)
+#define NGBE_ERR_INVALID_ARGUMENT -(NGBE_ERR+32)
+#define NGBE_ERR_HOST_INTERFACE_COMMAND -(NGBE_ERR+33)
+#define NGBE_ERR_OUT_OF_MEM -(NGBE_ERR+34)
+#define NGBE_ERR_FEATURE_NOT_SUPPORTED -(NGBE_ERR+36)
+#define NGBE_ERR_EEPROM_PROTECTED_REGION -(NGBE_ERR+37)
+#define NGBE_ERR_FDIR_CMD_INCOMPLETE -(NGBE_ERR+38)
+#define NGBE_ERR_FLASH_LOADING_FAILED -(NGBE_ERR+39)
+#define NGBE_ERR_XPCS_POWER_UP_FAILED -(NGBE_ERR+40)
+#define NGBE_ERR_FW_RESP_INVALID -(NGBE_ERR+41)
+#define NGBE_ERR_PHY_INIT_NOT_DONE -(NGBE_ERR+42)
+#define NGBE_ERR_TIMEOUT -(NGBE_ERR+43)
+#define NGBE_ERR_TOKEN_RETRY -(NGBE_ERR+44)
+#define NGBE_ERR_REGISTER -(NGBE_ERR+45)
+#define NGBE_ERR_MBX -(NGBE_ERR+46)
+#define NGBE_ERR_MNG_ACCESS_FAILED -(NGBE_ERR+47)
+#define NGBE_ERR_PHY_TYPE -(NGBE_ERR+48)
+#define NGBE_ERR_PHY_TIMEOUT -(NGBE_ERR+49)
+
+/**
+ * register operations
+ **/
+/* read register */
+#define NGBE_DEAD_READ_RETRIES 10
+#define NGBE_DEAD_READ_REG 0xdeadbeefU
+#define NGBE_DEAD_READ_REG64 0xdeadbeefdeadbeefULL
+
+#define NGBE_FAILED_READ_REG 0xffffffffU
+#define NGBE_FAILED_READ_REG64 0xffffffffffffffffULL
+
+static inline bool NGBE_REMOVED(void __iomem *addr)
+{
+ return unlikely(!addr);
+}
+
+static inline u32
+ngbe_rd32(u8 __iomem *base)
+{
+ return readl(base);
+}
+
+static inline u32
+rd32(struct ngbe_hw *hw, u32 reg)
+{
+ u8 __iomem *base = READ_ONCE(hw->hw_addr);
+ u32 val = NGBE_FAILED_READ_REG;
+
+ if (unlikely(!base))
+ return val;
+
+ val = ngbe_rd32(base + reg);
+
+ return val;
+}
+#define rd32a(a, reg, offset) ( \
+ rd32((a), (reg) + ((offset) << 2)))
+
+static inline u32
+rd32m(struct ngbe_hw *hw, u32 reg, u32 mask)
+{
+ u8 __iomem *base = READ_ONCE(hw->hw_addr);
+ u32 val = NGBE_FAILED_READ_REG;
+
+ if (unlikely(!base))
+ return val;
+
+ val = ngbe_rd32(base + reg);
+ if (unlikely(val == NGBE_FAILED_READ_REG))
+ return val;
+
+ return val & mask;
+}
+
+/* write register */
+static inline void
+ngbe_wr32(u8 __iomem *base, u32 val)
+{
+ writel(val, base);
+}
+
+static inline void
+wr32(struct ngbe_hw *hw, u32 reg, u32 val)
+{
+ u8 __iomem *base = READ_ONCE(hw->hw_addr);
+
+ if (unlikely(!base))
+ return;
+
+ ngbe_wr32(base + reg, val);
+}
+#define wr32a(a, reg, off, val) \
+ wr32((a), (reg) + ((off) << 2), (val))
+
+static inline void
+wr32m(struct ngbe_hw *hw, u32 reg, u32 mask, u32 field)
+{
+ u8 __iomem *base = READ_ONCE(hw->hw_addr);
+ u32 val;
+
+ if (unlikely(!base))
+ return;
+
+ val = ngbe_rd32(base + reg);
+ if (unlikely(val == NGBE_FAILED_READ_REG))
+ return;
+
+ val = ((val & ~mask) | (field & mask));
+ ngbe_wr32(base + reg, val);
+}
+
+/* poll register */
+#define NGBE_MDIO_TIMEOUT 1000
+#define NGBE_I2C_TIMEOUT 1000
+#define NGBE_SPI_TIMEOUT 1000
+static inline s32
+po32m(struct ngbe_hw *hw, u32 reg,
+ u32 mask, u32 field, int usecs, int count)
+{
+ int loop;
+
+ loop = (count ? count : (usecs + 9) / 10);
+ usecs = (loop ? (usecs + loop - 1) / loop : 0);
+
+ count = loop;
+ do {
+ u32 value = rd32(hw, reg);
+ if ((value & mask) == (field & mask)) {
+ break;
+ }
+
+ if (loop-- <= 0)
+ break;
+
+ udelay(usecs);
+ } while (true);
+
+ return (count - loop <= count ? 0 : NGBE_ERR_TIMEOUT);
+}
+
+#define NGBE_WRITE_FLUSH(H) rd32(H, NGBE_MIS_PWR)
+
+#endif /* _NGBE_TYPE_H_ */
--
2.25.1
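For illustration, the accessors and the TCALL() dispatch macro defined in this header compose as in the minimal sketch below. This is a hedged example, not driver code: EXAMPLE_CTL_REG and example_start() are hypothetical names.

/* Sketch: read-modify-write a register, then dispatch through the
 * function-pointer table. Assumes ngbe_type.h (above) is included. */
#define EXAMPLE_CTL_REG 0x10000	/* hypothetical register offset */

static s32 example_start(struct ngbe_hw *hw)
{
	u32 ctl;

	/* set bit 0 without disturbing the other bits */
	wr32m(hw, EXAMPLE_CTL_REG, 0x1, 0x1);

	ctl = rd32(hw, EXAMPLE_CTL_REG);
	if (ctl == NGBE_FAILED_READ_REG)
		return NGBE_ERR_REGISTER;	/* adapter likely removed */

	/* returns NGBE_NOT_IMPLEMENTED when the op is not wired up */
	return TCALL(hw, mac.ops.start_hw);
}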
[PATCH openEuler-1.0-LTS] ras: report cpu logical index to userspace in arm event
by Yang Yingliang 03 Dec '21
From: Lostwayzxc <luoshengwei(a)huawei.com>
kunpeng inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4IG00?from=project-issue
CVE: NA
When an ARM event is reported, rasdaemon needs to know the CPU logical index,
but the record carries only the MPIDR, with no mapping between it and the
logical index. Since the kernel already stores that mapping, get the logical
index via get_logical_index() and report it directly to userspace over the
perf interface.
Signed-off-by: Lostwayzxc <luoshengwei(a)huawei.com>
Reviewed-by: Lv Ying <lvying6(a)huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/ras/ras.c | 8 +++++++-
include/linux/ras.h | 11 +++++++++++
include/ras/ras_event.h | 10 +++++++---
3 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 9302ed7f42588..a526f124a5ff8 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -32,6 +32,7 @@ void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
struct cper_arm_err_info *err_info;
struct cper_arm_ctx_info *ctx_info;
int n, sz;
+ int cpu;
pei_len = sizeof(struct cper_arm_err_info) * err->err_info_num;
pei_err = (u8 *)err + sizeof(struct cper_sec_proc_arm);
@@ -58,8 +59,13 @@ void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
}
ven_err_data = (u8 *)ctx_info;
+ cpu = GET_LOGICAL_INDEX(err->mpidr);
+ /* when the return value is invalid, set cpu index to a large integer */
+ if (cpu < 0)
+ cpu = 0xFFFF;
+
trace_arm_event(err, pei_err, pei_len, ctx_err, ctx_len,
- ven_err_data, vsei_len, sev);
+ ven_err_data, vsei_len, sev, cpu);
}
static int __init ras_init(void)
diff --git a/include/linux/ras.h b/include/linux/ras.h
index 3431b4a5fa42d..e5ec31ad7a132 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -40,4 +40,15 @@ static inline void
log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev) { return; }
#endif
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+#include <asm/smp_plat.h>
+/*
+ * Include ARM specific SMP header which provides a function mapping mpidr to
+ * cpu logical index.
+ */
+#define GET_LOGICAL_INDEX(mpidr) get_logical_index(mpidr & MPIDR_HWID_BITMASK)
+#else
+#define GET_LOGICAL_INDEX(mpidr) -EINVAL
+#endif /* CONFIG_ARM || CONFIG_ARM64 */
+
#endif /* __RAS_H__ */
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 7c8cb123ba32d..2d6a662886e6d 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -182,9 +182,10 @@ TRACE_EVENT(arm_event,
const u32 ctx_len,
const u8 *oem,
const u32 oem_len,
- u8 sev),
+ u8 sev,
+ int cpu),
- TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len, sev),
+ TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len, sev, cpu),
TP_STRUCT__entry(
__field(u64, mpidr)
@@ -199,6 +200,7 @@ TRACE_EVENT(arm_event,
__field(u32, oem_len)
__dynamic_array(u8, buf2, oem_len)
__field(u8, sev)
+ __field(int, cpu)
),
TP_fast_assign(
@@ -225,11 +227,13 @@ TRACE_EVENT(arm_event,
__entry->oem_len = oem_len;
memcpy(__get_dynamic_array(buf2), oem, oem_len);
__entry->sev = sev;
+ __entry->cpu = cpu;
),
- TP_printk("error: %d; affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
+ TP_printk("cpu: %d; error: %d; affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
"running state: %d; PSCI state: %d; "
"%s: %d; %s: %s; %s: %d; %s: %s; %s: %d; %s: %s",
+ __entry->cpu,
__entry->sev,
__entry->affinity, __entry->mpidr, __entry->midr,
__entry->running_state, __entry->psci_state,
--
2.25.1
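The guard added above generalizes to any consumer of firmware-reported MPIDRs. A hedged sketch, where example_report_cpu() is a hypothetical name and only GET_LOGICAL_INDEX() comes from the patch:

#include <linux/ras.h>

/* Map an MPIDR to a logical CPU index, falling back to a sentinel. */
static int example_report_cpu(u64 mpidr)
{
	int cpu = GET_LOGICAL_INDEX(mpidr);

	/* get_logical_index() returns a negative value for an unknown
	 * MPIDR; 0xFFFF is out of range for any real logical index,
	 * so userspace can recognize the failure unambiguously. */
	return cpu < 0 ? 0xFFFF : cpu;
}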
[Meeting Notice] openEuler kernel tech-sharing session #15 & biweekly meeting Time: 2021-12-03 14:00-16:30
by Meeting Book 02 Dec '21
[PATCH openEuler-1.0-LTS 1/2] hugetlb: before freeing hugetlb page set dtor to appropriate value
by Yang Yingliang 02 Dec '21
From: Mike Kravetz <mike.kravetz(a)oracle.com>
mainline inclusion
from mainline-5.15-rc1
commit e32d20c0c88b1cd0a44f882c4f0eb2f536363d1b
category: bugfix
bugzilla: 180680
CVE: NA
---------------------------
When removing a hugetlb page from the pool the ref count is set to one (as
the free page has no ref count) and compound page destructor is set to
NULL_COMPOUND_DTOR. Since a subsequent call to free the hugetlb page will
call __free_pages for non-gigantic pages and free_gigantic_page for
gigantic pages the destructor is not used.
However, consider the following race with code taking a speculative
reference on the page:
Thread 0                                  Thread 1
--------                                  --------
remove_hugetlb_page
  set_page_refcounted(page);
  set_compound_page_dtor(page,
           NULL_COMPOUND_DTOR);
                                          get_page_unless_zero(page)
__update_and_free_page
  __free_pages(page,
           huge_page_order(h));
          /* Note that __free_pages() will simply drop
             the reference to the page. */
                                          put_page(page)
                                            __put_compound_page()
                                              destroy_compound_page
                                                NULL_COMPOUND_DTOR
                                                  BUG: kernel NULL pointer
                                                  dereference, address:
                                                  0000000000000000
To address this race, set the dtor to the normal compound page dtor for
non-gigantic pages. The dtor for gigantic pages does not matter as
gigantic pages are changed from a compound page to 'just a group of pages'
before freeing. Hence, the destructor is not used.
Link: https://lkml.kernel.org/r/20210809184832.18342-4-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Reviewed-by: Muchun Song <songmuchun(a)bytedance.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Naoya Horiguchi <naoya.horiguchi(a)linux.dev>
Cc: Mina Almasry <almasrymina(a)google.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Conflicts:
mm/hugetlb.c
Signed-off-by: Chen Wandun <chenwandun(a)huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
mm/hugetlb.c | 20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5190154de3b09..2f65dad443ab3 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1213,12 +1213,30 @@ static void update_and_free_page(struct hstate *h, struct page *page)
1 << PG_writeback);
}
VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
- set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
+ /*
+ * Very subtle
+ *
+ * For non-gigantic pages set the destructor to the normal compound
+ * page dtor. This is needed in case someone takes an additional
+ * temporary ref to the page, and freeing is delayed until they drop
+ * their reference.
+ *
+ * For gigantic pages set the destructor to the null dtor. This
+ * destructor will never be called. Before freeing the gigantic
+ * page destroy_compound_gigantic_page will turn the compound page
+ * into a simple group of pages. After this the destructor does not
+ * apply.
+ *
+ * This handles the case where more than one ref is held when and
+ * after update_and_free_page is called.
+ */
set_page_refcounted(page);
if (hstate_is_gigantic(h)) {
+ set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
destroy_compound_gigantic_page(page, huge_page_order(h));
free_gigantic_page(page, huge_page_order(h));
} else {
+ set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
__free_pages(page, huge_page_order(h));
}
}
--
2.25.1
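For readers unfamiliar with the speculative-reference window in the race above, here is a hedged sketch of the racing reader; example_peek() is a hypothetical name, while get_page_unless_zero()/put_page() are the real page-reference primitives:

#include <linux/mm.h>

/* If put_page() below drops the last reference, the compound
 * destructor runs in this context, which is why the dtor must still
 * be valid (not NULL_COMPOUND_DTOR) when the pool frees the page. */
static void example_peek(struct page *page)
{
	if (!get_page_unless_zero(page))
		return;			/* page already freed */
	/* ... inspect the page ... */
	put_page(page);			/* may invoke the destructor */
}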
kernel sig topic proposal: add the osnoise/timerlat trace tools for debugging system/hardware noise to openEuler 22.03
by Wangshaobo (bobo) 02 Dec '21
Topic: add the osnoise/timerlat trace tools for debugging system/hardware noise to openEuler 22.03
Summary: tracing tools for measuring system/hardware background noise, to guarantee low latency for high-performance applications; osnoise/timerlat can probe hardware and system latency with fairly high precision across multiple cores.
https://gitee.com/openeuler/kernel/issues/I4G64B?from=project-issue
Memory cgroup enhancement.
Johannes Weiner (9):
mm: memcontrol: fix cpuhotplug statistics flushing
mm: memcontrol: kill mem_cgroup_nodeinfo()
mm: memcontrol: privatize memcg_page_state query functions
cgroup: rstat: support cgroup1
cgroup: rstat: punt root-level optimization to individual controllers
mm: memcontrol: switch to rstat
mm: memcontrol: consolidate lruvec stat flushing
kselftests: cgroup: update kmem test for new vmstat implementation
mm: memcontrol: fix blocking rstat function called from atomic cgroup1
thresholding code
Miaohe Lin (1):
mm, memcg: remove unused functions
Shakeel Butt (5):
memcg: switch lruvec stats to rstat
memcg: infrastructure to flush memcg stats
memcg: flush lruvec stats in the refault
memcg: flush stats only if updated
memcg: unify memcg stat flushing
Tejun Heo (2):
cgroup: rstat: fix A-A deadlock on 32bit around u64_stats_sync
blk-cgroup: blk_cgroup_bio_start() should use irq-safe operations on
blkg->iostat_cpu
block/blk-cgroup.c | 36 +-
include/linux/memcontrol.h | 179 ++++------
kernel/cgroup/cgroup.c | 34 +-
kernel/cgroup/rstat.c | 82 +++--
mm/memcontrol.c | 391 ++++++++++-----------
mm/vmscan.c | 6 +
mm/workingset.c | 1 +
tools/testing/selftests/cgroup/test_kmem.c | 22 +-
8 files changed, 372 insertions(+), 379 deletions(-)
--
2.20.1
30 Nov '21
From: Nadav Amit <namit(a)vmware.com>
stable inclusion
from stable-5.10.82
commit 40bc831ab5f630431010d1ff867390b07418a7ee
category: bugfix
bugzilla: 185820 https://gitee.com/openeuler/kernel/issues/I4DDEL
CVE: CVE-2021-4002
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
-----------------------------------------------
commit a4a118f2eead1d6c49e00765de89878288d4b890 upstream.
When __unmap_hugepage_range() calls to huge_pmd_unshare() succeed, a TLB
flush is missing. This TLB flush must be performed before releasing the
i_mmap_rwsem, in order to prevent an unshared PMDs page from being
released and reused before the TLB flush took place.
Arguably, a comprehensive solution would use mmu_gather interface to
batch the TLB flushes and the PMDs page release, however it is not an
easy solution: (1) try_to_unmap_one() and try_to_migrate_one() also call
huge_pmd_unshare() and they cannot use the mmu_gather interface; and (2)
deferring the release of the page reference for the PMDs page until
after i_mmap_rwsem is dropped can confuse huge_pmd_unshare() into
thinking PMDs are shared when they are not.
Fix __unmap_hugepage_range() by adding the missing TLB flush, and
forcing a flush when unshare is successful.
Fixes: 24669e58477e ("hugetlb: use mmu_gather instead of a temporary linked list for accumulating pages)" # 3.6
Signed-off-by: Nadav Amit <namit(a)vmware.com>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Aneesh Kumar K.V <aneesh.kumar(a)linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu(a)jp.fujitsu.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Liu Shixin <liushixin2(a)huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
mm/hugetlb.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1bbe763dce73..47dd6b5e0040 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4017,6 +4017,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
struct mmu_notifier_range range;
+ bool force_flush = false;
WARN_ON(!is_vm_hugetlb_page(vma));
BUG_ON(start & ~huge_page_mask(h));
@@ -4045,10 +4046,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
ptl = huge_pte_lock(h, mm, ptep);
if (huge_pmd_unshare(mm, vma, &address, ptep)) {
spin_unlock(ptl);
- /*
- * We just unmapped a page of PMDs by clearing a PUD.
- * The caller's TLB flush range should cover this area.
- */
+ tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
+ force_flush = true;
continue;
}
@@ -4105,6 +4104,22 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
}
mmu_notifier_invalidate_range_end(&range);
tlb_end_vma(tlb, vma);
+
+ /*
+ * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We
+ * could defer the flush until now, since by holding i_mmap_rwsem we
+ * guaranteed that the last reference would not be dropped. But we must
+ * do the flushing before we return, as otherwise i_mmap_rwsem will be
+ * dropped and the last reference to the shared PMDs page might be
+ * dropped as well.
+ *
+ * In theory we could defer the freeing of the PMD pages as well, but
+ * huge_pmd_unshare() relies on the exact page_count for the PMD page to
+ * detect sharing, so we cannot defer the release of the page either.
+ * Instead, do flush now.
+ */
+ if (force_flush)
+ tlb_flush_mmu_tlbonly(tlb);
}
void __unmap_hugepage_range_final(struct mmu_gather *tlb,
--
2.20.1
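The shape of the fix, record the unshare, keep scanning, and force the flush before the protecting lock is dropped, is sketched below under stated assumptions: example_unmap_range() and example_pmd_unshared() are hypothetical stand-ins, and the snippet is kernel-context only.

#include <linux/mm.h>

static bool example_pmd_unshared(unsigned long addr);	/* hypothetical */

/* Defer the flush across the scan, but guarantee it happens before
 * the caller releases i_mmap_rwsem. */
static void example_unmap_range(struct mmu_gather *tlb,
				unsigned long addr, unsigned long end)
{
	bool force_flush = false;

	for (; addr < end; addr += PMD_SIZE) {
		if (example_pmd_unshared(addr)) {
			tlb_flush_pmd_range(tlb, addr & PUD_MASK, PUD_SIZE);
			force_flush = true;
		}
	}

	if (force_flush)
		tlb_flush_mmu_tlbonly(tlb);
}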
Backport 5.10.79 LTS patches from upstream.
Geert Uytterhoeven (1):
usb: gadget: Mark USB_FSL_QE broken on 64-bit
Gustavo A. R. Silva (1):
media: staging/intel-ipu3: css: Fix wrong size comparison
imgu_css_fw_init
James Buren (1):
usb-storage: Add compatibility quirk flags for iODD 2531/2541
Jan Kara (1):
isofs: Fix out of bound access for corrupted isofs image
Johan Hovold (8):
comedi: dt9812: fix DMA buffers on stack
comedi: ni_usb6501: fix NULL-deref in command paths
comedi: vmk80xx: fix transfer-buffer overflows
comedi: vmk80xx: fix bulk-buffer overflow
comedi: vmk80xx: fix bulk and interrupt message timeouts
staging: r8712u: fix control-message timeout
staging: rtl8192u: fix control-message timeouts
rsi: fix control-message timeout
Juergen Gross (1):
Revert "x86/kvm: fix vcpu-id indexed array sizes"
Neal Liu (1):
usb: ehci: handshake CMD_RUN instead of STS_HALT
Paolo Bonzini (1):
KVM: x86: avoid warning with -Wbitwise-instead-of-logical
Pavel Skripkin (1):
staging: rtl8712: fix use-after-free in rtl8712_dl_fw
Petr Mladek (1):
printk/console: Allow to disable console output by using console="" or
console=null
Todd Kjos (1):
binder: don't detect sender/target during buffer cleanup
Viraj Shah (1):
usb: musb: Balance list entry in musb_gadget_queue
arch/x86/kvm/ioapic.c | 2 +-
arch/x86/kvm/ioapic.h | 4 +-
arch/x86/kvm/mmu/mmu.c | 2 +-
drivers/android/binder.c | 14 +--
drivers/net/wireless/rsi/rsi_91x_usb.c | 2 +-
drivers/staging/comedi/drivers/dt9812.c | 115 +++++++++++++++-----
drivers/staging/comedi/drivers/ni_usb6501.c | 10 ++
drivers/staging/comedi/drivers/vmk80xx.c | 28 ++---
drivers/staging/media/ipu3/ipu3-css-fw.c | 7 +-
drivers/staging/media/ipu3/ipu3-css-fw.h | 2 +-
drivers/staging/rtl8192u/r8192U_core.c | 18 +--
drivers/staging/rtl8712/usb_intf.c | 4 +-
drivers/staging/rtl8712/usb_ops_linux.c | 2 +-
drivers/usb/gadget/udc/Kconfig | 1 +
drivers/usb/host/ehci-hcd.c | 11 +-
drivers/usb/host/ehci-platform.c | 6 +
drivers/usb/host/ehci.h | 1 +
drivers/usb/musb/musb_gadget.c | 4 +-
drivers/usb/storage/unusual_devs.h | 10 ++
fs/isofs/inode.c | 2 +
kernel/printk/printk.c | 9 +-
21 files changed, 180 insertions(+), 74 deletions(-)
--
2.20.1
[PATCH openEuler-5.10 1/3] ima: Fix warning: no previous prototype for function 'ima_add_kexec_buffer'
by Zheng Zengkai 30 Nov '21
From: Lakshmi Ramasubramanian <nramas(a)linux.microsoft.com>
mainline inclusion
from mainline-5.14
commit: c67913492fec317bc53ffdff496b6ba856d2868c
category: bugfix
bugzilla: 182971 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
---------------------------
The function prototype for ima_add_kexec_buffer() is present
in 'linux/ima.h'. But this header file is not included in
ima_kexec.c where the function is implemented. This results
in the following compiler warning when "-Wmissing-prototypes" flag
is turned on:
security/integrity/ima/ima_kexec.c:81:6: warning: no previous prototype
for function 'ima_add_kexec_buffer' [-Wmissing-prototypes]
Include the header file 'linux/ima.h' in ima_kexec.c to fix
the compiler warning.
Fixes: dce92f6b11c3 (arm64: Enable passing IMA log to next kernel on kexec)
Reported-by: kernel test robot <lkp(a)intel.com>
Signed-off-by: Lakshmi Ramasubramanian <nramas(a)linux.microsoft.com>
Acked-by: Rob Herring <robh(a)kernel.org>
Signed-off-by: Mimi Zohar <zohar(a)linux.ibm.com>
Signed-off-by: Guo Zihua <guozihua(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
security/integrity/ima/ima_kexec.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
index 667887665823..f799cc278a9a 100644
--- a/security/integrity/ima/ima_kexec.c
+++ b/security/integrity/ima/ima_kexec.c
@@ -11,6 +11,7 @@
#include <linux/vmalloc.h>
#include <linux/kexec.h>
#include <linux/of.h>
+#include <linux/ima.h>
#include "ima.h"
#ifdef CONFIG_IMA_KEXEC
--
2.20.1
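For context, the class of warning being silenced reduces to the following hedged illustration; example_fn() and example.h are hypothetical:

/* example.c: compiled with -Wmissing-prototypes, this definition
 * warns unless a prototype is in scope. Including the header that
 * declares it (as the patch includes <linux/ima.h>) silences it. */
#include "example.h"	/* declares: void example_fn(void); */

void example_fn(void)
{
}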
[PATCH openEuler-5.10 01/10] powerpc/booke: Disable STRICT_KERNEL_RWX, DEBUG_PAGEALLOC and KFENCE
by Zheng Zengkai 30 Nov '21
From: Christophe Leroy <christophe.leroy(a)csgroup.eu>
mainline inclusion
from mainline-v5.16-rc1
commit 68b44f94d6370e2c6c790fedd28e637fa9964a93
category: bugfix
bugzilla: 185780 https://gitee.com/openeuler/kernel/issues/I4EUY7
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
-------------------------------------------------
fsl_booke and 44x are not able to map kernel linear memory with
pages, so they can't support DEBUG_PAGEALLOC and KFENCE, and
STRICT_KERNEL_RWX is also a problem for now.
Enable those only on book3s (both 32 and 64 except KFENCE), 8xx and 40x.
Fixes: 88df6e90fa97 ("[POWERPC] DEBUG_PAGEALLOC for 32-bit")
Fixes: 95902e6c8864 ("powerpc/mm: Implement STRICT_KERNEL_RWX on PPC32")
Fixes: 90cbac0e995d ("powerpc: Enable KFENCE for PPC32")
Signed-off-by: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/d1ad9fdd9b27da3fdfa16510bb542ed51fa6e134.16342921…
Conflicts:
arch/powerpc/Kconfig
Signed-off-by: Liu Shixin <liushixin2(a)huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
arch/powerpc/Kconfig | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 81a629c5133c..da2b1c3b9ae4 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -135,7 +135,7 @@ config PPC
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
- select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION)
+ select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S_32 || PPC_8xx || 40x) && !HIBERNATION
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
select ARCH_HAS_COPY_MC if PPC64
@@ -184,7 +184,7 @@ config PPC
select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14
select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14
select HAVE_ARCH_KGDB
- select HAVE_ARCH_KFENCE if PPC32
+ select HAVE_ARCH_KFENCE if PPC_BOOK3S_32 || PPC_8xx || 40x
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_NVRAM_OPS
@@ -360,7 +360,7 @@ config PPC_OF_PLATFORM_PCI
depends on PPC64 # not supported on 32 bits yet
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
- depends on PPC32 || PPC_BOOK3S_64
+ depends on PPC_BOOK3S || PPC_8xx || 40x
def_bool y
config ARCH_SUPPORTS_UPROBES
--
2.20.1
[PATCH openEuler-5.10 01/14] ubifs: fix slab-out-of-bounds in ubifs_change_lp
by Zheng Zengkai 30 Nov '21
From: Baokun Li <libaokun1(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: 182993 https://gitee.com/openeuler/kernel/issues/I4DDEL
---------------------------
Hulk Robot reported a KASAN report about slab-out-of-bounds:
==================================================================
BUG: KASAN: slab-out-of-bounds in ubifs_change_lp+0x3a9/0x1390 [ubifs]
Read of size 8 at addr ffff888101c961f8 by task fsstress/1068
[...]
Call Trace:
check_memory_region+0x1c1/0x1e0
ubifs_change_lp+0x3a9/0x1390 [ubifs]
ubifs_change_one_lp+0x170/0x220 [ubifs]
ubifs_garbage_collect+0x7f9/0xda0 [ubifs]
ubifs_budget_space+0xfe4/0x1bd0 [ubifs]
ubifs_write_begin+0x528/0x10c0 [ubifs]
Allocated by task 1068:
kmemdup+0x25/0x50
ubifs_lpt_lookup_dirty+0x372/0xb00 [ubifs]
ubifs_update_one_lp+0x46/0x260 [ubifs]
ubifs_tnc_end_commit+0x98b/0x1720 [ubifs]
do_commit+0x6cb/0x1950 [ubifs]
ubifs_run_commit+0x15a/0x2b0 [ubifs]
ubifs_budget_space+0x1061/0x1bd0 [ubifs]
ubifs_write_begin+0x528/0x10c0 [ubifs]
[...]
==================================================================
In ubifs_garbage_collect(), if ubifs_find_dirty_leb() returns an error,
lp is left uninitialized, yet lp.lnum may still be used in the out
branch with a random value. If that value is -1, or another value that
happens to pass the check, a slab-out-of-bounds access can occur in the
subsequent ubifs_change_lp().
To solve this problem, we initialize lp.lnum to -1; ubifs_find_dirty_leb()
then sets it to a valid LEB number (never -1), and ubifs_return_leb()
is executed only when lp.lnum != -1.
Additionally, if a retained or indexing LEB is found and the loop
continues, but then breaks before finding another LEB, the "taken" flag
of that LEB would be wrongly cleared in ubifs_return_leb(). This patch
fixes that bug as well.
Reported-by: Hulk Robot <hulkci(a)huawei.com>
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Zhang Yi <yi.zhang(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
fs/ubifs/gc.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index dc3e26e9ed7b..05e1eeae8457 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -692,6 +692,9 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
for (i = 0; ; i++) {
int space_before, space_after;
+ /* Maybe continue after find and break before find */
+ lp.lnum = -1;
+
cond_resched();
/* Give the commit an opportunity to run */
@@ -843,7 +846,8 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
ubifs_wbuf_sync_nolock(wbuf);
ubifs_ro_mode(c, ret);
mutex_unlock(&wbuf->io_mutex);
- ubifs_return_leb(c, lp.lnum);
+ if (lp.lnum != -1)
+ ubifs_return_leb(c, lp.lnum);
return ret;
}
--
2.20.1
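The sentinel idiom the fix relies on, shown in isolation as a hedged sketch; every example_* name is hypothetical:

/* Mark the held resource invalid up front so the error path can tell
 * "nothing acquired" apart from a stale value of an earlier iteration. */
static int example_find(int *lnum);	/* sets *lnum >= 0 on success */
static int example_use(int lnum);
static void example_release(int lnum);

static int example_gc(void)
{
	int lnum = -1;			/* sentinel: no LEB held */
	int err;

	for (;;) {
		err = example_find(&lnum);
		if (err)
			break;		/* may break before any find */
		err = example_use(lnum);
		if (err)
			break;		/* lnum is valid here */
		lnum = -1;		/* consumed; reset the sentinel */
	}

	if (lnum != -1)			/* release only what is held */
		example_release(lnum);
	return err;
}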
Hello Kernel,
I would like to communicate through this list.
------------------ Original ------------------
From: "kernel-request"<kernel-request(a)openeuler.org>;
Date: Tue, Nov 30, 2021 04:12 PM
To: "杨嫣"<yan.yang(a)i-soft.com.cn>;
Subject: Welcome to the "Kernel" mailing list
Welcome to the "Kernel" mailing list!
To post to this list, send your email to:
kernel(a)openeuler.org
You can unsubscribe or make adjustments to your options via email by
sending a message to:
kernel-leave(a)openeuler.org
with the word 'help' in the subject or body (don't include the
quotes), and you will receive a message with instructions. You will
need your password to change your options, but for security purposes,
this password is not included here. If you have forgotten your
password, you will need to reset it via the web UI.
[PATCH openEuler-1.0-LTS] defconfig: update the defconfigs to support 9P
by Yang Yingliang 30 Nov '21
From: Laibin Qiu <qiulaibin(a)huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4ICQF?from=project-issue
CVE: NA
-------------------------------------------------
Enable configs to support 9P.
Signed-off-by: Laibin Qiu <qiulaibin(a)huawei.com>
Reviewed-by: Hou Tao <houtao1(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
arch/arm64/configs/openeuler_defconfig | 9 ++++++++-
arch/x86/configs/openeuler_defconfig | 10 +++++++++-
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index c63a2f829db02..c2cb98dcf340b 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -1729,7 +1729,10 @@ CONFIG_RFKILL=m
CONFIG_RFKILL_LEDS=y
CONFIG_RFKILL_INPUT=y
CONFIG_RFKILL_GPIO=m
-# CONFIG_NET_9P is not set
+CONFIG_NET_9P=m
+CONFIG_NET_9P_VIRTIO=m
+# CONFIG_NET_9P_RDMA is not set
+# CONFIG_NET_9P_DEBUG is not set
# CONFIG_CAIF is not set
CONFIG_CEPH_LIB=m
# CONFIG_CEPH_LIB_PRETTYDEBUG is not set
@@ -5356,6 +5359,10 @@ CONFIG_CIFS_DFS_UPCALL=y
# CONFIG_CIFS_FSCACHE is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
+CONFIG_9P_FS=m
+CONFIG_9P_FSCACHE=y
+CONFIG_9P_FS_POSIX_ACL=y
+CONFIG_9P_FS_SECURITY=y
CONFIG_NLS=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index 0b103110f821c..8a1c4daf5c4b0 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -1805,7 +1805,11 @@ CONFIG_RFKILL=m
CONFIG_RFKILL_LEDS=y
CONFIG_RFKILL_INPUT=y
# CONFIG_RFKILL_GPIO is not set
-# CONFIG_NET_9P is not set
+CONFIG_NET_9P=m
+CONFIG_NET_9P_VIRTIO=m
+# CONFIG_NET_9P_XEN is not set
+# CONFIG_NET_9P_RDMA is not set
+# CONFIG_NET_9P_DEBUG is not set
# CONFIG_CAIF is not set
CONFIG_CEPH_LIB=m
# CONFIG_CEPH_LIB_PRETTYDEBUG is not set
@@ -6838,6 +6842,10 @@ CONFIG_CIFS_DFS_UPCALL=y
# CONFIG_CIFS_FSCACHE is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
+CONFIG_9P_FS=m
+CONFIG_9P_FSCACHE=y
+CONFIG_9P_FS_POSIX_ACL=y
+CONFIG_9P_FS_SECURITY=y
CONFIG_NLS=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
--
2.25.1
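With these options built as modules, a guest can mount a virtio-9p share from userspace. A hedged sketch follows; the "hostshare" tag and the /mnt mount point are examples, not fixed names:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* trans=virtio selects the CONFIG_NET_9P_VIRTIO transport;
	 * 9p2000.L is the Linux-extended protocol variant. */
	if (mount("hostshare", "/mnt", "9p", 0,
		  "trans=virtio,version=9p2000.L")) {
		perror("mount 9p");
		return 1;
	}
	return 0;
}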
[Meeting Notice] openEuler kernel tech-sharing session #15 & biweekly meeting Time: 2021-12-03 14:00-16:30
by Meeting Book 30 Nov '21
kernel sig topic proposal: discussion of the core CONFIG choices (PageSize/NR_CPUS/NODES_SHIFT) for openEuler 22.03 LTS
by Xie XiuQi 29 Nov '21
Topic: discussion of the core CONFIG options (PageSize/NR_CPUS/NODES_SHIFT) for openEuler 22.03 LTS
https://gitee.com/openeuler/kernel/issues/I4HDHZ
Discussion in the issue ahead of the meeting is welcome.
[PATCH openEuler-5.10 01/31] timer_list: avoid other cpu soft lockup when printing timer list
by Zheng Zengkai 29 Nov '21
From: Yang Yingliang <yangyingliang(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4IYRE
---------------------------
If the system has many CPUs (e.g. 128), it takes a long time to print
messages to the console when executing echo q > /proc/sysrq-trigger.
When /proc/sys/kernel/numa_balancing is enabled and a migration thread
is woken up, that thread cannot continue until the printing finishes,
which triggers a soft lockup.
PID: 619 TASK: ffffa02fdd8bec80 CPU: 121 COMMAND: "migration/121"
#0 [ffff00000a103b10] __crash_kexec at ffff0000081bf200
#1 [ffff00000a103ca0] panic at ffff0000080ec93c
#2 [ffff00000a103d80] watchdog_timer_fn at ffff0000081f8a14
#3 [ffff00000a103e00] __run_hrtimer at ffff00000819701c
#4 [ffff00000a103e40] __hrtimer_run_queues at ffff000008197420
#5 [ffff00000a103ea0] hrtimer_interrupt at ffff00000819831c
#6 [ffff00000a103f10] arch_timer_dying_cpu at ffff000008b53144
#7 [ffff00000a103f30] handle_percpu_devid_irq at ffff000008174e34
#8 [ffff00000a103f70] generic_handle_irq at ffff00000816c5e8
#9 [ffff00000a103f90] __handle_domain_irq at ffff00000816d1f4
#10 [ffff00000a103fd0] gic_handle_irq at ffff000008081860
--- <IRQ stack> ---
#11 [ffff00000d6e3d50] el1_irq at ffff0000080834c8
#12 [ffff00000d6e3d60] multi_cpu_stop at ffff0000081d9964
#13 [ffff00000d6e3db0] cpu_stopper_thread at ffff0000081d9cfc
#14 [ffff00000d6e3e10] smpboot_thread_fn at ffff00000811e0a8
#15 [ffff00000d6e3e70] kthread at ffff000008118988
To avoid this soft lockup, add touch_all_softlockup_watchdogs()
in sysrq_timer_list_show()
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Reviewed-By: Xie XiuQi <xiexiuqi(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Reviewed-by: wangxiongfeng 00379786 <wangxiongfeng2(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
kernel/time/timer_list.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index acb326f5f50a..4cb0e6f62e97 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -289,13 +289,17 @@ void sysrq_timer_list_show(void)
timer_list_header(NULL, now);
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
+ touch_all_softlockup_watchdogs();
print_cpu(NULL, cpu, now);
+ }
#ifdef CONFIG_GENERIC_CLOCKEVENTS
timer_list_show_tickdevices_header(NULL);
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
+ touch_all_softlockup_watchdogs();
print_tickdevice(NULL, tick_get_device(cpu), cpu);
+ }
#endif
return;
}
--
2.20.1
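The same pattern applies to any slow per-CPU diagnostic loop; a hedged kernel-context sketch, with example_dump_one() as a hypothetical stand-in for the slow printer:

#include <linux/cpumask.h>
#include <linux/nmi.h>

static void example_dump_one(int cpu);	/* hypothetical */

/* Console output can stall other CPUs' progress, so pet every
 * soft-lockup watchdog once per iteration of the long loop. */
static void example_dump_all(void)
{
	int cpu;

	for_each_online_cpu(cpu) {
		touch_all_softlockup_watchdogs();
		example_dump_one(cpu);
	}
}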
[PATCH openEuler-1.0-LTS 01/13] sched: Introduce qos scheduler for co-location
by Yang Yingliang 29 Nov '21
From: Zheng Zucheng <zhengzucheng(a)huawei.com>
hulk inclusion
category: feature
bugzilla: 51828, https://gitee.com/openeuler/kernel/issues/I4K96G
CVE: NA
--------------------------------
We introduce the idea of a QoS level to the scheduler, currently backed
by different scheduling policies. The QoS scheduler changes the policy
of the affected tasks when the QoS level of a task group is modified
through the cpu.qos_level cpu cgroup file. In this way we can satisfy
the different needs of tasks at different QoS levels.
Signed-off-by: Zhang Qiao <zhangqiao22(a)huawei.com>
Signed-off-by: Zheng Zucheng <zhengzucheng(a)huawei.com>
Reviewed-by: Chen Hui <judy.chenhui(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
init/Kconfig | 8 ++++
kernel/sched/core.c | 93 ++++++++++++++++++++++++++++++++++++++++++++
kernel/sched/sched.h | 4 ++
3 files changed, 105 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig
index c05347a29ca4d..a338519692d54 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -777,6 +777,14 @@ menuconfig CGROUP_SCHED
tasks.
if CGROUP_SCHED
+config QOS_SCHED
+ bool "Qos task scheduling"
+ depends on CGROUP_SCHED
+ depends on CFS_BANDWIDTH
+ depends on X86
+
+ default n
+
config FAIR_GROUP_SCHED
bool "Group scheduling for SCHED_OTHER"
depends on CGROUP_SCHED
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8866cd7f19c43..23160df884e49 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6338,6 +6338,15 @@ void ia64_set_curr_task(int cpu, struct task_struct *p)
/* task_group_lock serializes the addition/removal of task groups */
static DEFINE_SPINLOCK(task_group_lock);
+#ifdef CONFIG_QOS_SCHED
+static int alloc_qos_sched_group(struct task_group *tg, struct task_group *parent)
+{
+ tg->qos_level = parent->qos_level;
+
+ return 1;
+}
+#endif
+
static void sched_free_group(struct task_group *tg)
{
free_fair_sched_group(tg);
@@ -6358,6 +6367,11 @@ struct task_group *sched_create_group(struct task_group *parent)
if (!alloc_fair_sched_group(tg, parent))
goto err;
+#ifdef CONFIG_QOS_SCHED
+ if (!alloc_qos_sched_group(tg, parent))
+ goto err;
+#endif
+
if (!alloc_rt_sched_group(tg, parent))
goto err;
@@ -6426,6 +6440,30 @@ static void sched_change_group(struct task_struct *tsk, int type)
tg = autogroup_task_group(tsk, tg);
tsk->sched_task_group = tg;
+#ifdef CONFIG_QOS_SCHED
+ /*
+ * No need to re-setcheduler when a task is exiting or the task
+ * is in an autogroup.
+ */
+ if (!rt_task(tsk)
+ && !(tsk->flags & PF_EXITING)
+ && !task_group_is_autogroup(tg)) {
+ struct rq *rq = task_rq(tsk);
+ struct sched_attr attr = {
+ .sched_priority = 0,
+ };
+
+ if (tg->qos_level == -1) {
+ attr.sched_policy = SCHED_IDLE;
+ } else {
+ attr.sched_policy = SCHED_NORMAL;
+ }
+ attr.sched_nice = PRIO_TO_NICE(tsk->static_prio);
+
+ __setscheduler(rq, tsk, &attr, 0);
+ }
+#endif
+
#ifdef CONFIG_FAIR_GROUP_SCHED
if (tsk->sched_class->task_change_group)
tsk->sched_class->task_change_group(tsk, type);
@@ -6886,6 +6924,54 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
}
#endif /* CONFIG_RT_GROUP_SCHED */
+#ifdef CONFIG_QOS_SCHED
+static int cpu_qos_write(struct cgroup_subsys_state *css,
+ struct cftype *cftype, s64 qos_level)
+{
+ struct css_task_iter it;
+ struct task_struct *tsk;
+ struct task_group *tg;
+ struct sched_param param;
+ int pid, policy;
+ tg = css_tg(css);
+
+ if (!tg->se[0])
+ return -EINVAL;
+
+ if (qos_level != -1 && qos_level != 0)
+ return -EINVAL;
+
+ if (tg->qos_level == qos_level)
+ goto done;
+
+ if (qos_level == -1) {
+ policy = SCHED_IDLE;
+ } else {
+ policy = SCHED_NORMAL;
+ }
+
+ tg->qos_level = qos_level;
+
+ param.sched_priority = 0;
+ css_task_iter_start(css, 0, &it);
+ while ((tsk = css_task_iter_next(&it))) {
+ pid = task_tgid_vnr(tsk);
+
+ if (pid > 0 && !rt_task(tsk))
+ sched_setscheduler(tsk, policy, ¶m);
+ }
+ css_task_iter_end(&it);
+
+done:
+ return 0;
+}
+
+static s64 cpu_qos_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ return css_tg(css)->qos_level;
+}
+#endif /* CONFIG_QOS_SCHED */
+
static struct cftype cpu_legacy_files[] = {
#ifdef CONFIG_FAIR_GROUP_SCHED
{
@@ -6921,6 +7007,13 @@ static struct cftype cpu_legacy_files[] = {
.read_u64 = cpu_rt_period_read_uint,
.write_u64 = cpu_rt_period_write_uint,
},
+#endif
+#ifdef CONFIG_QOS_SCHED
+ {
+ .name = "qos_level",
+ .read_s64 = cpu_qos_read,
+ .write_s64 = cpu_qos_write,
+ },
#endif
{ } /* Terminate */
};
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e6238db9dc996..c263cb2f35c5d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -402,7 +402,11 @@ struct task_group {
struct cfs_bandwidth cfs_bandwidth;
+#if defined(CONFIG_QOS_SCHED) && !defined(__GENKSYMS__)
+ long qos_level;
+#else
KABI_RESERVE(1)
+#endif
KABI_RESERVE(2)
};
--
2.25.1
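From userspace, marking a group offline then amounts to writing -1 into its cpu.qos_level file. A hedged sketch with an example cgroup-v1 path; adjust the path to the actual hierarchy:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/fs/cgroup/cpu/offline_grp/cpu.qos_level", "w");

	if (!f) {
		perror("cpu.qos_level");
		return 1;
	}
	fprintf(f, "-1\n");	/* -1: offline (SCHED_IDLE); 0: online */
	return fclose(f) != 0;
}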
[PATCH openEuler-1.0-LTS 1/3] ACPI: CPPC: Fix cppc_cpufreq_init failed in CPU Hotplug situation
by Yang Yingliang 27 Nov '21
From: Xiongfeng Wang <wangxiongfeng2(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4HYY4?from=project-issue
CVE: NA
-------------------------------------------------
Per-CPU variables cpc_desc_ptr are initialized in
acpi_cppc_processor_probe() when the processor devices are present and
added into the system. But when cpu_possible_mask and cpu_present_mask
is not equal, only cpc_desc_ptr in cpu_present_mask are initialized,
this will cause acpi_get_psd_map() failed in cppc_cpufreq_init().
To fix this issue, we parse the _PSD method for all possible CPUs to get
the P-State topology and modify acpi_get_psd_map() to rely on this
information.
Signed-off-by: Xiongfeng Wang <wangxiongfeng(a)huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1(a)huawei.com>
Reviewed-by: Hanjun Guo <guohanjun(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/acpi/cppc_acpi.c | 93 ++++++++++++++++++++++++++++++++++++++--
1 file changed, 89 insertions(+), 4 deletions(-)
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 6134f20a13f0c..e71c0e0572bea 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -415,7 +415,7 @@ static int acpi_get_psd(struct cpc_desc *cpc_ptr, acpi_handle handle)
*
* Return: 0 for success or negative value for err.
*/
-int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data)
+static int __acpi_get_psd_map(struct cppc_cpudata **all_cpu_data, struct cpc_desc **cpc_pptr)
{
int count_target;
int retval = 0;
@@ -441,7 +441,7 @@ int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data)
if (cpumask_test_cpu(i, covered_cpus))
continue;
- cpc_ptr = per_cpu(cpc_desc_ptr, i);
+ cpc_ptr = cpc_pptr[i];
if (!cpc_ptr) {
retval = -EFAULT;
goto err_ret;
@@ -466,7 +466,7 @@ int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data)
if (i == j)
continue;
- match_cpc_ptr = per_cpu(cpc_desc_ptr, j);
+ match_cpc_ptr = cpc_pptr[j];
if (!match_cpc_ptr) {
retval = -EFAULT;
goto err_ret;
@@ -499,7 +499,7 @@ int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data)
if (!match_pr)
continue;
- match_cpc_ptr = per_cpu(cpc_desc_ptr, j);
+ match_cpc_ptr = cpc_pptr[j];
if (!match_cpc_ptr) {
retval = -EFAULT;
goto err_ret;
@@ -532,6 +532,91 @@ int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data)
free_cpumask_var(covered_cpus);
return retval;
}
+
+static acpi_status acpi_parse_cpc(acpi_handle handle, u32 lvl, void *data,
+ void **ret_p)
+{
+ struct acpi_device *adev = NULL;
+ struct cpc_desc *cpc_ptr, **cpc_pptr;
+ acpi_status status = AE_OK;
+ const int device_declaration = 1;
+ unsigned long long uid;
+ phys_cpuid_t phys_id;
+ int logical_id, ret;
+ int *parsed_core_num = (int *)ret_p;
+
+ if (acpi_bus_get_device(handle, &adev))
+ return AE_OK;
+
+ if (strcmp(acpi_device_hid(adev), ACPI_PROCESSOR_DEVICE_HID))
+ return AE_OK;
+
+ status = acpi_evaluate_integer(handle, METHOD_NAME__UID, NULL, &uid);
+ if (ACPI_FAILURE(status))
+ return AE_OK;
+ phys_id = acpi_get_phys_id(handle, device_declaration, uid);
+ if (invalid_phys_cpuid(phys_id))
+ return AE_OK;
+ logical_id = acpi_map_cpuid(phys_id, uid);
+ if (logical_id < 0)
+ return AE_OK;
+
+ cpc_pptr = (struct cpc_desc **)data;
+ cpc_ptr = cpc_pptr[logical_id];
+ cpc_ptr->cpu_id = logical_id;
+
+ ret = acpi_get_psd(cpc_ptr, handle);
+ if (ret)
+ return ret;
+
+ (*parsed_core_num)++;
+
+ return AE_OK;
+}
+
+int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data)
+{
+ struct cpc_desc **cpc_pptr, *cpc_ptr;
+ int parsed_core_num = 0;
+ int i, ret;
+
+ cpc_pptr = kcalloc(num_possible_cpus(), sizeof(void *), GFP_KERNEL);
+ if (!cpc_pptr)
+ return -ENOMEM;
+ for_each_possible_cpu(i) {
+ cpc_pptr[i] = kzalloc(sizeof(struct cpc_desc), GFP_KERNEL);
+ if (!cpc_pptr[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ /*
+ * We cannot use acpi_get_devices() to walk the processor devices
+ * because some processor devices may not be present.
+ */
+ ret = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX, acpi_parse_cpc, NULL,
+ cpc_pptr, (void **)&parsed_core_num);
+ if (ret)
+ goto out;
+ if (parsed_core_num != num_possible_cpus()) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = __acpi_get_psd_map(all_cpu_data, cpc_pptr);
+
+out:
+ for_each_possible_cpu(i) {
+ cpc_ptr = cpc_pptr[i];
+ if (cpc_ptr)
+ kfree(cpc_ptr);
+ }
+ kfree(cpc_pptr);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(acpi_get_psd_map);
static int register_pcc_channel(int pcc_ss_idx)
--
2.25.1
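For illustration only (not part of the patch): the failure mode above is
the general bug class of initializing a per-CPU pointer only for present
CPUs while a later consumer walks all possible CPUs. A hedged,
self-contained sketch with assumed CPU counts:

/* Assumption: 4 possible CPUs, of which only 2 are present at boot.
 * Present-only initialization leaves NULL holes that a possible-wide
 * walk (like acpi_get_psd_map() before this patch) trips over.
 */
#include <stdio.h>

#define NR_POSSIBLE 4
#define NR_PRESENT  2

static int desc_storage[NR_POSSIBLE];
static int *cpc_desc_ptr[NR_POSSIBLE];

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_PRESENT; cpu++)	/* present-only init */
		cpc_desc_ptr[cpu] = &desc_storage[cpu];

	for (cpu = 0; cpu < NR_POSSIBLE; cpu++)	/* possible-wide walk */
		if (!cpc_desc_ptr[cpu])
			printf("cpu%d: no CPC descriptor\n", cpu);
	return 0;
}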

[PATCH openEuler-5.10 16/31] perf, kvm/arm64: perf-kvm-stat to report VM TRAP
by Zheng Zengkai 26 Nov '21
From: Zenghui Yu <yuzenghui(a)huawei.com>
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4IZPY
CVE: NA
-------------------------------------------------
When a guest exits due to "TRAP", we can analyze the guest exit reasons
in more depth. Enhance perf-kvm-stat to record and analyze VM TRAP events.
There is a mapping between the guest's "trap_code" (ESR_ELx's bits[31:26])
and the "trap_reason" - kvm_arm_exception_class. Copy it from the kernel
into aarch64_guest_exits.h to export it to userspace.
This patch records two new KVM tracepoints: "kvm:kvm_trap_enter" and
"kvm:kvm_trap_exit", and reports statistical data between these two
tracepoints.
A simple test is shown below:
# ./tools/perf/perf kvm stat record -p 20763
[ perf record: Woken up 92 times to write data ]
[ perf record: Captured and wrote 203.727 MB perf.data.guest (2601786 samples) ]
# ./tools/perf/perf kvm stat report --event=vmexit
Analyze events for all VMs, all VCPUs:
VM-EXIT Samples Samples% Time% Min Time Max Time Avg time
TRAP 640931 97.12% 100.00% 2.44us 14683.86us 3446.49us ( +- 0.05% )
IRQ 19019 2.88% 0.00% 0.90us 461.94us 2.12us ( +- 2.09% )
Total Samples:659950, Total events handled time:2209005391.30us.
# ./tools/perf/perf kvm stat report --event=trap
Analyze events for all VMs, all VCPUs:
TRAP-EVENT Samples Samples% Time% Min Time Max Time Avg time
WFx 601194 93.80% 99.98% 0.90us 4294.04us 3671.01us ( +- 0.03% )
SYS64 33714 5.26% 0.01% 1.10us 41.34us 5.68us ( +- 0.18% )
DABT_LOW 6014 0.94% 0.00% 1.12us 18.04us 2.57us ( +- 0.91% )
IABT_LOW 12 0.00% 0.01% 12597.76us 14679.96us 12893.61us ( +- 1.34% )
Total Samples:640934, Total events handled time:2207353434.56us.
Signed-off-by: Zenghui Yu <yuzenghui(a)huawei.com>
Reviewed-by: Hailiang Zhang <zhang.zhanghailiang(a)huawei.com>
Signed-off-by: Zenghui Yu <yuzenghui(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Link: https://lore.kernel.org/r/1560330526-15468-6-git-send-email-yuzenghui@huawe…
Link: https://gitee.com/openeuler/kernel/commit/59634497418b
Reviewed-by: Yanan Wang <wangyanan55(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
.../arch/arm64/util/aarch64_guest_exits.h | 72 +++++++++++++++++++
tools/perf/arch/arm64/util/kvm-stat.c | 68 ++++++++++++++++++
2 files changed, 140 insertions(+)
diff --git a/tools/perf/arch/arm64/util/aarch64_guest_exits.h b/tools/perf/arch/arm64/util/aarch64_guest_exits.h
index aec2e6e012d3..76e8f0358182 100644
--- a/tools/perf/arch/arm64/util/aarch64_guest_exits.h
+++ b/tools/perf/arch/arm64/util/aarch64_guest_exits.h
@@ -24,4 +24,76 @@
{ARM_EXCEPTION_TRAP, "TRAP" }, \
{ARM_EXCEPTION_HYP_GONE, "HYP_GONE" }
+/* esr.h */
+#define ESR_ELx_EC_UNKNOWN (0x00)
+#define ESR_ELx_EC_WFx (0x01)
+/* Unallocated EC: 0x02 */
+#define ESR_ELx_EC_CP15_32 (0x03)
+#define ESR_ELx_EC_CP15_64 (0x04)
+#define ESR_ELx_EC_CP14_MR (0x05)
+#define ESR_ELx_EC_CP14_LS (0x06)
+#define ESR_ELx_EC_FP_ASIMD (0x07)
+#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */
+#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */
+/* Unallocated EC: 0x0A - 0x0B */
+#define ESR_ELx_EC_CP14_64 (0x0C)
+#define ESR_ELx_EC_BTI (0x0D)
+#define ESR_ELx_EC_ILL (0x0E)
+/* Unallocated EC: 0x0F - 0x10 */
+#define ESR_ELx_EC_SVC32 (0x11)
+#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */
+#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */
+/* Unallocated EC: 0x14 */
+#define ESR_ELx_EC_SVC64 (0x15)
+#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */
+#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */
+#define ESR_ELx_EC_SYS64 (0x18)
+#define ESR_ELx_EC_SVE (0x19)
+#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */
+/* Unallocated EC: 0x1B */
+#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */
+/* Unallocated EC: 0x1D - 0x1E */
+#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
+#define ESR_ELx_EC_IABT_LOW (0x20)
+#define ESR_ELx_EC_IABT_CUR (0x21)
+#define ESR_ELx_EC_PC_ALIGN (0x22)
+/* Unallocated EC: 0x23 */
+#define ESR_ELx_EC_DABT_LOW (0x24)
+#define ESR_ELx_EC_DABT_CUR (0x25)
+#define ESR_ELx_EC_SP_ALIGN (0x26)
+/* Unallocated EC: 0x27 */
+#define ESR_ELx_EC_FP_EXC32 (0x28)
+/* Unallocated EC: 0x29 - 0x2B */
+#define ESR_ELx_EC_FP_EXC64 (0x2C)
+/* Unallocated EC: 0x2D - 0x2E */
+#define ESR_ELx_EC_SERROR (0x2F)
+#define ESR_ELx_EC_BREAKPT_LOW (0x30)
+#define ESR_ELx_EC_BREAKPT_CUR (0x31)
+#define ESR_ELx_EC_SOFTSTP_LOW (0x32)
+#define ESR_ELx_EC_SOFTSTP_CUR (0x33)
+#define ESR_ELx_EC_WATCHPT_LOW (0x34)
+#define ESR_ELx_EC_WATCHPT_CUR (0x35)
+/* Unallocated EC: 0x36 - 0x37 */
+#define ESR_ELx_EC_BKPT32 (0x38)
+/* Unallocated EC: 0x39 */
+#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */
+/* Unallocated EC: 0x3B */
+#define ESR_ELx_EC_BRK64 (0x3C)
+/* Unallocated EC: 0x3D - 0x3F */
+#define ESR_ELx_EC_MAX (0x3F)
+
+/* kvm_arm.h */
+#define ECN(x) { ESR_ELx_EC_##x, #x }
+
+#define kvm_arm_exception_class \
+ ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \
+ ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(PAC), ECN(CP14_64), \
+ ECN(SVC64), ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(SVE), \
+ ECN(IMP_DEF), ECN(IABT_LOW), ECN(IABT_CUR), \
+ ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \
+ ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
+ ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
+ ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
+ ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
+
#endif /* ARCH_PERF_AARCH64_GUEST_EXITS_H */
diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c
index 2fed20370829..a0a97073d2d1 100644
--- a/tools/perf/arch/arm64/util/kvm-stat.c
+++ b/tools/perf/arch/arm64/util/kvm-stat.c
@@ -4,10 +4,14 @@
* Copyright(c) 2019 Huawei Technologies Co., Ltd
*/
+#include <string.h>
+#include "../../../util/debug.h"
+#include "../../../util/evsel.h"
#include "../../../util/kvm-stat.h"
#include "aarch64_guest_exits.h"
define_exit_reasons_table(arm64_exit_reasons, kvm_arm_exception_type);
+define_exit_reasons_table(arm64_trap_reasons, kvm_arm_exception_class);
static struct kvm_events_ops exit_events = {
.is_begin_event = exit_event_begin,
@@ -22,14 +26,78 @@ const char *kvm_exit_reason = "ret";
const char *kvm_entry_trace = "kvm:kvm_entry";
const char *kvm_exit_trace = "kvm:kvm_exit";
+const char *kvm_trap_reason = "esr_ec";
+const char *kvm_trap_enter_trace = "kvm:kvm_trap_enter";
+const char *kvm_trap_exit_trace = "kvm:kvm_trap_exit";
+
+static void trap_event_get_key(struct evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ key->info = 0;
+ key->key = evsel__intval(evsel, sample, kvm_trap_reason);
+}
+
+static const char *get_trap_reason(u64 exit_code)
+{
+ struct exit_reasons_table *tbl = arm64_trap_reasons;
+
+ while (tbl->reason != NULL) {
+ if (tbl->exit_code == exit_code)
+ return tbl->reason;
+ tbl++;
+ }
+
+ pr_err("Unknown kvm trap exit code: %lld on aarch64\n",
+ (unsigned long long)exit_code);
+ return "UNKNOWN";
+}
+
+static bool trap_event_end(struct evsel *evsel,
+ struct perf_sample *sample __maybe_unused,
+ struct event_key *key __maybe_unused)
+{
+ return (!strcmp(evsel->name, kvm_trap_exit_trace));
+}
+
+static bool trap_event_begin(struct evsel *evsel,
+ struct perf_sample *sample, struct event_key *key)
+{
+ if (!strcmp(evsel->name, kvm_trap_enter_trace)) {
+ trap_event_get_key(evsel, sample, key);
+ return true;
+ }
+
+ return false;
+}
+
+static void trap_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+ struct event_key *key,
+ char *decode)
+{
+ const char *trap_reason = get_trap_reason(key->key);
+
+ scnprintf(decode, decode_str_len, "%s", trap_reason);
+}
+
+static struct kvm_events_ops trap_events = {
+ .is_begin_event = trap_event_begin,
+ .is_end_event = trap_event_end,
+ .decode_key = trap_event_decode_key,
+ .name = "TRAP-EVENT",
+};
+
const char *kvm_events_tp[] = {
"kvm:kvm_entry",
"kvm:kvm_exit",
+ "kvm:kvm_trap_enter",
+ "kvm:kvm_trap_exit",
NULL,
};
struct kvm_reg_events_ops kvm_reg_events_ops[] = {
{ .name = "vmexit", .ops = &exit_events },
+ { .name = "trap", .ops = &trap_events },
{ NULL, NULL },
};
--
2.20.1
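A small self-contained illustration of the trap_code decoding described
above: the exception class is ESR_ELx bits [31:26], which is what the
esr_ec tracepoint field carries and what get_trap_reason() looks up. The
sample ESR value below is an assumption, chosen so the EC decodes to
0x24 (DABT_LOW in the table above).

#include <stdint.h>
#include <stdio.h>

#define ESR_ELX_EC_SHIFT 26
#define ESR_ELX_EC_MASK  0x3Fu

int main(void)
{
	uint32_t esr = 0x93C08004;	/* assumed example ESR_ELx value */
	uint32_t ec = (esr >> ESR_ELX_EC_SHIFT) & ESR_ELX_EC_MASK;

	printf("EC = 0x%02x\n", ec);	/* prints: EC = 0x24 */
	return 0;
}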

26 Nov '21
From: Yanling Song <songyl(a)ramaxel.com>
Ramaxel inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4I0OZ
CVE: NA
--------------------------
Fix the typo last_cmsn, which should be last_pmsn.
Signed-off-by: Yanling Song <songyl(a)ramaxel.com>
Reviewed-by: Zhang Lei <zhanglei48(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
drivers/scsi/spfc/hw/spfc_queue.c | 28 ++++++++++++++--------------
drivers/scsi/spfc/hw/spfc_queue.h | 2 +-
2 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/drivers/scsi/spfc/hw/spfc_queue.c b/drivers/scsi/spfc/hw/spfc_queue.c
index 3f73fa26aad1..abcf1ff3f49f 100644
--- a/drivers/scsi/spfc/hw/spfc_queue.c
+++ b/drivers/scsi/spfc/hw/spfc_queue.c
@@ -1027,7 +1027,7 @@ u32 spfc_create_ssq(void *handle)
sq_ctrl->wqe_offset = 0;
sq_ctrl->head_start_cmsn = 0;
sq_ctrl->head_end_cmsn = SPFC_GET_WP_END_CMSN(0, sq_ctrl->wqe_num_per_buf);
- sq_ctrl->last_cmsn = 0;
+ sq_ctrl->last_pmsn = 0;
/* Linked List SQ Owner Bit 1 valid,0 invalid */
sq_ctrl->last_pi_owner = 1;
atomic_set(&sq_ctrl->sq_valid, true);
@@ -3127,7 +3127,7 @@ static u32 spfc_parent_sq_ring_direct_wqe_doorbell(struct spfc_parent_ssq_info *
struct spfc_hba_info *hba;
hba = (struct spfc_hba_info *)sq->hba;
- pmsn = sq->last_cmsn;
+ pmsn = sq->last_pmsn;
if (sq->cache_id == INVALID_VALUE32) {
FC_DRV_PRINT(UNF_LOG_IO_ATT, UNF_ERR,
@@ -3166,7 +3166,7 @@ u32 spfc_parent_sq_ring_doorbell(struct spfc_parent_ssq_info *sq, u8 qos_level,
struct spfc_parent_sq_db door_bell;
hba = (struct spfc_hba_info *)sq->hba;
- pmsn = sq->last_cmsn;
+ pmsn = sq->last_pmsn;
/* Obtain the low 8 Bit of PMSN */
pmsn_lo = (u8)(pmsn & SPFC_PMSN_MASK);
/* Obtain the high 8 Bit of PMSN */
@@ -3231,10 +3231,10 @@ u32 spfc_direct_sq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *io
FC_DRV_PRINT(UNF_LOG_NORMAL, UNF_INFO,
"[info]Ssq(0x%x), xid(0x%x) qid(0x%x) add wqepage at Pmsn(0x%x), sqe_minus_cqe_cnt(0x%x)",
ssq->sqn, ssq->context_id, ssq->sq_queue_id,
- ssq->last_cmsn,
+ ssq->last_pmsn,
atomic_read(&ssq->sqe_minus_cqe_cnt));
- link_wqe_msn = SPFC_MSN_DEC(ssq->last_cmsn);
+ link_wqe_msn = SPFC_MSN_DEC(ssq->last_pmsn);
link_wqe = (struct spfc_linkwqe *)spfc_get_wqe_page_entry(tail_wpg,
ssq->wqe_offset);
msn_wd = be32_to_cpu(link_wqe->val_wd1);
@@ -3250,7 +3250,7 @@ u32 spfc_direct_sq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *io
}
sqe_in_wp =
(struct spfc_sqe *)spfc_get_wqe_page_entry(tail_wpg, ssq->wqe_offset);
- spfc_build_wqe_owner_pmsn(io_sqe, (ssq->last_pi_owner), ssq->last_cmsn);
+ spfc_build_wqe_owner_pmsn(io_sqe, (ssq->last_pi_owner), ssq->last_pmsn);
SPFC_IO_STAT((struct spfc_hba_info *)ssq->hba, wqe_type);
wqe_gpa = tail_wpg->wpg_phy_addr + (ssq->wqe_offset * sizeof(struct spfc_sqe));
@@ -3260,11 +3260,11 @@ u32 spfc_direct_sq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *io
dre_door_bell.wd0.cos = 0;
dre_door_bell.wd0.c = 0;
dre_door_bell.wd0.pi_hi =
- (u32)(ssq->last_cmsn >> UNF_SHIFT_12) & SPFC_DB_WD0_PI_H_MASK;
+ (u32)(ssq->last_pmsn >> UNF_SHIFT_12) & SPFC_DB_WD0_PI_H_MASK;
dre_door_bell.wd0.cntx_size = SPFC_CNTX_SIZE_T_256B;
dre_door_bell.wd0.xid = ssq->context_id;
dre_door_bell.wd1.sm_data = ssq->cache_id;
- dre_door_bell.wd1.pi_lo = (u32)(ssq->last_cmsn & SPFC_DB_WD0_PI_L_MASK);
+ dre_door_bell.wd1.pi_lo = (u32)(ssq->last_pmsn & SPFC_DB_WD0_PI_L_MASK);
io_sqe->db_val = *(u64 *)&dre_door_bell;
spfc_convert_parent_wqe_to_big_endian(io_sqe);
@@ -3275,7 +3275,7 @@ u32 spfc_direct_sq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *io
"[INFO]Ssq(0x%x) xid:0x%x,qid:0x%x wqegpa:0x%llx,o:0x%x,outstandind:0x%x,pmsn:0x%x,cmsn:0x%x",
ssq->sqn, ssq->context_id, ssq->sq_queue_id, wqe_gpa,
ssq->last_pi_owner, atomic_read(&ssq->sqe_minus_cqe_cnt),
- ssq->last_cmsn, SPFC_GET_QUEUE_CMSN(ssq));
+ ssq->last_pmsn, SPFC_GET_QUEUE_CMSN(ssq));
ssq->accum_wqe_cnt++;
if (ssq->accum_wqe_cnt == accum_db_num) {
@@ -3286,7 +3286,7 @@ u32 spfc_direct_sq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *io
}
ssq->wqe_offset += 1;
- ssq->last_cmsn = SPFC_MSN_INC(ssq->last_cmsn);
+ ssq->last_pmsn = SPFC_MSN_INC(ssq->last_pmsn);
atomic_inc(&ssq->sq_wqe_cnt);
atomic_inc(&ssq->sqe_minus_cqe_cnt);
SPFC_SQ_IO_STAT(ssq, wqe_type);
@@ -3319,7 +3319,7 @@ u32 spfc_parent_ssq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *i
FC_DRV_PRINT(UNF_LOG_NORMAL, UNF_INFO,
"[info]Ssq(0x%x), xid(0x%x) qid(0x%x) add wqepage at Pmsn(0x%x), WpgCnt(0x%x)",
ssq->sqn, ssq->context_id, ssq->sq_queue_id,
- ssq->last_cmsn,
+ ssq->last_pmsn,
atomic_read(&ssq->wqe_page_cnt));
cur_cmsn = SPFC_GET_QUEUE_CMSN(ssq);
spfc_free_sq_wqe_page(ssq, cur_cmsn);
@@ -3335,7 +3335,7 @@ u32 spfc_parent_ssq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *i
link_wqe->next_page_addr_hi = cpu_to_be32(addr_wd);
addr_wd = SPFC_LSD(new_wqe_page->wpg_phy_addr);
link_wqe->next_page_addr_lo = cpu_to_be32(addr_wd);
- link_wqe_msn = SPFC_MSN_DEC(ssq->last_cmsn);
+ link_wqe_msn = SPFC_MSN_DEC(ssq->last_pmsn);
msn_wd = be32_to_cpu(link_wqe->val_wd1);
msn_wd |= ((u32)(link_wqe_msn & SPFC_MSNWD_L_MASK));
msn_wd |= (((u32)(link_wqe_msn & SPFC_MSNWD_H_MASK)) << UNF_SHIFT_16);
@@ -3351,7 +3351,7 @@ u32 spfc_parent_ssq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *i
atomic_inc(&ssq->wqe_page_cnt);
}
- spfc_build_wqe_owner_pmsn(io_sqe, !(ssq->last_pi_owner), ssq->last_cmsn);
+ spfc_build_wqe_owner_pmsn(io_sqe, !(ssq->last_pi_owner), ssq->last_pmsn);
SPFC_IO_STAT((struct spfc_hba_info *)ssq->hba, wqe_type);
spfc_convert_parent_wqe_to_big_endian(io_sqe);
sqe_in_wp = (struct spfc_sqe *)spfc_get_wqe_page_entry(tail_wpg, ssq->wqe_offset);
@@ -3371,7 +3371,7 @@ u32 spfc_parent_ssq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *i
ssq->accum_wqe_cnt = 0;
}
ssq->wqe_offset += 1;
- ssq->last_cmsn = SPFC_MSN_INC(ssq->last_cmsn);
+ ssq->last_pmsn = SPFC_MSN_INC(ssq->last_pmsn);
atomic_inc(&ssq->sq_wqe_cnt);
atomic_inc(&ssq->sqe_minus_cqe_cnt);
SPFC_SQ_IO_STAT(ssq, wqe_type);
diff --git a/drivers/scsi/spfc/hw/spfc_queue.h b/drivers/scsi/spfc/hw/spfc_queue.h
index b1184eb17556..c09f098e7324 100644
--- a/drivers/scsi/spfc/hw/spfc_queue.h
+++ b/drivers/scsi/spfc/hw/spfc_queue.h
@@ -597,7 +597,7 @@ struct spfc_parent_ssq_info {
u32 wqe_offset;
u16 head_start_cmsn;
u16 head_end_cmsn;
- u16 last_cmsn;
+ u16 last_pmsn;
u16 last_pi_owner;
u32 queue_style;
atomic_t sq_valid;
--
2.20.1

26 Nov '21
Ramaxel inclusion
category: features
bugzilla: https://gitee.com/openeuler/kernel/issues/I4JXCG
CVE: NA
Changes:
1. Use the BSG module to replace the ioctl interface (a usage sketch
follows this list).
2. Use an email address as MODULE_AUTHOR.
3. Add error handling for the PCIe error case.
4. Support SMR HDDs in HBA mode.
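A hedged userspace sketch of what change 1 enables: submitting a
pass-through request through a bsg node with the generic sg_io_v4 ABI
from <linux/bsg.h>. The node path and the idea that the driver expects
a struct spraid_bsg_request in the request buffer are assumptions based
on the structures this patch adds, not a documented spraid interface.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/bsg.h>
#include <scsi/sg.h>

int main(void)
{
	unsigned char req[128] = {0};	/* stands in for struct spraid_bsg_request */
	struct sg_io_v4 hdr;
	int fd = open("/dev/bsg/spraid0", O_RDWR);	/* assumed node name */

	if (fd < 0) {
		perror("open bsg node");
		return 1;
	}

	memset(&hdr, 0, sizeof(hdr));
	hdr.guard = 'Q';		/* required magic for sg_io_v4 */
	hdr.protocol = BSG_PROTOCOL_SCSI;
	hdr.subprotocol = BSG_SUB_PROTOCOL_SCSI_TRANSPORT;
	hdr.request = (uintptr_t)req;
	hdr.request_len = sizeof(req);
	hdr.timeout = 30 * 1000;	/* milliseconds */

	if (ioctl(fd, SG_IO, &hdr) < 0)
		perror("SG_IO");
	close(fd);
	return 0;
}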
Signed-off-by: Yanling Song <songyl(a)ramaxel.com>
Reviewed-by: Jiang Yu <yujiang(a)ramaxel.com>
---
drivers/scsi/spraid/spraid.h | 146 ++-
drivers/scsi/spraid/spraid_main.c | 1412 +++++++++++++++--------------
2 files changed, 840 insertions(+), 718 deletions(-)
diff --git a/drivers/scsi/spraid/spraid.h b/drivers/scsi/spraid/spraid.h
index da46d8e1b4b6..6a04fb65ec93 100644
--- a/drivers/scsi/spraid/spraid.h
+++ b/drivers/scsi/spraid/spraid.h
@@ -1,4 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2021 Ramaxel Memory Technology, Ltd */
#ifndef __SPRAID_H_
#define __SPRAID_H_
@@ -24,7 +25,7 @@
#define SENSE_SIZE(depth) ((depth) * SCSI_SENSE_BUFFERSIZE)
#define SPRAID_AQ_DEPTH 128
-#define SPRAID_NR_AEN_COMMANDS 1
+#define SPRAID_NR_AEN_COMMANDS 16
#define SPRAID_AQ_BLK_MQ_DEPTH (SPRAID_AQ_DEPTH - SPRAID_NR_AEN_COMMANDS)
#define SPRAID_AQ_MQ_TAG_DEPTH (SPRAID_AQ_BLK_MQ_DEPTH - 1)
@@ -44,7 +45,7 @@
#define SMALL_POOL_SIZE 256
#define MAX_SMALL_POOL_NUM 16
-#define MAX_CMD_PER_DEV 32
+#define MAX_CMD_PER_DEV 64
#define MAX_CDB_LEN 32
#define SPRAID_UP_TO_MULTY4(x) (((x) + 4) & (~0x03))
@@ -53,7 +54,7 @@
#define PCI_VENDOR_ID_RAMAXEL_LOGIC 0x1E81
-#define SPRAID_SERVER_DEVICE_HAB_DID 0x2100
+#define SPRAID_SERVER_DEVICE_HBA_DID 0x2100
#define SPRAID_SERVER_DEVICE_RAID_DID 0x2200
#define IO_6_DEFAULT_TX_LEN 256
@@ -142,11 +143,15 @@ enum {
enum {
SPRAID_AEN_DEV_CHANGED = 0x00,
+ SPRAID_AEN_FW_ACT_START = 0x01,
SPRAID_AEN_HOST_PROBING = 0x10,
};
enum {
- SPRAID_AEN_TIMESYN = 0x07
+ SPRAID_AEN_TIMESYN = 0x00,
+ SPRAID_AEN_FW_ACT_FINISH = 0x02,
+ SPRAID_AEN_EVENT_MIN = 0x80,
+ SPRAID_AEN_EVENT_MAX = 0xff,
};
enum {
@@ -175,6 +180,16 @@ enum spraid_state {
SPRAID_DEAD,
};
+enum {
+ SPRAID_CARD_HBA,
+ SPRAID_CARD_RAID,
+};
+
+enum spraid_cmd_type {
+ SPRAID_CMD_ADM,
+ SPRAID_CMD_IOPT,
+};
+
struct spraid_completion {
__le32 result;
union {
@@ -217,8 +232,6 @@ struct spraid_dev {
struct dma_pool *prp_page_pool;
struct dma_pool *prp_small_pool[MAX_SMALL_POOL_NUM];
mempool_t *iod_mempool;
- struct blk_mq_tag_set admin_tagset;
- struct request_queue *admin_q;
void __iomem *bar;
u32 max_qid;
u32 num_vecs;
@@ -232,23 +245,27 @@ struct spraid_dev {
u32 ctrl_config;
u32 online_queues;
u64 cap;
- struct device ctrl_device;
- struct cdev cdev;
int instance;
struct spraid_ctrl_info *ctrl_info;
struct spraid_dev_info *devices;
- struct spraid_ioq_ptcmd *ioq_ptcmds;
+ struct spraid_cmd *adm_cmds;
+ struct list_head adm_cmd_list;
+ spinlock_t adm_cmd_lock; /* spinlock for lock handling */
+
+ struct spraid_cmd *ioq_ptcmds;
struct list_head ioq_pt_list;
- spinlock_t ioq_pt_lock;
+ spinlock_t ioq_pt_lock; /* spinlock for lock handling */
- struct work_struct aen_work;
struct work_struct scan_work;
struct work_struct timesyn_work;
struct work_struct reset_work;
+ struct work_struct fw_act_work;
enum spraid_state state;
- spinlock_t state_lock;
+ spinlock_t state_lock; /* spinlock for lock handling */
+
+ struct request_queue *bsg_queue;
};
struct spraid_sgl_desc {
@@ -347,6 +364,35 @@ struct spraid_get_info {
__u32 rsvd12[4];
};
+struct spraid_usr_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 hdid;
+ union {
+ struct {
+ __le16 subopcode;
+ __le16 rsvd1;
+ } info_0;
+ __le32 cdw2;
+ };
+ union {
+ struct {
+ __le16 data_len;
+ __le16 param_len;
+ } info_1;
+ __le32 cdw3;
+ };
+ __u64 metadata;
+ union spraid_data_ptr dptr;
+ __le32 cdw10;
+ __le32 cdw11;
+ __le32 cdw12;
+ __le32 cdw13;
+ __le32 cdw14;
+ __le32 cdw15;
+};
+
enum {
SPRAID_CMD_FLAG_SGL_METABUF = (1 << 6),
SPRAID_CMD_FLAG_SGL_METASEG = (1 << 7),
@@ -393,6 +439,7 @@ struct spraid_admin_command {
struct spraid_get_info get_info;
struct spraid_abort_cmd abort;
struct spraid_reset_cmd reset;
+ struct spraid_usr_cmd usr_cmd;
};
};
@@ -456,9 +503,6 @@ struct spraid_ioq_command {
};
};
-#define SPRAID_IOCTL_RESET_CMD _IOWR('N', 0x80, struct spraid_passthru_common_cmd)
-#define SPRAID_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct spraid_passthru_common_cmd)
-
struct spraid_passthru_common_cmd {
__u8 opcode;
__u8 flags;
@@ -494,8 +538,6 @@ struct spraid_passthru_common_cmd {
__u32 result1;
};
-#define SPRAID_IOCTL_IOQ_CMD _IOWR('N', 0x42, struct spraid_ioq_passthru_cmd)
-
struct spraid_ioq_passthru_cmd {
__u8 opcode;
__u8 flags;
@@ -560,7 +602,21 @@ struct spraid_ioq_passthru_cmd {
__u32 result1;
};
-struct spraid_ioq_ptcmd {
+struct spraid_bsg_request {
+ u32 msgcode;
+ u32 control;
+ union {
+ struct spraid_passthru_common_cmd admcmd;
+ struct spraid_ioq_passthru_cmd ioqcmd;
+ };
+};
+
+enum {
+ SPRAID_BSG_ADM,
+ SPRAID_BSG_IOQ,
+};
+
+struct spraid_cmd {
int qid;
int cid;
u32 result0;
@@ -572,14 +628,6 @@ struct spraid_ioq_ptcmd {
struct list_head list;
};
-struct spraid_admin_request {
- struct spraid_admin_command *cmd;
- u32 result0;
- u32 result1;
- u16 flags;
- u16 status;
-};
-
struct spraid_queue {
struct spraid_dev *hdev;
spinlock_t sq_lock; /* spinlock for lock handling */
@@ -607,7 +655,6 @@ struct spraid_queue {
};
struct spraid_iod {
- struct spraid_admin_request req;
struct spraid_queue *spraidq;
enum spraid_cmd_state state;
int npages;
@@ -623,13 +670,51 @@ struct spraid_iod {
};
#define SPRAID_DEV_INFO_ATTR_BOOT(attr) ((attr) & 0x01)
-#define SPRAID_DEV_INFO_ATTR_HDD(attr) ((attr) & 0x02)
+#define SPRAID_DEV_INFO_ATTR_VD(attr) (((attr) & 0x02) == 0x0)
#define SPRAID_DEV_INFO_ATTR_PT(attr) (((attr) & 0x22) == 0x02)
#define SPRAID_DEV_INFO_ATTR_RAWDISK(attr) ((attr) & 0x20)
#define SPRAID_DEV_INFO_FLAG_VALID(flag) ((flag) & 0x01)
#define SPRAID_DEV_INFO_FLAG_CHANGE(flag) ((flag) & 0x02)
+#define BGTASK_TYPE_REBUILD 4
+#define USR_CMD_READ 0xc2
+#define USR_CMD_RDLEN 0x1000
+#define USR_CMD_VDINFO 0x704
+#define USR_CMD_BGTASK 0x504
+#define VDINFO_PARAM_LEN 0x04
+
+struct spraid_vd_info {
+ __u8 name[32];
+ __le16 id;
+ __u8 rg_id;
+ __u8 rg_level;
+ __u8 sg_num;
+ __u8 sg_disk_num;
+ __u8 vd_status;
+ __u8 vd_type;
+ __u8 rsvd1[4056];
+};
+
+#define MAX_REALTIME_BGTASK_NUM 32
+
+struct bgtask_info {
+ __u8 type;
+ __u8 progress;
+ __u8 rate;
+ __u8 rsvd0;
+ __le16 vd_id;
+ __le16 time_left;
+ __u8 rsvd1[4];
+};
+
+struct spraid_bgtask {
+ __u8 sw;
+ __u8 task_num;
+ __u8 rsvd[6];
+ struct bgtask_info bgtask[MAX_REALTIME_BGTASK_NUM];
+};
+
struct spraid_dev_info {
__le32 hdid;
__le16 target;
@@ -649,6 +734,11 @@ struct spraid_dev_list {
struct spraid_sdev_hostdata {
u32 hdid;
+ u16 max_io_kb;
+ u8 attr;
+ u8 flag;
+ u8 rg_id;
+ u8 rsvd[3];
};
#endif
diff --git a/drivers/scsi/spraid/spraid_main.c b/drivers/scsi/spraid/spraid_main.c
index a0a75ecb0027..86489164f672 100644
--- a/drivers/scsi/spraid/spraid_main.c
+++ b/drivers/scsi/spraid/spraid_main.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/*
- * Linux spraid device driver
- * Copyright(c) 2021 Ramaxel Memory Technology, Ltd
- */
+/* Copyright(c) 2021 Ramaxel Memory Technology, Ltd */
+
+/* Ramaxel Raid SPXXX Series Linux Driver */
+
#define pr_fmt(fmt) "spraid: " fmt
#include <linux/sched/signal.h>
@@ -23,6 +23,8 @@
#include <linux/debugfs.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/blkdev.h>
+#include <linux/bsg-lib.h>
+#include <asm/unaligned.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
@@ -30,6 +32,8 @@
#include <scsi/scsi_host.h>
#include <scsi/scsi_transport.h>
#include <scsi/scsi_dbg.h>
+#include <scsi/sg.h>
+
#include "spraid.h"
@@ -112,10 +116,10 @@ MODULE_PARM_DESC(small_pool_num, "set prp small pool num, default 4, MAX 16");
static void spraid_free_queue(struct spraid_queue *spraidq);
static void spraid_handle_aen_notice(struct spraid_dev *hdev, u32 result);
-static void spraid_handle_aen_vs(struct spraid_dev *hdev, u32 result);
+static void spraid_handle_aen_vs(struct spraid_dev *hdev, u32 result, u32 result1);
static DEFINE_IDA(spraid_instance_ida);
-static dev_t spraid_chr_devt;
+
static struct class *spraid_class;
#define SPRAID_CAP_TIMEOUT_UNIT_MS (HZ / 2)
@@ -147,6 +151,13 @@ enum FW_STAT_CODE {
FW_STAT_NEED_RETRY
};
+static const char * const raid_levels[] = {"0", "1", "5", "6", "10", "50", "60", "NA"};
+
+static const char * const raid_states[] = {
+ "NA", "NORMAL", "FAULT", "DEGRADE", "NOT_FORMATTED", "FORMATTING", "SANITIZING",
+ "INITIALIZING", "INITIALIZE_FAIL", "DELETING", "DELETE_FAIL", "WRITE_PROTECT"
+};
+
static int ioq_depth_set(const char *val, const struct kernel_param *kp)
{
int n = 0;
@@ -263,12 +274,6 @@ static int spraid_pci_enable(struct spraid_dev *hdev)
return ret;
}
-static inline
-struct spraid_admin_request *spraid_admin_req(struct request *req)
-{
- return blk_mq_rq_to_pdu(req);
-}
-
static int spraid_npages_prp(u32 size, struct spraid_dev *hdev)
{
u32 nprps = DIV_ROUND_UP(size + hdev->page_size, hdev->page_size);
@@ -419,7 +424,7 @@ static void spraid_submit_cmd(struct spraid_queue *spraidq, const void *cmd)
writel(spraidq->sq_tail, spraidq->q_db);
spin_unlock_irqrestore(&spraidq->sq_lock, flags);
- dev_log_dbg(spraidq->hdev->dev, "cid[%d], qid[%d], opcode[0x%x], flags[0x%x], hdid[%u]\n",
+ dev_log_dbg(spraidq->hdev->dev, "cid[%d] qid[%d], opcode[0x%x], flags[0x%x], hdid[%u]\n",
acd->command_id, spraidq->qid, acd->opcode, acd->flags, le32_to_cpu(acd->hdid));
}
@@ -814,7 +819,7 @@ static void spraid_map_status(struct spraid_iod *iod, struct scsi_cmnd *scmd,
if (scmd->result & SAM_STAT_CHECK_CONDITION) {
memset(scmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
memcpy(scmd->sense_buffer, iod->sense, SCSI_SENSE_BUFFERSIZE);
- set_driver_byte(scmd, DRIVER_SENSE);
+ scmd->result = (scmd->result & 0x00ffffff) | (DRIVER_SENSE << 24);
}
break;
case FW_STAT_ABORTED:
@@ -850,14 +855,13 @@ static int spraid_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
int ret;
if (unlikely(!scmd)) {
- dev_err(hdev->dev, "err, scmd is null, return 0\n");
+ dev_err(hdev->dev, "err, scmd is null\n");
return 0;
}
if (unlikely(hdev->state != SPRAID_LIVE)) {
set_host_byte(scmd, DID_NO_CONNECT);
scmd->scsi_done(scmd);
- dev_err(hdev->dev, "[%s] err, hdev state is not live\n", __func__);
return 0;
}
@@ -894,7 +898,7 @@ static int spraid_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
WRITE_ONCE(iod->state, SPRAID_CMD_IN_FLIGHT);
spraid_submit_cmd(ioq, &ioq_cmd);
elapsed = jiffies - scmd->jiffies_at_alloc;
- dev_log_dbg(hdev->dev, "cid[%d], qid[%d] submit IO cost %3ld.%3ld seconds\n",
+ dev_log_dbg(hdev->dev, "cid[%d] qid[%d] submit IO cost %3ld.%3ld seconds\n",
cid, hwq, elapsed / HZ, elapsed % HZ);
return 0;
@@ -945,6 +949,10 @@ static int spraid_slave_alloc(struct scsi_device *sdev)
scan_host:
hostdata->hdid = le32_to_cpu(hdev->devices[idx].hdid);
+ hostdata->max_io_kb = le16_to_cpu(hdev->devices[idx].max_io_kb);
+ hostdata->attr = hdev->devices[idx].attr;
+ hostdata->flag = hdev->devices[idx].flag;
+ hostdata->rg_id = 0xff;
sdev->hostdata = hostdata;
up_read(&hdev->devices_rwsem);
return 0;
@@ -964,7 +972,7 @@ static int spraid_slave_configure(struct scsi_device *sdev)
struct spraid_sdev_hostdata *hostdata = sdev->hostdata;
u32 max_sec = sdev->host->max_sectors;
- if (!hostdata) {
+ if (hostdata) {
idx = hostdata->hdid - 1;
if (sdev->channel == hdev->devices[idx].channel &&
sdev->id == le16_to_cpu(hdev->devices[idx].target) &&
@@ -1176,6 +1184,75 @@ static inline bool spraid_cqe_pending(struct spraid_queue *spraidq)
spraidq->cq_phase;
}
+static void spraid_sata_report_zone_handle(struct scsi_cmnd *scmd, struct spraid_iod *iod)
+{
+ int i = 0;
+ unsigned int bytes = 0;
+ struct scatterlist *sg = scsi_sglist(scmd);
+
+ scsi_for_each_sg(scmd, sg, iod->nsge, i) {
+ unsigned int offset = 0;
+
+ if (bytes == 0) {
+ char *hdr;
+ u32 list_length;
+ u64 max_lba, opt_lba;
+ u16 same;
+
+ hdr = sg_virt(sg);
+
+ list_length = get_unaligned_le32(&hdr[0]);
+ same = get_unaligned_le16(&hdr[4]);
+ max_lba = get_unaligned_le64(&hdr[8]);
+ opt_lba = get_unaligned_le64(&hdr[16]);
+ put_unaligned_be32(list_length, &hdr[0]);
+ hdr[4] = same & 0xf;
+ put_unaligned_be64(max_lba, &hdr[8]);
+ put_unaligned_be64(opt_lba, &hdr[16]);
+ offset += 64;
+ bytes += 64;
+ }
+ while (offset < sg_dma_len(sg)) {
+ char *rec;
+ u8 cond, type, non_seq, reset;
+ u64 size, start, wp;
+
+ rec = sg_virt(sg) + offset;
+ type = rec[0] & 0xf;
+ cond = (rec[1] >> 4) & 0xf;
+ non_seq = (rec[1] & 2);
+ reset = (rec[1] & 1);
+ size = get_unaligned_le64(&rec[8]);
+ start = get_unaligned_le64(&rec[16]);
+ wp = get_unaligned_le64(&rec[24]);
+ rec[0] = type;
+ rec[1] = (cond << 4) | non_seq | reset;
+ put_unaligned_be64(size, &rec[8]);
+ put_unaligned_be64(start, &rec[16]);
+ put_unaligned_be64(wp, &rec[24]);
+ WARN_ON(offset + 64 > sg_dma_len(sg));
+ offset += 64;
+ bytes += 64;
+ }
+ }
+}
+
+static inline void spraid_handle_ata_cmd(struct spraid_dev *hdev, struct scsi_cmnd *scmd,
+ struct spraid_iod *iod)
+{
+ if (hdev->ctrl_info->card_type != SPRAID_CARD_HBA)
+ return;
+
+ switch (scmd->cmnd[0]) {
+ case ZBC_IN:
+ dev_info(hdev->dev, "[%s] process report zone\n", __func__);
+ spraid_sata_report_zone_handle(scmd, iod);
+ break;
+ default:
+ break;
+ }
+}
+
static void spraid_complete_ioq_cmnd(struct spraid_queue *ioq, struct spraid_completion *cqe)
{
struct spraid_dev *hdev = ioq->hdev;
@@ -1197,12 +1274,12 @@ static void spraid_complete_ioq_cmnd(struct spraid_queue *ioq, struct spraid_com
iod = scsi_cmd_priv(scmd);
elapsed = jiffies - scmd->jiffies_at_alloc;
- dev_log_dbg(hdev->dev, "cid[%d], qid[%d] finish IO cost %3ld.%3ld seconds\n",
+ dev_log_dbg(hdev->dev, "cid[%d] qid[%d] finish IO cost %3ld.%3ld seconds\n",
cqe->cmd_id, ioq->qid, elapsed / HZ, elapsed % HZ);
if (cmpxchg(&iod->state, SPRAID_CMD_IN_FLIGHT, SPRAID_CMD_COMPLETE) !=
SPRAID_CMD_IN_FLIGHT) {
- dev_warn(hdev->dev, "cid[%d], qid[%d] enters abnormal handler, cost %3ld.%3ld seconds\n",
+ dev_warn(hdev->dev, "cid[%d] qid[%d] enters abnormal handler, cost %3ld.%3ld seconds\n",
cqe->cmd_id, ioq->qid, elapsed / HZ, elapsed % HZ);
WRITE_ONCE(iod->state, SPRAID_CMD_TMO_COMPLETE);
@@ -1215,6 +1292,8 @@ static void spraid_complete_ioq_cmnd(struct spraid_queue *ioq, struct spraid_com
return;
}
+ spraid_handle_ata_cmd(hdev, scmd, iod);
+
spraid_map_status(iod, scmd, cqe);
if (iod->nsge) {
iod->nsge = 0;
@@ -1224,38 +1303,36 @@ static void spraid_complete_ioq_cmnd(struct spraid_queue *ioq, struct spraid_com
scmd->scsi_done(scmd);
}
-static inline void spraid_end_admin_request(struct request *req, __le16 status,
- __le32 result0, __le32 result1)
-{
- struct spraid_admin_request *rq = spraid_admin_req(req);
-
- rq->status = le16_to_cpu(status) >> 1;
- rq->result0 = le32_to_cpu(result0);
- rq->result1 = le32_to_cpu(result1);
- blk_mq_complete_request(req);
-}
-
static void spraid_complete_adminq_cmnd(struct spraid_queue *adminq, struct spraid_completion *cqe)
{
- struct blk_mq_tags *tags = adminq->hdev->admin_tagset.tags[0];
- struct request *req;
+ struct spraid_dev *hdev = adminq->hdev;
+ struct spraid_cmd *adm_cmd;
- req = blk_mq_tag_to_rq(tags, cqe->cmd_id);
- if (unlikely(!req)) {
+ adm_cmd = hdev->adm_cmds + cqe->cmd_id;
+ if (unlikely(adm_cmd->state == SPRAID_CMD_IDLE)) {
dev_warn(adminq->hdev->dev, "Invalid id %d completed on queue %d\n",
cqe->cmd_id, le16_to_cpu(cqe->sq_id));
return;
}
- spraid_end_admin_request(req, cqe->status, cqe->result, cqe->result1);
+
+ adm_cmd->status = le16_to_cpu(cqe->status) >> 1;
+ adm_cmd->result0 = le32_to_cpu(cqe->result);
+ adm_cmd->result1 = le32_to_cpu(cqe->result1);
+
+ complete(&adm_cmd->cmd_done);
}
+static void spraid_send_aen(struct spraid_dev *hdev, u16 cid);
+
static void spraid_complete_aen(struct spraid_queue *spraidq, struct spraid_completion *cqe)
{
struct spraid_dev *hdev = spraidq->hdev;
u32 result = le32_to_cpu(cqe->result);
- dev_info(hdev->dev, "rcv aen, status[%x], result[%x]\n",
- le16_to_cpu(cqe->status) >> 1, result);
+ dev_info(hdev->dev, "rcv aen, cid[%d], status[0x%x], result[0x%x]\n",
+ cqe->cmd_id, le16_to_cpu(cqe->status) >> 1, result);
+
+ spraid_send_aen(hdev, cqe->cmd_id);
if ((le16_to_cpu(cqe->status) >> 1) != SPRAID_SC_SUCCESS)
return;
@@ -1264,22 +1341,19 @@ static void spraid_complete_aen(struct spraid_queue *spraidq, struct spraid_comp
spraid_handle_aen_notice(hdev, result);
break;
case SPRAID_AEN_VS:
- spraid_handle_aen_vs(hdev, result);
+ spraid_handle_aen_vs(hdev, result, le32_to_cpu(cqe->result1));
break;
default:
dev_warn(hdev->dev, "Unsupported async event type: %u\n",
result & 0x7);
break;
}
- queue_work(spraid_wq, &hdev->aen_work);
}
-static void spraid_put_ioq_ptcmd(struct spraid_dev *hdev, struct spraid_ioq_ptcmd *cmd);
-
static void spraid_complete_ioq_sync_cmnd(struct spraid_queue *ioq, struct spraid_completion *cqe)
{
struct spraid_dev *hdev = ioq->hdev;
- struct spraid_ioq_ptcmd *ptcmd;
+ struct spraid_cmd *ptcmd;
ptcmd = hdev->ioq_ptcmds + (ioq->qid - 1) * SPRAID_PTCMDS_PERQ +
cqe->cmd_id - SPRAID_IO_BLK_MQ_DEPTH;
@@ -1289,8 +1363,6 @@ static void spraid_complete_ioq_sync_cmnd(struct spraid_queue *ioq, struct sprai
ptcmd->result1 = le32_to_cpu(cqe->result1);
complete(&ptcmd->cmd_done);
-
- spraid_put_ioq_ptcmd(hdev, ptcmd);
}
static inline void spraid_handle_cqe(struct spraid_queue *spraidq, u16 idx)
@@ -1304,7 +1376,7 @@ static inline void spraid_handle_cqe(struct spraid_queue *spraidq, u16 idx)
return;
}
- dev_log_dbg(hdev->dev, "cid[%d], qid[%d], result[0x%x], sq_id[%d], status[0x%x]\n",
+ dev_log_dbg(hdev->dev, "cid[%d] qid[%d], result[0x%x], sq_id[%d], status[0x%x]\n",
cqe->cmd_id, spraidq->qid, le32_to_cpu(cqe->result),
le16_to_cpu(cqe->sq_id), le16_to_cpu(cqe->status));
@@ -1452,62 +1524,117 @@ static u32 spraid_bar_size(struct spraid_dev *hdev, u32 nr_ioqs)
return (SPRAID_REG_DBS + ((nr_ioqs + 1) * 8 * hdev->db_stride));
}
-static inline void spraid_clear_spraid_request(struct request *req)
+static int spraid_alloc_admin_cmds(struct spraid_dev *hdev)
{
- if (!(req->rq_flags & RQF_DONTPREP)) {
- spraid_admin_req(req)->flags = 0;
- req->rq_flags |= RQF_DONTPREP;
+ int i;
+
+ INIT_LIST_HEAD(&hdev->adm_cmd_list);
+ spin_lock_init(&hdev->adm_cmd_lock);
+
+ hdev->adm_cmds = kcalloc_node(SPRAID_AQ_BLK_MQ_DEPTH, sizeof(struct spraid_cmd),
+ GFP_KERNEL, hdev->numa_node);
+
+ if (!hdev->adm_cmds) {
+ dev_err(hdev->dev, "Alloc admin cmds failed\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < SPRAID_AQ_BLK_MQ_DEPTH; i++) {
+ hdev->adm_cmds[i].qid = 0;
+ hdev->adm_cmds[i].cid = i;
+ list_add_tail(&(hdev->adm_cmds[i].list), &hdev->adm_cmd_list);
}
+
+ dev_info(hdev->dev, "Alloc admin cmds success, num[%d]\n", SPRAID_AQ_BLK_MQ_DEPTH);
+
+ return 0;
}
-static struct request *spraid_alloc_admin_request(struct request_queue *q,
- struct spraid_admin_command *cmd,
- blk_mq_req_flags_t flags)
+static void spraid_free_admin_cmds(struct spraid_dev *hdev)
{
- u32 op = COMMAND_IS_WRITE(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
- struct request *req;
+ kfree(hdev->adm_cmds);
+ INIT_LIST_HEAD(&hdev->adm_cmd_list);
+}
- req = blk_mq_alloc_request(q, op, flags);
- if (IS_ERR(req))
- return req;
- req->cmd_flags |= REQ_FAILFAST_DRIVER;
- spraid_clear_spraid_request(req);
- spraid_admin_req(req)->cmd = cmd;
+static struct spraid_cmd *spraid_get_cmd(struct spraid_dev *hdev, enum spraid_cmd_type type)
+{
+ struct spraid_cmd *cmd = NULL;
+ unsigned long flags;
+ struct list_head *head = &hdev->adm_cmd_list;
+ spinlock_t *slock = &hdev->adm_cmd_lock;
- return req;
+ if (type == SPRAID_CMD_IOPT) {
+ head = &hdev->ioq_pt_list;
+ slock = &hdev->ioq_pt_lock;
+ }
+
+ spin_lock_irqsave(slock, flags);
+ if (list_empty(head)) {
+ spin_unlock_irqrestore(slock, flags);
+ dev_err(hdev->dev, "err, cmd[%d] list empty\n", type);
+ return NULL;
+ }
+ cmd = list_entry(head->next, struct spraid_cmd, list);
+ list_del_init(&cmd->list);
+ spin_unlock_irqrestore(slock, flags);
+
+ WRITE_ONCE(cmd->state, SPRAID_CMD_IN_FLIGHT);
+
+ return cmd;
}
-static int spraid_submit_admin_sync_cmd(struct request_queue *q,
- struct spraid_admin_command *cmd,
- u32 *result, void *buffer,
- u32 bufflen, u32 timeout, int at_head, blk_mq_req_flags_t flags)
+static void spraid_put_cmd(struct spraid_dev *hdev, struct spraid_cmd *cmd,
+ enum spraid_cmd_type type)
{
- struct request *req;
- int ret;
+ unsigned long flags;
+ struct list_head *head = &hdev->adm_cmd_list;
+ spinlock_t *slock = &hdev->adm_cmd_lock;
+
+ if (type == SPRAID_CMD_IOPT) {
+ head = &hdev->ioq_pt_list;
+ slock = &hdev->ioq_pt_lock;
+ }
- req = spraid_alloc_admin_request(q, cmd, flags);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ spin_lock_irqsave(slock, flags);
+ WRITE_ONCE(cmd->state, SPRAID_CMD_IDLE);
+ list_add_tail(&cmd->list, head);
+ spin_unlock_irqrestore(slock, flags);
+}
- req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
- if (buffer && bufflen) {
- ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
- if (ret)
- goto out;
+
+static int spraid_submit_admin_sync_cmd(struct spraid_dev *hdev, struct spraid_admin_command *cmd,
+ u32 *result0, u32 *result1, u32 timeout)
+{
+ struct spraid_cmd *adm_cmd = spraid_get_cmd(hdev, SPRAID_CMD_ADM);
+
+ if (!adm_cmd) {
+ dev_err(hdev->dev, "err, get admin cmd failed\n");
+ return -EFAULT;
}
- blk_execute_rq(req->q, NULL, req, at_head);
- if (result)
- *result = spraid_admin_req(req)->result0;
+ timeout = timeout ? timeout : ADMIN_TIMEOUT;
- if (spraid_admin_req(req)->flags & SPRAID_REQ_CANCELLED)
- ret = -EINTR;
- else
- ret = spraid_admin_req(req)->status;
+ init_completion(&adm_cmd->cmd_done);
-out:
- blk_mq_free_request(req);
- return ret;
+ cmd->common.command_id = adm_cmd->cid;
+ spraid_submit_cmd(&hdev->queues[0], cmd);
+
+ if (!wait_for_completion_timeout(&adm_cmd->cmd_done, timeout)) {
+ dev_err(hdev->dev, "[%s] cid[%d] qid[%d] timeout, opcode[0x%x] subopcode[0x%x]\n",
+ __func__, adm_cmd->cid, adm_cmd->qid, cmd->usr_cmd.opcode,
+ cmd->usr_cmd.info_0.subopcode);
+ WRITE_ONCE(adm_cmd->state, SPRAID_CMD_TIMEOUT);
+ return -EINVAL;
+ }
+
+ if (result0)
+ *result0 = adm_cmd->result0;
+ if (result1)
+ *result1 = adm_cmd->result1;
+
+ spraid_put_cmd(hdev, adm_cmd, SPRAID_CMD_ADM);
+
+ return adm_cmd->status;
}
static int spraid_create_cq(struct spraid_dev *hdev, u16 qid,
@@ -1524,8 +1651,7 @@ static int spraid_create_cq(struct spraid_dev *hdev, u16 qid,
admin_cmd.create_cq.cq_flags = cpu_to_le16(flags);
admin_cmd.create_cq.irq_vector = cpu_to_le16(cq_vector);
- return spraid_submit_admin_sync_cmd(hdev->admin_q, &admin_cmd, NULL,
- NULL, 0, 0, 0, 0);
+ return spraid_submit_admin_sync_cmd(hdev, &admin_cmd, NULL, NULL, 0);
}
static int spraid_create_sq(struct spraid_dev *hdev, u16 qid,
@@ -1542,8 +1668,7 @@ static int spraid_create_sq(struct spraid_dev *hdev, u16 qid,
admin_cmd.create_sq.sq_flags = cpu_to_le16(flags);
admin_cmd.create_sq.cqid = cpu_to_le16(qid);
- return spraid_submit_admin_sync_cmd(hdev->admin_q, &admin_cmd, NULL,
- NULL, 0, 0, 0, 0);
+ return spraid_submit_admin_sync_cmd(hdev, &admin_cmd, NULL, NULL, 0);
}
static void spraid_free_queue(struct spraid_queue *spraidq)
@@ -1581,8 +1706,7 @@ static int spraid_delete_queue(struct spraid_dev *hdev, u8 op, u16 id)
admin_cmd.delete_queue.opcode = op;
admin_cmd.delete_queue.qid = cpu_to_le16(id);
- ret = spraid_submit_admin_sync_cmd(hdev->admin_q, &admin_cmd, NULL,
- NULL, 0, 0, 0, 0);
+ ret = spraid_submit_admin_sync_cmd(hdev, &admin_cmd, NULL, NULL, 0);
if (ret)
dev_err(hdev->dev, "Delete %s:[%d] failed\n",
@@ -1663,19 +1787,28 @@ static int spraid_set_features(struct spraid_dev *hdev, u32 fid, u32 dword11, vo
size_t buflen, u32 *result)
{
struct spraid_admin_command admin_cmd;
- u32 res;
int ret;
+ u8 *data_ptr = NULL;
+ dma_addr_t data_dma = 0;
+
+ if (buffer && buflen) {
+ data_ptr = dma_alloc_coherent(hdev->dev, buflen, &data_dma, GFP_KERNEL);
+ if (!data_ptr)
+ return -ENOMEM;
+
+ memcpy(data_ptr, buffer, buflen);
+ }
memset(&admin_cmd, 0, sizeof(admin_cmd));
admin_cmd.features.opcode = SPRAID_ADMIN_SET_FEATURES;
admin_cmd.features.fid = cpu_to_le32(fid);
admin_cmd.features.dword11 = cpu_to_le32(dword11);
+ admin_cmd.common.dptr.prp1 = cpu_to_le64(data_dma);
- ret = spraid_submit_admin_sync_cmd(hdev->admin_q, &admin_cmd, &res,
- buffer, buflen, 0, 0, 0);
+ ret = spraid_submit_admin_sync_cmd(hdev, &admin_cmd, result, NULL, 0);
- if (!ret && result)
- *result = res;
+ if (data_ptr)
+ dma_free_coherent(hdev->dev, buflen, data_ptr, data_dma);
return ret;
}
@@ -1764,8 +1897,7 @@ static int spraid_setup_io_queues(struct spraid_dev *hdev)
break;
}
dev_info(hdev->dev, "[%s] max_qid: %d, queue_count: %d, online_queue: %d, ioq_depth: %d\n",
- __func__, hdev->max_qid, hdev->queue_count,
- hdev->online_queues, hdev->ioq_depth);
+ __func__, hdev->max_qid, hdev->queue_count, hdev->online_queues, hdev->ioq_depth);
return spraid_create_io_queues(hdev);
}
@@ -1889,10 +2021,11 @@ static int spraid_get_dev_list(struct spraid_dev *hdev, struct spraid_dev_info *
u32 nd = le32_to_cpu(hdev->ctrl_info->nd);
struct spraid_admin_command admin_cmd;
struct spraid_dev_list *list_buf;
+ dma_addr_t data_dma = 0;
u32 i, idx, hdid, ndev;
int ret = 0;
- list_buf = kmalloc(sizeof(*list_buf), GFP_KERNEL);
+ list_buf = dma_alloc_coherent(hdev->dev, PAGE_SIZE, &data_dma, GFP_KERNEL);
if (!list_buf)
return -ENOMEM;
@@ -1901,9 +2034,9 @@ static int spraid_get_dev_list(struct spraid_dev *hdev, struct spraid_dev_info *
admin_cmd.get_info.opcode = SPRAID_ADMIN_GET_INFO;
admin_cmd.get_info.type = SPRAID_GET_INFO_DEV_LIST;
admin_cmd.get_info.cdw11 = cpu_to_le32(idx);
+ admin_cmd.common.dptr.prp1 = cpu_to_le64(data_dma);
- ret = spraid_submit_admin_sync_cmd(hdev->admin_q, &admin_cmd, NULL, list_buf,
- sizeof(*list_buf), 0, 0, 0);
+ ret = spraid_submit_admin_sync_cmd(hdev, &admin_cmd, NULL, NULL, 0);
if (ret) {
dev_err(hdev->dev, "Get device list failed, nd: %u, idx: %u, ret: %d\n",
@@ -1916,12 +2049,11 @@ static int spraid_get_dev_list(struct spraid_dev *hdev, struct spraid_dev_info *
for (i = 0; i < ndev; i++) {
hdid = le32_to_cpu(list_buf->devices[i].hdid);
- dev_info(hdev->dev, "list_buf->devices[%d], hdid: %u target: %d, channel: %d, lun: %d, attr[%x]\n",
- i, hdid,
- le16_to_cpu(list_buf->devices[i].target),
- list_buf->devices[i].channel,
- list_buf->devices[i].lun,
- list_buf->devices[i].attr);
+ dev_info(hdev->dev, "list_buf->devices[%d], hdid: %u target: %d, channel: %d, lun: %d, attr[0x%x]\n",
+ i, hdid, le16_to_cpu(list_buf->devices[i].target),
+ list_buf->devices[i].channel,
+ list_buf->devices[i].lun,
+ list_buf->devices[i].attr);
if (hdid > nd || hdid == 0) {
dev_err(hdev->dev, "err, hdid[%d] invalid\n", hdid);
continue;
@@ -1936,21 +2068,29 @@ static int spraid_get_dev_list(struct spraid_dev *hdev, struct spraid_dev_info *
}
out:
- kfree(list_buf);
+ dma_free_coherent(hdev->dev, PAGE_SIZE, list_buf, data_dma);
return ret;
}
-static void spraid_send_aen(struct spraid_dev *hdev)
+static void spraid_send_aen(struct spraid_dev *hdev, u16 cid)
{
struct spraid_queue *adminq = &hdev->queues[0];
struct spraid_admin_command admin_cmd;
memset(&admin_cmd, 0, sizeof(admin_cmd));
admin_cmd.common.opcode = SPRAID_ADMIN_ASYNC_EVENT;
- admin_cmd.common.command_id = SPRAID_AQ_BLK_MQ_DEPTH;
+ admin_cmd.common.command_id = cid;
spraid_submit_cmd(adminq, &admin_cmd);
- dev_info(hdev->dev, "send aen, cid[%d]\n", SPRAID_AQ_BLK_MQ_DEPTH);
+ dev_info(hdev->dev, "send aen, cid[%d]\n", cid);
+}
+
+static inline void spraid_send_all_aen(struct spraid_dev *hdev)
+{
+ u16 i;
+
+ for (i = 0; i < hdev->ctrl_info->aerl; i++)
+ spraid_send_aen(hdev, i + SPRAID_AQ_BLK_MQ_DEPTH);
}
static int spraid_add_device(struct spraid_dev *hdev, struct spraid_dev_info *device)
@@ -1976,7 +2116,7 @@ static int spraid_rescan_device(struct spraid_dev *hdev, struct spraid_dev_info
sdev = scsi_device_lookup(shost, device->channel, le16_to_cpu(device->target), 0);
if (!sdev) {
- dev_warn(hdev->dev, "Device is not exit, channel: %d, target_id: %d, lun: %d\n",
+ dev_warn(hdev->dev, "device is not exit rescan it, channel: %d, target_id: %d, lun: %d\n",
device->channel, le16_to_cpu(device->target), 0);
return -ENODEV;
}
@@ -1993,7 +2133,7 @@ static int spraid_remove_device(struct spraid_dev *hdev, struct spraid_dev_info
sdev = scsi_device_lookup(shost, org_device->channel, le16_to_cpu(org_device->target), 0);
if (!sdev) {
- dev_warn(hdev->dev, "Device is not exit, channel: %d, target_id: %d, lun: %d\n",
+ dev_warn(hdev->dev, "device is not exit remove it, channel: %d, target_id: %d, lun: %d\n",
org_device->channel, le16_to_cpu(org_device->target), 0);
return -ENODEV;
}
@@ -2083,6 +2223,15 @@ static void spraid_timesyn_work(struct work_struct *work)
spraid_configure_timestamp(hdev);
}
+static int spraid_init_ctrl_info(struct spraid_dev *hdev);
+static void spraid_fw_act_work(struct work_struct *work)
+{
+ struct spraid_dev *hdev = container_of(work, struct spraid_dev, fw_act_work);
+
+ if (spraid_init_ctrl_info(hdev))
+ dev_err(hdev->dev, "get ctrl info failed after fw act\n");
+}
+
static void spraid_queue_scan(struct spraid_dev *hdev)
{
queue_work(spraid_wq, &hdev->scan_work);
@@ -2094,6 +2243,9 @@ static void spraid_handle_aen_notice(struct spraid_dev *hdev, u32 result)
case SPRAID_AEN_DEV_CHANGED:
spraid_queue_scan(hdev);
break;
+ case SPRAID_AEN_FW_ACT_START:
+ dev_info(hdev->dev, "fw activation starting\n");
+ break;
case SPRAID_AEN_HOST_PROBING:
break;
default:
@@ -2101,25 +2253,25 @@ static void spraid_handle_aen_notice(struct spraid_dev *hdev, u32 result)
}
}
-static void spraid_handle_aen_vs(struct spraid_dev *hdev, u32 result)
+static void spraid_handle_aen_vs(struct spraid_dev *hdev, u32 result, u32 result1)
{
- switch (result) {
+ switch ((result & 0xff00) >> 8) {
case SPRAID_AEN_TIMESYN:
queue_work(spraid_wq, &hdev->timesyn_work);
break;
+ case SPRAID_AEN_FW_ACT_FINISH:
+ dev_info(hdev->dev, "fw activation finish\n");
+ queue_work(spraid_wq, &hdev->fw_act_work);
+ break;
+ case SPRAID_AEN_EVENT_MIN ... SPRAID_AEN_EVENT_MAX:
+ dev_info(hdev->dev, "rcv card event[%d], param1[0x%x] param2[0x%x]\n",
+ (result & 0xff00) >> 8, result, result1);
+ break;
default:
- dev_warn(hdev->dev, "async event result: %x\n", result);
+ dev_warn(hdev->dev, "async event result: 0x%x\n", result);
}
}
-static void spraid_async_event_work(struct work_struct *work)
-{
- struct spraid_dev *hdev =
- container_of(work, struct spraid_dev, aen_work);
-
- spraid_send_aen(hdev);
-}
-
static int spraid_alloc_resources(struct spraid_dev *hdev)
{
int ret, nqueue;
@@ -2149,10 +2301,16 @@ static int spraid_alloc_resources(struct spraid_dev *hdev)
goto destroy_dma_pools;
}
+ ret = spraid_alloc_admin_cmds(hdev);
+ if (ret)
+ goto free_queues;
+
dev_info(hdev->dev, "[%s] queues num: %d\n", __func__, nqueue);
return 0;
+free_queues:
+ kfree(hdev->queues);
destroy_dma_pools:
spraid_destroy_dma_pools(hdev);
free_ctrl_info:
@@ -2164,50 +2322,18 @@ static int spraid_alloc_resources(struct spraid_dev *hdev)
static void spraid_free_resources(struct spraid_dev *hdev)
{
+ spraid_free_admin_cmds(hdev);
kfree(hdev->queues);
spraid_destroy_dma_pools(hdev);
kfree(hdev->ctrl_info);
ida_free(&spraid_instance_ida, hdev->instance);
}
-static void spraid_setup_passthrough(struct request *req, struct spraid_admin_command *cmd)
-{
- memcpy(cmd, spraid_admin_req(req)->cmd, sizeof(*cmd));
- cmd->common.flags &= ~SPRAID_CMD_FLAG_SGL_ALL;
-}
-
-static inline void spraid_clear_hreq(struct request *req)
-{
- if (!(req->rq_flags & RQF_DONTPREP)) {
- spraid_admin_req(req)->flags = 0;
- req->rq_flags |= RQF_DONTPREP;
- }
-}
-
-static blk_status_t spraid_setup_admin_cmd(struct request *req, struct spraid_admin_command *cmd)
+static void spraid_bsg_unmap_data(struct spraid_dev *hdev, struct bsg_job *job)
{
- spraid_clear_hreq(req);
-
- memset(cmd, 0, sizeof(*cmd));
- switch (req_op(req)) {
- case REQ_OP_DRV_IN:
- case REQ_OP_DRV_OUT:
- spraid_setup_passthrough(req, cmd);
- break;
- default:
- WARN_ON_ONCE(1);
- return BLK_STS_IOERR;
- }
-
- cmd->common.command_id = req->tag;
- return BLK_STS_OK;
-}
-
-static void spraid_unmap_data(struct spraid_dev *hdev, struct request *req)
-{
- struct spraid_iod *iod = blk_mq_rq_to_pdu(req);
- enum dma_data_direction dma_dir = rq_data_dir(req) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE;
+ struct request *rq = blk_mq_rq_from_pdu(job);
+ struct spraid_iod *iod = job->dd_data;
+ enum dma_data_direction dma_dir = rq_data_dir(rq) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
if (iod->nsge)
dma_unmap_sg(hdev->dev, iod->sg, iod->nsge, dma_dir);
@@ -2215,36 +2341,36 @@ static void spraid_unmap_data(struct spraid_dev *hdev, struct request *req)
spraid_free_iod_res(hdev, iod);
}
-static blk_status_t spraid_admin_map_data(struct spraid_dev *hdev, struct request *req,
- struct spraid_admin_command *cmd)
+static int spraid_bsg_map_data(struct spraid_dev *hdev, struct bsg_job *job,
+ struct spraid_admin_command *cmd)
{
- struct spraid_iod *iod = blk_mq_rq_to_pdu(req);
- struct request_queue *admin_q = req->q;
- enum dma_data_direction dma_dir = rq_data_dir(req) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE;
- blk_status_t ret = BLK_STS_IOERR;
- int nr_mapped;
- int res;
+ struct request *rq = blk_mq_rq_from_pdu(job);
+ struct spraid_iod *iod = job->dd_data;
+ enum dma_data_direction dma_dir = rq_data_dir(rq) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+ int ret = 0;
+
+ iod->sg = job->request_payload.sg_list;
+ iod->nsge = job->request_payload.sg_cnt;
+ iod->length = job->request_payload.payload_len;
+ iod->use_sgl = false;
+ iod->npages = -1;
+ iod->sg_drv_mgmt = false;
- sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
- iod->nsge = blk_rq_map_sg(admin_q, req, iod->sg);
if (!iod->nsge)
goto out;
- dev_info(hdev->dev, "nseg: %u, nsge: %u\n",
- blk_rq_nr_phys_segments(req), iod->nsge);
-
- ret = BLK_STS_RESOURCE;
- nr_mapped = dma_map_sg_attrs(hdev->dev, iod->sg, iod->nsge, dma_dir, DMA_ATTR_NO_WARN);
- if (!nr_mapped)
+ ret = dma_map_sg_attrs(hdev->dev, iod->sg, iod->nsge, dma_dir, DMA_ATTR_NO_WARN);
+ if (!ret)
goto out;
- res = spraid_setup_prps(hdev, iod);
- if (res)
+ ret = spraid_setup_prps(hdev, iod);
+ if (ret)
goto unmap;
+
cmd->common.dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
cmd->common.dptr.prp2 = cpu_to_le64(iod->first_dma);
- return BLK_STS_OK;
+
+ return 0;
unmap:
dma_unmap_sg(hdev->dev, iod->sg, iod->nsge, dma_dir);
@@ -2252,137 +2378,29 @@ static blk_status_t spraid_admin_map_data(struct spraid_dev *hdev, struct reques
return ret;
}
-static blk_status_t spraid_init_admin_iod(struct request *rq, struct spraid_dev *hdev)
-{
- struct spraid_iod *iod = blk_mq_rq_to_pdu(rq);
- int nents = blk_rq_nr_phys_segments(rq);
- unsigned int size = blk_rq_payload_bytes(rq);
-
- if (nents > SPRAID_INT_PAGES || size > SPRAID_INT_BYTES(hdev)) {
- iod->sg = mempool_alloc(hdev->iod_mempool, GFP_ATOMIC);
- if (!iod->sg)
- return BLK_STS_RESOURCE;
- } else {
- iod->sg = iod->inline_sg;
- }
-
- iod->nsge = 0;
- iod->use_sgl = false;
- iod->npages = -1;
- iod->length = size;
- iod->sg_drv_mgmt = true;
-
- return BLK_STS_OK;
-}
-
-static blk_status_t spraid_queue_admin_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *bd)
-{
- struct spraid_queue *adminq = hctx->driver_data;
- struct spraid_dev *hdev = adminq->hdev;
- struct request *req = bd->rq;
- struct spraid_iod *iod = blk_mq_rq_to_pdu(req);
- struct spraid_admin_command cmd;
- blk_status_t ret;
-
- ret = spraid_setup_admin_cmd(req, &cmd);
- if (ret)
- goto out;
-
- ret = spraid_init_admin_iod(req, hdev);
- if (ret)
- goto out;
-
- if (blk_rq_nr_phys_segments(req)) {
- ret = spraid_admin_map_data(hdev, req, &cmd);
- if (ret)
- goto cleanup_iod;
- }
-
- blk_mq_start_request(req);
- spraid_submit_cmd(adminq, &cmd);
- return BLK_STS_OK;
-
-cleanup_iod:
- spraid_free_iod_res(hdev, iod);
-out:
- return ret;
-}
-
-static blk_status_t spraid_error_status(struct request *req)
-{
- switch (spraid_admin_req(req)->status & 0x7ff) {
- case SPRAID_SC_SUCCESS:
- return BLK_STS_OK;
- default:
- return BLK_STS_IOERR;
- }
-}
-
-static void spraid_complete_admin_rq(struct request *req)
-{
- struct spraid_iod *iod = blk_mq_rq_to_pdu(req);
- struct spraid_dev *hdev = iod->spraidq->hdev;
-
- if (blk_rq_nr_phys_segments(req))
- spraid_unmap_data(hdev, req);
- blk_mq_end_request(req, spraid_error_status(req));
-}
-
-static int spraid_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx)
-{
- struct spraid_dev *hdev = data;
- struct spraid_queue *adminq = &hdev->queues[0];
-
- WARN_ON(hctx_idx != 0);
- WARN_ON(hdev->admin_tagset.tags[0] != hctx->tags);
-
- hctx->driver_data = adminq;
- return 0;
-}
-
-static int spraid_admin_init_request(struct blk_mq_tag_set *set, struct request *req,
- unsigned int hctx_idx, unsigned int numa_node)
-{
- struct spraid_dev *hdev = set->driver_data;
- struct spraid_iod *iod = blk_mq_rq_to_pdu(req);
- struct spraid_queue *adminq = &hdev->queues[0];
-
- WARN_ON(!adminq);
- iod->spraidq = adminq;
- return 0;
-}
-
-static enum blk_eh_timer_return
-spraid_admin_timeout(struct request *req, bool reserved)
-{
- struct spraid_iod *iod = blk_mq_rq_to_pdu(req);
- struct spraid_queue *spraidq = iod->spraidq;
- struct spraid_dev *hdev = spraidq->hdev;
-
- dev_err(hdev->dev, "Admin cid[%d] qid[%d] timeout\n",
- req->tag, spraidq->qid);
-
- if (spraid_poll_cq(spraidq, req->tag)) {
- dev_warn(hdev->dev, "cid[%d] qid[%d] timeout, completion polled\n",
- req->tag, spraidq->qid);
- return BLK_EH_DONE;
- }
-
- spraid_end_admin_request(req, cpu_to_le16(-EINVAL), 0, 0);
- return BLK_EH_DONE;
-}
-
static int spraid_get_ctrl_info(struct spraid_dev *hdev, struct spraid_ctrl_info *ctrl_info)
{
struct spraid_admin_command admin_cmd;
+ u8 *data_ptr = NULL;
+ dma_addr_t data_dma = 0;
+ int ret;
+
+ data_ptr = dma_alloc_coherent(hdev->dev, PAGE_SIZE, &data_dma, GFP_KERNEL);
+ if (!data_ptr)
+ return -ENOMEM;
memset(&admin_cmd, 0, sizeof(admin_cmd));
admin_cmd.get_info.opcode = SPRAID_ADMIN_GET_INFO;
admin_cmd.get_info.type = SPRAID_GET_INFO_CTRL;
+ admin_cmd.common.dptr.prp1 = cpu_to_le64(data_dma);
+
+ ret = spraid_submit_admin_sync_cmd(hdev, &admin_cmd, NULL, NULL, 0);
+ if (!ret)
+ memcpy(ctrl_info, data_ptr, sizeof(struct spraid_ctrl_info));
- return spraid_submit_admin_sync_cmd(hdev->admin_q, &admin_cmd, NULL,
- ctrl_info, sizeof(struct spraid_ctrl_info), 0, 0, 0);
+ dma_free_coherent(hdev->dev, PAGE_SIZE, data_ptr, data_dma);
+
+ return ret;
}
static int spraid_init_ctrl_info(struct spraid_dev *hdev)
@@ -2416,6 +2434,11 @@ static int spraid_init_ctrl_info(struct spraid_dev *hdev)
dev_info(hdev->dev, "[%s]sn = %s\n", __func__, hdev->ctrl_info->sn);
dev_info(hdev->dev, "[%s]fr = %s\n", __func__, hdev->ctrl_info->fr);
+ if (!hdev->ctrl_info->aerl)
+ hdev->ctrl_info->aerl = 1;
+ if (hdev->ctrl_info->aerl > SPRAID_NR_AEN_COMMANDS)
+ hdev->ctrl_info->aerl = SPRAID_NR_AEN_COMMANDS;
+
return 0;
}
@@ -2444,99 +2467,54 @@ static void spraid_free_iod_ext_mem_pool(struct spraid_dev *hdev)
mempool_destroy(hdev->iod_mempool);
}
-static int spraid_submit_user_cmd(struct request_queue *q, struct spraid_admin_command *cmd,
- void __user *ubuffer, unsigned int bufflen, u32 *result,
- unsigned int timeout)
+static int spraid_user_admin_cmd(struct spraid_dev *hdev, struct bsg_job *job)
{
- struct request *req;
- struct bio *bio = NULL;
- int ret;
-
- req = spraid_alloc_admin_request(q, cmd, 0);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
- spraid_admin_req(req)->flags |= SPRAID_REQ_USERCMD;
-
- if (ubuffer && bufflen) {
- ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, GFP_KERNEL);
- if (ret)
- goto out;
- bio = req->bio;
- }
- blk_execute_rq(req->q, NULL, req, 0);
- if (spraid_admin_req(req)->flags & SPRAID_REQ_CANCELLED)
- ret = -EINTR;
- else
- ret = spraid_admin_req(req)->status;
- if (result) {
- result[0] = spraid_admin_req(req)->result0;
- result[1] = spraid_admin_req(req)->result1;
- }
- if (bio)
- blk_rq_unmap_user(bio);
-out:
- blk_mq_free_request(req);
- return ret;
-}
-
-static int spraid_user_admin_cmd(struct spraid_dev *hdev,
- struct spraid_passthru_common_cmd __user *ucmd)
-{
- struct spraid_passthru_common_cmd cmd;
+ struct spraid_bsg_request *bsg_req = (struct spraid_bsg_request *)(job->request);
+ struct spraid_passthru_common_cmd *cmd = &(bsg_req->admcmd);
struct spraid_admin_command admin_cmd;
- u32 timeout = 0;
+ u32 timeout = msecs_to_jiffies(cmd->timeout_ms);
+ u32 result[2] = {0};
int status;
- if (!capable(CAP_SYS_ADMIN)) {
- dev_err(hdev->dev, "Current user hasn't administrator right, reject service\n");
- return -EACCES;
- }
-
- if (copy_from_user(&cmd, ucmd, sizeof(cmd))) {
- dev_err(hdev->dev, "Copy command from user space to kernel space failed\n");
- return -EFAULT;
- }
-
- if (cmd.flags) {
- dev_err(hdev->dev, "Invalid flags in user command\n");
- return -EINVAL;
+ if (hdev->state >= SPRAID_RESETTING) {
+ dev_err(hdev->dev, "[%s] err, host state:[%d] is not right\n", __func__,
+ hdev->state);
+ return -EBUSY;
}
- dev_info(hdev->dev, "user_admin_cmd opcode: 0x%x, subopcode: 0x%x\n",
- cmd.opcode, cmd.cdw2 & 0x7ff);
+ dev_info(hdev->dev, "[%s] opcode[0x%x] subopcode[0x%x] init\n",
+ __func__, cmd->opcode, cmd->info_0.subopcode);
memset(&admin_cmd, 0, sizeof(admin_cmd));
- admin_cmd.common.opcode = cmd.opcode;
- admin_cmd.common.flags = cmd.flags;
- admin_cmd.common.hdid = cpu_to_le32(cmd.nsid);
- admin_cmd.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
- admin_cmd.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
- admin_cmd.common.cdw10 = cpu_to_le32(cmd.cdw10);
- admin_cmd.common.cdw11 = cpu_to_le32(cmd.cdw11);
- admin_cmd.common.cdw12 = cpu_to_le32(cmd.cdw12);
- admin_cmd.common.cdw13 = cpu_to_le32(cmd.cdw13);
- admin_cmd.common.cdw14 = cpu_to_le32(cmd.cdw14);
- admin_cmd.common.cdw15 = cpu_to_le32(cmd.cdw15);
-
- if (cmd.timeout_ms)
- timeout = msecs_to_jiffies(cmd.timeout_ms);
-
- status = spraid_submit_user_cmd(hdev->admin_q, &admin_cmd,
- (void __user *)(uintptr_t)cmd.addr, cmd.info_1.data_len,
- &cmd.result0, timeout);
-
- dev_info(hdev->dev, "user_admin_cmd status: 0x%x, result0: 0x%x, result1: 0x%x\n",
- status, cmd.result0, cmd.result1);
+ admin_cmd.common.opcode = cmd->opcode;
+ admin_cmd.common.flags = cmd->flags;
+ admin_cmd.common.hdid = cpu_to_le32(cmd->nsid);
+ admin_cmd.common.cdw2[0] = cpu_to_le32(cmd->cdw2);
+ admin_cmd.common.cdw2[1] = cpu_to_le32(cmd->cdw3);
+ admin_cmd.common.cdw10 = cpu_to_le32(cmd->cdw10);
+ admin_cmd.common.cdw11 = cpu_to_le32(cmd->cdw11);
+ admin_cmd.common.cdw12 = cpu_to_le32(cmd->cdw12);
+ admin_cmd.common.cdw13 = cpu_to_le32(cmd->cdw13);
+ admin_cmd.common.cdw14 = cpu_to_le32(cmd->cdw14);
+ admin_cmd.common.cdw15 = cpu_to_le32(cmd->cdw15);
+
+ status = spraid_bsg_map_data(hdev, job, &admin_cmd);
+ if (status) {
+ dev_err(hdev->dev, "[%s] err, map data failed\n", __func__);
+ return status;
+ }
+ status = spraid_submit_admin_sync_cmd(hdev, &admin_cmd, &result[0], &result[1], timeout);
if (status >= 0) {
- if (put_user(cmd.result0, &ucmd->result0))
- return -EFAULT;
- if (put_user(cmd.result1, &ucmd->result1))
- return -EFAULT;
+ job->reply_len = sizeof(result);
+ memcpy(job->reply, result, sizeof(result));
}
+ dev_info(hdev->dev, "[%s] opcode[0x%x] subopcode[0x%x], status[0x%x] result0[0x%x] result1[0x%x]\n",
+ __func__, cmd->opcode, cmd->info_0.subopcode, status, result[0], result[1]);
+
+ spraid_bsg_unmap_data(hdev, job);
+
return status;
}
@@ -2548,8 +2526,8 @@ static int spraid_alloc_ioq_ptcmds(struct spraid_dev *hdev)
INIT_LIST_HEAD(&hdev->ioq_pt_list);
spin_lock_init(&hdev->ioq_pt_lock);
- hdev->ioq_ptcmds = kcalloc_node(ptnum, sizeof(struct spraid_ioq_ptcmd),
- GFP_KERNEL, hdev->numa_node);
+ hdev->ioq_ptcmds = kcalloc_node(ptnum, sizeof(struct spraid_cmd),
+ GFP_KERNEL, hdev->numa_node);
if (!hdev->ioq_ptcmds) {
dev_err(hdev->dev, "Alloc ioq_ptcmds failed\n");
@@ -2567,55 +2545,33 @@ static int spraid_alloc_ioq_ptcmds(struct spraid_dev *hdev)
return 0;
}
-static struct spraid_ioq_ptcmd *spraid_get_ioq_ptcmd(struct spraid_dev *hdev)
-{
- struct spraid_ioq_ptcmd *cmd = NULL;
- unsigned long flags;
-
- spin_lock_irqsave(&hdev->ioq_pt_lock, flags);
- if (list_empty(&hdev->ioq_pt_list)) {
- spin_unlock_irqrestore(&hdev->ioq_pt_lock, flags);
- dev_err(hdev->dev, "err, ioq ptcmd list empty\n");
- return NULL;
- }
- cmd = list_entry((&hdev->ioq_pt_list)->next, struct spraid_ioq_ptcmd, list);
- list_del_init(&cmd->list);
- spin_unlock_irqrestore(&hdev->ioq_pt_lock, flags);
-
- WRITE_ONCE(cmd->state, SPRAID_CMD_IDLE);
-
- return cmd;
-}
-
-static void spraid_put_ioq_ptcmd(struct spraid_dev *hdev, struct spraid_ioq_ptcmd *cmd)
+static void spraid_free_ioq_ptcmds(struct spraid_dev *hdev)
{
- unsigned long flags;
-
- spin_lock_irqsave(&hdev->ioq_pt_lock, flags);
- list_add(&cmd->list, (&hdev->ioq_pt_list)->next);
- spin_unlock_irqrestore(&hdev->ioq_pt_lock, flags);
+ kfree(hdev->ioq_ptcmds);
+ INIT_LIST_HEAD(&hdev->ioq_pt_list);
}
static int spraid_submit_ioq_sync_cmd(struct spraid_dev *hdev, struct spraid_ioq_command *cmd,
- u32 *result, void **sense, u32 timeout)
+ u32 *result, u32 *reslen, u32 timeout)
{
- struct spraid_queue *ioq;
int ret;
dma_addr_t sense_dma;
- struct spraid_ioq_ptcmd *pt_cmd = spraid_get_ioq_ptcmd(hdev);
-
- *sense = NULL;
+ struct spraid_queue *ioq;
+ void *sense_addr = NULL;
+ struct spraid_cmd *pt_cmd = spraid_get_cmd(hdev, SPRAID_CMD_IOPT);
- if (!pt_cmd)
+ if (!pt_cmd) {
+ dev_err(hdev->dev, "err, get ioq cmd failed\n");
return -EFAULT;
+ }
- dev_info(hdev->dev, "[%s] ptcmd, cid[%d], qid[%d]\n", __func__, pt_cmd->cid, pt_cmd->qid);
+ timeout = timeout ? timeout : ADMIN_TIMEOUT;
init_completion(&pt_cmd->cmd_done);
ioq = &hdev->queues[pt_cmd->qid];
ret = pt_cmd->cid * SCSI_SENSE_BUFFERSIZE;
- pt_cmd->priv = ioq->sense + ret;
+ sense_addr = ioq->sense + ret;
sense_dma = ioq->sense_dma_addr + ret;
cmd->common.sense_addr = cpu_to_le64(sense_dma);
@@ -2625,262 +2581,90 @@ static int spraid_submit_ioq_sync_cmd(struct spraid_dev *hdev, struct spraid_ioq
spraid_submit_cmd(ioq, cmd);
if (!wait_for_completion_timeout(&pt_cmd->cmd_done, timeout)) {
- dev_err(hdev->dev, "[%s] cid[%d], qid[%d] timeout\n",
- __func__, pt_cmd->cid, pt_cmd->qid);
+ dev_err(hdev->dev, "[%s] cid[%d] qid[%d] timeout, opcode[0x%x] subopcode[0x%x]\n",
+ __func__, pt_cmd->cid, pt_cmd->qid, cmd->common.opcode,
+ (le32_to_cpu(cmd->common.cdw3[0]) & 0xffff));
WRITE_ONCE(pt_cmd->state, SPRAID_CMD_TIMEOUT);
return -EINVAL;
}
- if (result) {
- result[0] = pt_cmd->result0;
- result[1] = pt_cmd->result1;
+ if (result && reslen) {
+ if ((pt_cmd->status & 0x17f) == 0x101) {
+ memcpy(result, sense_addr, SCSI_SENSE_BUFFERSIZE);
+ *reslen = SCSI_SENSE_BUFFERSIZE;
+ }
}
- if ((pt_cmd->status & 0x17f) == 0x101)
- *sense = pt_cmd->priv;
+ spraid_put_cmd(hdev, pt_cmd, SPRAID_CMD_IOPT);
return pt_cmd->status;
}
-static int spraid_user_ioq_cmd(struct spraid_dev *hdev,
- struct spraid_ioq_passthru_cmd __user *ucmd)
+static int spraid_user_ioq_cmd(struct spraid_dev *hdev, struct bsg_job *job)
{
- struct spraid_ioq_passthru_cmd cmd;
+ struct spraid_bsg_request *bsg_req = (struct spraid_bsg_request *)(job->request);
+ struct spraid_ioq_passthru_cmd *cmd = &(bsg_req->ioqcmd);
struct spraid_ioq_command ioq_cmd;
- u32 timeout = 0;
int status = 0;
- u8 *data_ptr = NULL;
- dma_addr_t data_dma;
- enum dma_data_direction dma_dir = DMA_NONE;
- void *sense = NULL;
-
- if (!capable(CAP_SYS_ADMIN)) {
- dev_err(hdev->dev, "Current user hasn't administrator right, reject service\n");
- return -EACCES;
- }
-
- if (copy_from_user(&cmd, ucmd, sizeof(cmd))) {
- dev_err(hdev->dev, "Copy command from user space to kernel space failed\n");
- return -EFAULT;
- }
+ u32 timeout = msecs_to_jiffies(cmd->timeout_ms);
- if (cmd.data_len > PAGE_SIZE) {
+ if (cmd->data_len > PAGE_SIZE) {
dev_err(hdev->dev, "[%s] data len bigger than 4k\n", __func__);
return -EFAULT;
}
- dev_info(hdev->dev, "[%s] opcode: 0x%x, subopcode: 0x%x, datalen: %d\n",
- __func__, cmd.opcode, cmd.info_1.subopcode, cmd.data_len);
-
- if (cmd.addr && cmd.data_len) {
- data_ptr = dma_alloc_coherent(hdev->dev, PAGE_SIZE, &data_dma, GFP_KERNEL);
- if (!data_ptr)
- return -ENOMEM;
-
- dma_dir = (cmd.opcode & 1) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+ if (hdev->state != SPRAID_LIVE) {
+ dev_err(hdev->dev, "[%s] err, host state:[%d] is not live\n", __func__,
+ hdev->state);
+ return -EBUSY;
}
- if (dma_dir == DMA_TO_DEVICE) {
- if (copy_from_user(data_ptr, (void __user *)(uintptr_t)cmd.addr, cmd.data_len)) {
- dev_err(hdev->dev, "[%s] copy user data failed\n", __func__);
- status = -EFAULT;
- goto free_dma_mem;
- }
- }
+ dev_info(hdev->dev, "[%s] opcode[0x%x] subopcode[0x%x] init, datalen[%d]\n",
+ __func__, cmd->opcode, cmd->info_1.subopcode, cmd->data_len);
memset(&ioq_cmd, 0, sizeof(ioq_cmd));
- ioq_cmd.common.opcode = cmd.opcode;
- ioq_cmd.common.flags = cmd.flags;
- ioq_cmd.common.hdid = cpu_to_le32(cmd.nsid);
- ioq_cmd.common.sense_len = cpu_to_le16(cmd.info_0.res_sense_len);
- ioq_cmd.common.cdb_len = cmd.info_0.cdb_len;
- ioq_cmd.common.rsvd2 = cmd.info_0.rsvd0;
- ioq_cmd.common.cdw3[0] = cpu_to_le32(cmd.cdw3);
- ioq_cmd.common.cdw3[1] = cpu_to_le32(cmd.cdw4);
- ioq_cmd.common.cdw3[2] = cpu_to_le32(cmd.cdw5);
- ioq_cmd.common.dptr.prp1 = cpu_to_le64(data_dma);
-
- ioq_cmd.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
- ioq_cmd.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
- ioq_cmd.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
- ioq_cmd.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
- ioq_cmd.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
- ioq_cmd.common.cdw10[5] = cpu_to_le32(cmd.data_len);
-
- memcpy(ioq_cmd.common.cdb, &cmd.cdw16, cmd.info_0.cdb_len);
-
- ioq_cmd.common.cdw26[0] = cpu_to_le32(cmd.cdw26[0]);
- ioq_cmd.common.cdw26[1] = cpu_to_le32(cmd.cdw26[1]);
- ioq_cmd.common.cdw26[2] = cpu_to_le32(cmd.cdw26[2]);
- ioq_cmd.common.cdw26[3] = cpu_to_le32(cmd.cdw26[3]);
-
- if (cmd.timeout_ms)
- timeout = msecs_to_jiffies(cmd.timeout_ms);
- timeout = timeout ? timeout : ADMIN_TIMEOUT;
-
- status = spraid_submit_ioq_sync_cmd(hdev, &ioq_cmd, &cmd.result0, &sense, timeout);
-
- if (status >= 0) {
- if (put_user(cmd.result0, &ucmd->result0)) {
- status = -EFAULT;
- goto free_dma_mem;
- }
- if (put_user(cmd.result1, &ucmd->result1)) {
- status = -EFAULT;
- goto free_dma_mem;
- }
- if (dma_dir == DMA_FROM_DEVICE &&
- copy_to_user((void __user *)(uintptr_t)cmd.addr, data_ptr, cmd.data_len)) {
- status = -EFAULT;
- goto free_dma_mem;
- }
+ ioq_cmd.common.opcode = cmd->opcode;
+ ioq_cmd.common.flags = cmd->flags;
+ ioq_cmd.common.hdid = cpu_to_le32(cmd->nsid);
+ ioq_cmd.common.sense_len = cpu_to_le16(cmd->info_0.res_sense_len);
+ ioq_cmd.common.cdb_len = cmd->info_0.cdb_len;
+ ioq_cmd.common.rsvd2 = cmd->info_0.rsvd0;
+ ioq_cmd.common.cdw3[0] = cpu_to_le32(cmd->cdw3);
+ ioq_cmd.common.cdw3[1] = cpu_to_le32(cmd->cdw4);
+ ioq_cmd.common.cdw3[2] = cpu_to_le32(cmd->cdw5);
+
+ ioq_cmd.common.cdw10[0] = cpu_to_le32(cmd->cdw10);
+ ioq_cmd.common.cdw10[1] = cpu_to_le32(cmd->cdw11);
+ ioq_cmd.common.cdw10[2] = cpu_to_le32(cmd->cdw12);
+ ioq_cmd.common.cdw10[3] = cpu_to_le32(cmd->cdw13);
+ ioq_cmd.common.cdw10[4] = cpu_to_le32(cmd->cdw14);
+ ioq_cmd.common.cdw10[5] = cpu_to_le32(cmd->data_len);
+
+ memcpy(ioq_cmd.common.cdb, &cmd->cdw16, cmd->info_0.cdb_len);
+
+ ioq_cmd.common.cdw26[0] = cpu_to_le32(cmd->cdw26[0]);
+ ioq_cmd.common.cdw26[1] = cpu_to_le32(cmd->cdw26[1]);
+ ioq_cmd.common.cdw26[2] = cpu_to_le32(cmd->cdw26[2]);
+ ioq_cmd.common.cdw26[3] = cpu_to_le32(cmd->cdw26[3]);
+
+ status = spraid_bsg_map_data(hdev, job, (struct spraid_admin_command *)&ioq_cmd);
+ if (status) {
+ dev_err(hdev->dev, "[%s] err, map data failed\n", __func__);
+ return status;
}
- if (sense) {
- if (copy_to_user((void *__user *)(uintptr_t)cmd.sense_addr,
- sense, cmd.info_0.res_sense_len)) {
- status = -EFAULT;
- goto free_dma_mem;
- }
- }
+ status = spraid_submit_ioq_sync_cmd(hdev, &ioq_cmd, job->reply, &job->reply_len, timeout);
-free_dma_mem:
- if (data_ptr)
- dma_free_coherent(hdev->dev, PAGE_SIZE, data_ptr, data_dma);
+ dev_info(hdev->dev, "[%s] opcode[0x%x] subopcode[0x%x], status[0x%x] reply_len[%d]\n",
+ __func__, cmd->opcode, cmd->info_1.subopcode, status, job->reply_len);
- return status;
+ spraid_bsg_unmap_data(hdev, job);
+ return status;
}
static int spraid_reset_work_sync(struct spraid_dev *hdev);
-static int spraid_user_reset_cmd(struct spraid_dev *hdev)
-{
- int ret;
-
- dev_info(hdev->dev, "[%s] start user reset cmd\n", __func__);
- ret = spraid_reset_work_sync(hdev);
- dev_info(hdev->dev, "[%s] stop user reset cmd[%d]\n", __func__, ret);
-
- return ret;
-}
-
-static int hdev_open(struct inode *inode, struct file *file)
-{
- struct spraid_dev *hdev =
- container_of(inode->i_cdev, struct spraid_dev, cdev);
- file->private_data = hdev;
- return 0;
-}
-
-static long hdev_ioctl(struct file *file, u32 cmd, unsigned long arg)
-{
- struct spraid_dev *hdev = file->private_data;
- void __user *argp = (void __user *)arg;
-
- switch (cmd) {
- case SPRAID_IOCTL_ADMIN_CMD:
- return spraid_user_admin_cmd(hdev, argp);
- case SPRAID_IOCTL_IOQ_CMD:
- return spraid_user_ioq_cmd(hdev, argp);
- case SPRAID_IOCTL_RESET_CMD:
- return spraid_user_reset_cmd(hdev);
- default:
- return -ENOTTY;
- }
-}
-
-static const struct file_operations spraid_dev_fops = {
- .owner = THIS_MODULE,
- .open = hdev_open,
- .unlocked_ioctl = hdev_ioctl,
- .compat_ioctl = hdev_ioctl,
-};
-
-static int spraid_create_cdev(struct spraid_dev *hdev)
-{
- int ret;
-
- device_initialize(&hdev->ctrl_device);
- hdev->ctrl_device.devt = MKDEV(MAJOR(spraid_chr_devt), hdev->instance);
- hdev->ctrl_device.class = spraid_class;
- hdev->ctrl_device.parent = hdev->dev;
- dev_set_drvdata(&hdev->ctrl_device, hdev);
- ret = dev_set_name(&hdev->ctrl_device, "spraid%d", hdev->instance);
- if (ret)
- return ret;
- cdev_init(&hdev->cdev, &spraid_dev_fops);
- hdev->cdev.owner = THIS_MODULE;
- ret = cdev_device_add(&hdev->cdev, &hdev->ctrl_device);
- if (ret) {
- dev_err(hdev->dev, "Add cdev failed, ret: %d", ret);
- put_device(&hdev->ctrl_device);
- kfree_const(hdev->ctrl_device.kobj.name);
- return ret;
- }
-
- return 0;
-}
-
-static inline void spraid_remove_cdev(struct spraid_dev *hdev)
-{
- cdev_device_del(&hdev->cdev, &hdev->ctrl_device);
-}
-
-static const struct blk_mq_ops spraid_admin_mq_ops = {
- .queue_rq = spraid_queue_admin_rq,
- .complete = spraid_complete_admin_rq,
- .init_hctx = spraid_admin_init_hctx,
- .init_request = spraid_admin_init_request,
- .timeout = spraid_admin_timeout,
-};
-
-static void spraid_remove_admin_tagset(struct spraid_dev *hdev)
-{
- if (hdev->admin_q && !blk_queue_dying(hdev->admin_q)) {
- blk_mq_unquiesce_queue(hdev->admin_q);
- blk_cleanup_queue(hdev->admin_q);
- blk_mq_free_tag_set(&hdev->admin_tagset);
- }
-}
-
-static int spraid_alloc_admin_tags(struct spraid_dev *hdev)
-{
- if (!hdev->admin_q) {
- hdev->admin_tagset.ops = &spraid_admin_mq_ops;
- hdev->admin_tagset.nr_hw_queues = 1;
-
- hdev->admin_tagset.queue_depth = SPRAID_AQ_MQ_TAG_DEPTH;
- hdev->admin_tagset.timeout = ADMIN_TIMEOUT;
- hdev->admin_tagset.numa_node = hdev->numa_node;
- hdev->admin_tagset.cmd_size =
- spraid_cmd_size(hdev, true, false);
- hdev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
- hdev->admin_tagset.driver_data = hdev;
-
- if (blk_mq_alloc_tag_set(&hdev->admin_tagset)) {
- dev_err(hdev->dev, "Allocate admin tagset failed\n");
- return -ENOMEM;
- }
-
- hdev->admin_q = blk_mq_init_queue(&hdev->admin_tagset);
- if (IS_ERR(hdev->admin_q)) {
- dev_err(hdev->dev, "Initialize admin request queue failed\n");
- blk_mq_free_tag_set(&hdev->admin_tagset);
- return -ENOMEM;
- }
- if (!blk_get_queue(hdev->admin_q)) {
- dev_err(hdev->dev, "Get admin request queue failed\n");
- spraid_remove_admin_tagset(hdev);
- hdev->admin_q = NULL;
- return -ENODEV;
- }
- } else {
- blk_mq_unquiesce_queue(hdev->admin_q);
- }
- return 0;
-}
-
static bool spraid_check_scmd_completed(struct scsi_cmnd *scmd)
{
struct spraid_dev *hdev = shost_priv(scmd->device->host);
@@ -2891,7 +2675,7 @@ static bool spraid_check_scmd_completed(struct scsi_cmnd *scmd)
spraid_get_tag_from_scmd(scmd, &hwq, &cid);
spraidq = &hdev->queues[hwq];
if (READ_ONCE(iod->state) == SPRAID_CMD_COMPLETE || spraid_poll_cq(spraidq, cid)) {
- dev_warn(hdev->dev, "cid[%d], qid[%d] has been completed\n",
+ dev_warn(hdev->dev, "cid[%d] qid[%d] has been completed\n",
cid, spraidq->qid);
return true;
}
@@ -2927,8 +2711,7 @@ static int spraid_send_abort_cmd(struct spraid_dev *hdev, u32 hdid, u16 qid, u16
admin_cmd.abort.sqid = cpu_to_le16(qid);
admin_cmd.abort.cid = cpu_to_le16(cid);
- return spraid_submit_admin_sync_cmd(hdev->admin_q, &admin_cmd, NULL,
- NULL, 0, 0, 0, 0);
+ return spraid_submit_admin_sync_cmd(hdev, &admin_cmd, NULL, NULL, 0);
}
/* send reset command by admin quueue temporary */
@@ -2941,8 +2724,7 @@ static int spraid_send_reset_cmd(struct spraid_dev *hdev, int type, u32 hdid)
admin_cmd.reset.hdid = cpu_to_le32(hdid);
admin_cmd.reset.type = type;
- return spraid_submit_admin_sync_cmd(hdev->admin_q, &admin_cmd, NULL,
- NULL, 0, 0, 0, 0);
+ return spraid_submit_admin_sync_cmd(hdev, &admin_cmd, NULL, NULL, 0);
}
static bool spraid_change_host_state(struct spraid_dev *hdev, enum spraid_state newstate)
@@ -3022,7 +2804,7 @@ static void spraid_back_fault_cqe(struct spraid_queue *ioq, struct spraid_comple
scsi_dma_unmap(scmd);
spraid_free_iod_res(hdev, iod);
scmd->scsi_done(scmd);
- dev_warn(hdev->dev, "Back fault CQE, cid[%d], qid[%d]\n",
+ dev_warn(hdev->dev, "Back fault CQE, cid[%d] qid[%d]\n",
cqe->cmd_id, ioq->qid);
}
@@ -3106,17 +2888,13 @@ static void spraid_reset_work(struct work_struct *work)
if (ret)
goto pci_disable;
- ret = spraid_alloc_admin_tags(hdev);
- if (ret)
- goto pci_disable;
-
ret = spraid_setup_io_queues(hdev);
if (ret || hdev->online_queues <= hdev->shost->nr_hw_queues)
goto pci_disable;
spraid_change_host_state(hdev, SPRAID_LIVE);
- spraid_send_aen(hdev);
+ spraid_send_all_aen(hdev);
return;
@@ -3288,6 +3066,62 @@ static int spraid_shost_reset_handler(struct scsi_cmnd *scmd)
return SUCCESS;
}
+static pci_ers_result_t spraid_pci_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct spraid_dev *hdev = pci_get_drvdata(pdev);
+
+ dev_info(hdev->dev, "enter pci error detect, state:%d\n", state);
+
+ switch (state) {
+ case pci_channel_io_normal:
+ dev_warn(hdev->dev, "channel is normal, do nothing\n");
+
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ case pci_channel_io_frozen:
+ dev_warn(hdev->dev, "channel io frozen, need reset controller\n");
+
+ scsi_block_requests(hdev->shost);
+
+ spraid_change_host_state(hdev, SPRAID_RESETTING);
+
+ return PCI_ERS_RESULT_NEED_RESET;
+ case pci_channel_io_perm_failure:
+ dev_warn(hdev->dev, "channel io failure, request disconnect\n");
+
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t spraid_pci_slot_reset(struct pci_dev *pdev)
+{
+ struct spraid_dev *hdev = pci_get_drvdata(pdev);
+
+ dev_info(hdev->dev, "restart after slot reset\n");
+
+ pci_restore_state(pdev);
+
+ if (!queue_work(spraid_wq, &hdev->reset_work)) {
+ dev_err(hdev->dev, "[%s] err, the device is resetting state\n", __func__);
+ return PCI_ERS_RESULT_NONE;
+ }
+
+ flush_work(&hdev->reset_work);
+
+ scsi_unblock_requests(hdev->shost);
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void spraid_reset_done(struct pci_dev *pdev)
+{
+ struct spraid_dev *hdev = pci_get_drvdata(pdev);
+
+ dev_info(hdev->dev, "enter spraid reset done\n");
+}
+
static ssize_t csts_pp_show(struct device *cdev, struct device_attribute *attr, char *buf)
{
struct Scsi_Host *shost = class_to_shost(cdev);
@@ -3347,7 +3181,7 @@ static ssize_t fw_version_show(struct device *cdev, struct device_attribute *att
struct Scsi_Host *shost = class_to_shost(cdev);
struct spraid_dev *hdev = shost_priv(shost);
- return snprintf(buf, sizeof(hdev->ctrl_info->fr), "%s\n", hdev->ctrl_info->fr);
+ return snprintf(buf, PAGE_SIZE, "%s\n", hdev->ctrl_info->fr);
}
static DEVICE_ATTR_RO(csts_pp);
@@ -3365,6 +3199,173 @@ static struct device_attribute *spraid_host_attrs[] = {
NULL,
};
+static int spraid_get_vd_info(struct spraid_dev *hdev, struct spraid_vd_info *vd_info, u16 vid)
+{
+ struct spraid_admin_command admin_cmd;
+ u8 *data_ptr = NULL;
+ dma_addr_t data_dma = 0;
+ int ret;
+
+ data_ptr = dma_alloc_coherent(hdev->dev, PAGE_SIZE, &data_dma, GFP_KERNEL);
+ if (!data_ptr)
+ return -ENOMEM;
+
+ memset(&admin_cmd, 0, sizeof(admin_cmd));
+ admin_cmd.usr_cmd.opcode = USR_CMD_READ;
+ admin_cmd.usr_cmd.info_0.subopcode = cpu_to_le16(USR_CMD_VDINFO);
+ admin_cmd.usr_cmd.info_1.data_len = cpu_to_le16(USR_CMD_RDLEN);
+ admin_cmd.usr_cmd.info_1.param_len = cpu_to_le16(VDINFO_PARAM_LEN);
+ admin_cmd.usr_cmd.cdw10 = cpu_to_le32(vid);
+ admin_cmd.common.dptr.prp1 = cpu_to_le64(data_dma);
+
+ ret = spraid_submit_admin_sync_cmd(hdev, &admin_cmd, NULL, NULL, 0);
+ if (!ret)
+ memcpy(vd_info, data_ptr, sizeof(struct spraid_vd_info));
+
+ dma_free_coherent(hdev->dev, PAGE_SIZE, data_ptr, data_dma);
+
+ return ret;
+}
+
+static int spraid_get_bgtask(struct spraid_dev *hdev, struct spraid_bgtask *bgtask)
+{
+ struct spraid_admin_command admin_cmd;
+ u8 *data_ptr = NULL;
+ dma_addr_t data_dma = 0;
+ int ret;
+
+ data_ptr = dma_alloc_coherent(hdev->dev, PAGE_SIZE, &data_dma, GFP_KERNEL);
+ if (!data_ptr)
+ return -ENOMEM;
+
+ memset(&admin_cmd, 0, sizeof(admin_cmd));
+ admin_cmd.usr_cmd.opcode = USR_CMD_READ;
+ admin_cmd.usr_cmd.info_0.subopcode = cpu_to_le16(USR_CMD_BGTASK);
+ admin_cmd.usr_cmd.info_1.data_len = cpu_to_le16(USR_CMD_RDLEN);
+ admin_cmd.common.dptr.prp1 = cpu_to_le64(data_dma);
+
+ ret = spraid_submit_admin_sync_cmd(hdev, &admin_cmd, NULL, NULL, 0);
+ if (!ret)
+ memcpy(bgtask, data_ptr, sizeof(struct spraid_bgtask));
+
+ dma_free_coherent(hdev->dev, PAGE_SIZE, data_ptr, data_dma);
+
+ return ret;
+}
+
+static ssize_t raid_level_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct scsi_device *sdev;
+ struct spraid_dev *hdev;
+ struct spraid_vd_info *vd_info;
+ struct spraid_sdev_hostdata *hostdata;
+ int ret;
+
+ sdev = to_scsi_device(dev);
+ hdev = shost_priv(sdev->host);
+ hostdata = sdev->hostdata;
+
+ vd_info = kmalloc(sizeof(*vd_info), GFP_KERNEL);
+ if (!vd_info || !SPRAID_DEV_INFO_ATTR_VD(hostdata->attr))
+ return snprintf(buf, PAGE_SIZE, "NA\n");
+
+ ret = spraid_get_vd_info(hdev, vd_info, sdev->id);
+ if (ret)
+ vd_info->rg_level = ARRAY_SIZE(raid_levels) - 1;
+
+ ret = (vd_info->rg_level < ARRAY_SIZE(raid_levels)) ?
+ vd_info->rg_level : (ARRAY_SIZE(raid_levels) - 1);
+
+ kfree(vd_info);
+
+ return snprintf(buf, PAGE_SIZE, "RAID-%s\n", raid_levels[ret]);
+}
+
+static ssize_t raid_state_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct scsi_device *sdev;
+ struct spraid_dev *hdev;
+ struct spraid_vd_info *vd_info;
+ struct spraid_sdev_hostdata *hostdata;
+ int ret;
+
+ sdev = to_scsi_device(dev);
+ hdev = shost_priv(sdev->host);
+ hostdata = sdev->hostdata;
+
+ vd_info = kmalloc(sizeof(*vd_info), GFP_KERNEL);
+ if (!vd_info || !SPRAID_DEV_INFO_ATTR_VD(hostdata->attr))
+ return snprintf(buf, PAGE_SIZE, "NA\n");
+
+ ret = spraid_get_vd_info(hdev, vd_info, sdev->id);
+ if (ret) {
+ vd_info->vd_status = 0;
+ vd_info->rg_id = 0xff;
+ }
+
+ ret = (vd_info->vd_status < ARRAY_SIZE(raid_states)) ? vd_info->vd_status : 0;
+
+ kfree(vd_info);
+
+ return snprintf(buf, PAGE_SIZE, "%s\n", raid_states[ret]);
+}
+
+static ssize_t raid_resync_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct scsi_device *sdev;
+ struct spraid_dev *hdev;
+ struct spraid_vd_info *vd_info;
+ struct spraid_bgtask *bgtask;
+ struct spraid_sdev_hostdata *hostdata;
+ u8 rg_id, i, progress = 0;
+ int ret;
+
+ sdev = to_scsi_device(dev);
+ hdev = shost_priv(sdev->host);
+ hostdata = sdev->hostdata;
+
+ vd_info = kmalloc(sizeof(*vd_info), GFP_KERNEL);
+ if (!vd_info || !SPRAID_DEV_INFO_ATTR_VD(hostdata->attr))
+ return snprintf(buf, PAGE_SIZE, "NA\n");
+
+ ret = spraid_get_vd_info(hdev, vd_info, sdev->id);
+ if (ret)
+ goto out;
+
+ rg_id = vd_info->rg_id;
+
+ bgtask = (struct spraid_bgtask *)vd_info;
+ ret = spraid_get_bgtask(hdev, bgtask);
+ if (ret)
+ goto out;
+ for (i = 0; i < bgtask->task_num; i++) {
+ if ((bgtask->bgtask[i].type == BGTASK_TYPE_REBUILD) &&
+ (le16_to_cpu(bgtask->bgtask[i].vd_id) == rg_id))
+ progress = bgtask->bgtask[i].progress;
+ }
+
+out:
+ kfree(vd_info);
+ return snprintf(buf, PAGE_SIZE, "%d\n", progress);
+}
+
+static DEVICE_ATTR_RO(raid_level);
+static DEVICE_ATTR_RO(raid_state);
+static DEVICE_ATTR_RO(raid_resync);
+
+static struct device_attribute *spraid_dev_attrs[] = {
+ &dev_attr_raid_level,
+ &dev_attr_raid_state,
+ &dev_attr_raid_resync,
+ NULL,
+};
+
+static struct pci_error_handlers spraid_err_handler = {
+ .error_detected = spraid_pci_error_detected,
+ .slot_reset = spraid_pci_slot_reset,
+ .reset_done = spraid_reset_done,
+};
+
static struct scsi_host_template spraid_driver_template = {
.module = THIS_MODULE,
.name = "Ramaxel Logic spraid driver",
@@ -3379,9 +3380,10 @@ static struct scsi_host_template spraid_driver_template = {
.eh_bus_reset_handler = spraid_bus_reset_handler,
.eh_host_reset_handler = spraid_shost_reset_handler,
.change_queue_depth = scsi_change_queue_depth,
- .host_tagset = 1,
+ .host_tagset = 0,
.this_id = -1,
.shost_attrs = spraid_host_attrs,
+ .sdev_attrs = spraid_dev_attrs,
};
static void spraid_shutdown(struct pci_dev *pdev)
@@ -3392,11 +3394,50 @@ static void spraid_shutdown(struct pci_dev *pdev)
spraid_disable_admin_queue(hdev, true);
}
+/* bsg dispatch user command */
+static int spraid_bsg_host_dispatch(struct bsg_job *job)
+{
+ struct Scsi_Host *shost = dev_to_shost(job->dev);
+ struct spraid_dev *hdev = shost_priv(shost);
+ struct request *rq = blk_mq_rq_from_pdu(job);
+ struct spraid_bsg_request *bsg_req = (struct spraid_bsg_request *)(job->request);
+ int ret = 0;
+
+ dev_info(hdev->dev, "[%s] msgcode[%d], msglen[%d], timeout[%d], req_nsge[%d], req_len[%d]\n",
+ __func__, bsg_req->msgcode, job->request_len, rq->timeout,
+ job->request_payload.sg_cnt, job->request_payload.payload_len);
+
+ job->reply_len = 0;
+
+ switch (bsg_req->msgcode) {
+ case SPRAID_BSG_ADM:
+ ret = spraid_user_admin_cmd(hdev, job);
+ break;
+ case SPRAID_BSG_IOQ:
+ ret = spraid_user_ioq_cmd(hdev, job);
+ break;
+ default:
+ dev_info(hdev->dev, "[%s] unsupport msgcode[%d]\n", __func__, bsg_req->msgcode);
+ break;
+ }
+
+ bsg_job_done(job, ret, 0);
+ return 0;
+}
+
+static inline void spraid_remove_bsg(struct spraid_dev *hdev)
+{
+ if (hdev->bsg_queue) {
+ bsg_unregister_queue(hdev->bsg_queue);
+ blk_cleanup_queue(hdev->bsg_queue);
+ }
+}
static int spraid_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct spraid_dev *hdev;
struct Scsi_Host *shost;
int node, ret;
+ char bsg_name[15];
shost = scsi_host_alloc(&spraid_driver_template, sizeof(*hdev));
if (!shost) {
@@ -3421,10 +3462,10 @@ static int spraid_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto put_dev;
init_rwsem(&hdev->devices_rwsem);
- INIT_WORK(&hdev->aen_work, spraid_async_event_work);
INIT_WORK(&hdev->scan_work, spraid_scan_work);
INIT_WORK(&hdev->timesyn_work, spraid_timesyn_work);
INIT_WORK(&hdev->reset_work, spraid_reset_work);
+ INIT_WORK(&hdev->fw_act_work, spraid_fw_act_work);
spin_lock_init(&hdev->state_lock);
ret = spraid_alloc_resources(hdev);
@@ -3439,17 +3480,13 @@ static int spraid_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (ret)
goto pci_disable;
- ret = spraid_alloc_admin_tags(hdev);
- if (ret)
- goto disable_admin_q;
-
ret = spraid_init_ctrl_info(hdev);
if (ret)
- goto free_admin_tagset;
+ goto disable_admin_q;
ret = spraid_alloc_iod_ext_mem_pool(hdev);
if (ret)
- goto free_admin_tagset;
+ goto disable_admin_q;
ret = spraid_setup_io_queues(hdev);
if (ret)
@@ -3464,9 +3501,14 @@ static int spraid_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto remove_io_queues;
}
- ret = spraid_create_cdev(hdev);
- if (ret)
+ snprintf(bsg_name, sizeof(bsg_name), "spraid%d", shost->host_no);
+ hdev->bsg_queue = bsg_setup_queue(&shost->shost_gendev, bsg_name, spraid_bsg_host_dispatch,
+ NULL, spraid_cmd_size(hdev, true, false));
+ if (IS_ERR(hdev->bsg_queue)) {
+ dev_err(hdev->dev, "err, setup bsg failed\n");
+ hdev->bsg_queue = NULL;
goto remove_io_queues;
+ }
if (hdev->online_queues == SPRAID_ADMIN_QUEUE_NUM) {
dev_warn(hdev->dev, "warn only admin queue can be used\n");
@@ -3475,11 +3517,11 @@ static int spraid_probe(struct pci_dev *pdev, const struct pci_device_id *id)
hdev->state = SPRAID_LIVE;
- spraid_send_aen(hdev);
+ spraid_send_all_aen(hdev);
ret = spraid_dev_list_init(hdev);
if (ret)
- goto remove_cdev;
+ goto remove_bsg;
ret = spraid_configure_timestamp(hdev);
if (ret)
@@ -3487,20 +3529,18 @@ static int spraid_probe(struct pci_dev *pdev, const struct pci_device_id *id)
ret = spraid_alloc_ioq_ptcmds(hdev);
if (ret)
- goto remove_cdev;
+ goto remove_bsg;
scsi_scan_host(hdev->shost);
return 0;
-remove_cdev:
- spraid_remove_cdev(hdev);
+remove_bsg:
+ spraid_remove_bsg(hdev);
remove_io_queues:
spraid_remove_io_queues(hdev);
free_iod_mempool:
spraid_free_iod_ext_mem_pool(hdev);
-free_admin_tagset:
- spraid_remove_admin_tagset(hdev);
disable_admin_q:
spraid_disable_admin_queue(hdev, false);
pci_disable:
@@ -3532,14 +3572,12 @@ static void spraid_remove(struct pci_dev *pdev)
}
flush_work(&hdev->reset_work);
+ spraid_remove_bsg(hdev);
scsi_remove_host(shost);
-
- kfree(hdev->ioq_ptcmds);
+ spraid_free_ioq_ptcmds(hdev);
kfree(hdev->devices);
- spraid_remove_cdev(hdev);
spraid_remove_io_queues(hdev);
spraid_free_iod_ext_mem_pool(hdev);
- spraid_remove_admin_tagset(hdev);
spraid_disable_admin_queue(hdev, false);
spraid_pci_disable(hdev);
spraid_free_resources(hdev);
@@ -3551,7 +3589,7 @@ static void spraid_remove(struct pci_dev *pdev)
}
static const struct pci_device_id spraid_id_table[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_RAMAXEL_LOGIC, SPRAID_SERVER_DEVICE_HAB_DID) },
+ { PCI_DEVICE(PCI_VENDOR_ID_RAMAXEL_LOGIC, SPRAID_SERVER_DEVICE_HBA_DID) },
{ PCI_DEVICE(PCI_VENDOR_ID_RAMAXEL_LOGIC, SPRAID_SERVER_DEVICE_RAID_DID) },
{ 0, }
};
@@ -3563,6 +3601,7 @@ static struct pci_driver spraid_driver = {
.probe = spraid_probe,
.remove = spraid_remove,
.shutdown = spraid_shutdown,
+ .err_handler = &spraid_err_handler,
};
static int __init spraid_init(void)
@@ -3573,14 +3612,10 @@ static int __init spraid_init(void)
if (!spraid_wq)
return -ENOMEM;
- ret = alloc_chrdev_region(&spraid_chr_devt, 0, SPRAID_MINORS, "spraid");
- if (ret < 0)
- goto destroy_wq;
-
spraid_class = class_create(THIS_MODULE, "spraid");
if (IS_ERR(spraid_class)) {
ret = PTR_ERR(spraid_class);
- goto unregister_chrdev;
+ goto destroy_wq;
}
ret = pci_register_driver(&spraid_driver);
@@ -3591,8 +3626,6 @@ static int __init spraid_init(void)
destroy_class:
class_destroy(spraid_class);
-unregister_chrdev:
- unregister_chrdev_region(spraid_chr_devt, SPRAID_MINORS);
destroy_wq:
destroy_workqueue(spraid_wq);
@@ -3603,12 +3636,11 @@ static void __exit spraid_exit(void)
{
pci_unregister_driver(&spraid_driver);
class_destroy(spraid_class);
- unregister_chrdev_region(spraid_chr_devt, SPRAID_MINORS);
destroy_workqueue(spraid_wq);
ida_destroy(&spraid_instance_ida);
}
-MODULE_AUTHOR("Ramaxel Memory Technology");
+MODULE_AUTHOR("songyl(a)ramaxel.com");
MODULE_DESCRIPTION("Ramaxel Memory Technology SPraid Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(SPRAID_DRV_VERSION);
--
2.27.0
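The converted info helpers above (spraid_get_ctrl_info, spraid_get_vd_info,
spraid_get_bgtask) all follow one pattern: allocate a DMA-coherent page, point
the command's prp1 at it, submit synchronously, and copy the payload out only
on success. A minimal sketch of that round trip, reusing the spraid_* names
from the diff (the generic helper itself is illustrative, not part of the
patch):

/* Sketch only: the shared DMA round trip behind the info queries above.
 * spraid_* names follow the patch; this helper is not in the driver. */
static int spraid_get_info_page(struct spraid_dev *hdev, void *out, size_t len)
{
	struct spraid_admin_command admin_cmd;
	dma_addr_t data_dma = 0;
	u8 *data_ptr;
	int ret;

	if (len > PAGE_SIZE)
		return -EINVAL;

	/* One DMA-coherent page serves as the transfer buffer. */
	data_ptr = dma_alloc_coherent(hdev->dev, PAGE_SIZE, &data_dma, GFP_KERNEL);
	if (!data_ptr)
		return -ENOMEM;

	memset(&admin_cmd, 0, sizeof(admin_cmd));
	admin_cmd.get_info.opcode = SPRAID_ADMIN_GET_INFO;
	admin_cmd.common.dptr.prp1 = cpu_to_le64(data_dma);

	/* Synchronous submit; copy the result out only on success. */
	ret = spraid_submit_admin_sync_cmd(hdev, &admin_cmd, NULL, NULL, 0);
	if (!ret)
		memcpy(out, data_ptr, len);

	dma_free_coherent(hdev->dev, PAGE_SIZE, data_ptr, data_dma);
	return ret;
}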
Re: [PATCH openEuler-1.0-LTS 1/1] ras: report cpu logical index to userspace in arm event
by Xie XiuQi 25 Nov '21
Hi,
Has this patch been merged into openEuler 20.03?
Changing the trace format amounts to changing the ABI. To avoid compatibility
problems with upper-layer applications such as rasdaemon: was this also
changed in previous versions?
Also, if the goal is to add the cpu logical index, how about pushing it to the
upstream community, so that we don't have to port this patch in every release?
On 2021/11/25 15:30, lostway(a)zju.edu.cn wrote:
> From: Lostwayzxc <luoshengwei(a)huawei.com>
>
> kunpeng inclusion
> category: feature
> bugzilla: https://gitee.com/openeuler/kernel/issues/I4IG00?from=project-issue
> CVE: NA
>
> When the arm event is reported, the rasdaemon needs to know the cpu logical index,
> but the record carries only the mpidr, with no mapping between it and the cpu
> logical index. Since the kernel has saved the mapping, get the logical index via
> get_logical_index() and report it directly to userspace via the perf i/f.
>
> Signed-off-by: Shengwei Luo <luoshengwei(a)huawei.com>
> ---
> drivers/ras/ras.c | 8 +++++++-
> include/linux/ras.h | 11 +++++++++++
> include/ras/ras_event.h | 10 +++++++---
> 3 files changed, 25 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
> index 9302ed7f4258..a526f124a5ff 100644
> --- a/drivers/ras/ras.c
> +++ b/drivers/ras/ras.c
> @@ -32,6 +32,7 @@ void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
> struct cper_arm_err_info *err_info;
> struct cper_arm_ctx_info *ctx_info;
> int n, sz;
> + int cpu;
>
> pei_len = sizeof(struct cper_arm_err_info) * err->err_info_num;
> pei_err = (u8 *)err + sizeof(struct cper_sec_proc_arm);
> @@ -58,8 +59,13 @@ void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
> }
> ven_err_data = (u8 *)ctx_info;
>
> + cpu = GET_LOGICAL_INDEX(err->mpidr);
> + /* when the return value is invalid, set cpu index to a large integer */
> + if (cpu < 0)
> + cpu = 0xFFFF;
> +
> trace_arm_event(err, pei_err, pei_len, ctx_err, ctx_len,
> - ven_err_data, vsei_len, sev);
> + ven_err_data, vsei_len, sev, cpu);
> }
>
> static int __init ras_init(void)
> diff --git a/include/linux/ras.h b/include/linux/ras.h
> index 3431b4a5fa42..e5ec31ad7a13 100644
> --- a/include/linux/ras.h
> +++ b/include/linux/ras.h
> @@ -40,4 +40,15 @@ static inline void
> log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev) { return; }
> #endif
>
> +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
> +#include <asm/smp_plat.h>
> +/*
> + * Include ARM specific SMP header which provides a function mapping mpidr to
> + * cpu logical index.
> + */
> +#define GET_LOGICAL_INDEX(mpidr) get_logical_index(mpidr & MPIDR_HWID_BITMASK)
> +#else
> +#define GET_LOGICAL_INDEX(mpidr) -EINVAL
> +#endif /* CONFIG_ARM || CONFIG_ARM64 */
> +
> #endif /* __RAS_H__ */
> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
> index 7c8cb123ba32..2d6a662886e6 100644
> --- a/include/ras/ras_event.h
> +++ b/include/ras/ras_event.h
> @@ -182,9 +182,10 @@ TRACE_EVENT(arm_event,
> const u32 ctx_len,
> const u8 *oem,
> const u32 oem_len,
> - u8 sev),
> + u8 sev,
> + int cpu),
>
> - TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len, sev),
> + TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len, sev, cpu),
>
> TP_STRUCT__entry(
> __field(u64, mpidr)
> @@ -199,6 +200,7 @@ TRACE_EVENT(arm_event,
> __field(u32, oem_len)
> __dynamic_array(u8, buf2, oem_len)
> __field(u8, sev)
> + __field(int, cpu)
> ),
>
> TP_fast_assign(
> @@ -225,11 +227,13 @@ TRACE_EVENT(arm_event,
> __entry->oem_len = oem_len;
> memcpy(__get_dynamic_array(buf2), oem, oem_len);
> __entry->sev = sev;
> + __entry->cpu = cpu;
> ),
>
> - TP_printk("error: %d; affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
> + TP_printk("cpu: %d; error: %d; affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
> "running state: %d; PSCI state: %d; "
> "%s: %d; %s: %s; %s: %d; %s: %s; %s: %d; %s: %s",
> + __entry->cpu,
> __entry->sev,
> __entry->affinity, __entry->mpidr, __entry->midr,
> __entry->running_state, __entry->psci_state,
>
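The mechanism under review is small enough to show on its own: resolve the
firmware-reported mpidr to a kernel cpu logical index and fall back to a
sentinel when the mapping fails. A minimal sketch, assuming the
GET_LOGICAL_INDEX() wrapper added by the patch (the helper itself is
illustrative):

/* Sketch only: mpidr -> logical cpu, with the 0xFFFF sentinel the patch
 * uses so the trace event always carries a cpu field. */
static int arm_err_cpu_index(const struct cper_sec_proc_arm *err)
{
	int cpu = GET_LOGICAL_INDEX(err->mpidr);

	return (cpu < 0) ? 0xFFFF : cpu;
}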
Hello,
Welcome to the openEuler kernel community.
The kernel SIG regular meeting is held on Friday afternoon every other week.
The next meeting is on December 3 (next Friday), 2:10-4:00 PM.
You can subscribe to kernel(a)openeuler.org and kernel-discuss(a)openeuer.org.
You can propose topics directly by email.
On 2021/11/24 13:11, yan.yang(a)i-soft.com.cn wrote:
> Hello,
>
> I am Yang Yan from iSoft Infrastructure Software, responsible for tracking new
> developments in the openEuler kernel. I would like to attend your kernel SIG
> meetings, but the web page does not list the meeting time or how to join.
> Could you please tell me how to take part in the SIG meetings? Thanks!
>
> ----------------
>
> Best Regards
>
> Yang Yan
>
> Address: 3F, Building E, Yard 7, Rongda Road (Taiji Information Industry
> Park), Chaoyang District, Beijing
> Postal code: 100102
> Tel: +86 10 8406 5566-8102
> Fax: +86 10 8496 6005
> Mobile: +86 158 1062 8953
> Web: www.i-soft.com.cn <http://www.i-soft.com.cn/>
>
[PATCH openEuler-1.0-LTS] lib/clear_user: ensure loop in __arch_clear_user cache-aligned v2
by Yang Yingliang 24 Nov '21
From: Cheng Jian <cj.chengjian(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I3OX0C
CVE: NA
--------------------------------
We must ensure that the following four instructions are cache-aligned.
Otherwise, they cause problems with the performance of the libMicro
pread benchmark.
1:
# uao_user_alternative 9f, str, sttr, xzr, x0, 8
str xzr, [x0], #8
nop
subs x1, x1, #8
b.pl 1b
with this patch:
prc thr usecs/call samples errors cnt/samp size
pread_z100 1 1 5.88400 807 0 1 102400
The pread result can range from 5 to 9 usecs/call depending on
how this function happens to be aligned.
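The constraint is easy to restate: the loop body is four instructions
(16 bytes), and the added ".align 5" pads the loop head to a 32-byte boundary
so the body never straddles a fetch/cache boundary. A hypothetical C helper
that checks this property for an address range (illustrative only, not part
of the patch):

/* Illustrative: does [addr, addr + len) fit inside one naturally aligned
 * 2^shift-byte block?  ".align 5" guarantees this for shift == 5. */
static bool fits_in_aligned_block(unsigned long addr, unsigned long len,
				  unsigned int shift)
{
	unsigned long mask = ~((1UL << shift) - 1);

	return (addr & mask) == ((addr + len - 1) & mask);
}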
Signed-off-by: Cheng Jian <cj.chengjian(a)huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
arch/arm64/lib/clear_user.S | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index 9ebc5d84e6154..410768a8d4166 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -20,7 +20,7 @@
#include <asm/asm-uaccess.h>
.text
- .align 6
+
/* Prototype: int __arch_clear_user(void *addr, size_t sz)
* Purpose : clear some user memory
* Params : addr - user memory address to clear
@@ -34,6 +34,9 @@ ENTRY(__arch_clear_user)
mov x2, x1 // save the size for fixup return
subs x1, x1, #8
b.mi 2f
+#ifdef CONFIG_ARCH_HISI
+ .align 5
+#endif
1:
uao_user_alternative 9f, str, sttr, xzr, x0, 8
subs x1, x1, #8
--
2.25.1
[PATCH OLK-5.10] atlantic: Fix OOB read and write in hw_atl_utils_fw_rpc_wait
by Zheng Zengkai 23 Nov '21
From: Zekun Shen <bruceshenzk(a)gmail.com>
mainline inclusion
from mainline
commit b922f622592af76b57cbc566eaeccda0b31a3496
bugzilla: 185788 https://e.gitee.com/open_euler/dashboard?issue=I4IYTG
CVE: CVE-2021-43975
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
This bug report shows up when running our research tools. The
report is a SOOB read, but it seems a SOOB write is also possible
a few lines below.
In detail, fw.len and sw.len are inputs coming from IO. A len
larger than the size of self->rpc triggers the SOOB. The patch fixes
the bugs by adding sanity checks.
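The fix is the standard pattern for device-supplied lengths: validate against
the destination buffer before any transfer. A minimal sketch of the check the
patch adds in both places, with names taken from the diff below (the
standalone helper is illustrative, not part of the driver):

/* Sketch only: reject a device-reported length that exceeds the
 * in-memory RPC buffer before any upload/download runs. */
static int aq_check_rpc_len(struct aq_hw_s *self, u32 len, const char *what)
{
	if (len > sizeof(self->rpc)) {
		printk(KERN_INFO "Invalid %s len: %x\n", what, len);
		return -EINVAL;
	}
	return 0;
}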
The bugs are triggerable with compromised/malfunctioning devices.
They are potentially exploitable given they first leak up to
0xffff bytes and can overwrite the region later.
The patch is tested with the QEMU emulator.
This is NOT tested with a real device.
Attached is the log we found by fuzzing.
BUG: KASAN: slab-out-of-bounds in
hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
Read of size 4 at addr ffff888016260b08 by task modprobe/213
CPU: 0 PID: 213 Comm: modprobe Not tainted 5.6.0 #1
Call Trace:
dump_stack+0x76/0xa0
print_address_description.constprop.0+0x16/0x200
? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
__kasan_report.cold+0x37/0x7c
? aq_hw_read_reg_bit+0x60/0x70 [atlantic]
? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
kasan_report+0xe/0x20
hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
hw_atl_utils_fw_rpc_call+0x95/0x130 [atlantic]
hw_atl_utils_fw_rpc_wait+0x176/0x210 [atlantic]
hw_atl_utils_mpi_create+0x229/0x2e0 [atlantic]
? hw_atl_utils_fw_rpc_wait+0x210/0x210 [atlantic]
? hw_atl_utils_initfw+0x9f/0x1c8 [atlantic]
hw_atl_utils_initfw+0x12a/0x1c8 [atlantic]
aq_nic_ndev_register+0x88/0x650 [atlantic]
? aq_nic_ndev_init+0x235/0x3c0 [atlantic]
aq_pci_probe+0x731/0x9b0 [atlantic]
? aq_pci_func_init+0xc0/0xc0 [atlantic]
local_pci_probe+0xd3/0x160
pci_device_probe+0x23f/0x3e0
Reported-by: Brendan Dolan-Gavitt <brendandg(a)nyu.edu>
Signed-off-by: Zekun Shen <bruceshenzk(a)gmail.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Reviewed-by: Wei Yongjun <weiyongjun1(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
.../ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 404cbf60d3f2..da1d185f6d22 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -559,6 +559,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
goto err_exit;
if (fw.len == 0xFFFFU) {
+ if (sw.len > sizeof(self->rpc)) {
+ printk(KERN_INFO "Invalid sw len: %x\n", sw.len);
+ err = -EINVAL;
+ goto err_exit;
+ }
err = hw_atl_utils_fw_rpc_call(self, sw.len);
if (err < 0)
goto err_exit;
@@ -567,6 +572,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
if (rpc) {
if (fw.len) {
+ if (fw.len > sizeof(self->rpc)) {
+ printk(KERN_INFO "Invalid fw len: %x\n", fw.len);
+ err = -EINVAL;
+ goto err_exit;
+ }
err =
hw_atl_utils_fw_downld_dwords(self,
self->rpc_addr,
--
2.20.1
[PATCH openEuler-1.0-LTS] drm/ioctl: Ditch DRM_UNLOCKED except for the legacy vblank ioctl
by Yang Yingliang 23 Nov '21
From: Daniel Vetter <daniel.vetter(a)ffwll.ch>
mainline inclusion
from mainline-v5.4-rc1
commit 75426367cd377120a256cad0b35b02eec4b83591
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4JI7R
CVE: NA
--------------------------------
This completes Emil's series of removing DRM_UNLOCKED from modern
drivers. It's entirely cargo-culted since we ignore it on
non-DRIVER_LEGACY drivers since:
commit ea487835e8876abf7ad909636e308c801a2bcda6
Author: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Date: Mon Sep 28 21:42:40 2015 +0200
drm: Enforce unlocked ioctl operation for kms driver ioctls
Now justifying why we can do this for legacy drivers too (and hence
close the source of all the bogus copypasting) is a bit more involved.
DRM_UNLOCKED was introduced in:
commit ed8b67040965e4fe695db333d5914e18ea5f146f
Author: Arnd Bergmann <arnd(a)arndb.de>
Date: Wed Dec 16 22:17:09 2009 +0000
drm: convert drm_ioctl to unlocked_ioctl
As an immediate hack to keep i810 happy, which would have deadlocked
without this trickery. The old BKL is automatically dropped in
schedule(), and hence the i810 vs. mmap_sem deadlock didn't actually
cause a real deadlock. But with a mutex it would. The solution was to
annotate these as DRM_UNLOCKED and mark i810 unsafe on SMP machines.
This conversion caused a regression, because unlike the BKL a mutex
isn't dropped over schedule (that thing again), which caused a vblank
wait in one thread to block the entire desktop and all its apps. Back
then we did vblank scheduling by blocking in the client, awesome isn't
it. This was fixed quickly in (ok not so quickly, took 2 years):
commit 8f4ff2b06afcd6f151868474a432c603057eaf56
Author: Ilija Hadzic <ihadzic(a)research.bell-labs.com>
Date: Mon Oct 31 17:46:18 2011 -0400
drm: do not sleep on vblank while holding a mutex
All the other DRM_UNLOCKED annotations for all the core ioctls was
work to reach finer-grained locking for modern drivers. This took
years, and culminated in:
commit fdd5b877e9ebc2029e1373b4a3cd057329a9ab7a
Author: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Date: Sat Dec 10 22:52:54 2016 +0100
drm: Enforce BKL-less ioctls for modern drivers
DRM_UNLOCKED was never required by any legacy drivers, except for the
vblank_wait IOCTL. Therefore we will not regress these old drivers by
going back to where we've been in 2011. For all modern drivers nothing
will change.
To make this perfectly clear, also add a comment to DRM_UNLOCKED.
v2: Don't forget about drm_ioc32.c (Michel).
Cc: Michel Dänzer <michel(a)daenzer.net>
Cc: Emil Velikov <emil.l.velikov(a)gmail.com>
Acked-by: Emil Velikov <emil.velikov(a)collabora.com>
Acked-by: Michel Dänzer <michel(a)daenzer.net>
Signed-off-by: Daniel Vetter <daniel.vetter(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190605120835.2798-1-daniel.…
Signed-off-by: Liu ZiXian <liuzixian4(a)huawei.com>
Signed-off-by: Cheng Jian <cj.chengjian(a)huawei.com>
Reviewed-by: wangxiongfeng 00379786 <wangxiongfeng2(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/gpu/drm/drm_ioc32.c | 13 ++--
drivers/gpu/drm/drm_ioctl.c | 131 ++++++++++++++++++------------------
include/drm/drm_ioctl.h | 3 +
3 files changed, 74 insertions(+), 73 deletions(-)
diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index f8672238d444b..ed20dbae66070 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c
@@ -105,7 +105,7 @@ static int compat_drm_version(struct file *file, unsigned int cmd,
.desc = compat_ptr(v32.desc),
};
err = drm_ioctl_kernel(file, drm_version, &v,
- DRM_UNLOCKED|DRM_RENDER_ALLOW);
+ DRM_RENDER_ALLOW);
if (err)
return err;
@@ -139,7 +139,7 @@ static int compat_drm_getunique(struct file *file, unsigned int cmd,
.unique = compat_ptr(uq32.unique),
};
- err = drm_ioctl_kernel(file, drm_getunique, &uq, DRM_UNLOCKED);
+ err = drm_ioctl_kernel(file, drm_getunique, &uq, 0);
if (err)
return err;
@@ -177,7 +177,7 @@ static int compat_drm_getmap(struct file *file, unsigned int cmd,
return -EFAULT;
map.offset = m32.offset;
- err = drm_ioctl_kernel(file, drm_legacy_getmap_ioctl, &map, DRM_UNLOCKED);
+ err = drm_ioctl_kernel(file, drm_legacy_getmap_ioctl, &map, 0);
if (err)
return err;
@@ -262,7 +262,7 @@ static int compat_drm_getclient(struct file *file, unsigned int cmd,
client.idx = c32.idx;
- err = drm_ioctl_kernel(file, drm_getclient, &client, DRM_UNLOCKED);
+ err = drm_ioctl_kernel(file, drm_getclient, &client, 0);
if (err)
return err;
@@ -292,7 +292,7 @@ static int compat_drm_getstats(struct file *file, unsigned int cmd,
drm_stats32_t __user *argp = (void __user *)arg;
int err;
- err = drm_ioctl_kernel(file, drm_noop, NULL, DRM_UNLOCKED);
+ err = drm_ioctl_kernel(file, drm_noop, NULL, 0);
if (err)
return err;
@@ -887,8 +887,7 @@ static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd,
sizeof(req64.modifier)))
return -EFAULT;
- err = drm_ioctl_kernel(file, drm_mode_addfb2, &req64,
- DRM_UNLOCKED);
+ err = drm_ioctl_kernel(file, drm_mode_addfb2, &req64, 0);
if (err)
return err;
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index ba129b64b61fc..3fe512b8a488b 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -549,22 +549,21 @@ EXPORT_SYMBOL(drm_ioctl_permit);
/* Ioctl table */
static const struct drm_ioctl_desc drm_ioctls[] = {
- DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, 0),
DRM_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_irq_by_busid, DRM_MASTER|DRM_ROOT_ONLY),
- DRM_IOCTL_DEF(DRM_IOCTL_GET_MAP, drm_legacy_getmap_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_GET_CLIENT, drm_getclient, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_GET_STATS, drm_getstats, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_GET_CAP, drm_getcap, DRM_UNLOCKED|DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF(DRM_IOCTL_SET_CLIENT_CAP, drm_setclientcap, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_SET_VERSION, drm_setversion, DRM_UNLOCKED | DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_GET_MAP, drm_legacy_getmap_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_GET_CLIENT, drm_getclient, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_GET_STATS, drm_getstats, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_GET_CAP, drm_getcap, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF(DRM_IOCTL_SET_CLIENT_CAP, drm_setclientcap, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_SET_VERSION, drm_setversion, DRM_MASTER),
DRM_IOCTL_DEF(DRM_IOCTL_SET_UNIQUE, drm_invalid_op, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF(DRM_IOCTL_BLOCK, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF(DRM_IOCTL_UNBLOCK, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
- DRM_IOCTL_DEF(DRM_IOCTL_AUTH_MAGIC, drm_authmagic, DRM_AUTH|DRM_UNLOCKED|DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_AUTH_MAGIC, drm_authmagic, DRM_AUTH|DRM_MASTER),
DRM_IOCTL_DEF(DRM_IOCTL_ADD_MAP, drm_legacy_addmap_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF(DRM_IOCTL_RM_MAP, drm_legacy_rmmap_ioctl, DRM_AUTH),
@@ -572,8 +571,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
DRM_IOCTL_DEF(DRM_IOCTL_SET_SAREA_CTX, drm_legacy_setsareactx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF(DRM_IOCTL_GET_SAREA_CTX, drm_legacy_getsareactx, DRM_AUTH),
- DRM_IOCTL_DEF(DRM_IOCTL_SET_MASTER, drm_setmaster_ioctl, DRM_UNLOCKED|DRM_ROOT_ONLY),
- DRM_IOCTL_DEF(DRM_IOCTL_DROP_MASTER, drm_dropmaster_ioctl, DRM_UNLOCKED|DRM_ROOT_ONLY),
+ DRM_IOCTL_DEF(DRM_IOCTL_SET_MASTER, drm_setmaster_ioctl, DRM_ROOT_ONLY),
+ DRM_IOCTL_DEF(DRM_IOCTL_DROP_MASTER, drm_dropmaster_ioctl, DRM_ROOT_ONLY),
DRM_IOCTL_DEF(DRM_IOCTL_ADD_CTX, drm_legacy_addctx, DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF(DRM_IOCTL_RM_CTX, drm_legacy_rmctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
@@ -620,66 +619,66 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
- DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH|DRM_UNLOCKED),
-
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_UNLOCKED),
-
- DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
-
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_connector_property_set_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH),
+ DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH),
+
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, 0),
+
+ DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_connector_property_set_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, 0),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_CREATE, drm_syncobj_create_ioctl,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_DESTROY, drm_syncobj_destroy_ioctl,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, drm_syncobj_handle_to_fd_ioctl,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER),
};
#define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls )
@@ -747,7 +746,7 @@ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata,
return retcode;
/* Enforce sane locking for modern driver ioctls. */
- if (!drm_core_check_feature(dev, DRIVER_LEGACY) ||
+ if (likely(!drm_core_check_feature(dev, DRIVER_LEGACY)) ||
(flags & DRM_UNLOCKED))
retcode = func(dev, kdata, file_priv);
else {
diff --git a/include/drm/drm_ioctl.h b/include/drm/drm_ioctl.h
index fafb6f592c4b9..10100a4bbe2ad 100644
--- a/include/drm/drm_ioctl.h
+++ b/include/drm/drm_ioctl.h
@@ -114,6 +114,9 @@ enum drm_ioctl_flags {
* Whether &drm_ioctl_desc.func should be called with the DRM BKL held
* or not. Enforced as the default for all modern drivers, hence there
* should never be a need to set this flag.
+ *
+ * Do not use anywhere else than for the VBLANK_WAIT IOCTL, which is the
+ * only legacy IOCTL which needs this.
*/
DRM_UNLOCKED = BIT(4),
/**
--
2.25.1
[PATCH openEuler-1.0-LTS 1/2] share_pool: add mm address check when access the process's sp_group file
by Yang Yingliang 23 Nov '21
From: Zhang Jian <zhangjian210(a)huawei.com>
ascend inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4JICC
CVE: NA
-------------------------------------------------
When we access the process's sp_group file and the process is a kernel
process, its task_struct->mm will be NULL, so we must check it and make
sure the process is not a kernel process.
v1->v2: The path where the process is a kernel process is rarely
triggered, so add an unlikely() annotation to speed up execution.
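For context, the guard added here is the standard kernel-thread check;
a minimal sketch of the fixed flow (names taken from the diff below):

	struct mm_struct *mm = task->mm;

	/* kernel threads have no user address space, so ->mm is NULL */
	if (unlikely(!mm))
		return 0;

	master = mm->sp_group_master;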
Signed-off-by: Zhang Jian <zhangjian210(a)huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong(a)huawei.com>
Reviewed-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
mm/share_pool.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index c6dcae92f67b5..2cc15f894cf84 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -3851,13 +3851,17 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
struct mm_struct *mm = task->mm;
- struct sp_group_master *master = mm->sp_group_master;
+ struct sp_group_master *master;
struct sp_proc_stat *proc_stat;
struct spg_proc_stat *spg_proc_stat;
int i;
unsigned long anon, file, shmem, total_rss, prot;
long sp_res, sp_res_nsize, non_sp_res, non_sp_shm;
+ if (unlikely(!mm))
+ return 0;
+
+ master = mm->sp_group_master;
if (!master)
return 0;
--
2.25.1
23 Nov '21
hulk inclusion
category: bugfix
bugzilla: NA
CVE: NA
--------------------------------
Enable some configs for test.
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
arch/arm64/configs/hulk_defconfig | 66 +++++++++++++++++++++++++------
1 file changed, 55 insertions(+), 11 deletions(-)
diff --git a/arch/arm64/configs/hulk_defconfig b/arch/arm64/configs/hulk_defconfig
index 413667c434ee6..fdf628f1fa028 100644
--- a/arch/arm64/configs/hulk_defconfig
+++ b/arch/arm64/configs/hulk_defconfig
@@ -566,7 +566,7 @@ CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
-# CONFIG_CPU_FREQ_GOV_SCHEDUTIL is not set
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
#
# CPU frequency scaling drivers
@@ -1889,7 +1889,22 @@ CONFIG_MTD_PHYSMAP_OF=m
#
# CONFIG_MTD_DOCG3 is not set
# CONFIG_MTD_ONENAND is not set
-# CONFIG_MTD_NAND is not set
+CONFIG_MTD_NAND_ECC=m
+# CONFIG_MTD_NAND_ECC_SMC is not set
+CONFIG_MTD_NAND=m
+# CONFIG_MTD_NAND_ECC_BCH is not set
+# CONFIG_MTD_NAND_DENALI_PCI is not set
+# CONFIG_MTD_NAND_DENALI_DT is not set
+# CONFIG_MTD_NAND_GPIO is not set
+# CONFIG_MTD_NAND_RICOH is not set
+# CONFIG_MTD_NAND_DISKONCHIP is not set
+# CONFIG_MTD_NAND_DOCG4 is not set
+# CONFIG_MTD_NAND_CAFE is not set
+CONFIG_MTD_NAND_NANDSIM=m
+# CONFIG_MTD_NAND_BRCMNAND is not set
+# CONFIG_MTD_NAND_PLATFORM is not set
+# CONFIG_MTD_NAND_HISI504 is not set
+# CONFIG_MTD_NAND_QCOM is not set
# CONFIG_MTD_SPI_NAND is not set
#
@@ -4840,7 +4855,13 @@ CONFIG_XFS_POSIX_ACL=y
# CONFIG_XFS_DEBUG is not set
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
-# CONFIG_BTRFS_FS is not set
+CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_BTRFS_FS_CHECK_INTEGRITY=y
+CONFIG_BTRFS_FS_RUN_SANITY_TESTS=y
+CONFIG_BTRFS_DEBUG=y
+CONFIG_BTRFS_ASSERT=y
+CONFIG_BTRFS_FS_REF_VERIFY=y
# CONFIG_NILFS2_FS is not set
# CONFIG_F2FS_FS is not set
CONFIG_FS_DAX=y
@@ -4937,8 +4958,31 @@ CONFIG_MISC_FILESYSTEMS=y
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
-# CONFIG_JFFS2_FS is not set
-# CONFIG_UBIFS_FS is not set
+CONFIG_JFFS2_FS=m
+CONFIG_JFFS2_FS_DEBUG=0
+CONFIG_JFFS2_FS_WRITEBUFFER=y
+CONFIG_JFFS2_FS_WBUF_VERIFY=y
+CONFIG_JFFS2_SUMMARY=y
+CONFIG_JFFS2_FS_XATTR=y
+CONFIG_JFFS2_FS_POSIX_ACL=y
+CONFIG_JFFS2_FS_SECURITY=y
+CONFIG_JFFS2_COMPRESSION_OPTIONS=y
+CONFIG_JFFS2_ZLIB=y
+CONFIG_JFFS2_LZO=y
+CONFIG_JFFS2_RTIME=y
+CONFIG_JFFS2_RUBIN=y
+# CONFIG_JFFS2_CMODE_NONE is not set
+CONFIG_JFFS2_CMODE_PRIORITY=y
+# CONFIG_JFFS2_CMODE_SIZE is not set
+# CONFIG_JFFS2_CMODE_FAVOURLZO is not set
+CONFIG_UBIFS_FS=m
+# CONFIG_UBIFS_FS_ADVANCED_COMPR is not set
+CONFIG_UBIFS_FS_LZO=y
+CONFIG_UBIFS_FS_ZLIB=y
+# CONFIG_UBIFS_ATIME_SUPPORT is not set
+CONFIG_UBIFS_FS_XATTR=y
+# CONFIG_UBIFS_FS_ENCRYPTION is not set
+CONFIG_UBIFS_FS_SECURITY=y
CONFIG_CRAMFS=m
CONFIG_CRAMFS_BLOCKDEV=y
# CONFIG_CRAMFS_MTD is not set
@@ -5503,7 +5547,7 @@ CONFIG_DEBUG_KERNEL=y
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_PAGE_POISONING is not set
# CONFIG_DEBUG_PAGE_REF is not set
-# CONFIG_DEBUG_RODATA_TEST is not set
+CONFIG_DEBUG_RODATA_TEST=y
# CONFIG_DEBUG_OBJECTS is not set
# CONFIG_SLUB_DEBUG_ON is not set
# CONFIG_SLUB_STATS is not set
@@ -5656,8 +5700,8 @@ CONFIG_ASYNC_RAID6_TEST=m
# CONFIG_TEST_HEXDUMP is not set
# CONFIG_TEST_STRING_HELPERS is not set
CONFIG_TEST_KSTRTOX=y
-# CONFIG_TEST_PRINTF is not set
-# CONFIG_TEST_BITMAP is not set
+CONFIG_TEST_PRINTF=m
+CONFIG_TEST_BITMAP=m
# CONFIG_TEST_BITFIELD is not set
# CONFIG_TEST_UUID is not set
# CONFIG_TEST_OVERFLOW is not set
@@ -5665,13 +5709,13 @@ CONFIG_TEST_KSTRTOX=y
# CONFIG_TEST_HASH is not set
# CONFIG_TEST_IDA is not set
# CONFIG_TEST_LKM is not set
-# CONFIG_TEST_USER_COPY is not set
+CONFIG_TEST_USER_COPY=m
# CONFIG_TEST_BPF is not set
# CONFIG_FIND_BIT_BENCHMARK is not set
# CONFIG_TEST_FIRMWARE is not set
-# CONFIG_TEST_SYSCTL is not set
+CONFIG_TEST_SYSCTL=y
# CONFIG_TEST_UDELAY is not set
-# CONFIG_TEST_STATIC_KEYS is not set
+CONFIG_TEST_STATIC_KEYS=m
# CONFIG_TEST_KMOD is not set
# CONFIG_TEST_FREE_PAGES is not set
# CONFIG_MEMTEST is not set
--
2.25.1
23 Nov '21
hulk inclusion
category: bugfix
bugzilla: NA
CVE: NA
--------------------------------
Enable some configs for test.
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
arch/arm64/configs/hulk_defconfig | 60 +++++++++++++++++++++++++------
1 file changed, 49 insertions(+), 11 deletions(-)
diff --git a/arch/arm64/configs/hulk_defconfig b/arch/arm64/configs/hulk_defconfig
index 413667c434ee6..e540b799f8d60 100644
--- a/arch/arm64/configs/hulk_defconfig
+++ b/arch/arm64/configs/hulk_defconfig
@@ -566,7 +566,7 @@ CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
-# CONFIG_CPU_FREQ_GOV_SCHEDUTIL is not set
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
#
# CPU frequency scaling drivers
@@ -1889,7 +1889,22 @@ CONFIG_MTD_PHYSMAP_OF=m
#
# CONFIG_MTD_DOCG3 is not set
# CONFIG_MTD_ONENAND is not set
-# CONFIG_MTD_NAND is not set
+CONFIG_MTD_NAND_ECC=m
+# CONFIG_MTD_NAND_ECC_SMC is not set
+CONFIG_MTD_NAND=m
+# CONFIG_MTD_NAND_ECC_BCH is not set
+# CONFIG_MTD_NAND_DENALI_PCI is not set
+# CONFIG_MTD_NAND_DENALI_DT is not set
+# CONFIG_MTD_NAND_GPIO is not set
+# CONFIG_MTD_NAND_RICOH is not set
+# CONFIG_MTD_NAND_DISKONCHIP is not set
+# CONFIG_MTD_NAND_DOCG4 is not set
+# CONFIG_MTD_NAND_CAFE is not set
+CONFIG_MTD_NAND_NANDSIM=m
+# CONFIG_MTD_NAND_BRCMNAND is not set
+# CONFIG_MTD_NAND_PLATFORM is not set
+# CONFIG_MTD_NAND_HISI504 is not set
+# CONFIG_MTD_NAND_QCOM is not set
# CONFIG_MTD_SPI_NAND is not set
#
@@ -4840,7 +4855,7 @@ CONFIG_XFS_POSIX_ACL=y
# CONFIG_XFS_DEBUG is not set
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
-# CONFIG_BTRFS_FS is not set
+CONFIG_BTRFS_FS=y
# CONFIG_NILFS2_FS is not set
# CONFIG_F2FS_FS is not set
CONFIG_FS_DAX=y
@@ -4937,8 +4952,31 @@ CONFIG_MISC_FILESYSTEMS=y
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
-# CONFIG_JFFS2_FS is not set
-# CONFIG_UBIFS_FS is not set
+CONFIG_JFFS2_FS=m
+CONFIG_JFFS2_FS_DEBUG=0
+CONFIG_JFFS2_FS_WRITEBUFFER=y
+CONFIG_JFFS2_FS_WBUF_VERIFY=y
+CONFIG_JFFS2_SUMMARY=y
+CONFIG_JFFS2_FS_XATTR=y
+CONFIG_JFFS2_FS_POSIX_ACL=y
+CONFIG_JFFS2_FS_SECURITY=y
+CONFIG_JFFS2_COMPRESSION_OPTIONS=y
+CONFIG_JFFS2_ZLIB=y
+CONFIG_JFFS2_LZO=y
+CONFIG_JFFS2_RTIME=y
+CONFIG_JFFS2_RUBIN=y
+# CONFIG_JFFS2_CMODE_NONE is not set
+CONFIG_JFFS2_CMODE_PRIORITY=y
+# CONFIG_JFFS2_CMODE_SIZE is not set
+# CONFIG_JFFS2_CMODE_FAVOURLZO is not set
+CONFIG_UBIFS_FS=m
+# CONFIG_UBIFS_FS_ADVANCED_COMPR is not set
+CONFIG_UBIFS_FS_LZO=y
+CONFIG_UBIFS_FS_ZLIB=y
+# CONFIG_UBIFS_ATIME_SUPPORT is not set
+CONFIG_UBIFS_FS_XATTR=y
+# CONFIG_UBIFS_FS_ENCRYPTION is not set
+CONFIG_UBIFS_FS_SECURITY=y
CONFIG_CRAMFS=m
CONFIG_CRAMFS_BLOCKDEV=y
# CONFIG_CRAMFS_MTD is not set
@@ -5503,7 +5541,7 @@ CONFIG_DEBUG_KERNEL=y
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_PAGE_POISONING is not set
# CONFIG_DEBUG_PAGE_REF is not set
-# CONFIG_DEBUG_RODATA_TEST is not set
+CONFIG_DEBUG_RODATA_TEST=y
# CONFIG_DEBUG_OBJECTS is not set
# CONFIG_SLUB_DEBUG_ON is not set
# CONFIG_SLUB_STATS is not set
@@ -5656,8 +5694,8 @@ CONFIG_ASYNC_RAID6_TEST=m
# CONFIG_TEST_HEXDUMP is not set
# CONFIG_TEST_STRING_HELPERS is not set
CONFIG_TEST_KSTRTOX=y
-# CONFIG_TEST_PRINTF is not set
-# CONFIG_TEST_BITMAP is not set
+CONFIG_TEST_PRINTF=m
+CONFIG_TEST_BITMAP=m
# CONFIG_TEST_BITFIELD is not set
# CONFIG_TEST_UUID is not set
# CONFIG_TEST_OVERFLOW is not set
@@ -5665,13 +5703,13 @@ CONFIG_TEST_KSTRTOX=y
# CONFIG_TEST_HASH is not set
# CONFIG_TEST_IDA is not set
# CONFIG_TEST_LKM is not set
-# CONFIG_TEST_USER_COPY is not set
+CONFIG_TEST_USER_COPY=m
# CONFIG_TEST_BPF is not set
# CONFIG_FIND_BIT_BENCHMARK is not set
# CONFIG_TEST_FIRMWARE is not set
-# CONFIG_TEST_SYSCTL is not set
+CONFIG_TEST_SYSCTL=y
# CONFIG_TEST_UDELAY is not set
-# CONFIG_TEST_STATIC_KEYS is not set
+CONFIG_TEST_STATIC_KEYS=m
# CONFIG_TEST_KMOD is not set
# CONFIG_TEST_FREE_PAGES is not set
# CONFIG_MEMTEST is not set
--
2.25.1
23 Nov '21
From: Daniel Vetter <daniel.vetter(a)ffwll.ch>
This completes Emil's series of removing DRM_UNLOCKED from modern
drivers. It's entirely cargo-culted since we ignore it on
non-DRIVER_LEGACY drivers since:
commit ea487835e8876abf7ad909636e308c801a2bcda6
Author: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Date: Mon Sep 28 21:42:40 2015 +0200
drm: Enforce unlocked ioctl operation for kms driver ioctls
Now justifying why we can do this for legacy drivers too (and hence
close the source of all the bogus copypasting) is a bit more involved.
DRM_UNLOCKED was introduced in:
commit ed8b67040965e4fe695db333d5914e18ea5f146f
Author: Arnd Bergmann <arnd(a)arndb.de>
Date: Wed Dec 16 22:17:09 2009 +0000
drm: convert drm_ioctl to unlocked_ioctl
As an immediate hack to keep i810 happy, which would have deadlocked
without this trickery. The old BKL is automatically dropped in
schedule(), and hence the i810 vs. mmap_sem deadlock didn't actually
cause a real deadlock. But with a mutex it would. The solution was to
annotate these as DRM_UNLOCKED and mark i810 unsafe on SMP machines.
This conversion caused a regression, because unlike the BKL a mutex
isn't dropped over schedule (that thing again), which caused a vblank
wait in one thread to block the entire desktop and all its apps. Back
then we did vblank scheduling by blocking in the client, awesome isn't
it. This was fixed quickly in (ok not so quickly, took 2 years):
commit 8f4ff2b06afcd6f151868474a432c603057eaf56
Author: Ilija Hadzic <ihadzic(a)research.bell-labs.com>
Date: Mon Oct 31 17:46:18 2011 -0400
drm: do not sleep on vblank while holding a mutex
All the other DRM_UNLOCKED annotations for all the core ioctls was
work to reach finer-grained locking for modern drivers. This took
years, and culminated in:
commit fdd5b877e9ebc2029e1373b4a3cd057329a9ab7a
Author: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Date: Sat Dec 10 22:52:54 2016 +0100
drm: Enforce BKL-less ioctls for modern drivers
DRM_UNLOCKED was never required by any legacy drivers, except for the
vblank_wait IOCTL. Therefore we will not regress these old drivers by
going back to where we've been in 2011. For all modern drivers nothing
will change.
To make this perfectly clear, also add a comment to DRM_UNLOCKED.
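Concretely, the locking gate left behind in drm_ioctl_kernel() reads
roughly as sketched below (simplified from the drm_ioctl.c hunk further
down; error paths omitted):

	/* modern driver, or a legacy ioctl explicitly marked
	 * DRM_UNLOCKED: dispatch without the global DRM mutex */
	if (!drm_core_check_feature(dev, DRIVER_LEGACY) ||
	    (flags & DRM_UNLOCKED))
		retcode = func(dev, kdata, file_priv);
	else {
		/* legacy driver, locked ioctl: serialize on the "BKL" */
		mutex_lock(&drm_global_mutex);
		retcode = func(dev, kdata, file_priv);
		mutex_unlock(&drm_global_mutex);
	}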
v2: Don't forget about drm_ioc32.c (Michel).
Cc: Michel Dänzer <michel(a)daenzer.net>
Cc: Emil Velikov <emil.l.velikov(a)gmail.com>
Acked-by: Emil Velikov <emil.velikov(a)collabora.com>
Acked-by: Michel Dänzer <michel(a)daenzer.net>
Signed-off-by: Daniel Vetter <daniel.vetter(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190605120835.2798-1-daniel.…
---
drivers/gpu/drm/drm_ioc32.c | 13 ++--
drivers/gpu/drm/drm_ioctl.c | 131 ++++++++++++++++++------------------
include/drm/drm_ioctl.h | 3 +
3 files changed, 74 insertions(+), 73 deletions(-)
diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index f8672238d..ed20dbae6 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c
@@ -105,7 +105,7 @@ static int compat_drm_version(struct file *file, unsigned int cmd,
.desc = compat_ptr(v32.desc),
};
err = drm_ioctl_kernel(file, drm_version, &v,
- DRM_UNLOCKED|DRM_RENDER_ALLOW);
+ DRM_RENDER_ALLOW);
if (err)
return err;
@@ -139,7 +139,7 @@ static int compat_drm_getunique(struct file *file, unsigned int cmd,
.unique = compat_ptr(uq32.unique),
};
- err = drm_ioctl_kernel(file, drm_getunique, &uq, DRM_UNLOCKED);
+ err = drm_ioctl_kernel(file, drm_getunique, &uq, 0);
if (err)
return err;
@@ -177,7 +177,7 @@ static int compat_drm_getmap(struct file *file, unsigned int cmd,
return -EFAULT;
map.offset = m32.offset;
- err = drm_ioctl_kernel(file, drm_legacy_getmap_ioctl, &map, DRM_UNLOCKED);
+ err = drm_ioctl_kernel(file, drm_legacy_getmap_ioctl, &map, 0);
if (err)
return err;
@@ -262,7 +262,7 @@ static int compat_drm_getclient(struct file *file, unsigned int cmd,
client.idx = c32.idx;
- err = drm_ioctl_kernel(file, drm_getclient, &client, DRM_UNLOCKED);
+ err = drm_ioctl_kernel(file, drm_getclient, &client, 0);
if (err)
return err;
@@ -292,7 +292,7 @@ static int compat_drm_getstats(struct file *file, unsigned int cmd,
drm_stats32_t __user *argp = (void __user *)arg;
int err;
- err = drm_ioctl_kernel(file, drm_noop, NULL, DRM_UNLOCKED);
+ err = drm_ioctl_kernel(file, drm_noop, NULL, 0);
if (err)
return err;
@@ -887,8 +887,7 @@ static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd,
sizeof(req64.modifier)))
return -EFAULT;
- err = drm_ioctl_kernel(file, drm_mode_addfb2, &req64,
- DRM_UNLOCKED);
+ err = drm_ioctl_kernel(file, drm_mode_addfb2, &req64, 0);
if (err)
return err;
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index ba129b64b..3fe512b8a 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -549,22 +549,21 @@ EXPORT_SYMBOL(drm_ioctl_permit);
/* Ioctl table */
static const struct drm_ioctl_desc drm_ioctls[] = {
- DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, 0),
DRM_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_irq_by_busid, DRM_MASTER|DRM_ROOT_ONLY),
- DRM_IOCTL_DEF(DRM_IOCTL_GET_MAP, drm_legacy_getmap_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_GET_CLIENT, drm_getclient, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_GET_STATS, drm_getstats, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_GET_CAP, drm_getcap, DRM_UNLOCKED|DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF(DRM_IOCTL_SET_CLIENT_CAP, drm_setclientcap, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_SET_VERSION, drm_setversion, DRM_UNLOCKED | DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_GET_MAP, drm_legacy_getmap_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_GET_CLIENT, drm_getclient, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_GET_STATS, drm_getstats, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_GET_CAP, drm_getcap, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF(DRM_IOCTL_SET_CLIENT_CAP, drm_setclientcap, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_SET_VERSION, drm_setversion, DRM_MASTER),
DRM_IOCTL_DEF(DRM_IOCTL_SET_UNIQUE, drm_invalid_op, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF(DRM_IOCTL_BLOCK, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF(DRM_IOCTL_UNBLOCK, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
- DRM_IOCTL_DEF(DRM_IOCTL_AUTH_MAGIC, drm_authmagic, DRM_AUTH|DRM_UNLOCKED|DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_AUTH_MAGIC, drm_authmagic, DRM_AUTH|DRM_MASTER),
DRM_IOCTL_DEF(DRM_IOCTL_ADD_MAP, drm_legacy_addmap_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF(DRM_IOCTL_RM_MAP, drm_legacy_rmmap_ioctl, DRM_AUTH),
@@ -572,8 +571,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
DRM_IOCTL_DEF(DRM_IOCTL_SET_SAREA_CTX, drm_legacy_setsareactx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF(DRM_IOCTL_GET_SAREA_CTX, drm_legacy_getsareactx, DRM_AUTH),
- DRM_IOCTL_DEF(DRM_IOCTL_SET_MASTER, drm_setmaster_ioctl, DRM_UNLOCKED|DRM_ROOT_ONLY),
- DRM_IOCTL_DEF(DRM_IOCTL_DROP_MASTER, drm_dropmaster_ioctl, DRM_UNLOCKED|DRM_ROOT_ONLY),
+ DRM_IOCTL_DEF(DRM_IOCTL_SET_MASTER, drm_setmaster_ioctl, DRM_ROOT_ONLY),
+ DRM_IOCTL_DEF(DRM_IOCTL_DROP_MASTER, drm_dropmaster_ioctl, DRM_ROOT_ONLY),
DRM_IOCTL_DEF(DRM_IOCTL_ADD_CTX, drm_legacy_addctx, DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF(DRM_IOCTL_RM_CTX, drm_legacy_rmctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
@@ -620,66 +619,66 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
- DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH|DRM_UNLOCKED),
-
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_UNLOCKED),
-
- DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
-
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_connector_property_set_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH),
+ DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH),
+
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, 0),
+
+ DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_connector_property_set_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, 0),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_CREATE, drm_syncobj_create_ioctl,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_DESTROY, drm_syncobj_destroy_ioctl,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, drm_syncobj_handle_to_fd_ioctl,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
- DRM_UNLOCKED|DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER),
};
#define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls )
@@ -747,7 +746,7 @@ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata,
return retcode;
/* Enforce sane locking for modern driver ioctls. */
- if (!drm_core_check_feature(dev, DRIVER_LEGACY) ||
+ if (likely(!drm_core_check_feature(dev, DRIVER_LEGACY)) ||
(flags & DRM_UNLOCKED))
retcode = func(dev, kdata, file_priv);
else {
diff --git a/include/drm/drm_ioctl.h b/include/drm/drm_ioctl.h
index fafb6f592..10100a4bb 100644
--- a/include/drm/drm_ioctl.h
+++ b/include/drm/drm_ioctl.h
@@ -114,6 +114,9 @@ enum drm_ioctl_flags {
* Whether &drm_ioctl_desc.func should be called with the DRM BKL held
* or not. Enforced as the default for all modern drivers, hence there
* should never be a need to set this flag.
+ *
+ * Do not use anywhere else than for the VBLANK_WAIT IOCTL, which is the
+ * only legacy IOCTL which needs this.
*/
DRM_UNLOCKED = BIT(4),
/**
--
2.30.0
Kylin inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4J6G3?from=project-issue
CVE: NA
---------------------------------------
Reliably reproducible: the installation boots successfully, but lscpu
cannot show the binding between all CPUs and nodes. The reason is that
CONFIG_NODES_SHIFT=4 allows only 2^4 = 16 nodes, which cannot
accommodate the 32 nodes of this machine.
It needs to be changed to CONFIG_NODES_SHIFT=6 (2^6 = 64 nodes).
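The ceiling follows from how the kernel derives its node count (an
abridged sketch of the definitions in include/linux/numa.h):

	#ifdef CONFIG_NODES_SHIFT
	#define NODES_SHIFT	CONFIG_NODES_SHIFT
	#else
	#define NODES_SHIFT	0
	#endif
	#define MAX_NUMNODES	(1 << NODES_SHIFT)  /* 1<<4 = 16, 1<<6 = 64 */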
Signed-off-by: Wen Zhiwei <wenzhiwei(a)kylinos.cn>
---
arch/arm64/configs/openeuler_defconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 76d6a118330d..1a7d607137ad 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -384,7 +384,7 @@ CONFIG_NR_CPUS=1024
CONFIG_HOTPLUG_CPU=y
# CONFIG_ARM64_BOOTPARAM_HOTPLUG_CPU0 is not set
CONFIG_NUMA=y
-CONFIG_NODES_SHIFT=4
+CONFIG_NODES_SHIFT=6
CONFIG_USE_PERCPU_NUMA_NODE_ID=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
--
2.30.0
[PATCH openEuler-1.0-LTS] rq-qos: fix missed wake-ups in rq_qos_throttle try two
by Yang Yingliang 22 Nov '21
From: Jan Kara <jack(a)suse.cz>
stable inclusion
from stable-5.10.51
commit 8cc58a6e2c394aa48aa05f600be7d279efbafcd7
bugzilla: 175263
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit 11c7aa0ddea8611007768d3e6b58d45dc60a19e1 upstream.
Commit 545fbd0775ba ("rq-qos: fix missed wake-ups in rq_qos_throttle")
tried to fix a problem that a process could be sleeping in rq_qos_wait()
without anyone to wake it up. However the fix is not complete and the
following can still happen:
CPU1 (waiter1)              CPU2 (waiter2)              CPU3 (waker)
rq_qos_wait()               rq_qos_wait()
  acquire_inflight_cb() -> fails
                            acquire_inflight_cb() -> fails
                                                        completes IOs, inflight
                                                          decreased
  prepare_to_wait_exclusive()
                            prepare_to_wait_exclusive()
  has_sleeper = !wq_has_single_sleeper() -> true as there are two sleepers
                            has_sleeper = !wq_has_single_sleeper() -> true
  io_schedule()             io_schedule()
Deadlock as now there's nobody to wakeup the two waiters. The logic
automatically blocking when there are already sleepers is really subtle
and the only way to make it work reliably is that we check whether there
are some waiters in the queue when adding ourselves there. That way, we
are guaranteed that at least the first process to enter the wait queue
will recheck the waiting condition before going to sleep and thus
guarantee forward progress.
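The fix below therefore makes the enqueue primitive itself report
whether the queue was empty when we joined it; a simplified sketch of
the resulting wait loop in __wbt_wait() (token cleanup omitted):

	/* has_sleeper is computed under the waitqueue lock at enqueue
	 * time, instead of being sampled racily afterwards */
	has_sleeper = !__prepare_to_wait_exclusive(&rqw->wait, &data.wq,
						   TASK_UNINTERRUPTIBLE);
	do {
		if (data.got_token)
			break;
		if (!has_sleeper &&
		    rq_wait_inc_below(rqw, get_limit(rwb, rw)))
			break;	/* first waiter re-checks, may proceed */
		io_schedule();
		has_sleeper = false;
	} while (1);
	finish_wait(&rqw->wait, &data.wq);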
Fixes: 545fbd0775ba ("rq-qos: fix missed wake-ups in rq_qos_throttle")
CC: stable(a)vger.kernel.org
Signed-off-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20210607112613.25344-1-jack@suse.cz
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Lihong Kou <koulihong(a)huawei.com>
Reviewed-by: Hou Tao <houtao1(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
block/blk-wbt.c | 4 ++--
include/linux/wait.h | 1 +
kernel/sched/wait.c | 19 ++++++++++++++++---
3 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index ffd5c17f5a101..366d294a11ef1 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -549,8 +549,8 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
return;
- prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
- has_sleeper = !wq_has_single_sleeper(&rqw->wait);
+ has_sleeper = !__prepare_to_wait_exclusive(&rqw->wait, &data.wq,
+ TASK_UNINTERRUPTIBLE);
do {
/* The memory barrier in set_task_state saves us here. */
if (data.got_token)
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 29d28c2084ce8..1234e6cbacd2a 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -1121,6 +1121,7 @@ do { \
* Waitqueues which are removed from the waitqueue_head at wakeup time
*/
void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+bool __prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
void prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 5dd47f1103d18..c95b392735672 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -235,17 +235,30 @@ prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_ent
}
EXPORT_SYMBOL(prepare_to_wait);
-void
-prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
+/* Returns true if we are the first waiter in the queue, false otherwise. */
+bool
+__prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
unsigned long flags;
+ bool was_empty = false;
wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&wq_head->lock, flags);
- if (list_empty(&wq_entry->entry))
+ if (list_empty(&wq_entry->entry)) {
+ was_empty = list_empty(&wq_head->head);
__add_wait_queue_entry_tail(wq_head, wq_entry);
+ }
set_current_state(state);
spin_unlock_irqrestore(&wq_head->lock, flags);
+ return was_empty;
+
+}
+EXPORT_SYMBOL(__prepare_to_wait_exclusive);
+
+void
+prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
+{
+ __prepare_to_wait_exclusive(wq_head, wq_entry, state);
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);
--
2.25.1
1
0
From: 沈子俊 <shenzijun(a)kylinos.cn>
This patchset exports some of the common functions implemented by the
SM4 AESNI/AVX algorithm, and reuses these functions to accelerate the
AESNI/AVX2 implementation.
The main algorithm implementation comes from SM4 AES-NI work by
libgcrypt and Markku-Juhani O. Saarinen at:
https://github.com/mjosaarinen/sm4ni
Benchmarks on an Intel i5-6200U at 2.30GHz compare three
implementations: pure-software sm4-generic, AESNI/AVX acceleration,
and AESNI/AVX2 acceleration. The data comes from tcrypt modes 218 and
518. The columns are block sizes of different lengths; the data is
tabulated and the unit is Mb/s:
block-size | 16 64 128 256 1024 1420 4096
sm4-generic
ECB enc | 60.94 70.41 72.27 73.02 73.87 73.58 73.59
ECB dec | 61.87 70.53 72.15 73.09 73.89 73.92 73.86
CBC enc | 56.71 66.31 68.05 69.84 70.02 70.12 70.24
CBC dec | 54.54 65.91 68.22 69.51 70.63 70.79 70.82
CFB enc | 57.21 67.24 69.10 70.25 70.73 70.52 71.42
CFB dec | 57.22 64.74 66.31 67.24 67.40 67.64 67.58
CTR enc | 59.47 68.64 69.91 71.02 71.86 71.61 71.95
CTR dec | 59.94 68.77 69.95 71.00 71.84 71.55 71.95
sm4-aesni-avx
ECB enc | 44.95 177.35 292.06 316.98 339.48 322.27 330.59
ECB dec | 45.28 178.66 292.31 317.52 339.59 322.52 331.16
CBC enc | 57.75 67.68 69.72 70.60 71.48 71.63 71.74
CBC dec | 44.32 176.83 284.32 307.24 328.61 312.61 325.82
CFB enc | 57.81 67.64 69.63 70.55 71.40 71.35 71.70
CFB dec | 43.14 167.78 282.03 307.20 328.35 318.24 325.95
CTR enc | 42.35 163.32 279.11 302.93 320.86 310.56 317.93
CTR dec | 42.39 162.81 278.49 302.37 321.11 310.33 318.37
sm4-aesni-avx2
ECB enc | 45.19 177.41 292.42 316.12 339.90 322.53 330.54
ECB dec | 44.83 178.90 291.45 317.31 339.85 322.55 331.07
CBC enc | 57.66 67.62 69.73 70.55 71.58 71.66 71.77
CBC dec | 44.34 176.86 286.10 501.68 559.58 483.87 527.46
CFB enc | 57.43 67.60 69.61 70.52 71.43 71.28 71.65
CFB dec | 43.12 167.75 268.09 499.33 558.35 490.36 524.73
CTR enc | 42.42 163.39 256.17 493.95 552.45 481.58 517.19
CTR dec | 42.49 163.11 256.36 493.34 552.62 481.49 516.83
From the benchmark data it can be seen that at a block size of 1024,
AVX2 improves on the AVX acceleration by about 70% (e.g. CBC
decryption: 559.58 vs. 328.61 Mb/s), and is about 7.7 times faster
than the pure-software sm4-generic implementation.
沈子俊 (2):
crypto: x86/sm4 - export reusable AESNI/AVX functions
crypto: x86/sm4 - add AES-NI/AVX2/x86_64 implementation
arch/x86/crypto/Makefile | 3 +
arch/x86/crypto/sm4-aesni-avx2-asm_64.S | 497 ++++++++++++++++++++++++
arch/x86/crypto/sm4-avx.h | 24 ++
arch/x86/crypto/sm4_aesni_avx2_glue.c | 169 ++++++++
arch/x86/crypto/sm4_aesni_avx_glue.c | 92 +++--
crypto/Kconfig | 22 ++
6 files changed, 775 insertions(+), 32 deletions(-)
create mode 100644 arch/x86/crypto/sm4-aesni-avx2-asm_64.S
create mode 100644 arch/x86/crypto/sm4-avx.h
create mode 100644 arch/x86/crypto/sm4_aesni_avx2_glue.c
--
2.30.0
Add 3 new features:
1. Add the get_rxfh_indir_size callback to the ethtool_ops structure
   (see the sketch after this list)
2. Support configuring the DMA attribute through firmware
3. Clear the CSUM and TSO flags when VXLAN is not supported
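For reference, wiring such a callback into ethtool_ops follows the
usual pattern (a hedged sketch: spnic_get_rxfh_indir_size() and
SPNIC_RSS_INDIR_SIZE are illustrative names; the real ones live in
spnic_rss.c and spnic_ethtool.c per the diffstat below):

	/* reports the RSS indirection table size, e.g. for ethtool -x */
	static u32 spnic_get_rxfh_indir_size(struct net_device *netdev)
	{
		return SPNIC_RSS_INDIR_SIZE;	/* illustrative constant */
	}

	static const struct ethtool_ops spnic_ethtool_ops = {
		.get_rxfh_indir_size	= spnic_get_rxfh_indir_size,
		/* ... remaining callbacks ... */
	};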
Yanling Song (3):
net:spnic: Add the get_rxfh_indir_size in ethtool_ops structure.
net:spnic:Support to configure DMA atrribute thru firmware.
net:spnic:add CSUM and TSO function execute condition
.../ramaxel/spnic/hw/sphw_comm_msg_intf.h | 2 +-
.../ethernet/ramaxel/spnic/hw/sphw_hw_comm.c | 27 ++++++
.../ethernet/ramaxel/spnic/hw/sphw_hw_comm.h | 3 +
.../ethernet/ramaxel/spnic/hw/sphw_hwdev.c | 82 ++++---------------
.../ethernet/ramaxel/spnic/spnic_ethtool.c | 2 +
.../net/ethernet/ramaxel/spnic/spnic_main.c | 7 +-
.../net/ethernet/ramaxel/spnic/spnic_rss.c | 5 ++
.../net/ethernet/ramaxel/spnic/spnic_rss.h | 2 +
8 files changed, 60 insertions(+), 70 deletions(-)
--
2.27.0
[PATCH openEuler-1.0-LTS 1/3] drivers : remove drivers/soc/hisilicon/lbc
by Yang Yingliang 22 Nov '21
From: fengsheng <fengsheng5(a)huawei.com>
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4IYWE?from=project-issue
CVE: NA
------------------------------------------------------------
This driver is not in use. Remove it.
Signed-off-by: fengsheng <fengsheng5(a)huawei.com>
Reviewed-by: lidongming <lidongming5(a)huawei.com>
Reviewed-by: ouyang delong <ouyangdelong(a)huawei.com>
Acked-by: Xie XiuQi <xiexiuqi(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
MAINTAINERS | 5 -
arch/arm64/configs/hulk_defconfig | 1 -
arch/arm64/configs/openeuler_defconfig | 1 -
arch/arm64/configs/storage_ci_defconfig | 1 -
arch/arm64/configs/syzkaller_defconfig | 1 -
arch/x86/configs/openeuler_defconfig | 1 -
arch/x86/configs/storage_ci_defconfig | 1 -
drivers/soc/hisilicon/Kconfig | 1 -
drivers/soc/hisilicon/Makefile | 1 -
drivers/soc/hisilicon/lbc/Kconfig | 3 -
drivers/soc/hisilicon/lbc/Makefile | 2 -
drivers/soc/hisilicon/lbc/hs_lbc_pltfm.c | 466 -----------------------
drivers/soc/hisilicon/lbc/hs_lbc_pltfm.h | 120 ------
13 files changed, 604 deletions(-)
delete mode 100644 drivers/soc/hisilicon/lbc/Kconfig
delete mode 100644 drivers/soc/hisilicon/lbc/Makefile
delete mode 100644 drivers/soc/hisilicon/lbc/hs_lbc_pltfm.c
delete mode 100644 drivers/soc/hisilicon/lbc/hs_lbc_pltfm.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 15609985b6cc6..b84c7cbd4e555 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16332,11 +16332,6 @@ S: Buried alive in reporters
F: *
F: */
-HISILICON LOCALBUS DRIVER
-M: Feng Sheng <fengsheng5(a)huawei.com>
-S: Maintained
-F: drivers/soc/hisilicon/lbc/
-
HISILICON SYSCTRL DRIVER
M: Feng Sheng <fengsheng5(a)huawei.com>
S: Maintained
diff --git a/arch/arm64/configs/hulk_defconfig b/arch/arm64/configs/hulk_defconfig
index a2a2216e43f1e..904e9b557122e 100644
--- a/arch/arm64/configs/hulk_defconfig
+++ b/arch/arm64/configs/hulk_defconfig
@@ -281,7 +281,6 @@ CONFIG_ARCH_XGENE=y
# CONFIG_ARCH_ZYNQMP is not set
CONFIG_HAVE_LIVEPATCH_WO_FTRACE=y
CONFIG_SOC_HISILICON_SYSCTL=m
-CONFIG_SOC_HISILICON_LBC=m
#
# Enable Livepatch
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 089f5be748b10..20ad626791d14 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -5011,7 +5011,6 @@ CONFIG_ARM_SMMU_V3=y
# Xilinx SoC drivers
#
# CONFIG_XILINX_VCU is not set
-CONFIG_SOC_HISILICON_LBC=m
CONFIG_SOC_HISILICON_SYSCTL=m
# CONFIG_PM_DEVFREQ is not set
CONFIG_EXTCON=y
diff --git a/arch/arm64/configs/storage_ci_defconfig b/arch/arm64/configs/storage_ci_defconfig
index f468538f22a52..6d671761fd080 100644
--- a/arch/arm64/configs/storage_ci_defconfig
+++ b/arch/arm64/configs/storage_ci_defconfig
@@ -2219,7 +2219,6 @@ CONFIG_CLKSRC_VERSATILE=y
# Xilinx SoC drivers
#
# CONFIG_XILINX_VCU is not set
-CONFIG_SOC_HISILICON_LBC=m
# CONFIG_PM_DEVFREQ is not set
# CONFIG_EXTCON is not set
# CONFIG_MEMORY is not set
diff --git a/arch/arm64/configs/syzkaller_defconfig b/arch/arm64/configs/syzkaller_defconfig
index eb045bdc5ef16..7c8eca466e25a 100644
--- a/arch/arm64/configs/syzkaller_defconfig
+++ b/arch/arm64/configs/syzkaller_defconfig
@@ -281,7 +281,6 @@ CONFIG_ARCH_XGENE=y
# CONFIG_ARCH_ZYNQMP is not set
CONFIG_HAVE_LIVEPATCH_WO_FTRACE=y
CONFIG_SOC_HISILICON_SYSCTL=m
-CONFIG_SOC_HISILICON_LBC=m
#
# Enable Livepatch
#
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index 5a56a81e39e19..0700ef6fc0cf0 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -6168,7 +6168,6 @@ CONFIG_IRQ_REMAP=y
# Xilinx SoC drivers
#
# CONFIG_XILINX_VCU is not set
-CONFIG_SOC_HISILICON_LBC=m
CONFIG_SOC_HISILICON_SYSCTL=m
# CONFIG_PM_DEVFREQ is not set
# CONFIG_EXTCON is not set
diff --git a/arch/x86/configs/storage_ci_defconfig b/arch/x86/configs/storage_ci_defconfig
index 0847b17d293a0..27de222f67966 100644
--- a/arch/x86/configs/storage_ci_defconfig
+++ b/arch/x86/configs/storage_ci_defconfig
@@ -2389,7 +2389,6 @@ CONFIG_PCC=y
# Xilinx SoC drivers
#
# CONFIG_XILINX_VCU is not set
-CONFIG_SOC_HISILICON_LBC=m
CONFIG_SOC_HISILICON_SYSCTL=m
# CONFIG_PM_DEVFREQ is not set
# CONFIG_EXTCON is not set
diff --git a/drivers/soc/hisilicon/Kconfig b/drivers/soc/hisilicon/Kconfig
index 520f60cec77b7..6dd657e3eaa98 100644
--- a/drivers/soc/hisilicon/Kconfig
+++ b/drivers/soc/hisilicon/Kconfig
@@ -1,3 +1,2 @@
-source "drivers/soc/hisilicon/lbc/Kconfig"
source "drivers/soc/hisilicon/sysctl/Kconfig"
diff --git a/drivers/soc/hisilicon/Makefile b/drivers/soc/hisilicon/Makefile
index 33a64532ace35..fe68ce0d54c3e 100644
--- a/drivers/soc/hisilicon/Makefile
+++ b/drivers/soc/hisilicon/Makefile
@@ -1,2 +1 @@
-obj-y += lbc/
obj-y += sysctl/
diff --git a/drivers/soc/hisilicon/lbc/Kconfig b/drivers/soc/hisilicon/lbc/Kconfig
deleted file mode 100644
index 054b6634dae9a..0000000000000
--- a/drivers/soc/hisilicon/lbc/Kconfig
+++ /dev/null
@@ -1,3 +0,0 @@
-config SOC_HISILICON_LBC
- tristate
- default m
\ No newline at end of file
diff --git a/drivers/soc/hisilicon/lbc/Makefile b/drivers/soc/hisilicon/lbc/Makefile
deleted file mode 100644
index d7ec052cb42e2..0000000000000
--- a/drivers/soc/hisilicon/lbc/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-lbc-objs := hs_lbc_pltfm.o
-obj-$(CONFIG_SOC_HISILICON_LBC) += lbc.o
\ No newline at end of file
diff --git a/drivers/soc/hisilicon/lbc/hs_lbc_pltfm.c b/drivers/soc/hisilicon/lbc/hs_lbc_pltfm.c
deleted file mode 100644
index c188cd308f862..0000000000000
--- a/drivers/soc/hisilicon/lbc/hs_lbc_pltfm.c
+++ /dev/null
@@ -1,466 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2019 Hisilicon Limited, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/platform_device.h>
-#include <linux/of.h>
-#include <linux/of_platform.h>
-#include <linux/spinlock.h>
-#include <linux/of_address.h>
-#include <linux/acpi.h>
-
-#include "hs_lbc_pltfm.h"
-
-#define LBC_DRIVER_VERSION "1.9.39.0"
-
-struct hisi_lbc_dev g_lbc_dev = {0};
-
-static void lbc_set_cs_base_addr(unsigned int index, unsigned int cs_base_addr)
-{
- LBC_REG_REGION *lbc_reg = (LBC_REG_REGION *)(ACCESS_ONCE(g_lbc_dev.regs_base));
-
- lbc_reg->cs_base[index] = cs_base_addr;
-}
-
-static void lbc_set_cs_data_width(unsigned int index, unsigned int width)
-{
- LBC_REG_REGION *lbc_reg = (LBC_REG_REGION *)(ACCESS_ONCE(g_lbc_dev.regs_base));
-
- lbc_reg->cs_ctrl[index].data_width = width;
-}
-
-static void lbc_set_cs_data_offset(unsigned int index, unsigned int offset)
-{
- LBC_REG_REGION *lbc_reg = (LBC_REG_REGION *)(ACCESS_ONCE(g_lbc_dev.regs_base));
-
- lbc_reg->cs_ctrl[index].addr_offset = offset;
-}
-
-static void lbc_set_cs_mem_size(unsigned int index, u64 mem_size)
-{
- unsigned int size = 0;
- LBC_REG_REGION *lbc_reg = (LBC_REG_REGION *)(ACCESS_ONCE(g_lbc_dev.regs_base));
-
- switch (mem_size) {
- case LBC_CS_MEM_SIZE_0:
- size = LBC_CS_MEM_SIZE_REG_0;
- break;
- case LBC_CS_MEM_SIZE_64K:
- size = LBC_CS_MEM_SIZE_REG_64K;
- break;
- case LBC_CS_MEM_SIZE_128K:
- size = LBC_CS_MEM_SIZE_REG_128K;
- break;
- case LBC_CS_MEM_SIZE_256K:
- size = LBC_CS_MEM_SIZE_REG_256K;
- break;
- case LBC_CS_MEM_SIZE_512K:
- size = LBC_CS_MEM_SIZE_REG_512K;
- break;
- case LBC_CS_MEM_SIZE_1M:
- size = LBC_CS_MEM_SIZE_REG_1M;
- break;
- case LBC_CS_MEM_SIZE_2M:
- size = LBC_CS_MEM_SIZE_REG_2M;
- break;
- case LBC_CS_MEM_SIZE_4M:
- size = LBC_CS_MEM_SIZE_REG_4M;
- break;
- case LBC_CS_MEM_SIZE_8M:
- size = LBC_CS_MEM_SIZE_REG_8M;
- break;
- case LBC_CS_MEM_SIZE_16M:
- size = LBC_CS_MEM_SIZE_REG_16M;
- break;
- case LBC_CS_MEM_SIZE_32M:
- size = LBC_CS_MEM_SIZE_REG_32M;
- break;
- case LBC_CS_MEM_SIZE_64M:
- size = LBC_CS_MEM_SIZE_REG_64M;
- break;
- case LBC_CS_MEM_SIZE_128M:
- size = LBC_CS_MEM_SIZE_REG_128M;
- break;
- case LBC_CS_MEM_SIZE_256M:
- size = LBC_CS_MEM_SIZE_REG_256M;
- break;
- default:
- size = 0;
- }
-
- lbc_reg->cs_ctrl[index].mem_size = size;
-}
-
-static int hisi_lbc_para_check(unsigned int index, unsigned int offset, unsigned int type)
-{
- /* cs index check */
- if (index >= LBC_CS_MAX_NUM)
- return -EINVAL;
-
- /* cs offset check */
- if (offset >= g_lbc_dev.cs[index].size)
- return -EINVAL;
-
- if (type != LBC_RWDATA_WIDTH_8
- && type != LBC_RWDATA_WIDTH_16
- && type != LBC_RWDATA_WIDTH_32)
- return -EINVAL;
-
- /* width check */
- if ((type == LBC_RWDATA_WIDTH_16)
- || (type == LBC_RWDATA_WIDTH_32)) {
- if (offset % (type * 0x2))
- return -EINVAL;
- }
-
- return 0;
-}
-
-static unsigned int lbc_read(unsigned int index, unsigned int offset, unsigned int type)
-{
- void __iomem *base_addr = ACCESS_ONCE(g_lbc_dev.cs[index].cs_base);
- unsigned int value;
- unsigned long flags;
-
- spin_lock_irqsave(&g_lbc_dev.cs[index].lock, flags);
-
- if (type == LBC_RWDATA_WIDTH_8)
- value = readb(base_addr + offset) & 0xff;
- else if (type == LBC_RWDATA_WIDTH_16)
- value = readw(base_addr + offset) & 0xffff;
- else
- value = readl(base_addr + offset) & 0xffffffff;
-
- spin_unlock_irqrestore(&g_lbc_dev.cs[index].lock, flags);
-
- return value;
-}
-
-static unsigned int lbc_read_unlock(unsigned int index, unsigned int offset, unsigned int type)
-{
- void __iomem *base_addr = ACCESS_ONCE(g_lbc_dev.cs[index].cs_base);
- unsigned int value;
-
- if (type == LBC_RWDATA_WIDTH_8)
- value = readb(base_addr + offset) & 0xff;
- else if (type == LBC_RWDATA_WIDTH_16)
- value = readw(base_addr + offset) & 0xffff;
- else
- value = readl(base_addr + offset) & 0xffffffff;
-
- return value;
-}
-
-static int lbc_write(unsigned int index, unsigned int offset, unsigned int type, unsigned int data)
-{
- void __iomem *base_addr = ACCESS_ONCE(g_lbc_dev.cs[index].cs_base);
- unsigned long flags;
-
- spin_lock_irqsave(&g_lbc_dev.cs[index].lock, flags);
-
- if (type == LBC_RWDATA_WIDTH_8)
- writeb(data & 0xff, base_addr + offset);
- else if (type == LBC_RWDATA_WIDTH_16)
- writew(data & 0xffff, base_addr + offset);
- else
- writel(data & 0xffffffff, base_addr + offset);
-
- spin_unlock_irqrestore(&g_lbc_dev.cs[index].lock, flags);
-
- return 0;
-}
-
-static int lbc_write_unlock(unsigned int index, unsigned int offset, unsigned int type, unsigned int data)
-{
- void __iomem *base_addr = ACCESS_ONCE(g_lbc_dev.cs[index].cs_base);
-
- if (type == LBC_RWDATA_WIDTH_8)
- writeb(data & 0xff, base_addr + offset);
- else if (type == LBC_RWDATA_WIDTH_16)
- writew(data & 0xffff, base_addr + offset);
- else
- writel(data & 0xffffffff, base_addr + offset);
-
- return 0;
-}
-
-int lbc_read8(unsigned int index, unsigned int offset, unsigned char *value)
-{
- /* para check */
- if (hisi_lbc_para_check(index, offset, LBC_RWDATA_WIDTH_8)) {
- pr_err("Lbc para check failed\n");
- return -EINVAL;
- }
-
- if (!value) {
- pr_err("value is null\n");
- return -EINVAL;
- }
-
- *value = (unsigned char)lbc_read(index, offset, LBC_RWDATA_WIDTH_8);
-
- return 0;
-}
-EXPORT_SYMBOL(lbc_read8);
-
-int lbc_read8_nolock(unsigned int index, unsigned int offset, unsigned char *value)
-{
- /* para check */
- if (hisi_lbc_para_check(index, offset, LBC_RWDATA_WIDTH_8)) {
- pr_err("Lbc para check failed\n");
- return -EINVAL;
- }
-
- if (!value) {
- pr_err("value is null\n");
- return -EINVAL;
- }
-
- *value = (unsigned char)lbc_read_unlock(index, offset, LBC_RWDATA_WIDTH_8);
- return 0;
-}
-EXPORT_SYMBOL(lbc_read8_nolock);
-
-unsigned short lbc_read16(unsigned int index, unsigned int offset)
-{
- /* para check */
- if (hisi_lbc_para_check(index, offset, LBC_RWDATA_WIDTH_16)) {
- pr_err("Lbc para check failed\n");
- return 0;
- }
-
- return (unsigned short)lbc_read(index, offset, LBC_RWDATA_WIDTH_16);
-}
-
-unsigned int lbc_read32(unsigned int index, unsigned int offset)
-{
- /* para check */
- if (hisi_lbc_para_check(index, offset, LBC_RWDATA_WIDTH_32)) {
- pr_err("Lbc para check failed\n");
- return 0;
- }
-
- return lbc_read(index, offset, LBC_RWDATA_WIDTH_32);
-}
-
-int lbc_write8(unsigned int index, unsigned int offset, unsigned char data)
-{
- /* para check */
- if (hisi_lbc_para_check(index, offset, LBC_RWDATA_WIDTH_8)) {
- pr_err("Lbc para check failed\n");
- return -EINVAL;
- }
-
- return lbc_write(index, offset, LBC_RWDATA_WIDTH_8, (unsigned int)data);
-}
-EXPORT_SYMBOL(lbc_write8);
-
-int lbc_write8_nolock(unsigned int index, unsigned int offset, unsigned char data)
-{
- /* para check */
- if (hisi_lbc_para_check(index, offset, LBC_RWDATA_WIDTH_8)) {
- pr_err("Lbc para check failed\n");
- return -EINVAL;
- }
-
- return lbc_write_unlock(index, offset, LBC_RWDATA_WIDTH_8, (unsigned int)data);
-}
-EXPORT_SYMBOL(lbc_write8_nolock);
-
-int lbc_write16(unsigned int index, unsigned int offset, unsigned short data)
-{
- /* para check */
- if (hisi_lbc_para_check(index, offset, LBC_RWDATA_WIDTH_16)) {
- pr_err("Lbc para check failed\n");
- return -EINVAL;
- }
-
- return lbc_write(index, offset, LBC_RWDATA_WIDTH_16, (unsigned int)data);
-}
-
-int lbc_write32(unsigned int index, unsigned int offset, unsigned int data)
-{
- /* para check */
- if (hisi_lbc_para_check(index, offset, LBC_RWDATA_WIDTH_32)) {
- pr_err("Lbc para check failed\n");
- return -EINVAL;
- }
-
- return lbc_write(index, offset, LBC_RWDATA_WIDTH_32, (unsigned int)data);
-}
-
-static int hisi_lbc_cs_init(struct platform_device *pdev)
-{
- unsigned int index;
- unsigned int width;
- unsigned int shift;
- struct resource *cs_base = NULL;
-
- if (has_acpi_companion(g_lbc_dev.dev)) {
- /* get cs index */
- index = 0;
- (void)device_property_read_u32(g_lbc_dev.dev, "index", &index);
-
- if (index >= LBC_CS_MAX_NUM) {
- dev_err(g_lbc_dev.dev, "Cs index error\n");
- return -EINVAL;
- }
-
- /* lock init */
- spin_lock_init(&g_lbc_dev.cs[index].lock);
-
- /* get cs base address */
- cs_base = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-
- if (!cs_base) {
- dev_err(g_lbc_dev.dev, "Can not find this cs base resource\n");
- return -ENOENT;
- }
-
- g_lbc_dev.cs[index].cs_base = devm_ioremap_resource(&pdev->dev, cs_base);
-
- if (IS_ERR(g_lbc_dev.cs[index].cs_base))
- return (int)PTR_ERR(g_lbc_dev.cs[index].cs_base);
-
- g_lbc_dev.cs[index].size = (unsigned int)resource_size(cs_base);
-
- lbc_set_cs_base_addr(index, (unsigned int)cs_base->start);
- lbc_set_cs_mem_size(index, resource_size(cs_base));
-
- /* get cs width */
- width = 0;
- (void)device_property_read_u32(g_lbc_dev.dev, "width", &width);
-
- if (width > LBC_CS_WIDTH_32) {
- dev_err(g_lbc_dev.dev, "Cs width error\n");
- return -EINVAL;
- }
-
- g_lbc_dev.cs[index].width = width;
- lbc_set_cs_data_width(index, width);
-
- /* get cs address offset */
- shift = 0;
- (void)device_property_read_u32(g_lbc_dev.dev, "shift", &shift);
-
- if (shift > LBC_CS_ADDR_SHIFT_2) {
- dev_err(g_lbc_dev.dev, "Cs address shift error\n");
- return -EINVAL;
- }
-
- g_lbc_dev.cs[index].shift = shift;
-
- lbc_set_cs_data_offset(index, shift);
- }
-
- return 0;
-}
-
-static int hisi_lbc_probe(struct platform_device *pdev)
-{
- int ret;
- struct resource *regs_base = NULL;
-
- dev_info(&pdev->dev, "hisi lbc probe\n");
-
- if ((!pdev->dev.of_node) && (!ACPI_COMPANION(&pdev->dev))) {
- dev_err(&pdev->dev, "Device OF-Node and ACPI-Node is NULL\n");
- return -EFAULT;
- }
-
- g_lbc_dev.dev = &pdev->dev;
-
- /* get resource num */
- regs_base = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
- if (!g_lbc_dev.is_reg_remaped) {
- g_lbc_dev.regs_base = devm_ioremap_resource(&pdev->dev, regs_base);
- g_lbc_dev.is_reg_remaped = 1;
- }
-
- if (IS_ERR(g_lbc_dev.regs_base)) {
- dev_err(&pdev->dev, "ERROR: regbase\n");
- return (int)PTR_ERR(g_lbc_dev.regs_base);
- }
-
- /* localbus cs init */
- ret = hisi_lbc_cs_init(pdev);
- if (ret) {
- dev_err(&pdev->dev, "Localbus cs init failed\n");
- return -1;
- }
-
- platform_set_drvdata(pdev, &g_lbc_dev);
- dev_info(&pdev->dev, "hisi lbc probe prob ok\n");
- return 0;
-}
-
-static int hisi_lbc_remove(struct platform_device *pdev)
-{
- return 0;
-}
-
-static const struct of_device_id g_hisi_lbc_pltfm_match[] = {
- {
- .compatible = "hisilicon, hi1620_lbc",
- },
- {},
-};
-
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id g_hisi_lbc_acpi_match[] = {
- { "HISI0C01", 0 },
- { }
-};
-MODULE_DEVICE_TABLE(acpi, g_hisi_lbc_acpi_match);
-#endif
-
-static struct platform_driver g_hisi_lbc_driver = {
- .probe = hisi_lbc_probe,
- .remove = hisi_lbc_remove,
- .driver = {
- .name = "hisi-lbc",
- .owner = THIS_MODULE,
- .of_match_table = g_hisi_lbc_pltfm_match,
-#ifdef CONFIG_ACPI
- .acpi_match_table = ACPI_PTR(g_hisi_lbc_acpi_match),
-#endif
- },
-
-};
-
-static int __init hisi_lbc_init_driver(void)
-{
- return platform_driver_register((struct platform_driver *)&g_hisi_lbc_driver);
-}
-
-static void __exit hisi_lbc_exit_driver(void)
-{
- platform_driver_unregister((struct platform_driver *)&g_hisi_lbc_driver);
-}
-
-module_init(hisi_lbc_init_driver);
-module_exit(hisi_lbc_exit_driver);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
-MODULE_VERSION(LBC_DRIVER_VERSION);
-MODULE_DESCRIPTION("LBC driver for linux");
diff --git a/drivers/soc/hisilicon/lbc/hs_lbc_pltfm.h b/drivers/soc/hisilicon/lbc/hs_lbc_pltfm.h
deleted file mode 100644
index 24ca4366ec512..0000000000000
--- a/drivers/soc/hisilicon/lbc/hs_lbc_pltfm.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2019 Hisilicon Limited, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef _HS_LBC_PLTFM_H_
-#define _HS_LBC_PLTFM_H_
-#include <linux/version.h>
-
-/* RW data width */
-#define LBC_RWDATA_WIDTH_8 (0)
-#define LBC_RWDATA_WIDTH_16 (1)
-#define LBC_RWDATA_WIDTH_32 (2)
-
-/* cs width */
-#define LBC_CS_WIDTH_8 (0)
-#define LBC_CS_WIDTH_16 (1)
-#define LBC_CS_WIDTH_32 (2)
-
-/* cs address shift */
-#define LBC_CS_ADDR_SHIFT_0 (0)
-#define LBC_CS_ADDR_SHIFT_1 (1)
-#define LBC_CS_ADDR_SHIFT_2 (2)
-
-#define LBC_CS_MAX_NUM (4)
-
-#define LBC_CS_MEM_SIZE_0 (0)
-#define LBC_CS_MEM_SIZE_64K (64 * 1024)
-#define LBC_CS_MEM_SIZE_128K (LBC_CS_MEM_SIZE_64K << 1)
-#define LBC_CS_MEM_SIZE_256K (LBC_CS_MEM_SIZE_128K << 1)
-#define LBC_CS_MEM_SIZE_512K (LBC_CS_MEM_SIZE_256K << 1)
-#define LBC_CS_MEM_SIZE_1M (LBC_CS_MEM_SIZE_512K << 1)
-#define LBC_CS_MEM_SIZE_2M (LBC_CS_MEM_SIZE_1M << 1)
-#define LBC_CS_MEM_SIZE_4M (LBC_CS_MEM_SIZE_2M << 1)
-#define LBC_CS_MEM_SIZE_8M (LBC_CS_MEM_SIZE_4M << 1)
-#define LBC_CS_MEM_SIZE_16M (LBC_CS_MEM_SIZE_8M << 1)
-#define LBC_CS_MEM_SIZE_32M (LBC_CS_MEM_SIZE_16M << 1)
-#define LBC_CS_MEM_SIZE_64M (LBC_CS_MEM_SIZE_32M << 1)
-#define LBC_CS_MEM_SIZE_128M (LBC_CS_MEM_SIZE_64M << 1)
-#define LBC_CS_MEM_SIZE_256M (LBC_CS_MEM_SIZE_128M << 1)
-
-#define LBC_CS_MEM_SIZE_REG_0 (0)
-#define LBC_CS_MEM_SIZE_REG_64K (1)
-#define LBC_CS_MEM_SIZE_REG_128K (2)
-#define LBC_CS_MEM_SIZE_REG_256K (3)
-#define LBC_CS_MEM_SIZE_REG_512K (4)
-#define LBC_CS_MEM_SIZE_REG_1M (5)
-#define LBC_CS_MEM_SIZE_REG_2M (6)
-#define LBC_CS_MEM_SIZE_REG_4M (7)
-#define LBC_CS_MEM_SIZE_REG_8M (8)
-#define LBC_CS_MEM_SIZE_REG_16M (9)
-#define LBC_CS_MEM_SIZE_REG_32M (10)
-#define LBC_CS_MEM_SIZE_REG_64M (11)
-#define LBC_CS_MEM_SIZE_REG_128M (12)
-#define LBC_CS_MEM_SIZE_REG_256M (13)
-#define LBC_CS_MEM_SIZE_REG_512M (14)
-#define LBC_CS_MEM_SIZE_REG_1G (15)
-#define LBC_CS_MEM_SIZE_REG_2G (16)
-#define LBC_CS_MEM_SIZE_REG_4G (17)
-
-typedef struct lbc_cs_ctrl {
- volatile unsigned int mem_size : 5;
- volatile unsigned int data_width : 2;
- volatile unsigned int data_order : 1;
- volatile unsigned int byte_order : 1;
- volatile unsigned int rdy_mode : 1;
- volatile unsigned int rdy_pol : 1;
- volatile unsigned int addr_offset : 1;
- volatile unsigned int lbctl_en : 1;
- volatile unsigned int page_en : 1;
- volatile unsigned int page_size : 2;
- volatile unsigned int rdy_tout_en : 1;
- volatile unsigned int rble : 1;
- volatile unsigned int reserved : 14;
-} LBC_CS_CTRL;
-
-#define LBC_REG_RSV_MAX_NUM 4
-#define LBC_REG_CRE_MAX_NUM 4
-typedef struct lbc_reg_region {
- volatile unsigned int cs_base[LBC_CS_MAX_NUM];
- volatile unsigned int cs_base_reserved[LBC_REG_RSV_MAX_NUM];
- volatile LBC_CS_CTRL cs_ctrl[LBC_CS_MAX_NUM];
- volatile LBC_CS_CTRL cs_ctrl_creserved[LBC_REG_CRE_MAX_NUM];
-} LBC_REG_REGION;
-
-struct hisi_lbc_cs {
- unsigned int index;
- spinlock_t lock;
- void __iomem *cs_base;
- unsigned int size;
- unsigned int width; /* width */
- unsigned int shift; /* address shift */
-};
-
-struct hisi_lbc_dev {
- unsigned char is_reg_remaped;
- struct device *dev;
- void __iomem *regs_base; /* localbus regs base addr */
- struct hisi_lbc_cs cs[LBC_CS_MAX_NUM];
-};
-
-#if LINUX_VERSION_CODE > KERNEL_VERSION(4, 16, 0)
-#define __ACCESS_ONCE(x) ({ \
- __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \
- (volatile typeof(x) *)&(x); })
-#define ACCESS_ONCE(x) (*__ACCESS_ONCE(x))
-#endif
-
-#endif
--
2.25.1
[PATCH openEuler-1.0-LTS] atlantic: Fix OOB read and write in hw_atl_utils_fw_rpc_wait
by Yang Yingliang 22 Nov '21
From: Zekun Shen <bruceshenzk(a)gmail.com>
mainline inclusion
from mainline-v5.16-rc2
commit b922f622592af76b57cbc566eaeccda0b31a3496
category: bugfix
bugzilla: NA
CVE: CVE-2021-43975
-------------------------------------------------
This bug report showed up when running our research tools. The report
is a slab-out-of-bounds (SOOB) read, but an SOOB write also appears
possible a few lines below.
In detail, fw.len and sw.len are inputs coming from I/O; a length larger
than the size of self->rpc triggers the out-of-bounds access. The patch
fixes the bugs by adding sanity checks.
The bugs are triggerable with compromised/malfunctioning devices. They
are potentially exploitable, given that they first leak up to 0xffff
bytes and can later overwrite the region.
The patch is tested with the QEMU emulator; it is NOT tested with a
real device.
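The shape of the fix, as a minimal hedged sketch (dst_buf,
read_len_from_device() and copy_from_device() are hypothetical
stand-ins, not driver symbols): any length the device controls must be
bounded by the real destination buffer before it drives a copy:

    u8 dst_buf[256];                      /* hypothetical destination */
    u32 len = read_len_from_device();     /* device-controlled input */

    if (len > sizeof(dst_buf))            /* reject oversized lengths */
            return -EINVAL;
    copy_from_device(dst_buf, len);       /* now provably in bounds */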
Attached is the log we found by fuzzing.
BUG: KASAN: slab-out-of-bounds in
hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
Read of size 4 at addr ffff888016260b08 by task modprobe/213
CPU: 0 PID: 213 Comm: modprobe Not tainted 5.6.0 #1
Call Trace:
dump_stack+0x76/0xa0
print_address_description.constprop.0+0x16/0x200
? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
__kasan_report.cold+0x37/0x7c
? aq_hw_read_reg_bit+0x60/0x70 [atlantic]
? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
kasan_report+0xe/0x20
hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
hw_atl_utils_fw_rpc_call+0x95/0x130 [atlantic]
hw_atl_utils_fw_rpc_wait+0x176/0x210 [atlantic]
hw_atl_utils_mpi_create+0x229/0x2e0 [atlantic]
? hw_atl_utils_fw_rpc_wait+0x210/0x210 [atlantic]
? hw_atl_utils_initfw+0x9f/0x1c8 [atlantic]
hw_atl_utils_initfw+0x12a/0x1c8 [atlantic]
aq_nic_ndev_register+0x88/0x650 [atlantic]
? aq_nic_ndev_init+0x235/0x3c0 [atlantic]
aq_pci_probe+0x731/0x9b0 [atlantic]
? aq_pci_func_init+0xc0/0xc0 [atlantic]
local_pci_probe+0xd3/0x160
pci_device_probe+0x23f/0x3e0
Reported-by: Brendan Dolan-Gavitt <brendandg(a)nyu.edu>
Signed-off-by: Zekun Shen <bruceshenzk(a)gmail.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Reviewed-by: Wei Yongjun <weiyongjun1(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
.../ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 096ec18e8f15a..49c80bac9ce28 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -459,6 +459,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
goto err_exit;
if (fw.len == 0xFFFFU) {
+ if (sw.len > sizeof(self->rpc)) {
+ printk(KERN_INFO "Invalid sw len: %x\n", sw.len);
+ err = -EINVAL;
+ goto err_exit;
+ }
err = hw_atl_utils_fw_rpc_call(self, sw.len);
if (err < 0)
goto err_exit;
@@ -469,6 +474,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
if (rpc) {
if (fw.len) {
+ if (fw.len > sizeof(self->rpc)) {
+ printk(KERN_INFO "Invalid fw len: %x\n", fw.len);
+ err = -EINVAL;
+ goto err_exit;
+ }
err =
hw_atl_utils_fw_downld_dwords(self,
self->rpc_addr,
--
2.25.1
[PATCH OLK-5.10 0/4] Introduce x86 assembler accelerated implementation for SM4 algorithm
by shenzijun 22 Nov '21
From: 沈子俊 <shenzijun(a)kylinos.cn>
This patchset extracts the common SM4 code into a separate library. At
the same time, the SM4 acceleration on arm64 is adjusted to use this
library. It then introduces an instruction-set accelerated
implementation for x86_64.
The optimization supports four SM4 modes: ECB, CBC, CFB, and CTR. Since
CBC and CFB cannot encrypt multiple blocks in parallel, the gain there
is modest. All selftests pass.
The main algorithm implementation comes from SM4 AES-NI work by
libgcrypt and Markku-Juhani O. Saarinen at:
https://github.com/mjosaarinen/sm4ni
Benchmarked on an Intel Xeon Cascade Lake; the data comes from tcrypt
modes 218 and 518. The columns are block lengths in bytes, and the unit
is Mb/s:
sm4-generic | 16 64 128 256 1024 1420 4096
ECB enc | 40.99 46.50 48.05 48.41 49.20 49.25 49.28
ECB dec | 41.07 46.99 48.15 48.67 49.20 49.25 49.29
CBC enc | 37.71 45.28 46.77 47.60 48.32 48.37 48.40
CBC dec | 36.48 44.82 46.43 47.45 48.23 48.30 48.36
CFB enc | 37.94 44.84 46.12 46.94 47.57 47.46 47.68
CFB dec | 37.50 42.84 43.74 44.37 44.85 44.80 44.96
CTR enc | 39.20 45.63 46.75 47.49 48.09 47.85 48.08
CTR dec | 39.64 45.70 46.72 47.47 47.98 47.88 48.06
sm4-aesni-avx
ECB enc | 33.75 134.47 221.64 243.43 264.05 251.58 258.13
ECB dec | 34.02 134.92 223.11 245.14 264.12 251.04 258.33
CBC enc | 38.85 46.18 47.67 48.34 49.00 48.96 49.14
CBC dec | 33.54 131.29 223.88 245.27 265.50 252.41 263.78
CFB enc | 38.70 46.10 47.58 48.29 49.01 48.94 49.19
CFB dec | 32.79 128.40 223.23 244.87 265.77 253.31 262.79
CTR enc | 32.58 122.23 220.29 241.16 259.57 248.32 256.69
CTR dec | 32.81 122.47 218.99 241.54 258.42 248.58 256.61
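For context, a minimal sketch of how a kernel user reaches the
accelerated cipher through the generic crypto API; the highest-priority
implementation (sm4-aesni-avx when usable) is selected automatically.
Error handling and request setup are trimmed, and key is a placeholder:

    struct crypto_skcipher *tfm;

    tfm = crypto_alloc_skcipher("ctr(sm4)", 0, 0);
    if (IS_ERR(tfm))
            return PTR_ERR(tfm);
    crypto_skcipher_setkey(tfm, key, 16);   /* SM4 uses a 128-bit key */
    /* ... build an skcipher_request, call crypto_skcipher_encrypt() ... */
    crypto_free_skcipher(tfm);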
---
v3 changes:
* Remove single block algorithm that does not greatly improve performance
* Remove accelerated for sm4 key expand, which is not performance-critical
* Fix the warning on arm64/sm4-ce
v2 changes:
* SM4 library functions use "sm4_" prefix instead of "crypto_" prefix
* sm4-aesni-avx supports accelerated implementation of four specific modes
* tcrypt benchmark supports sm4-aesni-avx
* fixes of other reviews
沈子俊 (4):
crypto: sm4 - create SM4 library based on sm4 generic code
crypto: arm64/sm4-ce - Make dependent on sm4 library instead of
sm4-generic
crypto: x86/sm4 - add AES-NI/AVX/x86_64 implementation
crypto: tcrypt - add the asynchronous speed test for SM4
arch/arm64/crypto/Kconfig | 2 +-
arch/arm64/crypto/sm4-ce-glue.c | 20 +-
arch/x86/crypto/Makefile | 3 +
arch/x86/crypto/sm4-aesni-avx-asm_64.S | 589 +++++++++++++++++++++++++
arch/x86/crypto/sm4_aesni_avx_glue.c | 459 +++++++++++++++++++
crypto/Kconfig | 22 +
crypto/sm4_generic.c | 180 +-------
crypto/tcrypt.c | 26 +-
include/crypto/sm4.h | 25 +-
lib/crypto/Kconfig | 3 +
lib/crypto/Makefile | 3 +
lib/crypto/sm4.c | 176 ++++++++
12 files changed, 1330 insertions(+), 178 deletions(-)
create mode 100644 arch/x86/crypto/sm4-aesni-avx-asm_64.S
create mode 100644 arch/x86/crypto/sm4_aesni_avx_glue.c
create mode 100644 lib/crypto/sm4.c
--
2.30.0
[PATCH openEuler-5.10 01/88] topology: Represent clusters of CPUs within a die
by Zheng Zengkai 18 Nov '21
From: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
mainline inclusion
from tip/sched/core for v5.15-release
commit: c5e22feffdd736cb02b98b0f5b375c8ebc858dd4
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4GEZS
CVE: NA
Reference: https://lore.kernel.org/lkml/20210924085104.44806-1-21cnbao@gmail.com/
------------------------------------------------------------------------
Both ACPI and DT provide the ability to describe additional layers of
topology between that of individual cores and higher level constructs
such as the level at which the last level cache is shared.
In ACPI this can be represented in PPTT as a Processor Hierarchy
Node Structure [1] that is the parent of the CPU cores and in turn
has a parent Processor Hierarchy Nodes Structure representing
a higher level of topology.
For example, Kunpeng 920 has 6 or 8 clusters in each NUMA node, and
each cluster has 4 CPUs. All clusters share the L3 cache data, but each
cluster has a local L3 tag. In addition, the clusters within a NUMA
node share some internal system buses.
+-----------------------------------+ +---------+
| +------+ +------+ +--------------------------+ |
| | CPU0 | | cpu1 | | +-----------+ | |
| +------+ +------+ | | | | |
| +----+ L3 | | |
| +------+ +------+ cluster | | tag | | |
| | CPU2 | | CPU3 | | | | | |
| +------+ +------+ | +-----------+ | |
| | | |
+-----------------------------------+ | |
+-----------------------------------+ | |
| +------+ +------+ +--------------------------+ |
| | | | | | +-----------+ | |
| +------+ +------+ | | | | |
| | | L3 | | |
| +------+ +------+ +----+ tag | | |
| | | | | | | | | |
| +------+ +------+ | +-----------+ | |
| | | |
+-----------------------------------+ | L3 |
| data |
+-----------------------------------+ | |
| +------+ +------+ | +-----------+ | |
| | | | | | | | | |
| +------+ +------+ +----+ L3 | | |
| | | tag | | |
| +------+ +------+ | | | | |
| | | | | | +-----------+ | |
| +------+ +------+ +--------------------------+ |
+-----------------------------------| | |
+-----------------------------------| | |
| +------+ +------+ +--------------------------+ |
| | | | | | +-----------+ | |
| +------+ +------+ | | | | |
| +----+ L3 | | |
| +------+ +------+ | | tag | | |
| | | | | | | | | |
| +------+ +------+ | +-----------+ | |
| | | |
+-----------------------------------+ | |
+-----------------------------------+ | |
| +------+ +------+ +--------------------------+ |
| | | | | | +-----------+ | |
| +------+ +------+ | | | | |
| | | L3 | | |
| +------+ +------+ +---+ tag | | |
| | | | | | | | | |
| +------+ +------+ | +-----------+ | |
| | | |
+-----------------------------------+ | |
+-----------------------------------+ | |
| +------+ +------+ +--------------------------+ |
| | | | | | +-----------+ | |
| +------+ +------+ | | | | |
| | | L3 | | |
| +------+ +------+ +--+ tag | | |
| | | | | | | | | |
| +------+ +------+ | +-----------+ | |
| | +---------+
+-----------------------------------+
That means spreading tasks among clusters will bring more bandwidth
while packing tasks within one cluster will lead to smaller cache
synchronization latency. So both kernel and userspace will have
a chance to leverage this topology to deploy tasks accordingly to
achieve either smaller cache latency within one cluster or an even
distribution of load among clusters for higher throughput.
This patch exposes the cluster topology to both kernel and userspace.
Libraries like hwloc can discover clusters via cluster_cpus and the
related sysfs attributes; a PoC of hwloc support is at [2].
Note this patch only handles the ACPI case.
Special consideration is needed for SMT processors, where it is
necessary to move 2 levels up the hierarchy from the leaf nodes
(thus skipping the processor core level).
Note that arm64 / ACPI does not provide any means of identifying
a die level in the topology, but that may be unrelated to the cluster
level.
[1] ACPI Specification 6.3 - section 5.2.29.1 processor hierarchy node
structure (Type 0)
[2] https://github.com/hisilicon/hwloc/tree/linux-cluster
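As an illustration only (a hedged userspace sketch, not part of the
patch; the path follows the sysfs attributes added here):

    #include <stdio.h>

    int main(void)
    {
            char buf[64];
            FILE *f = fopen("/sys/devices/system/cpu/cpu0/topology/"
                            "cluster_cpus_list", "r");

            if (!f)
                    return 1;       /* attribute not present */
            if (fgets(buf, sizeof(buf), f))
                    printf("cpu0 cluster peers: %s", buf);
            fclose(f);
            return 0;
    }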
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Signed-off-by: Tian Tao <tiantao6(a)hisilicon.com>
Signed-off-by: Barry Song <song.bao.hua(a)hisilicon.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Link: https://lore.kernel.org/r/20210924085104.44806-2-21cnbao@gmail.com
Signed-off-by: Yicong Yang <yangyicong(a)hisilicon.com>
Reviewed-by: tao zeng <prime.zeng(a)hisilicon.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
Documentation/admin-guide/cputopology.rst | 26 +++++++--
arch/arm64/kernel/topology.c | 2 +
drivers/acpi/pptt.c | 67 +++++++++++++++++++++++
drivers/base/arch_topology.c | 15 +++++
drivers/base/topology.c | 10 ++++
include/linux/acpi.h | 5 ++
include/linux/arch_topology.h | 5 ++
include/linux/topology.h | 6 ++
8 files changed, 132 insertions(+), 4 deletions(-)
diff --git a/Documentation/admin-guide/cputopology.rst b/Documentation/admin-guide/cputopology.rst
index b90dafcc8237..57be98ae27b8 100644
--- a/Documentation/admin-guide/cputopology.rst
+++ b/Documentation/admin-guide/cputopology.rst
@@ -18,6 +18,11 @@ die_id:
identifier (rather than the kernel's). The actual value is
architecture and platform dependent.
+cluster_id:
+ the cluster ID of cpuX. Typically it is the hardware platform's
+ identifier (rather than the kernel's). The actual value is
+ architecture and platform dependent.
+
core_id:
the CPU core ID of cpuX. Typically it is the hardware platform's
@@ -64,6 +69,15 @@ die_cpus_list:
human-readable list of CPUs within the same die.
+cluster_cpus:
+
+ internal kernel map of CPUs within the same cluster
+
+cluster_cpus_list:
+
+ human-readable list of CPUs within the same cluster.
+ The format is like 0-3, 8-11, 14,17.
+
book_siblings:
internal kernel map of cpuX's hardware threads within the same
@@ -96,11 +110,13 @@ these macros in include/asm-XXX/topology.h::
#define topology_physical_package_id(cpu)
#define topology_die_id(cpu)
+ #define topology_cluster_id(cpu)
#define topology_core_id(cpu)
#define topology_book_id(cpu)
#define topology_drawer_id(cpu)
#define topology_sibling_cpumask(cpu)
#define topology_core_cpumask(cpu)
+ #define topology_cluster_cpumask(cpu)
#define topology_die_cpumask(cpu)
#define topology_book_cpumask(cpu)
#define topology_drawer_cpumask(cpu)
@@ -116,10 +132,12 @@ not defined by include/asm-XXX/topology.h:
1) topology_physical_package_id: -1
2) topology_die_id: -1
-3) topology_core_id: 0
-4) topology_sibling_cpumask: just the given CPU
-5) topology_core_cpumask: just the given CPU
-6) topology_die_cpumask: just the given CPU
+3) topology_cluster_id: -1
+4) topology_core_id: 0
+5) topology_sibling_cpumask: just the given CPU
+6) topology_core_cpumask: just the given CPU
+7) topology_cluster_cpumask: just the given CPU
+8) topology_die_cpumask: just the given CPU
For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
default definitions for topology_book_id() and topology_book_cpumask().
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 19cedf882a6b..a80fcb6dd88a 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -114,6 +114,8 @@ int __init parse_acpi_topology(void)
cpu_topology[cpu].thread_id = -1;
cpu_topology[cpu].core_id = topology_id;
}
+ topology_id = find_acpi_cpu_topology_cluster(cpu);
+ cpu_topology[cpu].cluster_id = topology_id;
topology_id = find_acpi_cpu_topology_package(cpu);
cpu_topology[cpu].package_id = topology_id;
diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index 20bd584f2516..3d9403ded527 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -849,6 +849,73 @@ int find_acpi_cpu_topology_package(unsigned int cpu)
ACPI_PPTT_PHYSICAL_PACKAGE);
}
+/**
+ * find_acpi_cpu_topology_cluster() - Determine a unique CPU cluster value
+ * @cpu: Kernel logical CPU number
+ *
+ * Determine a topology unique cluster ID for the given CPU/thread.
+ * This ID can then be used to group peers, which will have matching ids.
+ *
+ * The cluster, if present is the level of topology above CPUs. In a
+ * multi-thread CPU, it will be the level above the CPU, not the thread.
+ * It may not exist in single CPU systems. In simple multi-CPU systems,
+ * it may be equal to the package topology level.
+ *
+ * Return: -ENOENT if the PPTT doesn't exist, the CPU cannot be found
+ * or there is no toplogy level above the CPU..
+ * Otherwise returns a value which represents the package for this CPU.
+ */
+
+int find_acpi_cpu_topology_cluster(unsigned int cpu)
+{
+ struct acpi_table_header *table;
+ acpi_status status;
+ struct acpi_pptt_processor *cpu_node, *cluster_node;
+ u32 acpi_cpu_id;
+ int retval;
+ int is_thread;
+
+ status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+ if (ACPI_FAILURE(status)) {
+ acpi_pptt_warn_missing();
+ return -ENOENT;
+ }
+
+ acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+ cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
+ if (cpu_node == NULL || !cpu_node->parent) {
+ retval = -ENOENT;
+ goto put_table;
+ }
+
+ is_thread = cpu_node->flags & ACPI_PPTT_ACPI_PROCESSOR_IS_THREAD;
+ cluster_node = fetch_pptt_node(table, cpu_node->parent);
+ if (cluster_node == NULL) {
+ retval = -ENOENT;
+ goto put_table;
+ }
+ if (is_thread) {
+ if (!cluster_node->parent) {
+ retval = -ENOENT;
+ goto put_table;
+ }
+ cluster_node = fetch_pptt_node(table, cluster_node->parent);
+ if (cluster_node == NULL) {
+ retval = -ENOENT;
+ goto put_table;
+ }
+ }
+ if (cluster_node->flags & ACPI_PPTT_ACPI_PROCESSOR_ID_VALID)
+ retval = cluster_node->acpi_processor_id;
+ else
+ retval = ACPI_PTR_DIFF(cluster_node, table);
+
+put_table:
+ acpi_put_table(table);
+
+ return retval;
+}
+
/**
* find_acpi_cpu_topology_hetero_id() - Get a core architecture tag
* @cpu: Kernel logical CPU number
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index de8587cc119e..21e63b6fab83 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -506,6 +506,11 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
return core_mask;
}
+const struct cpumask *cpu_clustergroup_mask(int cpu)
+{
+ return &cpu_topology[cpu].cluster_sibling;
+}
+
void update_siblings_masks(unsigned int cpuid)
{
struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
@@ -523,6 +528,12 @@ void update_siblings_masks(unsigned int cpuid)
if (cpuid_topo->package_id != cpu_topo->package_id)
continue;
+ if (cpuid_topo->cluster_id == cpu_topo->cluster_id &&
+ cpuid_topo->cluster_id != -1) {
+ cpumask_set_cpu(cpu, &cpuid_topo->cluster_sibling);
+ cpumask_set_cpu(cpuid, &cpu_topo->cluster_sibling);
+ }
+
cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
@@ -541,6 +552,9 @@ static void clear_cpu_topology(int cpu)
cpumask_clear(&cpu_topo->llc_sibling);
cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
+ cpumask_clear(&cpu_topo->cluster_sibling);
+ cpumask_set_cpu(cpu, &cpu_topo->cluster_sibling);
+
cpumask_clear(&cpu_topo->core_sibling);
cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
cpumask_clear(&cpu_topo->thread_sibling);
@@ -556,6 +570,7 @@ void __init reset_cpu_topology(void)
cpu_topo->thread_id = -1;
cpu_topo->core_id = -1;
+ cpu_topo->cluster_id = -1;
cpu_topo->package_id = -1;
cpu_topo->llc_id = -1;
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 4d254fcc93d1..7157ac08ff57 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -46,6 +46,9 @@ static DEVICE_ATTR_RO(physical_package_id);
define_id_show_func(die_id);
static DEVICE_ATTR_RO(die_id);
+define_id_show_func(cluster_id);
+static DEVICE_ATTR_RO(cluster_id);
+
define_id_show_func(core_id);
static DEVICE_ATTR_RO(core_id);
@@ -61,6 +64,10 @@ define_siblings_show_func(core_siblings, core_cpumask);
static DEVICE_ATTR_RO(core_siblings);
static DEVICE_ATTR_RO(core_siblings_list);
+define_siblings_show_func(cluster_cpus, cluster_cpumask);
+static DEVICE_ATTR_RO(cluster_cpus);
+static DEVICE_ATTR_RO(cluster_cpus_list);
+
define_siblings_show_func(die_cpus, die_cpumask);
static DEVICE_ATTR_RO(die_cpus);
static DEVICE_ATTR_RO(die_cpus_list);
@@ -88,6 +95,7 @@ static DEVICE_ATTR_RO(drawer_siblings_list);
static struct attribute *default_attrs[] = {
&dev_attr_physical_package_id.attr,
&dev_attr_die_id.attr,
+ &dev_attr_cluster_id.attr,
&dev_attr_core_id.attr,
&dev_attr_thread_siblings.attr,
&dev_attr_thread_siblings_list.attr,
@@ -95,6 +103,8 @@ static struct attribute *default_attrs[] = {
&dev_attr_core_cpus_list.attr,
&dev_attr_core_siblings.attr,
&dev_attr_core_siblings_list.attr,
+ &dev_attr_cluster_cpus.attr,
+ &dev_attr_cluster_cpus_list.attr,
&dev_attr_die_cpus.attr,
&dev_attr_die_cpus_list.attr,
&dev_attr_package_cpus.attr,
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index a3db177329c2..9045dfb6d19c 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1350,6 +1350,7 @@ static inline int lpit_read_residency_count_address(u64 *address)
int acpi_pptt_init(void);
int acpi_pptt_cpu_is_thread(unsigned int cpu);
int find_acpi_cpu_topology(unsigned int cpu, int level);
+int find_acpi_cpu_topology_cluster(unsigned int cpu);
int find_acpi_cpu_topology_package(unsigned int cpu);
int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
@@ -1362,6 +1363,10 @@ static inline int find_acpi_cpu_topology(unsigned int cpu, int level)
{
return -EINVAL;
}
+static inline int find_acpi_cpu_topology_cluster(unsigned int cpu)
+{
+ return -EINVAL;
+}
static inline int find_acpi_cpu_topology_package(unsigned int cpu)
{
return -EINVAL;
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index 0f6cd6b73a61..987c7ea75291 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -49,10 +49,12 @@ void topology_set_thermal_pressure(const struct cpumask *cpus,
struct cpu_topology {
int thread_id;
int core_id;
+ int cluster_id;
int package_id;
int llc_id;
cpumask_t thread_sibling;
cpumask_t core_sibling;
+ cpumask_t cluster_sibling;
cpumask_t llc_sibling;
};
@@ -60,13 +62,16 @@ struct cpu_topology {
extern struct cpu_topology cpu_topology[NR_CPUS];
#define topology_physical_package_id(cpu) (cpu_topology[cpu].package_id)
+#define topology_cluster_id(cpu) (cpu_topology[cpu].cluster_id)
#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
+#define topology_cluster_cpumask(cpu) (&cpu_topology[cpu].cluster_sibling)
#define topology_llc_cpumask(cpu) (&cpu_topology[cpu].llc_sibling)
void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid);
const struct cpumask *cpu_coregroup_mask(int cpu);
+const struct cpumask *cpu_clustergroup_mask(int cpu);
void update_siblings_masks(unsigned int cpu);
void remove_cpu_topology(unsigned int cpuid);
void reset_cpu_topology(void);
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 7634cd737061..80d27d717631 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -186,6 +186,9 @@ static inline int cpu_to_mem(int cpu)
#ifndef topology_die_id
#define topology_die_id(cpu) ((void)(cpu), -1)
#endif
+#ifndef topology_cluster_id
+#define topology_cluster_id(cpu) ((void)(cpu), -1)
+#endif
#ifndef topology_core_id
#define topology_core_id(cpu) ((void)(cpu), 0)
#endif
@@ -195,6 +198,9 @@ static inline int cpu_to_mem(int cpu)
#ifndef topology_core_cpumask
#define topology_core_cpumask(cpu) cpumask_of(cpu)
#endif
+#ifndef topology_cluster_cpumask
+#define topology_cluster_cpumask(cpu) cpumask_of(cpu)
+#endif
#ifndef topology_die_cpumask
#define topology_die_cpumask(cpu) cpumask_of(cpu)
#endif
--
2.20.1
[PATCH openEuler-1.0-LTS 1/2] crypto: public_key: fix overflow during implicit conversion
by Yang Yingliang 18 Nov '21
From: zhenwei pi <pizhenwei(a)bytedance.com>
stable inclusion
from linux-4.19.207
commit aab312696d37de80502ca633b40184de24f22917
--------------------------------
commit f985911b7bc75d5c98ed24d8aaa8b94c590f7c6a upstream.
We hit a kernel warning like the one below. It can be reproduced by
verifying a 256-byte data file with the keyctl command, running this
script:
RAWDATA=rawdata
SIGDATA=sigdata
modprobe pkcs8_key_parser
rm -rf *.der *.pem *.pfx
rm -rf $RAWDATA
dd if=/dev/random of=$RAWDATA bs=256 count=1
openssl req -nodes -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem \
-subj "/C=CN/ST=GD/L=SZ/O=vihoo/OU=dev/CN=xx.com/emailAddress=yy(a)xx.com"
KEY_ID=`openssl pkcs8 -in key.pem -topk8 -nocrypt -outform DER | keyctl \
padd asymmetric 123 @s`
keyctl pkey_sign $KEY_ID 0 $RAWDATA enc=pkcs1 hash=sha1 > $SIGDATA
keyctl pkey_verify $KEY_ID 0 $RAWDATA $SIGDATA enc=pkcs1 hash=sha1
Then the kernel reports:
WARNING: CPU: 5 PID: 344556 at crypto/rsa-pkcs1pad.c:540
pkcs1pad_verify+0x160/0x190
...
Call Trace:
public_key_verify_signature+0x282/0x380
? software_key_query+0x12d/0x180
? keyctl_pkey_params_get+0xd6/0x130
asymmetric_key_verify_signature+0x66/0x80
keyctl_pkey_verify+0xa5/0x100
do_syscall_64+0x35/0xb0
entry_SYSCALL_64_after_hwframe+0x44/0xae
The cause of this issue is in 'asymmetric_key_verify_signature': the
assignment '.digest_size(u8) = params->in_len(u32)' overflows the u8
value, so use u32 instead of u8 for the digest_size field. Also reorder
struct public_key_signature, which saves 8 bytes on a 64-bit machine.
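A two-line illustration of the narrowing (generic C, not the driver
code):

    u32 in_len = 256;          /* e.g. the 256-byte data file above */
    u8 digest_size = in_len;   /* implicit conversion truncates to 0 */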
Cc: stable(a)vger.kernel.org
Signed-off-by: zhenwei pi <pizhenwei(a)bytedance.com>
Reviewed-by: Jarkko Sakkinen <jarkko(a)kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
include/crypto/public_key.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h
index e0b681a717bac..052e26fda2e6c 100644
--- a/include/crypto/public_key.h
+++ b/include/crypto/public_key.h
@@ -35,9 +35,9 @@ extern void public_key_free(struct public_key *key);
struct public_key_signature {
struct asymmetric_key_id *auth_ids[2];
u8 *s; /* Signature */
- u32 s_size; /* Number of bytes in signature */
u8 *digest;
- u8 digest_size; /* Number of bytes in digest */
+ u32 s_size; /* Number of bytes in signature */
+ u32 digest_size; /* Number of bytes in digest */
const char *pkey_algo;
const char *hash_algo;
};
--
2.25.1
【Meeting Notice】openEuler kernel tech sharing session 14 & biweekly meeting Time: 2021-11-19 14:00-16:30
by Meeting Book 18 Nov '21
[PATCH openEuler-1.0-LTS] net: bridge: fix stale eth hdr pointer in br_dev_xmit
by Yang Yingliang 18 Nov '21
From: Nikolay Aleksandrov <nikolay(a)cumulusnetworks.com>
mainline inclusion
from mainline-v5.6-rc4
commit 823d81b0fa2cd83a640734e74caee338b5d3c093
category: bugfix
bugzilla: 185773
CVE: NA
-------------------------------------------------
In br_dev_xmit() we perform VLAN filtering in br_allowed_ingress(), but
if the packet carries the VLAN header inline (e.g. a bridge with
tx-vlan-offload disabled) then the VLAN filtering code uses
skb_vlan_untag() to extract the vid before filtering, which in turn
calls pskb_may_pull(), and we may end up with a stale eth pointer.
Moreover, the cached eth header pointer will generally be wrong after
that operation. Remove the eth header caching and just use eth_hdr()
directly; the compiler does the right thing and computes it only once,
so we lose nothing.
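A condensed sketch of the hazard (simplified; hlen and handle_arp() are
hypothetical stand-ins):

    struct ethhdr *eth = eth_hdr(skb);   /* points into skb->head */

    if (!pskb_may_pull(skb, hlen))       /* may reallocate skb->head */
            goto drop;
    /* 'eth' may now dangle; re-derive instead of caching: */
    if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP))
            handle_arp(skb);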
Fixes: 057658cb33fb ("bridge: suppress arp pkts on BR_NEIGH_SUPPRESS ports")
Signed-off-by: Nikolay Aleksandrov <nikolay(a)cumulusnetworks.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Huang Guobin <huangguobin4(a)huawei.com>
Reviewed-by: Wei Yongjun <weiyongjun1(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
net/bridge/br_device.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index a350c05b7ff5e..7b7784bb1cb99 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -39,7 +39,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
const struct nf_br_ops *nf_ops;
const unsigned char *dest;
- struct ethhdr *eth;
u16 vid = 0;
rcu_read_lock();
@@ -58,15 +57,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
BR_INPUT_SKB_CB(skb)->brdev = dev;
skb_reset_mac_header(skb);
- eth = eth_hdr(skb);
skb_pull(skb, ETH_HLEN);
if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
goto out;
if (IS_ENABLED(CONFIG_INET) &&
- (eth->h_proto == htons(ETH_P_ARP) ||
- eth->h_proto == htons(ETH_P_RARP)) &&
+ (eth_hdr(skb)->h_proto == htons(ETH_P_ARP) ||
+ eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) &&
br->neigh_suppress_enabled) {
br_do_proxy_suppress_arp(skb, br, vid, NULL);
} else if (IS_ENABLED(CONFIG_IPV6) &&
--
2.25.1
[PATCH openEuler-1.0-LTS] x86/entry: Make entry_64_compat.S objtool clean
by Yang Yingliang 17 Nov '21
From: Peter Zijlstra <peterz(a)infradead.org>
mainline inclusion
from mainline-v5.8-rc1
commit 1c3e5d3f60e26415d4227aa1193cf9e2db4df834
category: feature
bugzilla: 175666
CVE: NA
---------------------------
Currently entry_64_compat is exempt from objtool, but with vmlinux
mode there is no hiding it.
Make the following changes to make it pass:
- change entry_SYSENTER_compat to STT_NOTYPE; it's not a function
and doesn't have function type stack setup.
- mark all STT_NOTYPE symbols with UNWIND_HINT_EMPTY; so we do
validate them and don't treat them as unreachable.
- don't abuse RSP as a temp register, this confuses objtool
mightily as it (rightfully) thinks we're doing unspeakable
things to the stack.
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Alexandre Chartre <alexandre.chartre(a)oracle.com>
Acked-by: Andy Lutomirski <luto(a)kernel.org>
Link: https://lkml.kernel.org/r/20200505134341.272248024@linutronix.de
Signed-off-by: Wang ShaoBo <bobo.shaobowang(a)huawei.com>
Conflicts:
arch/x86/entry/entry_64_compat.S
[wangshaobo: change ENDPROC to END, avoid objtool skipping STT_FUNC type check]
Reviewed-by: Cheng Jian <cj.chengjian(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
arch/x86/entry/Makefile | 2 --
arch/x86/entry/entry_64_compat.S | 17 +++++++++++++++--
2 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 06fc70cf5433d..e71890149ce0e 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -3,8 +3,6 @@
# Makefile for the x86 low level entry code
#
-OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
-
CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,)
CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,)
obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 40d2834a8101e..f326e0c6e8dd8 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -47,11 +47,14 @@
* 0(%ebp) arg6
*/
ENTRY(entry_SYSENTER_compat)
+ UNWIND_HINT_EMPTY
/* Interrupts are off on entry. */
SWAPGS
/* We are about to clobber %rsp anyway, clobbering here is OK */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+ pushq %rax
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ popq %rax
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -104,6 +107,9 @@ ENTRY(entry_SYSENTER_compat)
xorl %r14d, %r14d /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
xorl %r15d, %r15d /* nospec r15 */
+
+ UNWIND_HINT_REGS
+
cld
/*
@@ -147,7 +153,7 @@ ENTRY(entry_SYSENTER_compat)
popfq
jmp .Lsysenter_flags_fixed
GLOBAL(__end_entry_SYSENTER_compat)
-ENDPROC(entry_SYSENTER_compat)
+END(entry_SYSENTER_compat)
/*
* 32-bit SYSCALL entry.
@@ -197,6 +203,7 @@ ENDPROC(entry_SYSENTER_compat)
* 0(%esp) arg6
*/
ENTRY(entry_SYSCALL_compat)
+ UNWIND_HINT_EMPTY
/* Interrupts are off on entry. */
swapgs
@@ -247,6 +254,8 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq $0 /* pt_regs->r15 = 0 */
xorl %r15d, %r15d /* nospec r15 */
+ UNWIND_HINT_REGS
+
/*
* User mode is traced as though IRQs are on, and SYSENTER
* turned them off.
@@ -335,6 +344,7 @@ END(entry_SYSCALL_compat)
* ebp arg6
*/
ENTRY(entry_INT80_compat)
+ UNWIND_HINT_EMPTY
/*
* Interrupts are off on entry.
*/
@@ -396,6 +406,9 @@ ENTRY(entry_INT80_compat)
xorl %r14d, %r14d /* nospec r14 */
pushq %r15 /* pt_regs->r15 */
xorl %r15d, %r15d /* nospec r15 */
+
+ UNWIND_HINT_REGS
+
cld
/*
--
2.25.1
From: 沈子俊 <shenzijun(a)kylinos.cn>
The GCM and CCM modes of SM4 are defined in the RFC 8998 specification:
https://datatracker.ietf.org/doc/html/rfc8998
沈子俊 (3):
crypto: tcrypt - Fix missing return value check
crypto: testmgr - Add GCM/CCM mode test of SM4 algorithm
crypto: tcrypt - add GCM/CCM mode test for SM4 algorithm
crypto/tcrypt.c | 73 ++++++++++++++++++++---
crypto/testmgr.c | 29 ++++++++++
crypto/testmgr.h | 148 +++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 241 insertions(+), 9 deletions(-)
--
2.30.0
【Meeting Notice】openEuler kernel tech sharing session 14 & biweekly meeting Time: 2021-11-19 14:00-16:30
by Meeting Book 16 Nov '21
[PATCH openEuler-1.0-LTS] io_uring: fix ltout double free on completion race
by Yang Yingliang 15 Nov '21
From: Pavel Begunkov <asml.silence(a)gmail.com>
mainline inclusion
from mainline-v5.13-rc2
commit 447c19f3b5074409c794b350b10306e1da1ef4ba
category: bugfix
bugzilla: 185736
CVE: NA
-----------------------------------------------
Always remove the linked timeout from the master request's link list in
io_link_timeout_fn(); otherwise we may get a use-after-free when
io_link_timeout_fn() first puts the linked timeout on its failure path,
and the timeout is then found and put again when the master request is
freed.
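The ordering rule the fix enforces, sketched (a simplified view of the
hunk below):

    list_del_init(&req->link_list);          /* unlink unconditionally */
    if (refcount_inc_not_zero(&prev->refs))  /* only then pin prev */
            prev->flags &= ~REQ_F_LINK_TIMEOUT;
    else
            prev = NULL;                     /* prev already going away */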
Cc: stable(a)vger.kernel.org # 5.10+
Fixes: 90cd7e424969d ("io_uring: track link timeout's master explicitly")
Reported-and-tested-by: syzbot+5a864149dd970b546223(a)syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/69c46bf6ce37fec4fdcd98f0882e18eb07ce693a.16209901…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
conflicts:
fs/io_uring.c
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Reviewed-by: Zhang Yi <yi.zhang(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
fs/io_uring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 226e9f123f654..da61eeaf64e88 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5925,8 +5925,8 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
if (!list_empty(&req->link_list)) {
prev = list_entry(req->link_list.prev, struct io_kiocb,
link_list);
+ list_del_init(&req->link_list);
if (refcount_inc_not_zero(&prev->refs)) {
- list_del_init(&req->link_list);
prev->flags &= ~REQ_F_LINK_TIMEOUT;
} else
prev = NULL;
--
2.25.1
backport psi feature and avoid kabi change
bugzilla: https://gitee.com/openeuler/kernel/issues/I47QS2
Changes since v2:
1. Remove the workingset_restore field. Adding WORKINGSET_RESTORE would
shift the memcg_stat_item values (see the sketch after this changelog):
enum memcg_stat_item {
MEMCG_CACHE = NR_VM_NODE_STAT_ITEMS,
MEMCG_RSS,
MEMCG_RSS_HUGE,
...
Thanks to Cheng jian.
2. Patch 22's and patch 23's commit messages add "from mainline-v5.10.78".
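A minimal illustration of the kABI concern from item 1 (generic C, not
the kernel enum; the EXAMPLE_* names are placeholders):

    enum example_stat {
            EXAMPLE_A = 100,   /* 100 */
            /* inserting EXAMPLE_NEW here would renumber EXAMPLE_B to
             * 102, breaking consumers that baked in the old value */
            EXAMPLE_B,         /* currently 101 */
    };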
Baruch Siach (1):
psi: fix reference to kernel commandline enable
Dan Schatzberg (1):
kernel/sched/psi.c: expose pressure metrics on root cgroup
Johannes Weiner (12):
mm: workingset: tell cache transitions from workingset thrashing
sched: loadavg: consolidate LOAD_INT, LOAD_FRAC, CALC_LOAD
sched: loadavg: make calc_load_n() public
sched: sched.h: make rq locking and clock functions available in
stats.h
sched: introduce this_rq_lock_irq()
psi: pressure stall information for CPU, memory, and IO
psi: cgroup support
psi: make disabling/enabling easier for vendor kernels
psi: fix aggregation idle shut-off
psi: avoid divide-by-zero crash inside virtual machines
fs: kernfs: add poll file operation
sched/psi: Fix sampling error and rare div0 crashes with cgroups and
high uptime
Josef Bacik (1):
blk-iolatency: use a percentile approache for ssd's
Liu Xinpeng (2):
psi:enable psi in config
psi:avoid kabi change
Miklos Szeredi (1):
fuse: ignore PG_workingset after stealing
Olof Johansson (1):
kernel/sched/psi.c: simplify cgroup_move_task()
Suren Baghdasaryan (6):
psi: introduce state_mask to represent stalled psi states
psi: make psi_enable static
psi: rename psi fields in preparation for psi trigger addition
psi: split update_stats into parts
psi: track changed states
include/: refactor headers to allow kthread.h inclusion in psi_types.h
Yafang Shao (1):
mm, memcg: add workingset_restore in memory.stat
Documentation/accounting/psi.txt | 73 +++
Documentation/admin-guide/cgroup-v2.rst | 22 +
Documentation/admin-guide/kernel-parameters.txt | 4 +
arch/arm64/configs/openeuler_defconfig | 2 +
arch/powerpc/platforms/cell/cpufreq_spudemand.c | 2 +-
arch/powerpc/platforms/cell/spufs/sched.c | 9 +-
arch/s390/appldata/appldata_os.c | 4 -
arch/x86/configs/openeuler_defconfig | 2 +
block/blk-iolatency.c | 183 +++++-
drivers/cpuidle/governors/menu.c | 4 -
drivers/spi/spi-rockchip.c | 1 +
fs/fuse/dev.c | 1 +
fs/kernfs/file.c | 31 +-
fs/proc/loadavg.c | 3 -
include/linux/cgroup-defs.h | 12 +
include/linux/cgroup.h | 17 +
include/linux/kernfs.h | 8 +
include/linux/kthread.h | 4 +
include/linux/page-flags.h | 5 +
include/linux/psi.h | 55 ++
include/linux/psi_types.h | 95 +++
include/linux/sched.h | 13 +
include/linux/sched/loadavg.h | 24 +-
include/linux/swap.h | 1 +
include/trace/events/mmflags.h | 1 +
init/Kconfig | 28 +
kernel/cgroup/cgroup.c | 55 +-
kernel/debug/kdb/kdb_main.c | 7 +-
kernel/fork.c | 4 +
kernel/kthread.c | 3 +
kernel/sched/Makefile | 1 +
kernel/sched/core.c | 16 +-
kernel/sched/loadavg.c | 139 ++--
kernel/sched/psi.c | 823 ++++++++++++++++++++++++
kernel/sched/sched.h | 178 ++---
kernel/sched/stats.h | 86 +++
kernel/workqueue.c | 23 +
kernel/workqueue_internal.h | 6 +-
mm/compaction.c | 5 +
mm/filemap.c | 20 +-
mm/huge_memory.c | 1 +
mm/migrate.c | 2 +
mm/page_alloc.c | 9 +
mm/swap_state.c | 1 +
mm/vmscan.c | 10 +
mm/workingset.c | 113 +++-
46 files changed, 1827 insertions(+), 279 deletions(-)
create mode 100644 Documentation/accounting/psi.txt
create mode 100644 include/linux/psi.h
create mode 100644 include/linux/psi_types.h
create mode 100644 kernel/sched/psi.c
--
1.8.3.1
[PATCH openEuler-5.10 01/20] blk-mq: don't free tags if the tag_set is used by other device in queue initialztion
by Zheng Zengkai 15 Nov '21
From: Ye Bin <yebin10(a)huawei.com>
mainline inclusion
from mainline-v5.16
commit a846a8e6c9a5949582c5a6a8bbc83a7d27fd891e
category: bugfix
bugzilla: 185668 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
-----------------------------------------------
We got a UAF report on v5.10 as follows:
[ 1446.674930] ==================================================================
[ 1446.675970] BUG: KASAN: use-after-free in blk_mq_get_driver_tag+0x9a4/0xa90
[ 1446.676902] Read of size 8 at addr ffff8880185afd10 by task kworker/1:2/12348
[ 1446.677851]
[ 1446.678073] CPU: 1 PID: 12348 Comm: kworker/1:2 Not tainted 5.10.0-10177-gc9c81b1e346a #2
[ 1446.679168] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
[ 1446.680692] Workqueue: kthrotld blk_throtl_dispatch_work_fn
[ 1446.681448] Call Trace:
[ 1446.681800] dump_stack+0x9b/0xce
[ 1446.682916] print_address_description.constprop.6+0x3e/0x60
[ 1446.685999] kasan_report.cold.9+0x22/0x3a
[ 1446.687186] blk_mq_get_driver_tag+0x9a4/0xa90
[ 1446.687785] blk_mq_dispatch_rq_list+0x21a/0x1d40
[ 1446.692576] __blk_mq_do_dispatch_sched+0x394/0x830
[ 1446.695758] __blk_mq_sched_dispatch_requests+0x398/0x4f0
[ 1446.698279] blk_mq_sched_dispatch_requests+0xdf/0x140
[ 1446.698967] __blk_mq_run_hw_queue+0xc0/0x270
[ 1446.699561] __blk_mq_delay_run_hw_queue+0x4cc/0x550
[ 1446.701407] blk_mq_run_hw_queue+0x13b/0x2b0
[ 1446.702593] blk_mq_sched_insert_requests+0x1de/0x390
[ 1446.703309] blk_mq_flush_plug_list+0x4b4/0x760
[ 1446.705408] blk_flush_plug_list+0x2c5/0x480
[ 1446.708471] blk_finish_plug+0x55/0xa0
[ 1446.708980] blk_throtl_dispatch_work_fn+0x23b/0x2e0
[ 1446.711236] process_one_work+0x6d4/0xfe0
[ 1446.711778] worker_thread+0x91/0xc80
[ 1446.713400] kthread+0x32d/0x3f0
[ 1446.714362] ret_from_fork+0x1f/0x30
[ 1446.714846]
[ 1446.715062] Allocated by task 1:
[ 1446.715509] kasan_save_stack+0x19/0x40
[ 1446.716026] __kasan_kmalloc.constprop.1+0xc1/0xd0
[ 1446.716673] blk_mq_init_tags+0x6d/0x330
[ 1446.717207] blk_mq_alloc_rq_map+0x50/0x1c0
[ 1446.717769] __blk_mq_alloc_map_and_request+0xe5/0x320
[ 1446.718459] blk_mq_alloc_tag_set+0x679/0xdc0
[ 1446.719050] scsi_add_host_with_dma.cold.3+0xa0/0x5db
[ 1446.719736] virtscsi_probe+0x7bf/0xbd0
[ 1446.720265] virtio_dev_probe+0x402/0x6c0
[ 1446.720808] really_probe+0x276/0xde0
[ 1446.721320] driver_probe_device+0x267/0x3d0
[ 1446.721892] device_driver_attach+0xfe/0x140
[ 1446.722491] __driver_attach+0x13a/0x2c0
[ 1446.723037] bus_for_each_dev+0x146/0x1c0
[ 1446.723603] bus_add_driver+0x3fc/0x680
[ 1446.724145] driver_register+0x1c0/0x400
[ 1446.724693] init+0xa2/0xe8
[ 1446.725091] do_one_initcall+0x9e/0x310
[ 1446.725626] kernel_init_freeable+0xc56/0xcb9
[ 1446.726231] kernel_init+0x11/0x198
[ 1446.726714] ret_from_fork+0x1f/0x30
[ 1446.727212]
[ 1446.727433] Freed by task 26992:
[ 1446.727882] kasan_save_stack+0x19/0x40
[ 1446.728420] kasan_set_track+0x1c/0x30
[ 1446.728943] kasan_set_free_info+0x1b/0x30
[ 1446.729517] __kasan_slab_free+0x111/0x160
[ 1446.730084] kfree+0xb8/0x520
[ 1446.730507] blk_mq_free_map_and_requests+0x10b/0x1b0
[ 1446.731206] blk_mq_realloc_hw_ctxs+0x8cb/0x15b0
[ 1446.731844] blk_mq_init_allocated_queue+0x374/0x1380
[ 1446.732540] blk_mq_init_queue_data+0x7f/0xd0
[ 1446.733155] scsi_mq_alloc_queue+0x45/0x170
[ 1446.733730] scsi_alloc_sdev+0x73c/0xb20
[ 1446.734281] scsi_probe_and_add_lun+0x9a6/0x2d90
[ 1446.734916] __scsi_scan_target+0x208/0xc50
[ 1446.735500] scsi_scan_channel.part.3+0x113/0x170
[ 1446.736149] scsi_scan_host_selected+0x25a/0x360
[ 1446.736783] store_scan+0x290/0x2d0
[ 1446.737275] dev_attr_store+0x55/0x80
[ 1446.737782] sysfs_kf_write+0x132/0x190
[ 1446.738313] kernfs_fop_write_iter+0x319/0x4b0
[ 1446.738921] new_sync_write+0x40e/0x5c0
[ 1446.739429] vfs_write+0x519/0x720
[ 1446.739877] ksys_write+0xf8/0x1f0
[ 1446.740332] do_syscall_64+0x2d/0x40
[ 1446.740802] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1446.741462]
[ 1446.741670] The buggy address belongs to the object at ffff8880185afd00
[ 1446.741670] which belongs to the cache kmalloc-256 of size 256
[ 1446.743276] The buggy address is located 16 bytes inside of
[ 1446.743276] 256-byte region [ffff8880185afd00, ffff8880185afe00)
[ 1446.744765] The buggy address belongs to the page:
[ 1446.745416] page:ffffea0000616b00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x185ac
[ 1446.746694] head:ffffea0000616b00 order:2 compound_mapcount:0 compound_pincount:0
[ 1446.747719] flags: 0x1fffff80010200(slab|head)
[ 1446.748337] raw: 001fffff80010200 ffffea00006a3208 ffffea000061bf08 ffff88801004f240
[ 1446.749404] raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000
[ 1446.750455] page dumped because: kasan: bad access detected
[ 1446.751227]
[ 1446.751445] Memory state around the buggy address:
[ 1446.752102] ffff8880185afc00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 1446.753090] ffff8880185afc80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 1446.754079] >ffff8880185afd00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 1446.755065] ^
[ 1446.755589] ffff8880185afd80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 1446.756574] ffff8880185afe00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 1446.757566] ==================================================================
The 'BLK_MQ_F_TAG_QUEUE_SHARED' flag will be set once a second device on
the same host initializes its queue successfully. However, if the second
device fails to allocate memory in blk_mq_alloc_and_init_hctx() from
blk_mq_realloc_hw_ctxs() from blk_mq_init_allocated_queue(),
__blk_mq_free_map_and_rqs() will be called on the error path, and if
'BLK_MQ_TAG_HCTX_SHARED' is not set, 'tag_set->tags' will be freed
while it is still used by the first device.
To fix this issue, move the release of newly allocated hardware contexts
from blk_mq_realloc_hw_ctxs() to __blk_mq_update_nr_hw_queues(), since
there is no need to release hardware contexts in
blk_mq_init_allocated_queue().
Fixes: 868f2f0b7206 ("blk-mq: dynamic h/w context count")
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
Link: https://lore.kernel.org/r/20211108074019.1058843-1-yebin10@huawei.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
conflicts:
block/blk-mq.c
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
block/blk-mq.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index bc7a04cc2acf..fac25524e99e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3237,8 +3237,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx = hctxs[j];
if (hctx) {
- if (hctx->tags)
- blk_mq_free_map_and_requests(set, j);
blk_mq_exit_hctx(q, set, hctx, j);
hctxs[j] = NULL;
}
@@ -3724,8 +3722,13 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_realloc_hw_ctxs(set, q);
if (q->nr_hw_queues != set->nr_hw_queues) {
+ int i = prev_nr_hw_queues;
+
pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
nr_hw_queues, prev_nr_hw_queues);
+ for (; i < set->nr_hw_queues; i++)
+ blk_mq_free_map_and_requests(set, i);
+
set->nr_hw_queues = prev_nr_hw_queues;
blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
goto fallback;
--
2.20.1
[PATCH openEuler-5.10 01/13] scsi: core: Put LLD module refcnt after SCSI device is released
by Zheng Zengkai 15 Nov '21
From: Ming Lei <ming.lei(a)redhat.com>
stable inclusion
from stable-5.10.78
commit 7b57c38d12aed1b5d92f74748bed25e0d041729f
bugzilla: 185700 https://gitee.com/openeuler/kernel/issues/I4IAU2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit f2b85040acec9a928b4eb1b57a989324e8e38d3f upstream.
SCSI host release is triggered when the SCSI device is freed. We have to
make sure that the low-level device driver module won't be unloaded
before the SCSI host instance is released, because shost->hostt is
required in the release handler.
Make sure to put the LLD module refcnt only after the SCSI device is
released.
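The broken ordering, sketched (the pre-fix shape of scsi_device_put(),
simplified):

    module_put(sdev->host->hostt->module);  /* LLD module may unload now */
    put_device(&sdev->sdev_gendev);         /* final put runs the release
                                             * handler, which still reads
                                             * shost->hostt: use-after-free */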
Fixes a kernel panic of 'BUG: unable to handle page fault for address'
reported by Changhui and Yi.
Link: https://lore.kernel.org/r/20211008050118.1440686-1-ming.lei@redhat.com
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Reported-by: Changhui Zhong <czhong(a)redhat.com>
Reported-by: Yi Zhang <yi.zhang(a)redhat.com>
Tested-by: Yi Zhang <yi.zhang(a)redhat.com>
Signed-off-by: Ming Lei <ming.lei(a)redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
Acked-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
drivers/scsi/scsi.c | 4 +++-
drivers/scsi/scsi_sysfs.c | 9 +++++++++
2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 24619c3bebd5..6ad834d61d4c 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -545,8 +545,10 @@ EXPORT_SYMBOL(scsi_device_get);
*/
void scsi_device_put(struct scsi_device *sdev)
{
- module_put(sdev->host->hostt->module);
+ struct module *mod = sdev->host->hostt->module;
+
put_device(&sdev->sdev_gendev);
+ module_put(mod);
}
EXPORT_SYMBOL(scsi_device_put);
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 8173b67ec7b0..1378bb1a7371 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -450,9 +450,12 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
struct scsi_vpd *vpd_pg80 = NULL, *vpd_pg83 = NULL;
struct scsi_vpd *vpd_pg0 = NULL, *vpd_pg89 = NULL;
unsigned long flags;
+ struct module *mod;
sdev = container_of(work, struct scsi_device, ew.work);
+ mod = sdev->host->hostt->module;
+
scsi_dh_release_device(sdev);
parent = sdev->sdev_gendev.parent;
@@ -501,11 +504,17 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
if (parent)
put_device(parent);
+ module_put(mod);
}
static void scsi_device_dev_release(struct device *dev)
{
struct scsi_device *sdp = to_scsi_device(dev);
+
+ /* Set module pointer as NULL in case of module unloading */
+ if (!try_module_get(sdp->host->hostt->module))
+ sdp->host->hostt->module = NULL;
+
execute_in_process_context(scsi_device_dev_release_usercontext,
&sdp->ew);
}
--
2.20.1
[PATCH openEuler-5.10 01/20] blk-cgroup: synchronize blkg creation against policy deactivation
by Zheng Zengkai 15 Nov '21
From: Yu Kuai <yukuai3(a)huawei.com>
mainline inclusion
from mainline
commit 0c9d338c8443b06da8e8d3bfce824c5ea6d3488f
category: bugfix
bugzilla: 182378 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
---------------------------
Our test reports a null pointer dereference:
[ 168.534653] ==================================================================
[ 168.535614] Disabling lock debugging due to kernel taint
[ 168.536346] BUG: kernel NULL pointer dereference, address: 0000000000000008
[ 168.537274] #PF: supervisor read access in kernel mode
[ 168.537964] #PF: error_code(0x0000) - not-present page
[ 168.538667] PGD 0 P4D 0
[ 168.539025] Oops: 0000 [#1] PREEMPT SMP KASAN
[ 168.539656] CPU: 13 PID: 759 Comm: bash Tainted: G B 5.15.0-rc2-next-202100
[ 168.540954] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_0738364
[ 168.542736] RIP: 0010:bfq_pd_init+0x88/0x1e0
[ 168.543318] Code: 98 00 00 00 e8 c9 e4 5b ff 4c 8b 65 00 49 8d 7c 24 08 e8 bb e4 5b ff 4d0
[ 168.545803] RSP: 0018:ffff88817095f9c0 EFLAGS: 00010002
[ 168.546497] RAX: 0000000000000001 RBX: ffff888101a1c000 RCX: 0000000000000000
[ 168.547438] RDX: 0000000000000003 RSI: 0000000000000002 RDI: ffff888106553428
[ 168.548402] RBP: ffff888106553400 R08: ffffffff961bcaf4 R09: 0000000000000001
[ 168.549365] R10: ffffffffa2e16c27 R11: fffffbfff45c2d84 R12: 0000000000000000
[ 168.550291] R13: ffff888101a1c098 R14: ffff88810c7a08c8 R15: ffffffffa55541a0
[ 168.551221] FS: 00007fac75227700(0000) GS:ffff88839ba80000(0000) knlGS:0000000000000000
[ 168.552278] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 168.553040] CR2: 0000000000000008 CR3: 0000000165ce7000 CR4: 00000000000006e0
[ 168.554000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 168.554929] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 168.555888] Call Trace:
[ 168.556221] <TASK>
[ 168.556510] blkg_create+0x1c0/0x8c0
[ 168.556989] blkg_conf_prep+0x574/0x650
[ 168.557502] ? stack_trace_save+0x99/0xd0
[ 168.558033] ? blkcg_conf_open_bdev+0x1b0/0x1b0
[ 168.558629] tg_set_conf.constprop.0+0xb9/0x280
[ 168.559231] ? kasan_set_track+0x29/0x40
[ 168.559758] ? kasan_set_free_info+0x30/0x60
[ 168.560344] ? tg_set_limit+0xae0/0xae0
[ 168.560853] ? do_sys_openat2+0x33b/0x640
[ 168.561383] ? do_sys_open+0xa2/0x100
[ 168.561877] ? __x64_sys_open+0x4e/0x60
[ 168.562383] ? __kasan_check_write+0x20/0x30
[ 168.562951] ? copyin+0x48/0x70
[ 168.563390] ? _copy_from_iter+0x234/0x9e0
[ 168.563948] tg_set_conf_u64+0x17/0x20
[ 168.564467] cgroup_file_write+0x1ad/0x380
[ 168.565014] ? cgroup_file_poll+0x80/0x80
[ 168.565568] ? __mutex_lock_slowpath+0x30/0x30
[ 168.566165] ? pgd_free+0x100/0x160
[ 168.566649] kernfs_fop_write_iter+0x21d/0x340
[ 168.567246] ? cgroup_file_poll+0x80/0x80
[ 168.567796] new_sync_write+0x29f/0x3c0
[ 168.568314] ? new_sync_read+0x410/0x410
[ 168.568840] ? __handle_mm_fault+0x1c97/0x2d80
[ 168.569425] ? copy_page_range+0x2b10/0x2b10
[ 168.570007] ? _raw_read_lock_bh+0xa0/0xa0
[ 168.570622] vfs_write+0x46e/0x630
[ 168.571091] ksys_write+0xcd/0x1e0
[ 168.571563] ? __x64_sys_read+0x60/0x60
[ 168.572081] ? __kasan_check_write+0x20/0x30
[ 168.572659] ? do_user_addr_fault+0x446/0xff0
[ 168.573264] __x64_sys_write+0x46/0x60
[ 168.573774] do_syscall_64+0x35/0x80
[ 168.574264] entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 168.574960] RIP: 0033:0x7fac74915130
[ 168.575456] Code: 73 01 c3 48 8b 0d 58 ed 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 444
[ 168.577969] RSP: 002b:00007ffc3080e288 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 168.578986] RAX: ffffffffffffffda RBX: 0000000000000009 RCX: 00007fac74915130
[ 168.579937] RDX: 0000000000000009 RSI: 000056007669f080 RDI: 0000000000000001
[ 168.580884] RBP: 000056007669f080 R08: 000000000000000a R09: 00007fac75227700
[ 168.581841] R10: 000056007655c8f0 R11: 0000000000000246 R12: 0000000000000009
[ 168.582796] R13: 0000000000000001 R14: 00007fac74be55e0 R15: 00007fac74be08c0
[ 168.583757] </TASK>
[ 168.584063] Modules linked in:
[ 168.584494] CR2: 0000000000000008
[ 168.584964] ---[ end trace 2475611ad0f77a1a ]---
This is because blkg_alloc() is called from blkg_conf_prep() without
holding 'q->queue_lock', and elevator is exited before blkg_create():
thread 1                                thread 2
blkg_conf_prep
 spin_lock_irq(&q->queue_lock);
 blkg_lookup_check -> return NULL
 spin_unlock_irq(&q->queue_lock);
 blkg_alloc
  blkcg_policy_enabled -> true
  pd = ->pd_alloc_fn
  blkg->pd[i] = pd
                                        blk_mq_exit_sched
                                         bfq_exit_queue
                                          blkcg_deactivate_policy
                                           spin_lock_irq(&q->queue_lock);
                                           __clear_bit(pol->plid, q->blkcg_pols);
                                           spin_unlock_irq(&q->queue_lock);
                                        q->elevator = NULL;
 spin_lock_irq(&q->queue_lock);
 blkg_create
  if (blkg->pd[i])
   ->pd_init_fn -> q->elevator is NULL
 spin_unlock_irq(&q->queue_lock);
Because blkcg_deactivate_policy() requires the queue to be frozen, we can
grab q_usage_counter to synchronize blkg_conf_prep() against
blkcg_deactivate_policy().
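A minimal sketch of the resulting guard in blkg_conf_prep(), with the
lookup/creation details elided (the _sketch suffix marks this as an
illustration, not the exact function body):
static int blkg_conf_prep_sketch(struct request_queue *q)
{
	int ret;

	/* Pin q_usage_counter: blkcg_deactivate_policy() freezes the
	 * queue, so it must now wait until we call blk_queue_exit(). */
	ret = blk_queue_enter(q, 0);
	if (ret)
		return ret;

	spin_lock_irq(&q->queue_lock);
	/* ... blkg lookup/creation; ->pd_init_fn() can no longer race
	 * with the elevator being torn down ... */
	spin_unlock_irq(&q->queue_lock);

	blk_queue_exit(q);	/* must be dropped on every exit path */
	return 0;
}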
Fixes: e21b7a0b9887 ("block, bfq: add full hierarchical scheduling and cgroups support")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Acked-by: Tejun Heo <tj(a)kernel.org>
Link: https://lore.kernel.org/r/20211020014036.2141723-1-yukuai3@huawei.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Conflict: block/blk-cgroup.c
- commit ed6cddefdfd3 ("block: convert the rest of block to
bdev_get_queue") is not backported.
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
block/blk-cgroup.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 5b19665bc486..37a5dbd2c4e4 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -620,6 +620,14 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
q = disk->queue;
+ /*
+ * blkcg_deactivate_policy() requires queue to be frozen, we can grab
+ * q_usage_counter to prevent concurrent with blkcg_deactivate_policy().
+ */
+ ret = blk_queue_enter(q, 0);
+ if (ret)
+ return ret;
+
rcu_read_lock();
spin_lock_irq(&q->queue_lock);
@@ -689,6 +697,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
goto success;
}
success:
+ blk_queue_exit(q);
ctx->disk = disk;
ctx->blkg = blkg;
ctx->body = input;
@@ -701,6 +710,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
rcu_read_unlock();
fail:
put_disk_and_module(disk);
+ blk_queue_exit(q);
/*
* If queue was bypassing, we should retry. Do so after a
* short msleep(). It isn't strictly necessary but queue
--
2.20.1
[PATCH openEuler-5.10 01/66] ARM: 9132/1: Fix __get_user_check failure with ARM KASAN images
by Zheng Zengkai 15 Nov '21
From: Lexi Shao <shaolexi(a)huawei.com>
stable inclusion
from stable-5.10.77
commit 3ceaa85c331d30752af3b88d280cc1bcaee2eb27
bugzilla: 185677 https://gitee.com/openeuler/kernel/issues/I4IAP7
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit df909df0770779f1a5560c2bb641a2809655ef28 upstream.
ARM: kasan: Fix __get_user_check failure with kasan
In the macro __get_user_check defined in arch/arm/include/asm/uaccess.h,
the error code is stored in register int __e (r0). When kasan is
enabled, assigning a value to a kernel address might trigger a kasan check,
which unexpectedly overwrites r0 and causes undefined behavior on arm
kasan images.
One example is a failure in do_futex that results in a process soft lockup.
Log:
watchdog: BUG: soft lockup - CPU#0 stuck for 62946ms! [rs:main
Q:Reg:1151]
...
(__asan_store4) from (futex_wait_setup+0xf8/0x2b4)
(futex_wait_setup) from (futex_wait+0x138/0x394)
(futex_wait) from (do_futex+0x164/0xe40)
(do_futex) from (sys_futex_time32+0x178/0x230)
(sys_futex_time32) from (ret_fast_syscall+0x0/0x50)
The soft lockup happens in the function futex_wait_setup. The reason is
that the function get_futex_value_locked always returns EINVAL, so the pc
jumps back to the retry label, causing an endless loop.
This line in the function get_futex_value_locked
ret = __get_user(*dest, from);
is expanded to
*dest = (typeof(*(p))) __r2;
in the macro __get_user_check. Writing to the pointer dest triggers the
kasan check and overwrites the return value of the __get_user_x function.
The assembly code of get_futex_value_locked in kernel/futex.c:
...
c01f6dc8: eb0b020e bl c04b7608 <__get_user_4>
// "x = (typeof(*(p))) __r2;" triggers kasan check and r0 is overwritten
c01f6dcc: e1a00007 mov r0, r7
c01f6dd0: e1a05002 mov r5, r2
c01f6dd4: eb04f1e6 bl c0333574 <__asan_store4>
c01f6dd8: e5875000 str r5, [r7]
// save ret value of __get_user(*dest, from), which is dest address now
c01f6ddc: e1a05000 mov r5, r0
...
// checking return value of __get_user failed
c01f6e00: e3550000 cmp r5, #0
...
c01f6e0c: 01a00005 moveq r0, r5
// assign return value to EINVAL
c01f6e10: 13e0000d mvnne r0, #13
The return value is the destination address of get_user, and thus certainly
non-zero, so get_futex_value_locked always returns EINVAL.
Fix it by using a tmp variable to store the error code before the
assignment. This fix has no effect on non-kasan images thanks to compiler
optimization; it only affects cases that overwrite r0 due to the kasan check.
This should fix the bug discussed in Link:
[1] https://lore.kernel.org/linux-arm-kernel/0ef7c2a5-5d8b-c5e0-63fa-31693fd449…
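A heavily condensed sketch of the fixed macro shape (the real
__get_user_check also declares the other register bindings and a
size-dispatch switch, both elided here):
/* __e lives in r0; __r2 holds the fetched value. The size-dispatch
 * switch that calls __get_user_1/2/4/... is elided. */
#define __get_user_check_sketch(x, p)					\
({									\
	register int __e asm("r0");					\
	int __tmp_e;							\
	/* ... call __get_user_N(), which returns its error in r0 ... */\
	__tmp_e = __e;		/* snapshot r0 while it is still valid */\
	x = (typeof(*(p))) __r2; /* may emit __asan_storeN, clobbering r0 */\
	__tmp_e;		/* evaluate to the saved error code */	\
})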
Fixes: 421015713b30 ("ARM: 9017/2: Enable KASan for ARM")
Signed-off-by: Lexi Shao <shaolexi(a)huawei.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
Acked-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
arch/arm/include/asm/uaccess.h | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 476d1a15e669..da2a9e5fc59b 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -200,6 +200,7 @@ extern int __get_user_64t_4(void *);
register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
unsigned int __ua_flags = uaccess_save_and_enable(); \
+ int __tmp_e; \
switch (sizeof(*(__p))) { \
case 1: \
if (sizeof((x)) >= 8) \
@@ -227,9 +228,10 @@ extern int __get_user_64t_4(void *);
break; \
default: __e = __get_user_bad(); break; \
} \
+ __tmp_e = __e; \
uaccess_restore(__ua_flags); \
x = (typeof(*(p))) __r2; \
- __e; \
+ __tmp_e; \
})
#define get_user(x, p) \
--
2.20.1
[PATCH openEuler-5.10 01/14] block, bfq: fix UAF problem in bfqg_stats_init()
by Zheng Zengkai 15 Nov '21
From: Zheng Liang <zhengliang6(a)huawei.com>
mainline inclusion
from mainline
commit 2fc428f6b7ca80794cb9928c90d4de524366659f
category: bugfix
bugzilla: 185657 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
-------------------------------------------------
In bfq_pd_alloc(), the function bfqg_stats_init() initializes bfqg's stats.
If blkg_rwstat_init() succeeds in initializing bfqg_stats->bytes but then
fails to initialize bfqg_stats->ios, bfqg_stats_init() returns an error and
bfqg is freed. However, blkg_rwstat->cpu_cnt is not deleted from the list
of percpu_counters, so a later traversal of that list hits a use-after-free.
We should use blkg_rwstat_exit() to clean up bfqg_stats->bytes in the
above scenario.
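The resulting shape of the function, sketched with the
CONFIG_BFQ_CGROUP_DEBUG counters collapsed into a comment:
static int bfqg_stats_init_sketch(struct bfqg_stats *stats, gfp_t gfp)
{
	if (blkg_rwstat_init(&stats->bytes, gfp) ||
	    blkg_rwstat_init(&stats->ios, gfp))
		goto error;

	/* ... CONFIG_BFQ_CGROUP_DEBUG counters, same goto-error pattern ... */

	return 0;

error:
	/* bfqg_stats_exit() -> blkg_rwstat_exit() also removes cpu_cnt
	 * from the percpu_counters list, closing the UAF window. */
	bfqg_stats_exit(stats);
	return -ENOMEM;
}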
Fixes: commit fd41e60331b ("bfq-iosched: stop using blkg->stat_bytes and ->stat_ios")
Signed-off-by: Zheng Liang <zhengliang6(a)huawei.com>
Acked-by: Tejun Heo <tj(a)kernel.org>
Link: https://lore.kernel.org/r/20211018024225.1493938-1-zhengliang6@huawei.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
block/bfq-cgroup.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index b791e2041e49..a6bcc779c912 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -463,7 +463,7 @@ static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
{
if (blkg_rwstat_init(&stats->bytes, gfp) ||
blkg_rwstat_init(&stats->ios, gfp))
- return -ENOMEM;
+ goto error;
#ifdef CONFIG_BFQ_CGROUP_DEBUG
if (blkg_rwstat_init(&stats->merged, gfp) ||
@@ -476,13 +476,15 @@ static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
bfq_stat_init(&stats->dequeue, gfp) ||
bfq_stat_init(&stats->group_wait_time, gfp) ||
bfq_stat_init(&stats->idle_time, gfp) ||
- bfq_stat_init(&stats->empty_time, gfp)) {
- bfqg_stats_exit(stats);
- return -ENOMEM;
- }
+ bfq_stat_init(&stats->empty_time, gfp))
+ goto error;
#endif
return 0;
+
+error:
+ bfqg_stats_exit(stats);
+ return -ENOMEM;
}
static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
--
2.20.1
[openEuler-5.10 1/4] ARM: 9077/1: PLT: Move struct plt_entries definition to header
by Zheng Zengkai 15 Nov '21
From: Alex Sverdlin <alexander.sverdlin(a)nokia.com>
stable inclusion
from stable-5.10.69
commit ce90c6706d5a95ddda8d3cea01768bd0b4445851
bugzilla: 182675 https://gitee.com/openeuler/kernel/issues/I4I3ED
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
-------------------------------------------------
commit 4e271701c17dee70c6e1351c4d7d42e70405c6a9 upstream.
No functional change; later it will be re-used in several files.
Signed-off-by: Alexander Sverdlin <alexander.sverdlin(a)nokia.com>
Signed-off-by: Russell King <rmk+kernel(a)armlinux.org.uk>
Signed-off-by: Florian Fainelli <f.fainelli(a)gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Conflicts:
arch/arm/include/asm/module.h
Signed-off-by: Li Huafei <lihuafei1(a)huawei.com>
Reviewed-by: Yang Jihong <yangjihong1(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
arch/arm/include/asm/module.h | 9 +++++++++
arch/arm/kernel/module-plts.c | 9 ---------
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index ac3df84b935c..9b17d2f5ab53 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -19,6 +19,15 @@ enum {
};
#endif
+#define PLT_ENT_STRIDE L1_CACHE_BYTES
+#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32))
+#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT)
+
+struct plt_entries {
+ u32 ldr[PLT_ENT_COUNT];
+ u32 lit[PLT_ENT_COUNT];
+};
+
struct mod_plt_sec {
int plt_shndx;
int plt_count;
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index 1dbdf2726505..f019f389337b 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -12,10 +12,6 @@
#include <asm/cache.h>
#include <asm/opcodes.h>
-#define PLT_ENT_STRIDE L1_CACHE_BYTES
-#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32))
-#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT)
-
#ifdef CONFIG_THUMB2_KERNEL
#define PLT_ENT_LDR __opcode_to_mem_thumb32(0xf8dff000 | \
(PLT_ENT_STRIDE - 4))
@@ -24,11 +20,6 @@
(PLT_ENT_STRIDE - 8))
#endif
-struct plt_entries {
- u32 ldr[PLT_ENT_COUNT];
- u32 lit[PLT_ENT_COUNT];
-};
-
static bool in_init(const struct module *mod, unsigned long loc)
{
return loc - (u32)mod->init_layout.base < mod->init_layout.size;
--
2.20.1
[PATCH openEuler-5.10 01/16] ext4: avoid recheck extent for EXT4_EX_FORCE_CACHE
by Zheng Zengkai 15 Nov '21
From: yangerkun <yangerkun(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: 182952 https://gitee.com/openeuler/kernel/issues/I4DDEL
---------------------------
A buffer marked verified has already been checked before, so there is no
need to verify it and call set_buffer_verified() again.
Signed-off-by: yangerkun <yangerkun(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Reviewed-by: Zhang Yi <yi.zhang(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
fs/ext4/extents.c | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 618675a41efb..a77e25ca6867 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -544,13 +544,16 @@ __read_extent_tree_block(const char *function, unsigned int line,
if (err < 0)
goto errout;
}
- if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
- return bh;
- err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh),
- depth, pblk, le32_to_cpu(idx->ei_block));
- if (err)
- goto errout;
- set_buffer_verified(bh);
+ if (buffer_verified(bh)) {
+ if (!(flags & EXT4_EX_FORCE_CACHE))
+ return bh;
+ } else {
+ err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh),
+ depth, pblk, le32_to_cpu(idx->ei_block));
+ if (err)
+ goto errout;
+ set_buffer_verified(bh);
+ }
/*
* If this is a leaf block, cache all of its entries
*/
--
2.20.1
[PATCH openEuler-5.10 01/92] parisc: math-emu: Fix fall-through warnings
by Zheng Zengkai 15 Nov '21
From: Helge Deller <deller(a)gmx.de>
stable inclusion
from stable-5.10.76
commit b3b7f831a49b56c258ab8ef08d04045fcfb9b4a7
bugzilla: 182988 https://gitee.com/openeuler/kernel/issues/I4IAHF
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit 6f1fce595b78b775d7fb585c15c2dc3a6994f96e upstream.
Fix lots of fallthrough warnings, e.g.:
arch/parisc/math-emu/fpudispatch.c:323:33: warning: this statement may fall through [-Wimplicit-fallthrough=]
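The fix makes every intended fall-through explicit with the fallthrough
pseudo-keyword and marks unreachable switch exits with BUG(); a generic
sketch of the pattern (not the parisc code itself, names illustrative):
/* Generic sketch of the pattern applied throughout fpudispatch.c. */
switch (fmt) {
case 2:			/* quad: copy the two extra words first */
	regs[t + 3] = regs[r1 + 3];
	regs[t + 2] = regs[r1 + 2];
	fallthrough;	/* deliberate: quad also needs the double part */
case 1:			/* double */
	regs[t + 1] = regs[r1 + 1];
	fallthrough;	/* deliberate: double also needs the single word */
case 0:			/* single */
	regs[t] = regs[r1];
	return NOEXCEPTION;
}
BUG();			/* every case above returns; reaching here is a bug */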
Signed-off-by: Helge Deller <deller(a)gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
Acked-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
arch/parisc/math-emu/fpudispatch.c | 56 ++++++++++++++++++++++++++++--
1 file changed, 53 insertions(+), 3 deletions(-)
diff --git a/arch/parisc/math-emu/fpudispatch.c b/arch/parisc/math-emu/fpudispatch.c
index 7c46969ead9b..01ed133227c2 100644
--- a/arch/parisc/math-emu/fpudispatch.c
+++ b/arch/parisc/math-emu/fpudispatch.c
@@ -310,12 +310,15 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
r1 &= ~3;
fpregs[t+3] = fpregs[r1+3];
fpregs[t+2] = fpregs[r1+2];
+ fallthrough;
case 1: /* double */
fpregs[t+1] = fpregs[r1+1];
+ fallthrough;
case 0: /* single */
fpregs[t] = fpregs[r1];
return(NOEXCEPTION);
}
+ BUG();
case 3: /* FABS */
switch (fmt) {
case 2: /* illegal */
@@ -325,13 +328,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
r1 &= ~3;
fpregs[t+3] = fpregs[r1+3];
fpregs[t+2] = fpregs[r1+2];
+ fallthrough;
case 1: /* double */
fpregs[t+1] = fpregs[r1+1];
+ fallthrough;
case 0: /* single */
/* copy and clear sign bit */
fpregs[t] = fpregs[r1] & 0x7fffffff;
return(NOEXCEPTION);
}
+ BUG();
case 6: /* FNEG */
switch (fmt) {
case 2: /* illegal */
@@ -341,13 +347,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
r1 &= ~3;
fpregs[t+3] = fpregs[r1+3];
fpregs[t+2] = fpregs[r1+2];
+ fallthrough;
case 1: /* double */
fpregs[t+1] = fpregs[r1+1];
+ fallthrough;
case 0: /* single */
/* copy and invert sign bit */
fpregs[t] = fpregs[r1] ^ 0x80000000;
return(NOEXCEPTION);
}
+ BUG();
case 7: /* FNEGABS */
switch (fmt) {
case 2: /* illegal */
@@ -357,13 +366,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
r1 &= ~3;
fpregs[t+3] = fpregs[r1+3];
fpregs[t+2] = fpregs[r1+2];
+ fallthrough;
case 1: /* double */
fpregs[t+1] = fpregs[r1+1];
+ fallthrough;
case 0: /* single */
/* copy and set sign bit */
fpregs[t] = fpregs[r1] | 0x80000000;
return(NOEXCEPTION);
}
+ BUG();
case 4: /* FSQRT */
switch (fmt) {
case 0:
@@ -376,6 +388,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
case 3: /* quad not implemented */
return(MAJOR_0C_EXCP);
}
+ BUG();
case 5: /* FRND */
switch (fmt) {
case 0:
@@ -389,7 +402,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
return(MAJOR_0C_EXCP);
}
} /* end of switch (subop) */
-
+ BUG();
case 1: /* class 1 */
df = extru(ir,fpdfpos,2); /* get dest format */
if ((df & 2) || (fmt & 2)) {
@@ -419,6 +432,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
case 3: /* dbl/dbl */
return(MAJOR_0C_EXCP);
}
+ BUG();
case 1: /* FCNVXF */
switch(fmt) {
case 0: /* sgl/sgl */
@@ -434,6 +448,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
return(dbl_to_dbl_fcnvxf(&fpregs[r1],0,
&fpregs[t],status));
}
+ BUG();
case 2: /* FCNVFX */
switch(fmt) {
case 0: /* sgl/sgl */
@@ -449,6 +464,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
return(dbl_to_dbl_fcnvfx(&fpregs[r1],0,
&fpregs[t],status));
}
+ BUG();
case 3: /* FCNVFXT */
switch(fmt) {
case 0: /* sgl/sgl */
@@ -464,6 +480,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0,
&fpregs[t],status));
}
+ BUG();
case 5: /* FCNVUF (PA2.0 only) */
switch(fmt) {
case 0: /* sgl/sgl */
@@ -479,6 +496,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
return(dbl_to_dbl_fcnvuf(&fpregs[r1],0,
&fpregs[t],status));
}
+ BUG();
case 6: /* FCNVFU (PA2.0 only) */
switch(fmt) {
case 0: /* sgl/sgl */
@@ -494,6 +512,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
return(dbl_to_dbl_fcnvfu(&fpregs[r1],0,
&fpregs[t],status));
}
+ BUG();
case 7: /* FCNVFUT (PA2.0 only) */
switch(fmt) {
case 0: /* sgl/sgl */
@@ -509,10 +528,11 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
return(dbl_to_dbl_fcnvfut(&fpregs[r1],0,
&fpregs[t],status));
}
+ BUG();
case 4: /* undefined */
return(MAJOR_0C_EXCP);
} /* end of switch subop */
-
+ BUG();
case 2: /* class 2 */
fpu_type_flags=fpregs[FPU_TYPE_FLAG_POS];
r2 = extru(ir, fpr2pos, 5) * sizeof(double)/sizeof(u_int);
@@ -590,6 +610,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
case 3: /* quad not implemented */
return(MAJOR_0C_EXCP);
}
+ BUG();
case 1: /* FTEST */
switch (fmt) {
case 0:
@@ -609,8 +630,10 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
case 3:
return(MAJOR_0C_EXCP);
}
+ BUG();
} /* end of switch subop */
} /* end of else for PA1.0 & PA1.1 */
+ BUG();
case 3: /* class 3 */
r2 = extru(ir,fpr2pos,5) * sizeof(double)/sizeof(u_int);
if (r2 == 0)
@@ -633,6 +656,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
case 3: /* quad not implemented */
return(MAJOR_0C_EXCP);
}
+ BUG();
case 1: /* FSUB */
switch (fmt) {
case 0:
@@ -645,6 +669,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
case 3: /* quad not implemented */
return(MAJOR_0C_EXCP);
}
+ BUG();
case 2: /* FMPY */
switch (fmt) {
case 0:
@@ -657,6 +682,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
case 3: /* quad not implemented */
return(MAJOR_0C_EXCP);
}
+ BUG();
case 3: /* FDIV */
switch (fmt) {
case 0:
@@ -669,6 +695,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
case 3: /* quad not implemented */
return(MAJOR_0C_EXCP);
}
+ BUG();
case 4: /* FREM */
switch (fmt) {
case 0:
@@ -681,6 +708,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
case 3: /* quad not implemented */
return(MAJOR_0C_EXCP);
}
+ BUG();
} /* end of class 3 switch */
} /* end of switch(class) */
@@ -736,10 +764,12 @@ u_int fpregs[];
return(MAJOR_0E_EXCP);
case 1: /* double */
fpregs[t+1] = fpregs[r1+1];
+ fallthrough;
case 0: /* single */
fpregs[t] = fpregs[r1];
return(NOEXCEPTION);
}
+ BUG();
case 3: /* FABS */
switch (fmt) {
case 2:
@@ -747,10 +777,12 @@ u_int fpregs[];
return(MAJOR_0E_EXCP);
case 1: /* double */
fpregs[t+1] = fpregs[r1+1];
+ fallthrough;
case 0: /* single */
fpregs[t] = fpregs[r1] & 0x7fffffff;
return(NOEXCEPTION);
}
+ BUG();
case 6: /* FNEG */
switch (fmt) {
case 2:
@@ -758,10 +790,12 @@ u_int fpregs[];
return(MAJOR_0E_EXCP);
case 1: /* double */
fpregs[t+1] = fpregs[r1+1];
+ fallthrough;
case 0: /* single */
fpregs[t] = fpregs[r1] ^ 0x80000000;
return(NOEXCEPTION);
}
+ BUG();
case 7: /* FNEGABS */
switch (fmt) {
case 2:
@@ -769,10 +803,12 @@ u_int fpregs[];
return(MAJOR_0E_EXCP);
case 1: /* double */
fpregs[t+1] = fpregs[r1+1];
+ fallthrough;
case 0: /* single */
fpregs[t] = fpregs[r1] | 0x80000000;
return(NOEXCEPTION);
}
+ BUG();
case 4: /* FSQRT */
switch (fmt) {
case 0:
@@ -785,6 +821,7 @@ u_int fpregs[];
case 3:
return(MAJOR_0E_EXCP);
}
+ BUG();
case 5: /* FRMD */
switch (fmt) {
case 0:
@@ -798,7 +835,7 @@ u_int fpregs[];
return(MAJOR_0E_EXCP);
}
} /* end of switch (subop */
-
+ BUG();
case 1: /* class 1 */
df = extru(ir,fpdfpos,2); /* get dest format */
/*
@@ -826,6 +863,7 @@ u_int fpregs[];
case 3: /* dbl/dbl */
return(MAJOR_0E_EXCP);
}
+ BUG();
case 1: /* FCNVXF */
switch(fmt) {
case 0: /* sgl/sgl */
@@ -841,6 +879,7 @@ u_int fpregs[];
return(dbl_to_dbl_fcnvxf(&fpregs[r1],0,
&fpregs[t],status));
}
+ BUG();
case 2: /* FCNVFX */
switch(fmt) {
case 0: /* sgl/sgl */
@@ -856,6 +895,7 @@ u_int fpregs[];
return(dbl_to_dbl_fcnvfx(&fpregs[r1],0,
&fpregs[t],status));
}
+ BUG();
case 3: /* FCNVFXT */
switch(fmt) {
case 0: /* sgl/sgl */
@@ -871,6 +911,7 @@ u_int fpregs[];
return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0,
&fpregs[t],status));
}
+ BUG();
case 5: /* FCNVUF (PA2.0 only) */
switch(fmt) {
case 0: /* sgl/sgl */
@@ -886,6 +927,7 @@ u_int fpregs[];
return(dbl_to_dbl_fcnvuf(&fpregs[r1],0,
&fpregs[t],status));
}
+ BUG();
case 6: /* FCNVFU (PA2.0 only) */
switch(fmt) {
case 0: /* sgl/sgl */
@@ -901,6 +943,7 @@ u_int fpregs[];
return(dbl_to_dbl_fcnvfu(&fpregs[r1],0,
&fpregs[t],status));
}
+ BUG();
case 7: /* FCNVFUT (PA2.0 only) */
switch(fmt) {
case 0: /* sgl/sgl */
@@ -916,9 +959,11 @@ u_int fpregs[];
return(dbl_to_dbl_fcnvfut(&fpregs[r1],0,
&fpregs[t],status));
}
+ BUG();
case 4: /* undefined */
return(MAJOR_0C_EXCP);
} /* end of switch subop */
+ BUG();
case 2: /* class 2 */
/*
* Be careful out there.
@@ -994,6 +1039,7 @@ u_int fpregs[];
}
} /* end of switch subop */
} /* end of else for PA1.0 & PA1.1 */
+ BUG();
case 3: /* class 3 */
/*
* Be careful out there.
@@ -1026,6 +1072,7 @@ u_int fpregs[];
return(dbl_fadd(&fpregs[r1],&fpregs[r2],
&fpregs[t],status));
}
+ BUG();
case 1: /* FSUB */
switch (fmt) {
case 0:
@@ -1035,6 +1082,7 @@ u_int fpregs[];
return(dbl_fsub(&fpregs[r1],&fpregs[r2],
&fpregs[t],status));
}
+ BUG();
case 2: /* FMPY or XMPYU */
/*
* check for integer multiply (x bit set)
@@ -1071,6 +1119,7 @@ u_int fpregs[];
&fpregs[r2],&fpregs[t],status));
}
}
+ BUG();
case 3: /* FDIV */
switch (fmt) {
case 0:
@@ -1080,6 +1129,7 @@ u_int fpregs[];
return(dbl_fdiv(&fpregs[r1],&fpregs[r2],
&fpregs[t],status));
}
+ BUG();
case 4: /* FREM */
switch (fmt) {
case 0:
--
2.20.1
15 Nov '21
From: Jonas Hahnfeld <hahnjo(a)hahnjo.de>
stable inclusion
from stable-5.10.75
commit f077d699c1d2aa05c8d5982bd646b040353f052c
bugzilla: 182987 https://gitee.com/openeuler/kernel/issues/I4I3MP
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit 48827e1d6af58f219e89c7ec08dccbca28c7694e upstream.
The device advertises 8 formats, but only a rate of 48kHz is honored
by the hardware and 24 bits give chopped audio, so only report the
one working combination. This fixes out-of-the-box audio experience
with PipeWire which otherwise attempts to choose S24_3LE (while
PulseAudio defaulted to S16_LE).
Signed-off-by: Jonas Hahnfeld <hahnjo(a)hahnjo.de>
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20211012200906.3492-1-hahnjo@hahnjo.de
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
Acked-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
sound/usb/quirks-table.h | 42 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 42 insertions(+)
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 5728bf722c88..7c649cd38049 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -77,6 +77,48 @@
/* E-Mu 0204 USB */
{ USB_DEVICE_VENDOR_SPEC(0x041e, 0x3f19) },
+/*
+ * Creative Technology, Ltd Live! Cam Sync HD [VF0770]
+ * The device advertises 8 formats, but only a rate of 48kHz is honored by the
+ * hardware and 24 bits give chopped audio, so only report the one working
+ * combination.
+ */
+{
+ USB_DEVICE(0x041e, 0x4095),
+ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+ .ifnum = QUIRK_ANY_INTERFACE,
+ .type = QUIRK_COMPOSITE,
+ .data = &(const struct snd_usb_audio_quirk[]) {
+ {
+ .ifnum = 2,
+ .type = QUIRK_AUDIO_STANDARD_MIXER,
+ },
+ {
+ .ifnum = 3,
+ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
+ .data = &(const struct audioformat) {
+ .formats = SNDRV_PCM_FMTBIT_S16_LE,
+ .channels = 2,
+ .fmt_bits = 16,
+ .iface = 3,
+ .altsetting = 4,
+ .altset_idx = 4,
+ .endpoint = 0x82,
+ .ep_attr = 0x05,
+ .rates = SNDRV_PCM_RATE_48000,
+ .rate_min = 48000,
+ .rate_max = 48000,
+ .nr_rates = 1,
+ .rate_table = (unsigned int[]) { 48000 },
+ },
+ },
+ {
+ .ifnum = -1
+ },
+ },
+ },
+},
+
/*
* HP Wireless Audio
* When not ignored, causes instability issues for some users, forcing them to
--
2.20.1
[PATCH openEuler-5.10 01/20] ASoC: Intel: sof_sdw: tag SoundWire BEs as non-atomic
by Zheng Zengkai 15 Nov '21
From: Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com>
stable inclusion
from stable-5.10.74
commit 0bcfa99e8faeef75567e6d3a5ac9680d28240b21
bugzilla: 182986 https://gitee.com/openeuler/kernel/issues/I4I3MG
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
[ Upstream commit 58eafe1ff52ee1ce255759fc15729519af180cbb ]
The SoundWire BEs make use of 'stream' functions for .prepare and
.trigger. These functions will in turn force a Bank Switch, which
implies a wait operation.
Mark SoundWire BEs as nonatomic for consistency, but keep all other
types of BEs as is. The initialization of .nonatomic is done outside
of the create_sdw_dailink helper to avoid adding more parameters to
deal with a single exception to the rule that BEs are atomic.
Suggested-by: Takashi Iwai <tiwai(a)suse.de>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com>
Reviewed-by: Rander Wang <rander.wang(a)intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan(a)linux.intel.com>
Reviewed-by: Bard Liao <bard.liao(a)intel.com>
Link: https://lore.kernel.org/r/20210907184436.33152-1-pierre-louis.bossart@linux…
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
Reviewed-by: Weilong Chen <chenweilong(a)huawei.com>
Acked-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
sound/soc/intel/boards/sof_sdw.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c
index 2770e8179983..25548555d8d7 100644
--- a/sound/soc/intel/boards/sof_sdw.c
+++ b/sound/soc/intel/boards/sof_sdw.c
@@ -847,6 +847,11 @@ static int create_sdw_dailink(struct device *dev, int *be_index,
cpus + *cpu_id, cpu_dai_num,
codecs, codec_num,
NULL, &sdw_ops);
+ /*
+ * SoundWire DAILINKs use 'stream' functions and Bank Switch operations
+ * based on wait_for_completion(), tag them as 'nonatomic'.
+ */
+ dai_links[*be_index].nonatomic = true;
ret = set_codec_init_func(link, dai_links + (*be_index)++,
playback, group_id);
--
2.20.1
[PATCH openEuler-5.10 01/80] Partially revert "usb: Kconfig: using select for USB_COMMON dependency"
by Zheng Zengkai 15 Nov '21
From: Ben Hutchings <ben(a)decadent.org.uk>
stable inclusion
from stable-5.10.73
commit 16d728110bd76d1ebb4aad8bcf36596f7ce11be0
bugzilla: 182983 https://gitee.com/openeuler/kernel/issues/I4I3M0
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit 4d1aa9112c8e6995ef2c8a76972c9671332ccfea upstream.
This reverts commit cb9c1cfc86926d0e86d19c8e34f6c23458cd3478 for
USB_LED_TRIG. This config symbol has bool type and enables extra code
in usb_common itself, not a separate driver. Enabling it should not
force usb_common to be built-in!
Fixes: cb9c1cfc8692 ("usb: Kconfig: using select for USB_COMMON dependency")
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Ben Hutchings <ben(a)decadent.org.uk>
Signed-off-by: Salvatore Bonaccorso <carnil(a)debian.org>
Link: https://lore.kernel.org/r/20210921143442.340087-1-carnil@debian.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
Acked-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
drivers/usb/common/Kconfig | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/usb/common/Kconfig b/drivers/usb/common/Kconfig
index 5e8a04e3dd3c..b856622431a7 100644
--- a/drivers/usb/common/Kconfig
+++ b/drivers/usb/common/Kconfig
@@ -6,8 +6,7 @@ config USB_COMMON
config USB_LED_TRIG
bool "USB LED Triggers"
- depends on LEDS_CLASS && LEDS_TRIGGERS
- select USB_COMMON
+ depends on LEDS_CLASS && USB_COMMON && LEDS_TRIGGERS
help
This option adds LED triggers for USB host and/or gadget activity.
--
2.20.1
[PATCH openEuler-5.10 1/7] nbd: don't handle response without a corresponding request message
by Zheng Zengkai 15 Nov '21
From: Yu Kuai <yukuai3(a)huawei.com>
mainline inclusion
from mainline-next-20211018
commit b5644a3a79bf3be5f1238db1b2f241374b27b0f0
category: bugfix
bugzilla: 49890 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
---------------------------
While handling a response message from the server, nbd_read_stat() will
try to get the request by tag, and then complete the request. However,
this is problematic if nbd hasn't sent a corresponding request
message:
t1                      t2
                        submit_bio
                        nbd_queue_rq
                        blk_mq_start_request
recv_work
nbd_read_stat
blk_mq_tag_to_rq
blk_mq_complete_request
                        nbd_send_cmd
Thus add a new cmd flag 'NBD_CMD_INFLIGHT'; it will be set in
nbd_send_cmd() and checked in nbd_read_stat().
Note that this patch can't fix the case where blk_mq_tag_to_rq() might
return a freed request; that will be fixed in the following
patches.
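Sketched in isolation, the flag protocol looks like this (condensed from
the patch; both sides run under cmd->lock):
/* Submission side (nbd_handle_cmd), once the request really hit the wire: */
if (!ret)
	__set_bit(NBD_CMD_INFLIGHT, &cmd->flags);

/* Completion side (nbd_read_stat), before trusting the server's reply: */
if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
	dev_err(disk_to_dev(nbd->disk),
		"Suspicious reply %d (status %u flags %lu)",
		tag, cmd->status, cmd->flags);
	ret = -ENOENT;	/* no request message was ever sent for this tag */
	goto out;
}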
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Link: https://lore.kernel.org/r/20210916093350.1410403-2-yukuai3@huawei.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
drivers/block/nbd.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 8a841d5f422d..cdee84f3c672 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -122,6 +122,12 @@ struct nbd_device {
};
#define NBD_CMD_REQUEUED 1
+/*
+ * This flag will be set if nbd_queue_rq() succeed, and will be checked and
+ * cleared in completion. Both setting and clearing of the flag are protected
+ * by cmd->lock.
+ */
+#define NBD_CMD_INFLIGHT 2
struct nbd_cmd {
struct nbd_device *nbd;
@@ -389,6 +395,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
if (!mutex_trylock(&cmd->lock))
return BLK_EH_RESET_TIMER;
+ __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
if (!refcount_inc_not_zero(&nbd->config_refs)) {
cmd->status = BLK_STS_TIMEOUT;
mutex_unlock(&cmd->lock);
@@ -718,6 +725,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
+ if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
+ tag, cmd->status, cmd->flags);
+ ret = -ENOENT;
+ goto out;
+ }
if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
@@ -817,6 +830,7 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved)
return true;
mutex_lock(&cmd->lock);
+ __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
cmd->status = BLK_STS_IOERR;
mutex_unlock(&cmd->lock);
@@ -953,7 +967,13 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
* returns EAGAIN can be retried on a different socket.
*/
ret = nbd_send_cmd(nbd, cmd, index);
- if (ret == -EAGAIN) {
+ /*
+ * Access to this flag is protected by cmd->lock, thus it's safe to set
+ * the flag after nbd_send_cmd() succeed to send request to server.
+ */
+ if (!ret)
+ __set_bit(NBD_CMD_INFLIGHT, &cmd->flags);
+ else if (ret == -EAGAIN) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Request send failed, requeueing\n");
nbd_mark_nsock_dead(nbd, nsock, 1);
--
2.20.1
[PATCH openEuler-5.10 01/29] spi: rockchip: handle zero length transfers without timing out
by Zheng Zengkai 15 Nov '21
From: Tobias Schramm <t.schramm(a)manjaro.org>
stable inclusion
from stable-5.10.72
commit 2ababcd8c2ababe7f11032b928b9e8ab35af5e8c
bugzilla: 182982 https://gitee.com/openeuler/kernel/issues/I4I3L1
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
[ Upstream commit 5457773ef99f25fcc4b238ac76b68e28273250f4 ]
Previously zero length transfers submitted to the Rockchip SPI driver would
time out in the SPI layer. This happens because the SPI peripheral does
not trigger a transfer completion interrupt for zero length transfers.
Fix that by completing zero length transfers immediately at the start of
the transfer.
Signed-off-by: Tobias Schramm <t.schramm(a)manjaro.org>
Link: https://lore.kernel.org/r/20210827050357.165409-1-t.schramm@manjaro.org
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
Acked-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
drivers/spi/spi-rockchip.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 0aab37cd64e7..624273d0e727 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -582,6 +582,12 @@ static int rockchip_spi_transfer_one(
int ret;
bool use_dma;
+ /* Zero length transfers won't trigger an interrupt on completion */
+ if (!xfer->len) {
+ spi_finalize_current_transfer(ctlr);
+ return 1;
+ }
+
WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) &&
(readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_BUSY));
--
2.20.1
[PATCH openEuler-5.10 01/86] tty: Fix out-of-bound vmalloc access in imageblit
by Zheng Zengkai 15 Nov '21
From: Igor Matheus Andrade Torrente <igormtorrente(a)gmail.com>
stable inclusion
from stable-5.10.71
commit d570c48dd37dbe8fc6875d4461d01a9554ae2560
bugzilla: 182981 https://gitee.com/openeuler/kernel/issues/I4I3KD
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
[ Upstream commit 3b0c406124719b625b1aba431659f5cdc24a982c ]
This issue happens when a userspace program does an ioctl
FBIOPUT_VSCREENINFO, passing a fb_var_screeninfo struct
that contains values only in the fields xres, yres, and bits_per_pixel.
If this struct is the same as in the previous ioctl, vc_resize()
detects that and doesn't call resize_screen(), leaving the
fb_var_screeninfo incomplete. This leads updatescrollmode() to
calculate a wrong value for fbcon_display->vrows, which makes real_y()
return a wrong value of y, and that value eventually causes
imageblit to access an out-of-bound address.
To solve this issue, call resize_screen() even if the screen does not
need any resizing, so it will "fix and fill" the fb_var_screeninfo
independently.
Cc: stable <stable(a)vger.kernel.org> # after 5.15-rc2 is out, give it time to bake
Reported-and-tested-by: syzbot+858dc7a2f7ef07c2c219(a)syzkaller.appspotmail.com
Signed-off-by: Igor Matheus Andrade Torrente <igormtorrente(a)gmail.com>
Link: https://lore.kernel.org/r/20210628134509.15895-1-igormtorrente@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
Acked-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
drivers/tty/vt/vt.c | 21 +++++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index cea40ef090b7..a7ee1171eeb3 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1220,8 +1220,25 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
new_row_size = new_cols << 1;
new_screen_size = new_row_size * new_rows;
- if (new_cols == vc->vc_cols && new_rows == vc->vc_rows)
- return 0;
+ if (new_cols == vc->vc_cols && new_rows == vc->vc_rows) {
+ /*
+ * This function is being called here to cover the case
+ * where the userspace calls the FBIOPUT_VSCREENINFO twice,
+ * passing the same fb_var_screeninfo containing the fields
+ * yres/xres equal to a number non-multiple of vc_font.height
+ * and yres_virtual/xres_virtual equal to number lesser than the
+ * vc_font.height and yres/xres.
+ * In the second call, the struct fb_var_screeninfo isn't
+ * being modified by the underlying driver because of the
+ * if above, and this causes the fbcon_display->vrows to become
+ * negative and it eventually leads to out-of-bound
+ * access by the imageblit function.
+ * To give the correct values to the struct and to not have
+ * to deal with possible errors from the code below, we call
+ * the resize_screen here as well.
+ */
+ return resize_screen(vc, new_cols, new_rows, user);
+ }
if (new_screen_size > KMALLOC_MAX_SIZE || !new_screen_size)
return -EINVAL;
--
2.20.1
[PATCH openEuler-5.10 01/13] cgroup: Fix memory leak caused by missing cgroup_bpf_offline
by Zheng Zengkai 15 Nov '21
From: Quanyang Wang <quanyang.wang(a)windriver.com>
mainline inclusion
from mainline
commit 04f8ef5643bcd8bcde25dfdebef998aea480b2ba
category: bugfix
bugzilla: 182945 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/…
---------------------------
When enabling CONFIG_CGROUP_BPF, a memory leak can be observed with
kmemleak by running the commands below:
$mount -t cgroup -o none,name=foo cgroup cgroup/
$umount cgroup/
unreferenced object 0xc3585c40 (size 64):
comm "mount", pid 425, jiffies 4294959825 (age 31.990s)
hex dump (first 32 bytes):
01 00 00 80 84 8c 28 c0 00 00 00 00 00 00 00 00 ......(.........
00 00 00 00 00 00 00 00 6c 43 a0 c3 00 00 00 00 ........lC......
backtrace:
[<e95a2f9e>] cgroup_bpf_inherit+0x44/0x24c
[<1f03679c>] cgroup_setup_root+0x174/0x37c
[<ed4b0ac5>] cgroup1_get_tree+0x2c0/0x4a0
[<f85b12fd>] vfs_get_tree+0x24/0x108
[<f55aec5c>] path_mount+0x384/0x988
[<e2d5e9cd>] do_mount+0x64/0x9c
[<208c9cfe>] sys_mount+0xfc/0x1f4
[<06dd06e0>] ret_fast_syscall+0x0/0x48
[<a8308cb3>] 0xbeb4daa8
This is because, since commit 2b0d3d3e4fcf ("percpu_ref: reduce
memory footprint of percpu_ref in fast path"), root_cgrp->bpf.refcnt.data
is allocated by percpu_ref_init() in cgroup_bpf_inherit(), which
is called by cgroup_setup_root() when mounting, but it is not freed along
with root_cgrp when umounting. Adding cgroup_bpf_offline(), which calls
percpu_ref_kill(), to cgroup_kill_sb() frees root_cgrp->bpf.refcnt.data on
the umount path.
This patch also fixes commit 4bfc0bb2c60e ("bpf: decouple the lifetime
of cgroup_bpf from cgroup itself"): a cgroup_bpf_offline() is needed to
clean up the resources that were allocated by cgroup_bpf_inherit()
in cgroup_setup_root().
Inside cgroup_bpf_offline(), cgroup_get() is called at the beginning, and
cgroup_put() is called at the end of cgroup_bpf_release(), which is
triggered by cgroup_bpf_offline(). So cgroup_bpf_offline() keeps the
cgroup's refcount balanced.
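The invariant at play: every percpu_ref_init() allocates backing data that
only a later percpu_ref_kill()/release can free. A minimal sketch of the
pairing outside of cgroup (all names below are illustrative):
#include <linux/percpu-refcount.h>

static void demo_release(struct percpu_ref *ref)
{
	/* last reference dropped; safe to free the surrounding object */
}

static int demo_setup(struct percpu_ref *ref)
{
	/* Since commit 2b0d3d3e4fcf this allocates ref->data internally. */
	return percpu_ref_init(ref, demo_release, 0, GFP_KERNEL);
}

static void demo_teardown(struct percpu_ref *ref)
{
	/* Without this kill, ref->data is leaked -- the exact pattern
	 * kmemleak flagged for root_cgrp->bpf.refcnt on umount. */
	percpu_ref_kill(ref);
}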
Fixes: 2b0d3d3e4fcf ("percpu_ref: reduce memory footprint of percpu_ref in fast path")
Fixes: 4bfc0bb2c60e ("bpf: decouple the lifetime of cgroup_bpf from cgroup itself")
Signed-off-by: Quanyang Wang <quanyang.wang(a)windriver.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Acked-by: Roman Gushchin <guro(a)fb.com>
Acked-by: John Fastabend <john.fastabend(a)gmail.com>
Link: https://lore.kernel.org/bpf/20211018075623.26884-1-quanyang.wang@windriver.…
Signed-off-by: Lu Jialin <lujialin4(a)huawei.com>
Reviewed-by: weiyang wang <wangweiyang2(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
kernel/cgroup/cgroup.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 86ab4a1305f6..5e4a50091c18 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2159,8 +2159,10 @@ static void cgroup_kill_sb(struct super_block *sb)
* And don't kill the default root.
*/
if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
- !percpu_ref_is_dying(&root->cgrp.self.refcnt))
+ !percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
+ cgroup_bpf_offline(&root->cgrp);
percpu_ref_kill(&root->cgrp.self.refcnt);
+ }
cgroup_put(&root->cgrp);
kernfs_kill_sb(sb);
}
--
2.20.1
[PATCH openEuler-5.10 01/97] PCI: aardvark: Increase polling delay to 1.5s while waiting for PIO response
by Zheng Zengkai 15 Nov '21
From: Pali Rohár <pali(a)kernel.org>
stable inclusion
from stable-5.10.70
commit 31bd6cd06a18315c9a1b4c6035d027f086c59942
bugzilla: 182949 https://gitee.com/openeuler/kernel/issues/I4I3GQ
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit 2b58db229eb617d97d5746113b77045f1f884bcb upstream.
Measurements in different conditions showed that the aardvark hardware PIO
response can take up to 1.44s. Increase the wait timeout from 1ms to 1.5s
to ensure that we do not miss responses from the hardware. After 1.44s the
hardware returns errors (e.g. Completer abort).
The previous two patches fixed the checking of the PIO status, so now we
can use it to also catch errors which are reported by the hardware after
1.44s.
After applying this patch, the kernel can detect and print PIO errors to dmesg:
[ 6.879999] advk-pcie d0070000.pcie: Non-posted PIO Response Status: CA, 0xe00 @ 0x100004
[ 6.896436] advk-pcie d0070000.pcie: Posted PIO Response Status: COMP_ERR, 0x804 @ 0x100004
[ 6.913049] advk-pcie d0070000.pcie: Posted PIO Response Status: COMP_ERR, 0x804 @ 0x100010
[ 6.929663] advk-pcie d0070000.pcie: Non-posted PIO Response Status: CA, 0xe00 @ 0x100010
[ 6.953558] advk-pcie d0070000.pcie: Posted PIO Response Status: COMP_ERR, 0x804 @ 0x100014
[ 6.970170] advk-pcie d0070000.pcie: Non-posted PIO Response Status: CA, 0xe00 @ 0x100014
[ 6.994328] advk-pcie d0070000.pcie: Posted PIO Response Status: COMP_ERR, 0x804 @ 0x100004
Without this patch the kernel prints only a generic error to dmesg:
[ 5.246847] advk-pcie d0070000.pcie: config read/write timed out
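For reference, the polling loop this constant feeds is a simple bounded
busy-wait; a sketch of its shape (close to, but not verbatim, the
driver's advk_pcie_wait_pio()):
#define PIO_RETRY_CNT	750000	/* x 2 us per iteration = 1.5 s total */
#define PIO_RETRY_DELAY	2	/* 2 us */

static int advk_pcie_wait_pio_sketch(struct advk_pcie *pcie)
{
	int i;

	for (i = 0; i < PIO_RETRY_CNT; i++) {
		u32 start = advk_readl(pcie, PIO_START);
		u32 isr = advk_readl(pcie, PIO_ISR);

		if (!start && isr)	/* hardware completed the transfer */
			return 0;
		udelay(PIO_RETRY_DELAY);
	}
	return -ETIMEDOUT;	/* only after the full 1.5 s window */
}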
Link: https://lore.kernel.org/r/20210722144041.12661-3-pali@kernel.org
Signed-off-by: Pali Rohár <pali(a)kernel.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi(a)arm.com>
Reviewed-by: Marek Behún <kabel(a)kernel.org>
Cc: stable(a)vger.kernel.org # 7fbcb5da811b ("PCI: aardvark: Don't rely on jiffies while holding spinlock")
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
Acked-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
drivers/pci/controller/pci-aardvark.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index f175cff39b46..4f1a29ede576 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -214,7 +214,7 @@
(PCIE_CONF_BUS(bus) | PCIE_CONF_DEV(PCI_SLOT(devfn)) | \
PCIE_CONF_FUNC(PCI_FUNC(devfn)) | PCIE_CONF_REG(where))
-#define PIO_RETRY_CNT 500
+#define PIO_RETRY_CNT 750000 /* 1.5 s */
#define PIO_RETRY_DELAY 2 /* 2 us*/
#define LINK_WAIT_MAX_RETRIES 10
--
2.20.1
[PATCH openEuler-5.10 1/5] arm64: remove page granularity limitation from KFENCE
by Zheng Zengkai 15 Nov '21
From: Liu Shixin <liushixin2(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: 181005 https://gitee.com/openeuler/kernel/issues/I4DDEL
-------------------------------------------------
Currently, if KFENCE is enabled on arm64, the entire linear map is
mapped at page granularity, which seems overkill. Actually only the
kfence pool needs to be mapped at page granularity. We can remove the
restriction from KFENCE and force the linear mapping of the kfence pool
to page granularity later in arch_kfence_init_pool().
Signed-off-by: Liu Shixin <liushixin2(a)huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
arch/arm64/include/asm/kfence.h | 70 ++++++++++++++++++++++++++++++++-
arch/arm64/mm/mmu.c | 6 +--
2 files changed, 71 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h
index d061176d57ea..322e95bc228d 100644
--- a/arch/arm64/include/asm/kfence.h
+++ b/arch/arm64/include/asm/kfence.h
@@ -8,9 +8,77 @@
#ifndef __ASM_KFENCE_H
#define __ASM_KFENCE_H
+#include <linux/kfence.h>
#include <asm/cacheflush.h>
+#include <asm/pgalloc.h>
-static inline bool arch_kfence_init_pool(void) { return true; }
+static inline int split_pud_page(pud_t *pud, unsigned long addr)
+{
+ int i;
+ pmd_t *pmd = pmd_alloc_one(&init_mm, addr);
+ unsigned long pfn = PFN_DOWN(__pa(addr));
+
+ if (!pmd)
+ return -ENOMEM;
+
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ set_pmd(pmd + i, pmd_mkhuge(pfn_pmd(pfn + i * PTRS_PER_PTE, PAGE_KERNEL)));
+
+ smp_wmb(); /* See comment in __pte_alloc */
+ pud_populate(&init_mm, pud, pmd);
+
+ flush_tlb_kernel_range(addr, addr + PUD_SIZE);
+ return 0;
+}
+
+static inline int split_pmd_page(pmd_t *pmd, unsigned long addr)
+{
+ int i;
+ pte_t *pte = pte_alloc_one_kernel(&init_mm);
+ unsigned long pfn = PFN_DOWN(__pa(addr));
+
+ if (!pte)
+ return -ENOMEM;
+
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ set_pte(pte + i, pfn_pte(pfn + i, PAGE_KERNEL));
+
+ smp_wmb(); /* See comment in __pte_alloc */
+ pmd_populate_kernel(&init_mm, pmd, pte);
+
+ flush_tlb_kernel_range(addr, addr + PMD_SIZE);
+ return 0;
+}
+
+static inline bool arch_kfence_init_pool(void)
+{
+ unsigned long addr;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
+ addr += PAGE_SIZE) {
+ pgd = pgd_offset(&init_mm, addr);
+ if (pgd_leaf(*pgd))
+ return false;
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_leaf(*p4d))
+ return false;
+ pud = pud_offset(p4d, addr);
+ if (pud_leaf(*pud)) {
+ if (split_pud_page(pud, addr & PUD_MASK))
+ return false;
+ }
+ pmd = pmd_offset(pud, addr);
+ if (pmd_leaf(*pmd)) {
+ if (split_pmd_page(pmd, addr & PMD_MASK))
+ return false;
+ }
+ }
+ return true;
+}
static inline bool kfence_protect_page(unsigned long addr, bool protect)
{
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index b6a9895d6655..1c2a965e65b3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -492,8 +492,7 @@ static void __init map_mem(pgd_t *pgdp)
int flags = 0;
u64 i;
- if (rodata_full || crash_mem_map || debug_pagealloc_enabled() ||
- IS_ENABLED(CONFIG_KFENCE))
+ if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
@@ -1458,8 +1457,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
* KFENCE requires linear map to be mapped at page granularity, so that
* it is possible to protect/unprotect single pages in the KFENCE pool.
*/
- if (rodata_full || debug_pagealloc_enabled() ||
- IS_ENABLED(CONFIG_KFENCE))
+ if (rodata_full || debug_pagealloc_enabled())
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
--
2.20.1
[PATCH openEuler-5.10 01/56] PCI: pci-bridge-emul: Add PCIe Root Capabilities Register
by Zheng Zengkai 15 Nov '21
From: Pali Rohár <pali(a)kernel.org>
stable inclusion
from stable-5.10.69
commit 9e766b86a9ef653a8ca48a9f70d3dbb580284594
bugzilla: 182675 https://gitee.com/openeuler/kernel/issues/I4I3ED
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit e902bb7c24a7099d0eb0eb4cba06f2d91e9299f3 upstream.
The 16-bit Root Capabilities register is at offset 0x1e in the PCIe
Capability. Rename current 'rsvd' struct member to 'rootcap'.
Link: https://lore.kernel.org/r/20210722144041.12661-4-pali@kernel.org
Signed-off-by: Pali Rohár <pali(a)kernel.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi(a)arm.com>
Reviewed-by: Marek Behún <kabel(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
Acked-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Chen Jun <chenjun102(a)huawei.com>
---
drivers/pci/pci-bridge-emul.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pci/pci-bridge-emul.h b/drivers/pci/pci-bridge-emul.h
index b31883022a8e..49bbd37ee318 100644
--- a/drivers/pci/pci-bridge-emul.h
+++ b/drivers/pci/pci-bridge-emul.h
@@ -54,7 +54,7 @@ struct pci_bridge_emul_pcie_conf {
__le16 slotctl;
__le16 slotsta;
__le16 rootctl;
- __le16 rsvd;
+ __le16 rootcap;
__le32 rootsta;
__le32 devcap2;
__le16 devctl2;
--
2.20.1
[PATCH openEuler-5.10 01/26] sched/fair: fix sd_llc_alloc_all() compile error
by Zheng Zengkai 15 Nov '21
From: Cheng Jian <cj.chengjian(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: 38261, https://gitee.com/openeuler/kernel/issues/I49XPZ
CVE: NA
---------------------------
When CONFIG_SCHED_STEAL is disabled:
kernel/sched/topology.c:24:74: warning: ‘struct s_data’ declared inside parameter list will not be visible outside of this definition or declaration
24 | static inline int sd_llc_alloc_all(const struct cpumask *cpu_map, struct s_data *d) { return 0; }
| ^~~~~~
kernel/sched/topology.c: In function ‘build_sched_domains’:
kernel/sched/topology.c:2188:32: error: passing argument 2 of ‘sd_llc_alloc_all’ from incompatible pointer type [-Werror=incompatible-pointer-types]
2188 | if (sd_llc_alloc_all(cpu_map, &d))
| ^~
| |
| struct s_data *
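The root cause is a C scoping rule: a struct first declared inside a
prototype's parameter list is scoped to that prototype only. Moving the
forward declaration outside the #ifdef gives both configurations the same
type; a generic sketch of the fixed layout:
struct s_data;	/* forward declaration, visible to both branches */

#ifdef CONFIG_SCHED_STEAL
static int sd_llc_alloc_all(const struct cpumask *cpu_map, struct s_data *d);
#else
/* Without the declaration above, 'struct s_data' here would introduce a
 * new type local to this prototype, mismatching every caller. */
static inline int sd_llc_alloc_all(const struct cpumask *cpu_map,
				   struct s_data *d) { return 0; }
#endif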
Signed-off-by: Cheng Jian <cj.chengjian(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
kernel/sched/topology.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 0564aeabbcb8..fcf6aebb13c4 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -13,8 +13,8 @@ DEFINE_MUTEX(sched_domains_mutex);
static cpumask_var_t sched_domains_tmpmask;
static cpumask_var_t sched_domains_tmpmask2;
-#ifdef CONFIG_SCHED_STEAL
struct s_data;
+#ifdef CONFIG_SCHED_STEAL
static int sd_llc_alloc(struct sched_domain *sd);
static void sd_llc_free(struct sched_domain *sd);
static int sd_llc_alloc_all(const struct cpumask *cpu_map, struct s_data *d);
--
2.20.1
[PATCH openEuler-1.0-LTS] iommu: smmuv2: fix compile error when CONFIG_ARCH_PHYTIUM is off
by Yang Yingliang 15 Nov '21
From: Zheng Zengkai <zhengzengkai(a)huawei.com>
phytium inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I41AUQ
--------------------------------------
Disabling CONFIG_ARCH_PHYTIUM results in the following compile errors:
drivers/iommu/arm-smmu.c: In function ‘phytium_smmu_def_domain_type’:
drivers/iommu/arm-smmu.c:1641:6: error: implicit declaration of function ‘typeof_ft2000plus’ [-Werror=implicit-function-declaration]
1641 | if (typeof_ft2000plus() || typeof_s2500()) {
| ^~~~~~~~~~~~~~~~~
drivers/iommu/arm-smmu.c:1641:29: error: implicit declaration of function ‘typeof_s2500’ [-Werror=implicit-function-declaration]
1641 | if (typeof_ft2000plus() || typeof_s2500()) {
| ^~~~~~~~~~~~
cc1: some warnings being treated as errors
Fix it by using CONFIG_ARCH_PHYTIUM to control phytium related code.
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
Reviewed-by: Zhen Lei <thunder.leizhen(a)huawei.com>
Reviewed-by: wangxiongfeng 00379786 <wangxiongfeng2(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/iommu/arm-smmu.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 18863198bb036..d1c00b1dfd2ef 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1634,8 +1634,7 @@ static void arm_smmu_put_resv_regions(struct device *dev,
#ifdef CONFIG_SMMU_BYPASS_DEV
-#ifdef CONFIG_ARM64
-#include <asm/cputype.h>
+#ifdef CONFIG_ARCH_PHYTIUM
static int phytium_smmu_def_domain_type(struct device *dev, unsigned int *type)
{
if (typeof_ft2000plus() || typeof_s2500()) {
--
2.25.1
Ramaxel inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4I0OZ
CVE: NA
Fix the typo 'last_cmsn', which should be 'last_pmsn'.
Signed-off-by: Yanling Song <songyl(a)ramaxel.com>
---
drivers/scsi/spfc/hw/spfc_queue.c | 28 ++++++++++++++--------------
drivers/scsi/spfc/hw/spfc_queue.h | 2 +-
2 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/drivers/scsi/spfc/hw/spfc_queue.c b/drivers/scsi/spfc/hw/spfc_queue.c
index 3f73fa26aad1..abcf1ff3f49f 100644
--- a/drivers/scsi/spfc/hw/spfc_queue.c
+++ b/drivers/scsi/spfc/hw/spfc_queue.c
@@ -1027,7 +1027,7 @@ u32 spfc_create_ssq(void *handle)
sq_ctrl->wqe_offset = 0;
sq_ctrl->head_start_cmsn = 0;
sq_ctrl->head_end_cmsn = SPFC_GET_WP_END_CMSN(0, sq_ctrl->wqe_num_per_buf);
- sq_ctrl->last_cmsn = 0;
+ sq_ctrl->last_pmsn = 0;
/* Linked List SQ Owner Bit 1 valid,0 invalid */
sq_ctrl->last_pi_owner = 1;
atomic_set(&sq_ctrl->sq_valid, true);
@@ -3127,7 +3127,7 @@ static u32 spfc_parent_sq_ring_direct_wqe_doorbell(struct spfc_parent_ssq_info *
struct spfc_hba_info *hba;
hba = (struct spfc_hba_info *)sq->hba;
- pmsn = sq->last_cmsn;
+ pmsn = sq->last_pmsn;
if (sq->cache_id == INVALID_VALUE32) {
FC_DRV_PRINT(UNF_LOG_IO_ATT, UNF_ERR,
@@ -3166,7 +3166,7 @@ u32 spfc_parent_sq_ring_doorbell(struct spfc_parent_ssq_info *sq, u8 qos_level,
struct spfc_parent_sq_db door_bell;
hba = (struct spfc_hba_info *)sq->hba;
- pmsn = sq->last_cmsn;
+ pmsn = sq->last_pmsn;
/* Obtain the low 8 Bit of PMSN */
pmsn_lo = (u8)(pmsn & SPFC_PMSN_MASK);
/* Obtain the high 8 Bit of PMSN */
@@ -3231,10 +3231,10 @@ u32 spfc_direct_sq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *io
FC_DRV_PRINT(UNF_LOG_NORMAL, UNF_INFO,
"[info]Ssq(0x%x), xid(0x%x) qid(0x%x) add wqepage at Pmsn(0x%x), sqe_minus_cqe_cnt(0x%x)",
ssq->sqn, ssq->context_id, ssq->sq_queue_id,
- ssq->last_cmsn,
+ ssq->last_pmsn,
atomic_read(&ssq->sqe_minus_cqe_cnt));
- link_wqe_msn = SPFC_MSN_DEC(ssq->last_cmsn);
+ link_wqe_msn = SPFC_MSN_DEC(ssq->last_pmsn);
link_wqe = (struct spfc_linkwqe *)spfc_get_wqe_page_entry(tail_wpg,
ssq->wqe_offset);
msn_wd = be32_to_cpu(link_wqe->val_wd1);
@@ -3250,7 +3250,7 @@ u32 spfc_direct_sq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *io
}
sqe_in_wp =
(struct spfc_sqe *)spfc_get_wqe_page_entry(tail_wpg, ssq->wqe_offset);
- spfc_build_wqe_owner_pmsn(io_sqe, (ssq->last_pi_owner), ssq->last_cmsn);
+ spfc_build_wqe_owner_pmsn(io_sqe, (ssq->last_pi_owner), ssq->last_pmsn);
SPFC_IO_STAT((struct spfc_hba_info *)ssq->hba, wqe_type);
wqe_gpa = tail_wpg->wpg_phy_addr + (ssq->wqe_offset * sizeof(struct spfc_sqe));
@@ -3260,11 +3260,11 @@ u32 spfc_direct_sq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *io
dre_door_bell.wd0.cos = 0;
dre_door_bell.wd0.c = 0;
dre_door_bell.wd0.pi_hi =
- (u32)(ssq->last_cmsn >> UNF_SHIFT_12) & SPFC_DB_WD0_PI_H_MASK;
+ (u32)(ssq->last_pmsn >> UNF_SHIFT_12) & SPFC_DB_WD0_PI_H_MASK;
dre_door_bell.wd0.cntx_size = SPFC_CNTX_SIZE_T_256B;
dre_door_bell.wd0.xid = ssq->context_id;
dre_door_bell.wd1.sm_data = ssq->cache_id;
- dre_door_bell.wd1.pi_lo = (u32)(ssq->last_cmsn & SPFC_DB_WD0_PI_L_MASK);
+ dre_door_bell.wd1.pi_lo = (u32)(ssq->last_pmsn & SPFC_DB_WD0_PI_L_MASK);
io_sqe->db_val = *(u64 *)&dre_door_bell;
spfc_convert_parent_wqe_to_big_endian(io_sqe);
@@ -3275,7 +3275,7 @@ u32 spfc_direct_sq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *io
"[INFO]Ssq(0x%x) xid:0x%x,qid:0x%x wqegpa:0x%llx,o:0x%x,outstandind:0x%x,pmsn:0x%x,cmsn:0x%x",
ssq->sqn, ssq->context_id, ssq->sq_queue_id, wqe_gpa,
ssq->last_pi_owner, atomic_read(&ssq->sqe_minus_cqe_cnt),
- ssq->last_cmsn, SPFC_GET_QUEUE_CMSN(ssq));
+ ssq->last_pmsn, SPFC_GET_QUEUE_CMSN(ssq));
ssq->accum_wqe_cnt++;
if (ssq->accum_wqe_cnt == accum_db_num) {
@@ -3286,7 +3286,7 @@ u32 spfc_direct_sq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *io
}
ssq->wqe_offset += 1;
- ssq->last_cmsn = SPFC_MSN_INC(ssq->last_cmsn);
+ ssq->last_pmsn = SPFC_MSN_INC(ssq->last_pmsn);
atomic_inc(&ssq->sq_wqe_cnt);
atomic_inc(&ssq->sqe_minus_cqe_cnt);
SPFC_SQ_IO_STAT(ssq, wqe_type);
@@ -3319,7 +3319,7 @@ u32 spfc_parent_ssq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *i
FC_DRV_PRINT(UNF_LOG_NORMAL, UNF_INFO,
"[info]Ssq(0x%x), xid(0x%x) qid(0x%x) add wqepage at Pmsn(0x%x), WpgCnt(0x%x)",
ssq->sqn, ssq->context_id, ssq->sq_queue_id,
- ssq->last_cmsn,
+ ssq->last_pmsn,
atomic_read(&ssq->wqe_page_cnt));
cur_cmsn = SPFC_GET_QUEUE_CMSN(ssq);
spfc_free_sq_wqe_page(ssq, cur_cmsn);
@@ -3335,7 +3335,7 @@ u32 spfc_parent_ssq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *i
link_wqe->next_page_addr_hi = cpu_to_be32(addr_wd);
addr_wd = SPFC_LSD(new_wqe_page->wpg_phy_addr);
link_wqe->next_page_addr_lo = cpu_to_be32(addr_wd);
- link_wqe_msn = SPFC_MSN_DEC(ssq->last_cmsn);
+ link_wqe_msn = SPFC_MSN_DEC(ssq->last_pmsn);
msn_wd = be32_to_cpu(link_wqe->val_wd1);
msn_wd |= ((u32)(link_wqe_msn & SPFC_MSNWD_L_MASK));
msn_wd |= (((u32)(link_wqe_msn & SPFC_MSNWD_H_MASK)) << UNF_SHIFT_16);
@@ -3351,7 +3351,7 @@ u32 spfc_parent_ssq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *i
atomic_inc(&ssq->wqe_page_cnt);
}
- spfc_build_wqe_owner_pmsn(io_sqe, !(ssq->last_pi_owner), ssq->last_cmsn);
+ spfc_build_wqe_owner_pmsn(io_sqe, !(ssq->last_pi_owner), ssq->last_pmsn);
SPFC_IO_STAT((struct spfc_hba_info *)ssq->hba, wqe_type);
spfc_convert_parent_wqe_to_big_endian(io_sqe);
sqe_in_wp = (struct spfc_sqe *)spfc_get_wqe_page_entry(tail_wpg, ssq->wqe_offset);
@@ -3371,7 +3371,7 @@ u32 spfc_parent_ssq_enqueue(struct spfc_parent_ssq_info *ssq, struct spfc_sqe *i
ssq->accum_wqe_cnt = 0;
}
ssq->wqe_offset += 1;
- ssq->last_cmsn = SPFC_MSN_INC(ssq->last_cmsn);
+ ssq->last_pmsn = SPFC_MSN_INC(ssq->last_pmsn);
atomic_inc(&ssq->sq_wqe_cnt);
atomic_inc(&ssq->sqe_minus_cqe_cnt);
SPFC_SQ_IO_STAT(ssq, wqe_type);
diff --git a/drivers/scsi/spfc/hw/spfc_queue.h b/drivers/scsi/spfc/hw/spfc_queue.h
index b1184eb17556..c09f098e7324 100644
--- a/drivers/scsi/spfc/hw/spfc_queue.h
+++ b/drivers/scsi/spfc/hw/spfc_queue.h
@@ -597,7 +597,7 @@ struct spfc_parent_ssq_info {
u32 wqe_offset;
u16 head_start_cmsn;
u16 head_end_cmsn;
- u16 last_cmsn;
+ u16 last_pmsn;
u16 last_pi_owner;
u32 queue_style;
atomic_t sq_valid;
--
2.30.0
15 Nov '21
From: 沈子俊 <shenzijun(a)kylinos.cn>
kylin inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4HDHZ?from=project-issue
CVE: NA
----------------------------------------------------------------------
Switch the arch/arm64 openeuler_defconfig from 64K to 4K pages (ARM64_4K_PAGES) and update the dependent options.
Signed-off-by: 沈子俊 <shenzijun(a)kylinos.cn>
---
arch/arm64/configs/openeuler_defconfig | 31 +++++++++++++-------------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 76d6a118330d..87cf82bcb30e 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -250,12 +250,12 @@ CONFIG_TRACEPOINTS=y
CONFIG_ARM64=y
CONFIG_64BIT=y
CONFIG_MMU=y
-CONFIG_ARM64_PAGE_SHIFT=16
-CONFIG_ARM64_CONT_PTE_SHIFT=5
-CONFIG_ARM64_CONT_PMD_SHIFT=5
-CONFIG_ARCH_MMAP_RND_BITS_MIN=14
-CONFIG_ARCH_MMAP_RND_BITS_MAX=14
-CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=7
+CONFIG_ARM64_PAGE_SHIFT=12
+CONFIG_ARM64_CONT_PTE_SHIFT=4
+CONFIG_ARM64_CONT_PMD_SHIFT=4
+CONFIG_ARCH_MMAP_RND_BITS_MIN=18
+CONFIG_ARCH_MMAP_RND_BITS_MAX=24
+CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=11
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000
@@ -365,17 +365,17 @@ CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH=y
CONFIG_SOCIONEXT_SYNQUACER_PREITS=y
# end of ARM errata workarounds via the alternatives framework
-# CONFIG_ARM64_4K_PAGES is not set
+CONFIG_ARM64_4K_PAGES=y
# CONFIG_ARM64_16K_PAGES is not set
-CONFIG_ARM64_64K_PAGES=y
-# CONFIG_ARM64_VA_BITS_42 is not set
+# CONFIG_ARM64_64K_PAGES is not set
+CONFIG_ARM64_VA_BITS_39=y
# CONFIG_ARM64_VA_BITS_48 is not set
-CONFIG_ARM64_VA_BITS_52=y
+# CONFIG_ARM64_VA_BITS_52 is not set
# CONFIG_ARM64_FORCE_52BIT is not set
-CONFIG_ARM64_VA_BITS=52
-# CONFIG_ARM64_PA_BITS_48 is not set
-CONFIG_ARM64_PA_BITS_52=y
-CONFIG_ARM64_PA_BITS=52
+CONFIG_ARM64_VA_BITS=39
+CONFIG_ARM64_PA_BITS_48=y
+# CONFIG_ARM64_PA_BITS_52 is not set
+CONFIG_ARM64_PA_BITS=48
# CONFIG_CPU_BIG_ENDIAN is not set
CONFIG_CPU_LITTLE_ENDIAN=y
CONFIG_SCHED_MC=y
@@ -402,6 +402,7 @@ CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_HAVE_ARCH_PFN_VALID=y
CONFIG_HW_PERF_EVENTS=y
CONFIG_SYS_SUPPORTS_HUGETLBFS=y
+CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y
CONFIG_PARAVIRT=y
@@ -411,7 +412,7 @@ CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
CONFIG_ARM64_CPU_PARK=y
# CONFIG_XEN is not set
-CONFIG_FORCE_MAX_ZONEORDER=14
+CONFIG_FORCE_MAX_ZONEORDER=11
CONFIG_UNMAP_KERNEL_AT_EL0=y
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
CONFIG_ARM64_PMEM_RESERVE=y
--
2.30.0
15 Nov '21
From: 沈子俊 <shenzijun(a)kylinos.cn>
kylin inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4I4E2#note_7386759
CVE: NA
------------------------------------------------------------------
Add a forward declaration of 'struct s_data'.
Signed-off-by: 沈子俊 <shenzijun(a)kylinos.cn>
---
kernel/sched/topology.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 0564aeabbcb8..1fac5d28c0dc 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -20,6 +20,7 @@ static void sd_llc_free(struct sched_domain *sd);
static int sd_llc_alloc_all(const struct cpumask *cpu_map, struct s_data *d);
static void sd_llc_free_all(const struct cpumask *cpu_map);
#else
+struct s_data;
static inline void sd_llc_free(struct sched_domain *sd) {}
static inline int sd_llc_alloc_all(const struct cpumask *cpu_map, struct s_data *d) { return 0; }
static inline void sd_llc_free_all(const struct cpumask *cpu_map) {}
--
2.30.0
15 Nov '21
kylin inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4AH11?from=project-issue
CVE: NA
In drivers/md/md.c, when autorun_array() is called, do_md_run() runs
first and then do_md_stop(); in that case the pointer mddev->private
may be freed twice.
Signed-off-by: zhangyue <zhangyue1(a)kylinos.cn>
---
drivers/md/md.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4406bb137a27..aa204ec74066 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6000,8 +6000,10 @@ static void __md_stop(struct mddev *mddev)
spin_lock(&mddev->lock);
mddev->pers = NULL;
spin_unlock(&mddev->lock);
- pers->free(mddev, mddev->private);
- mddev->private = NULL;
+ if (mddev->private) {
+ pers->free(mddev, mddev->private);
+ mddev->private = NULL;
+ }
if (pers->sync_request && mddev->to_remove == NULL)
mddev->to_remove = &md_redundancy_group;
module_put(pers->owner);
--
2.30.0
15 Nov '21
kylin inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4AFG6?from=project-issue
CVE: NA
qla24xx_sp_unmap() already calls sp->free(sp), so there is no need to
call sp->free(sp) again after qla24xx_sp_unmap() returns.
Signed-off-by: zhangyue <zhangyue1(a)kylinos.cn>
---
drivers/scsi/qla2xxx/qla_gs.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
index c3195d4c25e5..a7198a1e23fb 100644
--- a/drivers/scsi/qla2xxx/qla_gs.c
+++ b/drivers/scsi/qla2xxx/qla_gs.c
@@ -4228,7 +4228,6 @@ static void qla2x00_async_gpnft_gnnft_sp_done(void *s, int res)
if (rc) {
/* Cleanup here to prevent memory leak */
qla24xx_sp_unmap(vha, sp);
- sp->free(sp);
}
spin_lock_irqsave(&vha->work_lock, flags);
--
2.30.0
13 Nov '21
mainline inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4AFG6?from=project-issue
CVE: NA
qla24xx_sp_unmap() already calls sp->free(sp), so there is no need to
call sp->free(sp) again after qla24xx_sp_unmap() returns.
Signed-off-by: zhangyue <zhangyue1(a)kylinos.cn>
---
drivers/scsi/qla2xxx/qla_gs.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
index c3195d4c25e5..a7198a1e23fb 100644
--- a/drivers/scsi/qla2xxx/qla_gs.c
+++ b/drivers/scsi/qla2xxx/qla_gs.c
@@ -4228,7 +4228,6 @@ static void qla2x00_async_gpnft_gnnft_sp_done(void *s, int res)
if (rc) {
/* Cleanup here to prevent memory leak */
qla24xx_sp_unmap(vha, sp);
- sp->free(sp);
}
spin_lock_irqsave(&vha->work_lock, flags);
--
2.30.0
From: 沈子俊 <shenzijun(a)kylinos.cn>
The GCM/CCM mode of SM4 is defined in the RFC 8998 specification:
https://datatracker.ietf.org/doc/html/rfc8998
沈子俊 (3):
crypto: tcrypt - Fix missing return value check
crypto: testmgr - Add GCM/CCM mode test of SM4 algorithm
crypto: tcrypt - add GCM/CCM mode test for SM4 algorithm
crypto/tcrypt.c | 73 ++++++++++++++++++++---
crypto/testmgr.c | 29 ++++++++++
crypto/testmgr.h | 148 +++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 241 insertions(+), 9 deletions(-)
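For context, a minimal sketch of how a kernel user could exercise the new
"gcm(sm4)" mode through the AEAD API once this series is applied — an
illustration under stated assumptions, not code from the series; error
handling is trimmed and buf is assumed to have 16 spare bytes for the tag:

#include <crypto/aead.h>
#include <linux/scatterlist.h>

static int sm4_gcm_demo(const u8 key[16], u8 *buf, unsigned int len, u8 iv[12])
{
	struct crypto_aead *tfm;
	struct aead_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_aead("gcm(sm4)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* RFC 8998: SM4 uses a 128-bit key; request the full 128-bit tag. */
	err = crypto_aead_setkey(tfm, key, 16) ?:
	      crypto_aead_setauthsize(tfm, 16);
	if (err)
		goto out_free_tfm;

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	/* buf holds the plaintext on entry, ciphertext || tag on return */
	sg_init_one(&sg, buf, len + 16);
	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				  crypto_req_done, &wait);
	aead_request_set_crypt(req, &sg, &sg, len, iv);
	aead_request_set_ad(req, 0);
	err = crypto_wait_req(crypto_aead_encrypt(req), &wait);

	aead_request_free(req);
out_free_tfm:
	crypto_free_aead(tfm);
	return err;
}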
--
2.30.0
[PATCH openEuler-1.0-LTS 1/2] crypto: hisilicon - add CRYPTO_TFM_REQ_MAY_BACKLOG flag judge in sec_process()
by Yang Yingliang 12 Nov '21
From: Yu'an Wang <wangyuan46(a)huawei.com>
driver inclusion
category: Bugfix
bugzilla: NA
CVE: NA
Check the CRYPTO_TFM_REQ_MAY_BACKLOG flag in the crypto driver so that
task processing can be limited: when the hardware queue is saturated and
the caller did not set the flag, fail the request with -EBUSY instead of
backlogging it.
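For reference, this is the caller-side half of the contract — a hedged
sketch, not code from this patch; my_complete_cb and my_ctx are
placeholders. A request submitted without CRYPTO_TFM_REQ_MAY_BACKLOG now
fails fast with -EBUSY when the SEC queue is saturated:

#include <crypto/skcipher.h>

static int submit_with_backlog(struct crypto_skcipher *tfm,
			       crypto_completion_t my_complete_cb,
			       void *my_ctx)
{
	struct skcipher_request *req;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	/* Opt in to backlogging: a saturated driver queues the request
	 * instead of rejecting it. */
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				      my_complete_cb, my_ctx);

	/* set src/dst/iv and call crypto_skcipher_encrypt() next */
	return 0;
}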
Signed-off-by: Yu'an Wang <wangyuan46(a)huawei.com>
Reviewed-by: Longfang Liu <liulongfang(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/crypto/hisilicon/sec2/sec.h | 1 +
drivers/crypto/hisilicon/sec2/sec_crypto.c | 11 ++++++++---
2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index f6d1878edcc5c..51b93e25b750b 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -43,6 +43,7 @@ struct sec_req {
int err_type;
int req_id;
+ u32 flag;
/* Status of the SEC request */
bool fake_busy;
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 68fb0e5ef761d..482aa8d26640e 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -171,10 +171,13 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req)
struct sec_qp_ctx *qp_ctx = req->qp_ctx;
int ret;
- mutex_lock(&qp_ctx->req_lock);
+ if (ctx->fake_req_limit <=
+ atomic_read(&qp_ctx->qp->qp_status.used) &&
+ !(req->flag & CRYPTO_TFM_REQ_MAY_BACKLOG))
+ return -EBUSY;
+ mutex_lock(&qp_ctx->req_lock);
ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe);
-
if (ctx->fake_req_limit <=
atomic_read(&qp_ctx->qp->qp_status.used) && !ret) {
list_add_tail(&req->backlog_head, &qp_ctx->backlog);
@@ -917,7 +920,8 @@ static int sec_process(struct sec_ctx *ctx, struct sec_req *req)
sec_update_iv(req, ctx->alg_type);
ret = ctx->req_op->bd_send(ctx, req);
- if (unlikely(ret != -EBUSY && ret != -EINPROGRESS)) {
+ if (unlikely((ret != -EBUSY && ret != -EINPROGRESS) ||
+ (ret == -EBUSY && !(req->flag & CRYPTO_TFM_REQ_MAY_BACKLOG)))) {
dev_err_ratelimited(SEC_CTX_DEV(ctx),
"send sec request failed!\n");
goto err_send_req;
@@ -1009,6 +1013,7 @@ static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt)
if (!sk_req->cryptlen)
return 0;
+ req->flag = sk_req->base.flags;
req->c_req.sk_req = sk_req;
req->c_req.encrypt = encrypt;
req->ctx = ctx;
--
2.25.1
[PATCH openEuler-1.0-LTS 1/3] tcp: always set retrans_stamp on recovery
by Yang Yingliang 12 Nov '21
From: Yuchung Cheng <ycheng(a)google.com>
mainline inclusion
from mainline-v5.1-rc1
commit 7ae189759cc48cf8b54beebff566e9fd2d4e7d7c
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4AFRJ?from=project-issue
CVE: NA
------------------------------------------------------------
Previously TCP socket's retrans_stamp is not set if the
retransmission has failed to send. As a result, if a socket is
experiencing local issues to retransmit packets, determining when
to abort a socket is complicated without knowing the starting time of
the recovery since retrans_stamp may remain zero.
This complication causes sub-optimal behavior in which TCP may use the
latest, instead of the first, retransmission time to compute the
elapsed time of a stalling connection due to local issues. Then TCP
may disregard TCP retries settings and keep retrying until it finally
succeeds: not a good idea when the local host is already strained.
The simple fix is to always timestamp the start of a recovery.
It's worth noting that retrans_stamp is also used to compare echo
timestamp values to detect spurious recovery. This patch does
not break that because retrans_stamp is still later than when the
original packet was sent.
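As a hedged illustration of what the change buys (a hypothetical helper,
not part of this patch): once retrans_stamp is stamped on the first
attempted retransmit, the elapsed recovery time can be derived from it
directly, even when every retransmit so far failed to leave the host:

static u32 recovery_elapsed_ms(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* tcp_time_stamp() and retrans_stamp use the same millisecond
	 * clock, so this is the time since the recovery started. */
	return tcp_time_stamp(tp) - tp->retrans_stamp;
}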
Signed-off-by: Yuchung Cheng <ycheng(a)google.com>
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Reviewed-by: Neal Cardwell <ncardwell(a)google.com>
Reviewed-by: Soheil Hassas Yeganeh <soheil(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Conflicts:
net/ipv4/tcp_timer.c
Signed-off-by: Jiazhenyuan <jiazhenyuan@uniontech> #openEuler_contributor
Signed-off-by: Laibin Qiu <qiulaibin(a)huawei.com>
Reviewed-by: Wei Yongjun <weiyongjun1(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
net/ipv4/tcp_output.c | 9 ++++-----
net/ipv4/tcp_timer.c | 23 +++--------------------
2 files changed, 7 insertions(+), 25 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 97b9d671a83c9..6710056fd1b23 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3011,13 +3011,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
#endif
TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
tp->retrans_out += tcp_skb_pcount(skb);
-
- /* Save stamp of the first retransmit. */
- if (!tp->retrans_stamp)
- tp->retrans_stamp = tcp_skb_timestamp(skb);
-
}
+ /* Save stamp of the first (attempted) retransmit. */
+ if (!tp->retrans_stamp)
+ tp->retrans_stamp = tcp_skb_timestamp(skb);
+
if (tp->undo_retrans < 0)
tp->undo_retrans = 0;
tp->undo_retrans += tcp_skb_pcount(skb);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 681882a409686..8435cbad337d8 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,27 +22,13 @@
#include <linux/gfp.h>
#include <net/tcp.h>
-static u32 tcp_retransmit_stamp(const struct sock *sk)
-{
- u32 start_ts = tcp_sk(sk)->retrans_stamp;
-
- if (unlikely(!start_ts)) {
- struct sk_buff *head = tcp_rtx_queue_head(sk);
-
- if (!head)
- return 0;
- start_ts = tcp_skb_timestamp(head);
- }
- return start_ts;
-}
-
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
u32 elapsed, start_ts;
- start_ts = tcp_retransmit_stamp(sk);
- if (!icsk->icsk_user_timeout || !start_ts)
+ start_ts = tcp_sk(sk)->retrans_stamp;
+ if (!icsk->icsk_user_timeout)
return icsk->icsk_rto;
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
if (elapsed >= icsk->icsk_user_timeout)
@@ -196,10 +182,7 @@ static bool retransmits_timed_out(struct sock *sk,
if (!inet_csk(sk)->icsk_retransmits)
return false;
- start_ts = tcp_retransmit_stamp(sk);
- if (!start_ts)
- return false;
-
+ start_ts = tcp_sk(sk)->retrans_stamp;
if (likely(timeout == 0)) {
linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
--
2.25.1
[PATCH openEuler-1.0-LTS 01/49] perf/x86/intel/pt: Fix mask of num_address_ranges
by Yang Yingliang 12 Nov '21
From: Xiaoyao Li <xiaoyao.li(a)intel.com>
stable inclusion
from linux-4.19.207
commit f109e8a1678ce920b7f0df7865f2a31754ec5d1c
--------------------------------
[ Upstream commit c53c6b7409f4cd9e542991b53d597fbe2751d7db ]
Per SDM, bit 2:0 of CPUID(0x14,1).EAX[2:0] reports the number of
configurable address ranges for filtering, not bit 1:0.
Signed-off-by: Xiaoyao Li <xiaoyao.li(a)intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin(a)linux.intel.com>
Link: https://lkml.kernel.org/r/20210824040622.4081502-1-xiaoyao.li@intel.com
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
arch/x86/events/intel/pt.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index db969f3f175cb..774fb0f0bf6df 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -69,7 +69,7 @@ static struct pt_cap_desc {
PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)),
PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)),
PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)),
- PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3),
+ PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x7),
PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000),
PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff),
PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000),
--
2.25.1
[PATCH openEuler-1.0-LTS 1/9] Revert "EMMC: fix ascend hisi emmc probe failed problem according to mmc_host struct"
by Yang Yingliang 12 Nov '21
From: zhangguijiang <zhangguijiang(a)huawei.com>
ascend inclusion
category: bugfix
feature: Ascend emmc adaptation
bugzilla: https://gitee.com/openeuler/kernel/issues/I4F4LL
CVE: NA
--------------------
This reverts commit 80e65cb9c78f00090a95e6db3126e3e60ec2804d.
Signed-off-by: zhangguijiang <zhangguijiang(a)huawei.com>
Reviewed-by: Weilong Chen <chenweilong(a)huawei.com>
Reviewed-by: YANHONG LU <luyanhong.luyanhong(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/mmc/core/sd.c | 38 +++++++++++++++++++++++++++++---
drivers/mmc/host/dw_mmc_extern.h | 2 +-
include/linux/mmc/host.h | 2 +-
3 files changed, 37 insertions(+), 5 deletions(-)
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 8760b749292b8..20ca371b9f874 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -1163,9 +1163,9 @@ static int _mmc_sd_suspend(struct mmc_host *host)
err = mmc_deselect_cards(host);
if (!err) {
- if (!mmc_is_ascend_customized(host->parent))
+ if (!(mmc_is_ascend_customized(host->parent)))
mmc_power_off(host);
- else if (!mmc_card_keep_power(host))
+ else if (mmc_card_keep_power(host))
mmc_power_off(host);
mmc_card_set_suspended(host->card);
}
@@ -1269,10 +1269,42 @@ static int mmc_sd_runtime_resume(struct mmc_host *host)
return 0;
}
+#ifdef CONFIG_ASCEND_HISI_MMC
+/*********************sd ops begin**********************/
+static int mmc_do_sd_reset(struct mmc_host *host)
+{
+ struct mmc_card *card = host->card;
+
+ if (!host->bus_ops->power_restore)
+ return -EOPNOTSUPP;
+
+ if (!card)
+ return -EINVAL;
+
+ /* hw_reset for ip reset */
+ if (host->ops->hw_reset)
+ host->ops->hw_reset(host);
+
+ /* Only for K930/920 SD slow down clk*/
+ if (host->ops->slowdown_clk)
+ host->ops->slowdown_clk(host, host->ios.timing);
+
+ mmc_power_off(host);
+ mmc_set_clock(host, host->f_init);
+ /* Wait at least 200 ms */
+ mmc_delay(200);
+ mmc_power_up(host, host->card->ocr);
+ (void)mmc_select_voltage(host, host->card->ocr);
+
+ return host->bus_ops->power_restore(host);
+}
+#endif
static int mmc_sd_hw_reset(struct mmc_host *host)
{
+#ifdef CONFIG_ASCEND_HISI_MMC
if (mmc_is_ascend_customized(host->parent))
- return mmc_sd_reset(host);
+ return mmc_do_sd_reset(host);
+#endif
mmc_power_cycle(host, host->card->ocr);
return mmc_sd_init_card(host, host->card->ocr, host->card);
}
diff --git a/drivers/mmc/host/dw_mmc_extern.h b/drivers/mmc/host/dw_mmc_extern.h
index ab077b4955940..04d8c23f39e9a 100644
--- a/drivers/mmc/host/dw_mmc_extern.h
+++ b/drivers/mmc/host/dw_mmc_extern.h
@@ -8,7 +8,7 @@
#include "dw_mmc.h"
-#if defined(CONFIG_MMC_DW_HI3XXX) || defined(CONFIG_MMC_DW_HI3XXX_MODULE)
+#ifdef CONFIG_MMC_DW_HI3XXX_MODULE
extern void dw_mci_reg_dump(struct dw_mci *host);
extern void dw_mci_set_timeout(struct dw_mci *host);
extern bool dw_mci_stop_abort_cmd(struct mmc_command *cmd);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index fabc23d156242..78b4d0a813b71 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -542,13 +542,13 @@ struct mmc_host {
bool cqe_enabled;
bool cqe_on;
+ unsigned long private[0] ____cacheline_aligned;
#ifdef CONFIG_ASCEND_HISI_MMC
const struct mmc_cmdq_host_ops *cmdq_ops;
int sdio_present;
unsigned int cmdq_slots;
struct mmc_cmdq_context_info cmdq_ctx;
#endif
- unsigned long private[0] ____cacheline_aligned;
};
struct device_node;
--
2.25.1
[PATCH openEuler-1.0-LTS] iommu: support phytium ft2000plus and S2500 iommu function
by Yang Yingliang 11 Nov '21
From: Mao HongBo <maohongbo(a)phytium.com.cn>
phytium inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I41AUQ
-----------------------------------
Fix an iommu issue with device access in virtualization scenarios
on FT2000plus and S2500.
Convert to the new Phytium cputype macro naming.
Signed-off-by: Mao HongBo <maohongbo(a)phytium.com.cn>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
Reviewed-by: Hanjun Guo <guohanjun(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
arch/arm64/include/asm/cputype.h | 14 +++++--
.../arm64/include/asm/phytium_machine_types.h | 37 +++++++++++++++++++
arch/arm64/kernel/topology.c | 2 +-
drivers/iommu/arm-smmu.c | 29 +++++++++++++--
drivers/irqchip/irq-gic-v3-its.c | 9 +++++
drivers/pci/quirks.c | 4 ++
drivers/usb/host/xhci-pci.c | 2 +-
7 files changed, 87 insertions(+), 10 deletions(-)
create mode 100644 arch/arm64/include/asm/phytium_machine_types.h
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 23298b0aedaf7..aa38796121300 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -101,8 +101,11 @@
#define HISI_CPU_PART_TSV110 0xD01
#define HISI_CPU_PART_TSV200 0xD02
-#define PHYTIUM_CPU_PART_FTC662 0x662
-#define PHYTIUM_CPU_PART_FTC663 0x663
+#define PHYTIUM_CPU_PART_1500A 0X660
+#define PHYTIUM_CPU_PART_2000AHK 0X661
+#define PHYTIUM_CPU_PART_2000PLUS 0X662
+#define PHYTIUM_CPU_PART_2004 0X663
+#define PHYTIUM_CPU_PART_2500 0X663
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
@@ -124,8 +127,11 @@
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
#define MIDR_HISI_TSV200 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV200)
-#define MIDR_PHYTIUM_FT2000PLUS MIDR_CPU_MODEL(ARM_CPU_IMP_PHYTIUM, PHYTIUM_CPU_PART_FTC662)
-#define MIDR_PHYTIUM_FT2500 MIDR_CPU_MODEL(ARM_CPU_IMP_PHYTIUM, PHYTIUM_CPU_PART_FTC663)
+#define MIDR_FT_1500A MIDR_CPU_MODEL(ARM_CPU_IMP_PHYTIUM, PHYTIUM_CPU_PART_1500A)
+#define MIDR_FT_2000AHK MIDR_CPU_MODEL(ARM_CPU_IMP_PHYTIUM, PHYTIUM_CPU_PART_2000AHK)
+#define MIDR_FT_2000PLUS MIDR_CPU_MODEL(ARM_CPU_IMP_PHYTIUM, PHYTIUM_CPU_PART_2000PLUS)
+#define MIDR_FT_2004 MIDR_CPU_MODEL(ARM_CPU_IMP_PHYTIUM, PHYTIUM_CPU_PART_2004)
+#define MIDR_FT_2500 MIDR_CPU_MODEL(ARM_CPU_IMP_PHYTIUM, PHYTIUM_CPU_PART_2500)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/phytium_machine_types.h b/arch/arm64/include/asm/phytium_machine_types.h
new file mode 100644
index 0000000000000..fb791988f0cee
--- /dev/null
+++ b/arch/arm64/include/asm/phytium_machine_types.h
@@ -0,0 +1,37 @@
+/*
+ * Authors: Wang Yinfeng <wangyinfenng(a)phytium.com.cn>
+ *
+ * Copyright (C) 2021, PHYTIUM Information Technology Co., Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __PHYTIUM_MACHINE_TYPES_H__
+#define __PHYTIUM_MACHINE_TYPES_H__
+
+#include <asm/cputype.h>
+#include <linux/types.h>
+
+static inline bool phytium_part(u32 cpuid)
+{
+ return ((read_cpuid_id() & MIDR_CPU_MODEL_MASK) == cpuid);
+}
+
+#define typeof_ft1500a() phytium_part(MIDR_FT_1500A)
+#define typeof_ft2000ahk() phytium_part(MIDR_FT_2000AHK)
+#define typeof_ft2000plus() phytium_part(MIDR_FT_2000PLUS)
+#define typeof_ft2004() phytium_part(MIDR_FT_2004)
+#define typeof_s2500() phytium_part(MIDR_FT_2500)
+
+#endif
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 02d3e688d657d..2646695e2f2a4 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -291,7 +291,7 @@ void store_cpu_topology(unsigned int cpuid)
cpuid_topo->package_id = cpu_to_node(cpuid);
/* Some PHYTIUM FT2000PLUS platform firmware has no PPTT table */
- if ((read_cpuid_id() & MIDR_CPU_MODEL_MASK) == MIDR_PHYTIUM_FT2000PLUS
+ if ((read_cpuid_id() & MIDR_CPU_MODEL_MASK) == MIDR_FT_2000PLUS
&& cpu_to_node(cpuid) == NUMA_NO_NODE) {
cpuid_topo->thread_id = 0;
cpuid_topo->package_id = 0;
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 8a268ab82ef02..18863198bb036 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -56,6 +56,10 @@
#include "arm-smmu-regs.h"
+#ifdef CONFIG_ARCH_PHYTIUM
+#include <asm/phytium_machine_types.h>
+#endif
+
/*
* Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
* global register space are still, in fact, using a hypervisor to mediate it
@@ -1407,6 +1411,20 @@ static int arm_smmu_add_device(struct device *dev)
return -ENODEV;
}
+#ifdef CONFIG_ARCH_PHYTIUM
+ /* ft2000+ */
+ if (typeof_ft2000plus()) {
+ int num = fwspec->num_ids;
+
+ for (i = 0; i < num; i++) {
+#define FWID_READ(id) (((u16)(id) >> 3) | (((id) >> SMR_MASK_SHIFT | 0x7000) << SMR_MASK_SHIFT))
+ u32 fwid = FWID_READ(fwspec->ids[i]);
+
+ iommu_fwspec_add_ids(dev, &fwid, 1);
+ }
+ }
+#endif
+
ret = -EINVAL;
for (i = 0; i < fwspec->num_ids; i++) {
u16 sid = fwspec->ids[i];
@@ -1481,6 +1499,12 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
if (group && smmu->s2crs[idx].group &&
group != smmu->s2crs[idx].group)
return ERR_PTR(-EINVAL);
+#ifdef CONFIG_ARCH_PHYTIUM
+ if (typeof_s2500())
+ break;
+ if (typeof_ft2000plus() && !smmu->s2crs[idx].group)
+ continue;
+#endif
group = smmu->s2crs[idx].group;
}
@@ -1614,10 +1638,7 @@ static void arm_smmu_put_resv_regions(struct device *dev,
#include <asm/cputype.h>
static int phytium_smmu_def_domain_type(struct device *dev, unsigned int *type)
{
- u32 midr = read_cpuid_id();
-
- if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_PHYTIUM_FT2000PLUS)
- || ((midr & MIDR_CPU_MODEL_MASK) == MIDR_PHYTIUM_FT2500)) {
+ if (typeof_ft2000plus() || typeof_s2500()) {
*type = IOMMU_DOMAIN_IDENTITY;
return 0;
}
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 1a2ecfa23fd8c..79648a2139412 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -51,6 +51,10 @@
#include "irq-gic-common.h"
+#ifdef CONFIG_ARCH_PHYTIUM
+#include <asm/phytium_machine_types.h>
+#endif
+
#define ITS_FLAGS_CMDQ_NEEDS_FLUSHING (1ULL << 0)
#define ITS_FLAGS_WORKAROUND_CAVIUM_22375 (1ULL << 1)
#define ITS_FLAGS_WORKAROUND_CAVIUM_23144 (1ULL << 2)
@@ -1223,6 +1227,11 @@ static void its_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
msg->address_hi = upper_32_bits(addr);
msg->data = its_get_event_id(d);
+#ifdef CONFIG_ARCH_PHYTIUM
+ if (typeof_ft2000plus())
+ return;
+#endif
+
iommu_dma_map_msi_msg(d->irq, msg);
}
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 961003c6dc807..99657b9bc82e0 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4646,6 +4646,10 @@ static const struct pci_dev_acs_enabled {
{ PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs },
/* Zhaoxin Root/Downstream Ports */
{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
+ /* because PLX switch Vendor id is 0x10b5 on phytium cpu */
+ { 0x10b5, PCI_ANY_ID, pci_quirk_xgene_acs },
+ /* because rootcomplex Vendor id is 0x17cd on phytium cpu */
+ { 0x17cd, PCI_ANY_ID, pci_quirk_xgene_acs },
{ 0 }
};
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 886cd6fc4b640..238a32cc311c9 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -313,7 +313,7 @@ static void phytium_xhci_pci_workaround(struct pci_dev *dev)
u32 midr = read_cpuid_id();
/* Firmware bug, DMA mask is not reported by the firmware */
- if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_PHYTIUM_FT2000PLUS)
+ if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_FT_2000PLUS)
dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
}
#else
--
2.25.1
[PATCH openEuler-1.0-LTS] arm64: Errata: fix kabi changed by cpu_errata and enable idc
by Yang Yingliang 11 Nov '21
From: Weilong Chen <chenweilong(a)huawei.com>
ascend inclusion
category: feature
bugzilla: 46922
CVE: NA
-------------------------------------
Patch "cache: Workaround HiSilicon Taishan DC CVAU"
breaks the kabi symbols:
cpu_hwcaps
cpu_hwcap_keys
Patch "arm64: Errata: fix kabi changed by cpu_errata" try to fix it
but incomplete. Eable IDC on platform TSV{110,200}.
Signed-off-by: Weilong Chen <chenweilong(a)huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Reviewed-by: Hanjun Guo <guohanjun(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
arch/arm64/kernel/cpufeature.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 822e6a2c0af1d..1bf9d84265de2 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -912,6 +912,19 @@ static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
{
u64 ctr;
+#ifndef CONFIG_HISILICON_ERRATUM_1980005
+ /* Fix kABI compatible for CONFIG_HISILICON_ERRATUM_1980005 */
+ static const struct midr_range idc_support_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
+ MIDR_REV(MIDR_HISI_TSV200, 1, 0),
+ { /* sentinel */ }
+ };
+ if (is_midr_in_range_list(read_cpuid_id(), idc_support_list)) {
+ pr_info("CPU features: detected: Taishan IDC coherence workaround\n");
+ return true;
+ }
+#endif
+
if (scope == SCOPE_SYSTEM)
ctr = arm64_ftr_reg_ctrel0.sys_val;
else
--
2.25.1
11 Nov '21
From: Xunlei Pang <xlpang(a)linux.alibaba.com>
Export "cpu|io|memory.pressure" to cgroup v1 "cpuacct" subsystem.
hulk inclusion
category: feature
bugzilla: 182979 https://gitee.com/openeuler/kernel/issues/I4HOX6
------------------------------------------
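As a usage sketch, once exported the files read like any other PSI
interface; the mount point below is an assumption for a typical v1
setup, not something this patch mandates:

/* Userspace sketch: dump the CPU pressure of the cpuacct root group. */
#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/fs/cgroup/cpuacct/cpu.pressure", "r");

	if (!f)
		return 1;
	/* e.g. "some avg10=0.00 avg60=0.00 avg300=0.00 total=0" */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}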
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Signed-off-by: Xunlei Pang <xlpang(a)linux.alibaba.com>
Signed-off-by: Chen Wandun <chenwandun(a)huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
init/Kconfig | 10 ++++++++++
kernel/cgroup/cgroup.c | 28 ++++++++++++++++++++++++++++
kernel/sched/cpuacct.c | 10 ++++++++++
kernel/sched/psi.c | 4 ++++
kernel/sched/sched.h | 4 ++++
5 files changed, 56 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig
index 04bc46ca0b9e..0afdb08131eb 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -635,6 +635,16 @@ config PSI_DEFAULT_DISABLED
Say N if unsure.
+config PSI_CGROUP_V1
+ bool "Support PSI under cgroup v1"
+ default Y
+ depends on PSI
+ help
+ If set, pressure stall information tracking will be used
+ for cgroup v1 other than v2.
+
+ Say N if unsure.
+
endmenu # "CPU/Task time and stats accounting"
config CPU_ISOLATION
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 701ef7ba4f95..86ab4a1305f6 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3659,6 +3659,34 @@ static void cgroup_pressure_release(struct kernfs_open_file *of)
{
psi_trigger_replace(&of->priv, NULL);
}
+
+struct cftype cgroup_v1_psi_files[] = {
+ {
+ .name = "io.pressure",
+ .flags = CFTYPE_NO_PREFIX,
+ .seq_show = cgroup_io_pressure_show,
+ .write = cgroup_io_pressure_write,
+ .poll = cgroup_pressure_poll,
+ .release = cgroup_pressure_release,
+ },
+ {
+ .name = "memory.pressure",
+ .flags = CFTYPE_NO_PREFIX,
+ .seq_show = cgroup_memory_pressure_show,
+ .write = cgroup_memory_pressure_write,
+ .poll = cgroup_pressure_poll,
+ .release = cgroup_pressure_release,
+ },
+ {
+ .name = "cpu.pressure",
+ .flags = CFTYPE_NO_PREFIX,
+ .seq_show = cgroup_cpu_pressure_show,
+ .write = cgroup_cpu_pressure_write,
+ .poll = cgroup_pressure_poll,
+ .release = cgroup_pressure_release,
+ },
+ { } /* terminate */
+};
#endif /* CONFIG_PSI */
static int cgroup_freeze_show(struct seq_file *seq, void *v)
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 941c28cf9738..4e5488659339 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -374,3 +374,13 @@ struct cgroup_subsys cpuacct_cgrp_subsys = {
.legacy_cftypes = files,
.early_init = true,
};
+
+#ifdef CONFIG_PSI
+static int __init cgroup_v1_psi_init(void)
+{
+ cgroup_add_legacy_cftypes(&cpuacct_cgrp_subsys, cgroup_v1_psi_files);
+ return 0;
+}
+
+late_initcall_sync(cgroup_v1_psi_init);
+#endif
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index d50a31ecedee..0b48a74cbfac 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -752,7 +752,11 @@ static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
struct cgroup *cgroup = NULL;
if (!*iter)
+#ifdef CONFIG_PSI_CGROUP_V1
+ cgroup = task_cgroup(task, cpuacct_cgrp_id);
+#else
cgroup = task->cgroups->dfl_cgrp;
+#endif
else if (*iter == &psi_system)
return NULL;
else
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4ce573eeca4c..e29f051f824f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2585,6 +2585,10 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned
}
#endif
+#ifdef CONFIG_PSI
+extern struct cftype cgroup_v1_psi_files[];
+#endif
+
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
#define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
--
2.20.1
1
58

[PATCH openEuler-1.0-LTS] blk-mq: don't free tags if the tag_set is used by other device in queue initialztion
by Yang Yingliang 10 Nov '21
From: Ye Bin <yebin10(a)huawei.com>
mainline inclusion
from mainline-v5.16
commit a846a8e6c9a5949582c5a6a8bbc83a7d27fd891e
category: bugfix
bugzilla: 185668
CVE: NA
-----------------------------------------------
We got a UAF report on v5.10 as follows:
[ 1446.674930] ==================================================================
[ 1446.675970] BUG: KASAN: use-after-free in blk_mq_get_driver_tag+0x9a4/0xa90
[ 1446.676902] Read of size 8 at addr ffff8880185afd10 by task kworker/1:2/12348
[ 1446.677851]
[ 1446.678073] CPU: 1 PID: 12348 Comm: kworker/1:2 Not tainted 5.10.0-10177-gc9c81b1e346a #2
[ 1446.679168] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
[ 1446.680692] Workqueue: kthrotld blk_throtl_dispatch_work_fn
[ 1446.681448] Call Trace:
[ 1446.681800] dump_stack+0x9b/0xce
[ 1446.682916] print_address_description.constprop.6+0x3e/0x60
[ 1446.685999] kasan_report.cold.9+0x22/0x3a
[ 1446.687186] blk_mq_get_driver_tag+0x9a4/0xa90
[ 1446.687785] blk_mq_dispatch_rq_list+0x21a/0x1d40
[ 1446.692576] __blk_mq_do_dispatch_sched+0x394/0x830
[ 1446.695758] __blk_mq_sched_dispatch_requests+0x398/0x4f0
[ 1446.698279] blk_mq_sched_dispatch_requests+0xdf/0x140
[ 1446.698967] __blk_mq_run_hw_queue+0xc0/0x270
[ 1446.699561] __blk_mq_delay_run_hw_queue+0x4cc/0x550
[ 1446.701407] blk_mq_run_hw_queue+0x13b/0x2b0
[ 1446.702593] blk_mq_sched_insert_requests+0x1de/0x390
[ 1446.703309] blk_mq_flush_plug_list+0x4b4/0x760
[ 1446.705408] blk_flush_plug_list+0x2c5/0x480
[ 1446.708471] blk_finish_plug+0x55/0xa0
[ 1446.708980] blk_throtl_dispatch_work_fn+0x23b/0x2e0
[ 1446.711236] process_one_work+0x6d4/0xfe0
[ 1446.711778] worker_thread+0x91/0xc80
[ 1446.713400] kthread+0x32d/0x3f0
[ 1446.714362] ret_from_fork+0x1f/0x30
[ 1446.714846]
[ 1446.715062] Allocated by task 1:
[ 1446.715509] kasan_save_stack+0x19/0x40
[ 1446.716026] __kasan_kmalloc.constprop.1+0xc1/0xd0
[ 1446.716673] blk_mq_init_tags+0x6d/0x330
[ 1446.717207] blk_mq_alloc_rq_map+0x50/0x1c0
[ 1446.717769] __blk_mq_alloc_map_and_request+0xe5/0x320
[ 1446.718459] blk_mq_alloc_tag_set+0x679/0xdc0
[ 1446.719050] scsi_add_host_with_dma.cold.3+0xa0/0x5db
[ 1446.719736] virtscsi_probe+0x7bf/0xbd0
[ 1446.720265] virtio_dev_probe+0x402/0x6c0
[ 1446.720808] really_probe+0x276/0xde0
[ 1446.721320] driver_probe_device+0x267/0x3d0
[ 1446.721892] device_driver_attach+0xfe/0x140
[ 1446.722491] __driver_attach+0x13a/0x2c0
[ 1446.723037] bus_for_each_dev+0x146/0x1c0
[ 1446.723603] bus_add_driver+0x3fc/0x680
[ 1446.724145] driver_register+0x1c0/0x400
[ 1446.724693] init+0xa2/0xe8
[ 1446.725091] do_one_initcall+0x9e/0x310
[ 1446.725626] kernel_init_freeable+0xc56/0xcb9
[ 1446.726231] kernel_init+0x11/0x198
[ 1446.726714] ret_from_fork+0x1f/0x30
[ 1446.727212]
[ 1446.727433] Freed by task 26992:
[ 1446.727882] kasan_save_stack+0x19/0x40
[ 1446.728420] kasan_set_track+0x1c/0x30
[ 1446.728943] kasan_set_free_info+0x1b/0x30
[ 1446.729517] __kasan_slab_free+0x111/0x160
[ 1446.730084] kfree+0xb8/0x520
[ 1446.730507] blk_mq_free_map_and_requests+0x10b/0x1b0
[ 1446.731206] blk_mq_realloc_hw_ctxs+0x8cb/0x15b0
[ 1446.731844] blk_mq_init_allocated_queue+0x374/0x1380
[ 1446.732540] blk_mq_init_queue_data+0x7f/0xd0
[ 1446.733155] scsi_mq_alloc_queue+0x45/0x170
[ 1446.733730] scsi_alloc_sdev+0x73c/0xb20
[ 1446.734281] scsi_probe_and_add_lun+0x9a6/0x2d90
[ 1446.734916] __scsi_scan_target+0x208/0xc50
[ 1446.735500] scsi_scan_channel.part.3+0x113/0x170
[ 1446.736149] scsi_scan_host_selected+0x25a/0x360
[ 1446.736783] store_scan+0x290/0x2d0
[ 1446.737275] dev_attr_store+0x55/0x80
[ 1446.737782] sysfs_kf_write+0x132/0x190
[ 1446.738313] kernfs_fop_write_iter+0x319/0x4b0
[ 1446.738921] new_sync_write+0x40e/0x5c0
[ 1446.739429] vfs_write+0x519/0x720
[ 1446.739877] ksys_write+0xf8/0x1f0
[ 1446.740332] do_syscall_64+0x2d/0x40
[ 1446.740802] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1446.741462]
[ 1446.741670] The buggy address belongs to the object at ffff8880185afd00
[ 1446.741670] which belongs to the cache kmalloc-256 of size 256
[ 1446.743276] The buggy address is located 16 bytes inside of
[ 1446.743276] 256-byte region [ffff8880185afd00, ffff8880185afe00)
[ 1446.744765] The buggy address belongs to the page:
[ 1446.745416] page:ffffea0000616b00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x185ac
[ 1446.746694] head:ffffea0000616b00 order:2 compound_mapcount:0 compound_pincount:0
[ 1446.747719] flags: 0x1fffff80010200(slab|head)
[ 1446.748337] raw: 001fffff80010200 ffffea00006a3208 ffffea000061bf08 ffff88801004f240
[ 1446.749404] raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000
[ 1446.750455] page dumped because: kasan: bad access detected
[ 1446.751227]
[ 1446.751445] Memory state around the buggy address:
[ 1446.752102] ffff8880185afc00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 1446.753090] ffff8880185afc80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 1446.754079] >ffff8880185afd00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 1446.755065] ^
[ 1446.755589] ffff8880185afd80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 1446.756574] ffff8880185afe00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 1446.757566] ==================================================================
Flag 'BLK_MQ_F_TAG_QUEUE_SHARED' will be set if the second device on the
same host initializes its queue successfully. However, if the second
device fails to allocate memory in blk_mq_alloc_and_init_hctx() from
blk_mq_realloc_hw_ctxs() from blk_mq_init_allocated_queue(),
__blk_mq_free_map_and_rqs() will be called on the error path, and if
'BLK_MQ_TAG_HCTX_SHARED' is not set, 'tag_set->tags' will be freed
while it is still used by the first device.
To fix this issue, move the release of the newly allocated hardware
contexts from blk_mq_realloc_hw_ctxs() to __blk_mq_update_nr_hw_queues(),
as there is no need to release hardware contexts in
blk_mq_init_allocated_queue().
Fixes: 868f2f0b7206 ("blk-mq: dynamic h/w context count")
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
Link: https://lore.kernel.org/r/20211108074019.1058843-1-yebin10@huawei.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
conflicts:
block/blk-mq.c
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
block/blk-mq.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 55c81dcafbdc2..ef62a83314a5d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2804,8 +2804,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx = hctxs[j];
if (hctx) {
- if (hctx->tags)
- blk_mq_free_map_and_requests(set, j);
blk_mq_exit_hctx(q, set, hctx, j);
hctxs[j] = NULL;
}
@@ -3236,8 +3234,13 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_realloc_hw_ctxs(set, q);
if (q->nr_hw_queues != set->nr_hw_queues) {
+ int i = prev_nr_hw_queues;
+
pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
nr_hw_queues, prev_nr_hw_queues);
+ for (; i < set->nr_hw_queues; i++)
+ blk_mq_free_map_and_requests(set, i);
+
set->nr_hw_queues = prev_nr_hw_queues;
blk_mq_map_queues(set);
goto fallback;
--
2.25.1
[PATCH openEuler-1.0-LTS 1/2] nbd: add a flush_workqueue in nbd_start_device
by Yang Yingliang 10 Nov '21
From: Sun Ke <sunke32(a)huawei.com>
mainline inclusion
from mainline-v5.6-rc1
commit 5c0dd228b5fc30a3b732c7ae2657e0161ec7ed80
category: bugfix
bugzilla: 185690
CVE: NA
-----------------------------------------------
When kzalloc() fails, we may end up trying to destroy the
workqueue from inside the workqueue.
If num_connections is m (2 < m), and the NO.1 ~ NO.n
(1 < n < m) kzalloc() calls succeed while the NO.(n + 1)
fails, nbd_start_device() will return -ENOMEM
to nbd_start_device_ioctl(), and nbd_start_device_ioctl()
will return immediately without running flush_workqueue().
However, we still have n recv threads. If nbd_release()
runs first, the recv threads may have to drop the last
config_refs and try to destroy the workqueue from
inside the workqueue.
To fix it, add a flush_workqueue() in nbd_start_device().
Fixes: e9e006f5fcf2 ("nbd: fix max number of supported devs")
Signed-off-by: Sun Ke <sunke32(a)huawei.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/block/nbd.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 33a52be762d24..775cbb4c1bbcd 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1306,6 +1306,16 @@ static int nbd_start_device(struct nbd_device *nbd)
args = kzalloc(sizeof(*args), GFP_KERNEL);
if (!args) {
sock_shutdown(nbd);
+ /*
+ * If num_connections is m (2 < m),
+ * and NO.1 ~ NO.n(1 < n < m) kzallocs are successful.
+ * But NO.(n + 1) failed. We still have n recv threads.
+ * So, add flush_workqueue here to prevent recv threads
+ * dropping the last config_refs and trying to destroy
+ * the workqueue from inside the workqueue.
+ */
+ if (i)
+ flush_workqueue(nbd->recv_workq);
return -ENOMEM;
}
sk_set_memalloc(config->socks[i]->sock->sk);
--
2.25.1
[PATCH openEuler-1.0-LTS 1/2] bpf, cgroup: Assign cgroup in cgroup_sk_alloc when called from interrupt
by Yang Yingliang 10 Nov '21
From: Daniel Borkmann <daniel(a)iogearbox.net>
mainline inclusion
from mainline-v5.15-rc4
commit 78cc316e9583067884eb8bd154301dc1e9ee945c
category: bugfix
bugzilla: 184472
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
If cgroup_sk_alloc() is called from interrupt context, then just assign the
root cgroup to skcd->cgroup. Prior to commit 8520e224f547 ("bpf, cgroups:
Fix cgroup v2 fallback on v1/v2 mixed mode") we would just return, and later
on in sock_cgroup_ptr(), we were NULL-testing the cgroup in fast-path, and
iff indeed NULL returning the root cgroup (v ?: &cgrp_dfl_root.cgrp). Rather
than re-adding the NULL-test to the fast-path we can just assign it once from
cgroup_sk_alloc() given v1/v2 handling has been simplified. The migration from
NULL test with returning &cgrp_dfl_root.cgrp to assigning &cgrp_dfl_root.cgrp
directly does /not/ change behavior for callers of sock_cgroup_ptr().
syzkaller was able to trigger a splat in the legacy netrom code base, where
the RX handler in nr_rx_frame() calls nr_make_new() which calls sk_alloc()
and therefore cgroup_sk_alloc() with in_interrupt() condition. Thus the NULL
skcd->cgroup, where it trips over on cgroup_sk_free() side given it expects
a non-NULL object. There are a few other candidates aside from netrom which
have similar pattern where in their accept-like implementation, they just call
to sk_alloc() and thus cgroup_sk_alloc() instead of sk_clone_lock() with the
corresponding cgroup_sk_clone() which then inherits the cgroup from the parent
socket. None of them are related to core protocols where BPF cgroup programs
are running from. However, in future, they should follow to implement a similar
inheritance mechanism.
Additionally, with a !CONFIG_CGROUP_NET_PRIO and !CONFIG_CGROUP_NET_CLASSID
configuration, the same issue was exposed also prior to 8520e224f547 due to
commit e876ecc67db8 ("cgroup: memcg: net: do not associate sock with unrelated
cgroup") which added the early in_interrupt() return back then.
Fixes: 8520e224f547 ("bpf, cgroups: Fix cgroup v2 fallback on v1/v2 mixed mode")
Fixes: e876ecc67db8 ("cgroup: memcg: net: do not associate sock with unrelated cgroup")
Reported-by: syzbot+df709157a4ecaf192b03(a)syzkaller.appspotmail.com
Reported-by: syzbot+533f389d4026d86a2a95(a)syzkaller.appspotmail.com
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Tested-by: syzbot+df709157a4ecaf192b03(a)syzkaller.appspotmail.com
Tested-by: syzbot+533f389d4026d86a2a95(a)syzkaller.appspotmail.com
Acked-by: Tejun Heo <tj(a)kernel.org>
Link: https://lore.kernel.org/bpf/20210927123921.21535-1-daniel@iogearbox.net
Conflicts:
kernel/cgroup/cgroup.c
Signed-off-by: Lu Jialin <lujialin4(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
kernel/cgroup/cgroup.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 682c5e231bddc..7897f1ab77266 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5935,8 +5935,11 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
}
/* Don't associate the sock with unrelated interrupted task's cgroup. */
- if (in_interrupt())
+ if (in_interrupt()) {
+ cgroup_get(&cgrp_dfl_root.cgrp);
+ skcd->val = (unsigned long)&cgrp_dfl_root.cgrp;
return;
+ }
rcu_read_lock();
--
2.25.1
[PATCH openEuler-1.0-LTS 1/7] sctp: use init_tag from inithdr for ABORT chunk
by Yang Yingliang 10 Nov '21
From: Xin Long <lucien.xin(a)gmail.com>
mainline inclusion
from mainline-v5.15
commit 4f7019c7eb33967eb87766e0e4602b5576873680
category: bugfix
bugzilla: NA
CVE: CVE-2021-3772
-------------------------------------------------
Currently Linux SCTP uses the verification tag of the existing SCTP
asoc when failing to process and sending the packet with the ABORT
chunk. This will result in the peer accepting the ABORT chunk and
removing the SCTP asoc. One could exploit this to terminate a SCTP
asoc.
This patch is to fix it by always using the initiate tag of the
received INIT chunk for the ABORT chunk to be sent.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Xin Long <lucien.xin(a)gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner(a)gmail.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Huang Guobin <huangguobin4(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Reviewed-by: Yue Haibing <yuehaibing(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
net/sctp/sm_statefuns.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 19bd14a4eb07e..9ae0676afd5d4 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -6261,6 +6261,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(
* yet.
*/
switch (chunk->chunk_hdr->type) {
+ case SCTP_CID_INIT:
case SCTP_CID_INIT_ACK:
{
struct sctp_initack_chunk *initack;
--
2.25.1
10 Nov '21
mainline inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4AH11?from=project-issue
CVE: NA
In drivers/md/md.c, when autorun_array() is called, do_md_run() runs
first and then do_md_stop(); in that case the pointer mddev->private
may be freed twice.
Signed-off-by: zhangyue <zhangyue1(a)kylinos.cn>
---
drivers/md/md.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4406bb137a27..aa204ec74066 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6000,8 +6000,10 @@ static void __md_stop(struct mddev *mddev)
spin_lock(&mddev->lock);
mddev->pers = NULL;
spin_unlock(&mddev->lock);
- pers->free(mddev, mddev->private);
- mddev->private = NULL;
+ if (mddev->private) {
+ pers->free(mddev, mddev->private);
+ mddev->private = NULL;
+ }
if (pers->sync_request && mddev->to_remove == NULL)
mddev->to_remove = &md_redundancy_group;
module_put(pers->owner);
--
2.30.0
[PATCH kernel-4.19] bios_parser.c: fix uninitialized variable in device_type_from_device_id
by shenzijun 10 Nov '21
From: 沈子俊 <shenzijun(a)kylinos.cn>
kylin inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4AH6U?from=project-issue
CVE: NA
-----------------------------------------------------------------------------
Initialize result_device_id.raw_device_tag so it is never used uninitialized.
Signed-off-by: 沈子俊 <shenzijun(a)kylinos.cn>
---
drivers/gpu/drm/amd/display/dc/bios/bios_parser.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
index be8a2494355a..9375757fb640 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
@@ -2450,6 +2450,7 @@ static struct device_id device_type_from_device_id(uint16_t device_id)
{
struct device_id result_device_id;
+ result_device_id.raw_device_tag = 0;
switch (device_id) {
case ATOM_DEVICE_LCD1_SUPPORT:
--
2.30.0
Re: [PATCH openEuler-1.0-LTS 0/6] Fix the problem that the number of tcp timeout retransmissions is lost
by QiuLaibin 09 Nov '21
hi zhenyuan:
Regarding the patch set "Fix the problem that the number of tcp timeout
retransmissions is lost" you submitted earlier, the committer gave the
following review comments during our internal review:
The switch to EDT in mainline involves several tcp and fq patchsets and
bugfixes; the suggestion here is to first merge only the last 3 patches,
which resolve this problem.
So please confirm whether merging only the last three patches is enough
to solve the problem you are currently seeing.
best regards
On 2021/10/22 18:52, Jiazhenyuan wrote:
>
>
>
> From: Jiazhenyuan <jiazhenyuan(a)uniontech.com>
>
> issue: https://gitee.com/openeuler/kernel/issues/I4AFRJ?from=project-issue
>
> jiazhenyuan (6):
> tcp: switch tcp and sch_fq to new earliest departure time
> net_sched: sch_fq: ensure maxrate fq parameter applies to EDT flows
>   tcp: address problems caused by EDT misshaps (mainline commit 9efdda4e3abe)
>   tcp: always set retrans_stamp on recovery (mainline commit 7ae189759cc4)
>   tcp: create a helper to model exponential backoff (mainline commit 01a523b07161)
>   tcp: adjust rto_base in retransmits_timed_out() (mainline commit 3256a2d6ab1f)
>
> net/ipv4/tcp_bbr.c | 7 +++--
> net/ipv4/tcp_input.c | 17 +++++++-----
> net/ipv4/tcp_output.c | 29 ++++++++++++++------
> net/ipv4/tcp_timer.c | 64 ++++++++++++++++++++-----------------------
> net/sched/sch_fq.c | 46 ++++++++++++++++++-------------
> 5 files changed, 92 insertions(+), 71 deletions(-)
>
> --
> 2.27.0
>
_______________________________________________
Kernel mailing list -- kernel(a)openeuler.org
To unsubscribe send an email to kernel-leave(a)openeuler.org
[PATCH openEuler-1.0-LTS 01/18] drivers/perf: arm_spe: Don't error on high-order pages for aux buf
by Yang Yingliang 09 Nov '21
From: Will Deacon <will.deacon(a)arm.com>
mainline inclusion
from mainline-v5.1-rc3
commit 14ae42a6f0b13130a97d94d23481128961de5d38
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4A1XO
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
------------------------------------------------------------------------
Since commit 5768402fd9c6 ("perf/ring_buffer: Use high order allocations
for AUX buffers optimistically"), the perf core tends to back aux buffer
allocations with high-order pages with the order encoded in the
PagePrivate data. The Arm SPE driver explicitly rejects such pages,
causing the perf tool to fail with:
| failed to mmap with 12 (Cannot allocate memory)
In actual fact, we can simply treat these pages just like any other
since the perf core takes care to populate the page array appropriately.
In theory we could try to map with PMDs where possible, but for now,
let's just get things working again.
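As an aside, a hedged sketch (not needed by the fix itself) of how the
encoded order could be recovered if a driver did care:

/* The perf core stores the allocation order in the head page's
 * private field for high-order AUX buffers. */
static unsigned int aux_page_order(void *va)
{
	struct page *page = virt_to_page(va);

	return PagePrivate(page) ? page_private(page) : 0;
}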
Cc: Alexander Shishkin <alexander.shishkin(a)linux.intel.com>
Fixes: 5768402fd9c6 ("perf/ring_buffer: Use high order allocations for AUX buffers optimistically")
Reported-by: Hanjun Guo <guohanjun(a)huawei.com>
Tested-by: Hanjun Guo <guohanjun(a)huawei.com>
Tested-by: Sudeep Holla <sudeep.holla(a)arm.com>
Signed-off-by: Will Deacon <will.deacon(a)arm.com>
Signed-off-by: Qi Liu <liuqi115(a)huawei.com>
Reviewed-by: Yang Jihong <yangjihong1(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/perf/arm_spe_pmu.c | 10 +---------
1 file changed, 1 insertion(+), 9 deletions(-)
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index a11951b083307..4fb65c61c8eab 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -856,16 +856,8 @@ static void *arm_spe_pmu_setup_aux(struct perf_event *event, void **pages,
if (!pglist)
goto out_free_buf;
- for (i = 0; i < nr_pages; ++i) {
- struct page *page = virt_to_page(pages[i]);
-
- if (PagePrivate(page)) {
- pr_warn("unexpected high-order page for auxbuf!");
- goto out_free_pglist;
- }
-
+ for (i = 0; i < nr_pages; ++i)
pglist[i] = virt_to_page(pages[i]);
- }
buf->base = vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
if (!buf->base)
--
2.25.1
[PATCH openEuler-1.0-LTS 1/2] drm/hisilicon: Support i2c driver algorithms for bit-shift adapters
by Yang Yingliang 09 Nov '21
From: Tian Tao <tiantao6(a)hisilicon.com>
mainline inclusion
from mainline-v5.14.0-rc7
commit 4eb4d99dfe3018d86f4529112aa7082f43b6996a
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I469VQ
CVE: NA
Remove #include <drm/drm_probe_helper.h> in hibmc_drm_i2c.c.
-----------------------------
Add a driver implementation to support i2c driver algorithms for
bit-shift adapters, so that hibmc can use the interface provided by
drm to read the EDID.
Signed-off-by: Tian Tao <tiantao6(a)hisilicon.com>
Reviewed-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/1600778670-60370-2-git-send-e…
Signed-off-by: gouhao <gouhao(a)uniontech.com>
Reviewed-by: tian tao <tiantao6(a)hisilicon.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/gpu/drm/hisilicon/hibmc/Makefile | 3 +-
.../gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h | 25 +++++
.../gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c | 98 +++++++++++++++++++
3 files changed, 125 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c
diff --git a/drivers/gpu/drm/hisilicon/hibmc/Makefile b/drivers/gpu/drm/hisilicon/hibmc/Makefile
index 3df726696372f..71c248f4c7562 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/Makefile
+++ b/drivers/gpu/drm/hisilicon/hibmc/Makefile
@@ -1,3 +1,4 @@
-hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_drm_fbdev.o hibmc_ttm.o
+hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o \
+ hibmc_drm_fbdev.o hibmc_ttm.o hibmc_drm_i2c.o
obj-$(CONFIG_DRM_HISI_HIBMC) += hibmc-drm.o
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h
index 4395dc6674bbc..c246151b29942 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h
@@ -19,12 +19,18 @@
#ifndef HIBMC_DRM_DRV_H
#define HIBMC_DRM_DRV_H
+#include <linux/gpio/consumer.h>
+#include <linux/i2c-algo-bit.h>
+#include <linux/i2c.h>
+
+#include <drm/drm_edid.h>
#include <drm/drmP.h>
#include <drm/drm_atomic.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_gem.h>
#include <drm/ttm/ttm_bo_driver.h>
+
struct hibmc_framebuffer {
struct drm_framebuffer fb;
struct drm_gem_object *obj;
@@ -36,6 +42,13 @@ struct hibmc_fbdev {
int size;
};
+struct hibmc_connector {
+ struct drm_connector base;
+
+ struct i2c_adapter adapter;
+ struct i2c_algo_bit_data bit_data;
+};
+
struct hibmc_drm_private {
/* hw */
void __iomem *mmio;
@@ -46,6 +59,7 @@ struct hibmc_drm_private {
/* drm */
struct drm_device *dev;
+ struct hibmc_connector connector;
bool mode_config_initialized;
struct drm_atomic_state *suspend_state;
@@ -60,6 +74,16 @@ struct hibmc_drm_private {
bool mm_inited;
};
+static inline struct hibmc_connector *to_hibmc_connector(struct drm_connector *connector)
+{
+ return container_of(connector, struct hibmc_connector, base);
+}
+
+static inline struct hibmc_drm_private *to_hibmc_drm_private(struct drm_device *dev)
+{
+ return dev->dev_private;
+}
+
#define to_hibmc_framebuffer(x) container_of(x, struct hibmc_framebuffer, fb)
struct hibmc_bo {
@@ -110,6 +134,7 @@ int hibmc_dumb_create(struct drm_file *file, struct drm_device *dev,
int hibmc_dumb_mmap_offset(struct drm_file *file, struct drm_device *dev,
u32 handle, u64 *offset);
int hibmc_mmap(struct file *filp, struct vm_area_struct *vma);
+int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_connector *connector);
extern const struct drm_mode_config_funcs hibmc_mode_funcs;
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c
new file mode 100644
index 0000000000000..ffd7c7bf4b7d8
--- /dev/null
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Hisilicon Hibmc SoC drm driver
+ *
+ * Based on the bochs drm driver.
+ *
+ * Copyright (c) 2016 Huawei Limited.
+ *
+ * Author:
+ * Tian Tao <tiantao6(a)hisilicon.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+
+#include <drm/drm_atomic_helper.h>
+
+#include "hibmc_drm_drv.h"
+
+#define GPIO_DATA 0x0802A0
+#define GPIO_DATA_DIRECTION 0x0802A4
+
+#define I2C_SCL_MASK BIT(0)
+#define I2C_SDA_MASK BIT(1)
+
+static void hibmc_set_i2c_signal(void *data, u32 mask, int value)
+{
+ struct hibmc_connector *hibmc_connector = data;
+ struct hibmc_drm_private *priv = to_hibmc_drm_private(hibmc_connector->base.dev);
+ u32 tmp_dir = readl(priv->mmio + GPIO_DATA_DIRECTION);
+
+ if (value) {
+ tmp_dir &= ~mask;
+ writel(tmp_dir, priv->mmio + GPIO_DATA_DIRECTION);
+ } else {
+ u32 tmp_data = readl(priv->mmio + GPIO_DATA);
+
+ tmp_data &= ~mask;
+ writel(tmp_data, priv->mmio + GPIO_DATA);
+
+ tmp_dir |= mask;
+ writel(tmp_dir, priv->mmio + GPIO_DATA_DIRECTION);
+ }
+}
+
+static int hibmc_get_i2c_signal(void *data, u32 mask)
+{
+ struct hibmc_connector *hibmc_connector = data;
+ struct hibmc_drm_private *priv = to_hibmc_drm_private(hibmc_connector->base.dev);
+ u32 tmp_dir = readl(priv->mmio + GPIO_DATA_DIRECTION);
+
+ if ((tmp_dir & mask) != mask) {
+ tmp_dir &= ~mask;
+ writel(tmp_dir, priv->mmio + GPIO_DATA_DIRECTION);
+ }
+
+ return (readl(priv->mmio + GPIO_DATA) & mask) ? 1 : 0;
+}
+
+static void hibmc_ddc_setsda(void *data, int state)
+{
+ hibmc_set_i2c_signal(data, I2C_SDA_MASK, state);
+}
+
+static void hibmc_ddc_setscl(void *data, int state)
+{
+ hibmc_set_i2c_signal(data, I2C_SCL_MASK, state);
+}
+
+static int hibmc_ddc_getsda(void *data)
+{
+ return hibmc_get_i2c_signal(data, I2C_SDA_MASK);
+}
+
+static int hibmc_ddc_getscl(void *data)
+{
+ return hibmc_get_i2c_signal(data, I2C_SCL_MASK);
+}
+
+int hibmc_ddc_create(struct drm_device *drm_dev,
+ struct hibmc_connector *connector)
+{
+ connector->adapter.owner = THIS_MODULE;
+ connector->adapter.class = I2C_CLASS_DDC;
+ snprintf(connector->adapter.name, I2C_NAME_SIZE, "HIS i2c bit bus");
+ connector->adapter.dev.parent = &drm_dev->pdev->dev;
+ i2c_set_adapdata(&connector->adapter, connector);
+ connector->adapter.algo_data = &connector->bit_data;
+
+ connector->bit_data.udelay = 20;
+ connector->bit_data.timeout = usecs_to_jiffies(2000);
+ connector->bit_data.data = connector;
+ connector->bit_data.setsda = hibmc_ddc_setsda;
+ connector->bit_data.setscl = hibmc_ddc_setscl;
+ connector->bit_data.getsda = hibmc_ddc_getsda;
+ connector->bit_data.getscl = hibmc_ddc_getscl;
+
+ return i2c_bit_add_bus(&connector->adapter);
+}
--
2.25.1
09 Nov '21
From: Arvind Sankar <nivedita(a)alum.mit.edu>
stable inclusion
from linux-4.19.164
commit b207caff4176e3a6ba273243da2db2e595e4aad2
CVE: CVE-2021-0938
--------------------------------
commit 3347acc6fcd4ee71ad18a9ff9d9dac176b517329 upstream.
Commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h
mutually exclusive") neglected to copy barrier_data() from
compiler-gcc.h into compiler-clang.h.
The definition in compiler-gcc.h was really to work around clang's more
aggressive optimization, so this broke barrier_data() on clang, and
consequently memzero_explicit() as well.
For example, this results in at least the memzero_explicit() call in
lib/crypto/sha256.c:sha256_transform() being optimized away by clang.
Fix this by moving the definition of barrier_data() into compiler.h.
Also move the gcc/clang definition of barrier() into compiler.h,
__memory_barrier() is icc-specific (and barrier() is already defined
using it in compiler-intel.h) and doesn't belong in compiler.h.
[rdunlap(a)infradead.org: fix ALPHA builds when SMP is not enabled]
Link: https://lkml.kernel.org/r/20201101231835.4589-1-rdunlap@infradead.org
Fixes: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive")
Signed-off-by: Arvind Sankar <nivedita(a)alum.mit.edu>
Signed-off-by: Randy Dunlap <rdunlap(a)infradead.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Tested-by: Nick Desaulniers <ndesaulniers(a)google.com>
Reviewed-by: Nick Desaulniers <ndesaulniers(a)google.com>
Reviewed-by: Kees Cook <keescook(a)chromium.org>
Cc: <stable(a)vger.kernel.org>
Link: https://lkml.kernel.org/r/20201014212631.207844-1-nivedita@alum.mit.edu
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
[nd: backport to account for missing
commit e506ea451254a ("compiler.h: Split {READ,WRITE}_ONCE definitions out into rwonce.h")
commit d08b9f0ca6605 ("scs: Add support for Clang's Shadow Call Stack (SCS)")
commit a3f8a30f3f00 ("Compiler Attributes: use feature checks instead of version checks")]
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Reviewed-by: Cheng Jian <cj.chengjian(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
include/linux/compiler-clang.h | 1 -
include/linux/compiler-gcc.h | 19 -------------------
include/linux/compiler.h | 18 ++++++++++++++++--
3 files changed, 16 insertions(+), 22 deletions(-)
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index d756f2318efe0..2d6e5e4bb5d93 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -39,7 +39,6 @@
* and may be redefined here because they should not be shared with other
* compilers, like ICC.
*/
-#define barrier() __asm__ __volatile__("" : : : "memory")
#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
#define __assume_aligned(a, ...) \
__attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 3ebee1ce6f982..14be095371093 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -14,25 +14,6 @@
# error Sorry, your compiler is too old - please upgrade it.
#endif
-/* Optimization barrier */
-
-/* The "volatile" is due to gcc bugs */
-#define barrier() __asm__ __volatile__("": : :"memory")
-/*
- * This version is i.e. to prevent dead stores elimination on @ptr
- * where gcc and llvm may behave differently when otherwise using
- * normal barrier(): while gcc behavior gets along with a normal
- * barrier(), llvm needs an explicit input variable to be assumed
- * clobbered. The issue is as follows: while the inline asm might
- * access any memory it wants, the compiler could have fit all of
- * @ptr into memory registers instead, and since @ptr never escaped
- * from that, it proved that the inline asm wasn't touching any of
- * it. This version works well with both compilers, i.e. we're telling
- * the compiler that the inline asm absolutely may see the contents
- * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495
- */
-#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")
-
/*
* This macro obfuscates arithmetic on a variable address so that gcc
* shouldn't recognize the original var, and make assumptions about it.
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index bb22908c79e83..0e769548e14f8 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -79,11 +79,25 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
/* Optimization barrier */
#ifndef barrier
-# define barrier() __memory_barrier()
+/* The "volatile" is due to gcc bugs */
+# define barrier() __asm__ __volatile__("": : :"memory")
#endif
#ifndef barrier_data
-# define barrier_data(ptr) barrier()
+/*
+ * This version is i.e. to prevent dead stores elimination on @ptr
+ * where gcc and llvm may behave differently when otherwise using
+ * normal barrier(): while gcc behavior gets along with a normal
+ * barrier(), llvm needs an explicit input variable to be assumed
+ * clobbered. The issue is as follows: while the inline asm might
+ * access any memory it wants, the compiler could have fit all of
+ * @ptr into memory registers instead, and since @ptr never escaped
+ * from that, it proved that the inline asm wasn't touching any of
+ * it. This version works well with both compilers, i.e. we're telling
+ * the compiler that the inline asm absolutely may see the contents
+ * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495
+ */
+# define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")
#endif
/* workaround for GCC PR82365 if needed */
--
2.25.1
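For context, the consumer that motivated the fix above is memzero_explicit(). A minimal sketch of the pattern, simplified from lib/string.c with the barrier_data() macro inlined for illustration:

/* Simplified sketch, not the exact kernel source. Because the asm
 * statement takes @ptr as an input operand, the compiler must assume
 * the zeroed memory is observed, so it cannot delete the memset()
 * as dead stores -- which is exactly what clang did without this fix.
 */
#include <stddef.h>
#include <string.h>

#define barrier_data(ptr) __asm__ __volatile__("" : : "r"(ptr) : "memory")

static void memzero_explicit_sketch(void *s, size_t count)
{
	memset(s, 0, count);
	barrier_data(s);	/* keep the stores above alive */
}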
09 Nov '21
From: Yanling Song <songyl(a)ramaxel.com>
Ramaxel inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4DBD7
CVE: NA
-----------------------------------------
Fix two compile errors:
1. Compilation fails when O=xxx is specified;
2. Compilation conflicts occur when spfc and spnic are built in parallel
with the -j option, because they share some .c files.
make O=tmp -j200 > build.log
In file included from ../drivers/scsi/spfc/hw/spfc_utils.c:4:0:
../drivers/scsi/spfc/hw/spfc_utils.h:7:10: fatal error: unf_type.h: No such file or directory
#include "unf_type.h"
^~~~~~~~~~~~
compilation terminated.
make[4]: *** [drivers/scsi/spfc/hw/spfc_utils.o] Error 1
make[4]: *** Waiting for unfinished jobs....
make[3]: *** [drivers/scsi/spfc] Error 2
make[3]: *** Waiting for unfinished jobs....
../drivers/net/ethernet/ramaxel/spnic/spnic_sriov.c:9:10: fatal error: sphw_common.h: No such file or directory
#include "sphw_common.h"
^~~~~~~~~~~~~~~
compilation terminated.
Signed-off-by: Yanling Song <songyl(a)ramaxel.com>
Reviewed-by: Zhang Lei<zhanglei48(a)huawei.com>
[zzk: Adjusted commit message]
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
drivers/net/ethernet/ramaxel/spnic/Makefile | 2 +-
drivers/scsi/spfc/Makefile | 30 ++++++++++-----------
drivers/scsi/spfc/sphw_api_cmd.c | 1 +
drivers/scsi/spfc/sphw_cmdq.c | 1 +
drivers/scsi/spfc/sphw_common.c | 1 +
drivers/scsi/spfc/sphw_eqs.c | 1 +
drivers/scsi/spfc/sphw_hw_cfg.c | 1 +
drivers/scsi/spfc/sphw_hw_comm.c | 1 +
drivers/scsi/spfc/sphw_hwdev.c | 1 +
drivers/scsi/spfc/sphw_hwif.c | 1 +
drivers/scsi/spfc/sphw_mbox.c | 1 +
drivers/scsi/spfc/sphw_mgmt.c | 1 +
drivers/scsi/spfc/sphw_prof_adap.c | 1 +
drivers/scsi/spfc/sphw_wq.c | 1 +
14 files changed, 28 insertions(+), 16 deletions(-)
create mode 120000 drivers/scsi/spfc/sphw_api_cmd.c
create mode 120000 drivers/scsi/spfc/sphw_cmdq.c
create mode 120000 drivers/scsi/spfc/sphw_common.c
create mode 120000 drivers/scsi/spfc/sphw_eqs.c
create mode 120000 drivers/scsi/spfc/sphw_hw_cfg.c
create mode 120000 drivers/scsi/spfc/sphw_hw_comm.c
create mode 120000 drivers/scsi/spfc/sphw_hwdev.c
create mode 120000 drivers/scsi/spfc/sphw_hwif.c
create mode 120000 drivers/scsi/spfc/sphw_mbox.c
create mode 120000 drivers/scsi/spfc/sphw_mgmt.c
create mode 120000 drivers/scsi/spfc/sphw_prof_adap.c
create mode 120000 drivers/scsi/spfc/sphw_wq.c
diff --git a/drivers/net/ethernet/ramaxel/spnic/Makefile b/drivers/net/ethernet/ramaxel/spnic/Makefile
index f86ccff374f6..207e1d9c431a 100644
--- a/drivers/net/ethernet/ramaxel/spnic/Makefile
+++ b/drivers/net/ethernet/ramaxel/spnic/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SPNIC) += spnic.o
-subdir-ccflags-y += -I$(src)/hw
+subdir-ccflags-y += -I$(srctree)/$(src)/hw
spnic-objs := hw/sphw_common.o \
hw/sphw_hwif.o \
diff --git a/drivers/scsi/spfc/Makefile b/drivers/scsi/spfc/Makefile
index 02fe0213e048..849b730ac733 100644
--- a/drivers/scsi/spfc/Makefile
+++ b/drivers/scsi/spfc/Makefile
@@ -1,9 +1,9 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SPFC) += spfc.o
-subdir-ccflags-y += -I$(src)/../../net/ethernet/ramaxel/spnic/hw
-subdir-ccflags-y += -I$(src)/hw
-subdir-ccflags-y += -I$(src)/common
+subdir-ccflags-y += -I$(srctree)/$(src)/../../net/ethernet/ramaxel/spnic/hw
+subdir-ccflags-y += -I$(srctree)/$(src)/hw
+subdir-ccflags-y += -I$(srctree)/$(src)/common
spfc-objs := common/unf_init.o \
common/unf_event.o \
@@ -33,15 +33,15 @@ spfc-objs := common/unf_init.o \
hw/spfc_cqm_bitmap_table.o \
hw/spfc_cqm_main.o \
hw/spfc_cqm_object.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_hwdev.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_hw_cfg.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_hw_comm.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_prof_adap.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_common.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_hwif.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_wq.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_cmdq.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_eqs.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_mbox.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_mgmt.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_api_cmd.o
+ sphw_hwdev.o \
+ sphw_hw_cfg.o \
+ sphw_hw_comm.o \
+ sphw_prof_adap.o \
+ sphw_common.o \
+ sphw_hwif.o \
+ sphw_wq.o \
+ sphw_cmdq.o \
+ sphw_eqs.o \
+ sphw_mbox.o \
+ sphw_mgmt.o \
+ sphw_api_cmd.o
diff --git a/drivers/scsi/spfc/sphw_api_cmd.c b/drivers/scsi/spfc/sphw_api_cmd.c
new file mode 120000
index 000000000000..27c7c0770fa3
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_api_cmd.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_api_cmd.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_cmdq.c b/drivers/scsi/spfc/sphw_cmdq.c
new file mode 120000
index 000000000000..5ac779ba274b
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_cmdq.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_cmdq.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_common.c b/drivers/scsi/spfc/sphw_common.c
new file mode 120000
index 000000000000..a1a30a4840e1
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_common.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_common.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_eqs.c b/drivers/scsi/spfc/sphw_eqs.c
new file mode 120000
index 000000000000..74430dcb9dc5
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_eqs.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_eqs.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_hw_cfg.c b/drivers/scsi/spfc/sphw_hw_cfg.c
new file mode 120000
index 000000000000..4f43d68624c1
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_hw_cfg.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_hw_cfg.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_hw_comm.c b/drivers/scsi/spfc/sphw_hw_comm.c
new file mode 120000
index 000000000000..c943b3b2933a
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_hw_comm.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_hw_comm.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_hwdev.c b/drivers/scsi/spfc/sphw_hwdev.c
new file mode 120000
index 000000000000..b7279f17eaa2
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_hwdev.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_hwdev.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_hwif.c b/drivers/scsi/spfc/sphw_hwif.c
new file mode 120000
index 000000000000..d40ef71f9033
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_hwif.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_hwif.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_mbox.c b/drivers/scsi/spfc/sphw_mbox.c
new file mode 120000
index 000000000000..1b00fe7289cc
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_mbox.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_mbox.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_mgmt.c b/drivers/scsi/spfc/sphw_mgmt.c
new file mode 120000
index 000000000000..fd18a73e9d3a
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_mgmt.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_mgmt.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_prof_adap.c b/drivers/scsi/spfc/sphw_prof_adap.c
new file mode 120000
index 000000000000..fbc7db05dd27
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_prof_adap.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_prof_adap.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_wq.c b/drivers/scsi/spfc/sphw_wq.c
new file mode 120000
index 000000000000..cdfcb3a610c0
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_wq.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_wq.c
\ No newline at end of file
--
2.20.1
09 Nov '21
Ramaxel inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4DBD7
CVE: NA
Fix two compile errors:
1. Compilation fails when O=xxx is specified;
2. Compilation conflicts occur when spfc and spnic are built in parallel with the -j option, because they share some .c files.
Signed-off-by: Yanling Song <songyl(a)ramaxel.com>
---
drivers/net/ethernet/ramaxel/spnic/Makefile | 2 +-
drivers/scsi/spfc/Makefile | 30 ++++++++++-----------
drivers/scsi/spfc/sphw_api_cmd.c | 1 +
drivers/scsi/spfc/sphw_cmdq.c | 1 +
drivers/scsi/spfc/sphw_common.c | 1 +
drivers/scsi/spfc/sphw_eqs.c | 1 +
drivers/scsi/spfc/sphw_hw_cfg.c | 1 +
drivers/scsi/spfc/sphw_hw_comm.c | 1 +
drivers/scsi/spfc/sphw_hwdev.c | 1 +
drivers/scsi/spfc/sphw_hwif.c | 1 +
drivers/scsi/spfc/sphw_mbox.c | 1 +
drivers/scsi/spfc/sphw_mgmt.c | 1 +
drivers/scsi/spfc/sphw_prof_adap.c | 1 +
drivers/scsi/spfc/sphw_wq.c | 1 +
14 files changed, 28 insertions(+), 16 deletions(-)
create mode 120000 drivers/scsi/spfc/sphw_api_cmd.c
create mode 120000 drivers/scsi/spfc/sphw_cmdq.c
create mode 120000 drivers/scsi/spfc/sphw_common.c
create mode 120000 drivers/scsi/spfc/sphw_eqs.c
create mode 120000 drivers/scsi/spfc/sphw_hw_cfg.c
create mode 120000 drivers/scsi/spfc/sphw_hw_comm.c
create mode 120000 drivers/scsi/spfc/sphw_hwdev.c
create mode 120000 drivers/scsi/spfc/sphw_hwif.c
create mode 120000 drivers/scsi/spfc/sphw_mbox.c
create mode 120000 drivers/scsi/spfc/sphw_mgmt.c
create mode 120000 drivers/scsi/spfc/sphw_prof_adap.c
create mode 120000 drivers/scsi/spfc/sphw_wq.c
diff --git a/drivers/net/ethernet/ramaxel/spnic/Makefile b/drivers/net/ethernet/ramaxel/spnic/Makefile
index f86ccff374f6..207e1d9c431a 100644
--- a/drivers/net/ethernet/ramaxel/spnic/Makefile
+++ b/drivers/net/ethernet/ramaxel/spnic/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SPNIC) += spnic.o
-subdir-ccflags-y += -I$(src)/hw
+subdir-ccflags-y += -I$(srctree)/$(src)/hw
spnic-objs := hw/sphw_common.o \
hw/sphw_hwif.o \
diff --git a/drivers/scsi/spfc/Makefile b/drivers/scsi/spfc/Makefile
index 02fe0213e048..849b730ac733 100644
--- a/drivers/scsi/spfc/Makefile
+++ b/drivers/scsi/spfc/Makefile
@@ -1,9 +1,9 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SPFC) += spfc.o
-subdir-ccflags-y += -I$(src)/../../net/ethernet/ramaxel/spnic/hw
-subdir-ccflags-y += -I$(src)/hw
-subdir-ccflags-y += -I$(src)/common
+subdir-ccflags-y += -I$(srctree)/$(src)/../../net/ethernet/ramaxel/spnic/hw
+subdir-ccflags-y += -I$(srctree)/$(src)/hw
+subdir-ccflags-y += -I$(srctree)/$(src)/common
spfc-objs := common/unf_init.o \
common/unf_event.o \
@@ -33,15 +33,15 @@ spfc-objs := common/unf_init.o \
hw/spfc_cqm_bitmap_table.o \
hw/spfc_cqm_main.o \
hw/spfc_cqm_object.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_hwdev.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_hw_cfg.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_hw_comm.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_prof_adap.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_common.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_hwif.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_wq.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_cmdq.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_eqs.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_mbox.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_mgmt.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_api_cmd.o
+ sphw_hwdev.o \
+ sphw_hw_cfg.o \
+ sphw_hw_comm.o \
+ sphw_prof_adap.o \
+ sphw_common.o \
+ sphw_hwif.o \
+ sphw_wq.o \
+ sphw_cmdq.o \
+ sphw_eqs.o \
+ sphw_mbox.o \
+ sphw_mgmt.o \
+ sphw_api_cmd.o
diff --git a/drivers/scsi/spfc/sphw_api_cmd.c b/drivers/scsi/spfc/sphw_api_cmd.c
new file mode 120000
index 000000000000..27c7c0770fa3
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_api_cmd.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_api_cmd.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_cmdq.c b/drivers/scsi/spfc/sphw_cmdq.c
new file mode 120000
index 000000000000..5ac779ba274b
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_cmdq.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_cmdq.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_common.c b/drivers/scsi/spfc/sphw_common.c
new file mode 120000
index 000000000000..a1a30a4840e1
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_common.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_common.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_eqs.c b/drivers/scsi/spfc/sphw_eqs.c
new file mode 120000
index 000000000000..74430dcb9dc5
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_eqs.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_eqs.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_hw_cfg.c b/drivers/scsi/spfc/sphw_hw_cfg.c
new file mode 120000
index 000000000000..4f43d68624c1
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_hw_cfg.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_hw_cfg.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_hw_comm.c b/drivers/scsi/spfc/sphw_hw_comm.c
new file mode 120000
index 000000000000..c943b3b2933a
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_hw_comm.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_hw_comm.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_hwdev.c b/drivers/scsi/spfc/sphw_hwdev.c
new file mode 120000
index 000000000000..b7279f17eaa2
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_hwdev.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_hwdev.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_hwif.c b/drivers/scsi/spfc/sphw_hwif.c
new file mode 120000
index 000000000000..d40ef71f9033
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_hwif.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_hwif.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_mbox.c b/drivers/scsi/spfc/sphw_mbox.c
new file mode 120000
index 000000000000..1b00fe7289cc
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_mbox.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_mbox.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_mgmt.c b/drivers/scsi/spfc/sphw_mgmt.c
new file mode 120000
index 000000000000..fd18a73e9d3a
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_mgmt.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_mgmt.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_prof_adap.c b/drivers/scsi/spfc/sphw_prof_adap.c
new file mode 120000
index 000000000000..fbc7db05dd27
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_prof_adap.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_prof_adap.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_wq.c b/drivers/scsi/spfc/sphw_wq.c
new file mode 120000
index 000000000000..cdfcb3a610c0
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_wq.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_wq.c
\ No newline at end of file
--
2.27.0
DAMON (Data Access Monitor) feature: topic proposal
DAMON is a kernel feature that monitors the memory access patterns of selected user-space processes online, at low overhead. Built on two core mechanisms, region-based sampling and adaptive adjustment of region sizes, it supports policies defined from both user space and kernel space for data-access monitoring and memory-management optimization.
Use cases:
1. Detecting memory overload or excessively high access frequency
2. Memory-management optimization:
1) Move frequently accessed memory regions to the head of the LRU list
2) Move rarely accessed regions to the tail of the LRU list, or swap them out
3) Back large, frequently accessed regions with huge pages
3. Detecting inefficient inter-process communication
The overall DAMON framework was shown in an attached diagram (inline image not reproduced here).
Feature patch series:
http://patchwork.huawei.com/project/olk5.10/list/?series=20179
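As a rough illustration of the region-based sampling mechanism described above, here is a conceptual C sketch; every name in it is hypothetical, and it is not the actual DAMON implementation:

#include <stdbool.h>
#include <stdlib.h>

/* Illustrative only. DAMON tracks address ranges as regions and checks
 * one sampled page per region per interval, so monitoring cost scales
 * with the number of regions, not with the monitored address space.
 */
struct region_sketch {
	unsigned long start, end;  /* [start, end) address range */
	unsigned int nr_accesses;  /* accesses seen in recent intervals */
};

/* Stand-in for a page-table accessed-bit check (hypothetical). */
static bool page_was_accessed(unsigned long addr)
{
	(void)addr;
	return rand() & 1;
}

static void sample_region(struct region_sketch *r)
{
	/* One representative page speaks for the whole region. */
	unsigned long sample = r->start + (r->end - r->start) / 2;

	if (page_was_accessed(sample))
		r->nr_accesses++;
	/* The adaptive part (not shown) splits regions whose halves
	 * behave differently and merges neighbours with similar counts.
	 */
}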
[PATCH openEuler-1.0-LTS 1/7] ath: Use safer key clearing with key cache entries
by Yang Yingliang 08 Nov '21
From: Jouni Malinen <jouni(a)codeaurora.org>
stable inclusion
from linux-4.19.205
commit dd5815f023b89c9a28325d8a2a5f0779b57b7190
CVE: CVE-2020-3702
--------------------------------
commit 56c5485c9e444c2e85e11694b6c44f1338fc20fd upstream.
It is possible for there to be pending frames in TXQs with a reference
to the key cache entry that is being deleted. If such a key cache entry
is cleared, those pending frame in TXQ might get transmitted without
proper encryption. It is safer to leave the previously used key into the
key cache in such cases. Instead, only clear the MAC address to prevent
RX processing from using this key cache entry.
This is needed in particularly in AP mode where the TXQs cannot be
flushed on station disconnection. This change alone may not be able to
address all cases where the key cache entry might get reused for other
purposes immediately (the key cache entry should be released for reuse
only once the TXQs do not have any remaining references to them), but
this makes it less likely to get unprotected frames and the more
complete changes may end up being significantly more complex.
Signed-off-by: Jouni Malinen <jouni(a)codeaurora.org>
Signed-off-by: Kalle Valo <kvalo(a)codeaurora.org>
Link: https://lore.kernel.org/r/20201214172118.18100-2-jouni@codeaurora.org
Cc: Pali Rohár <pali(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Reviewed-by: Yue Haibing <yuehaibing(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/net/wireless/ath/key.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/net/wireless/ath/key.c b/drivers/net/wireless/ath/key.c
index 1816b4e7dc264..59618bb41f6c5 100644
--- a/drivers/net/wireless/ath/key.c
+++ b/drivers/net/wireless/ath/key.c
@@ -583,7 +583,16 @@ EXPORT_SYMBOL(ath_key_config);
*/
void ath_key_delete(struct ath_common *common, struct ieee80211_key_conf *key)
{
- ath_hw_keyreset(common, key->hw_key_idx);
+ /* Leave CCMP and TKIP (main key) configured to avoid disabling
+ * encryption for potentially pending frames already in a TXQ with the
+ * keyix pointing to this key entry. Instead, only clear the MAC address
+ * to prevent RX processing from using this key cache entry.
+ */
+ if (test_bit(key->hw_key_idx, common->ccmp_keymap) ||
+ test_bit(key->hw_key_idx, common->tkip_keymap))
+ ath_hw_keysetmac(common, key->hw_key_idx, NULL);
+ else
+ ath_hw_keyreset(common, key->hw_key_idx);
if (key->hw_key_idx < IEEE80211_WEP_NKID)
return;
--
2.25.1
08 Nov '21
mainline inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4GVH3?from=project-issue
CVE: NA
cap_convert_nscap() does permission checking as well as conversion of
the xattr value conditionally based on fs's user-ns.
This is needed by overlayfs and probably other layered fs (ecryptfs)
and is what vfs_foo() is supposed to do anyway.
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
Acked-by: James Morris <jamorris(a)linux.microsoft.com>
Signed-off-by: zhangyue <zhangyue1(a)kylinos.cn>
---
fs/xattr.c | 18 ++++++++++++------
include/linux/capability.h | 2 +-
security/commoncap.c | 3 +--
3 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/fs/xattr.c b/fs/xattr.c
index adb11cb82be5..653e12d6060f 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -263,8 +263,16 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
{
struct inode *inode = dentry->d_inode;
struct inode *delegated_inode = NULL;
+ const void *orig_value = value;
int error;
+ if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
+ error = cap_convert_nscap(dentry, &value, size);
+ if (error < 0)
+ return error;
+ size = error;
+ }
+
retry_deleg:
inode_lock(inode);
error = __vfs_setxattr_locked(dentry, name, value, size, flags,
@@ -276,6 +284,10 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
if (!error)
goto retry_deleg;
}
+
+ if (value != orig_value)
+ kfree(value);
+
return error;
}
EXPORT_SYMBOL_GPL(vfs_setxattr);
@@ -530,12 +542,6 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
posix_acl_fix_xattr_from_user(kvalue, size);
- else if (strcmp(kname, XATTR_NAME_CAPS) == 0) {
- error = cap_convert_nscap(d, &kvalue, size);
- if (error < 0)
- goto out;
- size = error;
- }
}
error = vfs_setxattr(d, kname, kvalue, size, flags);
diff --git a/include/linux/capability.h b/include/linux/capability.h
index f640dcbc880c..9fee9a86505c 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -249,6 +249,6 @@ extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns);
/* audit system wants to get cap info from files as well */
extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
-extern int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size);
+extern int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size);
#endif /* !_LINUX_CAPABILITY_H */
diff --git a/security/commoncap.c b/security/commoncap.c
index 876cfe01d939..83546a782796 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -498,7 +498,7 @@ static bool validheader(size_t size, const struct vfs_cap_data *cap)
*
* If all is ok, we return the new size, on error return < 0.
*/
-int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size)
+int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size)
{
struct vfs_ns_cap_data *nscap;
uid_t nsrootid;
@@ -541,7 +541,6 @@ int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size)
nscap->magic_etc = cpu_to_le32(nsmagic);
memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
- kvfree(*ivalue);
*ivalue = nscap;
return newsize;
}
--
2.30.0
[PATCH openEuler-1.0-LTS] ext4: if zeroout fails fall back to splitting the extent node
by Yang Yingliang 08 Nov '21
From: Theodore Ts'o <tytso(a)mit.edu>
mainline inclusion
from mainline-5.15-rc1
commit 308c57ccf4318236be75dfa251c84713e694457b
category: bugfix
bugzilla: 109297
CVE: NA
---------------------------
If the underlying storage device is using thin-provisioning, it's
possible for a zeroout operation to return ENOSPC.
Commit df22291ff0fd ("ext4: Retry block allocation if we have free blocks
left") added logic to retry block allocation since we might get free block
after we commit a transaction. But the ENOSPC from thin-provisioning
will confuse ext4, and lead to an infinite loop.
Since using zeroout instead of splitting the extent node is an
optimization, if it fails, we might as well fall back to splitting the
extent node.
Reported-by: yangerkun <yangerkun(a)huawei.com>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Signed-off-by: yangerkun <yangerkun(a)huawei.com>
Reviewed-by: Zhang Yi <yi.zhang(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
fs/ext4/extents.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index fc00a78163117..10298c13a67d7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3696,7 +3696,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
split_map.m_len - ee_block);
err = ext4_ext_zeroout(inode, &zero_ex1);
if (err)
- goto out;
+ goto fallback;
split_map.m_len = allocated;
}
if (split_map.m_lblk - ee_block + split_map.m_len <
@@ -3710,7 +3710,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_ext_pblock(ex));
err = ext4_ext_zeroout(inode, &zero_ex2);
if (err)
- goto out;
+ goto fallback;
}
split_map.m_len += split_map.m_lblk - ee_block;
@@ -3719,6 +3719,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
}
}
+fallback:
err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag,
flags);
if (err > 0)
--
2.25.1
[PATCH openEuler-1.0-LTS] dccp: don't duplicate ccid when cloning dccp sock
by Yang Yingliang 08 Nov '21
From: "Lin, Zhenpeng" <zplin(a)psu.edu>
mainline inclusion
from mainline-v5.15-rc2
commit d9ea761fdd197351890418acd462c51f241014a7
category: bugfix
bugzilla: 85666
CVE: CVE-2020-16119
-------------------------------------------------
Commit 2677d2067731 ("dccp: don't free ccid2_hc_tx_sock ...") fixed
a UAF but reintroduced CVE-2017-6074.
When the sock is cloned, two dccps_hc_tx_ccid will reference to the
same ccid. So one can free the ccid object twice from two socks after
cloning.
This issue was found by "Hadar Manor" as well and assigned with
CVE-2020-16119, which was fixed in Ubuntu's kernel. So here I port
the patch from Ubuntu to fix it.
The patch prevents cloned socks from referencing the same ccid.
Fixes: 2677d2067731410 ("dccp: don't free ccid2_hc_tx_sock ...")
Signed-off-by: Zhenpeng Lin <zplin(a)psu.edu>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Lu Wei <luwei32(a)huawei.com>
Reviewed-by: Yue Haibing <yuehaibing(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
net/dccp/minisocks.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index ba6fc3c1186b9..e91838a7b8497 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -98,6 +98,8 @@ struct sock *dccp_create_openreq_child(const struct sock *sk,
newdp->dccps_role = DCCP_ROLE_SERVER;
newdp->dccps_hc_rx_ackvec = NULL;
newdp->dccps_service_list = NULL;
+ newdp->dccps_hc_rx_ccid = NULL;
+ newdp->dccps_hc_tx_ccid = NULL;
newdp->dccps_service = dreq->dreq_service;
newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo;
newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
--
2.25.1
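The bug class fixed above is generic: shallow-copying a structure that owns a pointer leaves two owners of one allocation. A minimal user-space sketch of the pattern and of the fix applied here (illustrative, not the dccp code itself):

#include <stdlib.h>
#include <string.h>

struct conn_sketch {
	void *ccid;	/* owned: the destructor frees it */
};

static struct conn_sketch *clone_conn(const struct conn_sketch *parent)
{
	struct conn_sketch *child = malloc(sizeof(*child));

	if (!child)
		return NULL;
	memcpy(child, parent, sizeof(*child));	/* shallow copy */
	/* The fix: drop the inherited pointer so only the parent owns
	 * the ccid; otherwise both destructors free it (double free).
	 */
	child->ccid = NULL;
	return child;
}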
[PATCH openEuler-1.0-LTS 01/14] Revert "selftests/bpf: add test_spec_readahead_xfs_file to support specail async readahead"
by Yang Yingliang 05 Nov '21
From: Hou Tao <houtao1(a)huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4H3JT
CVE: NA
---------------------------
There are two issues with the current solution:
1) tracepoint xfs_read_file is visible in tracefs
It forms an ABI for userspace. That is bad, because new fields may be
added to xfs_writable_file to export more information to userspace.
2) tracepoint xfs_read_file is specific to xfs
HDFS can be stacked on ext4.
A new solution is proposed which uses a bare vfs tracepoint, so
revert commit 69513cfbe62d267c4a5e6025f31741b1f2cb946c.
Signed-off-by: Hou Tao <houtao1(a)huawei.com>
Reviewed-by: Kuohai Xu <xukuohai(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
tools/include/uapi/linux/xfs.h | 1 -
tools/testing/selftests/bpf/Makefile | 3 +-
.../bpf/test_spec_readahead_xfs_file.c | 39 -------------------
tools/testing/selftests/bpf/test_xfs_file.c | 9 +----
4 files changed, 3 insertions(+), 49 deletions(-)
delete mode 100644 tools/testing/selftests/bpf/test_spec_readahead_xfs_file.c
diff --git a/tools/include/uapi/linux/xfs.h b/tools/include/uapi/linux/xfs.h
index 1409b45affd34..a0d37e411ee18 100644
--- a/tools/include/uapi/linux/xfs.h
+++ b/tools/include/uapi/linux/xfs.h
@@ -6,7 +6,6 @@
#define FMODE_RANDOM (0x1000)
#define FMODE_WILLNEED (0x400000)
-#define FMODE_SPC_READAHEAD (0x800000)
struct xfs_writable_file {
const unsigned char *name;
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 46b1d5b864f5a..8d2737285f185 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -36,8 +36,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
- test_skb_cgroup_id_kern.o test_set_xfs_file.o test_clear_xfs_file.o \
- test_spec_readahead_xfs_file.o
+ test_skb_cgroup_id_kern.o test_set_xfs_file.o test_clear_xfs_file.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
diff --git a/tools/testing/selftests/bpf/test_spec_readahead_xfs_file.c b/tools/testing/selftests/bpf/test_spec_readahead_xfs_file.c
deleted file mode 100644
index ff8794a14cdcd..0000000000000
--- a/tools/testing/selftests/bpf/test_spec_readahead_xfs_file.c
+++ /dev/null
@@ -1,39 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include <string.h>
-#include <linux/xfs.h>
-
-/* from /sys/kernel/debug/tracing/events/xfs/xfs_read_file */
-struct xfs_read_buffer_args {
- struct xfs_writable_file *file;
-};
-
-SEC("tracepoint/xfs/xfs_file_read")
-int bpf_prog1(struct xfs_read_buffer_args *ctx)
-{
- char fmt[] = "name: %s, set f_mode: %u\n";
- struct xfs_writable_file *file = ctx->file;
- char name[64] = {};
- char *tmp;
- unsigned long i_size;
- int len;
-
- bpf_probe_read(&tmp, 8, &(file->name));
- len = bpf_probe_read_str(name, 64, tmp);
- bpf_probe_read(&i_size, 8, &(file->i_size));
-
- if (!strncmp("blk_", name, 4)) {
- /* blk_xxx.meta or blk_xxx with size < 2M */
- if (len == 27 || (len == 15 && i_size <= 2 * 1024 * 1024))
- file->f_mode |= FMODE_WILLNEED;
- else if (len == 15) /* blk_xxx */
- file->f_mode |= FMODE_SPC_READAHEAD;
- bpf_trace_printk(fmt, sizeof(fmt), name, file->f_mode);
- }
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/test_xfs_file.c b/tools/testing/selftests/bpf/test_xfs_file.c
index 89e79d959677c..247c42be029b2 100644
--- a/tools/testing/selftests/bpf/test_xfs_file.c
+++ b/tools/testing/selftests/bpf/test_xfs_file.c
@@ -20,7 +20,6 @@ int main(int argc, char *argv[])
{
const char *set_file = "./test_set_xfs_file.o";
const char *clear_file = "./test_clear_xfs_file.o";
- const char *spec_readahead_file = "./test_spec_readahead_xfs_file.o";
const char *file = set_file;
struct bpf_object *obj;
int efd, err, prog_fd;
@@ -32,12 +31,8 @@ int main(int argc, char *argv[])
delay = strtol(str, &endptr, 10);
}
- if (argc >= 2) {
- if (!strcmp("clear", argv[1]))
- file = clear_file;
- if (!strcmp("spec_readahead", argv[1]))
- file = spec_readahead_file;
- }
+ if (argc >= 2 && !strcmp("clear", argv[1]))
+ file = clear_file;
err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, &obj,
&prog_fd);
--
2.25.1
[PATCH OLK-5.10 v3] Net:NIC:SPNIC:Fix compile error when O= is specified
by Yanling Song 05 Nov '21
Ramaxel inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4CBDP
CVE: NA
Net:NIC:SPNIC:Fix compile error when O= is specified
Signed-off-by: Yanling Song <songyl(a)ramaxel.com>
---
Changes from V2:
Move the location of "Changes from"
Changes from V1:
1. Change category from feature to bugfix;
2. Add description
---
drivers/net/ethernet/ramaxel/spnic/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/ramaxel/spnic/Makefile b/drivers/net/ethernet/ramaxel/spnic/Makefile
index f86ccff374f6..207e1d9c431a 100644
--- a/drivers/net/ethernet/ramaxel/spnic/Makefile
+++ b/drivers/net/ethernet/ramaxel/spnic/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SPNIC) += spnic.o
-subdir-ccflags-y += -I$(src)/hw
+subdir-ccflags-y += -I$(srctree)/$(src)/hw
spnic-objs := hw/sphw_common.o \
hw/sphw_hwif.o \
--
2.27.0
[PATCH openEuler-1.0-LTS] EMMC: fix ascend hisi emmc probe failure caused by mmc_host struct layout
by Yang Yingliang 04 Nov '21
From: zhangguijiang <zhangguijiang(a)huawei.com>
ascend inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4GVSG
CVE: NA
-------------------
Struct mmc_host uses the member private[0] to mark the end of struct
mmc_host. In commit deaf01a6 we modified struct mmc_host but overlooked
the role of private[0] and did not keep it as the last member.
This made mmc card probing fail; fix the layout so that the ascend
hisi mmc card probes successfully again.
Signed-off-by: zhangguijiang <zhangguijiang(a)huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/mmc/core/sd.c | 38 +++-----------------------------
drivers/mmc/host/dw_mmc_extern.h | 2 +-
include/linux/mmc/host.h | 2 +-
3 files changed, 5 insertions(+), 37 deletions(-)
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 20ca371b9f874..8760b749292b8 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -1163,9 +1163,9 @@ static int _mmc_sd_suspend(struct mmc_host *host)
err = mmc_deselect_cards(host);
if (!err) {
- if (!(mmc_is_ascend_customized(host->parent)))
+ if (!mmc_is_ascend_customized(host->parent))
mmc_power_off(host);
- else if (mmc_card_keep_power(host))
+ else if (!mmc_card_keep_power(host))
mmc_power_off(host);
mmc_card_set_suspended(host->card);
}
@@ -1269,42 +1269,10 @@ static int mmc_sd_runtime_resume(struct mmc_host *host)
return 0;
}
-#ifdef CONFIG_ASCEND_HISI_MMC
-/*********************sd ops begin**********************/
-static int mmc_do_sd_reset(struct mmc_host *host)
-{
- struct mmc_card *card = host->card;
-
- if (!host->bus_ops->power_restore)
- return -EOPNOTSUPP;
-
- if (!card)
- return -EINVAL;
-
- /* hw_reset for ip reset */
- if (host->ops->hw_reset)
- host->ops->hw_reset(host);
-
- /* Only for K930/920 SD slow down clk*/
- if (host->ops->slowdown_clk)
- host->ops->slowdown_clk(host, host->ios.timing);
-
- mmc_power_off(host);
- mmc_set_clock(host, host->f_init);
- /* Wait at least 200 ms */
- mmc_delay(200);
- mmc_power_up(host, host->card->ocr);
- (void)mmc_select_voltage(host, host->card->ocr);
-
- return host->bus_ops->power_restore(host);
-}
-#endif
static int mmc_sd_hw_reset(struct mmc_host *host)
{
-#ifdef CONFIG_ASCEND_HISI_MMC
if (mmc_is_ascend_customized(host->parent))
- return mmc_do_sd_reset(host);
-#endif
+ return mmc_sd_reset(host);
mmc_power_cycle(host, host->card->ocr);
return mmc_sd_init_card(host, host->card->ocr, host->card);
}
diff --git a/drivers/mmc/host/dw_mmc_extern.h b/drivers/mmc/host/dw_mmc_extern.h
index 04d8c23f39e9a..ab077b4955940 100644
--- a/drivers/mmc/host/dw_mmc_extern.h
+++ b/drivers/mmc/host/dw_mmc_extern.h
@@ -8,7 +8,7 @@
#include "dw_mmc.h"
-#ifdef CONFIG_MMC_DW_HI3XXX_MODULE
+#if defined(CONFIG_MMC_DW_HI3XXX) || defined(CONFIG_MMC_DW_HI3XXX_MODULE)
extern void dw_mci_reg_dump(struct dw_mci *host);
extern void dw_mci_set_timeout(struct dw_mci *host);
extern bool dw_mci_stop_abort_cmd(struct mmc_command *cmd);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 78b4d0a813b71..fabc23d156242 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -542,13 +542,13 @@ struct mmc_host {
bool cqe_enabled;
bool cqe_on;
- unsigned long private[0] ____cacheline_aligned;
#ifdef CONFIG_ASCEND_HISI_MMC
const struct mmc_cmdq_host_ops *cmdq_ops;
int sdio_present;
unsigned int cmdq_slots;
struct mmc_cmdq_context_info cmdq_ctx;
#endif
+ unsigned long private[0] ____cacheline_aligned;
};
struct device_node;
--
2.25.1
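The underlying rule behind the fix above: an anchor member like private[0] hands out the storage that follows it, so it must stay the last member. A hedged sketch of why (names here are illustrative, not the real struct):

/* Illustrative only. A helper like mmc_priv() returns h->private,
 * assuming the host driver's private area begins right after the
 * struct. Any member declared after private[0] occupies that same
 * storage and is silently clobbered by the driver's private data.
 */
struct host_sketch {
	int common_state;
#ifdef BROKEN_LAYOUT
	unsigned long private[0] __attribute__((aligned(64)));
	int added_later;	/* aliases the private area: probe breaks */
#else
	int added_later;	/* new members belong before the anchor */
	unsigned long private[0] __attribute__((aligned(64)));
#endif
};

static inline void *host_priv_sketch(struct host_sketch *h)
{
	return h->private;
}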
[PATCH OLK-5.10 v2] Net:NIC:SPNIC:Fix compile error when O= is specified
by Yanling Song 04 Nov '21
Ramaxel inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4CBDP
CVE: NA
Change from V1:
1. Change category from feature to bugfix;
2. Add description
Net:NIC:SPNIC:Fix compile error when O= is specified
Signed-off-by: Yanling Song <songyl(a)ramaxel.com>
---
drivers/net/ethernet/ramaxel/spnic/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/ramaxel/spnic/Makefile b/drivers/net/ethernet/ramaxel/spnic/Makefile
index f86ccff374f6..207e1d9c431a 100644
--- a/drivers/net/ethernet/ramaxel/spnic/Makefile
+++ b/drivers/net/ethernet/ramaxel/spnic/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SPNIC) += spnic.o
-subdir-ccflags-y += -I$(src)/hw
+subdir-ccflags-y += -I$(srctree)/$(src)/hw
spnic-objs := hw/sphw_common.o \
hw/sphw_hwif.o \
--
2.27.0
[PATCH openEuler-1.0-LTS] Bluetooth: cmtp: fix file refcount when cmtp_attach_device fails
by Yang Yingliang 04 Nov '21
From: Thadeu Lima de Souza Cascardo <cascardo(a)canonical.com>
mainline inclusion
from mainline-v5.14-rc1
commit 3cfdf8fcaafa62a4123f92eb0f4a72650da3a479
category: bugfix
bugzilla: NA
CVE: CVE-2021-34981
-------------------------------------------------
When cmtp_attach_device fails, cmtp_add_connection returns the error value
which leads to the caller to doing fput through sockfd_put. But
cmtp_session kthread, which is stopped in this path will also call fput,
leading to a potential refcount underflow or a use-after-free.
Add a refcount before we signal the kthread to stop. The kthread will try
to grab the cmtp_session_sem mutex before doing the fput, which is held
when get_file is called, so there should be no races there.
Reported-by: Ryota Shiga
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo(a)canonical.com>
Signed-off-by: Marcel Holtmann <marcel(a)holtmann.org>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Reviewed-by: weiyang wang <wangweiyang2(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
net/bluetooth/cmtp/core.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 7f26a5a19ff6d..9873684a9d8ff 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -391,6 +391,11 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
if (!(session->flags & BIT(CMTP_LOOPBACK))) {
err = cmtp_attach_device(session);
if (err < 0) {
+ /* Caller will call fput in case of failure, and so
+ * will cmtp_session kthread.
+ */
+ get_file(session->sock->file);
+
atomic_inc(&session->terminate);
wake_up_interruptible(sk_sleep(session->sock->sk));
up_write(&cmtp_session_sem);
--
2.25.1
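The fix above restores reference-count balance: with the session kthread started, two fput() calls happen on this failure path, so one extra reference must be taken first. A toy sketch of the accounting (illustrative, not the cmtp code):

#include <assert.h>

static int refs = 1;			/* socket file starts with one ref */

static void get_file_sketch(void) { refs++; }
static void fput_sketch(void)     { assert(refs > 0); refs--; }

int main(void)
{
	get_file_sketch();	/* the fix: cover the kthread's fput() */
	fput_sketch();		/* cmtp_session kthread drops its ref */
	fput_sketch();		/* caller drops its ref via sockfd_put() */
	assert(refs == 0);	/* balanced: no underflow, no leak */
	return 0;
}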
[PATCH openEuler-1.0-LTS] scsi: hisi_sas: print status and error when sata io abnormally completed
by Yang Yingliang 04 Nov '21
From: Xingui Yang <yangxingui(a)huawei.com>
driver inclusion
category: bugfix
bugzilla: NA
CVE: NA
---------------------
To help debugging efforts, print d2h status and error
D2H:
FIS Status Bits = 0x53
BSY = 0... .... Off
DRDY = .1.. .... On
DF = ..0. .... Off
DSC = ...1 .... On
DRQ = .... 0... Off
Alignment Error = .... .0.. Off
Sense Data Available = .... ..1. On
ERR = .... ...1 On
FIS Error Bits = 0x40
ICRC = 0... .... Off
UNC = .1.. .... On
MC (O) = ..0. .... Off
IDNF = ...0 .... Off
MCR (O) = .... 0... Off
ABRT = .... .0.. Off
EOM = .... ..0. Off
CCTO = .... ...0 Off
Here is an example print:
hisi_sas_v3_hw 0000:74:02.0: sata d2h status 0x53, error 0x40
Signed-off-by: Xingui Yang <yangxingui(a)huawei.com>
Reviewed-by: Kangfenglong <kangfenglong(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index babf6486af526..4508c4a2f02fc 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -2419,6 +2419,19 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
error_info[0], error_info[1],
error_info[2], error_info[3]);
+ if ((complete_hdr->dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) &&
+ (task->task_proto & SAS_PROTOCOL_SATA ||
+ task->task_proto & SAS_PROTOCOL_STP)) {
+ struct hisi_sas_status_buffer *status_buf =
+ hisi_sas_status_buf_addr_mem(slot);
+ u8 *iu = &status_buf->iu[0];
+ struct dev_to_host_fis *d2h =
+ (struct dev_to_host_fis *)iu;
+
+ dev_info(dev, "sata d2h status 0x%02x, error 0x%02x\n",
+ d2h->status, d2h->error);
+ }
+
if ((error_info[3] & RX_DATA_LEN_UNDERFLOW_MSK) &&
(task->task_proto == SAS_PROTOCOL_SSP)) {
/*print detail sense info when data underflow happened*/
--
2.25.1
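The bit breakdown in the commit message above follows the standard ATA status/error byte layout; a small sketch decoding the example values (the helper name is illustrative):

#include <stdio.h>

/* Standard ATA status bits, matching the table in the commit log. */
#define ATA_BSY    0x80
#define ATA_DRDY   0x40
#define ATA_DF     0x20
#define ATA_DSC    0x10
#define ATA_DRQ    0x08
#define ATA_ALIGN  0x04	/* alignment error */
#define ATA_SENSE  0x02	/* sense data available */
#define ATA_ERR    0x01
#define ATA_UNC    0x40	/* error byte: uncorrectable data error */

static void decode_d2h_sketch(unsigned char status, unsigned char error)
{
	printf("sata d2h status 0x%02x, error 0x%02x\n", status, error);
	if (status & ATA_ERR)
		printf("  ERR set%s\n",
		       (error & ATA_UNC) ? ", UNC: uncorrectable data" : "");
}

int main(void)
{
	/* 0x53 = DRDY | DSC | SENSE | ERR, 0x40 = UNC -- the same
	 * combination shown in the example print above.
	 */
	decode_d2h_sketch(0x53, 0x40);
	return 0;
}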
[PATCH openEuler-1.0-LTS 1/2] Revert "scsi: hisi_sas: replace spin_lock_irqsave/spin_unlock_restore with spin_lock/spin_unlock"
by Yang Yingliang 04 Nov '21
From: Xingui Yang <yangxingui(a)huawei.com>
driver inclusion
category: bugfix
bugzilla: NA
CVE: NA
---------------------
This reverts commit 5c725a983c0dedd067e5e643633db6fb5ecbeb91.
The reverted optimization depends on the kernel block MQ patches;
if those are not integrated, a spinlock deadlock may occur.
Signed-off-by: Xingui Yang <yangxingui(a)huawei.com>
Reviewed-by: Kangfenglong <kangfenglong(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/scsi/hisi_sas/hisi_sas_main.c | 50 ++++++++++++++------------
drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 12 ++++---
2 files changed, 35 insertions(+), 27 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index bde4307596234..39fe67239f929 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -172,11 +172,13 @@ static void hisi_sas_slot_index_clear(struct hisi_hba *hisi_hba, int slot_idx)
static void hisi_sas_slot_index_free(struct hisi_hba *hisi_hba, int slot_idx)
{
+ unsigned long flags;
+
if (hisi_hba->hw->slot_index_alloc || (slot_idx >=
hisi_hba->hw->max_command_entries - HISI_SAS_RESERVED_IPTT_CNT)) {
- spin_lock(&hisi_hba->lock);
+ spin_lock_irqsave(&hisi_hba->lock, flags);
hisi_sas_slot_index_clear(hisi_hba, slot_idx);
- spin_unlock(&hisi_hba->lock);
+ spin_unlock_irqrestore(&hisi_hba->lock, flags);
}
}
@@ -192,12 +194,13 @@ static int hisi_sas_slot_index_alloc(struct hisi_hba *hisi_hba,
{
int index;
void *bitmap = hisi_hba->slot_index_tags;
+ unsigned long flags;
if (scsi_cmnd) {
return scsi_cmnd->request->tag;
}
- spin_lock(&hisi_hba->lock);
+ spin_lock_irqsave(&hisi_hba->lock, flags);
index = find_next_zero_bit(bitmap, hisi_hba->slot_index_count,
hisi_hba->last_slot_index + 1);
if (index >= hisi_hba->slot_index_count) {
@@ -206,13 +209,13 @@ static int hisi_sas_slot_index_alloc(struct hisi_hba *hisi_hba,
hisi_hba->hw->max_command_entries -
HISI_SAS_RESERVED_IPTT_CNT);
if (index >= hisi_hba->slot_index_count) {
- spin_unlock(&hisi_hba->lock);
+ spin_unlock_irqrestore(&hisi_hba->lock, flags);
return -SAS_QUEUE_FULL;
}
}
hisi_sas_slot_index_set(hisi_hba, index);
hisi_hba->last_slot_index = index;
- spin_unlock(&hisi_hba->lock);
+ spin_unlock_irqrestore(&hisi_hba->lock, flags);
return index;
}
@@ -228,6 +231,7 @@ static void hisi_sas_slot_index_init(struct hisi_hba *hisi_hba)
void hisi_sas_slot_task_free(struct hisi_hba *hisi_hba, struct sas_task *task,
struct hisi_sas_slot *slot)
{
+ unsigned long flags;
int device_id = slot->device_id;
struct hisi_sas_device *sas_dev = &hisi_hba->devices[device_id];
@@ -256,9 +260,9 @@ void hisi_sas_slot_task_free(struct hisi_hba *hisi_hba, struct sas_task *task,
}
}
- spin_lock(&sas_dev->lock);
+ spin_lock_irqsave(&sas_dev->lock, flags);
list_del_init(&slot->entry);
- spin_unlock(&sas_dev->lock);
+ spin_unlock_irqrestore(&sas_dev->lock, flags);
memset(slot, 0, offsetof(struct hisi_sas_slot, buf));
@@ -510,14 +514,14 @@ static int hisi_sas_task_prep(struct sas_task *task,
slot_idx = rc;
slot = &hisi_hba->slot_info[slot_idx];
- spin_lock(&dq->lock);
+ spin_lock_irqsave(&dq->lock, flags);
wr_q_index = dq->wr_point;
dq->wr_point = (dq->wr_point + 1) % HISI_SAS_QUEUE_SLOTS;
list_add_tail(&slot->delivery, &dq->list);
- spin_unlock(&dq->lock);
- spin_lock(&sas_dev->lock);
+ spin_unlock_irqrestore(&dq->lock, flags);
+ spin_lock_irqsave(&sas_dev->lock, flags);
list_add_tail(&slot->entry, &sas_dev->list);
- spin_unlock(&sas_dev->lock);
+ spin_unlock_irqrestore(&sas_dev->lock, flags);
dlvry_queue = dq->id;
dlvry_queue_slot = wr_q_index;
@@ -583,6 +587,7 @@ static int hisi_sas_task_exec(struct sas_task *task, gfp_t gfp_flags,
{
u32 rc;
u32 pass = 0;
+ unsigned long flags;
struct hisi_hba *hisi_hba;
struct device *dev;
struct domain_device *device = task->dev;
@@ -616,9 +621,9 @@ static int hisi_sas_task_exec(struct sas_task *task, gfp_t gfp_flags,
dev_err(dev, "task exec: failed[%d]!\n", rc);
if (likely(pass)) {
- spin_lock(&dq->lock);
+ spin_lock_irqsave(&dq->lock, flags);
hisi_hba->hw->start_delivery(dq);
- spin_unlock(&dq->lock);
+ spin_unlock_irqrestore(&dq->lock, flags);
}
return rc;
@@ -669,11 +674,12 @@ static struct hisi_sas_device *hisi_sas_alloc_dev(struct domain_device *device)
{
struct hisi_hba *hisi_hba = dev_to_hisi_hba(device);
struct hisi_sas_device *sas_dev = NULL;
+ unsigned long flags;
int first = (hisi_hba->last_dev_id + 1) % HISI_SAS_MAX_DEVICES;
int dev_id;
int i;
- spin_lock(&hisi_hba->lock);
+ spin_lock_irqsave(&hisi_hba->lock, flags);
for (i = first; i < first + HISI_SAS_MAX_DEVICES; i++) {
dev_id = i % HISI_SAS_MAX_DEVICES;
if (hisi_hba->devices[dev_id].dev_type == SAS_PHY_UNUSED) {
@@ -694,7 +700,7 @@ static struct hisi_sas_device *hisi_sas_alloc_dev(struct domain_device *device)
}
if (sas_dev)
hisi_hba->last_dev_id = i;
- spin_unlock(&hisi_hba->lock);
+ spin_unlock_irqrestore(&hisi_hba->lock, flags);
return sas_dev;
}
@@ -1964,7 +1970,7 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba,
struct asd_sas_port *sas_port = device->port;
struct hisi_sas_cmd_hdr *cmd_hdr_base;
int dlvry_queue_slot, dlvry_queue, n_elem = 0, rc, slot_idx;
- unsigned long flags;
+ unsigned long flags, flags_dq = 0;
int wr_q_index;
if (unlikely(test_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags)))
@@ -1983,14 +1989,14 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba,
slot_idx = rc;
slot = &hisi_hba->slot_info[slot_idx];
- spin_lock(&dq->lock);
+ spin_lock_irqsave(&dq->lock, flags_dq);
wr_q_index = dq->wr_point;
dq->wr_point = (dq->wr_point + 1) % HISI_SAS_QUEUE_SLOTS;
list_add_tail(&slot->delivery, &dq->list);
- spin_unlock(&dq->lock);
- spin_lock(&sas_dev->lock);
+ spin_unlock_irqrestore(&dq->lock, flags_dq);
+ spin_lock_irqsave(&sas_dev->lock, flags);
list_add_tail(&slot->entry, &sas_dev->list);
- spin_unlock(&sas_dev->lock);
+ spin_unlock_irqrestore(&sas_dev->lock, flags);
dlvry_queue = dq->id;
dlvry_queue_slot = wr_q_index;
@@ -2019,9 +2025,9 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba,
spin_unlock_irqrestore(&task->task_state_lock, flags);
WRITE_ONCE(slot->ready, 1);
/* send abort command to the chip */
- spin_lock(&dq->lock);
+ spin_lock_irqsave(&dq->lock, flags);
hisi_hba->hw->start_delivery(dq);
- spin_unlock(&dq->lock);
+ spin_unlock_irqrestore(&dq->lock, flags);
return 0;
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
index 730191e7f55b8..fa4233454dccb 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
@@ -778,6 +778,7 @@ slot_index_alloc_quirk_v2_hw(struct hisi_hba *hisi_hba,
struct hisi_sas_device *sas_dev = device->lldd_dev;
int sata_idx = sas_dev->sata_idx;
int start, end;
+ unsigned long flags;
if (!sata_dev) {
/*
@@ -801,12 +802,12 @@ slot_index_alloc_quirk_v2_hw(struct hisi_hba *hisi_hba,
end = 64 * (sata_idx + 2);
}
- spin_lock(&hisi_hba->lock);
+ spin_lock_irqsave(&hisi_hba->lock, flags);
while (1) {
start = find_next_zero_bit(bitmap,
hisi_hba->slot_index_count, start);
if (start >= end) {
- spin_unlock(&hisi_hba->lock);
+ spin_unlock_irqrestore(&hisi_hba->lock, flags);
return -SAS_QUEUE_FULL;
}
/*
@@ -818,7 +819,7 @@ slot_index_alloc_quirk_v2_hw(struct hisi_hba *hisi_hba,
}
set_bit(start, bitmap);
- spin_unlock(&hisi_hba->lock);
+ spin_unlock_irqrestore(&hisi_hba->lock, flags);
return start;
}
@@ -847,8 +848,9 @@ hisi_sas_device *alloc_dev_quirk_v2_hw(struct domain_device *device)
struct hisi_sas_device *sas_dev = NULL;
int i, sata_dev = dev_is_sata(device);
int sata_idx = -1;
+ unsigned long flags;
- spin_lock(&hisi_hba->lock);
+ spin_lock_irqsave(&hisi_hba->lock, flags);
if (sata_dev)
if (!sata_index_alloc_v2_hw(hisi_hba, &sata_idx))
@@ -879,7 +881,7 @@ hisi_sas_device *alloc_dev_quirk_v2_hw(struct domain_device *device)
}
out:
- spin_unlock(&hisi_hba->lock);
+ spin_unlock_irqrestore(&hisi_hba->lock, flags);
return sas_dev;
}
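
As background on the pattern applied throughout this patch: the _irqsave variants are required when a lock may be taken from both process context and interrupt context on the same CPU. A minimal sketch with hypothetical names (not code from this driver):

```c
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);	/* hypothetical lock */
static int demo_counter;		/* hypothetical shared state */

/* Callable from process context and from IRQ handlers alike. */
static void demo_update(void)
{
	unsigned long flags;

	/*
	 * A plain spin_lock() would deadlock if an interrupt arrived on
	 * this CPU while the lock is held and the handler tried to take
	 * the same lock. The _irqsave variant disables local interrupts
	 * and saves their previous state in 'flags' so that nesting is
	 * restored correctly on unlock.
	 */
	spin_lock_irqsave(&demo_lock, flags);
	demo_counter++;
	spin_unlock_irqrestore(&demo_lock, flags);
}
```

Note how the patch gives each critical section its own saved-flags variable (flags and flags_dq in hisi_sas_internal_abort_task_exec()) so that every save is paired with exactly one restore.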
--
2.25.1
1
1
[PATCH openEuler-1.0-LTS 01/18] net: hns3: limit bd numbers when getting dfx regs.
by Yang Yingliang 04 Nov '21
by Yang Yingliang 04 Nov '21
04 Nov '21
From: Yonglong Liu <liuyonglong(a)huawei.com>
driver inclusion
category: bugfix
bugzilla: NA
CVE: NA
----------------------------
When getting dfx regs, the bd number reported by the firmware may be
unexpectedly large, which can overflow the buffer size calculation.
This patch limits the max bd number to 64 to fix the problem.
Signed-off-by: Yonglong Liu <liuyonglong(a)huawei.com>
Reviewed-by: li yongxin <liyongxin1(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 8507eb60450fe..ac5f502d5e9be 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -11180,6 +11180,8 @@ static int hclge_get_dfx_reg_len(struct hclge_dev *hdev, int *len)
static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data)
{
+#define HCLGE_DFX_BD_NUM_MAX 64
+
u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list);
int bd_num, bd_num_max, buf_len, i;
struct hclge_desc *desc_src;
@@ -11202,6 +11204,13 @@ static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data)
for (i = 1; i < dfx_reg_type_num; i++)
bd_num_max = max_t(int, bd_num_max, bd_num_list[i]);
+ if (bd_num_max > HCLGE_DFX_BD_NUM_MAX) {
+ dev_err(&hdev->pdev->dev,
+ "Get dfx reg fail, invalid bd number: %d\n",
+ bd_num_max);
+ goto out;
+ }
+
buf_len = sizeof(*desc_src) * bd_num_max;
desc_src = kzalloc(buf_len, GFP_KERNEL);
if (!desc_src) {
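
The fix follows a general rule: a count reported by firmware is untrusted input and must be validated before it sizes an allocation. A minimal sketch of the same pattern with hypothetical names (not the driver code itself):

```c
#include <linux/slab.h>
#include <linux/device.h>

#define DEMO_BD_NUM_MAX 64	/* hypothetical upper bound */

/* 'bd_num' comes from firmware and must not be trusted blindly. */
static void *demo_alloc_descs(struct device *dev, int bd_num,
			      size_t desc_size)
{
	if (bd_num <= 0 || bd_num > DEMO_BD_NUM_MAX) {
		dev_err(dev, "invalid bd number: %d\n", bd_num);
		return NULL;
	}
	/* The multiplication is now bounded and cannot overflow. */
	return kzalloc(desc_size * bd_num, GFP_KERNEL);
}
```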
--
2.25.1
1
17
[PATCH openEuler-1.0-LTS 1/2] s390/bpf: Fix 64-bit subtraction of the -0x80000000 constant
by Yang Yingliang 04 Nov '21
by Yang Yingliang 04 Nov '21
04 Nov '21
From: Ilya Leoshkevich <iii(a)linux.ibm.com>
stable inclusion
from linux-4.19.207
commit e15c2fe2def24324bfdbfb7ec2837e40b2aac7fd
CVE: CVE-2021-20320
--------------------------------
commit 6e61dc9da0b7a0d91d57c2e20b5ea4fd2d4e7e53 upstream.
The JIT uses agfi for subtracting constants, but -(-0x80000000) cannot
be represented as a 32-bit signed binary integer. Fix by using algfi in
this particular case.
Reported-by: Johan Almbladh <johan.almbladh(a)anyfinetworks.com>
Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend")
Reviewed-by: Heiko Carstens <hca(a)linux.ibm.com>
Signed-off-by: Ilya Leoshkevich <iii(a)linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor(a)linux.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Signed-off-by: He Fengqing <hefengqing(a)huawei.com>
Reviewed-by: weiyang wang <wangweiyang2(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
arch/s390/net/bpf_jit_comp.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 914b655eb5ba7..a8ee2f5c827d6 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -595,8 +595,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */
if (!imm)
break;
- /* agfi %dst,-imm */
- EMIT6_IMM(0xc2080000, dst_reg, -imm);
+ if (imm == -0x80000000) {
+ /* algfi %dst,0x80000000 */
+ EMIT6_IMM(0xc20a0000, dst_reg, 0x80000000);
+ } else {
+ /* agfi %dst,-imm */
+ EMIT6_IMM(0xc2080000, dst_reg, -imm);
+ }
break;
/*
* BPF_MUL
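
The arithmetic corner case behind this fix is plain two's-complement behavior: negating -0x80000000 overflows a 32-bit signed integer, since +0x80000000 is one past INT32_MAX. A standalone illustration:

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int32_t imm = INT32_MIN;	/* -0x80000000 */

	/*
	 * -imm would be +0x80000000, which does not fit in int32_t:
	 * signed overflow, undefined behavior in C. Widening to 64 bits
	 * first shows the value the JIT actually has to emit, which is
	 * why the patch switches to an unsigned add (algfi) of
	 * 0x80000000 for this one constant.
	 */
	int64_t negated = -(int64_t)imm;

	printf("imm=%d  -imm (widened)=%lld\n", imm, (long long)negated);
	return 0;
}
```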
--
2.25.1
1
1
[PATCH openEuler-21.09 2/2] tools: add a tool to calculate the CPU utilization rate
by Hongyu Li 03 Nov '21
by Hongyu Li 03 Nov '21
03 Nov '21
openEuler inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4CIJQ
CVE: NA
----------------------------------------------------------------------
This tool helps calculate the CPU utilization rate with higher precision.
Signed-off-by: Hongyu Li <543306408(a)qq.com>
---
tools/accounting/Makefile | 2 +-
tools/accounting/cpu_rate_cal.c | 91 ++++++++++++++++++++++++++++
tools/accounting/cpu_rate_cal_readme | 15 +++++
3 files changed, 107 insertions(+), 1 deletion(-)
create mode 100644 tools/accounting/cpu_rate_cal.c
create mode 100644 tools/accounting/cpu_rate_cal_readme
diff --git a/tools/accounting/Makefile b/tools/accounting/Makefile
index 03687f19cbb1..471fbdd7b07b 100644
--- a/tools/accounting/Makefile
+++ b/tools/accounting/Makefile
@@ -2,7 +2,7 @@
CC := $(CROSS_COMPILE)gcc
CFLAGS := -I../../usr/include
-PROGS := getdelays
+PROGS := getdelays cpu_rate_cal
all: $(PROGS)
diff --git a/tools/accounting/cpu_rate_cal.c b/tools/accounting/cpu_rate_cal.c
new file mode 100644
index 000000000000..6d621b37c70e
--- /dev/null
+++ b/tools/accounting/cpu_rate_cal.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * cpu_rate_cal.c
+ *
+ * Copyright (C) 2021
+ *
+ * cpu idle time accounting
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <sys/time.h>
+
+#define BUFFSIZE 4096
+#define HZ 100
+#define FILE_NAME "/proc/stat2"
+
+struct cpu_info {
+ char name[BUFFSIZE];
+ long long value[1];
+};
+
+int main(void)
+{
+ int cpu_number = sysconf(_SC_NPROCESSORS_ONLN);
+ struct cpu_info *cpus = (struct cpu_info *)malloc(sizeof(struct cpu_info)*cpu_number);
+ struct cpu_info *cpus_2 = (struct cpu_info *)malloc(sizeof(struct cpu_info)*cpu_number);
+
+ char buf[BUFFSIZE];
+ long long sub;
+ double value;
+
+ while (1) {
+ FILE *fp = fopen(FILE_NAME, "r");
+ int i = 0;
+ struct timeval start, end;
+
+
+ while (i < cpu_number+1) {
+ int n = fscanf(fp, "%s %lld\n", cpus[i].name, &cpus[i].value[0]);
+
+ if (n < 0) {
+ printf("wrong");
+ return -1;
+ }
+ i += 1;
+ }
+
+ gettimeofday(&start, NULL);
+ fclose(fp);
+ i = 0;
+
+ sleep(1);
+
+ FILE *fp_2 = fopen(FILE_NAME, "r");
+
+ while (i < cpu_number+1) {
+ int n = fscanf(fp_2, "%s %lld\n", cpus_2[i].name, &cpus_2[i].value[0]);
+
+ if (n < 0) {
+ printf("wrong");
+ return -1;
+ }
+ i += 1;
+ }
+
+ gettimeofday(&end, NULL);
+ fclose(fp_2);
+
+ sub = end.tv_sec-start.tv_sec;
+ value = sub*1000000.0+end.tv_usec-start.tv_usec;
+ system("reset");
+ printf("CPU idle rate %f\n", 1000000/HZ*(cpus_2[0].value[0]-cpus[0].value[0])
+ /value);
+
+ for (int i = 1; i < cpu_number+1; i++) {
+ printf("CPU%d idle rate %f\n", i-1, 1-1000000/HZ
+ *(cpus_2[i].value[0]-cpus[i].value[0])/value);
+ }
+ }
+ return 0;
+}
+
diff --git a/tools/accounting/cpu_rate_cal_readme b/tools/accounting/cpu_rate_cal_readme
new file mode 100644
index 000000000000..01b5d8d930fe
--- /dev/null
+++ b/tools/accounting/cpu_rate_cal_readme
@@ -0,0 +1,15 @@
+# calculate the cpu utilization rate
+
+cpu_rate_cal.c is a tool to calculate the CPU utilization rate. It prints statistics every second: the first line covers all CPUs together, and the following lines give the per-CPU figures.
+
+This tool can be compiled by running
+
+```sh
+gcc cpu_rate_cal.c -o cpu_rate_cal
+```
+
+We can use it by running
+
+```sh
+./cpu_rate_cal
+```
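
To sanity-check the arithmetic the tool performs, here is a worked example with made-up numbers, assuming HZ=100 (10,000 µs per tick, matching the constant in cpu_rate_cal.c):

```c
#include <stdio.h>

int main(void)
{
	const double usec_per_tick = 1000000.0 / 100;	/* HZ = 100 */
	long long idle_ticks_delta = 80;  /* stat2 delta over the interval */
	double elapsed_usec = 1000000.0;  /* ~1 s between the two reads */

	double idle = idle_ticks_delta * usec_per_tick / elapsed_usec;
	printf("idle fraction = %.2f, utilization = %.2f\n",
	       idle, 1.0 - idle);	/* 0.80 and 0.20 */
	return 0;
}
```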
--
2.17.1
1
0
openEuler inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4CIJQ
CVE: NA
----------------------------------------------------------------------
The default way of calculating CPU utilization is to check which task is
running at each tick, which leads to inaccurate results.
This problem can be solved by counting the idle time via the scheduler
rather than via the tick interval: we record the time when the idle
process starts executing and compute its execution time when the idle
process is switched out.
The idle time of each CPU is exposed in the /proc/stat2 file, which gives
higher precision in accounting the CPU idle time than /proc/stat.
Signed-off-by: Hongyu Li <543306408(a)qq.com>
---
fs/proc/Kconfig | 7 ++++
fs/proc/Makefile | 1 +
fs/proc/stat2.c | 91 ++++++++++++++++++++++++++++++++++++++++++
kernel/sched/cputime.c | 34 ++++++++++++++++
kernel/sched/idle.c | 38 ++++++++++++++++++
5 files changed, 171 insertions(+)
create mode 100644 fs/proc/stat2.c
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index c930001056f9..33588a37579e 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -107,3 +107,10 @@ config PROC_PID_ARCH_STATUS
config PROC_CPU_RESCTRL
def_bool n
depends on PROC_FS
+
+config PROC_IDLE
+ bool "include /proc/stat2 file"
+ depends on PROC_FS
+ default y
+ help
+ Provide the CPU idle time in the /proc/stat2 file.
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 8704d41dd67c..b0d5f2b347d7 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -34,5 +34,6 @@ proc-$(CONFIG_PROC_VMCORE) += vmcore.o
proc-$(CONFIG_PRINTK) += kmsg.o
proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o
proc-$(CONFIG_BOOT_CONFIG) += bootconfig.o
+proc-$(CONFIG_PROC_IDLE) += stat2.o
obj-$(CONFIG_ETMEM_SCAN) += etmem_scan.o
obj-$(CONFIG_ETMEM_SWAP) += etmem_swap.o
diff --git a/fs/proc/stat2.c b/fs/proc/stat2.c
new file mode 100644
index 000000000000..6036a946c71d
--- /dev/null
+++ b/fs/proc/stat2.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/fs/proc/stat2.c
+ *
+ * Copyright (C) 2007
+ *
+ * cpu idle time accounting
+ */
+
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/sched/stat.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/irqnr.h>
+#include <linux/sched/cputime.h>
+#include <linux/tick.h>
+
+#ifdef CONFIG_PROC_IDLE
+
+#define PROC_NAME "stat2"
+
+extern u64 cal_idle_sum_exec_runtime(int cpu);
+
+static u64 get_idle_sum_exec_runtime(int cpu)
+{
+ u64 idle = cal_idle_sum_exec_runtime(cpu);
+
+ return idle;
+}
+
+static int show_idle(struct seq_file *p, void *v)
+{
+ int i;
+ u64 idle;
+
+ idle = 0;
+
+ for_each_possible_cpu(i) {
+
+ idle += get_idle_sum_exec_runtime(i);
+
+ }
+
+ seq_put_decimal_ull(p, "cpu ", nsec_to_clock_t(idle));
+ seq_putc(p, '\n');
+
+ for_each_online_cpu(i) {
+
+ idle = get_idle_sum_exec_runtime(i);
+
+ seq_printf(p, "cpu%d", i);
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle));
+ seq_putc(p, '\n');
+ }
+
+ return 0;
+}
+
+static int idle_open(struct inode *inode, struct file *file)
+{
+ unsigned int size = 32 + 32 * num_online_cpus();
+
+ return single_open_size(file, show_idle, NULL, size);
+}
+
+static struct proc_ops idle_procs_ops = {
+ .proc_open = idle_open,
+ .proc_read_iter = seq_read_iter,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
+};
+
+static int __init kernel_module_init(void)
+{
+ proc_create(PROC_NAME, 0, NULL, &idle_procs_ops);
+ return 0;
+}
+
+fs_initcall(kernel_module_init);
+
+#endif /*CONFIG_PROC_IDLE*/
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5a55d2300452..25218a8f822f 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -19,6 +19,8 @@
*/
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
+extern struct static_key_true proc_idle;
+
static int sched_clock_irqtime;
void enable_sched_clock_irqtime(void)
@@ -1078,3 +1080,35 @@ void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
+
+
+#ifdef CONFIG_PROC_IDLE
+
+
+u64 cal_idle_sum_exec_runtime(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+ struct sched_entity *idle_se = &rq->idle->se;
+ u64 idle = idle_se->sum_exec_runtime;
+
+ if (!static_branch_likely(&proc_idle))
+ return 0ULL;
+
+ if (rq->curr == rq->idle) {
+ u64 now = sched_clock();
+ u64 delta_exec;
+
+ delta_exec = now - idle_se->exec_start;
+ if (unlikely((s64)delta_exec <= 0))
+ return idle;
+
+ schedstat_set(idle_se->statistics.exec_max,
+ max(delta_exec, idle_se->statistics.exec_max));
+
+ idle += delta_exec;
+ }
+
+ return idle;
+}
+
+#endif /* CONFIG_PROC_IDLE */
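
In short, the scheduler stamps exec_start when the idle task is picked (pick_next_task_idle() in the idle.c hunks below) and folds the elapsed delta into sum_exec_runtime when the idle task is switched out (put_prev_task_idle()); the read side above only has to add the still-running delta. A compact userspace restatement of that bookkeeping, with a plain struct instead of sched_entity (illustrative names only):

```c
#include <stdio.h>

struct idle_clock {
	unsigned long long exec_start;		/* when idle began running */
	unsigned long long sum_exec_runtime;	/* total idle time so far */
};

/* Mirrors pick_next_task_idle(): record when idle starts. */
static void idle_enter(struct idle_clock *c, unsigned long long now)
{
	c->exec_start = now;
}

/* Mirrors put_prev_task_idle(): accumulate the elapsed delta. */
static void idle_exit(struct idle_clock *c, unsigned long long now)
{
	if (now > c->exec_start)
		c->sum_exec_runtime += now - c->exec_start;
}

int main(void)
{
	struct idle_clock c = { 0, 0 };

	idle_enter(&c, 1000);
	idle_exit(&c, 4000);
	printf("idle time: %llu\n", c.sum_exec_runtime);	/* 3000 */
	return 0;
}
```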
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 36b545f17206..3714a1c0d57b 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -10,6 +10,8 @@
#include <trace/events/power.h>
+DEFINE_STATIC_KEY_TRUE(proc_idle);
+
/* Linker adds these: start and end of __cpuidle functions */
extern char __cpuidle_text_start[], __cpuidle_text_end[];
@@ -424,8 +426,35 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
+#ifdef CONFIG_PROC_IDLE
+ struct sched_entity *idle_se = &rq->idle->se;
+ u64 now;
+ u64 delta_exec;
+
+ if (!static_branch_likely(&proc_idle))
+ return;
+
+ now = sched_clock();
+ delta_exec = now - idle_se->exec_start;
+ if (unlikely((s64)delta_exec <= 0))
+ return;
+
+ schedstat_set(idle_se->statistics.exec_max,
+ max(delta_exec, idle_se->statistics.exec_max));
+
+ idle_se->sum_exec_runtime += delta_exec;
+#endif
}
+#ifdef CONFIG_PROC_IDLE
+static int __init init_proc_idle(char *str)
+{
+ if (!strcmp(str, "false"))
+ static_branch_disable(&proc_idle);
+
+ return 1;
+}
+__setup("proc_idle=", init_proc_idle);
+#endif
static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
{
update_idle_core(rq);
@@ -436,6 +465,15 @@ struct task_struct *pick_next_task_idle(struct rq *rq)
{
struct task_struct *next = rq->idle;
+#ifdef CONFIG_PROC_IDLE
+ if (static_branch_likely(&proc_idle)) {
+ struct sched_entity *idle_se = &rq->idle->se;
+ u64 now = sched_clock();
+
+ idle_se->exec_start = now;
+ }
+#endif
+
set_next_task_idle(rq, next, true);
return next;
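
Given the two-column format that show_idle() emits (an aggregate "cpu <ticks>" line followed by one "cpuN <ticks>" line per online CPU), a minimal userspace reader could look like this — a sketch based only on the format visible in the patch, with error handling kept to a minimum:

```c
#include <stdio.h>

int main(void)
{
	FILE *fp = fopen("/proc/stat2", "r");
	char name[32];
	long long idle_ticks;

	if (!fp) {
		perror("fopen /proc/stat2");
		return 1;
	}
	while (fscanf(fp, "%31s %lld", name, &idle_ticks) == 2)
		printf("%s: %lld idle ticks\n", name, idle_ticks);
	fclose(fp);
	return 0;
}
```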
--
2.17.1
1
0
[PATCH openEuler-21.09 0/2] Improve the precision of accounting the CPU utilization rate
by Hongyu Li 03 Nov '21
by Hongyu Li 03 Nov '21
03 Nov '21
The current way of calculating the CPU utilization rate is not accurate.
The accounting system only works at the granularity of two ticks; however,
a process can give up the CPU before the tick ends.
This can be fixed by counting the idle time via the scheduler. We can
use the sum_exec_runtime of the idle process of each CPU to calculate
the CPU utilization rate. The idle time of each CPU is given in the
/proc/stat2 file. An example of using this file is also attached.
Hongyu Li (2):
eulerfs: add the /proc/stat2 file
tools: add a tool to calculate the CPU utilization rate
fs/proc/Kconfig | 7 +++
fs/proc/Makefile | 1 +
fs/proc/stat2.c | 91 ++++++++++++++++++++++++++++
kernel/sched/cputime.c | 34 +++++++++++
kernel/sched/idle.c | 38 ++++++++++++
tools/accounting/Makefile | 2 +-
tools/accounting/cpu_rate_cal.c | 91 ++++++++++++++++++++++++++++
tools/accounting/cpu_rate_cal_readme | 15 +++++
8 files changed, 278 insertions(+), 1 deletion(-)
create mode 100644 fs/proc/stat2.c
create mode 100644 tools/accounting/cpu_rate_cal.c
create mode 100644 tools/accounting/cpu_rate_cal_readme
--
2.17.1
1
0
Ramaxel inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4DBD7
CVE: NA
Fix two compile errors:
1. Compilation fails when O=xxx is specified;
2. Compilation conflicts occur when spfc and spnic are compiled in parallel with the -j option, because they share some .c files
Signed-off-by: Yanling Song <songyl(a)ramaxel.com>
---
drivers/scsi/spfc/Makefile | 30 +++++++++++++++---------------
drivers/scsi/spfc/sphw_api_cmd.c | 1 +
drivers/scsi/spfc/sphw_cmdq.c | 1 +
drivers/scsi/spfc/sphw_common.c | 1 +
drivers/scsi/spfc/sphw_eqs.c | 1 +
drivers/scsi/spfc/sphw_hw_cfg.c | 1 +
drivers/scsi/spfc/sphw_hw_comm.c | 1 +
drivers/scsi/spfc/sphw_hwdev.c | 1 +
drivers/scsi/spfc/sphw_hwif.c | 1 +
drivers/scsi/spfc/sphw_mbox.c | 1 +
drivers/scsi/spfc/sphw_mgmt.c | 1 +
drivers/scsi/spfc/sphw_prof_adap.c | 1 +
drivers/scsi/spfc/sphw_wq.c | 1 +
13 files changed, 27 insertions(+), 15 deletions(-)
create mode 120000 drivers/scsi/spfc/sphw_api_cmd.c
create mode 120000 drivers/scsi/spfc/sphw_cmdq.c
create mode 120000 drivers/scsi/spfc/sphw_common.c
create mode 120000 drivers/scsi/spfc/sphw_eqs.c
create mode 120000 drivers/scsi/spfc/sphw_hw_cfg.c
create mode 120000 drivers/scsi/spfc/sphw_hw_comm.c
create mode 120000 drivers/scsi/spfc/sphw_hwdev.c
create mode 120000 drivers/scsi/spfc/sphw_hwif.c
create mode 120000 drivers/scsi/spfc/sphw_mbox.c
create mode 120000 drivers/scsi/spfc/sphw_mgmt.c
create mode 120000 drivers/scsi/spfc/sphw_prof_adap.c
create mode 120000 drivers/scsi/spfc/sphw_wq.c
diff --git a/drivers/scsi/spfc/Makefile b/drivers/scsi/spfc/Makefile
index 02fe0213e048..849b730ac733 100644
--- a/drivers/scsi/spfc/Makefile
+++ b/drivers/scsi/spfc/Makefile
@@ -1,9 +1,9 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SPFC) += spfc.o
-subdir-ccflags-y += -I$(src)/../../net/ethernet/ramaxel/spnic/hw
-subdir-ccflags-y += -I$(src)/hw
-subdir-ccflags-y += -I$(src)/common
+subdir-ccflags-y += -I$(srctree)/$(src)/../../net/ethernet/ramaxel/spnic/hw
+subdir-ccflags-y += -I$(srctree)/$(src)/hw
+subdir-ccflags-y += -I$(srctree)/$(src)/common
spfc-objs := common/unf_init.o \
common/unf_event.o \
@@ -33,15 +33,15 @@ spfc-objs := common/unf_init.o \
hw/spfc_cqm_bitmap_table.o \
hw/spfc_cqm_main.o \
hw/spfc_cqm_object.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_hwdev.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_hw_cfg.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_hw_comm.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_prof_adap.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_common.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_hwif.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_wq.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_cmdq.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_eqs.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_mbox.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_mgmt.o \
- ../../net/ethernet/ramaxel/spnic/hw/sphw_api_cmd.o
+ sphw_hwdev.o \
+ sphw_hw_cfg.o \
+ sphw_hw_comm.o \
+ sphw_prof_adap.o \
+ sphw_common.o \
+ sphw_hwif.o \
+ sphw_wq.o \
+ sphw_cmdq.o \
+ sphw_eqs.o \
+ sphw_mbox.o \
+ sphw_mgmt.o \
+ sphw_api_cmd.o
diff --git a/drivers/scsi/spfc/sphw_api_cmd.c b/drivers/scsi/spfc/sphw_api_cmd.c
new file mode 120000
index 000000000000..27c7c0770fa3
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_api_cmd.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_api_cmd.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_cmdq.c b/drivers/scsi/spfc/sphw_cmdq.c
new file mode 120000
index 000000000000..5ac779ba274b
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_cmdq.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_cmdq.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_common.c b/drivers/scsi/spfc/sphw_common.c
new file mode 120000
index 000000000000..a1a30a4840e1
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_common.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_common.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_eqs.c b/drivers/scsi/spfc/sphw_eqs.c
new file mode 120000
index 000000000000..74430dcb9dc5
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_eqs.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_eqs.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_hw_cfg.c b/drivers/scsi/spfc/sphw_hw_cfg.c
new file mode 120000
index 000000000000..4f43d68624c1
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_hw_cfg.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_hw_cfg.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_hw_comm.c b/drivers/scsi/spfc/sphw_hw_comm.c
new file mode 120000
index 000000000000..c943b3b2933a
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_hw_comm.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_hw_comm.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_hwdev.c b/drivers/scsi/spfc/sphw_hwdev.c
new file mode 120000
index 000000000000..b7279f17eaa2
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_hwdev.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_hwdev.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_hwif.c b/drivers/scsi/spfc/sphw_hwif.c
new file mode 120000
index 000000000000..d40ef71f9033
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_hwif.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_hwif.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_mbox.c b/drivers/scsi/spfc/sphw_mbox.c
new file mode 120000
index 000000000000..1b00fe7289cc
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_mbox.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_mbox.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_mgmt.c b/drivers/scsi/spfc/sphw_mgmt.c
new file mode 120000
index 000000000000..fd18a73e9d3a
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_mgmt.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_mgmt.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_prof_adap.c b/drivers/scsi/spfc/sphw_prof_adap.c
new file mode 120000
index 000000000000..fbc7db05dd27
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_prof_adap.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_prof_adap.c
\ No newline at end of file
diff --git a/drivers/scsi/spfc/sphw_wq.c b/drivers/scsi/spfc/sphw_wq.c
new file mode 120000
index 000000000000..cdfcb3a610c0
--- /dev/null
+++ b/drivers/scsi/spfc/sphw_wq.c
@@ -0,0 +1 @@
+../../net/ethernet/ramaxel/spnic/hw/sphw_wq.c
\ No newline at end of file
--
2.27.0
1
0
02 Nov '21
From: Yu Kuai <yukuai3(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: 182920, https://gitee.com/openeuler/kernel/issues/I4GLNX
CVE: NA
---------------------------
When a user passes 0x100000 as the index, nbd ends up creating the sysfs
dir "/sys/block/43:0":
nbd_dev_add
disk->first_minor = index << part_shift
-> default part_shift is 5, 0x100000 << 5 = 0x2000000
device_add_disk
blk_alloc_devt
MKDEV(disk->major, disk->first_minor + part->partno)
-> (0x2b << 20) | (0x2000000) = 0x2b00000
register_disk
device_add
device_create_sys_dev_entry
format_dev_t
MAJOR(devt) -> 0x2b00000 >> 20 = 0x2b
MINOR(devt) -> 0x2b00000 & 0xfffff = 0
sysfs_create_link -> /sys/block/43:0
If nbd already created a device with index 0, sysfs will complain
about the duplicate creation.
On the other hand, a similar duplicate creation will happen if
"index << part_shift" overflows to a value that is less than MINORMASK.
Thus fix the problem by adding a sanity check for first_minor.
Fixes: b0d9111a2d53 ("nbd: use an idr to keep track of nbd devices")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/block/nbd.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 2a3794801704a..33a52be762d24 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1755,7 +1755,18 @@ static int nbd_dev_add(int index)
refcount_set(&nbd->refs, 1);
INIT_LIST_HEAD(&nbd->list);
disk->major = NBD_MAJOR;
+
+ /*
+ * Too big index can cause duplicate creation of sysfs files/links,
+ * because MKDEV() expect that the max first minor is MINORMASK, or
+ * index << part_shift can overflow.
+ */
disk->first_minor = index << part_shift;
+ if (disk->first_minor < index || disk->first_minor > MINORMASK) {
+ err = -EINVAL;
+ goto out_free_tags;
+ }
+
disk->fops = &nbd_fops;
disk->private_data = nbd;
sprintf(disk->disk_name, "nbd%d", index);
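
The collision described in the changelog can be reproduced outside the kernel; here is a sketch using the same constants (NBD major 43, the kernel's 20-bit minor field, and the default part_shift of 5 mentioned in the changelog):

```c
#include <stdio.h>

#define MINORBITS	20
#define MINORMASK	((1U << MINORBITS) - 1)
#define MKDEV(ma, mi)	(((ma) << MINORBITS) | (mi))

int main(void)
{
	int part_shift = 5;		/* default, per the changelog */
	int index = 0x100000;
	unsigned int first_minor = index << part_shift;	/* 0x2000000 */
	unsigned int devt = MKDEV(43, first_minor);

	/* The high bits of first_minor spill out of the minor field... */
	printf("major=%u minor=%u\n", devt >> MINORBITS, devt & MINORMASK);
	/* ...so this prints major=43 minor=0, colliding with index 0. */
	return 0;
}
```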
--
2.25.1
1
0
[PATCH openEuler-1.0-LTS] perf: hisi: Fix compile error if defined MODULE
by Yang Yingliang 02 Nov '21
by Yang Yingliang 02 Nov '21
02 Nov '21
From: Lijun Fang <fanglijun3(a)huawei.com>
ascend inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4D4WR
CVE: NA
---------------------------
Fix a compile error when MODULE is defined.
Signed-off-by: Lijun Fang <fanglijun3(a)huawei.com>
Reviewed-by: Cheng Jian <cj.chengjian(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c
index 8f8b211788e0f..ca395252ccc3d 100644
--- a/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c
@@ -241,7 +241,7 @@ static int hisi_lpddrc_pmu_init_irq(struct hisi_pmu *lpddrc_pmu,
static const struct of_device_id lpddrc_of_match[] = {
{ .compatible = "hisilicon,lpddrc-pmu", },
{},
-}
+};
MODULE_DEVICE_TABLE(of, lpddrc_of_match);
static int hisi_lpddrc_pmu_init_data(struct platform_device *pdev,
--
2.25.1
1
0
02 Nov '21
From: Lin Ma <linma(a)zju.edu.cn>
mainline inclusion
from mainline-v5.15-rc6
commit 1b1499a817c90fd1ce9453a2c98d2a01cca0e775
category: bugfix
bugzilla: NA
CVE: CVE-2021-3760
-------------------------------------------------
The nci_core_conn_close_rsp_packet() function will release the conn_info
with the given conn_id. However, it needs to set rf_conn_info to NULL to
prevent other routines, such as nci_rf_intf_activated_ntf_packet(), from
triggering the UAF.
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)canonical.com>
Signed-off-by: Lin Ma <linma(a)zju.edu.cn>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)canonical.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
net/nfc/nci/rsp.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index e3bbf1937d0e9..7681f89dc312b 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -289,6 +289,8 @@ static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev,
ndev->cur_conn_id);
if (conn_info) {
list_del(&conn_info->list);
+ if (conn_info == ndev->rf_conn_info)
+ ndev->rf_conn_info = NULL;
devm_kfree(&ndev->nfc_dev->dev, conn_info);
}
}
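
The bug class fixed here is a cached pointer outliving the object it points to. Reduced to its essentials, with hypothetical names rather than the NCI code:

```c
#include <stdlib.h>

struct conn_info { int conn_id; };

static struct conn_info *rf_cache;	/* hypothetical cached pointer */

static void close_conn(struct conn_info *ci)
{
	/*
	 * Clearing the cache before freeing is the whole fix: otherwise
	 * 'rf_cache' keeps pointing at freed memory and any later
	 * dereference is a use-after-free.
	 */
	if (ci == rf_cache)
		rf_cache = NULL;
	free(ci);
}
```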
--
2.25.1
1
0
[PATCH openEuler-1.0-LTS 1/4] ipv4: use siphash instead of Jenkins in fnhe_hashfun()
by Yang Yingliang 02 Nov '21
by Yang Yingliang 02 Nov '21
02 Nov '21
From: Eric Dumazet <edumazet(a)google.com>
mainline inclusion
from mainline-v5.14
commit 6457378fe796815c973f631a1904e147d6ee33b1
category: bugfix
bugzilla: NA
CVE: CVE-2021-20322
-------------------------------------------------
A group of security researchers brought to our attention
the weakness of the hash function used in fnhe_hashfun().
Let's use siphash instead of Jenkins hash to considerably
reduce security risks.
Also remove the inline keyword; this really is distracting.
Fixes: d546c621542d ("ipv4: harden fnhe_hashfun()")
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Reported-by: Keyu Man <kman001(a)ucr.edu>
Cc: Willy Tarreau <w(a)1wt.eu>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Xu Jia <xujia39(a)huawei.com>
Reviewed-by: Yue Haibing <yuehaibing(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
net/ipv4/route.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 958df3427c34f..dd43e798194ab 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -614,14 +614,14 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
return oldest;
}
-static inline u32 fnhe_hashfun(__be32 daddr)
+static u32 fnhe_hashfun(__be32 daddr)
{
- static u32 fnhe_hashrnd __read_mostly;
- u32 hval;
+ static siphash_key_t fnhe_hash_key __read_mostly;
+ u64 hval;
- net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
- hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
- return hash_32(hval, FNHE_HASH_SHIFT);
+ net_get_random_once(&fnhe_hash_key, sizeof(fnhe_hash_key));
+ hval = siphash_1u32((__force u32) daddr, &fnhe_hash_key);
+ return hash_64(hval, FNHE_HASH_SHIFT);
}
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
--
2.25.1
1
3
From: Laibin Qiu <qiulaibin(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4FS3G?from=project-issue
CVE: NA
---------------------------
There are some language problems in the README file, and the Markdown
format syntax does not render correctly, so it needs to be adjusted.
Signed-off-by: suqin <suqin2(a)huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin(a)huawei.com>
Reviewed-by: Cheng Jian <cj.chengjian(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
README | 291 ++++++++++++++++++++++++++++++---------------------------
1 file changed, 155 insertions(+), 136 deletions(-)
diff --git a/README b/README
index 46c9ea3522c1c..21b2e09d62db2 100644
--- a/README
+++ b/README
@@ -1,174 +1,188 @@
-Contributions to openEuler kernel project
-=========================================
+# How to Contribute
+-------
-Sign CLA
---------
+- [How to Contribute](#How to Contribute)
-Before submitting any Contributions to openEuler, you have to sign CLA.
+ \- [Sign the CLA](#Sign the CLA)
-See:
- https://openeuler.org/zh/cla.html
- https://openeuler.org/en/cla.html
+ \- [Steps of submitting patches](#Steps of submitting patches)
-Steps of submitting patches
----------------------------
+ \- [Use the unified patch format](#Use the unified patch format)
-1. Compile and test your patches successfully.
-2. Generate patches
- Your patches should be based on top of latest openEuler branch, and should
- use git-format-patch to generate patches, and if it's a patchset, it's
- better to use --cover-letter option to describe what the patchset does.
+ \- [Define the patch format](#Define the patch format)
- Using scripts/checkpatch.pl to make sure there's no coding style issue.
+ \- [Examples](#Examples)
- And make sure your patch follow unified openEuler patch format describe
- below.
+ \- [Email client - Thunderbird settings](#Email client - Thunderbird settings)
-3. Send patch to openEuler mailing list
- Use this command to send patches to openEuler mailing list:
+- [Linux kernel](#Linux kernel)
- git send-email *.patch -to="kernel(a)openeuler.org" --suppress-cc=all
+### Sign the CLA
- *NOTE*: that you must add --suppress-cc=all if you use git send-email,
- otherwise the email will be cced to the people in upstream community and mailing
- lists.
+-------
- *See*: How to send patches using git-send-email
- https://git-scm.com/docs/git-send-email
+Before making any contributions to openEuler, sign the CLA first.
-4. Mark "v1, v2, v3 ..." in your patch subject if you have multiple versions
- to send out.
+Address: [https://openeuler.org/en/cla.html](https://openeuler.org/en/cla.html)
- Use --subject-prefix="PATCH v2" option to add v2 tag for patchset.
- git format-patch --subject-prefix="PATCH v2" -1
+### Steps of submitting patches
+-------
- Subject examples:
- Subject: [PATCH v2 01/27] fork: fix some -Wmissing-prototypes warnings
- Subject: [PATCH v3] ext2: improve scalability of bitmap searching
+**Step 1** Compile and test your patches.
-5. Upstream your kernel patch to kernel community is strongly recommended.
- openEuler will sync up with kernel master timely.
+**Step 2** Generate patches.
-6. Sign your work - the Developer’s Certificate of Origin
- As the same of upstream kernel community, you also need to sign your patch.
+Your patches should be generated based on the latest openEuler branch using git-format-patch. If your patches are in a patchset, it is better to use the **--cover-letter** option to describe what the patchset does.
- See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html
+Use **scripts/checkpatch.pl** to ensure that no coding style issue exists.
- The sign-off is a simple line at the end of the explanation for the patch,
- which certifies that you wrote it or otherwise have the right to pass it
- on as an open-source patch. The rules are pretty simple: if you can certify
- the below:
+In addition, ensure that your patches comply with the unified openEuler patch format described below.
- Developer’s Certificate of Origin 1.1
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+**Step 3** Send your patches to the openEuler mailing list.
- By making a contribution to this project, I certify that:
+To do so, run the following command:
- (a) The contribution was created in whole or in part by me and I have
- the right to submit it under the open source license indicated in
- the file; or
+ `git send-email *.patch -to="kernel(a)openeuler.org" --suppress-cc=all`
- (b The contribution is based upon previous work that, to the best of
- my knowledge, is covered under an appropriate open source license
- and I have the right under that license to submit that work with
- modifications, whether created in whole or in part by me, under
- the same open source license (unless I am permitted to submit under
- a different license), as indicated in the file; or
+*NOTE*: Add **--suppress-cc=all** if you use git-send-email; otherwise, the email will be copied to all people in the upstream community and mailing lists.
- (c) The contribution was provided directly to me by some other person
- who certified (a), (b) or (c) and I have not modified it.
+For details about how to send patches using git-send-email, see [https://git-scm.com/docs/git-send-email](https://git-scm.com/docs/git-send-….
- (d) I understand and agree that this project and the contribution are
- public and that a record of the contribution (including all personal
- information I submit with it, including my sign-off) is maintained
- indefinitely and may be redistributed consistent with this project
- or the open source license(s) involved.
+**Step 4** Mark "v1, v2, v3 ..." in your patch subject if you have multiple versions to send out.
- then you just add a line saying:
+Use the **--subject-prefix="PATCH v2"** option to add the v2 tag to the patchset.
- Signed-off-by: Random J Developer <random(a)developer.example.org>
+ `git format-patch --subject-prefix="PATCH v2" -1`
- using your real name (sorry, no pseudonyms or anonymous contributions.)
+Subject examples:
-Use unified patch format
-------------------------
+ Subject: [PATCH v2 01/27] fork: fix some -Wmissing-prototypes warnings
+
+ Subject: [PATCH v3] ext2: improve scalability of bitmap searching
+
+**Step 5** Upstream your kernel patches to the kernel community (recommended). openEuler will synchronize with the kernel master in a timely manner.
+
+**Step 6** Sign your work - the Developer’s Certificate of Origin.
+
+ Similar to the upstream kernel community, you also need to sign your patch.
+
+ For details, see [https://www.kernel.org/doc/html/latest/process/submitting-patches.html](htt….
+
+ The sign-off is a simple line at the end of the explanation of the patch, which certifies that you wrote it or otherwise have the right to pass it on as an open source patch. The rules are pretty simple. You can certify as below:
+
+ Developer’s Certificate of Origin 1.1
+
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ By making a contribution to this project, I certify that:
+
+ (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file;
+
+ (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file;
+
+ (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it.
+
+ (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved.
+
+Then you add a line saying:
+
+Signed-off-by: Random J Developer <random(a)developer.example.org>
+
+Use your real name (sorry, no pseudonyms or anonymous contributions).
+
+### Use the unified patch format
+-------
Reasons:
-1. long term maintainability
- openEuler will merge massive patches. If all patches are merged by casual
- changelog format without a unified format, the git log will be messy, and
- then it's hard to figure out the original patch.
+1. Long term maintainability
+
+ openEuler will merge massive patches. If all patches are merged by casual
+
+ changelog formats without a unified format, the git logs will be messy, and
+
+ then it is hard to figure out the original patches.
+
+2. Kernel upgrade
-2. kernel upgrade
- We definitely will upgrade our openEuler kernel in someday, using strict
- patch management will alleviate the pain to migrate patches during big upgrade.
+ We definitely will upgrade our openEuler kernel someday, so strict patch management
-3. easy for script parsing
- Keyword highlighting is necessary for script parsing.
+ will alleviate the pain to migrate patches during big upgrades.
-Patch format definition
------------------------
+3. Easy for script parsing
+
+ Keyword highlighting is necessary for script parsing.
+
+### Define the patch format
+-------
+
+[M] stands for "mandatory".
+
+[O] stands for "optional".
-[M] stands for "mandatory"
-[O] stands for "option"
$category can be: bug preparation, bugfix, perf, feature, doc, other...
-If category is feature, then we also need to add feature name like below:
- category: feature
- feature: YYY (the feature name)
+If category is feature, we need to add a feature name as below:
-If the patch is related to CVE or bugzilla, then we need add the corresponding
-tag like below (In general, it should include at least one of the following):
- CVE: $cve-id
- bugzilla: $bug-id
+```cpp
+category: feature
+feature: YYY (the feature name)
+```
-Additional changelog should include at least one of the flollwing:
- 1) Why we should apply this patch
- 2) What real problem in product does this patch resolved
- 3) How could we reproduce this bug or how to test
- 4) Other useful information for help to understand this patch or problem
+If the patch is related to CVE or bugzilla, we need to add the corresponding tag as below (In general, it should include at least one of the following):
-The detail information is very useful for porting patch to another kenrel branch.
+```cpp
+CVE: $cve-id
+bugzilla: $bug-id
+```
-Example for mainline patch:
+Additional changelog should include at least one of the following:
- mainline inclusion [M]
- from $mainline-version [M]
- commit $id [M]
- category: $category [M]
- bugzilla: $bug-id [O]
- CVE: $cve-id [O]
+1. Why we should apply this patch
- additional changelog [O]
+2. What real problem in the product does this patch resolve
+
+3. How could we reproduce this bug or how to test
+
+4. Other useful information for help to understand this patch or problem
+
+The detailed information is very useful for migrating a patch to another kernel branch.
+
+Example for mainline patch:
- --------------------------------
+```cpp
+mainline inclusion [M]
+from $mainline-version [M]
+commit $id [M]
+category: $category [M]
+bugzilla: $bug-id [O]
+CVE: $cve-id [O]
- original changelog
+additional changelog [O]
- Signed-off-by: $yourname <$yourname(a)huawei.com> [M]
+--------------------------------
- ($mainline-version could be mainline-3.5, mainline-3.6, etc...)
+original changelog
+Signed-off-by: $yourname <$yourname(a)huawei.com> [M]
+($mainline-version could be mainline-3.5, mainline-3.6, etc...)
+```
-Examples
---------
+### Examples
+-------
+```cpp
mainline inclusion
from mainline-4.10
commit 0becc0ae5b42828785b589f686725ff5bc3b9b25
category: bugfix
bugzilla: 3004
-CVE: NA
-
-The patch fixes a BUG_ON in the product: injecting single bit ECC error
-to memory before system boot use hardware inject tools, which cause a
-large amount of CMCI during system booting .
+CVE: N/A
-[ 1.146580] mce: [Hardware Error]: Machine check events logged
-[ 1.152908] ------------[ cut here ]------------
-[ 1.157751] kernel BUG at kernel/timer.c:951!
-[ 1.162321] invalid opcode: 0000 [#1] SMP
-...
+The patch fixes a BUG_ON in the product: injecting a single-bit ECC error into the memory before system boot using hardware injection tools will cause a large amount of CMCI during system booting.
+[ 1.146580] mce: [Hardware Error]: Machine check events logged
+[ 1.152908] ------------[ cut here ]------------
+[ 1.157751] kernel BUG at kernel/timer.c:951!
+[ 1.162321] invalid opcode: 0000 [#1] SMP
-------------------------------------------------
@@ -177,33 +191,38 @@ original changelog
<original S-O-B>
Signed-off-by: Zhang San <zhangsan(a)huawei.com>
Tested-by: Li Si <lisi(a)huawei.com>
+```
+
+### Email client - Thunderbird settings
+-------
+
+If you are a new developer in the kernel community, it is highly recommended that you use the Thunderbird mail client.
+
+1. Thunderbird Installation
+
+ Obtain the English version of Thunderbird from [http://www.mozilla.org/]( http://www.mozilla.org/) and install it on your system.
+
+ Download URL: https://www.thunderbird.net/en-US/thunderbird/all/
+
+2. Settings
+
+ 2.1 Use the plain text format instead of the HTML format.
+
+ Choose **Options > Account Settings > Composition & Addressing**, and do **NOT** select Compose message in HTML format.
-Email Client - Thunderbird Settings
------------------------------------
+ 2.2 Editor settings
-If you are newly developer in the kernel community, it is highly recommended
-to use thunderbird mail client.
+ **Tools > Options> Advanced > Config editor**
-1. Thunderbird Installation
- Get English version Thunderbird from http://www.mozilla.org/ and install
- it on your system。
+ \- To bring up Thunderbird's registry editor, set **mailnews.send_plaintext_flowed** to **false**.
- Download url: https://www.thunderbird.net/en-US/thunderbird/all/
+ \- Disable HTML Format: Set **mail.identity.id1.compose_html** to **false**.
-2. Settings
- 2.1 Use plain text format instead of HTML format
- Options -> Account Settings -> Composition & Addressing, do *NOT* select
- "Compose message in HTML format".
+ \- Enable UTF-8: Set **prefs.converted-to-utf8** to **true**.
- 2.2 Editor Settings
- Tools->Options->Advanced->Config editor.
+ \- View messages in UTF-8: Set **mailnews.view_default_charset** to **UTF-8**.
- - To bring up the thunderbird's registry editor, and set:
- "mailnews.send_plaintext_flowed" to "false".
- - Disable HTML Format: Set "mail.identity.id1.compose_html" to "false".
- - Enable UTF8: Set "prefs.converted-to-utf8" to "true".
- - View message in UTF-8: Set "mailnews.view_default_charset" to "UTF-8".
- - Set mailnews.wraplength to 9999 for avoiding auto-wrap
+ \- Set **mailnews.wraplength** to **9999** to avoid auto-wrap.
Linux kernel
============
--
2.25.1
1
0
01 Nov '21
Ramaxel inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4CBDP
CVE: NA
Signed-off-by: Yanling Song <songyl(a)ramaxel.com>
---
drivers/net/ethernet/ramaxel/spnic/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/ramaxel/spnic/Makefile b/drivers/net/ethernet/ramaxel/spnic/Makefile
index f86ccff374f6..207e1d9c431a 100644
--- a/drivers/net/ethernet/ramaxel/spnic/Makefile
+++ b/drivers/net/ethernet/ramaxel/spnic/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SPNIC) += spnic.o
-subdir-ccflags-y += -I$(src)/hw
+subdir-ccflags-y += -I$(srctree)/$(src)/hw
spnic-objs := hw/sphw_common.o \
hw/sphw_hwif.o \
--
2.27.0
2
1
01 Nov '21
Ramaxel inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4DBD7
CVE: NA
Signed-off-by: Yanling Song <songyl(a)ramaxel.com>
---
drivers/scsi/spfc/Makefile | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/spfc/Makefile b/drivers/scsi/spfc/Makefile
index 02fe0213e048..205eadc35318 100644
--- a/drivers/scsi/spfc/Makefile
+++ b/drivers/scsi/spfc/Makefile
@@ -1,9 +1,9 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SPFC) += spfc.o
-subdir-ccflags-y += -I$(src)/../../net/ethernet/ramaxel/spnic/hw
-subdir-ccflags-y += -I$(src)/hw
-subdir-ccflags-y += -I$(src)/common
+subdir-ccflags-y += -I$(srctree)/$(src)/../../net/ethernet/ramaxel/spnic/hw
+subdir-ccflags-y += -I$(srctree)/$(src)/hw
+subdir-ccflags-y += -I$(srctree)/$(src)/common
spfc-objs := common/unf_init.o \
common/unf_event.o \
--
2.27.0
2
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4FS3G?from=project-issue
CVE: NA
---------------------------
There are some language problems in the README file, and the Markdown
format syntax does not render correctly, so it needs to be adjusted.
Signed-off-by: suqin <suqin2(a)huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin(a)huawei.com>
Reviewed-by: Cheng Jian <cj.chengjian(a)huawei.com>
---
README | 291 ++++++++++++++++++++++++++++++---------------------------
1 file changed, 155 insertions(+), 136 deletions(-)
diff --git a/README b/README
index 46c9ea352..21b2e09d6 100644
--- a/README
+++ b/README
@@ -1,174 +1,188 @@
-Contributions to openEuler kernel project
-=========================================
+# How to Contribute
+-------
-Sign CLA
---------
+- [How to Contribute](#How to Contribute)
-Before submitting any Contributions to openEuler, you have to sign CLA.
+ \- [Sign the CLA](#Sign the CLA)
-See:
- https://openeuler.org/zh/cla.html
- https://openeuler.org/en/cla.html
+ \- [Steps of submitting patches](#Steps of submitting patches)
-Steps of submitting patches
----------------------------
+ \- [Use the unified patch format](#Use the unified patch format)
-1. Compile and test your patches successfully.
-2. Generate patches
- Your patches should be based on top of latest openEuler branch, and should
- use git-format-patch to generate patches, and if it's a patchset, it's
- better to use --cover-letter option to describe what the patchset does.
+ \- [Define the patch format](#Define the patch format)
- Using scripts/checkpatch.pl to make sure there's no coding style issue.
+ \- [Examples](#Examples)
- And make sure your patch follow unified openEuler patch format describe
- below.
+ \- [Email client - Thunderbird settings](#Email client - Thunderbird settings)
-3. Send patch to openEuler mailing list
- Use this command to send patches to openEuler mailing list:
+- [Linux kernel](#Linux kernel)
- git send-email *.patch -to="kernel(a)openeuler.org" --suppress-cc=all
+### Sign the CLA
- *NOTE*: that you must add --suppress-cc=all if you use git send-email,
- otherwise the email will be cced to the people in upstream community and mailing
- lists.
+-------
- *See*: How to send patches using git-send-email
- https://git-scm.com/docs/git-send-email
+Before making any contributions to openEuler, sign the CLA first.
-4. Mark "v1, v2, v3 ..." in your patch subject if you have multiple versions
- to send out.
+Address: [https://openeuler.org/en/cla.html](https://openeuler.org/en/cla.html)
- Use --subject-prefix="PATCH v2" option to add v2 tag for patchset.
- git format-patch --subject-prefix="PATCH v2" -1
+### Steps of submitting patches
+-------
- Subject examples:
- Subject: [PATCH v2 01/27] fork: fix some -Wmissing-prototypes warnings
- Subject: [PATCH v3] ext2: improve scalability of bitmap searching
+**Step 1** Compile and test your patches.
-5. Upstream your kernel patch to kernel community is strongly recommended.
- openEuler will sync up with kernel master timely.
+**Step 2** Generate patches.
-6. Sign your work - the Developer’s Certificate of Origin
- As the same of upstream kernel community, you also need to sign your patch.
+Your patches should be generated based on the latest openEuler branch using git-format-patch. If your patches are in a patchset, it is better to use the **--cover-letter** option to describe what the patchset does.
- See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html
+Use **scripts/checkpatch.pl** to ensure that no coding style issue exists.
- The sign-off is a simple line at the end of the explanation for the patch,
- which certifies that you wrote it or otherwise have the right to pass it
- on as an open-source patch. The rules are pretty simple: if you can certify
- the below:
+In addition, ensure that your patches comply with the unified openEuler patch format described below.
- Developer’s Certificate of Origin 1.1
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+**Step 3** Send your patches to the openEuler mailing list.
- By making a contribution to this project, I certify that:
+To do so, run the following command:
- (a) The contribution was created in whole or in part by me and I have
- the right to submit it under the open source license indicated in
- the file; or
+ `git send-email *.patch -to="kernel(a)openeuler.org" --suppress-cc=all`
- (b The contribution is based upon previous work that, to the best of
- my knowledge, is covered under an appropriate open source license
- and I have the right under that license to submit that work with
- modifications, whether created in whole or in part by me, under
- the same open source license (unless I am permitted to submit under
- a different license), as indicated in the file; or
+*NOTE*: Add **--suppress-cc=all** if you use git-send-email; otherwise, the email will be copied to all people in the upstream community and mailing lists.
- (c) The contribution was provided directly to me by some other person
- who certified (a), (b) or (c) and I have not modified it.
+For details about how to send patches using git-send-email, see [https://git-scm.com/docs/git-send-email](https://git-scm.com/docs/git-send-….
- (d) I understand and agree that this project and the contribution are
- public and that a record of the contribution (including all personal
- information I submit with it, including my sign-off) is maintained
- indefinitely and may be redistributed consistent with this project
- or the open source license(s) involved.
+**Step 4** Mark "v1, v2, v3 ..." in your patch subject if you have multiple versions to send out.
- then you just add a line saying:
+Use the **--subject-prefix="PATCH v2"** option to add the v2 tag to the patchset.
- Signed-off-by: Random J Developer <random(a)developer.example.org>
+ `git format-patch --subject-prefix="PATCH v2" -1`
- using your real name (sorry, no pseudonyms or anonymous contributions.)
+Subject examples:
-Use unified patch format
-------------------------
+ Subject: [PATCH v2 01/27] fork: fix some -Wmissing-prototypes warnings
+
+ Subject: [PATCH v3] ext2: improve scalability of bitmap searching
+
+**Step 5** Upstream your kernel patches to the kernel community (recommended). openEuler will synchronize with the kernel master in a timely manner.
+
+**Step 6** Sign your work - the Developer’s Certificate of Origin.
+
+ Similar to the upstream kernel community, you also need to sign your patch.
+
+ For details, see [https://www.kernel.org/doc/html/latest/process/submitting-patches.html](htt….
+
+ The sign-off is a simple line at the end of the explanation of the patch, which certifies that you wrote it or otherwise have the right to pass it on as an open source patch. The rules are pretty simple. You can certify as below:
+
+ Developer’s Certificate of Origin 1.1
+
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ By making a contribution to this project, I certify that:
+
+ (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file;
+
+ (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file;
+
+ (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it.
+
+ (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved.
+
+Then you add a line saying:
+
+Signed-off-by: Random J Developer <random(a)developer.example.org>
+
+Use your real name (sorry, no pseudonyms or anonymous contributions).
+
+### Use the unified patch format
+-------
Reasons:
-1. long term maintainability
- openEuler will merge massive patches. If all patches are merged by casual
- changelog format without a unified format, the git log will be messy, and
- then it's hard to figure out the original patch.
+1. Long term maintainability
+
+ openEuler will merge massive patches. If all patches are merged by casual
+
+ changelog formats without a unified format, the git logs will be messy, and
+
+ then it is hard to figure out the original patches.
+
+2. Kernel upgrade
-2. kernel upgrade
- We definitely will upgrade our openEuler kernel in someday, using strict
- patch management will alleviate the pain to migrate patches during big upgrade.
+ We definitely will upgrade our openEuler kernel in someday, so strict patch management
-3. easy for script parsing
- Keyword highlighting is necessary for script parsing.
+ will alleviate the pain to migrate patches during big upgrades.
-Patch format definition
------------------------
+3. Easy for script parsing
+
+ Keyword highlighting is necessary for script parsing.
+
+### Define the patch format
+-------
+
+[M] stands for "mandatory".
+
+[O] stands for "option".
-[M] stands for "mandatory"
-[O] stands for "option"
$category can be: bug preparation, bugfix, perf, feature, doc, other...
-If category is feature, then we also need to add feature name like below:
- category: feature
- feature: YYY (the feature name)
+If category is feature, we need to add a feature name as below:
-If the patch is related to CVE or bugzilla, then we need add the corresponding
-tag like below (In general, it should include at least one of the following):
- CVE: $cve-id
- bugzilla: $bug-id
+```cpp
+category: feature
+feature: YYY (the feature name)
+```
-Additional changelog should include at least one of the flollwing:
- 1) Why we should apply this patch
- 2) What real problem in product does this patch resolved
- 3) How could we reproduce this bug or how to test
- 4) Other useful information for help to understand this patch or problem
+If the patch is related to CVE or bugzilla, we need to add the corresponding tag as below (In general, it should include at least one of the following):
-The detail information is very useful for porting patch to another kenrel branch.
+```cpp
+CVE: $cve-id
+bugzilla: $bug-id
+```
-Example for mainline patch:
+Additional changelog should include at least one of the following:
- mainline inclusion [M]
- from $mainline-version [M]
- commit $id [M]
- category: $category [M]
- bugzilla: $bug-id [O]
- CVE: $cve-id [O]
+1. Why we should apply this patch
- additional changelog [O]
+2. What real problems in the product does this patch resolved
+
+3. How could we reproduce this bug or how to test
+
+4. Other useful information for help to understand this patch or problem
+
+The detailed information is very useful for migrating a patch to another kernel branch.
+
+Example for mainline patch:
- --------------------------------
+```cpp
+mainline inclusion [M]
+from $mainline-version [M]
+commit $id [M]
+category: $category [M]
+bugzilla: $bug-id [O]
+CVE: $cve-id [O]
- original changelog
+additional changelog [O]
- Signed-off-by: $yourname <$yourname(a)huawei.com> [M]
+--------------------------------
- ($mainline-version could be mainline-3.5, mainline-3.6, etc...)
+original changelog
+Signed-off-by: $yourname <$yourname(a)huawei.com> [M]
+($mainline-version could be mainline-3.5, mainline-3.6, etc...)
+```
-Examples
---------
+### Examples
+-------
+```cpp
mainline inclusion
from mainline-4.10
commit 0becc0ae5b42828785b589f686725ff5bc3b9b25
category: bugfix
bugzilla: 3004
-CVE: NA
-
-The patch fixes a BUG_ON in the product: injecting single bit ECC error
-to memory before system boot use hardware inject tools, which cause a
-large amount of CMCI during system booting .
+CVE: N/A
-[ 1.146580] mce: [Hardware Error]: Machine check events logged
-[ 1.152908] ------------[ cut here ]------------
-[ 1.157751] kernel BUG at kernel/timer.c:951!
-[ 1.162321] invalid opcode: 0000 [#1] SMP
-...
+The patch fixes a BUG_ON in the product: Injecting a single bit ECC error to the memory before system boot using hardware inject tools will cause a large amount of CMCI during system booting .
+[ 1.146580] mce: [Hardware Error]: Machine check events logged
+[ 1.152908] ------------[ cut here ]------------
+[ 1.157751] kernel BUG at kernel/timer.c:951!
+[ 1.162321] invalid opcode: 0000 [#1] SMP
-------------------------------------------------
@@ -177,33 +191,38 @@ original changelog
<original S-O-B>
Signed-off-by: Zhang San <zhangsan(a)huawei.com>
Tested-by: Li Si <lisi(a)huawei.com>
+```
+
+### Email client - Thunderbird settings
+-------
+
+If you are a new developer in the kernel community, it is highly recommended that you use the Thunderbird mail client.
+
+1. Thunderbird Installation
+
+ Obtain the English version of Thunderbird from [http://www.mozilla.org/]( http://www.mozilla.org/) and install it on your system.
+
+ Download URL: https://www.thunderbird.net/en-US/thunderbird/all/
+
+2. Settings
+
+ 2.1 Use the plain text format instead of the HTML format.
+
+ Choose **Options > Account Settings > Composition & Addressing**, and do **NOT** select Compose message in HTML format.
-Email Client - Thunderbird Settings
------------------------------------
+ 2.2 Editor settings
-If you are newly developer in the kernel community, it is highly recommended
-to use thunderbird mail client.
+ **Tools > Options> Advanced > Config editor**
-1. Thunderbird Installation
- Get English version Thunderbird from http://www.mozilla.org/ and install
- it on your system。
+ \- To bring up the Thunderbird's registry editor, set **mailnews.send_plaintext_flowed** to **false**.
- Download url: https://www.thunderbird.net/en-US/thunderbird/all/
+ \- Disable HTML Format: Set **mail.identity.id1.compose_html** to **false**.
-2. Settings
- 2.1 Use plain text format instead of HTML format
- Options -> Account Settings -> Composition & Addressing, do *NOT* select
- "Compose message in HTML format".
+ \- Enable UTF-8: Set **prefs.converted-to-utf8** to **true**.
- 2.2 Editor Settings
- Tools->Options->Advanced->Config editor.
+ \- View messages in UTF-8: Set **mailnews.view_default_charset** to **UTF-8**.
- - To bring up the thunderbird's registry editor, and set:
- "mailnews.send_plaintext_flowed" to "false".
- - Disable HTML Format: Set "mail.identity.id1.compose_html" to "false".
- - Enable UTF8: Set "prefs.converted-to-utf8" to "true".
- - View message in UTF-8: Set "mailnews.view_default_charset" to "UTF-8".
- - Set mailnews.wraplength to 9999 for avoiding auto-wrap
+ \- Set **mailnews.wraplength** to **9999** to avoid auto-wrap.
Linux kernel
============
--
2.22.0
[PATCH openEuler-1.0-LTS] PM: hibernate: Get block device exclusively in swsusp_check()
by Yang Yingliang 01 Nov '21
From: Ye Bin <yebin10(a)huawei.com>
mainline inclusion
from mainline-v5.16
commit 39fbef4b0f77f9c89c8f014749ca533643a37c9f
category: bugfix
bugzilla: 182871
CVE: NA
-----------------------------------------------
The following kernel crash can be triggered:
[ 89.266592] ------------[ cut here ]------------
[ 89.267427] kernel BUG at fs/buffer.c:3020!
[ 89.268264] invalid opcode: 0000 [#1] SMP KASAN PTI
[ 89.269116] CPU: 7 PID: 1750 Comm: kmmpd-loop0 Not tainted 5.10.0-862.14.0.6.x86_64-08610-gc932cda3cef4-dirty #20
[ 89.273169] RIP: 0010:submit_bh_wbc.isra.0+0x538/0x6d0
[ 89.277157] RSP: 0018:ffff888105ddfd08 EFLAGS: 00010246
[ 89.278093] RAX: 0000000000000005 RBX: ffff888124231498 RCX: ffffffffb2772612
[ 89.279332] RDX: 1ffff11024846293 RSI: 0000000000000008 RDI: ffff888124231498
[ 89.280591] RBP: ffff8881248cc000 R08: 0000000000000001 R09: ffffed1024846294
[ 89.281851] R10: ffff88812423149f R11: ffffed1024846293 R12: 0000000000003800
[ 89.283095] R13: 0000000000000001 R14: 0000000000000000 R15: ffff8881161f7000
[ 89.284342] FS: 0000000000000000(0000) GS:ffff88839b5c0000(0000) knlGS:0000000000000000
[ 89.285711] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 89.286701] CR2: 00007f166ebc01a0 CR3: 0000000435c0e000 CR4: 00000000000006e0
[ 89.287919] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 89.289138] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 89.290368] Call Trace:
[ 89.290842] write_mmp_block+0x2ca/0x510
[ 89.292218] kmmpd+0x433/0x9a0
[ 89.294902] kthread+0x2dd/0x3e0
[ 89.296268] ret_from_fork+0x22/0x30
[ 89.296906] Modules linked in:
by running the following commands:
1. mkfs.ext4 -O mmp /dev/sda -b 1024
2. mount /dev/sda /home/test
3. echo "/dev/sda" > /sys/power/resume
That happens because swsusp_check() calls set_blocksize() on the
target partition, which confuses the file system:
Thread1                                Thread2
mount /dev/sda /home/test
get s_mmp_bh  --> has mapped flag
start kmmpd thread
                                       echo "/dev/sda" > /sys/power/resume
                                       resume_store
                                        software_resume
                                         swsusp_check
                                          set_blocksize
                                           truncate_inode_pages_range
                                            truncate_cleanup_page
                                             block_invalidatepage
                                              discard_buffer --> clean mapped flag
write_mmp_block
 submit_bh
  submit_bh_wbc
   BUG_ON(!buffer_mapped(bh))
To address this issue, modify swsusp_check() to open the target block
device with exclusive access.
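A minimal sketch of the exclusive-open pattern, using the pre-5.9
blkdev_get_by_dev()/blkdev_put() API of this tree (the function name is
illustrative, not part of the patch):
```c
#include <linux/blkdev.h>
#include <linux/err.h>

/* Sketch: open a block device with FMODE_EXCL so that concurrent
 * exclusive openers, such as mount, get -EBUSY instead of racing with
 * us. 'holder' is only an opaque cookie identifying the owner. */
static int probe_device_exclusively(dev_t dev)
{
        static void *holder;
        struct block_device *bdev;

        bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_EXCL, &holder);
        if (IS_ERR(bdev))
                return PTR_ERR(bdev);

        /* ... inspect the device, e.g. read the hibernation signature ... */

        blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
        return 0;
}
```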
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
kernel/power/swap.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index d7f6c1a288d33..4fde37fce4ea4 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -1512,9 +1512,10 @@ int swsusp_read(unsigned int *flags_p)
int swsusp_check(void)
{
int error;
+ void *holder;
hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
- FMODE_READ, NULL);
+ FMODE_READ | FMODE_EXCL, &holder);
if (!IS_ERR(hib_resume_bdev)) {
set_blocksize(hib_resume_bdev, PAGE_SIZE);
clear_page(swsusp_header);
@@ -1536,7 +1537,7 @@ int swsusp_check(void)
put:
if (error)
- blkdev_put(hib_resume_bdev, FMODE_READ);
+ blkdev_put(hib_resume_bdev, FMODE_READ | FMODE_EXCL);
else
pr_debug("Image signature found, resuming\n");
} else {
--
2.25.1
1
0
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4FS3G?from=project-issue
CVE: NA
---------------------------
There are some language problems in the README file, and the Markdown format
syntax does not render correctly, so the file needs to be adjusted.
Signed-off-by: suqin <suqin2(a)huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin(a)huawei.com>
Reviewed-by: Cheng Jian <cj.chengjian(a)huawei.com>
---
README | 311 +++++++++++++++++++++++++++++----------------------------
1 file changed, 161 insertions(+), 150 deletions(-)
diff --git a/README b/README
index 46c9ea352..3374c4726 100644
--- a/README
+++ b/README
@@ -1,174 +1,188 @@
-Contributions to openEuler kernel project
-=========================================
+# How to Contribute
+-------
-Sign CLA
---------
+- [How to Contribute](#How to Contribute)
-Before submitting any Contributions to openEuler, you have to sign CLA.
+ \- [Sign the CLA](#Sign the CLA)
-See:
- https://openeuler.org/zh/cla.html
- https://openeuler.org/en/cla.html
+ \- [Steps of submitting patches](#Steps of submitting patches)
-Steps of submitting patches
----------------------------
+ \- [Use the unified patch format](#Use the unified patch format)
-1. Compile and test your patches successfully.
-2. Generate patches
- Your patches should be based on top of latest openEuler branch, and should
- use git-format-patch to generate patches, and if it's a patchset, it's
- better to use --cover-letter option to describe what the patchset does.
+ \- [Define the patch format](#Define the patch format)
- Using scripts/checkpatch.pl to make sure there's no coding style issue.
+ \- [Examples](#Examples)
- And make sure your patch follow unified openEuler patch format describe
- below.
+ \- [Email client - Thunderbird settings](#Email client - Thunderbird settings)
-3. Send patch to openEuler mailing list
- Use this command to send patches to openEuler mailing list:
+- [Linux kernel](#Linux kernel)
- git send-email *.patch -to="kernel(a)openeuler.org" --suppress-cc=all
+### Sign the CLA
- *NOTE*: that you must add --suppress-cc=all if you use git send-email,
- otherwise the email will be cced to the people in upstream community and mailing
- lists.
+-------
- *See*: How to send patches using git-send-email
- https://git-scm.com/docs/git-send-email
+Before making any contributions to openEuler, sign the CLA first.
-4. Mark "v1, v2, v3 ..." in your patch subject if you have multiple versions
- to send out.
+Address: [https://openeuler.org/en/cla.html](https://openeuler.org/en/cla.html)
- Use --subject-prefix="PATCH v2" option to add v2 tag for patchset.
- git format-patch --subject-prefix="PATCH v2" -1
+### Steps of submitting patches
+-------
- Subject examples:
- Subject: [PATCH v2 01/27] fork: fix some -Wmissing-prototypes warnings
- Subject: [PATCH v3] ext2: improve scalability of bitmap searching
+**Step 1** Compile and test your patches.
-5. Upstream your kernel patch to kernel community is strongly recommended.
- openEuler will sync up with kernel master timely.
+**Step 2** Generate patches.
-6. Sign your work - the Developer’s Certificate of Origin
- As the same of upstream kernel community, you also need to sign your patch.
+Your patches should be generated based on the latest openEuler branch using git-format-patch. If your patches are in a patchset, it is better to use the **--cover-letter** option to describe what the patchset does.
- See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html
+Use **scripts/checkpatch.pl** to ensure that no coding style issue exists.
- The sign-off is a simple line at the end of the explanation for the patch,
- which certifies that you wrote it or otherwise have the right to pass it
- on as an open-source patch. The rules are pretty simple: if you can certify
- the below:
+In addition, ensure that your patches comply with the unified openEuler patch format described below.
- Developer’s Certificate of Origin 1.1
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+**Step 3** Send your patches to the openEuler mailing list.
- By making a contribution to this project, I certify that:
+To do so, run the following command:
- (a) The contribution was created in whole or in part by me and I have
- the right to submit it under the open source license indicated in
- the file; or
+ `git send-email *.patch -to="kernel(a)openeuler.org" --suppress-cc=all`
- (b The contribution is based upon previous work that, to the best of
- my knowledge, is covered under an appropriate open source license
- and I have the right under that license to submit that work with
- modifications, whether created in whole or in part by me, under
- the same open source license (unless I am permitted to submit under
- a different license), as indicated in the file; or
+*NOTE*: Add **--suppress-cc=all** if you use git-send-email; otherwise, the email will be copied to all people in the upstream community and mailing lists.
- (c) The contribution was provided directly to me by some other person
- who certified (a), (b) or (c) and I have not modified it.
+For details about how to send patches using git-send-email, see [https://git-scm.com/docs/git-send-email](https://git-scm.com/docs/git-send-….
- (d) I understand and agree that this project and the contribution are
- public and that a record of the contribution (including all personal
- information I submit with it, including my sign-off) is maintained
- indefinitely and may be redistributed consistent with this project
- or the open source license(s) involved.
+**Step 4** Mark "v1, v2, v3 ..." in your patch subject if you have multiple versions to send out.
- then you just add a line saying:
+Use the **--subject-prefix="PATCH v2"** option to add the v2 tag to the patchset.
- Signed-off-by: Random J Developer <random(a)developer.example.org>
+ `git format-patch --subject-prefix="PATCH v2" -1`
- using your real name (sorry, no pseudonyms or anonymous contributions.)
+Subject examples:
-Use unified patch format
-------------------------
+ Subject: [PATCH v2 01/27] fork: fix some -Wmissing-prototypes warnings
+
+ Subject: [PATCH v3] ext2: improve scalability of bitmap searching
+
+**Step 5** Upstream your kernel patches to the kernel community (recommended). openEuler will synchronize with the kernel master in a timely manner.
+
+**Step 6** Sign your work - the Developer’s Certificate of Origin.
+
+ Similar to the upstream kernel community, you also need to sign your patch.
+
+ For details, see [https://www.kernel.org/doc/html/latest/process/submitting-patches.html](htt….
+
+ The sign-off is a simple line at the end of the explanation of the patch, which certifies that you wrote it or otherwise have the right to pass it on as an open source patch. The rules are pretty simple. You can certify as below:
+
+ Developer’s Certificate of Origin 1.1
+
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ By making a contribution to this project, I certify that:
+
+ (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file;
+
+ (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file;
+
+ (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it.
+
+ (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved.
+
+Then you add a line saying:
+
+Signed-off-by: Random J Developer <random(a)developer.example.org>
+
+Use your real name (sorry, no pseudonyms or anonymous contributions).
+
+### Use the unified patch format
+-------
Reasons:
-1. long term maintainability
- openEuler will merge massive patches. If all patches are merged by casual
- changelog format without a unified format, the git log will be messy, and
- then it's hard to figure out the original patch.
+1. Long term maintainability
+
+ openEuler will merge massive patches. If all patches are merged by casual
+
+ changelog formats without a unified format, the git logs will be messy, and
+
+ then it is hard to figure out the original patches.
+
+2. Kernel upgrade
-2. kernel upgrade
- We definitely will upgrade our openEuler kernel in someday, using strict
- patch management will alleviate the pain to migrate patches during big upgrade.
+ We definitely will upgrade our openEuler kernel in someday, so strict patch management
-3. easy for script parsing
- Keyword highlighting is necessary for script parsing.
+ will alleviate the pain to migrate patches during big upgrades.
-Patch format definition
------------------------
+3. Easy for script parsing
+
+ Keyword highlighting is necessary for script parsing.
+
+### Define the patch format
+-------
+
+[M] stands for "mandatory".
+
+[O] stands for "option".
-[M] stands for "mandatory"
-[O] stands for "option"
$category can be: bug preparation, bugfix, perf, feature, doc, other...
-If category is feature, then we also need to add feature name like below:
- category: feature
- feature: YYY (the feature name)
+If category is feature, we need to add a feature name as below:
-If the patch is related to CVE or bugzilla, then we need add the corresponding
-tag like below (In general, it should include at least one of the following):
- CVE: $cve-id
- bugzilla: $bug-id
+```cpp
+category: feature
+feature: YYY (the feature name)
+```
-Additional changelog should include at least one of the flollwing:
- 1) Why we should apply this patch
- 2) What real problem in product does this patch resolved
- 3) How could we reproduce this bug or how to test
- 4) Other useful information for help to understand this patch or problem
+If the patch is related to CVE or bugzilla, we need to add the corresponding tag as below (In general, it should include at least one of the following):
-The detail information is very useful for porting patch to another kenrel branch.
+```cpp
+CVE: $cve-id
+bugzilla: $bug-id
+```
-Example for mainline patch:
+Additional changelog should include at least one of the following:
- mainline inclusion [M]
- from $mainline-version [M]
- commit $id [M]
- category: $category [M]
- bugzilla: $bug-id [O]
- CVE: $cve-id [O]
+1. Why we should apply this patch
- additional changelog [O]
+2. What real problems in the product does this patch resolved
+
+3. How could we reproduce this bug or how to test
+
+4. Other useful information for help to understand this patch or problem
+
+The detailed information is very useful for migrating a patch to another kernel branch.
+
+Example for mainline patch:
- --------------------------------
+```cpp
+mainline inclusion [M]
+from $mainline-version [M]
+commit $id [M]
+category: $category [M]
+bugzilla: $bug-id [O]
+CVE: $cve-id [O]
- original changelog
+additional changelog [O]
- Signed-off-by: $yourname <$yourname(a)huawei.com> [M]
+--------------------------------
- ($mainline-version could be mainline-3.5, mainline-3.6, etc...)
+original changelog
+Signed-off-by: $yourname <$yourname(a)huawei.com> [M]
+($mainline-version could be mainline-3.5, mainline-3.6, etc...)
+```
-Examples
---------
+### Examples
+-------
+```cpp
mainline inclusion
from mainline-4.10
commit 0becc0ae5b42828785b589f686725ff5bc3b9b25
category: bugfix
bugzilla: 3004
-CVE: NA
-
-The patch fixes a BUG_ON in the product: injecting single bit ECC error
-to memory before system boot use hardware inject tools, which cause a
-large amount of CMCI during system booting .
+CVE: N/A
-[ 1.146580] mce: [Hardware Error]: Machine check events logged
-[ 1.152908] ------------[ cut here ]------------
-[ 1.157751] kernel BUG at kernel/timer.c:951!
-[ 1.162321] invalid opcode: 0000 [#1] SMP
-...
+The patch fixes a BUG_ON in the product: Injecting a single bit ECC error to the memory before system boot using hardware inject tools will cause a large amount of CMCI during system booting .
+[ 1.146580] mce: [Hardware Error]: Machine check events logged
+[ 1.152908] ------------[ cut here ]------------
+[ 1.157751] kernel BUG at kernel/timer.c:951!
+[ 1.162321] invalid opcode: 0000 [#1] SMP
-------------------------------------------------
@@ -177,50 +191,47 @@ original changelog
<original S-O-B>
Signed-off-by: Zhang San <zhangsan(a)huawei.com>
Tested-by: Li Si <lisi(a)huawei.com>
+```
+
+### Email client - Thunderbird settings
+-------
+
+If you are a new developer in the kernel community, it is highly recommended that you use the Thunderbird mail client.
+
+1. Thunderbird Installation
+
+ Obtain the English version of Thunderbird from [http://www.mozilla.org/]( http://www.mozilla.org/) and install it on your system.
+
+ Download URL: https://www.thunderbird.net/en-US/thunderbird/all/
+
+2. Settings
+
+ 2.1 Use the plain text format instead of the HTML format.
+
+ Choose **Options > Account Settings > Composition & Addressing**, and do **NOT** select Compose message in HTML format.
-Email Client - Thunderbird Settings
------------------------------------
+ 2.2 Editor settings
-If you are newly developer in the kernel community, it is highly recommended
-to use thunderbird mail client.
+ **Tools > Options> Advanced > Config editor**
-1. Thunderbird Installation
- Get English version Thunderbird from http://www.mozilla.org/ and install
- it on your system。
+ \- To bring up the Thunderbird's registry editor, set **mailnews.send_plaintext_flowed** to **false**.
- Download url: https://www.thunderbird.net/en-US/thunderbird/all/
+ \- Disable HTML Format: Set **mail.identity.id1.compose_html** to **false**.
-2. Settings
- 2.1 Use plain text format instead of HTML format
- Options -> Account Settings -> Composition & Addressing, do *NOT* select
- "Compose message in HTML format".
+ \- Enable UTF-8: Set **prefs.converted-to-utf8** to **true**.
- 2.2 Editor Settings
- Tools->Options->Advanced->Config editor.
+ \- View messages in UTF-8: Set **mailnews.view_default_charset** to **UTF-8**.
- - To bring up the thunderbird's registry editor, and set:
- "mailnews.send_plaintext_flowed" to "false".
- - Disable HTML Format: Set "mail.identity.id1.compose_html" to "false".
- - Enable UTF8: Set "prefs.converted-to-utf8" to "true".
- - View message in UTF-8: Set "mailnews.view_default_charset" to "UTF-8".
- - Set mailnews.wraplength to 9999 for avoiding auto-wrap
+ \- Set **mailnews.wraplength** to **9999** to avoid auto-wrap.
-Linux kernel
-============
+# Linux kernel
+-------
-There are several guides for kernel developers and users. These guides can
-be rendered in a number of formats, like HTML and PDF. Please read
-Documentation/admin-guide/README.rst first.
+There are several guides for kernel developers and users, which can be rendered in a number of formats, like HTML and PDF. You can read **Documentation/admin-guide/README.rst** first.
-In order to build the documentation, use ``make htmldocs`` or
-``make pdfdocs``. The formatted documentation can also be read online at:
+In order to build the documentation, use **make htmldocs** or **make pdfdocs**. The formatted documentation can also be read online at: https://www.kernel.org/doc/html/latest/
- https://www.kernel.org/doc/html/latest/
+There are various text files in the Documentation/ subdirectory, several of which use the Restructured Text markup notation. See Documentation/00-INDEX for a list of what is contained in each file.
-There are various text files in the Documentation/ subdirectory,
-several of them using the Restructured Text markup notation.
-See Documentation/00-INDEX for a list of what is contained in each file.
+Read the **Documentation/process/changes.rst** file, as it contains the requirements for building and running the kernel, and information about the problems that may be caused by upgrading your kernel.
-Please read the Documentation/process/changes.rst file, as it contains the
-requirements for building and running the kernel, and information about
-the problems which may result by upgrading your kernel.
--
2.22.0
[PATCH openEuler-1.0-LTS] blk-cgroup: synchronize blkg creation against policy deactivation
by Yang Yingliang 01 Nov '21
From: Yu Kuai <yukuai3(a)huawei.com>
mainline inclusion
from mainline-v5.16
commit 0c9d338c8443b06da8e8d3bfce824c5ea6d3488f
category: bugfix
bugzilla: 182378
CVE: NA
---------------------------
Our test reports a null pointer dereference:
[ 168.534653] ==================================================================
[ 168.535614] Disabling lock debugging due to kernel taint
[ 168.536346] BUG: kernel NULL pointer dereference, address: 0000000000000008
[ 168.537274] #PF: supervisor read access in kernel mode
[ 168.537964] #PF: error_code(0x0000) - not-present page
[ 168.538667] PGD 0 P4D 0
[ 168.539025] Oops: 0000 [#1] PREEMPT SMP KASAN
[ 168.539656] CPU: 13 PID: 759 Comm: bash Tainted: G B 5.15.0-rc2-next-202100
[ 168.540954] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_0738364
[ 168.542736] RIP: 0010:bfq_pd_init+0x88/0x1e0
[ 168.543318] Code: 98 00 00 00 e8 c9 e4 5b ff 4c 8b 65 00 49 8d 7c 24 08 e8 bb e4 5b ff 4d0
[ 168.545803] RSP: 0018:ffff88817095f9c0 EFLAGS: 00010002
[ 168.546497] RAX: 0000000000000001 RBX: ffff888101a1c000 RCX: 0000000000000000
[ 168.547438] RDX: 0000000000000003 RSI: 0000000000000002 RDI: ffff888106553428
[ 168.548402] RBP: ffff888106553400 R08: ffffffff961bcaf4 R09: 0000000000000001
[ 168.549365] R10: ffffffffa2e16c27 R11: fffffbfff45c2d84 R12: 0000000000000000
[ 168.550291] R13: ffff888101a1c098 R14: ffff88810c7a08c8 R15: ffffffffa55541a0
[ 168.551221] FS: 00007fac75227700(0000) GS:ffff88839ba80000(0000) knlGS:0000000000000000
[ 168.552278] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 168.553040] CR2: 0000000000000008 CR3: 0000000165ce7000 CR4: 00000000000006e0
[ 168.554000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 168.554929] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 168.555888] Call Trace:
[ 168.556221] <TASK>
[ 168.556510] blkg_create+0x1c0/0x8c0
[ 168.556989] blkg_conf_prep+0x574/0x650
[ 168.557502] ? stack_trace_save+0x99/0xd0
[ 168.558033] ? blkcg_conf_open_bdev+0x1b0/0x1b0
[ 168.558629] tg_set_conf.constprop.0+0xb9/0x280
[ 168.559231] ? kasan_set_track+0x29/0x40
[ 168.559758] ? kasan_set_free_info+0x30/0x60
[ 168.560344] ? tg_set_limit+0xae0/0xae0
[ 168.560853] ? do_sys_openat2+0x33b/0x640
[ 168.561383] ? do_sys_open+0xa2/0x100
[ 168.561877] ? __x64_sys_open+0x4e/0x60
[ 168.562383] ? __kasan_check_write+0x20/0x30
[ 168.562951] ? copyin+0x48/0x70
[ 168.563390] ? _copy_from_iter+0x234/0x9e0
[ 168.563948] tg_set_conf_u64+0x17/0x20
[ 168.564467] cgroup_file_write+0x1ad/0x380
[ 168.565014] ? cgroup_file_poll+0x80/0x80
[ 168.565568] ? __mutex_lock_slowpath+0x30/0x30
[ 168.566165] ? pgd_free+0x100/0x160
[ 168.566649] kernfs_fop_write_iter+0x21d/0x340
[ 168.567246] ? cgroup_file_poll+0x80/0x80
[ 168.567796] new_sync_write+0x29f/0x3c0
[ 168.568314] ? new_sync_read+0x410/0x410
[ 168.568840] ? __handle_mm_fault+0x1c97/0x2d80
[ 168.569425] ? copy_page_range+0x2b10/0x2b10
[ 168.570007] ? _raw_read_lock_bh+0xa0/0xa0
[ 168.570622] vfs_write+0x46e/0x630
[ 168.571091] ksys_write+0xcd/0x1e0
[ 168.571563] ? __x64_sys_read+0x60/0x60
[ 168.572081] ? __kasan_check_write+0x20/0x30
[ 168.572659] ? do_user_addr_fault+0x446/0xff0
[ 168.573264] __x64_sys_write+0x46/0x60
[ 168.573774] do_syscall_64+0x35/0x80
[ 168.574264] entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 168.574960] RIP: 0033:0x7fac74915130
[ 168.575456] Code: 73 01 c3 48 8b 0d 58 ed 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 444
[ 168.577969] RSP: 002b:00007ffc3080e288 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 168.578986] RAX: ffffffffffffffda RBX: 0000000000000009 RCX: 00007fac74915130
[ 168.579937] RDX: 0000000000000009 RSI: 000056007669f080 RDI: 0000000000000001
[ 168.580884] RBP: 000056007669f080 R08: 000000000000000a R09: 00007fac75227700
[ 168.581841] R10: 000056007655c8f0 R11: 0000000000000246 R12: 0000000000000009
[ 168.582796] R13: 0000000000000001 R14: 00007fac74be55e0 R15: 00007fac74be08c0
[ 168.583757] </TASK>
[ 168.584063] Modules linked in:
[ 168.584494] CR2: 0000000000000008
[ 168.584964] ---[ end trace 2475611ad0f77a1a ]---
This is because blkg_alloc() is called from blkg_conf_prep() without
holding 'q->queue_lock', and the elevator is exited before blkg_create():
thread 1                           thread 2
blkg_conf_prep
 spin_lock_irq(&q->queue_lock);
 blkg_lookup_check -> return NULL
 spin_unlock_irq(&q->queue_lock);
 blkg_alloc
  blkcg_policy_enabled -> true
  pd = ->pd_alloc_fn
  blkg->pd[i] = pd
                                   blk_mq_exit_sched
                                    bfq_exit_queue
                                     blkcg_deactivate_policy
                                      spin_lock_irq(&q->queue_lock);
                                      __clear_bit(pol->plid, q->blkcg_pols);
                                      spin_unlock_irq(&q->queue_lock);
                                      q->elevator = NULL;
 spin_lock_irq(&q->queue_lock);
 blkg_create
  if (blkg->pd[i])
   ->pd_init_fn -> q->elevator is NULL
 spin_unlock_irq(&q->queue_lock);
Because blkcg_deactivate_policy() requires the queue to be frozen, we can
grab q_usage_counter to synchronize blkg_conf_prep() against
blkcg_deactivate_policy().
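A minimal sketch of that synchronization, reusing this tree's
blk_queue_enter()/blk_queue_exit() interface (the wrapper name is
illustrative):
```c
#include <linux/blkdev.h>

/* Sketch: pin q_usage_counter across a section that must not race with
 * a queue freeze. blkcg_deactivate_policy() freezes the queue first, so
 * it cannot clear q->blkcg_pols or q->elevator while we hold the ref. */
static int blkg_setup_pinned(struct request_queue *q)
{
        int ret;

        ret = blk_queue_enter(q, 0);    /* fails if the queue is dying */
        if (ret)
                return ret;

        /* ... blkcg_policy_enabled() check and blkg/pd allocation ... */

        blk_queue_exit(q);              /* drop q_usage_counter */
        return 0;
}
```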
Fixes: e21b7a0b9887 ("block, bfq: add full hierarchical scheduling and cgroups support")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Acked-by: Tejun Heo <tj(a)kernel.org>
Link: https://lore.kernel.org/r/20211020014036.2141723-1-yukuai3@huawei.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Conflict: block/blk-cgroup.c
- commit ed6cddefdfd3 ("block: convert the rest of block to
bdev_get_queue") is not backported.
- commit 015d254cb02b ("blkcg: separate blkcg_conf_get_disk() out of
blkg_conf_prep()") is not backported.
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
block/blk-cgroup.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 7dca3a0144243..d4a8d8fbe1a0e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -857,6 +857,14 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
q = disk->queue;
+ /*
+ * blkcg_deactivate_policy() requires queue to be frozen, we can grab
+ * q_usage_counter to prevent concurrent with blkcg_deactivate_policy().
+ */
+ ret = blk_queue_enter(q, 0);
+ if (ret)
+ goto fail;
+
rcu_read_lock();
spin_lock_irq(q->queue_lock);
@@ -891,13 +899,13 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
new_blkg = blkg_alloc(pos, q, GFP_KERNEL);
if (unlikely(!new_blkg)) {
ret = -ENOMEM;
- goto fail;
+ goto fail_exit_queue;
}
if (radix_tree_preload(GFP_KERNEL)) {
blkg_free(new_blkg);
ret = -ENOMEM;
- goto fail;
+ goto fail_exit_queue;
}
rcu_read_lock();
@@ -926,6 +934,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
goto success;
}
success:
+ blk_queue_exit(q);
ctx->disk = disk;
ctx->blkg = blkg;
ctx->body = body;
@@ -936,6 +945,8 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
fail_unlock:
spin_unlock_irq(q->queue_lock);
rcu_read_unlock();
+fail_exit_queue:
+ blk_queue_exit(q);
fail:
put_disk_and_module(disk);
/*
--
2.25.1
[PATCH openEuler-1.0-LTS] isdn: cpai: check ctr->cnr to avoid array index out of bound
by Yang Yingliang 01 Nov '21
From: Xiaolong Huang <butterflyhuangxx(a)gmail.com>
stable inclusion
from linux-4.19.214
commit 7d91adc0ccb060ce564103315189466eb822cc6a
CVE: CVE-2021-3896
--------------------------------
commit 1f3e2e97c003f80c4b087092b225c8787ff91e4d upstream.
The cmtp_add_connection() would add a cmtp session to a controller
and run a kernel thread to process cmtp.
    __module_get(THIS_MODULE);
    session->task = kthread_run(cmtp_session, session, "kcmtpd_ctr_%d",
                                session->num);
During this process, the kernel thread would call detach_capi_ctr()
to detach a registered controller. If the controller
was not attached yet, detach_capi_ctr() would
trigger an array-index-out-of-bounds bug.
[ 46.866069][ T6479] UBSAN: array-index-out-of-bounds in
drivers/isdn/capi/kcapi.c:483:21
[ 46.867196][ T6479] index -1 is out of range for type 'capi_ctr *[32]'
[ 46.867982][ T6479] CPU: 1 PID: 6479 Comm: kcmtpd_ctr_0 Not tainted
5.15.0-rc2+ #8
[ 46.869002][ T6479] Hardware name: QEMU Standard PC (i440FX + PIIX,
1996), BIOS 1.14.0-2 04/01/2014
[ 46.870107][ T6479] Call Trace:
[ 46.870473][ T6479] dump_stack_lvl+0x57/0x7d
[ 46.870974][ T6479] ubsan_epilogue+0x5/0x40
[ 46.871458][ T6479] __ubsan_handle_out_of_bounds.cold+0x43/0x48
[ 46.872135][ T6479] detach_capi_ctr+0x64/0xc0
[ 46.872639][ T6479] cmtp_session+0x5c8/0x5d0
[ 46.873131][ T6479] ? __init_waitqueue_head+0x60/0x60
[ 46.873712][ T6479] ? cmtp_add_msgpart+0x120/0x120
[ 46.874256][ T6479] kthread+0x147/0x170
[ 46.874709][ T6479] ? set_kthread_struct+0x40/0x40
[ 46.875248][ T6479] ret_from_fork+0x1f/0x30
[ 46.875773][ T6479]
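The fix is to validate ctr->cnr before it is used as a 1-based array
index; a minimal sketch of the guard, with the constants from kcapi.c:
```c
/* Sketch: ctr->cnr indexes capi_controller[CAPI_MAXCONTR] starting at 1.
 * Reject anything outside [1, CAPI_MAXCONTR] before dereferencing, so a
 * never-attached controller (cnr == 0, giving index -1) cannot
 * underflow the array. */
if (ctr->cnr < 1 || ctr->cnr - 1 >= CAPI_MAXCONTR) {
        err = -EINVAL;
        goto unlock_out;
}
```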
Signed-off-by: Xiaolong Huang <butterflyhuangxx(a)gmail.com>
Acked-by: Arnd Bergmann <arnd(a)arndb.de>
Link: https://lore.kernel.org/r/20211008065830.305057-1-butterflyhuangxx@gmail.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Rui Xiang <rui.xiang(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/isdn/capi/kcapi.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index a4ceb61c5b603..ed9ee2bbf232e 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -565,6 +565,11 @@ int detach_capi_ctr(struct capi_ctr *ctr)
ctr_down(ctr, CAPI_CTR_DETACHED);
+ if (ctr->cnr < 1 || ctr->cnr - 1 >= CAPI_MAXCONTR) {
+ err = -EINVAL;
+ goto unlock_out;
+ }
+
if (capi_controller[ctr->cnr - 1] != ctr) {
err = -EINVAL;
goto unlock_out;
--
2.25.1
01 Nov '21
From: Ye Bin <yebin10(a)huawei.com>
mainline inclusion
from mainline-v5.16
commit 0c98057be9efa32de78dbc4685fc73da9d71faa1
category: bugfix
bugzilla: 182939
CVE: NA
-----------------------------------------------
I got an issue as follows:
[ 263.886511] BUG: KASAN: use-after-free in pid_show+0x11f/0x13f
[ 263.888359] Read of size 4 at addr ffff8880bf0648c0 by task cat/746
[ 263.890479] CPU: 0 PID: 746 Comm: cat Not tainted 4.19.90-dirty #140
[ 263.893162] Call Trace:
[ 263.893509] dump_stack+0x108/0x15f
[ 263.893999] print_address_description+0xa5/0x372
[ 263.894641] kasan_report.cold+0x236/0x2a8
[ 263.895696] __asan_report_load4_noabort+0x25/0x30
[ 263.896365] pid_show+0x11f/0x13f
[ 263.897422] dev_attr_show+0x48/0x90
[ 263.898361] sysfs_kf_seq_show+0x24d/0x4b0
[ 263.899479] kernfs_seq_show+0x14e/0x1b0
[ 263.900029] seq_read+0x43f/0x1150
[ 263.900499] kernfs_fop_read+0xc7/0x5a0
[ 263.903764] vfs_read+0x113/0x350
[ 263.904231] ksys_read+0x103/0x270
[ 263.905230] __x64_sys_read+0x77/0xc0
[ 263.906284] do_syscall_64+0x106/0x360
[ 263.906797] entry_SYSCALL_64_after_hwframe+0x44/0xa9
Reproduce this issue as follows:
1. nbd-server 8000 /tmp/disk
2. nbd-client localhost 8000 /dev/nbd1
3. cat /sys/block/nbd1/pid
Then a use-after-free is triggered in pid_show().
The reason is that after step 2, the nbd-client process has already
exited, so its task_struct has already been freed.
To solve this issue, revert part of commit 6521d39a64b3 and remove the
useless 'recv_task' member of nbd_device.
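A minimal sketch of the resulting pattern (the attach helper is
illustrative; pid_show() matches the driver):
```c
#include <linux/sched.h>

/* Sketch: store the client's pid by value. A cached task_struct pointer
 * would dangle once the nbd-client task exits; a pid_t stays printable. */
static void nbd_record_client(struct nbd_device *nbd)
{
        nbd->pid = task_pid_nr(current);  /* was: nbd->task_recv = current */
}

static ssize_t pid_show(struct device *dev,
                        struct device_attribute *attr, char *buf)
{
        struct gendisk *disk = dev_to_disk(dev);
        struct nbd_device *nbd = disk->private_data;

        return sprintf(buf, "%d\n", nbd->pid);  /* no task_struct dereference */
}
```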
Fixes: 6521d39a64b3 ("nbd: Remove variable 'pid'")
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Link: https://lore.kernel.org/r/20211020073959.2679255-1-yebin10@huawei.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
conflicts:
drivers/block/nbd.c
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/block/nbd.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 45e6ae6add382..2a3794801704a 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -112,11 +112,11 @@ struct nbd_device {
struct workqueue_struct *recv_workq;
struct list_head list;
- struct task_struct *task_recv;
struct task_struct *task_setup;
struct completion *destroy_complete;
unsigned long flags;
+ pid_t pid; /* pid of nbd-client, if attached */
};
#define NBD_CMD_REQUEUED 1
@@ -211,7 +211,7 @@ static ssize_t pid_show(struct device *dev,
struct gendisk *disk = dev_to_disk(dev);
struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
- return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
+ return sprintf(buf, "%d\n", nbd->pid);
}
static const struct device_attribute pid_attr = {
@@ -329,7 +329,7 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
struct nbd_config *config = nbd->config;
config->blksize = blocksize;
config->bytesize = blocksize * nr_blocks;
- if (nbd->task_recv != NULL)
+ if (nbd->pid)
nbd_size_update(nbd, false);
}
@@ -1234,7 +1234,7 @@ static void nbd_config_put(struct nbd_device *nbd)
if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
&config->runtime_flags))
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
- nbd->task_recv = NULL;
+ nbd->pid = 0;
nbd_clear_sock(nbd);
if (config->num_connections) {
int i;
@@ -1269,7 +1269,7 @@ static int nbd_start_device(struct nbd_device *nbd)
int num_connections = config->num_connections;
int error = 0, i;
- if (nbd->task_recv)
+ if (nbd->pid)
return -EBUSY;
if (!config->socks)
return -EINVAL;
@@ -1288,7 +1288,7 @@ static int nbd_start_device(struct nbd_device *nbd)
}
blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
- nbd->task_recv = current;
+ nbd->pid = task_pid_nr(current);
nbd_parse_flags(nbd);
@@ -1546,8 +1546,8 @@ static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
{
struct nbd_device *nbd = s->private;
- if (nbd->task_recv)
- seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));
+ if (nbd->pid)
+ seq_printf(s, "recv: %d\n", nbd->pid);
return 0;
}
@@ -2106,7 +2106,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&nbd->config_lock);
config = nbd->config;
if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
- !nbd->task_recv) {
+ !nbd->pid) {
dev_err(nbd_to_dev(nbd),
"not configured, cannot reconfigure\n");
ret = -EINVAL;
--
2.25.1
[PATCH openEuler-1.0-LTS] iommu/arm-smmu-v3: Add suspend and resume support
by Yang Yingliang 01 Nov '21
From: Bixuan Cui <cuibixuan(a)huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4DZ7Q
CVE: NA
-------------------------------------------------------
Add suspend and resume support for SMMUv3. The SMMU is
stopped when suspending and restarted when resuming.
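The wiring follows the standard platform-driver PM pattern; a condensed
sketch of the hunks below (the suspend hook is a no-op because the SMMU
registers are cleared on power-down, and resume re-runs the reset
sequence):
```c
#ifdef CONFIG_PM_SLEEP
static const struct dev_pm_ops arm_smmu_pm_ops = {
        .suspend = arm_smmu_suspend,  /* no-op: state is lost when powered off */
        .resume  = arm_smmu_resume,   /* calls arm_smmu_device_reset(smmu, true) */
};
#define ARM_SMMU_PM_OPS (&arm_smmu_pm_ops)
#else
#define ARM_SMMU_PM_OPS NULL          /* compiled out without PM_SLEEP */
#endif

/* hooked up via .pm = ARM_SMMU_PM_OPS in the platform_driver below */
```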
Signed-off-by: Bixuan Cui <cuibixuan(a)huawei.com>
Signed-off-by: Zhou Guanghui <zhouguanghui1(a)huawei.com>
Reviewed-by: Hanjun Guo <guohanjun(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/iommu/arm-smmu-v3.c | 98 +++++++++++++++++++++++++++++++++----
1 file changed, 88 insertions(+), 10 deletions(-)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 560980c54014a..12d503bb3b1e0 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -693,6 +693,7 @@ struct arm_smmu_device {
unsigned int mpam_partid_max;
unsigned int mpam_pmg_max;
+ bool bypass;
};
struct arm_smmu_stream {
@@ -3455,6 +3456,13 @@ static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
doorbell &= MSI_CFG0_ADDR_MASK;
+#ifdef CONFIG_PM_SLEEP
+ /* Saves the msg (base addr of msi irq) and restores it during resume */
+ desc->msg.address_lo = msg->address_lo;
+ desc->msg.address_hi = msg->address_hi;
+ desc->msg.data = msg->data;
+#endif
+
writeq_relaxed(doorbell, smmu->base + cfg[0]);
writel_relaxed(msg->data, smmu->base + cfg[1]);
writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
@@ -3510,6 +3518,40 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
devm_add_action(dev, arm_smmu_free_msis, dev);
}
+#ifdef CONFIG_PM_SLEEP
+static void arm_smmu_resume_msis(struct arm_smmu_device *smmu)
+{
+ struct msi_desc *desc;
+ struct device *dev = smmu->dev;
+
+ for_each_msi_entry(desc, dev) {
+ switch (desc->platform.msi_index) {
+ case EVTQ_MSI_INDEX:
+ case GERROR_MSI_INDEX:
+ case PRIQ_MSI_INDEX: {
+ phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
+ struct msi_msg *msg = &desc->msg;
+ phys_addr_t doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
+
+ doorbell &= MSI_CFG0_ADDR_MASK;
+ writeq_relaxed(doorbell, smmu->base + cfg[0]);
+ writel_relaxed(msg->data, smmu->base + cfg[1]);
+ writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE,
+ smmu->base + cfg[2]);
+ break;
+ }
+ default:
+ continue;
+
+ }
+ }
+}
+#else
+static void arm_smmu_resume_msis(struct arm_smmu_device *smmu)
+{
+}
+#endif
+
static void arm_smmu_setup_message_based_spi(struct arm_smmu_device *smmu)
{
struct irq_desc *desc;
@@ -3541,11 +3583,17 @@ static void arm_smmu_setup_message_based_spi(struct arm_smmu_device *smmu)
}
}
-static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
+static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu, bool resume)
{
int irq, ret;
- arm_smmu_setup_msis(smmu);
+ if (!resume)
+ arm_smmu_setup_msis(smmu);
+ else {
+ /* The irq doesn't need to be re-requested during resume */
+ arm_smmu_resume_msis(smmu);
+ return;
+ }
/* Request interrupt lines */
irq = smmu->evtq.q.irq;
@@ -3587,7 +3635,7 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
}
}
-static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
+static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu, bool resume)
{
int ret, irq;
u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
@@ -3614,7 +3662,7 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
if (ret < 0)
dev_warn(smmu->dev, "failed to enable combined irq\n");
} else
- arm_smmu_setup_unique_irqs(smmu);
+ arm_smmu_setup_unique_irqs(smmu, resume);
if (smmu->features & ARM_SMMU_FEAT_PRI)
irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
@@ -3642,7 +3690,7 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
return ret;
}
-static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
+static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool resume)
{
int ret;
u32 reg, enables;
@@ -3747,7 +3795,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
}
}
- ret = arm_smmu_setup_irqs(smmu);
+ ret = arm_smmu_setup_irqs(smmu, resume);
if (ret) {
dev_err(smmu->dev, "failed to setup irqs\n");
return ret;
@@ -3757,7 +3805,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
/* Enable the SMMU interface, or ensure bypass */
- if (!bypass || disable_bypass) {
+ if (!smmu->bypass || disable_bypass) {
enables |= CR0_SMMUEN;
} else {
ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
@@ -4383,6 +4431,26 @@ int arm_smmu_get_dev_user_mpam_en(struct device *dev, int *user_mpam_en)
}
EXPORT_SYMBOL(arm_smmu_get_dev_user_mpam_en);
+#ifdef CONFIG_PM_SLEEP
+static int arm_smmu_suspend(struct device *dev)
+{
+ /*
+ * The smmu is powered off and related registers are automatically
+ * cleared when suspend. No need to do anything.
+ */
+ return 0;
+}
+
+static int arm_smmu_resume(struct device *dev)
+{
+ struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+
+ arm_smmu_device_reset(smmu, true);
+
+ return 0;
+}
+#endif
+
static int arm_smmu_device_probe(struct platform_device *pdev)
{
int irq, ret;
@@ -4390,7 +4458,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
resource_size_t ioaddr;
struct arm_smmu_device *smmu;
struct device *dev = &pdev->dev;
- bool bypass;
smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
if (!smmu) {
@@ -4408,7 +4475,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
}
/* Set bypass mode according to firmware probing result */
- bypass = !!ret;
+ smmu->bypass = !!ret;
/* Base address */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -4454,7 +4521,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, smmu);
/* Reset the device */
- ret = arm_smmu_device_reset(smmu, bypass);
+ ret = arm_smmu_device_reset(smmu, false);
if (ret)
return ret;
@@ -4527,11 +4594,22 @@ static const struct of_device_id arm_smmu_of_match[] = {
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
+#ifdef CONFIG_PM_SLEEP
+static const struct dev_pm_ops arm_smmu_pm_ops = {
+ .suspend = arm_smmu_suspend,
+ .resume = arm_smmu_resume,
+};
+#define ARM_SMMU_PM_OPS (&arm_smmu_pm_ops)
+#else
+#define ARM_SMMU_PM_OPS NULL
+#endif
+
static struct platform_driver arm_smmu_driver = {
.driver = {
.name = "arm-smmu-v3",
.of_match_table = of_match_ptr(arm_smmu_of_match),
.suppress_bind_attrs = true,
+ .pm = ARM_SMMU_PM_OPS,
},
.probe = arm_smmu_device_probe,
.remove = arm_smmu_device_remove,
--
2.25.1
[PATCH openEuler-1.0-LTS 1/2] scsi: scsi_debug: Fix out-of-bound read in resp_readcap16()
by Yang Yingliang 01 Nov '21
From: Ye Bin <yebin10(a)huawei.com>
mainline inclusion
from mainline-v5.16
commit 4e3ace0051e7e504b55d239daab8789dd89b863c
category: bugfix
bugzilla: 176010
CVE: NA
-----------------------------------------------
The following warning was observed running syzkaller:
[ 3813.830724] sg_write: data in/out 65466/242 bytes for SCSI command 0x9e-- guessing data in;
[ 3813.830724] program syz-executor not setting count and/or reply_len properly
[ 3813.836956] ==================================================================
[ 3813.839465] BUG: KASAN: stack-out-of-bounds in sg_copy_buffer+0x157/0x1e0
[ 3813.841773] Read of size 4096 at addr ffff8883cf80f540 by task syz-executor/1549
[ 3813.846612] Call Trace:
[ 3813.846995] dump_stack+0x108/0x15f
[ 3813.847524] print_address_description+0xa5/0x372
[ 3813.848243] kasan_report.cold+0x236/0x2a8
[ 3813.849439] check_memory_region+0x240/0x270
[ 3813.850094] memcpy+0x30/0x80
[ 3813.850553] sg_copy_buffer+0x157/0x1e0
[ 3813.853032] sg_copy_from_buffer+0x13/0x20
[ 3813.853660] fill_from_dev_buffer+0x135/0x370
[ 3813.854329] resp_readcap16+0x1ac/0x280
[ 3813.856917] schedule_resp+0x41f/0x1630
[ 3813.858203] scsi_debug_queuecommand+0xb32/0x17e0
[ 3813.862699] scsi_dispatch_cmd+0x330/0x950
[ 3813.863329] scsi_request_fn+0xd8e/0x1710
[ 3813.863946] __blk_run_queue+0x10b/0x230
[ 3813.864544] blk_execute_rq_nowait+0x1d8/0x400
[ 3813.865220] sg_common_write.isra.0+0xe61/0x2420
[ 3813.871637] sg_write+0x6c8/0xef0
[ 3813.878853] __vfs_write+0xe4/0x800
[ 3813.883487] vfs_write+0x17b/0x530
[ 3813.884008] ksys_write+0x103/0x270
[ 3813.886268] __x64_sys_write+0x77/0xc0
[ 3813.886841] do_syscall_64+0x106/0x360
[ 3813.887415] entry_SYSCALL_64_after_hwframe+0x44/0xa9
This issue can be reproduced with the following syzkaller log:
r0 = openat(0xffffffffffffff9c, &(0x7f0000000040)='./file0\x00', 0x26e1, 0x0)
r1 = syz_open_procfs(0xffffffffffffffff, &(0x7f0000000000)='fd/3\x00')
open_by_handle_at(r1, &(0x7f00000003c0)=ANY=[@ANYRESHEX], 0x602000)
r2 = syz_open_dev$sg(&(0x7f0000000000), 0x0, 0x40782)
write$binfmt_aout(r2, &(0x7f0000000340)=ANY=[@ANYBLOB="00000000deff000000000000000000000000000000000000000000000000000047f007af9e107a41ec395f1bded7be24277a1501ff6196a83366f4e6362bc0ff2b247f68a972989b094b2da4fb3607fcf611a22dd04310d28c75039d"], 0x126)
In resp_readcap16() we get an "int alloc_len" value of -1104926854, and then
pass the huge arr_len to fill_from_dev_buffer(), but arr is only 32 bytes.
This leads to an OOB read in sg_copy_buffer().
To solve this issue, define alloc_len as u32.
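A minimal sketch of the signedness pitfall, reusing the names from the
hunks below (inside resp_readcap16(); SDEBUG_READCAP16_ARR_SZ is 32):
```c
#include <asm/unaligned.h>
#include <linux/kernel.h>       /* min_t() */

/* Buggy: a large big-endian length from userspace wraps the signed int
 * negative, min() keeps the negative value, and it turns into a huge
 * copy length further down the call chain. */
int bad_len = get_unaligned_be32(cmd + 10);     /* e.g. -1104926854 */

/* Fixed: keep the length unsigned and clamp it to the 32-byte buffer. */
u32 alloc_len = get_unaligned_be32(cmd + 10);
u32 len = min_t(u32, alloc_len, SDEBUG_READCAP16_ARR_SZ);
```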
Link: https://lore.kernel.org/r/20211013033913.2551004-2-yebin10@huawei.com
Acked-by: Douglas Gilbert <dgilbert(a)interlog.com>
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
conflicts:
drivers/scsi/scsi_debug.c
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/scsi/scsi_debug.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 082b2695e02f3..d8befc6fbec9c 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1665,7 +1665,7 @@ static int resp_readcap16(struct scsi_cmnd *scp,
{
unsigned char *cmd = scp->cmnd;
unsigned char arr[SDEBUG_READCAP16_ARR_SZ];
- int alloc_len;
+ u32 alloc_len;
alloc_len = get_unaligned_be32(cmd + 10);
/* following just in case virtual_gb changed */
@@ -1694,7 +1694,7 @@ static int resp_readcap16(struct scsi_cmnd *scp,
}
return fill_from_dev_buffer(scp, arr,
- min(alloc_len, SDEBUG_READCAP16_ARR_SZ));
+ min_t(u32, alloc_len, SDEBUG_READCAP16_ARR_SZ));
}
#define SDEBUG_MAX_TGTPGS_ARR_SZ 1412
--
2.25.1
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4FS3G?from=project-issue
CVE: NA
---------------------------
There are some language problems in the README file, and the Markdown format
syntax does not render correctly, so the file needs to be adjusted.
Signed-off-by: suqin <suqin2(a)huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin(a)huawei.com>
---
README | 226 ----------------------------------------
README.md | 237 ++++++++++++++++++++++++++++++++++++++++++
scripts/checkpatch.pl | 2 +-
3 files changed, 238 insertions(+), 227 deletions(-)
delete mode 100644 README
create mode 100644 README.md
diff --git a/README b/README
deleted file mode 100644
index 46c9ea352..000000000
--- a/README
+++ /dev/null
@@ -1,226 +0,0 @@
-Contributions to openEuler kernel project
-=========================================
-
-Sign CLA
---------
-
-Before submitting any Contributions to openEuler, you have to sign CLA.
-
-See:
- https://openeuler.org/zh/cla.html
- https://openeuler.org/en/cla.html
-
-Steps of submitting patches
----------------------------
-
-1. Compile and test your patches successfully.
-2. Generate patches
- Your patches should be based on top of latest openEuler branch, and should
- use git-format-patch to generate patches, and if it's a patchset, it's
- better to use --cover-letter option to describe what the patchset does.
-
- Using scripts/checkpatch.pl to make sure there's no coding style issue.
-
- And make sure your patch follow unified openEuler patch format describe
- below.
-
-3. Send patch to openEuler mailing list
- Use this command to send patches to openEuler mailing list:
-
- git send-email *.patch -to="kernel(a)openeuler.org" --suppress-cc=all
-
- *NOTE*: that you must add --suppress-cc=all if you use git send-email,
- otherwise the email will be cced to the people in upstream community and mailing
- lists.
-
- *See*: How to send patches using git-send-email
- https://git-scm.com/docs/git-send-email
-
-4. Mark "v1, v2, v3 ..." in your patch subject if you have multiple versions
- to send out.
-
- Use --subject-prefix="PATCH v2" option to add v2 tag for patchset.
- git format-patch --subject-prefix="PATCH v2" -1
-
- Subject examples:
- Subject: [PATCH v2 01/27] fork: fix some -Wmissing-prototypes warnings
- Subject: [PATCH v3] ext2: improve scalability of bitmap searching
-
-5. Upstream your kernel patch to kernel community is strongly recommended.
- openEuler will sync up with kernel master timely.
-
-6. Sign your work - the Developer’s Certificate of Origin
- As the same of upstream kernel community, you also need to sign your patch.
-
- See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html
-
- The sign-off is a simple line at the end of the explanation for the patch,
- which certifies that you wrote it or otherwise have the right to pass it
- on as an open-source patch. The rules are pretty simple: if you can certify
- the below:
-
- Developer’s Certificate of Origin 1.1
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
- By making a contribution to this project, I certify that:
-
- (a) The contribution was created in whole or in part by me and I have
- the right to submit it under the open source license indicated in
- the file; or
-
- (b The contribution is based upon previous work that, to the best of
- my knowledge, is covered under an appropriate open source license
- and I have the right under that license to submit that work with
- modifications, whether created in whole or in part by me, under
- the same open source license (unless I am permitted to submit under
- a different license), as indicated in the file; or
-
- (c) The contribution was provided directly to me by some other person
- who certified (a), (b) or (c) and I have not modified it.
-
- (d) I understand and agree that this project and the contribution are
- public and that a record of the contribution (including all personal
- information I submit with it, including my sign-off) is maintained
- indefinitely and may be redistributed consistent with this project
- or the open source license(s) involved.
-
- then you just add a line saying:
-
- Signed-off-by: Random J Developer <random(a)developer.example.org>
-
- using your real name (sorry, no pseudonyms or anonymous contributions.)
-
-Use unified patch format
-------------------------
-
-Reasons:
-
-1. long term maintainability
- openEuler will merge massive patches. If all patches are merged by casual
- changelog format without a unified format, the git log will be messy, and
- then it's hard to figure out the original patch.
-
-2. kernel upgrade
- We definitely will upgrade our openEuler kernel in someday, using strict
- patch management will alleviate the pain to migrate patches during big upgrade.
-
-3. easy for script parsing
- Keyword highlighting is necessary for script parsing.
-
-Patch format definition
------------------------
-
-[M] stands for "mandatory"
-[O] stands for "option"
-$category can be: bug preparation, bugfix, perf, feature, doc, other...
-
-If category is feature, then we also need to add feature name like below:
- category: feature
- feature: YYY (the feature name)
-
-If the patch is related to CVE or bugzilla, then we need add the corresponding
-tag like below (In general, it should include at least one of the following):
- CVE: $cve-id
- bugzilla: $bug-id
-
-Additional changelog should include at least one of the flollwing:
- 1) Why we should apply this patch
- 2) What real problem in the product does this patch resolve
- 3) How could we reproduce this bug or how to test
- 4) Other useful information for help to understand this patch or problem
-
-The detailed information is very useful for porting the patch to another kernel branch.
-
-Example for mainline patch:
-
- mainline inclusion [M]
- from $mainline-version [M]
- commit $id [M]
- category: $category [M]
- bugzilla: $bug-id [O]
- CVE: $cve-id [O]
-
- additional changelog [O]
-
- --------------------------------
-
- original changelog
-
- Signed-off-by: $yourname <$yourname(a)huawei.com> [M]
-
- ($mainline-version could be mainline-3.5, mainline-3.6, etc...)
-
-Examples
---------
-
-mainline inclusion
-from mainline-4.10
-commit 0becc0ae5b42828785b589f686725ff5bc3b9b25
-category: bugfix
-bugzilla: 3004
-CVE: NA
-
-The patch fixes a BUG_ON in the product: injecting a single bit ECC error
- to memory before system boot using hardware inject tools, which causes a
- large amount of CMCI during system booting.
-
-[ 1.146580] mce: [Hardware Error]: Machine check events logged
-[ 1.152908] ------------[ cut here ]------------
-[ 1.157751] kernel BUG at kernel/timer.c:951!
-[ 1.162321] invalid opcode: 0000 [#1] SMP
-...
-
--------------------------------------------------
-
-original changelog
-
-<original S-O-B>
-Signed-off-by: Zhang San <zhangsan(a)huawei.com>
-Tested-by: Li Si <lisi(a)huawei.com>
-
-Email Client - Thunderbird Settings
------------------------------------
-
-If you are a new developer in the kernel community, it is highly recommended
-to use the Thunderbird mail client.
-
-1. Thunderbird Installation
- Get English version Thunderbird from http://www.mozilla.org/ and install
- it on your system.
-
- Download url: https://www.thunderbird.net/en-US/thunderbird/all/
-
-2. Settings
- 2.1 Use plain text format instead of HTML format
- Options -> Account Settings -> Composition & Addressing, do *NOT* select
- "Compose message in HTML format".
-
- 2.2 Editor Settings
- Tools->Options->Advanced->Config editor.
-
- - To bring up the thunderbird's registry editor, and set:
- "mailnews.send_plaintext_flowed" to "false".
- - Disable HTML Format: Set "mail.identity.id1.compose_html" to "false".
- - Enable UTF8: Set "prefs.converted-to-utf8" to "true".
- - View message in UTF-8: Set "mailnews.view_default_charset" to "UTF-8".
- - Set mailnews.wraplength to 9999 for avoiding auto-wrap
-
-Linux kernel
-============
-
-There are several guides for kernel developers and users. These guides can
-be rendered in a number of formats, like HTML and PDF. Please read
-Documentation/admin-guide/README.rst first.
-
-In order to build the documentation, use ``make htmldocs`` or
-``make pdfdocs``. The formatted documentation can also be read online at:
-
- https://www.kernel.org/doc/html/latest/
-
-There are various text files in the Documentation/ subdirectory,
-several of them using the Restructured Text markup notation.
-See Documentation/00-INDEX for a list of what is contained in each file.
-
-Please read the Documentation/process/changes.rst file, as it contains the
-requirements for building and running the kernel, and information about
-the problems which may result by upgrading your kernel.
diff --git a/README.md b/README.md
new file mode 100644
index 000000000..3374c4726
--- /dev/null
+++ b/README.md
@@ -0,0 +1,237 @@
+# How to Contribute
+-------
+
+- [How to Contribute](#how-to-contribute)
+  - [Sign the CLA](#sign-the-cla)
+  - [Steps of submitting patches](#steps-of-submitting-patches)
+  - [Use the unified patch format](#use-the-unified-patch-format)
+  - [Define the patch format](#define-the-patch-format)
+  - [Examples](#examples)
+  - [Email client - Thunderbird settings](#email-client---thunderbird-settings)
+- [Linux kernel](#linux-kernel)
+
+### Sign the CLA
+
+-------
+
+Before making any contributions to openEuler, sign the CLA first.
+
+Address: [https://openeuler.org/en/cla.html](https://openeuler.org/en/cla.html)
+
+### Steps of submitting patches
+-------
+
+**Step 1** Compile and test your patches.
+
+**Step 2** Generate patches.
+
+Your patches should be generated based on the latest openEuler branch using git-format-patch. If your patches are in a patchset, it is better to use the **--cover-letter** option to describe what the patchset does.
+
+Use **scripts/checkpatch.pl** to ensure that no coding style issue exists.
+
+In addition, ensure that your patches comply with the unified openEuler patch format described below.
+
+**Step 3** Send your patches to the openEuler mailing list.
+
+To do so, run the following command:
+
+ `git send-email *.patch --to="kernel(a)openeuler.org" --suppress-cc=all`
+
+*NOTE*: Add **--suppress-cc=all** if you use git-send-email; otherwise, the email will be copied to all people in the upstream community and mailing lists.
+
+For details about how to send patches using git-send-email, see [https://git-scm.com/docs/git-send-email](https://git-scm.com/docs/git-send-email).
+
+**Step 4** Mark "v1, v2, v3 ..." in your patch subject if you have multiple versions to send out.
+
+Use the **--subject-prefix="PATCH v2"** option to add the v2 tag to the patchset.
+
+ `git format-patch --subject-prefix="PATCH v2" -1`
+
+Subject examples:
+
+ Subject: [PATCH v2 01/27] fork: fix some -Wmissing-prototypes warnings
+
+ Subject: [PATCH v3] ext2: improve scalability of bitmap searching
+
+**Step 5** Upstream your kernel patches to the kernel community (recommended). openEuler will synchronize with the kernel master in a timely manner.
+
+**Step 6** Sign your work - the Developer’s Certificate of Origin.
+
+ Similar to the upstream kernel community, you also need to sign your patch.
+
+ For details, see [https://www.kernel.org/doc/html/latest/process/submitting-patches.html](https://www.kernel.org/doc/html/latest/process/submitting-patches.html).
+
+ The sign-off is a simple line at the end of the explanation of the patch, which certifies that you wrote it or otherwise have the right to pass it on as an open source patch. The rules are pretty simple. You can certify as below:
+
+ **Developer’s Certificate of Origin 1.1**
+
+ By making a contribution to this project, I certify that:
+
+ (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file;
+
+ (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file;
+
+ (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it.
+
+ (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved.
+
+Then you add a line saying:
+
+Signed-off-by: Random J Developer <random(a)developer.example.org>
+
+Use your real name (sorry, no pseudonyms or anonymous contributions).
+
+### Use the unified patch format
+-------
+
+Reasons:
+
+1. Long term maintainability
+
+ openEuler will merge massive numbers of patches. If all patches were merged
+ with casual changelogs and no unified format, the git log would be messy and
+ it would be hard to trace the original patches.
+
+2. Kernel upgrade
+
+ We will definitely upgrade the openEuler kernel someday, so strict patch
+ management will alleviate the pain of migrating patches during big upgrades.
+
+3. Easy for script parsing
+
+ Keyword highlighting is necessary for script parsing.
+
+### Define the patch format
+-------
+
+[M] stands for "mandatory".
+
+[O] stands for "optional".
+
+$category can be: bug preparation, bugfix, perf, feature, doc, other...
+
+If category is feature, we need to add a feature name as below:
+
+```cpp
+category: feature
+feature: YYY (the feature name)
+```
+
+If the patch is related to CVE or bugzilla, we need to add the corresponding tag as below (In general, it should include at least one of the following):
+
+```cpp
+CVE: $cve-id
+bugzilla: $bug-id
+```
+
+Additional changelog should include at least one of the following:
+
+1. Why we should apply this patch
+
+2. What real problem in the product does this patch resolve
+
+3. How could we reproduce this bug or how to test
+
+4. Other useful information that helps to understand this patch or the problem
+
+The detailed information is very useful for migrating a patch to another kernel branch.
+
+Example for mainline patch:
+
+```cpp
+mainline inclusion [M]
+from $mainline-version [M]
+commit $id [M]
+category: $category [M]
+bugzilla: $bug-id [O]
+CVE: $cve-id [O]
+
+additional changelog [O]
+
+--------------------------------
+
+original changelog
+Signed-off-by: $yourname <$yourname(a)huawei.com> [M]
+($mainline-version could be mainline-3.5, mainline-3.6, etc...)
+```
+
+### Examples
+-------
+
+```cpp
+mainline inclusion
+from mainline-4.10
+commit 0becc0ae5b42828785b589f686725ff5bc3b9b25
+category: bugfix
+bugzilla: 3004
+CVE: NA
+
+The patch fixes a BUG_ON in the product: injecting a single-bit ECC error into the memory before system boot using hardware inject tools causes a large amount of CMCI during system booting.
+[ 1.146580] mce: [Hardware Error]: Machine check events logged
+[ 1.152908] ------------[ cut here ]------------
+[ 1.157751] kernel BUG at kernel/timer.c:951!
+[ 1.162321] invalid opcode: 0000 [#1] SMP
+
+-------------------------------------------------
+
+original changelog
+
+<original S-O-B>
+Signed-off-by: Zhang San <zhangsan(a)huawei.com>
+Tested-by: Li Si <lisi(a)huawei.com>
+```
+
+### Email client - Thunderbird settings
+-------
+
+If you are a new developer in the kernel community, it is highly recommended that you use the Thunderbird mail client.
+
+1. Thunderbird Installation
+
+ Obtain the English version of Thunderbird from [http://www.mozilla.org/](http://www.mozilla.org/) and install it on your system.
+
+ Download URL: https://www.thunderbird.net/en-US/thunderbird/all/
+
+2. Settings
+
+ 2.1 Use the plain text format instead of the HTML format.
+
+ Choose **Options > Account Settings > Composition & Addressing**, and do **NOT** select Compose message in HTML format.
+
+ 2.2 Editor settings
+
+ **Tools > Options > Advanced > Config editor**
+
+ - In Thunderbird's registry editor, set **mailnews.send_plaintext_flowed** to **false**.
+
+ - Disable HTML format: Set **mail.identity.id1.compose_html** to **false**.
+
+ - Enable UTF-8: Set **prefs.converted-to-utf8** to **true**.
+
+ - View messages in UTF-8: Set **mailnews.view_default_charset** to **UTF-8**.
+
+ - Set **mailnews.wraplength** to **9999** to avoid auto-wrap.
+
+# Linux kernel
+-------
+
+There are several guides for kernel developers and users, which can be rendered in a number of formats, like HTML and PDF. You can read **Documentation/admin-guide/README.rst** first.
+
+In order to build the documentation, use **make htmldocs** or **make pdfdocs**. The formatted documentation can also be read online at: https://www.kernel.org/doc/html/latest/
+
+There are various text files in the Documentation/ subdirectory, several of which use the Restructured Text markup notation. See Documentation/00-INDEX for a list of what is contained in each file.
+
+Read the **Documentation/process/changes.rst** file, as it contains the requirements for building and running the kernel, and information about the problems that may be caused by upgrading your kernel.
+
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 161b0224d..d9a8ae4da 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1063,7 +1063,7 @@ sub top_of_kernel_tree {
my @tree_check = (
"COPYING", "CREDITS", "Kbuild", "MAINTAINERS", "Makefile",
- "README", "Documentation", "arch", "include", "drivers",
+ "README.md", "Documentation", "arch", "include", "drivers",
"fs", "init", "ipc", "kernel", "lib", "scripts",
);
--
2.22.0
[PATCH openEuler-1.0-LTS] scsi: hisi_sas: unsupported DIX between OS and HBA only for SATA device
by Yang Yingliang 01 Nov '21
From: Yang Xingui <yangxingui(a)huawei.com>
driver inclusion
category: bugfix
bugzilla: NA
CVE: NA
Signed-off-by: Yang Xingui <yangxingui(a)huawei.com>
Reviewed-by: Ouyangdelong <ouyangdelong(a)huawei.com>
Reviewed-by: Kangfenglong <kangfenglong(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/scsi/hisi_sas/hisi_sas.h | 10 ++++++++++
drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 7 ++++++-
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
index 3fd32606ecb00..8e9424e62a150 100644
--- a/drivers/scsi/hisi_sas/hisi_sas.h
+++ b/drivers/scsi/hisi_sas/hisi_sas.h
@@ -77,6 +77,16 @@
#define HISI_SAS_SATA_PROTOCOL_FPDMA 0x8
#define HISI_SAS_SATA_PROTOCOL_ATAPI 0x10
+#define HISI_SAS_DIF_PROT_MASK (SHOST_DIF_TYPE1_PROTECTION | \
+ SHOST_DIF_TYPE2_PROTECTION | \
+ SHOST_DIF_TYPE3_PROTECTION)
+
+#define HISI_SAS_DIX_PROT_MASK (SHOST_DIX_TYPE1_PROTECTION | \
+ SHOST_DIX_TYPE2_PROTECTION | \
+ SHOST_DIX_TYPE3_PROTECTION)
+
+#define HISI_SAS_PROT_MASK (HISI_SAS_DIF_PROT_MASK | HISI_SAS_DIX_PROT_MASK)
+
#define CLEAR_ITCT_TIMEOUT 20
struct hisi_hba;
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index 0e4cc16e542d6..9d6e21be35841 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -3357,9 +3357,14 @@ hisi_sas_shost_alloc_pci(struct pci_dev *pdev)
hisi_hba->dev = dev;
hisi_hba->shost = shost;
SHOST_TO_SAS_HA(shost) = &hisi_hba->sha;
- hisi_hba->enable_dix_dif = enable_dix_dif;
hisi_hba->user_ctl_irq = user_ctl_irq;
+ if (enable_dix_dif & ~HISI_SAS_PROT_MASK)
+ dev_err(dev, "unsupported protection mask 0x%x, using default (0x0)\n",
+ enable_dix_dif);
+ else
+ hisi_hba->enable_dix_dif = enable_dix_dif;
+
timer_setup(&hisi_hba->timer, NULL, 0);
if (hisi_sas_get_fw_info(hisi_hba) < 0)
--
2.25.1
[PATCH openEuler-1.0-LTS 01/19] nbd: don't handle response without a corresponding request message
by Yang Yingliang 30 Oct '21
From: Yu Kuai <yukuai3(a)huawei.com>
mainline inclusion
from mainline-next-20211018
commit b5644a3a79bf3be5f1238db1b2f241374b27b0f0
category: bugfix
bugzilla: 49890
CVE: NA
---------------------------
While handling a response message from the server, nbd_read_stat() will
try to get the request by tag and then complete the request. However,
this is problematic if nbd hasn't sent a corresponding request
message:
t1                        t2
submit_bio
 nbd_queue_rq
  blk_mq_start_request
                          recv_work
                           nbd_read_stat
                            blk_mq_tag_to_rq
                           blk_mq_complete_request
  nbd_send_cmd
Thus add a new cmd flag 'NBD_CMD_INFLIGHT'; it will be set in
nbd_send_cmd() and checked in nbd_read_stat().
Note that this patch can't fix the fact that blk_mq_tag_to_rq() might
return a freed request; this will be fixed in the following
patches.
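As an aside, the guard-flag handshake can be illustrated with a minimal
user-space sketch. This is not the kernel code: cmd->lock is modelled by a
pthread mutex and the flag by a plain bool.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative only: a reply may complete a command only after the
 * submit side has marked it in-flight under the same lock. */
struct cmd {
	pthread_mutex_t lock;
	bool inflight;
};

static void submit_side(struct cmd *c)
{
	/* ... after the request was successfully sent to the server ... */
	pthread_mutex_lock(&c->lock);
	c->inflight = true;	/* only now may a reply complete us */
	pthread_mutex_unlock(&c->lock);
}

static void reply_side(struct cmd *c)
{
	pthread_mutex_lock(&c->lock);
	if (!c->inflight) {	/* reply for a request that was never sent */
		pthread_mutex_unlock(&c->lock);
		printf("suspicious reply ignored\n");
		return;
	}
	c->inflight = false;
	pthread_mutex_unlock(&c->lock);
	printf("request completed\n");
}

int main(void)
{
	struct cmd c = { PTHREAD_MUTEX_INITIALIZER, false };

	reply_side(&c);		/* early reply: rejected */
	submit_side(&c);
	reply_side(&c);		/* normal reply: completed */
	return 0;
}
```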
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Link: https://lore.kernel.org/r/20210916093350.1410403-2-yukuai3@huawei.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/block/nbd.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 6a72c07ce3cba..05153b84d5400 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -120,6 +120,12 @@ struct nbd_device {
};
#define NBD_CMD_REQUEUED 1
+/*
+ * This flag will be set if nbd_queue_rq() succeed, and will be checked and
+ * cleared in completion. Both setting and clearing of the flag are protected
+ * by cmd->lock.
+ */
+#define NBD_CMD_INFLIGHT 2
struct nbd_cmd {
struct nbd_device *nbd;
@@ -369,6 +375,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
if (!mutex_trylock(&cmd->lock))
return BLK_EH_RESET_TIMER;
+ __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
if (!refcount_inc_not_zero(&nbd->config_refs)) {
cmd->status = BLK_STS_TIMEOUT;
mutex_unlock(&cmd->lock);
@@ -674,6 +681,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
+ if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
+ tag, cmd->status, cmd->flags);
+ ret = -ENOENT;
+ goto out;
+ }
if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
@@ -768,6 +781,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
+ __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
cmd->status = BLK_STS_IOERR;
mutex_unlock(&cmd->lock);
@@ -903,7 +917,13 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
* returns EAGAIN can be retried on a different socket.
*/
ret = nbd_send_cmd(nbd, cmd, index);
- if (ret == -EAGAIN) {
+ /*
+ * Access to this flag is protected by cmd->lock, thus it's safe to set
+ * the flag after nbd_send_cmd() succeed to send request to server.
+ */
+ if (!ret)
+ __set_bit(NBD_CMD_INFLIGHT, &cmd->flags);
+ else if (ret == -EAGAIN) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Request send failed, requeueing\n");
nbd_mark_nsock_dead(nbd, nsock, 1);
--
2.25.1
[PATCH openEuler-1.0-LTS 1/5] numa: Move the management structures for cdm nodes to ddr
by Yang Yingliang 30 Oct '21
From: Wang Wensheng <wangwensheng4(a)huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4D63I
CVE: NA
-------------------------------------------------
The cdm nodes are more likely to raise an ECC error, and it may cause a
kernel crash if the essential structures go wrong. So move the
management structures for the hbm nodes to the ddr nodes of the same
partition to reduce the probability of kernel crashes.
Signed-off-by: Wang Wensheng <wangwensheng4(a)huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
arch/arm64/Kconfig | 10 ++++++++
arch/arm64/mm/numa.c | 54 +++++++++++++++++++++++++++++++++++++++-
include/linux/nodemask.h | 7 ++++++
mm/sparse.c | 8 +++---
4 files changed, 75 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9d49b9524e1d4..2f34aef79179e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1470,6 +1470,16 @@ config ASCEND_SHARE_POOL
help
This feature allows multiple processes to share virtual memory both
in kernel and user level, which is only enabled for ascend platform.
+
+config ASCEND_CLEAN_CDM
+ bool "move the management structure for HBM to DDR"
+ def_bool n
+ depends on COHERENT_DEVICE
+ help
+ The cdm nodes sometimes are easiler to raise an ECC error and it may
+ cause the kernel crash if the essential structures went wrong. So move
+ the management structures for hbm nodes to the ddr nodes of the same
+ partion to reduce the probability of kernel crashes.
endif
endmenu
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index a9d3ad5ee0cc3..a194bad6fdfcf 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -45,6 +45,57 @@ inline int arch_check_node_cdm(int nid)
return node_isset(nid, cdmmask);
}
+#ifdef CONFIG_ASCEND_CLEAN_CDM
+/**
+ * cdm_node_to_ddr_node - Convert the cdm node to the ddr node of the
+ * same partion.
+ * @nid: input node ID
+ *
+ * Here is a typical memory topology in usage.
+ * There are some DDR and HBM in each partion and DDRs present at first, then
+ * come all the HBMs of the first partion, then HBMs of the second partion, etc.
+ *
+ * -------------------------
+ * | P0 | P1 |
+ * ----------- | -----------
+ * |node0 DDR| | |node1 DDR|
+ * |---------- | ----------|
+ * |node2 HBM| | |node4 HBM|
+ * |---------- | ----------|
+ * |node3 HBM| | |node5 HBM|
+ * ----------- | -----------
+ *
+ * Return:
+ * This function returns a ddr node which is of the same partion with the input
+ * node if the input node is a HBM node.
+ * The input nid is returned if it is a DDR node or if the memory topology of
+ * the system doesn't apply to the above model.
+ */
+int __init cdm_node_to_ddr_node(int nid)
+{
+ nodemask_t ddr_mask;
+ int nr_ddr, cdm_per_part, fake_nid;
+ int nr_cdm = nodes_weight(cdmmask);
+
+ if (!nr_cdm || nodes_empty(numa_nodes_parsed))
+ return nid;
+
+ if (!node_isset(nid, cdmmask))
+ return nid;
+
+ nodes_xor(ddr_mask, cdmmask, numa_nodes_parsed);
+ nr_ddr = nodes_weight(ddr_mask);
+ cdm_per_part = nr_cdm / nr_ddr ? : 1;
+
+ fake_nid = (nid - nr_ddr) / cdm_per_part;
+ fake_nid = !node_isset(fake_nid, cdmmask) ? fake_nid : nid;
+
+ pr_info("nid: %d, fake_nid: %d\n", nid, fake_nid);
+
+ return fake_nid;
+}
+#endif
+
static int __init cdm_nodes_setup(char *s)
{
int nid;
@@ -264,11 +315,12 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
u64 nd_pa;
void *nd;
int tnid;
+ int fake_nid = cdm_node_to_ddr_node(nid);
if (start_pfn >= end_pfn)
pr_info("Initmem setup node %d [<memory-less node>]\n", nid);
- nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+ nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, fake_nid);
nd = __va(nd_pa);
/* report and initialize */
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 41fb047bdba80..7c0571b95ce4d 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -508,6 +508,12 @@ static inline int node_random(const nodemask_t *mask)
#ifdef CONFIG_COHERENT_DEVICE
extern int arch_check_node_cdm(int nid);
+#ifdef CONFIG_ASCEND_CLEAN_CDM
+extern int cdm_node_to_ddr_node(int nid);
+#else
+static inline int cdm_node_to_ddr_node(int nid) { return nid; }
+#endif
+
static inline nodemask_t system_mem_nodemask(void)
{
nodemask_t system_mem;
@@ -551,6 +557,7 @@ static inline void node_clear_state_cdm(int node)
#else
static inline int arch_check_node_cdm(int nid) { return 0; }
+static inline int cdm_node_to_ddr_node(int nid) { return nid; }
static inline nodemask_t system_mem_nodemask(void)
{
diff --git a/mm/sparse.c b/mm/sparse.c
index 62ae3880a9add..f19d2ca250cee 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -458,21 +458,23 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
{
unsigned long pnum, usemap_longs, *usemap;
struct page *map;
+ int fake_nid = cdm_node_to_ddr_node(nid);
usemap_longs = BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS);
- usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
+ usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(fake_nid),
usemap_size() *
map_count);
if (!usemap) {
pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
goto failed;
}
- sparse_buffer_init(map_count * section_map_size(), nid);
+
+ sparse_buffer_init(map_count * section_map_size(), fake_nid);
for_each_present_section_nr(pnum_begin, pnum) {
if (pnum >= pnum_end)
break;
- map = sparse_mem_map_populate(pnum, nid, NULL);
+ map = sparse_mem_map_populate(pnum, fake_nid, NULL);
if (!map) {
pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
__func__, nid);
--
2.25.1
[PATCH openEuler-1.0-LTS 1/4] perf: hisi: Add support for HiSilicon SoC PMU driver dt probe
by Yang Yingliang 30 Oct '21
From: Fang Lijun <fanglijun3(a)huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4D4WR
CVE: NA
---------------------------
Add support for the hisi PMU driver dt probe, and fix its compile error
when CONFIG_ACPI is disabled.
Signed-off-by: Fang Lijun <fanglijun3(a)huawei.com>
Reviewed-by: Hanjun Guo <guohanjun(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
drivers/perf/Kconfig | 2 +-
drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 1 +
drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 23 +++++++++++++++----
drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 20 +++++++++++++---
4 files changed, 38 insertions(+), 8 deletions(-)
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 92be6a36a128f..d4b9681418f88 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -72,7 +72,7 @@ config ARM_DSU_PMU
config HISI_PMU
bool "HiSilicon SoC PMU"
- depends on ARM64 && ACPI
+ depends on ARM64
help
Support for HiSilicon SoC uncore performance monitoring
unit (PMU), such as: L3C, HHA and DDRC.
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index 090667d487504..3f3f4ab3aacce 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -17,6 +17,7 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/list.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/smp.h>
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index c35bc248db7e7..4dd4d6b650aed 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -17,6 +17,7 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/list.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/smp.h>
@@ -235,20 +236,34 @@ static const struct acpi_device_id hisi_hha_pmu_acpi_match[] = {
};
MODULE_DEVICE_TABLE(acpi, hisi_hha_pmu_acpi_match);
-static int hisi_hha_pmu_init_data(struct platform_device *pdev,
+#ifdef CONFIG_ACPI
+static int hisi_hha_pmu_init_index(struct platform_device *pdev,
struct hisi_pmu *hha_pmu)
{
- unsigned long long id;
- struct resource *res;
acpi_status status;
+ unsigned long long id;
status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
- "_UID", NULL, &id);
+ "_UID", NULL, &id);
if (ACPI_FAILURE(status))
return -EINVAL;
hha_pmu->index_id = id;
+ return 0;
+}
+#endif
+
+static int hisi_hha_pmu_init_data(struct platform_device *pdev,
+ struct hisi_pmu *hha_pmu)
+{
+ struct resource *res;
+
+#ifdef CONFIG_ACPI
+ if (hisi_hha_pmu_init_index(pdev, hha_pmu))
+ dev_info(&pdev->dev, "Can not init index id by acpi!\n");
+#endif
+
/*
* Use SCCL_ID and UID to identify the HHA PMU, while
* SCCL_ID is in MPIDR[aff2].
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 6ce1d69c63198..4a42926800e50 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -17,6 +17,7 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/list.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/smp.h>
@@ -234,20 +235,33 @@ static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
};
MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
-static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
+#ifdef CONFIG_ACPI
+static int hisi_l3c_pmu_init_index(struct platform_device *pdev,
struct hisi_pmu *l3c_pmu)
{
unsigned long long id;
- struct resource *res;
acpi_status status;
status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
- "_UID", NULL, &id);
+ "_UID", NULL, &id);
if (ACPI_FAILURE(status))
return -EINVAL;
l3c_pmu->index_id = id;
+ return 0;
+}
+#endif
+
+static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
+ struct hisi_pmu *l3c_pmu)
+{
+ struct resource *res;
+
+#ifdef CONFIG_ACPI
+ if (hisi_l3c_pmu_init_index(pdev, l3c_pmu))
+ dev_info(&pdev->dev, "Can not init index id by acpi!");
+#endif
/*
* Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
* SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
--
2.25.1
[PATCH openEuler-1.0-LTS 1/7] corelockup: Add support of cpu core hang check
by Yang Yingliang 30 Oct '21
From: Dong Kai <dongkai11(a)huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4F3V1
CVE: NA
--------------------------------
The softlockup and hardlockup detectors only check the status
of the cpu on which they reside. If a certain cpu core suspends,
neither of them works. There is no valid log, but the cpu is
already abnormal and brings a lot of problems to the system.
To detect this case, we add the corelockup detector.
First we use whether a cpu core can respond to nmi as a
criterion to determine whether it is suspended. Then things are
simple. Each cpu core maintains its own nmi interrupt counts and
watches the nmi_counts of the next cpu core. If the nmi interrupt
counts no longer change, which means the core can't respond to
nmi normally, we regard it as suspended.
To ensure robustness, the warning is only triggered when the nmi
is lost more than two consecutive times.
The detection chain is as follows:
cpu0->cpu1->...->cpuN->cpu0
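The consecutive-miss rule can be sketched in user space as follows. This is
a simplification, not the kernel code: one watcher samples the nmi count of
its target core, and the per-cpu plumbing is omitted.

```c
#include <stdio.h>

/* Illustrative only: if the target's nmi count is unchanged for more
 * than two consecutive samples, the target is reported as locked up. */
struct watcher {
	unsigned long saved;	/* last observed nmi count of the target */
	unsigned int missed;	/* consecutive samples without progress  */
};

static int check(struct watcher *w, unsigned long target_nmi_count)
{
	if (w->saved != target_nmi_count) {
		w->saved = target_nmi_count;
		w->missed = 0;
		return 0;		/* target is alive */
	}
	return ++w->missed > 2;		/* hung after three stalled samples */
}

int main(void)
{
	struct watcher w = { .saved = (unsigned long)-1, .missed = 0 };
	unsigned long samples[] = { 10, 11, 11, 11, 11 };
	unsigned int i;

	for (i = 0; i < 5; i++)		/* the fifth sample reports a lockup */
		printf("count %lu -> %s\n", samples[i],
		       check(&w, samples[i]) ? "core LOCKUP" : "ok");
	return 0;
}
```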
Signed-off-by: Dong Kai <dongkai11(a)huawei.com>
Reviewed-by: Kuohai Xu <xukuohai(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
include/linux/nmi.h | 6 ++
kernel/watchdog.c | 15 +++-
kernel/watchdog_hld.c | 165 ++++++++++++++++++++++++++++++++++++++++++
lib/Kconfig.debug | 8 ++
4 files changed, 192 insertions(+), 2 deletions(-)
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 84f324d65068b..745d66c36e244 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -124,6 +124,12 @@ static inline int hardlockup_detector_perf_init(void) { return 0; }
# endif
#endif
+#ifdef CONFIG_CORELOCKUP_DETECTOR
+extern void corelockup_detector_init(void);
+extern void corelockup_detector_online_cpu(unsigned int cpu);
+extern void corelockup_detector_offline_cpu(unsigned int cpu);
+#endif
+
void watchdog_nmi_stop(void);
void watchdog_nmi_start(void);
int watchdog_nmi_probe(void);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 0dd17265dcbd4..8b54fd30a597f 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -551,15 +551,23 @@ static void softlockup_start_all(void)
int lockup_detector_online_cpu(unsigned int cpu)
{
- if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
+ if (cpumask_test_cpu(cpu, &watchdog_allowed_mask)) {
watchdog_enable(cpu);
+#ifdef CONFIG_CORELOCKUP_DETECTOR
+ corelockup_detector_online_cpu(cpu);
+#endif
+ }
return 0;
}
int lockup_detector_offline_cpu(unsigned int cpu)
{
- if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
+ if (cpumask_test_cpu(cpu, &watchdog_allowed_mask)) {
watchdog_disable(cpu);
+#ifdef CONFIG_CORELOCKUP_DETECTOR
+ corelockup_detector_offline_cpu(cpu);
+#endif
+ }
return 0;
}
@@ -783,4 +791,7 @@ void __init lockup_detector_init(void)
if (!watchdog_nmi_probe())
nmi_watchdog_available = true;
lockup_detector_setup();
+#ifdef CONFIG_CORELOCKUP_DETECTOR
+ corelockup_detector_init();
+#endif
}
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 904a95262fcf6..e965c31958203 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -39,6 +39,163 @@ notrace void __weak arch_touch_nmi_watchdog(void)
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
+#ifdef CONFIG_CORELOCKUP_DETECTOR
+/*
+ * The softlockup and hardlockup detector only check the status
+ * of the cpu which it resides. If certain cpu core suspends,
+ * they are both not works. There is no any valid log but the
+ * cpu already abnormal and brings a lot of problems of system.
+ * To detect this case, we add the corelockup detector.
+ *
+ * First we use whether cpu core can responds to nmi as a sectence
+ * to determine if it is suspended. Then things is simple. Per cpu
+ * core maintains it's nmi interrupt counts and detector the
+ * nmi_counts of next cpu core. If the nmi interrupt counts not
+ * changed any more which means it can't respond nmi normally, we
+ * regard it as suspend.
+ *
+ * To ensure robustness, only consecutive lost nmi more than two
+ * times then trigger the warn.
+ *
+ * The detection chain is as following:
+ * cpu0->cpu1->...->cpuN->cpu0
+ *
+ * detector_cpu: the target cpu to detector of current cpu
+ * nmi_interrupts: the nmi counts of current cpu
+ * nmi_cnt_saved: saved nmi counts of detector_cpu
+ * nmi_cnt_missed: the nmi consecutive miss counts of detector_cpu
+ */
+static DEFINE_PER_CPU(unsigned int, detector_cpu);
+static DEFINE_PER_CPU(unsigned long, nmi_interrupts);
+static DEFINE_PER_CPU(unsigned long, nmi_cnt_saved);
+static DEFINE_PER_CPU(unsigned long, nmi_cnt_missed);
+static DEFINE_PER_CPU(bool, core_watchdog_warn);
+
+static void watchdog_nmi_interrupts(void)
+{
+ __this_cpu_inc(nmi_interrupts);
+}
+
+static void corelockup_status_copy(unsigned int from, unsigned int to)
+{
+ per_cpu(nmi_cnt_saved, to) = per_cpu(nmi_cnt_saved, from);
+ per_cpu(nmi_cnt_missed, to) = per_cpu(nmi_cnt_missed, from);
+
+ /* always update detector cpu at the end */
+ per_cpu(detector_cpu, to) = per_cpu(detector_cpu, from);
+}
+
+static void corelockup_status_init(unsigned int cpu, unsigned int target)
+{
+ /*
+ * initialize saved count to max to avoid unnecessary misjudge
+ * caused by delay running of nmi on target cpu
+ */
+ per_cpu(nmi_cnt_saved, cpu) = ULONG_MAX;
+ per_cpu(nmi_cnt_missed, cpu) = 0;
+
+ /* always update detector cpu at the end */
+ per_cpu(detector_cpu, cpu) = target;
+}
+
+void __init corelockup_detector_init(void)
+{
+ unsigned int cpu, next;
+
+ /* detector cpu is set to the next valid logically one */
+ for_each_cpu_and(cpu, &watchdog_cpumask, cpu_online_mask) {
+ next = cpumask_next_and(cpu, &watchdog_cpumask,
+ cpu_online_mask);
+ if (next >= nr_cpu_ids)
+ next = cpumask_first_and(&watchdog_cpumask,
+ cpu_online_mask);
+ corelockup_status_init(cpu, next);
+ }
+}
+
+/*
+ * Before: first->next
+ * After: first->[new]->next
+ */
+void corelockup_detector_online_cpu(unsigned int cpu)
+{
+ unsigned int first = cpumask_first_and(&watchdog_cpumask,
+ cpu_online_mask);
+
+ if (WARN_ON(first >= nr_cpu_ids))
+ return;
+
+ /* cpu->next */
+ corelockup_status_copy(first, cpu);
+
+ /* first->cpu */
+ corelockup_status_init(first, cpu);
+}
+
+/*
+ * Before: prev->cpu->next
+ * After: prev->next
+ */
+void corelockup_detector_offline_cpu(unsigned int cpu)
+{
+ unsigned int prev = nr_cpu_ids;
+ unsigned int i;
+
+ /* found prev cpu */
+ for_each_cpu_and(i, &watchdog_cpumask, cpu_online_mask) {
+ if (per_cpu(detector_cpu, i) == cpu) {
+ prev = i;
+ break;
+ }
+ }
+
+ if (WARN_ON(prev == nr_cpu_ids))
+ return;
+
+ /* prev->next */
+ corelockup_status_copy(cpu, prev);
+}
+
+static bool is_corelockup(unsigned int cpu)
+{
+ unsigned long nmi_int = per_cpu(nmi_interrupts, cpu);
+
+ /* skip check if only one cpu online */
+ if (cpu == smp_processor_id())
+ return false;
+
+ if (__this_cpu_read(nmi_cnt_saved) != nmi_int) {
+ __this_cpu_write(nmi_cnt_saved, nmi_int);
+ __this_cpu_write(nmi_cnt_missed, 0);
+ per_cpu(core_watchdog_warn, cpu) = false;
+ return false;
+ }
+
+ __this_cpu_inc(nmi_cnt_missed);
+ if (__this_cpu_read(nmi_cnt_missed) > 2)
+ return true;
+
+ return false;
+}
+NOKPROBE_SYMBOL(is_corelockup);
+
+static void watchdog_corelockup_check(struct pt_regs *regs)
+{
+ unsigned int cpu = __this_cpu_read(detector_cpu);
+
+ if (is_corelockup(cpu)) {
+ if (per_cpu(core_watchdog_warn, cpu) == true)
+ return;
+ pr_emerg("Watchdog detected core LOCKUP on cpu %d\n", cpu);
+
+ if (hardlockup_panic)
+ nmi_panic(regs, "Core LOCKUP");
+
+ per_cpu(core_watchdog_warn, cpu) = true;
+ }
+}
+#endif
+
#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
static DEFINE_PER_CPU(ktime_t, last_timestamp);
static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
@@ -106,6 +263,14 @@ static inline bool watchdog_check_timestamp(void)
void watchdog_hardlockup_check(struct pt_regs *regs)
{
+#ifdef CONFIG_CORELOCKUP_DETECTOR
+ /* Kick nmi interrupts */
+ watchdog_nmi_interrupts();
+
+ /* corelockup check */
+ watchdog_corelockup_check(regs);
+#endif
+
if (__this_cpu_read(watchdog_nmi_touch) == true) {
__this_cpu_write(watchdog_nmi_touch, false);
return;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0ee305de7d0ec..4a78bacd405bd 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -881,6 +881,14 @@ config HARDLOCKUP_DETECTOR
chance to run. The current stack trace is displayed upon detection
and the system will stay locked up.
+config CORELOCKUP_DETECTOR
+ bool "Detect Core Lockups"
+ depends on HARDLOCKUP_DETECTOR && SOFTLOCKUP_DETECTOR
+ depends on ARM64
+ default n
+ help
+ Corelockups is used to check whether cpu core hungup or not.
+
config BOOTPARAM_HARDLOCKUP_PANIC
bool "Panic (Reboot) On Hard Lockups"
depends on HARDLOCKUP_DETECTOR
--
2.25.1
[PATCH openEuler-1.0-LTS 01/41] share_pool: Rename sp_stat_idr to sp_proc_stat_idr
by Yang Yingliang 30 Oct '21
From: Tang Yizhou <tangyizhou(a)huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
-------------------------------------------------
We are going to redesign the accounting subsystem of share pool.
We need to disambiguate the meaning of sp_stat_idr, as we will
introduce a struct representing per-spg statistics.
Signed-off-by: Tang Yizhou <tangyizhou(a)huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong(a)huawei.com>
Signed-off-by: Zhou Guanghui <zhouguanghui1(a)huawei.com>
Reviewed-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
mm/share_pool.c | 44 ++++++++++++++++++++++----------------------
1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index cd6e137fe6698..c4b8daa47fcfd 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -95,9 +95,9 @@ static DEFINE_IDA(sp_group_id_ida);
/*** Statistical and maintenance tools ***/
/* idr of all sp_proc_stats */
-static DEFINE_IDR(sp_stat_idr);
-/* rw semaphore for sp_stat_idr and mm->sp_stat_id */
-static DECLARE_RWSEM(sp_stat_sem);
+static DEFINE_IDR(sp_proc_stat_idr);
+/* rw semaphore for sp_proc_stat_idr */
+static DECLARE_RWSEM(sp_proc_stat_sem);
/* for kthread buff_module_guard_work */
static struct sp_proc_stat kthread_stat;
@@ -107,7 +107,7 @@ static struct sp_proc_stat *sp_get_proc_stat_locked(int tgid)
{
struct sp_proc_stat *stat;
- stat = idr_find(&sp_stat_idr, tgid);
+ stat = idr_find(&sp_proc_stat_idr, tgid);
/* maybe NULL or not, we always return it */
return stat;
@@ -118,7 +118,7 @@ static struct sp_proc_stat *sp_get_proc_stat_ref_locked(int tgid)
{
struct sp_proc_stat *stat;
- stat = idr_find(&sp_stat_idr, tgid);
+ stat = idr_find(&sp_proc_stat_idr, tgid);
if (!stat || !atomic_inc_not_zero(&stat->use_count))
stat = NULL;
@@ -137,16 +137,16 @@ static struct sp_proc_stat *sp_init_proc_stat(struct task_struct *tsk,
int id, tgid = tsk->tgid;
int ret;
- down_write(&sp_stat_sem);
+ down_write(&sp_proc_stat_sem);
id = mm->sp_group_master->sp_stat_id;
if (id) {
/* other threads in the same process may have initialized it */
stat = sp_get_proc_stat_locked(tgid);
if (stat) {
- up_write(&sp_stat_sem);
+ up_write(&sp_proc_stat_sem);
return stat;
} else {
- up_write(&sp_stat_sem);
+ up_write(&sp_proc_stat_sem);
/* if enter this branch, that's our mistake */
pr_err_ratelimited("share pool: proc stat invalid id %d\n", id);
return ERR_PTR(-EBUSY);
@@ -155,7 +155,7 @@ static struct sp_proc_stat *sp_init_proc_stat(struct task_struct *tsk,
stat = kzalloc(sizeof(*stat), GFP_KERNEL);
if (stat == NULL) {
- up_write(&sp_stat_sem);
+ up_write(&sp_proc_stat_sem);
pr_err_ratelimited("share pool: alloc proc stat failed due to lack of memory\n");
return ERR_PTR(-ENOMEM);
}
@@ -167,16 +167,16 @@ static struct sp_proc_stat *sp_init_proc_stat(struct task_struct *tsk,
stat->mm = mm;
get_task_comm(stat->comm, tsk);
- ret = idr_alloc(&sp_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL);
+ ret = idr_alloc(&sp_proc_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL);
if (ret < 0) {
- up_write(&sp_stat_sem);
+ up_write(&sp_proc_stat_sem);
pr_err_ratelimited("share pool: proc stat idr alloc failed %d\n", ret);
kfree(stat);
return ERR_PTR(ret);
}
mm->sp_group_master->sp_stat_id = ret;
- up_write(&sp_stat_sem);
+ up_write(&sp_proc_stat_sem);
return stat;
}
@@ -184,9 +184,9 @@ static struct sp_proc_stat *sp_get_proc_stat(int tgid)
{
struct sp_proc_stat *stat;
- down_read(&sp_stat_sem);
+ down_read(&sp_proc_stat_sem);
stat = sp_get_proc_stat_locked(tgid);
- up_read(&sp_stat_sem);
+ up_read(&sp_proc_stat_sem);
return stat;
}
@@ -195,9 +195,9 @@ struct sp_proc_stat *sp_get_proc_stat_ref(int tgid)
{
struct sp_proc_stat *stat;
- down_read(&sp_stat_sem);
+ down_read(&sp_proc_stat_sem);
stat = sp_get_proc_stat_ref_locked(tgid);
- up_read(&sp_stat_sem);
+ up_read(&sp_proc_stat_sem);
return stat;
}
@@ -2850,10 +2850,10 @@ __setup("enable_sp_multi_group_mode", enable_sp_multi_group_mode);
static void free_sp_proc_stat(struct sp_proc_stat *stat)
{
- down_write(&sp_stat_sem);
+ down_write(&sp_proc_stat_sem);
stat->mm->sp_group_master->sp_stat_id = 0;
- idr_remove(&sp_stat_idr, stat->tgid);
- up_write(&sp_stat_sem);
+ idr_remove(&sp_proc_stat_idr, stat->tgid);
+ up_write(&sp_proc_stat_sem);
kfree(stat);
}
@@ -3163,9 +3163,9 @@ static int proc_stat_show(struct seq_file *seq, void *offset)
byte2kb(atomic64_read(&kthread_stat.k2u_size)));
/* pay attention to potential ABBA deadlock */
- down_read(&sp_stat_sem);
- idr_for_each(&sp_stat_idr, idr_proc_stat_cb, seq);
- up_read(&sp_stat_sem);
+ down_read(&sp_proc_stat_sem);
+ idr_for_each(&sp_proc_stat_idr, idr_proc_stat_cb, seq);
+ up_read(&sp_proc_stat_sem);
return 0;
}
--
2.25.1
From: Wang Wensheng <wangwensheng4(a)huawei.com>
ascend inclusion
category: bugfix
bugzilla: NA
CVE: NA
---------------------------
To avoid mmap using the vspace reserved for sharepool, we currently change
the high_limit to MMAP_SHARE_POOL_START in arch_get_unmapped_area() and
arch_get_unmapped_area_topdown(). In the mmap-topdown scene, this makes the
start address of mmap always MMAP_SHARE_POOL_START, so ASLR is broken.
To fix this, this patch sets mm->mmap_base based on
MMAP_SHARE_POOL_START instead of STACK_TOP in the topdown scene.
Fixes: 4bdd5c21793e ("ascend: memory: introduce do_mm_populate and hugetlb_insert_hugepage")
Signed-off-by: Wang Wensheng <wangwensheng4(a)huawei.com>
Reviewed-by: Weilong Chen <chenweilong(a)huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
arch/arm64/mm/mmap.c | 6 +++++-
include/linux/share_pool.h | 4 ++--
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index ac89686c4af89..87f29df8126ba 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -28,6 +28,7 @@
#include <linux/io.h>
#include <linux/personality.h>
#include <linux/random.h>
+#include <linux/share_pool.h>
#include <asm/cputype.h>
@@ -80,7 +81,10 @@ static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
else if (gap > MAX_GAP)
gap = MAX_GAP;
- return PAGE_ALIGN(STACK_TOP - gap - rnd);
+ if (sp_is_enabled())
+ return ALIGN_DOWN(MMAP_SHARE_POOL_START - rnd, PAGE_SIZE);
+ else
+ return PAGE_ALIGN(STACK_TOP - gap - rnd);
}
/*
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h
index 9650f257b3ad7..9557a8be46677 100644
--- a/include/linux/share_pool.h
+++ b/include/linux/share_pool.h
@@ -130,8 +130,6 @@ struct sp_proc_stat {
atomic64_t k2u_size;
};
-#ifdef CONFIG_ASCEND_SHARE_POOL
-
#define MAP_SHARE_POOL 0x100000
#define MMAP_TOP_4G_SIZE 0x100000000UL
@@ -148,6 +146,8 @@ struct sp_proc_stat {
#define MMAP_SHARE_POOL_START (MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_SIZE)
#define MMAP_SHARE_POOL_16G_START (MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_DVPP_SIZE)
+#ifdef CONFIG_ASCEND_SHARE_POOL
+
static inline void sp_init_mm(struct mm_struct *mm)
{
mm->sp_group = NULL;
--
2.25.1
From: Andi Kleen <andi(a)firstfloor.org>
mainline inclusion
from mainline-5.11
commit 55a4de94c64bacffbcd802c954764e0de2ab217f
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4CMQA
CVE: NA
--------------------------------
Add a new --quiet option to 'perf stat'. This is useful with 'perf stat
record' to write the data only to the perf.data file, which can lower
measurement overhead because the data doesn't need to be formatted.
On my 4C desktop:
% time ./perf stat record -e $(python -c 'print ",\
".join(["cycles"]*1000)') -a -I 1000 sleep 5
...
real 0m5.377s
user 0m0.238s
sys 0m0.452s
% time ./perf stat record --quiet -e $(python -c 'print ",\
".join(["cycles"]*1000)') -a -I 1000 sleep 5
real 0m5.452s
user 0m0.183s
sys 0m0.423s
In this example it cuts the user time by 20%. On systems with more cores
the savings are higher.
Signed-off-by: Andi Kleen <andi(a)firstfloor.org>
Acked-by: Jiri Olsa <jolsa(a)kernel.org>
Cc: Alexey Budankov <alexey.budankov(a)linux.intel.com>
Link: http://lore.kernel.org/lkml/20201027002737.30942-1-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme(a)redhat.com>
Signed-off-by: yin-xiujiang <yinxiujiang(a)kylinos.cn>
Reviewed-by: Wang ShaoBo <bobo.shaobowang(a)huawei.com>
Reviewed-by: Yang Jihong <yangjihong1(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
tools/perf/Documentation/perf-stat.txt | 4 ++++
tools/perf/builtin-stat.c | 6 +++++-
tools/perf/util/stat.h | 1 +
3 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 9f9f29025e49..f9bcd95bf352 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -320,6 +320,10 @@ STAT RECORD
-----------
Stores stat data into perf data file.
+--quiet::
+Don't print output. This is useful with perf stat record below to only
+write data to the perf.data file.
+
-o file::
--output file::
Output file name.
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b01af171d94f..89e80a3bc9c3 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -973,6 +973,8 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
if (STAT_RECORD && perf_stat.data.is_pipe)
return;
+ if (stat_config.quiet)
+ return;
perf_evlist__print_counters(evsel_list, &stat_config, &target,
ts, argc, argv);
}
@@ -1171,6 +1173,8 @@ static struct option stat_options[] = {
"threads of same physical core"),
OPT_BOOLEAN(0, "summary", &stat_config.summary,
"print summary for interval mode"),
+ OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
+ "don't print output (useful with record)"),
#ifdef HAVE_LIBPFM
OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
"libpfm4 event selector. use 'perf list' to list available events",
@@ -2132,7 +2136,7 @@ int cmd_stat(int argc, const char **argv)
goto out;
}
- if (!output) {
+ if (!output && !stat_config.quiet) {
struct timespec tm;
mode = append_file ? "a" : "w";
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 487010c624be..05adf8165025 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -122,6 +122,7 @@ struct perf_stat_config {
bool metric_no_group;
bool metric_no_merge;
bool stop_read_counter;
+ bool quiet;
FILE *output;
unsigned int interval;
unsigned int timeout;
--
2.20.1
[PATCH openEuler-1.0-LTS 001/103] mm/vmalloc: Hugepage vmalloc mappings
by Yang Yingliang 30 Oct '21
From: Nicholas Piggin <npiggin(a)gmail.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
https://lwn.net/ml/linux-kernel/20200825145753.529284-12-npiggin@gmail.com/
Don't distinguish between vmalloc and hugepage vmalloc, because there is no size
print in alloc_large_system_hash in v4.19.
Also, this patch adds page_order to vm_struct, which will break kabi.
--------------
Support huge page vmalloc mappings. Config option HAVE_ARCH_HUGE_VMALLOC
enables support on architectures that define HAVE_ARCH_HUGE_VMAP and
supports PMD sized vmap mappings.
vmalloc will attempt to allocate PMD-sized pages if allocating PMD size or
larger, and fall back to small pages if that was unsuccessful.
Allocations that do not use PAGE_KERNEL prot are not permitted to use huge
pages, because not all callers expect this (e.g., module allocations vs
strict module rwx).
This reduces TLB misses by nearly 30x on a `git diff` workload on a 2-node
POWER9 (59,800 -> 2,100) and reduces CPU cycles by 0.54%.
This can result in more internal fragmentation and memory overhead for a
given allocation; an option nohugevmalloc is added to disable this at boot.
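For illustration, the decision of when huge mappings are attempted can be
sketched in user space as below. The constant is an assumption: PMD_SIZE is
2MiB, as on arm64 and x86-64 with 4K base pages.

```c
#include <stdio.h>

#define PMD_SIZE	(2UL << 20)	/* assumed: 2MiB PMD, 4K base pages */

/* Illustrative only: huge mappings are tried when each node's share of
 * the allocation is at least PMD_SIZE; otherwise small pages are used. */
static int tries_huge_pages(unsigned long size, int node, int nr_online_nodes)
{
	unsigned long size_per_node = size;

	if (node < 0)	/* NUMA_NO_NODE: allocation spreads over all nodes */
		size_per_node /= nr_online_nodes;
	return size_per_node >= PMD_SIZE;
}

int main(void)
{
	printf("%d\n", tries_huge_pages(8UL << 20, -1, 4));	/* 1: 2M per node */
	printf("%d\n", tries_huge_pages(4UL << 20, -1, 4));	/* 0: 1M per node */
	printf("%d\n", tries_huge_pages(2UL << 20, 0, 4));	/* 1: node-local  */
	return 0;
}
```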
Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com>
Signed-off-by: Rui Xiang <rui.xiang(a)huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong(a)huawei.com>
Reviewed-by: Zefan Li <lizefan(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Reviewed-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
arch/Kconfig | 4 +
include/linux/vmalloc.h | 1 +
mm/vmalloc.c | 160 +++++++++++++++++++++++++++++++---------
3 files changed, 130 insertions(+), 35 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index e906cbb213444..00f55932ba781 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -559,6 +559,10 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
config HAVE_ARCH_HUGE_VMAP
bool
+config HAVE_ARCH_HUGE_VMALLOC
+ depends on HAVE_ARCH_HUGE_VMAP
+ bool
+
config HAVE_ARCH_SOFT_DIRTY
bool
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 496ac80046c01..07b4b1141ed8a 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -39,6 +39,7 @@ struct vm_struct {
unsigned long size;
unsigned long flags;
struct page **pages;
+ unsigned int page_order;
unsigned int nr_pages;
phys_addr_t phys_addr;
const void *caller;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index fc6394184a1ba..e76b806a6c003 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -41,6 +41,19 @@
#include "internal.h"
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
+static bool __ro_after_init vmap_allow_huge = true;
+
+static int __init set_nohugevmalloc(char *str)
+{
+ vmap_allow_huge = false;
+ return 0;
+}
+early_param("nohugevmalloc", set_nohugevmalloc);
+#else /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
+static const bool vmap_allow_huge = false;
+#endif /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
+
struct vfree_deferred {
struct llist_head list;
struct work_struct wq;
@@ -410,6 +423,61 @@ static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr,
return 0;
}
+static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page **pages)
+{
+ pgd_t *pgd;
+ unsigned long next;
+ int err = 0;
+ int nr = 0;
+
+ BUG_ON(addr >= end);
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr);
+ if (err)
+ return err;
+ } while (pgd++, addr = next, addr != end);
+
+ return 0;
+}
+
+static int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page **pages, unsigned int page_shift)
+{
+ unsigned int i, nr = (end - addr) >> PAGE_SHIFT;
+
+ WARN_ON(page_shift < PAGE_SHIFT);
+
+ if (page_shift == PAGE_SHIFT)
+ return vmap_small_pages_range_noflush(addr, end, prot, pages);
+
+ for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
+ int err;
+
+ err = vmap_range_noflush(addr, addr + (1UL << page_shift),
+ __pa(page_address(pages[i])), prot,
+ page_shift);
+ if (err)
+ return err;
+
+ addr += 1UL << page_shift;
+ }
+
+ return 0;
+}
+
+static int vmap_pages_range(unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page **pages, unsigned int page_shift)
+{
+ int err;
+
+ err = vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
+ flush_cache_vmap(addr, end);
+ return err;
+}
+
/**
* map_kernel_range_noflush - map kernel VM area with the specified pages
* @addr: start of the VM area to map
@@ -431,22 +499,7 @@ static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr,
int map_kernel_range_noflush(unsigned long addr, unsigned long size,
pgprot_t prot, struct page **pages)
{
- unsigned long end = addr + size;
- unsigned long next;
- pgd_t *pgd;
- int err = 0;
- int nr = 0;
-
- BUG_ON(addr >= end);
- pgd = pgd_offset_k(addr);
- do {
- next = pgd_addr_end(addr, end);
- err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr);
- if (err)
- return err;
- } while (pgd++, addr = next, addr != end);
-
- return 0;
+ return vmap_pages_range_noflush(addr, addr + size, prot, pages, PAGE_SHIFT);
}
int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
@@ -2270,11 +2323,11 @@ static void __vunmap(const void *addr, int deallocate_pages)
if (deallocate_pages) {
int i;
- for (i = 0; i < area->nr_pages; i++) {
+ for (i = 0; i < area->nr_pages; i += 1U << area->page_order) {
struct page *page = area->pages[i];
BUG_ON(!page);
- __free_pages(page, 0);
+ __free_pages(page, area->page_order);
}
kvfree(area->pages);
@@ -2403,9 +2456,12 @@ static void *__vmalloc_node(unsigned long size, unsigned long align,
gfp_t gfp_mask, pgprot_t prot,
int node, const void *caller);
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
- pgprot_t prot, int node)
+ pgprot_t prot, unsigned int page_shift, int node)
{
struct page **pages;
+ unsigned long addr = (unsigned long)area->addr;
+ unsigned long size = get_vm_area_size(area);
+ unsigned int page_order = page_shift - PAGE_SHIFT;
unsigned int nr_pages;
unsigned long array_size;
unsigned int i;
@@ -2415,7 +2471,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
0 :
__GFP_HIGHMEM;
- nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
+ nr_pages = size >> PAGE_SHIFT;
array_size = (unsigned long)nr_pages * sizeof(struct page *);
/* Please note that the recursion is strictly bounded. */
@@ -2434,27 +2490,27 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
area->pages = pages;
area->nr_pages = nr_pages;
+ area->page_order = page_order;
- for (i = 0; i < area->nr_pages; i++) {
+ for (i = 0; i < area->nr_pages; i += 1U << page_order) {
struct page *page;
+ int p;
- if (node == NUMA_NO_NODE)
- page = alloc_page(alloc_mask|highmem_mask);
- else
- page = alloc_pages_node(node, alloc_mask|highmem_mask, 0);
-
+ page = alloc_pages_node(node, alloc_mask|highmem_mask, page_order);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vunmap() */
area->nr_pages = i;
goto fail;
}
- area->pages[i] = page;
+
+ for (p = 0; p < (1U << page_order); p++)
+ area->pages[i + p] = page + p;
+
if (gfpflags_allow_blocking(gfp_mask|highmem_mask))
cond_resched();
}
- if (map_kernel_range((unsigned long)area->addr, get_vm_area_size(area),
- prot, pages) < 0)
+ if (vmap_pages_range(addr, addr + size, prot, pages, page_shift) < 0)
goto fail;
return area->addr;
@@ -2462,7 +2518,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
fail:
warn_alloc(gfp_mask, NULL,
"vmalloc: allocation failure, allocated %ld of %ld bytes",
- (area->nr_pages*PAGE_SIZE), area->size);
+ (area->nr_pages*PAGE_SIZE), size);
vfree(area->addr);
return NULL;
}
@@ -2491,19 +2547,42 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
struct vm_struct *area;
void *addr;
unsigned long real_size = size;
+ unsigned long real_align = align;
+ unsigned int shift = PAGE_SHIFT;
- size = PAGE_ALIGN(size);
if (!size || (size >> PAGE_SHIFT) > totalram_pages)
goto fail;
+ if (vmap_allow_huge && (pgprot_val(prot) == pgprot_val(PAGE_KERNEL))) {
+ unsigned long size_per_node;
+
+ /*
+ * Try huge pages. Only try for PAGE_KERNEL allocations,
+ * others like modules don't yet expect huge pages in
+ * their allocations due to apply_to_page_range not
+ * supporting them.
+ */
+
+ size_per_node = size;
+ if (node == NUMA_NO_NODE)
+ size_per_node /= num_online_nodes();
+ if (size_per_node >= PMD_SIZE) {
+ shift = PMD_SHIFT;
+ align = max(real_align, 1UL << shift);
+ size = ALIGN(real_size, 1UL << shift);
+ }
+ }
+
+again:
+ size = PAGE_ALIGN(size);
area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
vm_flags, start, end, node, gfp_mask, caller);
if (!area)
goto fail;
- addr = __vmalloc_area_node(area, gfp_mask, prot, node);
+ addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node);
if (!addr)
- return NULL;
+ goto fail;
/*
* First make sure the mappings are removed from all page-tables
@@ -2523,8 +2602,19 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
return addr;
fail:
- warn_alloc(gfp_mask, NULL,
+ if (shift > PAGE_SHIFT) {
+ free_vm_area(area);
+ shift = PAGE_SHIFT;
+ align = real_align;
+ size = real_size;
+ goto again;
+ }
+
+ if (!area) {
+ /* Warn for area allocation, page allocations already warn */
+ warn_alloc(gfp_mask, NULL,
"vmalloc: allocation failure: %lu bytes", real_size);
+ }
return NULL;
}
@@ -3503,7 +3593,7 @@ static int s_show(struct seq_file *m, void *p)
seq_printf(m, " %pS", v->caller);
if (v->nr_pages)
- seq_printf(m, " pages=%d", v->nr_pages);
+ seq_printf(m, " pages=%d order=%d", v->nr_pages, v->page_order);
if (v->phys_addr)
seq_printf(m, " phys=%pa", &v->phys_addr);
--
2.25.1
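For illustration only (not part of the series), a throwaway test module along
these lines could be used to observe the new behavior; all names below are
hypothetical, and the only interfaces assumed are vmalloc()/vfree() plus the
order= field that the s_show() hunk above adds to /proc/vmallocinfo:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/vmalloc.h>

static void *buf;

static int __init vmalloc_order_demo_init(void)
{
	/*
	 * With the patch applied (and vmap_allow_huge set), a PAGE_KERNEL
	 * allocation of at least PMD_SIZE (2 MiB with 4 KiB pages) may be
	 * backed by huge mappings; if the huge attempt fails, the "again:"
	 * path above transparently retries with order-0 pages.
	 */
	buf = vmalloc(4UL << 20);
	if (!buf)
		return -ENOMEM;
	/* "grep order= /proc/vmallocinfo" should report order=9 here. */
	return 0;
}

static void __exit vmalloc_order_demo_exit(void)
{
	vfree(buf);	/* __vunmap() above frees 1 << page_order pages at a time */
}

module_init(vmalloc_order_demo_init);
module_exit(vmalloc_order_demo_exit);
MODULE_LICENSE("GPL");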
From: Fang Lijun <fanglijun3(a)huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4D63I
CVE: NA
-------------------------------------------------
An interface, do_vm_mmap(), is added to support allocation in the
address spaces of other processes.
Signed-off-by: Fang Lijun <fanglijun3(a)huawei.com>
Signed-off-by: Zhou Guanghui <zhouguanghui1(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
include/linux/mm.h | 3 +++
mm/mmap.c | 34 ++++++++++++++++++++++++++++++++++
2 files changed, 37 insertions(+)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 75d94ea5d1c20..58fe28dd959b9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2436,6 +2436,9 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {}
extern int __must_check vm_brk(unsigned long, unsigned long);
extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
extern int vm_munmap(unsigned long, size_t);
+extern unsigned long do_vm_mmap(struct mm_struct *mm, unsigned long addr,
+ unsigned long len, unsigned long prot,
+ unsigned long flag, unsigned long pgoff);
extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
unsigned long, unsigned long,
unsigned long, unsigned long);
diff --git a/mm/mmap.c b/mm/mmap.c
index 80779bbb1c048..f7f1fd3b5fa39 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3094,6 +3094,40 @@ int vm_munmap(unsigned long start, size_t len)
}
EXPORT_SYMBOL(vm_munmap);
+/*
+ * Must acquire an additional reference to the mm struct to prevent the
+ * mm struct of the other process from being released.
+ *
+ * This interface is applicable only to kernel thread scenarios.
+ */
+unsigned long do_vm_mmap(struct mm_struct *mm, unsigned long addr,
+ unsigned long len, unsigned long prot,
+ unsigned long flag, unsigned long pgoff)
+{
+ unsigned long ret;
+ unsigned long populate;
+ LIST_HEAD(uf);
+
+ if (mm == NULL || current->mm)
+ return -EINVAL;
+
+ if (down_write_killable(&mm->mmap_sem))
+ return -EINTR;
+
+ current->mm = mm;
+ ret = do_mmap_pgoff(0, addr, len, prot, flag, pgoff,
+ &populate, &uf);
+
+ current->mm = NULL;
+ up_write(&mm->mmap_sem);
+ userfaultfd_unmap_complete(mm, &uf);
+ if (populate)
+ mm_populate(ret, populate);
+
+ return ret;
+}
+EXPORT_SYMBOL(do_vm_mmap);
+
SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
{
profile_munmap(addr);
--
2.25.1
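A hypothetical usage sketch (not part of the patch) makes the constraints in
the comment above concrete: the caller must be a kernel thread (current->mm ==
NULL), and it must hold its own reference on the target mm so the address
space cannot be released underneath it. map_into_task() is an invented name;
get_task_mm()/mmput() are the stock helpers for pinning an mm:

#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/sched/mm.h>

/* Map anonymous memory into another task's address space from a kthread. */
static unsigned long map_into_task(struct task_struct *tsk, unsigned long len)
{
	struct mm_struct *mm;
	unsigned long addr;

	mm = get_task_mm(tsk);	/* the extra reference; NULL for kernel threads */
	if (!mm)
		return -ESRCH;

	/* do_vm_mmap() rejects callers that already have an mm of their own */
	addr = do_vm_mmap(mm, 0, len, PROT_READ | PROT_WRITE,
			  MAP_ANONYMOUS | MAP_PRIVATE, 0);

	mmput(mm);		/* drop the reference taken above */
	return addr;		/* negative errno encoded; check IS_ERR_VALUE() */
}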
Re: [PATCH openEuler-21.03] vfio-ccw: Reset FSM state to IDLE inside FSM
by Wangshaobo (bobo) 29 Oct '21
Reviewed-by: Wang ShaoBo <bobo.shaobowang(a)huawei.com>
On 2021/10/28 22:42, Chen Silong wrote:
> From: Eric Farman <farman(a)linux.ibm.com>
>
> stable inclusion
> from stable-v5.10.44
> commit cad3dc73c0645d00adfe96cebc8d950897cc1227
> bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=453
> CVE: NA
>
> -------------------------------------------------
>
> [ Upstream commit 6c02ac4c9211edabe17bda437ac97e578756f31b ]
>
> When an I/O request is made, the fsm_io_request() routine
> moves the FSM state from IDLE to CP_PROCESSING, and then
> fsm_io_helper() moves it to CP_PENDING if the START SUBCHANNEL
> received a cc0. Yet, the error case to go from CP_PROCESSING
> back to IDLE is done after the FSM call returns.
>
> Let's move this up into the FSM proper, to provide some
> better symmetry when unwinding in this case.
>
> Signed-off-by: Eric Farman <farman(a)linux.ibm.com>
> Reviewed-by: Cornelia Huck <cohuck(a)redhat.com>
> Acked-by: Matthew Rosato <mjrosato(a)linux.ibm.com>
> Message-Id: <20210511195631.3995081-3-farman(a)linux.ibm.com>
> Signed-off-by: Cornelia Huck <cohuck(a)redhat.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
> Signed-off-by: Chen Silong <2019117735(a)my.swjtu.edu.cn>
> ---
> drivers/s390/cio/vfio_ccw_fsm.c | 1 +
> drivers/s390/cio/vfio_ccw_ops.c | 2 --
> 2 files changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
> index 23e61aa638e4..e435a9cd92da 100644
> --- a/drivers/s390/cio/vfio_ccw_fsm.c
> +++ b/drivers/s390/cio/vfio_ccw_fsm.c
> @@ -318,6 +318,7 @@ static void fsm_io_request(struct vfio_ccw_private *private,
> }
>
> err_out:
> + private->state = VFIO_CCW_STATE_IDLE;
> trace_vfio_ccw_fsm_io_request(scsw->cmd.fctl, schid,
> io_region->ret_code, errstr);
> }
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index 1ad5f7018ec2..2280f51dd679 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -276,8 +276,6 @@ static ssize_t vfio_ccw_mdev_write_io_region(struct vfio_ccw_private *private,
> }
>
> vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_IO_REQ);
> - if (region->ret_code != 0)
> - private->state = VFIO_CCW_STATE_IDLE;
> ret = (region->ret_code != 0) ? region->ret_code : count;
>
> out_unlock:
[PATCH openEuler-1.0-LTS V3 0/6] Fix the problem that the number of tcp timeout retransmissions is lost
by Laibin Qiu 29 Oct '21
issue: https://gitee.com/openeuler/kernel/issues/I4AFRJ?from=project-issue
Eric Dumazet (4):
tcp: switch tcp and sch_fq to new earliest departure time model
net_sched: sch_fq: ensure maxrate fq parameter applies to EDT flows
tcp: address problems caused by EDT misshaps
tcp: adjust rto_base in retransmits_timed_out()
Yuchung Cheng (2):
tcp: always set retrans_stamp on recovery
tcp: create a helper to model exponential backoff
net/ipv4/tcp_bbr.c | 7 +++--
net/ipv4/tcp_input.c | 17 +++++++-----
net/ipv4/tcp_output.c | 31 +++++++++++++++------
net/ipv4/tcp_timer.c | 64 ++++++++++++++++++++-----------------------
net/sched/sch_fq.c | 46 ++++++++++++++++++-------------
5 files changed, 93 insertions(+), 72 deletions(-)
--
2.22.0
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4FS3G?from=project-issue
CVE: NA
---------------------------
There are some language problems in the README file, and its Markdown
format syntax does not render correctly, so the file needs to be adjusted.
Signed-off-by: suqin <suqin2(a)huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin(a)huawei.com>
---
README | 226 ---------------------------------------------------
README.md | 237 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 237 insertions(+), 226 deletions(-)
delete mode 100644 README
create mode 100644 README.md
diff --git a/README b/README
deleted file mode 100644
index 46c9ea352..000000000
--- a/README
+++ /dev/null
@@ -1,226 +0,0 @@
-Contributions to openEuler kernel project
-=========================================
-
-Sign CLA
---------
-
-Before submitting any Contributions to openEuler, you have to sign CLA.
-
-See:
- https://openeuler.org/zh/cla.html
- https://openeuler.org/en/cla.html
-
-Steps of submitting patches
----------------------------
-
-1. Compile and test your patches successfully.
-2. Generate patches
- Your patches should be based on top of latest openEuler branch, and should
- use git-format-patch to generate patches, and if it's a patchset, it's
- better to use --cover-letter option to describe what the patchset does.
-
- Using scripts/checkpatch.pl to make sure there's no coding style issue.
-
- And make sure your patch follow unified openEuler patch format describe
- below.
-
-3. Send patch to openEuler mailing list
- Use this command to send patches to openEuler mailing list:
-
- git send-email *.patch -to="kernel(a)openeuler.org" --suppress-cc=all
-
- *NOTE*: that you must add --suppress-cc=all if you use git send-email,
- otherwise the email will be cced to the people in upstream community and mailing
- lists.
-
- *See*: How to send patches using git-send-email
- https://git-scm.com/docs/git-send-email
-
-4. Mark "v1, v2, v3 ..." in your patch subject if you have multiple versions
- to send out.
-
- Use --subject-prefix="PATCH v2" option to add v2 tag for patchset.
- git format-patch --subject-prefix="PATCH v2" -1
-
- Subject examples:
- Subject: [PATCH v2 01/27] fork: fix some -Wmissing-prototypes warnings
- Subject: [PATCH v3] ext2: improve scalability of bitmap searching
-
-5. Upstream your kernel patch to kernel community is strongly recommended.
- openEuler will sync up with kernel master timely.
-
-6. Sign your work - the Developer’s Certificate of Origin
- As the same of upstream kernel community, you also need to sign your patch.
-
- See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html
-
- The sign-off is a simple line at the end of the explanation for the patch,
- which certifies that you wrote it or otherwise have the right to pass it
- on as an open-source patch. The rules are pretty simple: if you can certify
- the below:
-
- Developer’s Certificate of Origin 1.1
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
- By making a contribution to this project, I certify that:
-
- (a) The contribution was created in whole or in part by me and I have
- the right to submit it under the open source license indicated in
- the file; or
-
- (b The contribution is based upon previous work that, to the best of
- my knowledge, is covered under an appropriate open source license
- and I have the right under that license to submit that work with
- modifications, whether created in whole or in part by me, under
- the same open source license (unless I am permitted to submit under
- a different license), as indicated in the file; or
-
- (c) The contribution was provided directly to me by some other person
- who certified (a), (b) or (c) and I have not modified it.
-
- (d) I understand and agree that this project and the contribution are
- public and that a record of the contribution (including all personal
- information I submit with it, including my sign-off) is maintained
- indefinitely and may be redistributed consistent with this project
- or the open source license(s) involved.
-
- then you just add a line saying:
-
- Signed-off-by: Random J Developer <random(a)developer.example.org>
-
- using your real name (sorry, no pseudonyms or anonymous contributions.)
-
-Use unified patch format
-------------------------
-
-Reasons:
-
-1. long term maintainability
- openEuler will merge massive patches. If all patches are merged by casual
- changelog format without a unified format, the git log will be messy, and
- then it's hard to figure out the original patch.
-
-2. kernel upgrade
- We definitely will upgrade our openEuler kernel in someday, using strict
- patch management will alleviate the pain to migrate patches during big upgrade.
-
-3. easy for script parsing
- Keyword highlighting is necessary for script parsing.
-
-Patch format definition
------------------------
-
-[M] stands for "mandatory"
-[O] stands for "option"
-$category can be: bug preparation, bugfix, perf, feature, doc, other...
-
-If category is feature, then we also need to add feature name like below:
- category: feature
- feature: YYY (the feature name)
-
-If the patch is related to CVE or bugzilla, then we need add the corresponding
-tag like below (In general, it should include at least one of the following):
- CVE: $cve-id
- bugzilla: $bug-id
-
-Additional changelog should include at least one of the flollwing:
- 1) Why we should apply this patch
- 2) What real problem in product does this patch resolved
- 3) How could we reproduce this bug or how to test
- 4) Other useful information for help to understand this patch or problem
-
-The detail information is very useful for porting patch to another kenrel branch.
-
-Example for mainline patch:
-
- mainline inclusion [M]
- from $mainline-version [M]
- commit $id [M]
- category: $category [M]
- bugzilla: $bug-id [O]
- CVE: $cve-id [O]
-
- additional changelog [O]
-
- --------------------------------
-
- original changelog
-
- Signed-off-by: $yourname <$yourname(a)huawei.com> [M]
-
- ($mainline-version could be mainline-3.5, mainline-3.6, etc...)
-
-Examples
---------
-
-mainline inclusion
-from mainline-4.10
-commit 0becc0ae5b42828785b589f686725ff5bc3b9b25
-category: bugfix
-bugzilla: 3004
-CVE: NA
-
-The patch fixes a BUG_ON in the product: injecting single bit ECC error
-to memory before system boot use hardware inject tools, which cause a
-large amount of CMCI during system booting .
-
-[ 1.146580] mce: [Hardware Error]: Machine check events logged
-[ 1.152908] ------------[ cut here ]------------
-[ 1.157751] kernel BUG at kernel/timer.c:951!
-[ 1.162321] invalid opcode: 0000 [#1] SMP
-...
-
--------------------------------------------------
-
-original changelog
-
-<original S-O-B>
-Signed-off-by: Zhang San <zhangsan(a)huawei.com>
-Tested-by: Li Si <lisi(a)huawei.com>
-
-Email Client - Thunderbird Settings
------------------------------------
-
-If you are newly developer in the kernel community, it is highly recommended
-to use thunderbird mail client.
-
-1. Thunderbird Installation
- Get English version Thunderbird from http://www.mozilla.org/ and install
- it on your system。
-
- Download url: https://www.thunderbird.net/en-US/thunderbird/all/
-
-2. Settings
- 2.1 Use plain text format instead of HTML format
- Options -> Account Settings -> Composition & Addressing, do *NOT* select
- "Compose message in HTML format".
-
- 2.2 Editor Settings
- Tools->Options->Advanced->Config editor.
-
- - To bring up the thunderbird's registry editor, and set:
- "mailnews.send_plaintext_flowed" to "false".
- - Disable HTML Format: Set "mail.identity.id1.compose_html" to "false".
- - Enable UTF8: Set "prefs.converted-to-utf8" to "true".
- - View message in UTF-8: Set "mailnews.view_default_charset" to "UTF-8".
- - Set mailnews.wraplength to 9999 for avoiding auto-wrap
-
-Linux kernel
-============
-
-There are several guides for kernel developers and users. These guides can
-be rendered in a number of formats, like HTML and PDF. Please read
-Documentation/admin-guide/README.rst first.
-
-In order to build the documentation, use ``make htmldocs`` or
-``make pdfdocs``. The formatted documentation can also be read online at:
-
- https://www.kernel.org/doc/html/latest/
-
-There are various text files in the Documentation/ subdirectory,
-several of them using the Restructured Text markup notation.
-See Documentation/00-INDEX for a list of what is contained in each file.
-
-Please read the Documentation/process/changes.rst file, as it contains the
-requirements for building and running the kernel, and information about
-the problems which may result by upgrading your kernel.
diff --git a/README.md b/README.md
new file mode 100644
index 000000000..20832fd85
--- /dev/null
+++ b/README.md
@@ -0,0 +1,237 @@
+# How to Contribute
+-------
+
+- [How to Contribute](#how-to-contribute)
+
+ \- [Sign the CLA](#sign-the-cla)
+
+ \- [Steps of submitting patches](#steps-of-submitting-patches)
+
+ \- [Use the unified patch format](#use-the-unified-patch-format)
+
+ \- [Define the patch format](#define-the-patch-format)
+
+ \- [Examples](#examples)
+
+ \- [Email client - Thunderbird settings](#email-client---thunderbird-settings)
+
+- [Linux kernel](#linux-kernel)
+
+### Sign the CLA
+
+-------
+
+Before making any contributions to openEuler, sign the CLA first.
+
+Address: [https://openeuler.org/en/cla.html](https://openeuler.org/en/cla.html)
+
+### Steps of submitting patches
+-------
+
+**Step 1** Compile and test your patches.
+
+**Step 2** Generate patches.
+
+Your patches should be generated based on the latest openEuler branch using git-format-patch. If your patches are in a patchset, it is better to use the **--cover-letter** option to describe what the patchset does.
+
+Use **scripts/checkpatch.pl** to ensure that no coding style issue exists.
+
+In addition, ensure that your patches comply with the unified openEuler patch format described below.
+
+**Step 3** Send your patches to the openEuler mailing list.
+
+To do so, run the following command:
+
+ `git send-email *.patch -to="kernel(a)openeuler.org" --suppress-cc=all`
+
+*NOTE*: Add **--suppress-cc=all** if you use git-send-email; otherwise, the email will be copied to all people in the upstream community and mailing lists.
+
+For details about how to send patches using git-send-email, see [https://git-scm.com/docs/git-send-email](https://git-scm.com/docs/git-send-….
+
+**Step 4** Mark "v1, v2, v3 ..." in your patch subject if you have multiple versions to send out.
+
+Use the **--subject-prefix="PATCH v2"** option to add the v2 tag to the patchset.
+
+ `git format-patch --subject-prefix="PATCH v2" -1`
+
+Subject examples:
+
+ Subject: [PATCH v2 01/27] fork: fix some -Wmissing-prototypes warnings
+
+ Subject: [PATCH v3] ext2: improve scalability of bitmap searching
+
+**Step 5** Upstream your kernel patches to the kernel community (recommended). openEuler will synchronize with the kernel master in a timely manner.
+
+**Step 6** Sign your work - the Developer’s Certificate of Origin.
+
+ Similar to the upstream kernel community, you also need to sign your patch.
+
+ For details, see [https://www.kernel.org/doc/html/latest/process/submitting-patches.html](htt….
+
+ The sign-off is a simple line at the end of the explanation of the patch, which certifies that you wrote it or otherwise have the right to pass it on as an open source patch. The rules are pretty simple. You can certify as below:
+
+ Developer’s Certificate of Origin 1.1
+
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ By making a contribution to this project, I certify that:
+
+ (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file;
+
+ (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file;
+
+ (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it.
+
+ (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved.
+
+Then you add a line saying:
+
+Signed-off-by: Random J Developer <random(a)developer.example.org>
+
+Use your real name (sorry, no pseudonyms or anonymous contributions).
+
+### Use the unified patch format
+-------
+
+Reasons:
+
+1. Long term maintainability
+
+ openEuler will merge massive patches. If all patches are merged by casual
+
+ changelog formats without a unified format, the git logs will be messy, and
+
+ then it is hard to figure out the original patches.
+
+2. Kernel upgrade
+
+ We definitely will upgrade our openEuler kernel someday, so strict patch management
+
+ will alleviate the pain of migrating patches during big upgrades.
+
+3. Easy for script parsing
+
+ Keyword highlighting is necessary for script parsing.
+
+### Define the patch format
+-------
+
+[M] stands for "mandatory".
+
+[O] stands for "option".
+
+$category can be: bug preparation, bugfix, perf, feature, doc, other...
+
+If category is feature, we need to add a feature name as below:
+
+```cpp
+category: feature
+feature: YYY (the feature name)
+```
+
+If the patch is related to CVE or bugzilla, we need to add the corresponding tag as below (In general, it should include at least one of the following):
+
+```cpp
+CVE: $cve-id
+bugzilla: $bug-id
+```
+
+Additional changelog should include at least one of the following:
+
+1. Why we should apply this patch
+
+2. What real problems in the product does this patch resolve
+
+3. How could we reproduce this bug or how to test
+
+4. Other useful information for help to understand this patch or problem
+
+The detailed information is very useful for migrating a patch to another kernel branch.
+
+Example for mainline patch:
+
+```cpp
+mainline inclusion [M]
+from $mainline-version [M]
+commit $id [M]
+category: $category [M]
+bugzilla: $bug-id [O]
+CVE: $cve-id [O]
+
+additional changelog [O]
+
+--------------------------------
+
+original changelog
+Signed-off-by: $yourname <$yourname(a)huawei.com> [M]
+($mainline-version could be mainline-3.5, mainline-3.6, etc...)
+```
+
+### Examples
+-------
+
+```cpp
+mainline inclusion
+from mainline-4.10
+commit 0becc0ae5b42828785b589f686725ff5bc3b9b25
+category: bugfix
+bugzilla: 3004
+CVE: N/A
+
+The patch fixes a BUG_ON in the product: Injecting a single-bit ECC error into the memory before system boot using hardware inject tools will cause a large amount of CMCI during system booting.
+[ 1.146580] mce: [Hardware Error]: Machine check events logged
+[ 1.152908] ------------[ cut here ]------------
+[ 1.157751] kernel BUG at kernel/timer.c:951!
+[ 1.162321] invalid opcode: 0000 [#1] SMP
+
+-------------------------------------------------
+
+original changelog
+
+<original S-O-B>
+Signed-off-by: Zhang San <zhangsan(a)huawei.com>
+Tested-by: Li Si <lisi(a)huawei.com>
+```
+
+### Email client - Thunderbird settings
+-------
+
+If you are a new developer in the kernel community, it is highly recommended that you use the Thunderbird mail client.
+
+1. Thunderbird Installation
+
+ Obtain the English version of Thunderbird from [http://www.mozilla.org/](http://www.mozilla.org/) and install it on your system.
+
+ Download URL: https://www.thunderbird.net/en-US/thunderbird/all/
+
+2. Settings
+
+ 2.1 Use the plain text format instead of the HTML format.
+
+ Choose **Options > Account Settings > Composition & Addressing**, and do **NOT** select Compose message in HTML format.
+
+ 2.2 Editor settings
+
+ **Tools > Options> Advanced > Config editor**
+
+ \- To bring up the Thunderbird's registry editor, set **mailnews.send_plaintext_flowed** to **false**.
+
+ \- Disable HTML Format: Set **mail.identity.id1.compose_html** to **false**.
+
+ \- Enable UTF-8: Set **prefs.converted-to-utf8** to **true**.
+
+ \- View messages in UTF-8: Set **mailnews.view_default_charset** to **UTF-8**.
+
+ \- Set **mailnews.wraplength** to **9999** to avoid auto-wrap.
+
+# Linux kernel
+-------
+
+There are several guides for kernel developers and users, which can be rendered in a number of formats, like HTML and PDF. You can read **Documentation/admin-guide/README.rst** first.
+
+In order to build the documentation, use **make htmldocs** or **make pdfdocs**. The formatted documentation can also be read online at: https://www.kernel.org/doc/html/latest/
+
+There are various text files in the Documentation/ subdirectory, several of which use the Restructured Text markup notation. See Documentation/00-INDEX for a list of what is contained in each file.
+
+Read the **Documentation/process/changes.rst** file, as it contains the requirements for building and running the kernel, and information about the problems that may be caused by upgrading your kernel.
+
--
2.22.0
backport psi feature and avoid kabi change
bugzilla: https://gitee.com/openeuler/kernel/issues/I47QS2
Baruch Siach (1):
psi: fix reference to kernel commandline enable
Dan Schatzberg (1):
kernel/sched/psi.c: expose pressure metrics on root cgroup
Johannes Weiner (12):
mm: workingset: tell cache transitions from workingset thrashing
sched: loadavg: consolidate LOAD_INT, LOAD_FRAC, CALC_LOAD
sched: loadavg: make calc_load_n() public
sched: sched.h: make rq locking and clock functions available in
stats.h
sched: introduce this_rq_lock_irq()
psi: pressure stall information for CPU, memory, and IO
psi: cgroup support
psi: make disabling/enabling easier for vendor kernels
psi: fix aggregation idle shut-off
psi: avoid divide-by-zero crash inside virtual machines
fs: kernfs: add poll file operation
sched/psi: Fix sampling error and rare div0 crashes with cgroups and
high uptime
Josef Bacik (1):
blk-iolatency: use a percentile approache for ssd's
Liu Xinpeng (2):
psi:enable psi in config
psi:avoid kabi change
Miklos Szeredi (1):
fuse: ignore PG_workingset after stealing
Olof Johansson (1):
kernel/sched/psi.c: simplify cgroup_move_task()
Suren Baghdasaryan (6):
psi: introduce state_mask to represent stalled psi states
psi: make psi_enable static
psi: rename psi fields in preparation for psi trigger addition
psi: split update_stats into parts
psi: track changed states
include/: refactor headers to allow kthread.h inclusion in psi_types.h
Yafang Shao (1):
mm, memcg: add workingset_restore in memory.stat
Documentation/accounting/psi.txt | 73 +++
Documentation/admin-guide/cgroup-v2.rst | 22 +
Documentation/admin-guide/kernel-parameters.txt | 4 +
arch/arm64/configs/openeuler_defconfig | 2 +
arch/powerpc/platforms/cell/cpufreq_spudemand.c | 2 +-
arch/powerpc/platforms/cell/spufs/sched.c | 9 +-
arch/s390/appldata/appldata_os.c | 4 -
arch/x86/configs/openeuler_defconfig | 2 +
block/blk-iolatency.c | 183 +++++-
drivers/cpuidle/governors/menu.c | 4 -
drivers/spi/spi-rockchip.c | 1 +
fs/fuse/dev.c | 1 +
fs/kernfs/file.c | 31 +-
fs/proc/loadavg.c | 3 -
include/linux/cgroup-defs.h | 12 +
include/linux/cgroup.h | 17 +
include/linux/kernfs.h | 8 +
include/linux/kthread.h | 4 +
include/linux/mmzone.h | 3 +
include/linux/page-flags.h | 5 +
include/linux/psi.h | 55 ++
include/linux/psi_types.h | 95 +++
include/linux/sched.h | 13 +
include/linux/sched/loadavg.h | 24 +-
include/linux/swap.h | 1 +
include/trace/events/mmflags.h | 1 +
init/Kconfig | 28 +
kernel/cgroup/cgroup.c | 55 +-
kernel/debug/kdb/kdb_main.c | 7 +-
kernel/fork.c | 4 +
kernel/kthread.c | 3 +
kernel/sched/Makefile | 1 +
kernel/sched/core.c | 16 +-
kernel/sched/loadavg.c | 139 ++--
kernel/sched/psi.c | 823 ++++++++++++++++++++++++
kernel/sched/sched.h | 178 ++---
kernel/sched/stats.h | 86 +++
kernel/workqueue.c | 23 +
kernel/workqueue_internal.h | 6 +-
mm/compaction.c | 5 +
mm/filemap.c | 20 +-
mm/huge_memory.c | 1 +
mm/memcontrol.c | 2 +
mm/migrate.c | 2 +
mm/page_alloc.c | 9 +
mm/swap_state.c | 1 +
mm/vmscan.c | 10 +
mm/vmstat.c | 1 +
mm/workingset.c | 117 +++-
49 files changed, 1837 insertions(+), 279 deletions(-)
create mode 100644 Documentation/accounting/psi.txt
create mode 100644 include/linux/psi.h
create mode 100644 include/linux/psi_types.h
create mode 100644 kernel/sched/psi.c
--
1.8.3.1
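Once the series is applied and psi is enabled (CONFIG_PSI, or the kernel
command line switch that the first patch's documentation fix refers to),
pressure data is exposed under /proc/pressure/. As a rough userspace
illustration, assuming the two-line format described in
Documentation/accounting/psi.txt:

#include <stdio.h>

/*
 * Dump memory pressure; expected output looks like:
 *   some avg10=0.00 avg60=0.00 avg300=0.00 total=0
 *   full avg10=0.00 avg60=0.00 avg300=0.00 total=0
 */
int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/pressure/memory", "r");

	if (!f) {
		perror("/proc/pressure/memory");	/* psi disabled? */
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}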