From: Gu Zitao <guzitao@wxiat.com>
Sunway inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SPZD
CVE: NA
-------------------------------
This patch contains basic architecture support, including boot code, devicetree support, memory management and DMA support, process and signal management, time management, interrupt handling, syscalls, clocks, loadable module support, ptrace support, headers and library code.
Currently, it works on SW3231 and SW831.
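For reference, a minimal build sketch (the cross-toolchain prefix below is an assumption and not defined by this patch; ARCH=sw_64 and the default compressed image follow from the new arch/sw_64/Makefile, whose "all" target builds arch/sw_64/boot/vmlinux.bin.gz):

    # configure and build the sw_64 kernel (hypothetical toolchain prefix)
    make ARCH=sw_64 CROSS_COMPILE=sw_64-linux-gnu- menuconfig
    make ARCH=sw_64 CROSS_COMPILE=sw_64-linux-gnu-   # produces arch/sw_64/boot/vmlinux.bin.gz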
Signed-off-by: Gu Zitao guzitao@wxiat.com #openEuler_contributor Signed-off-by: Laibin Qiu qiulaibin@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/sw_64/Kconfig | 686 ++++++ arch/sw_64/Kconfig.debug | 46 + arch/sw_64/Makefile | 70 + arch/sw_64/Makefile.postlink | 36 + arch/sw_64/boot/.gitignore | 2 + arch/sw_64/boot/Makefile | 29 + arch/sw_64/boot/dts/Makefile | 21 + arch/sw_64/boot/dts/chip3.dts | 195 ++ arch/sw_64/boot/dts/chip_vt.dts | 38 + arch/sw_64/chip/Makefile | 2 + arch/sw_64/chip/chip3/Makefile | 8 + arch/sw_64/chip/chip3/chip.c | 795 +++++++ arch/sw_64/chip/chip3/cpufreq_debugfs.c | 153 ++ arch/sw_64/chip/chip3/i2c-lib.c | 425 ++++ arch/sw_64/chip/chip3/irq_chip.c | 96 + arch/sw_64/chip/chip3/msi.c | 471 ++++ arch/sw_64/chip/chip3/pci-quirks.c | 247 +++ arch/sw_64/chip/chip3/vt_msi.c | 150 ++ arch/sw_64/defconfig | 73 + arch/sw_64/include/asm/Kbuild | 22 + arch/sw_64/include/asm/a.out-core.h | 80 + arch/sw_64/include/asm/a.out.h | 16 + arch/sw_64/include/asm/acenv.h | 40 + arch/sw_64/include/asm/acpi.h | 97 + arch/sw_64/include/asm/agp.h | 19 + arch/sw_64/include/asm/asm-offsets.h | 7 + arch/sw_64/include/asm/asm-prototypes.h | 23 + arch/sw_64/include/asm/ast2400.h | 168 ++ arch/sw_64/include/asm/atomic.h | 373 ++++ arch/sw_64/include/asm/barrier.h | 24 + arch/sw_64/include/asm/bitops.h | 470 ++++ arch/sw_64/include/asm/bug.h | 8 + arch/sw_64/include/asm/bugs.h | 9 + arch/sw_64/include/asm/cache.h | 13 + arch/sw_64/include/asm/cacheflush.h | 95 + arch/sw_64/include/asm/checksum.h | 74 + arch/sw_64/include/asm/chip3_io.h | 315 +++ arch/sw_64/include/asm/cmpxchg.h | 72 + arch/sw_64/include/asm/compiler.h | 7 + arch/sw_64/include/asm/console.h | 11 + arch/sw_64/include/asm/core.h | 48 + arch/sw_64/include/asm/cpu.h | 1 + arch/sw_64/include/asm/cputime.h | 7 + arch/sw_64/include/asm/current.h | 10 + arch/sw_64/include/asm/debug.h | 27 + arch/sw_64/include/asm/delay.h | 11 + arch/sw_64/include/asm/device.h | 13 + arch/sw_64/include/asm/div64.h | 7 + arch/sw_64/include/asm/dma-direct.h | 15 + arch/sw_64/include/asm/dma-mapping.h | 12 + arch/sw_64/include/asm/dma.h | 356 +++ arch/sw_64/include/asm/dmi.h | 30 + arch/sw_64/include/asm/early_ioremap.h | 30 + arch/sw_64/include/asm/efi.h | 38 + arch/sw_64/include/asm/elf.h | 170 ++ arch/sw_64/include/asm/emergency-restart.h | 7 + arch/sw_64/include/asm/exec.h | 7 + arch/sw_64/include/asm/extable.h | 55 + arch/sw_64/include/asm/floppy.h | 116 + arch/sw_64/include/asm/fpu.h | 91 + arch/sw_64/include/asm/ftrace.h | 40 + arch/sw_64/include/asm/futex.h | 133 ++ arch/sw_64/include/asm/hardirq.h | 24 + arch/sw_64/include/asm/hcall.h | 41 + arch/sw_64/include/asm/hmcall.h | 205 ++ arch/sw_64/include/asm/hugetlb.h | 25 + arch/sw_64/include/asm/hw_init.h | 180 ++ arch/sw_64/include/asm/hw_irq.h | 16 + arch/sw_64/include/asm/insn.h | 96 + arch/sw_64/include/asm/io.h | 291 +++ arch/sw_64/include/asm/irq.h | 31 + arch/sw_64/include/asm/irq_impl.h | 47 + arch/sw_64/include/asm/irq_regs.h | 7 + arch/sw_64/include/asm/irqflags.h | 63 + arch/sw_64/include/asm/jump_label.h | 50 + arch/sw_64/include/asm/kdebug.h | 15 + arch/sw_64/include/asm/kexec.h | 70 + arch/sw_64/include/asm/kgdb.h | 68 + arch/sw_64/include/asm/kmap_types.h | 15 + arch/sw_64/include/asm/kprobes.h | 71 + arch/sw_64/include/asm/kvm_asm.h | 14 + arch/sw_64/include/asm/kvm_cma.h | 11 + arch/sw_64/include/asm/kvm_emulate.h | 46 + arch/sw_64/include/asm/kvm_host.h | 119 + arch/sw_64/include/asm/kvm_mmio.h | 17 + 
arch/sw_64/include/asm/kvm_para.h | 26 + arch/sw_64/include/asm/kvm_timer.h | 9 + arch/sw_64/include/asm/linkage.h | 9 + arch/sw_64/include/asm/local.h | 125 ++ arch/sw_64/include/asm/local64.h | 7 + arch/sw_64/include/asm/memory.h | 34 + arch/sw_64/include/asm/mmu.h | 10 + arch/sw_64/include/asm/mmu_context.h | 218 ++ arch/sw_64/include/asm/mmzone.h | 47 + arch/sw_64/include/asm/module.h | 23 + arch/sw_64/include/asm/msi.h | 47 + arch/sw_64/include/asm/numa.h | 34 + arch/sw_64/include/asm/page.h | 63 + arch/sw_64/include/asm/param.h | 11 + arch/sw_64/include/asm/parport.h | 19 + arch/sw_64/include/asm/pci.h | 151 ++ arch/sw_64/include/asm/percpu.h | 19 + arch/sw_64/include/asm/perf_event.h | 7 + arch/sw_64/include/asm/pgalloc.h | 44 + arch/sw_64/include/asm/pgtable-4level.h | 32 + arch/sw_64/include/asm/pgtable.h | 634 ++++++ arch/sw_64/include/asm/platform.h | 17 + arch/sw_64/include/asm/preempt.h | 7 + arch/sw_64/include/asm/processor.h | 130 ++ arch/sw_64/include/asm/ptrace.h | 49 + arch/sw_64/include/asm/seccomp.h | 15 + arch/sw_64/include/asm/sections.h | 8 + arch/sw_64/include/asm/segment.h | 7 + arch/sw_64/include/asm/serial.h | 16 + arch/sw_64/include/asm/setup.h | 46 + arch/sw_64/include/asm/sfp-machine.h | 69 + arch/sw_64/include/asm/shmparam.h | 7 + arch/sw_64/include/asm/signal.h | 22 + arch/sw_64/include/asm/smp.h | 181 ++ arch/sw_64/include/asm/socket.h | 11 + arch/sw_64/include/asm/sparsemem.h | 9 + arch/sw_64/include/asm/special_insns.h | 20 + arch/sw_64/include/asm/spinlock.h | 24 + arch/sw_64/include/asm/spinlock_types.h | 8 + arch/sw_64/include/asm/string.h | 50 + arch/sw_64/include/asm/suspend.h | 48 + arch/sw_64/include/asm/sw64_init.h | 45 + arch/sw_64/include/asm/sw64io.h | 115 + arch/sw_64/include/asm/switch_to.h | 26 + arch/sw_64/include/asm/syscall.h | 75 + arch/sw_64/include/asm/tc.h | 16 + arch/sw_64/include/asm/termios.h | 81 + arch/sw_64/include/asm/thread_info.h | 133 ++ arch/sw_64/include/asm/timex.h | 24 + arch/sw_64/include/asm/tlb.h | 18 + arch/sw_64/include/asm/tlbflush.h | 119 + arch/sw_64/include/asm/topology.h | 68 + arch/sw_64/include/asm/trace_clock.h | 10 + arch/sw_64/include/asm/types.h | 7 + arch/sw_64/include/asm/uaccess.h | 313 +++ arch/sw_64/include/asm/ucontext.h | 14 + arch/sw_64/include/asm/unaligned.h | 12 + arch/sw_64/include/asm/unistd.h | 26 + arch/sw_64/include/asm/uprobes.h | 38 + arch/sw_64/include/asm/user.h | 53 + arch/sw_64/include/asm/vcpu.h | 47 + arch/sw_64/include/asm/vdso.h | 116 + arch/sw_64/include/asm/vga.h | 85 + arch/sw_64/include/asm/vmalloc.h | 5 + arch/sw_64/include/asm/word-at-a-time.h | 43 + arch/sw_64/include/asm/wrperfmon.h | 62 + arch/sw_64/include/asm/xchg.h | 328 +++ arch/sw_64/include/asm/xor.h | 847 ++++++++ arch/sw_64/include/uapi/asm/Kbuild | 4 + arch/sw_64/include/uapi/asm/a.out.h | 88 + arch/sw_64/include/uapi/asm/auxvec.h | 28 + arch/sw_64/include/uapi/asm/bitsperlong.h | 9 + arch/sw_64/include/uapi/asm/bootparam.h | 22 + arch/sw_64/include/uapi/asm/byteorder.h | 7 + arch/sw_64/include/uapi/asm/compiler.h | 83 + arch/sw_64/include/uapi/asm/console.h | 51 + arch/sw_64/include/uapi/asm/errno.h | 128 ++ arch/sw_64/include/uapi/asm/fcntl.h | 58 + arch/sw_64/include/uapi/asm/fpu.h | 218 ++ arch/sw_64/include/uapi/asm/gentrap.h | 38 + arch/sw_64/include/uapi/asm/hmcall.h | 15 + arch/sw_64/include/uapi/asm/ioctl.h | 19 + arch/sw_64/include/uapi/asm/ioctls.h | 123 ++ arch/sw_64/include/uapi/asm/ipcbuf.h | 7 + arch/sw_64/include/uapi/asm/kvm.h | 129 ++ arch/sw_64/include/uapi/asm/kvm_para.h | 7 + 
arch/sw_64/include/uapi/asm/mman.h | 83 + arch/sw_64/include/uapi/asm/msgbuf.h | 28 + arch/sw_64/include/uapi/asm/param.h | 16 + arch/sw_64/include/uapi/asm/perf_regs.h | 38 + arch/sw_64/include/uapi/asm/poll.h | 7 + arch/sw_64/include/uapi/asm/posix_types.h | 18 + arch/sw_64/include/uapi/asm/ptrace.h | 94 + arch/sw_64/include/uapi/asm/reg.h | 53 + arch/sw_64/include/uapi/asm/regdef.h | 45 + arch/sw_64/include/uapi/asm/resource.h | 23 + arch/sw_64/include/uapi/asm/sembuf.h | 23 + arch/sw_64/include/uapi/asm/setup.h | 7 + arch/sw_64/include/uapi/asm/shmbuf.h | 39 + arch/sw_64/include/uapi/asm/sigcontext.h | 35 + arch/sw_64/include/uapi/asm/siginfo.h | 11 + arch/sw_64/include/uapi/asm/signal.h | 119 + arch/sw_64/include/uapi/asm/socket.h | 127 ++ arch/sw_64/include/uapi/asm/sockios.h | 17 + arch/sw_64/include/uapi/asm/stat.h | 51 + arch/sw_64/include/uapi/asm/statfs.h | 9 + arch/sw_64/include/uapi/asm/swab.h | 43 + arch/sw_64/include/uapi/asm/sysinfo.h | 20 + arch/sw_64/include/uapi/asm/termbits.h | 202 ++ arch/sw_64/include/uapi/asm/termios.h | 70 + arch/sw_64/include/uapi/asm/types.h | 28 + arch/sw_64/include/uapi/asm/unistd.h | 17 + arch/sw_64/kernel/.gitignore | 2 + arch/sw_64/kernel/Makefile | 56 + arch/sw_64/kernel/acpi.c | 396 ++++ arch/sw_64/kernel/asm-offsets.c | 262 +++ arch/sw_64/kernel/audit.c | 61 + arch/sw_64/kernel/cacheinfo.c | 100 + arch/sw_64/kernel/core.c | 72 + arch/sw_64/kernel/crash_dump.c | 58 + arch/sw_64/kernel/dma_swiotlb.c | 25 + arch/sw_64/kernel/dup_print.c | 92 + arch/sw_64/kernel/early_init.c | 30 + arch/sw_64/kernel/early_printk.c | 186 ++ arch/sw_64/kernel/entry-ftrace.S | 195 ++ arch/sw_64/kernel/entry.S | 706 ++++++ arch/sw_64/kernel/ftrace.c | 176 ++ arch/sw_64/kernel/head.S | 109 + arch/sw_64/kernel/hibernate.c | 80 + arch/sw_64/kernel/hibernate_asm.S | 124 ++ arch/sw_64/kernel/insn.c | 123 ++ arch/sw_64/kernel/irq.c | 123 ++ arch/sw_64/kernel/irq_sw64.c | 93 + arch/sw_64/kernel/jump_label.c | 33 + arch/sw_64/kernel/kgdb.c | 236 ++ arch/sw_64/kernel/kprobes/Makefile | 3 + arch/sw_64/kernel/kprobes/common.h | 9 + arch/sw_64/kernel/kprobes/decode-insn.c | 103 + arch/sw_64/kernel/kprobes/kprobes.c | 316 +++ arch/sw_64/kernel/kvm_cma.c | 273 +++ arch/sw_64/kernel/machine_kexec.c | 217 ++ arch/sw_64/kernel/module.c | 291 +++ arch/sw_64/kernel/msi.c | 58 + arch/sw_64/kernel/pci-noop.c | 145 ++ arch/sw_64/kernel/pci-sysfs.c | 368 ++++ arch/sw_64/kernel/pci.c | 733 +++++++ arch/sw_64/kernel/pci_common.c | 285 +++ arch/sw_64/kernel/pci_impl.h | 75 + arch/sw_64/kernel/pci_iommu.c | 772 +++++++ arch/sw_64/kernel/perf_event.c | 763 +++++++ arch/sw_64/kernel/perf_regs.c | 37 + arch/sw_64/kernel/proc_misc.c | 25 + arch/sw_64/kernel/process.c | 352 +++ arch/sw_64/kernel/proto.h | 25 + arch/sw_64/kernel/ptrace.c | 707 ++++++ arch/sw_64/kernel/relocate.c | 313 +++ arch/sw_64/kernel/relocate_kernel.S | 176 ++ arch/sw_64/kernel/segvdbg.c | 32 + arch/sw_64/kernel/setup.c | 1047 +++++++++ arch/sw_64/kernel/signal.c | 415 ++++ arch/sw_64/kernel/smp.c | 810 +++++++ arch/sw_64/kernel/stacktrace.c | 46 + arch/sw_64/kernel/suspend.c | 79 + arch/sw_64/kernel/suspend_asm.S | 99 + arch/sw_64/kernel/sys_sw64.c | 151 ++ arch/sw_64/kernel/syscalls/Makefile | 38 + arch/sw_64/kernel/syscalls/syscall.tbl | 528 +++++ arch/sw_64/kernel/syscalls/syscallhdr.sh | 36 + arch/sw_64/kernel/syscalls/syscalltbl.sh | 32 + arch/sw_64/kernel/systbls.S | 16 + arch/sw_64/kernel/tc.c | 39 + arch/sw_64/kernel/time.c | 251 +++ arch/sw_64/kernel/timer.c | 149 ++ arch/sw_64/kernel/topology.c | 170 ++ 
arch/sw_64/kernel/traps.c | 1651 ++++++++++++++ arch/sw_64/kernel/unaligned.c | 59 + arch/sw_64/kernel/uprobes.c | 158 ++ arch/sw_64/kernel/vdso.c | 152 ++ arch/sw_64/kernel/vdso/.gitignore | 4 + arch/sw_64/kernel/vdso/Makefile | 74 + arch/sw_64/kernel/vdso/so2s.sh | 5 + arch/sw_64/kernel/vdso/vdso.S | 32 + arch/sw_64/kernel/vdso/vdso.lds.S | 89 + arch/sw_64/kernel/vdso/vgettimeofday.c | 179 ++ arch/sw_64/kernel/vdso/vrt_sigreturn.S | 29 + arch/sw_64/kernel/vmlinux.lds.S | 102 + arch/sw_64/kvm/Kconfig | 47 + arch/sw_64/kvm/Makefile | 13 + arch/sw_64/kvm/emulate.c | 115 + arch/sw_64/kvm/entry.S | 285 +++ arch/sw_64/kvm/handle_exit.c | 45 + arch/sw_64/kvm/irq.h | 12 + arch/sw_64/kvm/kvm-sw64.c | 713 ++++++ arch/sw_64/kvm/kvm_timer.c | 78 + arch/sw_64/kvm/mmio.c | 82 + arch/sw_64/kvm/vmem.c | 154 ++ arch/sw_64/lib/Kconfig | 40 + arch/sw_64/lib/Makefile | 49 + arch/sw_64/lib/checksum.c | 183 ++ arch/sw_64/lib/clear_page.S | 46 + arch/sw_64/lib/clear_user.S | 102 + arch/sw_64/lib/copy_page.S | 71 + arch/sw_64/lib/copy_user.S | 106 + arch/sw_64/lib/csum_ipv6_magic.S | 113 + arch/sw_64/lib/csum_partial_copy.c | 373 ++++ arch/sw_64/lib/deep-clear_page.S | 53 + arch/sw_64/lib/deep-copy_page.S | 53 + arch/sw_64/lib/deep-copy_user.S | 342 +++ arch/sw_64/lib/deep-memcpy.S | 240 +++ arch/sw_64/lib/deep-memset.S | 148 ++ arch/sw_64/lib/divide.S | 190 ++ arch/sw_64/lib/fls.c | 34 + arch/sw_64/lib/fpreg.c | 992 +++++++++ arch/sw_64/lib/iomap.c | 508 +++++ arch/sw_64/lib/iomap_copy.c | 55 + arch/sw_64/lib/memcpy.S | 201 ++ arch/sw_64/lib/memmove.S | 148 ++ arch/sw_64/lib/memset.S | 153 ++ arch/sw_64/lib/strcpy.S | 131 ++ arch/sw_64/lib/strncpy.S | 156 ++ arch/sw_64/lib/udelay.c | 70 + arch/sw_64/math-emu/Makefile | 10 + arch/sw_64/math-emu/math.c | 2267 ++++++++++++++++++++ arch/sw_64/math-emu/qrnnd.S | 133 ++ arch/sw_64/math-emu/sfp-util.h | 36 + arch/sw_64/mm/Makefile | 12 + arch/sw_64/mm/fault.c | 361 ++++ arch/sw_64/mm/hugetlbpage.c | 329 +++ arch/sw_64/mm/init.c | 349 +++ arch/sw_64/mm/mmap.c | 102 + arch/sw_64/mm/numa.c | 460 ++++ arch/sw_64/mm/physaddr.c | 39 + arch/sw_64/mm/thp.c | 64 + arch/sw_64/net/Makefile | 5 + arch/sw_64/net/bpf_jit.h | 343 +++ arch/sw_64/net/bpf_jit_comp.c | 973 +++++++++ arch/sw_64/oprofile/Makefile | 13 + arch/sw_64/oprofile/common.c | 172 ++ arch/sw_64/oprofile/op_impl.h | 56 + arch/sw_64/oprofile/op_model_sw2f.c | 280 +++ arch/sw_64/platform/Makefile | 2 + arch/sw_64/platform/platform_xuelang.c | 64 + arch/sw_64/tools/.gitignore | 2 + arch/sw_64/tools/Makefile | 8 + arch/sw_64/tools/relocs.c | 634 ++++++ arch/sw_64/tools/relocs.h | 71 + arch/sw_64/tools/relocs_main.c | 86 + 332 files changed, 44356 insertions(+) create mode 100644 arch/sw_64/Kconfig create mode 100644 arch/sw_64/Kconfig.debug create mode 100644 arch/sw_64/Makefile create mode 100644 arch/sw_64/Makefile.postlink create mode 100644 arch/sw_64/boot/.gitignore create mode 100644 arch/sw_64/boot/Makefile create mode 100644 arch/sw_64/boot/dts/Makefile create mode 100644 arch/sw_64/boot/dts/chip3.dts create mode 100644 arch/sw_64/boot/dts/chip_vt.dts create mode 100644 arch/sw_64/chip/Makefile create mode 100644 arch/sw_64/chip/chip3/Makefile create mode 100644 arch/sw_64/chip/chip3/chip.c create mode 100644 arch/sw_64/chip/chip3/cpufreq_debugfs.c create mode 100644 arch/sw_64/chip/chip3/i2c-lib.c create mode 100644 arch/sw_64/chip/chip3/irq_chip.c create mode 100644 arch/sw_64/chip/chip3/msi.c create mode 100644 arch/sw_64/chip/chip3/pci-quirks.c create mode 100644 arch/sw_64/chip/chip3/vt_msi.c create mode 
100644 arch/sw_64/defconfig create mode 100644 arch/sw_64/include/asm/Kbuild create mode 100644 arch/sw_64/include/asm/a.out-core.h create mode 100644 arch/sw_64/include/asm/a.out.h create mode 100644 arch/sw_64/include/asm/acenv.h create mode 100644 arch/sw_64/include/asm/acpi.h create mode 100644 arch/sw_64/include/asm/agp.h create mode 100644 arch/sw_64/include/asm/asm-offsets.h create mode 100644 arch/sw_64/include/asm/asm-prototypes.h create mode 100644 arch/sw_64/include/asm/ast2400.h create mode 100644 arch/sw_64/include/asm/atomic.h create mode 100644 arch/sw_64/include/asm/barrier.h create mode 100644 arch/sw_64/include/asm/bitops.h create mode 100644 arch/sw_64/include/asm/bug.h create mode 100644 arch/sw_64/include/asm/bugs.h create mode 100644 arch/sw_64/include/asm/cache.h create mode 100644 arch/sw_64/include/asm/cacheflush.h create mode 100644 arch/sw_64/include/asm/checksum.h create mode 100644 arch/sw_64/include/asm/chip3_io.h create mode 100644 arch/sw_64/include/asm/cmpxchg.h create mode 100644 arch/sw_64/include/asm/compiler.h create mode 100644 arch/sw_64/include/asm/console.h create mode 100644 arch/sw_64/include/asm/core.h create mode 100644 arch/sw_64/include/asm/cpu.h create mode 100644 arch/sw_64/include/asm/cputime.h create mode 100644 arch/sw_64/include/asm/current.h create mode 100644 arch/sw_64/include/asm/debug.h create mode 100644 arch/sw_64/include/asm/delay.h create mode 100644 arch/sw_64/include/asm/device.h create mode 100644 arch/sw_64/include/asm/div64.h create mode 100644 arch/sw_64/include/asm/dma-direct.h create mode 100644 arch/sw_64/include/asm/dma-mapping.h create mode 100644 arch/sw_64/include/asm/dma.h create mode 100644 arch/sw_64/include/asm/dmi.h create mode 100644 arch/sw_64/include/asm/early_ioremap.h create mode 100644 arch/sw_64/include/asm/efi.h create mode 100644 arch/sw_64/include/asm/elf.h create mode 100644 arch/sw_64/include/asm/emergency-restart.h create mode 100644 arch/sw_64/include/asm/exec.h create mode 100644 arch/sw_64/include/asm/extable.h create mode 100644 arch/sw_64/include/asm/floppy.h create mode 100644 arch/sw_64/include/asm/fpu.h create mode 100644 arch/sw_64/include/asm/ftrace.h create mode 100644 arch/sw_64/include/asm/futex.h create mode 100644 arch/sw_64/include/asm/hardirq.h create mode 100644 arch/sw_64/include/asm/hcall.h create mode 100644 arch/sw_64/include/asm/hmcall.h create mode 100644 arch/sw_64/include/asm/hugetlb.h create mode 100644 arch/sw_64/include/asm/hw_init.h create mode 100644 arch/sw_64/include/asm/hw_irq.h create mode 100644 arch/sw_64/include/asm/insn.h create mode 100644 arch/sw_64/include/asm/io.h create mode 100644 arch/sw_64/include/asm/irq.h create mode 100644 arch/sw_64/include/asm/irq_impl.h create mode 100644 arch/sw_64/include/asm/irq_regs.h create mode 100644 arch/sw_64/include/asm/irqflags.h create mode 100644 arch/sw_64/include/asm/jump_label.h create mode 100644 arch/sw_64/include/asm/kdebug.h create mode 100644 arch/sw_64/include/asm/kexec.h create mode 100644 arch/sw_64/include/asm/kgdb.h create mode 100644 arch/sw_64/include/asm/kmap_types.h create mode 100644 arch/sw_64/include/asm/kprobes.h create mode 100644 arch/sw_64/include/asm/kvm_asm.h create mode 100644 arch/sw_64/include/asm/kvm_cma.h create mode 100644 arch/sw_64/include/asm/kvm_emulate.h create mode 100644 arch/sw_64/include/asm/kvm_host.h create mode 100644 arch/sw_64/include/asm/kvm_mmio.h create mode 100644 arch/sw_64/include/asm/kvm_para.h create mode 100644 arch/sw_64/include/asm/kvm_timer.h create mode 
100644 arch/sw_64/include/asm/linkage.h create mode 100644 arch/sw_64/include/asm/local.h create mode 100644 arch/sw_64/include/asm/local64.h create mode 100644 arch/sw_64/include/asm/memory.h create mode 100644 arch/sw_64/include/asm/mmu.h create mode 100644 arch/sw_64/include/asm/mmu_context.h create mode 100644 arch/sw_64/include/asm/mmzone.h create mode 100644 arch/sw_64/include/asm/module.h create mode 100644 arch/sw_64/include/asm/msi.h create mode 100644 arch/sw_64/include/asm/numa.h create mode 100644 arch/sw_64/include/asm/page.h create mode 100644 arch/sw_64/include/asm/param.h create mode 100644 arch/sw_64/include/asm/parport.h create mode 100644 arch/sw_64/include/asm/pci.h create mode 100644 arch/sw_64/include/asm/percpu.h create mode 100644 arch/sw_64/include/asm/perf_event.h create mode 100644 arch/sw_64/include/asm/pgalloc.h create mode 100644 arch/sw_64/include/asm/pgtable-4level.h create mode 100644 arch/sw_64/include/asm/pgtable.h create mode 100644 arch/sw_64/include/asm/platform.h create mode 100644 arch/sw_64/include/asm/preempt.h create mode 100644 arch/sw_64/include/asm/processor.h create mode 100644 arch/sw_64/include/asm/ptrace.h create mode 100644 arch/sw_64/include/asm/seccomp.h create mode 100644 arch/sw_64/include/asm/sections.h create mode 100644 arch/sw_64/include/asm/segment.h create mode 100644 arch/sw_64/include/asm/serial.h create mode 100644 arch/sw_64/include/asm/setup.h create mode 100644 arch/sw_64/include/asm/sfp-machine.h create mode 100644 arch/sw_64/include/asm/shmparam.h create mode 100644 arch/sw_64/include/asm/signal.h create mode 100644 arch/sw_64/include/asm/smp.h create mode 100644 arch/sw_64/include/asm/socket.h create mode 100644 arch/sw_64/include/asm/sparsemem.h create mode 100644 arch/sw_64/include/asm/special_insns.h create mode 100644 arch/sw_64/include/asm/spinlock.h create mode 100644 arch/sw_64/include/asm/spinlock_types.h create mode 100644 arch/sw_64/include/asm/string.h create mode 100644 arch/sw_64/include/asm/suspend.h create mode 100644 arch/sw_64/include/asm/sw64_init.h create mode 100644 arch/sw_64/include/asm/sw64io.h create mode 100644 arch/sw_64/include/asm/switch_to.h create mode 100644 arch/sw_64/include/asm/syscall.h create mode 100644 arch/sw_64/include/asm/tc.h create mode 100644 arch/sw_64/include/asm/termios.h create mode 100644 arch/sw_64/include/asm/thread_info.h create mode 100644 arch/sw_64/include/asm/timex.h create mode 100644 arch/sw_64/include/asm/tlb.h create mode 100644 arch/sw_64/include/asm/tlbflush.h create mode 100644 arch/sw_64/include/asm/topology.h create mode 100644 arch/sw_64/include/asm/trace_clock.h create mode 100644 arch/sw_64/include/asm/types.h create mode 100644 arch/sw_64/include/asm/uaccess.h create mode 100644 arch/sw_64/include/asm/ucontext.h create mode 100644 arch/sw_64/include/asm/unaligned.h create mode 100644 arch/sw_64/include/asm/unistd.h create mode 100644 arch/sw_64/include/asm/uprobes.h create mode 100644 arch/sw_64/include/asm/user.h create mode 100644 arch/sw_64/include/asm/vcpu.h create mode 100644 arch/sw_64/include/asm/vdso.h create mode 100644 arch/sw_64/include/asm/vga.h create mode 100644 arch/sw_64/include/asm/vmalloc.h create mode 100644 arch/sw_64/include/asm/word-at-a-time.h create mode 100644 arch/sw_64/include/asm/wrperfmon.h create mode 100644 arch/sw_64/include/asm/xchg.h create mode 100644 arch/sw_64/include/asm/xor.h create mode 100644 arch/sw_64/include/uapi/asm/Kbuild create mode 100644 arch/sw_64/include/uapi/asm/a.out.h create mode 100644 
arch/sw_64/include/uapi/asm/auxvec.h create mode 100644 arch/sw_64/include/uapi/asm/bitsperlong.h create mode 100644 arch/sw_64/include/uapi/asm/bootparam.h create mode 100644 arch/sw_64/include/uapi/asm/byteorder.h create mode 100644 arch/sw_64/include/uapi/asm/compiler.h create mode 100644 arch/sw_64/include/uapi/asm/console.h create mode 100644 arch/sw_64/include/uapi/asm/errno.h create mode 100644 arch/sw_64/include/uapi/asm/fcntl.h create mode 100644 arch/sw_64/include/uapi/asm/fpu.h create mode 100644 arch/sw_64/include/uapi/asm/gentrap.h create mode 100644 arch/sw_64/include/uapi/asm/hmcall.h create mode 100644 arch/sw_64/include/uapi/asm/ioctl.h create mode 100644 arch/sw_64/include/uapi/asm/ioctls.h create mode 100644 arch/sw_64/include/uapi/asm/ipcbuf.h create mode 100644 arch/sw_64/include/uapi/asm/kvm.h create mode 100644 arch/sw_64/include/uapi/asm/kvm_para.h create mode 100644 arch/sw_64/include/uapi/asm/mman.h create mode 100644 arch/sw_64/include/uapi/asm/msgbuf.h create mode 100644 arch/sw_64/include/uapi/asm/param.h create mode 100644 arch/sw_64/include/uapi/asm/perf_regs.h create mode 100644 arch/sw_64/include/uapi/asm/poll.h create mode 100644 arch/sw_64/include/uapi/asm/posix_types.h create mode 100644 arch/sw_64/include/uapi/asm/ptrace.h create mode 100644 arch/sw_64/include/uapi/asm/reg.h create mode 100644 arch/sw_64/include/uapi/asm/regdef.h create mode 100644 arch/sw_64/include/uapi/asm/resource.h create mode 100644 arch/sw_64/include/uapi/asm/sembuf.h create mode 100644 arch/sw_64/include/uapi/asm/setup.h create mode 100644 arch/sw_64/include/uapi/asm/shmbuf.h create mode 100644 arch/sw_64/include/uapi/asm/sigcontext.h create mode 100644 arch/sw_64/include/uapi/asm/siginfo.h create mode 100644 arch/sw_64/include/uapi/asm/signal.h create mode 100644 arch/sw_64/include/uapi/asm/socket.h create mode 100644 arch/sw_64/include/uapi/asm/sockios.h create mode 100644 arch/sw_64/include/uapi/asm/stat.h create mode 100644 arch/sw_64/include/uapi/asm/statfs.h create mode 100644 arch/sw_64/include/uapi/asm/swab.h create mode 100644 arch/sw_64/include/uapi/asm/sysinfo.h create mode 100644 arch/sw_64/include/uapi/asm/termbits.h create mode 100644 arch/sw_64/include/uapi/asm/termios.h create mode 100644 arch/sw_64/include/uapi/asm/types.h create mode 100644 arch/sw_64/include/uapi/asm/unistd.h create mode 100644 arch/sw_64/kernel/.gitignore create mode 100644 arch/sw_64/kernel/Makefile create mode 100644 arch/sw_64/kernel/acpi.c create mode 100644 arch/sw_64/kernel/asm-offsets.c create mode 100644 arch/sw_64/kernel/audit.c create mode 100644 arch/sw_64/kernel/cacheinfo.c create mode 100644 arch/sw_64/kernel/core.c create mode 100644 arch/sw_64/kernel/crash_dump.c create mode 100644 arch/sw_64/kernel/dma_swiotlb.c create mode 100644 arch/sw_64/kernel/dup_print.c create mode 100644 arch/sw_64/kernel/early_init.c create mode 100644 arch/sw_64/kernel/early_printk.c create mode 100644 arch/sw_64/kernel/entry-ftrace.S create mode 100644 arch/sw_64/kernel/entry.S create mode 100644 arch/sw_64/kernel/ftrace.c create mode 100644 arch/sw_64/kernel/head.S create mode 100644 arch/sw_64/kernel/hibernate.c create mode 100644 arch/sw_64/kernel/hibernate_asm.S create mode 100644 arch/sw_64/kernel/insn.c create mode 100644 arch/sw_64/kernel/irq.c create mode 100644 arch/sw_64/kernel/irq_sw64.c create mode 100644 arch/sw_64/kernel/jump_label.c create mode 100644 arch/sw_64/kernel/kgdb.c create mode 100644 arch/sw_64/kernel/kprobes/Makefile create mode 100644 arch/sw_64/kernel/kprobes/common.h 
create mode 100644 arch/sw_64/kernel/kprobes/decode-insn.c create mode 100644 arch/sw_64/kernel/kprobes/kprobes.c create mode 100644 arch/sw_64/kernel/kvm_cma.c create mode 100644 arch/sw_64/kernel/machine_kexec.c create mode 100644 arch/sw_64/kernel/module.c create mode 100644 arch/sw_64/kernel/msi.c create mode 100644 arch/sw_64/kernel/pci-noop.c create mode 100644 arch/sw_64/kernel/pci-sysfs.c create mode 100644 arch/sw_64/kernel/pci.c create mode 100644 arch/sw_64/kernel/pci_common.c create mode 100644 arch/sw_64/kernel/pci_impl.h create mode 100644 arch/sw_64/kernel/pci_iommu.c create mode 100644 arch/sw_64/kernel/perf_event.c create mode 100644 arch/sw_64/kernel/perf_regs.c create mode 100644 arch/sw_64/kernel/proc_misc.c create mode 100644 arch/sw_64/kernel/process.c create mode 100644 arch/sw_64/kernel/proto.h create mode 100644 arch/sw_64/kernel/ptrace.c create mode 100644 arch/sw_64/kernel/relocate.c create mode 100644 arch/sw_64/kernel/relocate_kernel.S create mode 100644 arch/sw_64/kernel/segvdbg.c create mode 100644 arch/sw_64/kernel/setup.c create mode 100644 arch/sw_64/kernel/signal.c create mode 100644 arch/sw_64/kernel/smp.c create mode 100644 arch/sw_64/kernel/stacktrace.c create mode 100644 arch/sw_64/kernel/suspend.c create mode 100644 arch/sw_64/kernel/suspend_asm.S create mode 100644 arch/sw_64/kernel/sys_sw64.c create mode 100644 arch/sw_64/kernel/syscalls/Makefile create mode 100644 arch/sw_64/kernel/syscalls/syscall.tbl create mode 100644 arch/sw_64/kernel/syscalls/syscallhdr.sh create mode 100644 arch/sw_64/kernel/syscalls/syscalltbl.sh create mode 100644 arch/sw_64/kernel/systbls.S create mode 100644 arch/sw_64/kernel/tc.c create mode 100644 arch/sw_64/kernel/time.c create mode 100644 arch/sw_64/kernel/timer.c create mode 100644 arch/sw_64/kernel/topology.c create mode 100644 arch/sw_64/kernel/traps.c create mode 100644 arch/sw_64/kernel/unaligned.c create mode 100644 arch/sw_64/kernel/uprobes.c create mode 100644 arch/sw_64/kernel/vdso.c create mode 100644 arch/sw_64/kernel/vdso/.gitignore create mode 100644 arch/sw_64/kernel/vdso/Makefile create mode 100755 arch/sw_64/kernel/vdso/so2s.sh create mode 100644 arch/sw_64/kernel/vdso/vdso.S create mode 100644 arch/sw_64/kernel/vdso/vdso.lds.S create mode 100644 arch/sw_64/kernel/vdso/vgettimeofday.c create mode 100644 arch/sw_64/kernel/vdso/vrt_sigreturn.S create mode 100644 arch/sw_64/kernel/vmlinux.lds.S create mode 100644 arch/sw_64/kvm/Kconfig create mode 100644 arch/sw_64/kvm/Makefile create mode 100644 arch/sw_64/kvm/emulate.c create mode 100644 arch/sw_64/kvm/entry.S create mode 100644 arch/sw_64/kvm/handle_exit.c create mode 100644 arch/sw_64/kvm/irq.h create mode 100644 arch/sw_64/kvm/kvm-sw64.c create mode 100644 arch/sw_64/kvm/kvm_timer.c create mode 100644 arch/sw_64/kvm/mmio.c create mode 100644 arch/sw_64/kvm/vmem.c create mode 100644 arch/sw_64/lib/Kconfig create mode 100644 arch/sw_64/lib/Makefile create mode 100644 arch/sw_64/lib/checksum.c create mode 100644 arch/sw_64/lib/clear_page.S create mode 100644 arch/sw_64/lib/clear_user.S create mode 100644 arch/sw_64/lib/copy_page.S create mode 100644 arch/sw_64/lib/copy_user.S create mode 100644 arch/sw_64/lib/csum_ipv6_magic.S create mode 100644 arch/sw_64/lib/csum_partial_copy.c create mode 100644 arch/sw_64/lib/deep-clear_page.S create mode 100644 arch/sw_64/lib/deep-copy_page.S create mode 100644 arch/sw_64/lib/deep-copy_user.S create mode 100644 arch/sw_64/lib/deep-memcpy.S create mode 100644 arch/sw_64/lib/deep-memset.S create mode 100644 
arch/sw_64/lib/divide.S create mode 100644 arch/sw_64/lib/fls.c create mode 100644 arch/sw_64/lib/fpreg.c create mode 100644 arch/sw_64/lib/iomap.c create mode 100644 arch/sw_64/lib/iomap_copy.c create mode 100644 arch/sw_64/lib/memcpy.S create mode 100644 arch/sw_64/lib/memmove.S create mode 100644 arch/sw_64/lib/memset.S create mode 100644 arch/sw_64/lib/strcpy.S create mode 100644 arch/sw_64/lib/strncpy.S create mode 100644 arch/sw_64/lib/udelay.c create mode 100644 arch/sw_64/math-emu/Makefile create mode 100644 arch/sw_64/math-emu/math.c create mode 100644 arch/sw_64/math-emu/qrnnd.S create mode 100644 arch/sw_64/math-emu/sfp-util.h create mode 100644 arch/sw_64/mm/Makefile create mode 100644 arch/sw_64/mm/fault.c create mode 100644 arch/sw_64/mm/hugetlbpage.c create mode 100644 arch/sw_64/mm/init.c create mode 100644 arch/sw_64/mm/mmap.c create mode 100644 arch/sw_64/mm/numa.c create mode 100644 arch/sw_64/mm/physaddr.c create mode 100644 arch/sw_64/mm/thp.c create mode 100644 arch/sw_64/net/Makefile create mode 100644 arch/sw_64/net/bpf_jit.h create mode 100644 arch/sw_64/net/bpf_jit_comp.c create mode 100644 arch/sw_64/oprofile/Makefile create mode 100644 arch/sw_64/oprofile/common.c create mode 100644 arch/sw_64/oprofile/op_impl.h create mode 100644 arch/sw_64/oprofile/op_model_sw2f.c create mode 100644 arch/sw_64/platform/Makefile create mode 100644 arch/sw_64/platform/platform_xuelang.c create mode 100644 arch/sw_64/tools/.gitignore create mode 100644 arch/sw_64/tools/Makefile create mode 100644 arch/sw_64/tools/relocs.c create mode 100644 arch/sw_64/tools/relocs.h create mode 100644 arch/sw_64/tools/relocs_main.c
diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig new file mode 100644 index 000000000000..bef7ab381674 --- /dev/null +++ b/arch/sw_64/Kconfig @@ -0,0 +1,686 @@ +# SPDX-License-Identifier: GPL-2.0 +config SW64 + bool + default y + select AUDIT_ARCH + select VIRT_IO + select HAVE_AOUT + select HAVE_IDE + select HAVE_OPROFILE +# select HAVE_SYSCALL_WRAPPERS + select HAVE_IRQ_WORK + select HAVE_PCSPKR_PLATFORM + select HAVE_PERF_EVENTS + select HAVE_GENERIC_HARDIRQS + select GENERIC_CLOCKEVENTS + select GENERIC_IRQ_PROBE + select GENERIC_IRQ_LEGACY + select GENERIC_IDLE_LOOP + select GENERIC_IRQ_SHOW + select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_SUPPORTS_MSI + select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_NO_PREEMPT + select ARCH_USE_CMPXCHG_LOCKREF + select GENERIC_SMP_IDLE_THREAD + select HAVE_MOD_ARCH_SPECIFIC + select MODULES_USE_ELF_RELA + select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_WANTS_PROT_NUMA_PROT_NONE + select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_GENERIC_RCU_GUP + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_SECCOMP_FILTER + select GENERIC_SIGALTSTACK + select OLD_SIGACTION + select OLD_SIGSUSPEND + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER + select HAVE_ARCH_KGDB + select ARCH_HAS_PHYS_TO_DMA + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select NO_BOOTMEM + select ARCH_USE_QUEUED_RWLOCKS + select ARCH_USE_QUEUED_SPINLOCKS + select COMMON_CLK + select HANDLE_DOMAIN_IRQ + select ARCH_INLINE_READ_LOCK if !PREEMPT + select ARCH_INLINE_READ_LOCK_BH if !PREEMPT + select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPT + select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPT + select ARCH_INLINE_READ_UNLOCK if !PREEMPT + select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPT + select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPT + select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPT + select ARCH_INLINE_WRITE_LOCK if !PREEMPT + select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPT + select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPT + select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPT + select ARCH_INLINE_WRITE_UNLOCK if !PREEMPT + select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPT + select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPT + select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPT + select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPT + select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPT + select ARCH_INLINE_SPIN_LOCK if !PREEMPT + select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPT + select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPT + select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPT + select ARCH_INLINE_SPIN_UNLOCK if !PREEMPT + select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPT + select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPT + select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPT + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_HAS_SG_CHAIN + select IRQ_FORCED_THREADING + select GENERIC_IRQ_MIGRATION if SMP + select HAVE_FUNCTION_TRACER + select HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_C_RECORDMCOUNT + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_KPROBES + select HAVE_KRETPROBES + select HAVE_SYSCALL_TRACEPOINTS + select ARCH_SUPPORTS_UPROBES + select OF_EARLY_FLATTREE if OF + select HAVE_EBPF_JIT + select SPARSEMEM_EXTREME if SPARSEMEM + select HAVE_ARCH_JUMP_LABEL + select ARCH_WANT_FRAME_POINTERS + select HAVE_ASM_MODVERSIONS + select ARCH_HAS_ELF_RANDOMIZE + select HAVE_PERF_USER_STACK_DUMP + select HAVE_PERF_REGS + select ARCH_SUPPORTS_ACPI + select ACPI if ARCH_SUPPORTS_ACPI + select ACPI_REDUCED_HARDWARE_ONLY if ACPI + select 
GENERIC_TIME_VSYSCALL + select SET_FS + select PCI_MSI_ARCH_FALLBACKS + select DMA_OPS if PCI + +config LOCKDEP_SUPPORT + def_bool y + +config 64BIT + def_bool y + +config MMU + bool + default y + +config PGTABLE_LEVELS + int + default 4 + +config SYS_SUPPORTS_HUGETLBFS + def_bool y + +config RWSEM_GENERIC_SPINLOCK + bool + +config RWSEM_XCHGADD_ALGORITHM + bool + default y + +config ARCH_ENABLE_MEMORY_HOTPLUG + bool + default y + +config ARCH_ENABLE_MEMORY_HOTREMOVE + bool + default y + +config ARCH_HAS_ILOG2_U32 + bool + default n + +config ARCH_HAS_ILOG2_U64 + bool + default n + +config GENERIC_GPIO + bool + +config ZONE_DMA32 + bool + default y + +config NEED_DMA_MAP_STATE + def_bool y + +config NEED_SG_DMA_LENGTH + def_bool y + +config ARCH_WANT_HUGE_PMD_SHARE + def_bool y + +config GENERIC_ISA_DMA + bool + default y + +config NONCACHE_PAGE + bool + depends on SW64 + default y + + +config AUDIT_ARCH + bool + +config SYS_HAS_EARLY_PRINTK + bool + +menu "System setup" + +menu "Machine Configuration" + +choice + prompt "Subarchitecture Configuration" + +config SUBARCH_C3B + bool "C3B" + +endchoice + +choice + prompt "Chipset Family" + +config SW64_CHIP3 + bool "Chip3" + depends on SUBARCH_C3B +endchoice + +choice + prompt "Runtime System" + depends on SW64_CHIP3 + default SW64_ASIC + +config SW64_FPGA + bool "FPGA" + help + Support for chip3 FPGA. + +config SW64_SIM + bool "Hardware Simulator" + help + Support for chip3 hardware simulator. + +config SW64_ASIC + bool "ASIC" + help + Support for chip3 asic. + +endchoice + +config SW64_CHIP3_ASIC_DEBUG + bool "Debug Support for Chip3 Asic" + depends on SW64_ASIC + help + Used for debug + +config CPUFREQ_DEBUGFS + bool "CPU Frequency debugfs interface for Chip3 Asic" + depends on SW64_CHIP3 && DEBUG_FS + help + Turns on the DebugFS interface for CPU Frequency. + + If you don't know what to do here, say N. + +choice + prompt "Platform Type" + +config PLATFORM_XUELANG + bool "Xuelang" + depends on SW64_CHIP3 + select SPARSE_IRQ + select SYS_HAS_EARLY_PRINTK + help + Sunway chip3 board chipset + +endchoice + +endmenu + +config LOCK_MEMB + bool "Insert mem barrier before lock instruction" + default y + +choice + prompt "DMA Mapping Type" + depends on SW64 && PCI + +config DIRECT_DMA + bool "Direct DMA Mapping" + depends on SW64 && PCI + +config SWIOTLB + bool "Software IO TLB" + depends on SW64 && PCI + help + Software IO TLB + +endchoice + +# clear all implied options (don't want default values for those): +# Most of these machines have ISA slots; not exactly sure which don't, +# and this doesn't activate hordes of code, so do it always. +config ISA + bool + default y + help + Find out whether you have ISA slots on your motherboard. ISA is the + name of a bus system, i.e. the way the CPU talks to the other stuff + inside your box. Other bus systems are PCI, EISA, MicroChannel + (MCA) or VESA. ISA is an older system, now being displaced by PCI; + newer boards don't support it. If you have ISA, say Y, otherwise N. + +config ISA_DMA_API + bool + default y + +config PCI + bool "PCI Support" + depends on SW64 + select GENERIC_PCI_IOMAP + default y + help + Find out whether you have a PCI motherboard. PCI is the name of a + bus system, i.e. the way the CPU talks to the other stuff inside + your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or + VESA. If you have PCI, say Y, otherwise N. 
+ +config PCI_DOMAINS + bool + default y + +config PCI_SYSCALL + def_bool PCI + +config IOMMU_HELPER + def_bool PCI + +config PHYSICAL_START + hex "Physical address where the kernel starts" + default "0x900000" + help + This gives the physical address where the kernel starts, and it + is 0x10000 before _text. If you plan to use kernel for capturing + the crash dump change this value to start of the reserved region + (the "X" value as specified in the "crashkernel=YM@XM" command + line boot parameter passed to the panic-ed kernel). + +config KEXEC + bool "Kexec system call (EXPERIMENTAL)" + select KEXEC_CORE + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. As of this writing the exact hardware + interface is strongly in flux, so no good recommendation can be + made. + +config CRASH_DUMP + bool "Kernel crash dumps (EXPERIMENTAL)" + help + Generate crash dump after being started by kexec. + This should be normally only set in special crash dump kernels + which are loaded in the main kernel with kexec-tools into + a specially reserved region and then later executed after + a crash by kdump/kexec. The crash dump kernel must be compiled + to a memory address not used by the main kernel or firmware using + PHYSICAL_START. + +config SECCOMP + def_bool y + prompt "Enable seccomp to safely compute untrusted bytecode" + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. Only embedded should say N here. + +config GENERIC_HWEIGHT + bool + default y + +config LOCK_FIXUP + bool "fix up the lock" + depends on SW64 + help + Add an instruction("memb\n") to ensure the correctness of the lock. + + +config SMP + bool "Symmetric multi-processing support" + depends on SW64 + select USE_GENERIC_SMP_HELPERS + help + This enables support for systems with more than one CPU. If you have + a system with only one CPU, like most personal computers, say N. If + you have a system with more than one CPU, say Y. + + If you say N here, the kernel will run on single and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on many, but not all, + singleprocessor machines. On a singleprocessor machine, the kernel + will run faster if you say N here. + + See also the SMP-HOWTO available at + http://www.tldp.org/docs.html#howto. + + If you don't know what to do here, say N. 
+ +config ARCH_PROC_KCORE_TEXT + def_bool y + +config HAVE_DEC_LOCK + bool "Use arch-specified dec_and_lock" + depends on SMP && !NUMA + default y + +config TRACE_IRQFLAGS_SUPPORT + def_bool y + +config ARCH_SUPPORTS_UPROBES + def_bool y + +config NR_CPUS + int "Maximum number of CPUs (2-256)" + range 2 256 + depends on SMP + default "64" if SW64_CHIP3 + help + SW6 support can handle a maximum of 256 CPUs. + +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + depends on SMP + help + Say Y here to allow turning CPUs off and on. CPUs can be + controlled through /sys/devices/system/cpu. + ( Note: power management support will enable this option + automatically on SMP systems. ) + Say N if you want to disable CPU hotplug. + +config ARCH_SPARSEMEM_ENABLE + bool "Sparse Memory Support" + depends on SMP + select SPARSEMEM_VMEMMAP_ENABLE + +config ARCH_DISCONTIGMEM_ENABLE + bool "Discontiguous Memory Support" + depends on SMP + help + Say Y to support efficient handling of discontiguous physical memory, + for architectures which are either NUMA (Non-Uniform Memory Access) + or have huge holes in the physical address space for other reasons. + See file:Documentation/vm/numa for more. + +source "kernel/Kconfig.preempt" + + +config NUMA + bool "NUMA Support" + depends on SMP && !FLATMEM + help + Say Y to compile the kernel to support NUMA (Non-Uniform Memory + Access). This option is for configuring high-end multiprocessor + server machines. If in doubt, say N. + +config USE_PERCPU_NUMA_NODE_ID + def_bool y + depends on NUMA + +config NODES_SHIFT + int + default "7" + depends on NEED_MULTIPLE_NODES + +config RELOCATABLE + bool "Relocatable kernel" + help + This builds a kernel image that retains relocation information + so it can be loaded someplace besides the default 1MB. + The relocations make the kernel binary about 15% larger, + but are discarded at runtime + +config RELOCATION_TABLE_SIZE + hex "Relocation table size" + depends on RELOCATABLE + range 0x0 0x01000000 + default "0x80000" + help + A table of relocation data will be appended to the kernel binary + and parsed at boot to fix up the relocated kernel. + + This option allows the amount of space reserved for the table to be + adjusted, although the default of 1Mb should be ok in most cases. + + The build will fail and a valid size suggested if this is too small. + + If unsure, leave at the default value. + +config RANDOMIZE_BASE + bool "Randomize the address of the kernel image" + depends on RELOCATABLE + help + Randomizes the physical and virtual address at which the + kernel image is loaded, as a security feature that + deters exploit attempts relying on knowledge of the location + of kernel internals. + + Entropy is generated using any coprocessor 0 registers available. + + The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET. + + If unsure, say N. + +config RANDOMIZE_BASE_MAX_OFFSET + hex "Maximum kASLR offset" if EXPERT + depends on RANDOMIZE_BASE + range 0x0 0x20000000 + default "0x10000000" + help + When kASLR is active, this provides the maximum offset that will + be applied to the kernel image. It should be set according to the + amount of physical RAM available in the target system minus + PHYSICAL_START and must be a power of 2. + + This is limited by the size of KTEXT space, 512Mb. The default is 256MB. 
+ +config HZ + int "HZ of the short timer" + default 500 + +source "drivers/pci/Kconfig" +source "drivers/eisa/Kconfig" + +source "drivers/pcmcia/Kconfig" + +source "fs/Kconfig.binfmt" + +source "arch/sw_64/lib/Kconfig" + +endmenu + +menu "Boot options" + +config SW64_IRQ_CHIP + bool + +config USE_OF + bool "Flattened Device Tree support" + select GENERIC_IRQ_CHIP + select IRQ_DOMAIN + select SW64_IRQ_CHIP + select OF + help + Include support for flattened device tree machine descriptions. + +config SW64_BUILTIN_DTB + bool "Embed DTB in kernel image" + depends on OF + default n + help + Embeds a device tree binary in the kernel image. + +config SW64_BUILTIN_DTB_NAME + string "Built in DTB" + depends on SW64_BUILTIN_DTB + help + Set the name of the DTB to embed, leave blank to pick one + automatically based on kernel configuration. + +config EFI + bool "UEFI runtime support" + select UCS2_STRING + select EFI_RUNTIME_WRAPPERS + default y + help + This option provides support for runtime services provided + by UEFI firmware (such as non-volatile variables, realtime + clock, and platform reset). A UEFI stub is also provided to + allow the kernel to be booted as an EFI application. This + is only useful on systems that have UEFI firmware. + +config DMI + bool "Enable support for SMBIOS (DMI) tables" + depends on EFI + default y + help + This enables SMBIOS/DMI feature for systems. + + This option is only useful on systems that have UEFI firmware. + However, even with this option, the resultant kernel should + continue to boot on existing non-UEFI platforms. + + NOTE: This does *NOT* enable or encourage the use of DMI quirks, + i.e., the practice of identifying the platform via DMI to + decide whether certain workarounds for buggy hardware and/or + firmware need to be enabled. This would require the DMI subsystem + to be enabled much earlier than we do on ARM, which is non-trivial. + +config CMDLINE_BOOL + bool "Built-in kernel command line" + help + Allow for specifying boot arguments to the kernel at + build time. On some systems (e.g. embedded ones), it is + necessary or convenient to provide some or all of the + kernel boot arguments with the kernel itself (that is, + to not rely on the boot loader to provide them.) + + To compile command line arguments into the kernel, + set this option to 'Y', then fill in the + boot arguments in CONFIG_CMDLINE. + + Systems with fully functional boot loaders (i.e. non-embedded) + should leave this option set to 'N'. + +config CMDLINE + string "Built-in kernel command string" + depends on CMDLINE_BOOL + default "" + help + Enter arguments here that should be compiled into the kernel + image and used at boot time. If the boot loader provides a + command line at boot time, it is appended to this string to + form the full kernel command line, when the system boots. + + However, you can use the CONFIG_CMDLINE_OVERRIDE option to + change this behavior. + + In most cases, the command line (whether built-in or provided + by the boot loader) should specify the device for the root + file system. + +config CMDLINE_OVERRIDE + bool "Built-in command line overrides boot loader arguments" + depends on CMDLINE_BOOL + help + Set this option to 'Y' to have the kernel ignore the boot loader + command line, and use ONLY the built-in command line. + + This is used to work around broken boot loaders. This should + be set to 'N' under normal conditions. 
+ +config FORCE_MAX_ZONEORDER + int + default "16" if (HUGETLB_PAGE) + default "11" + help + The kernel memory allocator divides physically contiguous memory + blocks into "zones", where each zone is a power of two number of + pages. This option selects the largest power of two that the kernel + keeps in the memory allocator. If you need to allocate very large + blocks of physically contiguous memory, then you may need to + increase this value. + + This config option is actually maximum order plus one. For example, + a value of 11 means that the largest free memory block is 2^10 pages. + + We make sure that we can allocate upto a HugePage size for each configuration. + Hence we have : + MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2 + +endmenu + +source "drivers/firmware/Kconfig" + +menu "Power management options" + +source "kernel/power/Kconfig" + +source "drivers/acpi/Kconfig" + +config ARCH_SUSPEND_POSSIBLE + depends on SW64 + def_bool y + +config ARCH_HIBERNATION_POSSIBLE + depends on SW64 + def_bool y + +config SW64_SUSPEND_DEEPSLEEP_NONBOOT_CORE + depends on SUSPEND + bool "SW64 non bootcore suspend into deep sleep mode" + default n + +config SW64_SUSPEND_DEEPSLEEP_BOOTCORE + depends on SUSPEND + bool "SW64 bootcore suspend into deep sleep mode" + default n + + +source "drivers/cpuidle/Kconfig" + +source "drivers/idle/Kconfig" + +endmenu + +# DUMMY_CONSOLE may be defined in drivers/video/console/Kconfig +# but we also need it if VGA_HOSE is set +config DUMMY_CONSOLE + bool + depends on VGA_HOSE + default y + + +source "arch/sw_64/kvm/Kconfig" diff --git a/arch/sw_64/Kconfig.debug b/arch/sw_64/Kconfig.debug new file mode 100644 index 000000000000..2cd2036e0996 --- /dev/null +++ b/arch/sw_64/Kconfig.debug @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: GPL-2.0 +config EARLY_PRINTK + bool "Early printk" if EXPERT + depends on SYS_HAS_EARLY_PRINTK + default y + help + This option enables special console drivers which allow the kernel + to print messages very early in the bootup process. + + This is useful for kernel debugging when your machine crashes very + early before the console code is initialized. For normal operation, + it is not recommended because it looks ugly on some machines and + doesn't cooperate with an X server. You should normally say N here, + unless you want to debug such a crash. + +config UNA_PRINT + bool "Show debug info about user unalign memory access" + default n + +config MATHEMU + tristate "Kernel FP software completion" if DEBUG_KERNEL && !SMP + default y if !DEBUG_KERNEL || SMP + help + This option is required for IEEE compliant floating point arithmetic + on the SW. The only time you would ever not say Y is to say M in + order to debug the code. Say Y unless you know what you are doing. + +config STACKTRACE_SUPPORT + bool + default y + +config SW64_RRU + bool "Enable RRU(Remote Read User)" + depends on SW64 + default n + help + Duplicate user stdout and stderr to specific space. + Do not enable it in a production kernel. + +config SW64_RRK + bool "Enable RRK(Remote Read Kernel)" + depends on SW64 + default y + help + Duplicate kernel log to specific space. + Do not enable it in a production kernel. diff --git a/arch/sw_64/Makefile b/arch/sw_64/Makefile new file mode 100644 index 000000000000..341fe6a0d9c8 --- /dev/null +++ b/arch/sw_64/Makefile @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# sw/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. 
See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# + +archscripts: scripts_basic + $(Q)$(MAKE) $(build)=arch/sw_64/tools relocs + +archheaders: + $(Q)$(MAKE) $(build)=arch/sw_64/kernel/syscalls all + +NM := $(NM) -B +CCVERSION := $(shell $(CC) -dumpversion) +LDFLAGS_vmlinux := -static -N #-relax +CHECKFLAGS += -D__sw__ + +ifeq ($(CONFIG_RELOCATABLE),y) +LDFLAGS_vmlinux += --emit-relocs +endif + +CHECKFLAGS += -D__sw__ +cflags-y := -pipe -ffixed-8 -mno-fp-regs #-msmall-data +cflags-y += $(call cc-option, -fno-jump-tables) + +cflags-y += $(cpuflags-y) + +KBUILD_CFLAGS += $(cflags-y) + +head-y := arch/sw_64/kernel/head.o + +core-y += arch/sw_64/kernel/ arch/sw_64/mm/ +core-y += arch/sw_64/platform/ +core-y += arch/sw_64/chip/ +core-$(CONFIG_MATHEMU) += arch/sw_64/math-emu/ +drivers-$(CONFIG_OPROFILE) += arch/sw_64/oprofile/ +libs-y += arch/sw_64/lib/ +core-$(CONFIG_KVM) += arch/sw_64/kvm/ +core-$(CONFIG_SW64_BUILTIN_DTB) += arch/sw_64/boot/dts/ +core-$(CONFIG_NET) += arch/sw_64/net/ + +# export what is needed by arch/sw_64/boot/Makefile +LIBS_Y := $(patsubst %/, %/lib.a, $(libs-y)) +export LIBS_Y + +boot := arch/sw_64/boot + +#Default target when executing make with no arguments +all: $(boot)/vmlinux.bin.gz + +$(boot)/vmlinux.bin.gz: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $@ + +bootimage bootpfile bootpzfile: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +archclean: + $(Q)$(MAKE) $(clean)=$(boot) + $(Q)$(MAKE) $(clean)=arch/sw_64/tools + +KBUILD_IMAGE := $(boot)/vmlinux.bin + +define archhelp + echo '* boot - Compressed kernel image (arch/sw_64/boot/vmlinux.bin.gz)' +endef diff --git a/arch/sw_64/Makefile.postlink b/arch/sw_64/Makefile.postlink new file mode 100644 index 000000000000..248844d141dd --- /dev/null +++ b/arch/sw_64/Makefile.postlink @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: GPL-2.0 +# =========================================================================== +# Post-link SW64 pass +# =========================================================================== +# +# 1. Insert relocations into vmlinux + +PHONY := __archpost +__archpost: + +-include include/config/auto.conf +include scripts/Kbuild.include + +CMD_RELOCS = arch/sw_64/tools/relocs +quiet_cmd_relocs = RELOCS $@ + cmd_relocs = $(CMD_RELOCS) $@ + +# `@true` prevents complaint when there is nothing to be done + +vmlinux: FORCE + @true +ifeq ($(CONFIG_RELOCATABLE),y) + $(call if_changed,relocs) +endif + +%.ko: FORCE + @true + +clean: + @true + +PHONY += FORCE clean + +FORCE: + +.PHONY: $(PHONY) diff --git a/arch/sw_64/boot/.gitignore b/arch/sw_64/boot/.gitignore new file mode 100644 index 000000000000..8a90e24c76ab --- /dev/null +++ b/arch/sw_64/boot/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +vmlinux diff --git a/arch/sw_64/boot/Makefile b/arch/sw_64/boot/Makefile new file mode 100644 index 000000000000..dd0976484649 --- /dev/null +++ b/arch/sw_64/boot/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# arch/sw_64/boot/Makefile +# +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Based on arch/arm64/boot/Makefile. 
+# + +OBJCOPYFLAGS_vmlinux.bin := -O binary + +targets := vmlinux vmlinux.bin vmlinux.bin.gz + +quiet_cmd_strip = STRIP $@ + cmd_strip = $(STRIP) -o $@ $< + +# Compressed kernel image +$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) + @echo ' Kernel $@ is ready' + +$(obj)/vmlinux: vmlinux FORCE + $(call if_changed,strip) + +$(obj)/vmlinux.bin: $(obj)/vmlinux FORCE + $(call if_changed,objcopy) diff --git a/arch/sw_64/boot/dts/Makefile b/arch/sw_64/boot/dts/Makefile new file mode 100644 index 000000000000..b9834c70be22 --- /dev/null +++ b/arch/sw_64/boot/dts/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 +# Built-in dtb + +builtindtb-y := chip3 + +ifeq ($(CONFIG_SW64_BUILTIN_DTB), y) +ifneq ($(CONFIG_SW64_BUILTIN_DTB_NAME),"") + builtindtb-y := $(patsubst "%",%,$(CONFIG_SW64_BUILTIN_DTB_NAME)) +endif + +obj-y += $(builtindtb-y).dtb.o +dtb-y := $(builtindtb-y).dtb + +# for CONFIG_OF_ALL_DTBS test +dtstree := $(srctree)/$(src) +dtb- := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts)) +else +dtb-y := $(builtindtb-y).dtb +endif + +clean-files := *.dtb *.dtb.S diff --git a/arch/sw_64/boot/dts/chip3.dts b/arch/sw_64/boot/dts/chip3.dts new file mode 100644 index 000000000000..ce61dfe6e7bd --- /dev/null +++ b/arch/sw_64/boot/dts/chip3.dts @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Default device tree; + */ + +/dts-v1/; +/ { + compatible = "sunway,chip3"; + model = "chip3"; + #address-cells = <2>; + #size-cells = <2>; + + soc { + compatible = "simple-bus"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + clocks { + i2cclk: i2cclk { + compatible = "fixed-clock"; + clock-frequency = <25000000>; + #clock-cells = <0>; + clock-output-names = "i2cclk_25mhz"; + }; + spiclk: spiclk { + compatible = "fixed-clock"; + clock-frequency = <25000000>; + #clock-cells = <0>; + clock-output-names = "spiclk_25mhz"; + }; + + }; + + intc: interrupt-controller{ + compatible = "sw64,sw6_irq_controller"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + + uart: serial0@8033 { + #address-cells = <2>; + #size-cells = <2>; + compatible = "sw6,sunway-apb-uart"; + reg = <0x8033 0x0 0x0 0x1000>; + interrupt-parent=<&intc>; + interrupts = <3>; + reg-shift = <9>; + reg-io-width = <4>; + clock-frequency = <24000000>; + status = "okay"; + }; + + serial1@9033 { + #address-cells = <2>; + #size-cells = <2>; + compatible = "sw6,sunway-apb-uart"; + reg = <0x9033 0x0 0x0 0x1000>; + reg-shift = <9>; + reg-io-width = <4>; + clock-frequency = <24000000>; + status = "okay"; + }; + + + i2c0@0x8031 { + #address-cells = <2>; + #size-cells = <2>; + compatible = "snps,designware-i2c"; + reg = <0x8031 0x0 0x0 0x8000>; + clock-frequency = <100000>; + clocks = <&i2cclk>; + interrupt-parent=<&intc>; + interrupts = <5>; + status = "okay"; + }; + + i2c1@0x8034 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,designware-i2c"; + reg = <0x8034 0x0 0x0 0x8000>; + clock-frequency = <100000>; + clocks = <&i2cclk>; + interrupt-parent=<&intc>; + interrupts = <6>; + status = "okay"; + }; + + i2c2@0x8035 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,designware-i2c"; + reg = <0x8035 0x0 0x0 0x8000>; + clock-frequency = <100000>; + clocks = <&i2cclk>; + interrupt-parent=<&intc>; + interrupts = <7>; + status = "okay"; + + rtc: pcf8523@68 { + compatible = "nxp,pcf8523"; + reg = <0x68>; + }; + }; + + spi: spi@0x8032 { + #address-cells = <2>; + #size-cells = <2>; + compatible = "sunway,chip3-spi"; + reg = <0x8032 0x0 0x0 0x8000>; + 
clocks = <&spiclk>; + interrupt-parent=<&intc>; + interrupts = <4>; + status = "okay"; + + flash@0 { + compatible = "winbond,w25q32dw", "jedec,spi-flash"; + spi-max-frequency = <25000000>; + m25p,fast-read; + spi-cpha; + spi-cpol; + poll_mode = <1>; /* poll_mode:1 interrupt mode: 0 */ + reg-io-width = <2>; + reg = <0 0 0 0 >; /* 0: flash chip selected bit */ + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "test"; + reg = <0 0x400000>; + }; + }; + }; + + flash@1 { + compatible = "winbond,w25q32dw", "jedec,spi-flash"; + spi-max-frequency = <25000000>; + m25p,fast-read; + spi-cpha; + spi-cpol; + poll_mode = <1>; /* poll_mode:1 interrupt mode: 0 */ + reg-io-width = <2>; + reg = <1 0 0 0 >; /* 1: flash chip selected bit */ + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "test"; + reg = <0 0x400000>; + }; + }; + }; + }; + + lpc: lpc@0x8037 { + #address-cells = <2>; + #size-cells = <2>; + compatible = "sw,sw6b_lpc"; + reg = <0x8037 0x40000000 0x0 0x8000>; + interrupt-parent=<&intc>; + interrupts = <2>; + status = "okay"; + + }; + + gpio: gpio@8036 { + #address-cells = <2>; + #size-cells = <2>; + compatible = "snps,sw-gpio"; + reg = <0x8036 0x0 0x0 0x8000>; + status = "okay"; + + porta: gpio-contraller@0 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <8>; + reg = <0 0 0 0>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent=<&intc>; + interrupts = <0>; + }; + }; + + }; +}; diff --git a/arch/sw_64/boot/dts/chip_vt.dts b/arch/sw_64/boot/dts/chip_vt.dts new file mode 100644 index 000000000000..f0bcf1db1d08 --- /dev/null +++ b/arch/sw_64/boot/dts/chip_vt.dts @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Default device tree; + */ + +/dts-v1/; +/ { + compatible = "sunway,chip3"; + model = "chip3"; + #address-cells = <2>; + #size-cells = <2>; + + soc { + compatible = "simple-bus"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + intc: interrupt-controller{ + compatible = "sw64,sw6_irq_vt_controller"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + uart: serial0@8801 { + #address-cells = <2>; + #size-cells = <2>; + compatible = "ns16550a"; + reg = <0x8801 0x3f8 0x0 0x10>; + interrupt-parent=<&intc>; + interrupts = <12>; + reg-shift = <0>; + reg-io-width = <1>; + clock-frequency = <24000000>; + status = "okay"; + }; + }; +}; diff --git a/arch/sw_64/chip/Makefile b/arch/sw_64/chip/Makefile new file mode 100644 index 000000000000..a64818cdf35b --- /dev/null +++ b/arch/sw_64/chip/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_SW64_CHIP3) := chip3/ diff --git a/arch/sw_64/chip/chip3/Makefile b/arch/sw_64/chip/chip3/Makefile new file mode 100644 index 000000000000..2b7b5790003f --- /dev/null +++ b/arch/sw_64/chip/chip3/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y := chip.o i2c-lib.o + +obj-$(CONFIG_PCI) += pci-quirks.o +obj-$(CONFIG_PCI_MSI) += msi.o vt_msi.o +obj-$(CONFIG_SW64_IRQ_CHIP) += irq_chip.o +obj-$(CONFIG_CPUFREQ_DEBUGFS) += cpufreq_debugfs.o diff --git a/arch/sw_64/chip/chip3/chip.c b/arch/sw_64/chip/chip3/chip.c new file mode 100644 index 000000000000..adb4d325fc91 --- /dev/null +++ b/arch/sw_64/chip/chip3/chip.c @@ -0,0 +1,795 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/pci.h> +#include <linux/seq_file.h> +#include <linux/clocksource.h> +#include <linux/msi.h> +#include 
<linux/delay.h> +#include <asm/sw64_init.h> +#include <asm/sw64io.h> +#include <asm/pci.h> +#include <asm/core.h> +#include <asm/irq_impl.h> +#include <asm/wrperfmon.h> +#include <asm/hw_init.h> +#include "../../../../drivers/pci/pci.h" + +static u64 read_longtime(struct clocksource *cs) +{ + u64 result; + unsigned long node; + + if (IS_ENABLED(CONFIG_SW64_FPGA) || IS_ENABLED(CONFIG_SW64_SIM)) + node = 0; + else + node = __this_cpu_read(hard_node_id); + result = sw64_io_read(node, LONG_TIME); + + return result; +} + +static int longtime_enable(struct clocksource *cs) +{ + switch (cpu_desc.model) { + case CPU_SW3231: + sw64_io_write(0, GPIO_SWPORTA_DR, 0); + sw64_io_write(0, GPIO_SWPORTA_DDR, 0xff); + break; + case CPU_SW831: + sw64_io_write(0, LONG_TIME_START_EN, 0x1); + break; + default: + break; + } + + return 0; +} + +static struct clocksource clocksource_longtime = { + .name = "longtime", + .rating = 100, + .enable = longtime_enable, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .mask = CLOCKSOURCE_MASK(64), + .shift = 0, + .mult = 0, + .read = read_longtime, +}; + +static u64 read_vtime(struct clocksource *cs) +{ + u64 result; + unsigned long node; + unsigned long vtime_addr = PAGE_OFFSET | IO_BASE | LONG_TIME; + + if (is_in_guest()) + result = rdio64(vtime_addr); + else + result = sw64_io_read(node, LONG_TIME); + + return result; +} + +static int vtime_enable(struct clocksource *cs) +{ + return 0; +} + +static struct clocksource clocksource_vtime = { + .name = "vtime", + .rating = 100, + .enable = vtime_enable, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .mask = CLOCKSOURCE_MASK(64), + .shift = 0, + .mult = 0, + .read = read_vtime, +}; + +void setup_chip_clocksource(void) +{ +#ifdef CONFIG_SW64_SIM + clocksource_register_khz(&clocksource_longtime, 400000); /* Hardware Simulator 400Mhz */ +#elif defined(CONFIG_SW64_FPGA) + clocksource_register_khz(&clocksource_longtime, 1000); /* FPGA 1Mhz */ +#else + if (is_in_host()) + clocksource_register_khz(&clocksource_longtime, 25000); + else + clocksource_register_khz(&clocksource_vtime, 25000); +#endif +} + +static int chip3_get_cpu_nums(void) +{ + unsigned long trkmode; + int cpus; + + if (is_guest_or_emul()) + return 1; + + trkmode = sw64_io_read(0, TRKMODE); + trkmode = (trkmode >> 6) & 0x3; + cpus = 1 << trkmode; + + return cpus; +} + +static unsigned long chip3_get_vt_node_mem(int nodeid) +{ + return *(unsigned long *)MMSIZE; +} + +static unsigned long chip3_get_node_mem(int nodeid) +{ + unsigned long mc_config, mc_online, mc_cap, mc_num; + unsigned long node_mem; + + mc_config = sw64_io_read(nodeid, MC_CAP_CFG) & 0xf; + mc_cap = (1UL << mc_config) << 28; + mc_online = sw64_io_read(nodeid, MC_ONLINE) & 0xff; + mc_num = __kernel_ctpop(mc_online); + node_mem = mc_cap * mc_num; + + return node_mem; +} + +static void chip3_setup_core_start(struct cpumask *cpumask) +{ + int i, j, cpus; + unsigned long coreonline; + + cpus = chip3_get_cpu_nums(); + for (i = 0; i < cpus; i++) { + coreonline = sw64_io_read(i, CORE_ONLINE); + for (j = 0; j < 32 ; j++) { + if (coreonline & (1UL << j)) + cpumask_set_cpu(i * 32 + j, cpumask); + } + } + +} + +int chip_pcie_configure(struct pci_controller *hose) +{ + struct pci_dev *dev; + struct pci_bus *bus, *top; + struct list_head *next; + unsigned int max_read_size, smallest_max_payload; + int max_payloadsize, iov_bus = 0; + unsigned long rc_index, node; + unsigned long piuconfig0, value; + unsigned int pcie_caps_offset; + unsigned int rc_conf_value; + u16 devctl, new_values; + bool rc_ari_disabled = false, found = 
false; + + node = hose->node; + rc_index = hose->index; + smallest_max_payload = read_rc_conf(node, rc_index, RC_EXP_DEVCAP); + smallest_max_payload &= PCI_EXP_DEVCAP_PAYLOAD; + + top = hose->bus; + bus = top; + next = top->devices.next; + + for (;;) { + if (next == &bus->devices) { + /* end of this bus, go up or finish */ + if (bus == top) + break; + next = bus->self->bus_list.next; + bus = bus->self->bus; + continue; + } + dev = list_entry(next, struct pci_dev, bus_list); + if (dev->subordinate) { + /* this is a pci-pci bridge, do its devices next */ + next = dev->subordinate->devices.next; + bus = dev->subordinate; + } else + next = dev->bus_list.next; + + if (!found) { + if (pci_is_root_bus(dev->bus)) { + if (list_empty(&dev->subordinate->devices)) + rc_ari_disabled = true; + } else { + if (!pci_ari_enabled(dev->bus)) { + rc_ari_disabled = true; + found = true; + } + } + } + +#ifdef CONFIG_PCI_IOV + if (dev->is_physfn) + iov_bus += dev->sriov->max_VF_buses - dev->bus->number; +#endif + + /* Query device PCIe capability register */ + pcie_caps_offset = dev->pcie_cap; + if (pcie_caps_offset == 0) + continue; + max_payloadsize = dev->pcie_mpss; + if (max_payloadsize < smallest_max_payload) + smallest_max_payload = max_payloadsize; + } + + if (rc_ari_disabled) { + rc_conf_value = read_rc_conf(node, rc_index, RC_EXP_DEVCTL2); + rc_conf_value &= ~PCI_EXP_DEVCTL2_ARI; + write_rc_conf(node, rc_index, RC_EXP_DEVCTL2, rc_conf_value); + } else { + rc_conf_value = read_rc_conf(node, rc_index, RC_EXP_DEVCTL2); + rc_conf_value |= PCI_EXP_DEVCTL2_ARI; + write_rc_conf(node, rc_index, RC_EXP_DEVCTL2, rc_conf_value); + } + + rc_conf_value = read_rc_conf(node, rc_index, RC_EXP_DEVCAP); + rc_conf_value &= PCI_EXP_DEVCAP_PAYLOAD; + max_payloadsize = rc_conf_value; + if (max_payloadsize < smallest_max_payload) + smallest_max_payload = max_payloadsize; + + max_read_size = 0x2; /* Limit to 512B */ + value = read_rc_conf(node, rc_index, RC_EXP_DEVCTL); + value &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ); + value |= (max_read_size << 12) | (smallest_max_payload << 5); + write_rc_conf(node, rc_index, RC_EXP_DEVCTL, value); + new_values = (max_read_size << 12) | (smallest_max_payload << 5); + + piuconfig0 = read_piu_ior0(node, rc_index, PIUCONFIG0); + piuconfig0 &= ~(0x7fUL << 9); + if (smallest_max_payload == 0x2) { + piuconfig0 |= (0x20UL << 9); + write_piu_ior0(node, rc_index, PIUCONFIG0, piuconfig0); + } else { + piuconfig0 |= (0x40UL << 9); + write_piu_ior0(node, rc_index, PIUCONFIG0, piuconfig0); + } + + printk("Node%ld RC%ld MPSS %luB, MRRS %luB, Piuconfig0 %#lx, ARI %s\n", + node, rc_index, (1UL << smallest_max_payload) << 7, + (1UL << max_read_size) << 7, piuconfig0, + rc_ari_disabled ? "disabled" : "enabled"); + + /* Now, set the max_payload_size for all devices to that value. 
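+	 * The value written above follows the standard PCIe DEVCTL layout:
+	 * the payload-size code sits in bits 7:5 and the read-request code
+	 * in bits 14:12, each encoding an actual size of 128 << code bytes.
+	 * As a worked example, with max_read_size = 0x2 and a
+	 * smallest_max_payload of 0x1, new_values is
+	 * (2 << 12) | (1 << 5) = 0x2020, i.e. MRRS 512B and MPS 256B, and
+	 * that is what gets propagated to every device below.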
*/ + bus = top; + next = top->devices.next; + for (;;) { + if (next == &bus->devices) { + /* end of this bus, go up or finish */ + if (bus == top) + break; + next = bus->self->bus_list.next; + bus = bus->self->bus; + continue; + } + dev = list_entry(next, struct pci_dev, bus_list); + if (dev->subordinate) { + /* this is a pci-pci bridge, do its devices next */ + next = dev->subordinate->devices.next; + bus = dev->subordinate; + } else + next = dev->bus_list.next; + + pcie_caps_offset = dev->pcie_cap; + if (pcie_caps_offset == 0) + continue; + + pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, &devctl); + devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ); + devctl |= new_values; + pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, devctl); + } + + return iov_bus; +} + +static int chip3_check_pci_vt_linkup(unsigned long node, unsigned long index) +{ + if (node == 0 && index == 0) + return 0; + else + return 1; +} + +static int chip3_check_pci_linkup(unsigned long node, unsigned long index) +{ + unsigned long rc_debug; + +#ifdef CONFIG_SW64_FPGA //for PCIE4.0 + printk("waiting for link up...\n"); + if (index == 0) + sw64_io_write(node, PIU_TOP0_CONFIG, 0x10011); + else + sw64_io_write(node, PIU_TOP1_CONFIG, 0x10011); + mdelay(10); + rc_debug = read_piu_ior1(node, index, RCDEBUGINF1); + while (!(rc_debug & 0x1)) { + udelay(10); + rc_debug = read_piu_ior1(node, index, RCDEBUGINF1); + } + mdelay(10); +#endif +#ifdef CONFIG_SW64_SIM + printk("waiting for link up...\n"); + rc_debug = read_piu_ior1(node, index, RCDEBUGINF1); + while (!(rc_debug & 0x1)) { + udelay(10); + rc_debug = read_piu_ior1(node, index, RCDEBUGINF1); + } +#endif + rc_debug = read_piu_ior1(node, index, RCDEBUGINF1); + + return !(rc_debug & 0x1); +} + +static void chip3_set_rc_piu(unsigned long node, unsigned long index) +{ + unsigned int i, value; + u32 rc_misc_ctrl; + + if (is_guest_or_emul()) + return; + + /* configure RC, set PCI-E root controller */ + write_rc_conf(node, index, RC_COMMAND, 0x00100007); + write_rc_conf(node, index, RC_PORT_LINK_CTL, 0x1f0020); + write_rc_conf(node, index, RC_EXP_DEVCTL, 0x2850); + write_rc_conf(node, index, RC_EXP_DEVCTL2, 0x6); + write_rc_conf(node, index, RC_ORDER_RULE_CTL, 0x0100); + + if (IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_SW64_SIM)) { + value = read_rc_conf(node, index, RC_LINK_STAT); + value |= 0x3; + write_rc_conf(node, index, RC_LINK_STAT, value); + } + + /* enable DBI_RO_WR_EN */ + rc_misc_ctrl = read_rc_conf(node, index, RC_MISC_CONTROL_1); + write_rc_conf(node, index, RC_MISC_CONTROL_1, rc_misc_ctrl | 0x1); + + /* fix up DEVICE_ID_VENDOR_ID register */ + value = (PCI_DEVICE_ID_CHIP3 << 16) | PCI_VENDOR_ID_JN; + write_rc_conf(node, index, RC_VENDOR_ID, value); + + /* set PCI-E root class code */ + value = read_rc_conf(node, index, RC_REVISION_ID); + write_rc_conf(node, index, RC_REVISION_ID, (PCI_CLASS_BRIDGE_HOST << 16) | value); + + /* disable DBI_RO_WR_EN */ + write_rc_conf(node, index, RC_MISC_CONTROL_1, rc_misc_ctrl); + + write_rc_conf(node, index, RC_PRIMARY_BUS, 0xffffff); + write_piu_ior0(node, index, PIUCONFIG0, 0x38056); + write_piu_ior1(node, index, PIUCONFIG1, 0x2); + write_piu_ior1(node, index, ERRENABLE, -1); + + /* set DMA offset value PCITODMA_OFFSET */ + write_piu_ior0(node, index, EPDMABAR, PCITODMA_OFFSET); + if (IS_ENABLED(CONFIG_PCI_MSI)) { + write_piu_ior0(node, index, PIUCONFIG0, 0x38076); + write_piu_ior0(node, index, MSIADDR, MSIX_MSG_ADDR); + for (i = 0; i < 256; i++) + write_piu_ior0(node, index, MSICONFIG0 + 
(i << 7), 0); + } +} + +static void chip3_set_intx(unsigned long node, unsigned long index, + unsigned long int_conf) +{ + if (is_guest_or_emul()) + return; + + write_piu_ior0(node, index, INTACONFIG, int_conf | (0x8UL << 10)); + write_piu_ior0(node, index, INTBCONFIG, int_conf | (0x4UL << 10)); + write_piu_ior0(node, index, INTCCONFIG, int_conf | (0x2UL << 10)); + write_piu_ior0(node, index, INTDCONFIG, int_conf | (0x1UL << 10)); +} + +static unsigned long chip3_get_rc_enable(unsigned long node) +{ + unsigned long rc_enable; + + if (is_guest_or_emul()) + return 1; + + if (!IS_ENABLED(CONFIG_SW64_ASIC)) { + rc_enable = 0x1; + sw64_io_write(node, IO_START, rc_enable); + } + rc_enable = sw64_io_read(node, IO_START); + + return rc_enable; +} + +static int chip3_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + struct pci_controller *hose = dev->sysdata; + + return hose->int_irq; +} + +extern struct pci_controller *hose_head, **hose_tail; +static void sw6_handle_intx(unsigned int offset) +{ + struct pci_controller *hose; + unsigned long value; + + hose = hose_head; + for (hose = hose_head; hose; hose = hose->next) { + value = read_piu_ior0(hose->node, hose->index, INTACONFIG + (offset << 7)); + if (value >> 63) { + value = value & (~(1UL << 62)); + write_piu_ior0(hose->node, hose->index, INTACONFIG + (offset << 7), value); + handle_irq(hose->int_irq); + value = value | (1UL << 62); + write_piu_ior0(hose->node, hose->index, INTACONFIG + (offset << 7), value); + } + if (hose->iommu_enable) { + value = read_piu_ior0(hose->node, hose->index, IOMMUEXCPT_STATUS); + if (value >> 63) + handle_irq(hose->int_irq); + } + } +} + +static void chip3_device_interrupt(unsigned long irq_info) +{ + unsigned int i; + + if (is_guest_or_emul()) { + handle_irq(irq_info); + return; + } + + for (i = 0; i < 4; i++) { + if ((irq_info >> i) & 0x1) + sw6_handle_intx(i); + } +} + +static void set_devint_wken(int node, int val) +{ + sw64_io_write(node, DEVINT_WKEN, val); + sw64_io_write(node, DEVINTWK_INTEN, 0x0); +} + +static void clear_rc_status(int node, int rc) +{ + unsigned int val, status; + + val = 0x10000; + do { + write_rc_conf(node, rc, RC_STATUS, val); + mb(); + status = read_rc_conf(node, rc, RC_STATUS); + } while (status >> 16); +} + +static void chip3_suspend(int wake) +{ + unsigned long val; + unsigned int val_32; + unsigned long rc_start; + int node, rc, index, cpus; + + cpus = chip3_get_cpu_nums(); + for (node = 0; node < cpus; node++) { + rc = -1; + rc_start = sw64_io_read(node, IO_START); + index = ffs(rc_start); + while (index) { + rc += index; + if (wake) { + val_32 = read_rc_conf(node, rc, RC_CONTROL); + val_32 &= ~0x8; + write_rc_conf(node, rc, RC_CONTROL, val_32); + + set_devint_wken(node, 0x0); + val = 0x8000000000000000UL; + write_piu_ior0(node, rc, PMEINTCONFIG, val); + write_piu_ior0(node, rc, PMEMSICONFIG, val); + + clear_rc_status(node, rc); + } else { + val_32 = read_rc_conf(node, rc, RC_CONTROL); + val_32 |= 0x8; + write_rc_conf(node, rc, RC_CONTROL, val_32); + + clear_rc_status(node, rc); + set_devint_wken(node, 0x1f0); +#ifdef CONFIG_PCI_MSI //USE MSI + val_32 = read_rc_conf(node, rc, RC_COMMAND); + val_32 |= 0x400; + write_rc_conf(node, rc, RC_COMMAND, val_32); + val_32 = read_rc_conf(node, rc, RC_MSI_CONTROL); + val_32 |= 0x10000; + write_rc_conf(node, rc, RC_MSI_CONTROL, val_32); + val = 0x4000000000000000UL; + write_piu_ior0(node, rc, PMEMSICONFIG, val); +#else //USE INT + val = 0x4000000000000400UL; + write_piu_ior0(node, rc, PMEINTCONFIG, val); +#endif + } + rc_start = 
rc_start >> index; + index = ffs(rc_start); + } + } +} + +static void chip3_hose_init(struct pci_controller *hose) +{ + unsigned long pci_io_base; + + hose->sparse_mem_base = 0; + hose->sparse_io_base = 0; + pci_io_base = IO_BASE | (hose->node << IO_NODE_SHIFT) + | PCI_BASE | (hose->index << IO_RC_SHIFT); + + hose->dense_mem_base = pci_io_base; + hose->dense_io_base = pci_io_base | PCI_LEGACY_IO; + hose->ep_config_space_base = PAGE_OFFSET | pci_io_base | PCI_EP_CFG; + hose->rc_config_space_base = PAGE_OFFSET | pci_io_base | PCI_RC_CFG; + + if (is_in_host()) + hose->mem_space->start = pci_io_base + PCI_32BIT_MEMIO; + else + hose->mem_space->start = pci_io_base + PCI_32BIT_VT_MEMIO; + hose->mem_space->end = hose->mem_space->start + PCI_32BIT_MEMIO_SIZE - 1; + hose->mem_space->name = "pci memory space"; + hose->mem_space->flags = IORESOURCE_MEM; + + if (request_resource(&iomem_resource, hose->mem_space) < 0) + pr_err("Failed to request MEM on hose %ld\n", hose->index); + hose->pre_mem_space->start = pci_io_base | PCI_64BIT_MEMIO; + hose->pre_mem_space->end = hose->pre_mem_space->start + PCI_64BIT_MEMIO_SIZE - 1; + hose->pre_mem_space->name = "pci pre mem space"; + hose->pre_mem_space->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_MEM_64; + + if (request_resource(&iomem_resource, hose->pre_mem_space) < 0) + pr_err("Failed to request 64bit MEM on hose %ld\n", hose->index); + hose->io_space->start = pci_io_base | PCI_LEGACY_IO; + hose->io_space->end = hose->io_space->start + PCI_LEGACY_IO_SIZE - 1; + hose->io_space->name = "pci io space"; + hose->io_space->flags = IORESOURCE_IO; + + if (request_resource(&ioport_resource, hose->io_space) < 0) + pr_err("Failed to request IO on hose %ld\n", hose->index); + hose->busn_space->name = "PCI busn"; + hose->busn_space->start = 0xff; + hose->busn_space->end = 0xff; + hose->busn_space->flags = IORESOURCE_BUS; + hose->first_busno = hose->self_busno = hose->busn_space->start; + hose->last_busno = hose->busn_space->end; + + if (is_in_host()) { + if (IS_ENABLED(CONFIG_PCI_MSI)) + memset(hose->piu_msiconfig, 0, 256/8); + } +}; + +static void chip3_init_ops_fixup(void) +{ + if (is_guest_or_emul()) { + sw64_chip_init->early_init.get_node_mem = chip3_get_vt_node_mem; + sw64_chip_init->pci_init.check_pci_linkup = chip3_check_pci_vt_linkup; + } +}; + +static void chip3_ops_fixup(void) +{ + if (is_guest_or_emul()) + sw64_chip->suspend = NULL; +}; + +static struct sw64_chip_init_ops chip3_chip_init_ops = { + .early_init = { + .setup_core_start = chip3_setup_core_start, + .get_node_mem = chip3_get_node_mem, + }, + .pci_init = { + .map_irq = chip3_map_irq, + .get_rc_enable = chip3_get_rc_enable, + .hose_init = chip3_hose_init, + .set_rc_piu = chip3_set_rc_piu, + .check_pci_linkup = chip3_check_pci_linkup, + .set_intx = chip3_set_intx, + }, + .fixup = chip3_init_ops_fixup, +}; + +static struct sw64_chip_ops chip3_chip_ops = { + .get_cpu_num = chip3_get_cpu_nums, + .suspend = chip3_suspend, + .fixup = chip3_ops_fixup, +}; + +void __init sw64_setup_chip_ops(void) +{ + sw64_chip_init = &chip3_chip_init_ops; + sw64_chip = &chip3_chip_ops; +} + +/* Performance counter hook. A module can override this to do something useful. 
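+ * Since perf_irq is exported below, a profiling module can take over the
+ * hook at runtime; a minimal sketch (handler and variable names here are
+ * made up) would be:
+ *
+ *	static void my_pmc_overflow(unsigned long vector, struct pt_regs *regs)
+ *	{
+ *		... record the event selected by vector ...
+ *	}
+ *
+ *	saved_perf_irq = perf_irq;
+ *	perf_irq = my_pmc_overflow;
+ *
+ * restoring the saved pointer (dummy_perf by default) when the module
+ * unloads.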
*/ +static void dummy_perf(unsigned long vector, struct pt_regs *regs) +{ + irq_err_count++; + pr_crit("Performance counter interrupt!\n"); +} + +void (*perf_irq)(unsigned long, struct pt_regs*) = dummy_perf; +EXPORT_SYMBOL(perf_irq); + +#ifdef CONFIG_PCI_MSI +extern void handle_pci_msi_interrupt(unsigned long type, + unsigned long vector, + unsigned long pci_msi1_addr); +#else +void handle_pci_msi_interrupt(unsigned long type, + unsigned long vector, unsigned long pci_msi1_addr) +{ + pr_warn("SW arch disable CONFIG_PCI_MSI option.\n"); +} +#endif + +static void handle_fault_int(void) +{ + int node; + + node = __this_cpu_read(hard_node_id); + printk("enter fault int, si_fault_stat = %#lx\n", + sw64_io_read(node, SI_FAULT_STAT)); + sw64_io_write(node, SI_FAULT_INT_EN, 0); + sw64_io_write(node, DLI_RLTD_FAULT_INTEN, 0); + sw64_io_write(node, DUAL_CG0_FAULT_INTEN, 0); + sw64_io_write(node, DUAL_CG1_FAULT_INTEN, 0); + sw64_io_write(node, DUAL_CG2_FAULT_INTEN, 0); + sw64_io_write(node, DUAL_CG3_FAULT_INTEN, 0); + sw64_io_write(node, DUAL_CG4_FAULT_INTEN, 0); + sw64_io_write(node, DUAL_CG5_FAULT_INTEN, 0); + sw64_io_write(node, DUAL_CG6_FAULT_INTEN, 0); + sw64_io_write(node, DUAL_CG7_FAULT_INTEN, 0); +} + +static void handle_mt_int(void) +{ + printk("enter mt int\n"); +} + +static void handle_nmi_int(void) +{ + printk("enter nmi int\n"); +} + +static void handle_dev_int(struct pt_regs *regs) +{ + unsigned long config_val, val, stat; + int node = 0; + unsigned int hwirq; + + config_val = sw64_io_read(node, DEV_INT_CONFIG); + val = config_val & (~(1UL << 8)); + sw64_io_write(node, DEV_INT_CONFIG, val); + stat = sw64_io_read(node, MCU_DVC_INT); + + while (stat) { + hwirq = ffs(stat) - 1; + handle_domain_irq(NULL, hwirq, regs); + stat &= ~(1UL << hwirq); + } + /*do handle irq */ + + sw64_io_write(node, DEV_INT_CONFIG, config_val); +} + +void handle_chip_irq(unsigned long type, unsigned long vector, + unsigned long irq_arg, struct pt_regs *regs) +{ + struct pt_regs *old_regs; + + if (is_guest_or_emul()) { + if ((type & 0xffff) > 15) { + vector = type; + if (vector == 16) + type = INT_INTx; + else + type = INT_MSI; + } + } + + switch (type & 0xffff) { + case INT_MSI: + old_regs = set_irq_regs(regs); + handle_pci_msi_interrupt(type, vector, irq_arg); + set_irq_regs(old_regs); + return; + case INT_INTx: + old_regs = set_irq_regs(regs); + chip3_device_interrupt(vector); + set_irq_regs(old_regs); + return; + + case INT_IPI: +#ifdef CONFIG_SMP + handle_ipi(regs); + return; +#else + irq_err_count++; + pr_crit("Interprocessor interrupt? You must be kidding!\n"); +#endif + break; + case INT_RTC: + old_regs = set_irq_regs(regs); + sw64_timer_interrupt(); + set_irq_regs(old_regs); + return; + case INT_VT_SERIAL: + old_regs = set_irq_regs(regs); + handle_irq(type); + set_irq_regs(old_regs); + return; + case INT_PC0: + perf_irq(PERFMON_PC0, regs); + return; + case INT_PC1: + perf_irq(PERFMON_PC1, regs); + return; + case INT_DEV: + old_regs = set_irq_regs(regs); + handle_dev_int(regs); + set_irq_regs(old_regs); + return; + case INT_FAULT: + old_regs = set_irq_regs(regs); + handle_fault_int(); + set_irq_regs(old_regs); + return; + case INT_MT: + old_regs = set_irq_regs(regs); + handle_mt_int(); + set_irq_regs(old_regs); + return; + case INT_NMI: + old_regs = set_irq_regs(regs); + handle_nmi_int(); + set_irq_regs(old_regs); + return; + default: + pr_crit("Hardware intr %ld %lx? 
uh?\n", type, vector); + } + pr_crit("PC = %016lx PS = %04lx\n", regs->pc, regs->ps); +} + +/* + * Early fix up the chip3 Root Complex settings + */ +static void chip3_pci_fixup_root_complex(struct pci_dev *dev) +{ + int i; + struct pci_bus *bus = dev->bus; + struct pci_controller *hose = bus->sysdata; + + hose->self_busno = hose->busn_space->start; + + if (likely(bus->number == hose->self_busno)) { + if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) { + /* Check Root Complex port again */ + dev->is_hotplug_bridge = 0; + dev->current_state = PCI_D0; + } + + dev->class &= 0xff; + dev->class |= PCI_CLASS_BRIDGE_HOST << 8; + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + dev->resource[i].start = 0; + dev->resource[i].end = 0; + dev->resource[i].flags = 0; + } + } + atomic_inc(&dev->enable_cnt); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_JN, PCI_DEVICE_ID_CHIP3, chip3_pci_fixup_root_complex); diff --git a/arch/sw_64/chip/chip3/cpufreq_debugfs.c b/arch/sw_64/chip/chip3/cpufreq_debugfs.c new file mode 100644 index 000000000000..3b152f84454f --- /dev/null +++ b/arch/sw_64/chip/chip3/cpufreq_debugfs.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/debugfs.h> +#include <linux/list.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/mm_types.h> +#include <linux/delay.h> +#include <linux/uaccess.h> +#include <linux/mm.h> +#include <asm/sw64io.h> +#include <asm/hw_init.h> +#include <asm/debug.h> + +#define CLK_PRT 0x1UL +#define CORE_CLK0_V (0x1UL << 1) +#define CORE_CLK0_R (0x1UL << 2) +#define CORE_CLK2_V (0x1UL << 15) +#define CORE_CLK2_R (0x1UL << 16) + +#define CLK_LV1_SEL_PRT 0x1UL +#define CLK_LV1_SEL_MUXA (0x1UL << 2) +#define CLK_LV1_SEL_MUXB (0x1UL << 3) + +#define CORE_PLL0_CFG_SHIFT 4 +#define CORE_PLL2_CFG_SHIFT 18 + +static int cpu_freq[16] = { + 200, 1200, 1800, 1900, + 1950, 2000, 2050, 2100, + 2150, 2200, 2250, 2300, + 2350, 2400, 2450, 2500 +}; + +static int cpufreq_show(struct seq_file *m, void *v) +{ + int i; + u64 val; + + val = sw64_io_read(0, CLK_CTL); + val = val >> CORE_PLL2_CFG_SHIFT; + + for (i = 0; i < sizeof(cpu_freq)/sizeof(int); i++) { + if (cpu_freq[val] == cpu_freq[i]) + seq_printf(m, "[%d] ", cpu_freq[i]); + else + seq_printf(m, "%d ", cpu_freq[i]); + } + seq_puts(m, "\n"); + + return 0; +} + +static int cpufreq_open(struct inode *inode, struct file *file) +{ + return single_open(file, cpufreq_show, NULL); +} + +static ssize_t cpufreq_set(struct file *file, const char __user *user_buf, + size_t len, loff_t *ppos) +{ + char buf[5]; + size_t size; + int cf, i, err, index; + u64 val; + + size = min(sizeof(buf) - 1, len); + if (copy_from_user(buf, user_buf, size)) + return -EFAULT; + buf[size] = '\0'; + + err = kstrtoint(buf, 10, &cf); + if (err) + return err; + + index = -1; + for (i = 0; i < sizeof(cpu_freq)/sizeof(int); i++) { + if (cf == cpu_freq[i]) { + index = i; + break; + } + } + + if (index < 0) + return -EINVAL; + + /* Set CLK_CTL PLL2 */ + sw64_io_write(0, CLK_CTL, CORE_CLK2_R | CORE_CLK2_V | CLK_PRT); + sw64_io_write(1, CLK_CTL, CORE_CLK2_R | CORE_CLK2_V | CLK_PRT); + val = sw64_io_read(0, CLK_CTL); + + sw64_io_write(0, CLK_CTL, val | index << CORE_PLL2_CFG_SHIFT); + sw64_io_write(1, CLK_CTL, val | index << CORE_PLL2_CFG_SHIFT); + + udelay(1); + + sw64_io_write(0, CLK_CTL, CORE_CLK2_V | CLK_PRT + | index << CORE_PLL2_CFG_SHIFT); + sw64_io_write(1, CLK_CTL, CORE_CLK2_V | CLK_PRT + | index << CORE_PLL2_CFG_SHIFT); + val = sw64_io_read(0, CLK_CTL); + + /* LV1 select PLL1/PLL2 */ + sw64_io_write(0, CLU_LV1_SEL, CLK_LV1_SEL_MUXA 
| CLK_LV1_SEL_PRT); + sw64_io_write(1, CLU_LV1_SEL, CLK_LV1_SEL_MUXA | CLK_LV1_SEL_PRT); + + /* Set CLK_CTL PLL0 */ + sw64_io_write(0, CLK_CTL, val | CORE_CLK0_R | CORE_CLK0_V); + sw64_io_write(1, CLK_CTL, val | CORE_CLK0_R | CORE_CLK0_V); + + sw64_io_write(0, CLK_CTL, val | CORE_CLK0_R | CORE_CLK0_V + | index << CORE_PLL0_CFG_SHIFT); + sw64_io_write(1, CLK_CTL, val | CORE_CLK0_R | CORE_CLK0_V + | index << CORE_PLL0_CFG_SHIFT); + + udelay(1); + + sw64_io_write(0, CLK_CTL, val | CORE_CLK0_V + | index << CORE_PLL0_CFG_SHIFT); + sw64_io_write(1, CLK_CTL, val | CORE_CLK0_V + | index << CORE_PLL0_CFG_SHIFT); + + /* LV1 select PLL0/PLL1 */ + sw64_io_write(0, CLU_LV1_SEL, CLK_LV1_SEL_MUXB | CLK_LV1_SEL_PRT); + sw64_io_write(1, CLU_LV1_SEL, CLK_LV1_SEL_MUXB | CLK_LV1_SEL_PRT); + + return len; +} + +static const struct file_operations set_cpufreq_fops = { + .open = cpufreq_open, + .read = seq_read, + .write = cpufreq_set, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init cpufreq_debugfs_init(void) +{ + struct dentry *cpufreq_entry; + + if (!sw64_debugfs_dir) + return -ENODEV; + + cpufreq_entry = debugfs_create_file("cpufreq", 0600, + sw64_debugfs_dir, NULL, + &set_cpufreq_fops); + if (!cpufreq_entry) + return -ENOMEM; + + return 0; +} +late_initcall(cpufreq_debugfs_init); diff --git a/arch/sw_64/chip/chip3/i2c-lib.c b/arch/sw_64/chip/chip3/i2c-lib.c new file mode 100644 index 000000000000..581f2b3d81a1 --- /dev/null +++ b/arch/sw_64/chip/chip3/i2c-lib.c @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/sw_64/chip/chip3/i2c-lib.c + * + * Copyright (C) 2020 Platform Software + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * The drivers in this file are synchronous/blocking. In addition, + * use poll mode to read/write slave devices on the I2C bus instead + * of the interrupt mode. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/fb.h> +#include <linux/init.h> +#include <linux/device.h> + +#define CPLD_BUSNR 2 + +#ifndef _I2C_DEBUG_FLAG_ +#define _I2C_DEBUG_FLAG_ 0 +#endif + +#define ICC_DEBUG(fmt, ...) 
\ + do { \ + if (_I2C_DEBUG_FLAG_) { \ + printk(fmt, ##__VA_ARGS__); \ + } \ + } while (0) + +#define IC_CLK_KHZ 25000 + +/* I2C register definitions */ +#define DW_IC_CON 0x0 +#define DW_IC_STATUS 0x3800 +#define DW_IC_DATA_CMD 0x0800 +#define DW_IC_TAR 0x00200 +#define DW_IC_ENABLE 0x3600 +#define DW_IC_CMD 0x0100 +#define DW_IC_STOP 0x0200 +#define DW_IC_SDA_HOLD 0x3e00 +#define DW_IC_SDA_SETUP 0x4a00 +#define DW_IC_SS_SCL_HCNT 0x0a00 +#define DW_IC_SS_SCL_LCNT 0x0c00 +#define DW_IC_FS_SCL_HCNT 0x0e00 +#define DW_IC_FS_SCL_LCNT 0x1000 +#define DW_IC_TX_TL 0x1e00 +#define DW_IC_RX_TL 0x1c00 +#define DW_IC_INTR_MASK 0x1800 + +#define MAX_RETRY 10000000 + +#define DW_IC_STATUS_ACTIVITY 0x1 +#define DW_IC_STATUS_TFNF 0x2 +#define DW_IC_STATUS_TFE 0x4 +#define DW_IC_STATUS_RFNE 0x8 +#define DW_IC_STATUS_RFF 0x10 + +#define DW_IC_CON_MASTER 0x1 +#define DW_IC_CON_SPEED_STD 0x2 +#define DW_IC_CON_SPEED_FAST 0x4 +#define DW_IC_CON_10BITADDR_MASTER 0x10 +#define DW_IC_CON_RESTART_EN 0x20 +#define DW_IC_CON_SLAVE_DISABLE 0x40 + +#define INTEL_MID_STD_CFG (DW_IC_CON_MASTER | \ + DW_IC_CON_SLAVE_DISABLE | \ + DW_IC_CON_RESTART_EN) + +#define DW_IC_INTR_RX_UNDER 0x001 +#define DW_IC_INTR_RX_OVER 0x002 +#define DW_IC_INTR_RX_FULL 0x004 +#define DW_IC_INTR_TX_OVER 0x008 +#define DW_IC_INTR_TX_EMPTY 0x010 +#define DW_IC_INTR_RD_REQ 0x020 +#define DW_IC_INTR_TX_ABRT 0x040 +#define DW_IC_INTR_RX_DONE 0x080 +#define DW_IC_INTR_ACTIVITY 0x100 +#define DW_IC_INTR_STOP_DET 0x200 +#define DW_IC_INTR_START_DET 0x400 +#define DW_IC_INTR_GEN_CALL 0x800 + +#define DW_IC_INTR_DEFAULT_MASK (DW_IC_INTR_RX_FULL | \ + DW_IC_INTR_TX_EMPTY | \ + DW_IC_INTR_TX_ABRT | \ + DW_IC_INTR_STOP_DET) + +enum i2c_bus_operation { + I2C_BUS_READ, + I2C_BUS_WRITE, +}; + +static uint64_t m_i2c_base_address; + +/* + * This function get I2Cx controller base address + * + * @param i2c_controller_index Bus Number of I2C controller. + * @return I2C BAR. + */ +uint64_t get_i2c_bar_addr(uint8_t i2c_controller_index) +{ + uint64_t base_addr = 0; + + if (i2c_controller_index == 0) + base_addr = PAGE_OFFSET | IO_BASE | IIC0_BASE; + else if (i2c_controller_index == 1) + base_addr = PAGE_OFFSET | IO_BASE | IIC1_BASE; + else if (i2c_controller_index == 2) + base_addr = PAGE_OFFSET | IO_BASE | IIC2_BASE; + + return base_addr; +} + +void write_cpu_i2c_controller(uint64_t offset, uint32_t data) +{ + mb(); + *(volatile uint32_t *)(m_i2c_base_address + offset) = data; +} + +uint32_t read_cpu_i2c_controller(uint64_t offset) +{ + uint32_t data; + + data = *(volatile uint32_t *)(m_i2c_base_address + offset); + mb(); + return data; +} + +static int poll_for_status_set0(uint16_t status_bit) +{ + uint64_t retry = 0; + uint32_t temp = read_cpu_i2c_controller(DW_IC_STATUS); + + temp = read_cpu_i2c_controller(DW_IC_STATUS); + + while (retry < MAX_RETRY) { + if (read_cpu_i2c_controller(DW_IC_STATUS) & status_bit) + break; + retry++; + } + + if (retry == MAX_RETRY) + return -ETIME; + + return 0; +} + +static uint32_t i2c_dw_scl_lcnt(uint32_t ic_clk, uint32_t t_low, + uint32_t tf, uint32_t offset) +{ + /* + * Conditional expression: + * + * IC_[FS]S_SCL_LCNT + 1 >= IC_CLK * (t_low + tf) + * + * DW I2C core starts counting the SCL CNTs for the LOW period + * of the SCL clock (t_low) as soon as it pulls the SCL line. + * In order to meet the t_low timing spec, we need to take into + * account the fall time of SCL signal (tf). Default tf value + * should be 0.3 us, for safety. 
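+	 * As a concrete check with the values this file actually uses for
+	 * standard mode (ic_clk = 25000 kHz, t_low = 4700 ns, tf = 300 ns,
+	 * offset = 0):
+	 *
+	 *   (25000 * 5000 + 500000) / 1000000 - 1 = 124
+	 *
+	 * so DW_IC_SS_SCL_LCNT is programmed to 124 by
+	 * init_cpu_i2c_controller() below.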
+ */ + return ((ic_clk * (t_low + tf) + 500000) / 1000000) - 1 + offset; +} + +static uint32_t i2c_dw_scl_hcnt(uint32_t ic_clk, uint32_t t_symbol, + uint32_t tf, uint32_t cond, uint32_t offset) +{ + /* + * DesignWare I2C core doesn't seem to have solid strategy to meet + * the tHD;STA timing spec. Configuring _HCNT based on tHIGH spec + * will result in violation of the tHD;STA spec. + */ + if (cond) + /* + * Conditional expression: + * + * IC_[FS]S_SCL_HCNT + (1+4+3) >= IC_CLK * tHIGH + * + * This is based on the DW manuals, and represents an ideal + * configuration. The resulting I2C bus speed will be faster + * than any of the others. + * + * If your hardware is free from tHD;STA issue, try this one. + */ + return (ic_clk * t_symbol + 500000) / 1000000 - 8 + offset; + /* + * Conditional expression: + * + * IC_[FS]S_SCL_HCNT + 3 >= IC_CLK * (tHD;STA + tf) + * + * This is just experimental rule; the tHD;STA period turned + * out to be proportinal to (_HCNT + 3). With this setting, + * we could meet both tHIGH and tHD;STA timing specs. + * + * If unsure, you'd better to take this alternative. + * + * The reason why we need to take into account "tf" here, + * is the same as described in i2c_dw_scl_lcnt(). + */ + return (ic_clk * (t_symbol + tf) + 500000) / 1000000 - 3 + offset; +} + +static int wait_for_cpu_i2c_bus_busy(void) +{ + uint64_t retry = 0; + uint32_t status = 0; + + do { + retry++; + status = !!(read_cpu_i2c_controller(DW_IC_STATUS) & DW_IC_STATUS_ACTIVITY); + } while ((retry < MAX_RETRY) && status); + + if (retry == MAX_RETRY) + return -ETIME; + + return 0; +} + +static int i2c_read(uint8_t reg_offset, uint8_t *buffer, uint32_t length) +{ + int status; + uint32_t i; + + status = poll_for_status_set0(DW_IC_STATUS_TFE); + if (status) + return status; + + write_cpu_i2c_controller(DW_IC_DATA_CMD, reg_offset); + + for (i = 0; i < length; i++) { + if (i == length - 1) + write_cpu_i2c_controller(DW_IC_DATA_CMD, DW_IC_CMD | DW_IC_STOP); + else + write_cpu_i2c_controller(DW_IC_DATA_CMD, DW_IC_CMD); + + if (poll_for_status_set0(DW_IC_STATUS_RFNE) == 0) + buffer[i] = *(uint8_t *) (m_i2c_base_address + DW_IC_DATA_CMD); + else + pr_err("Read timeout line %d.\n", __LINE__); + } + + return 0; +} + +static int i2c_write(uint8_t reg_offset, uint8_t *buffer, uint32_t length) +{ + int status; + uint32_t i; + + /* Data transfer, poll till transmit ready bit is set */ + status = poll_for_status_set0(DW_IC_STATUS_TFE); + if (status) { + pr_err("In i2c-lib.c, line %d.\n", __LINE__); + return status; + } + + write_cpu_i2c_controller(DW_IC_DATA_CMD, reg_offset); + + for (i = 0; i < length; i++) { + if (poll_for_status_set0(DW_IC_STATUS_TFNF) == 0) { + if (i == length - 1) + write_cpu_i2c_controller(DW_IC_DATA_CMD, buffer[i] | DW_IC_STOP); + else + write_cpu_i2c_controller(DW_IC_DATA_CMD, buffer[i]); + } else { + pr_err("Write timeout %d.\n", __LINE__); + } + } + + mdelay(200); + status = poll_for_status_set0(DW_IC_STATUS_TFE); + if (status) { + pr_err("In i2c-lib.c, line %d.\n", __LINE__); + return status; + } + + return 0; +} + +/* Initialize I2c controller */ +void init_cpu_i2c_controller(void) +{ + uint32_t h_cnt; + uint32_t l_cnt; + uint32_t input_ic_clk_rate = IC_CLK_KHZ; /* by unit KHz ie. 
25MHz */ + uint32_t sda_falling_time = 300; + uint32_t scl_falling_time = 300; + + /* + * The I2C protocol specification requires 300ns of hold time on the + * SDA signal (tHD;DAT) in standard and fast speed modes, and a hold + * time long enough to bridge the undefined part between logic 1 and + * logic 0 of the falling edge of SCL in high speed mode. + */ + uint32_t sda_hold_time = 432; + uint32_t sda_hold = 0; + + /* Firstly disable the controller. */ + ICC_DEBUG("Initialize CPU I2C controller\n"); + + write_cpu_i2c_controller(DW_IC_ENABLE, 0); + + sda_hold = (input_ic_clk_rate * sda_hold_time + 500000) / 1000000; + write_cpu_i2c_controller(DW_IC_SDA_HOLD, sda_hold); + + /* Set standard and fast speed deviders for high/low periods. */ + /* Standard-mode */ + h_cnt = i2c_dw_scl_hcnt(input_ic_clk_rate, 4000, sda_falling_time, 0, 0); + l_cnt = i2c_dw_scl_lcnt(input_ic_clk_rate, 4700, scl_falling_time, 0); + + write_cpu_i2c_controller(DW_IC_SS_SCL_HCNT, h_cnt); + write_cpu_i2c_controller(DW_IC_SS_SCL_LCNT, l_cnt); + + ICC_DEBUG("Standard-mode HCNT=%x, LCNT=%x\n", h_cnt, l_cnt); + + /* Fast-mode */ + h_cnt = i2c_dw_scl_hcnt(input_ic_clk_rate, 600, sda_falling_time, 0, 0); + l_cnt = i2c_dw_scl_lcnt(input_ic_clk_rate, 1300, scl_falling_time, 0); + + write_cpu_i2c_controller(DW_IC_FS_SCL_HCNT, h_cnt); + write_cpu_i2c_controller(DW_IC_FS_SCL_LCNT, l_cnt); + + ICC_DEBUG("Fast-mode HCNT=%x, LCNT=%d\n\n", h_cnt, l_cnt); + + /* Configure Tx/Rx FIFO threshold levels, since we will be working + * in polling mode set both thresholds to their minimum + */ + write_cpu_i2c_controller(DW_IC_TX_TL, 0); + write_cpu_i2c_controller(DW_IC_RX_TL, 0); + write_cpu_i2c_controller(DW_IC_INTR_MASK, DW_IC_INTR_DEFAULT_MASK); + + /* Configure the i2c master */ + write_cpu_i2c_controller(DW_IC_CON, + INTEL_MID_STD_CFG | DW_IC_CON_SPEED_STD); + +} + +/* + * This function enables I2C controllers. + * + * @param i2c_controller_index Bus Number of I2C controllers. + */ +void enable_i2c_controller(uint8_t i2c_controller_index) +{ + m_i2c_base_address = get_i2c_bar_addr(i2c_controller_index); + init_cpu_i2c_controller(); +} + +/* + * Write/Read data from I2C device. + * + * @i2c_controller_index: i2c bus number + * @slave_address: slave address + * @operation: to read or write + * @length: number of bytes + * @reg_offset: register offset + * @buffer: in/out buffer + */ +int i2c_bus_rw(uint8_t i2c_controller_index, uint8_t slave_address, + enum i2c_bus_operation operation, uint32_t length, + uint8_t reg_offset, void *buffer) +{ + uint8_t *byte_buffer = buffer; + int status = 0; + uint32_t databuffer, temp; + + m_i2c_base_address = get_i2c_bar_addr(i2c_controller_index); + status = wait_for_cpu_i2c_bus_busy(); + if (status) { + pr_err("%d\n", __LINE__); + return status; + } + + mdelay(1000); + + /* Set the slave address. */ + write_cpu_i2c_controller(DW_IC_ENABLE, 0x0); /* Disable controller */ + databuffer = read_cpu_i2c_controller(DW_IC_CON); + databuffer &= ~DW_IC_CON_10BITADDR_MASTER; + write_cpu_i2c_controller(DW_IC_CON, databuffer); + + /* Fill the target addr. */ + write_cpu_i2c_controller(DW_IC_TAR, slave_address); + + temp = read_cpu_i2c_controller(DW_IC_TAR); + + /* Configure Tx/Rx FIFO threshold levels. 
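+	 * As an end-to-end illustration of these helpers (the 0x68 address
+	 * mirrors the pcf8523 RTC node in chip3.dts; the register offset
+	 * 0x00 is only an example), a poll-mode read would look roughly
+	 * like:
+	 *
+	 *	u8 secs;
+	 *
+	 *	enable_i2c_controller(2);
+	 *	i2c_bus_rw(2, 0x68, I2C_BUS_READ, 1, 0x00, &secs);
+	 *	disable_i2c_controller(2);
+	 *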
*/ + write_cpu_i2c_controller(DW_IC_ENABLE, 0x1); /* Enable the adapter */ + write_cpu_i2c_controller(DW_IC_INTR_MASK, DW_IC_INTR_DEFAULT_MASK); + + if (operation == I2C_BUS_READ) + status = i2c_read(reg_offset, byte_buffer, length); + else if (operation == I2C_BUS_WRITE) + status = i2c_write(reg_offset, byte_buffer, length); + + /* Disable controller */ + write_cpu_i2c_controller(DW_IC_ENABLE, 0x0); + + return status; +} + +void disable_i2c_controller(uint8_t i2c_controller_index) +{ + m_i2c_base_address = get_i2c_bar_addr(i2c_controller_index); + + /* Disable controller */ + write_cpu_i2c_controller(DW_IC_ENABLE, 0x0); + m_i2c_base_address = 0; +} + +void cpld_write(uint8_t slave_addr, uint8_t reg, uint8_t data) +{ + enable_i2c_controller(CPLD_BUSNR); + i2c_bus_rw(CPLD_BUSNR, slave_addr, I2C_BUS_WRITE, sizeof(uint8_t), reg, &data); + disable_i2c_controller(CPLD_BUSNR); +} diff --git a/arch/sw_64/chip/chip3/irq_chip.c b/arch/sw_64/chip/chip3/irq_chip.c new file mode 100644 index 000000000000..ee43e87c554b --- /dev/null +++ b/arch/sw_64/chip/chip3/irq_chip.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/i2c.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/irqdomain.h> +#include <linux/irqchip.h> +#include <asm/irq.h> +#include <asm/sw64io.h> +#include <asm/irq_impl.h> + +static void fake_irq_mask(struct irq_data *data) +{ +} + +static void fake_irq_unmask(struct irq_data *data) +{ +} + +static struct irq_chip onchip_intc = { + .name = "SW fake Intc", + .irq_mask = fake_irq_mask, + .irq_unmask = fake_irq_unmask, +}; + +static int sw64_intc_domain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + + irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq); + irq_set_status_flags(irq, IRQ_LEVEL); + return 0; +} + +static const struct irq_domain_ops sw64_intc_domain_ops = { + .xlate = irq_domain_xlate_onecell, + .map = sw64_intc_domain_map, +}; + +static struct irq_domain *root_domain; + +static int __init +init_onchip_IRQ(struct device_node *intc, struct device_node *parent) +{ + + int node = 0; + int hwirq = 0, nirq = 8; + + if (parent) + panic("DeviceTree incore intc not a root irq controller\n"); + + root_domain = irq_domain_add_linear(intc, 8, + &sw64_intc_domain_ops, NULL); + + if (!root_domain) + panic("root irq domain not avail\n"); + + /* with this we don't need to export root_domain */ + irq_set_default_host(root_domain); + + for (hwirq = 0 ; hwirq < nirq ; hwirq++) + irq_create_mapping(root_domain, hwirq); + + /*enable MCU_DVC_INT_EN*/ + sw64_io_write(node, MCU_DVC_INT_EN, 0xff); + + return 0; +} + +IRQCHIP_DECLARE(sw64_intc, "sw64,sw6_irq_controller", init_onchip_IRQ); + +static int __init +init_onchip_vt_IRQ(struct device_node *intc, struct device_node *parent) +{ + if (parent) + panic("DeviceTree incore intc not a root irq controller\n"); + + root_domain = irq_domain_add_legacy(intc, 16, 0, 0, + &sw64_intc_domain_ops, NULL); + + if (!root_domain) + panic("root irq domain not avail\n"); + + /* with this we don't need to export root_domain */ + irq_set_default_host(root_domain); + + return 0; +} + +IRQCHIP_DECLARE(sw64_vt_intc, "sw64,sw6_irq_vt_controller", init_onchip_vt_IRQ); diff --git a/arch/sw_64/chip/chip3/msi.c b/arch/sw_64/chip/chip3/msi.c new file mode 100644 index 000000000000..0c6d415e082e --- /dev/null +++ 
b/arch/sw_64/chip/chip3/msi.c @@ -0,0 +1,471 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/pci.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/cpumask.h> +#include <linux/module.h> +#include <linux/msi.h> +#include <linux/irqdomain.h> +#include <asm/irq_impl.h> +#include <asm/msi.h> +#include <asm/pci.h> +#include <asm/sw64io.h> + +static struct irq_domain *msi_default_domain; +static DEFINE_RAW_SPINLOCK(vector_lock); +DEFINE_PER_CPU(vector_irq_t, vector_irq) = { + [0 ... PERCPU_MSI_IRQS - 1] = 0, +}; + +struct sw64_msi_chip_data { + spinlock_t cdata_lock; + unsigned long msi_config; + unsigned long rc_node; + unsigned long rc_index; + unsigned int msi_config_index; + unsigned int dst_coreid; + unsigned int vector; + unsigned int prev_coreid; + unsigned int prev_vector; + bool move_in_progress; +}; + +static struct sw64_msi_chip_data *alloc_sw_msi_chip_data(struct irq_data *irq_data) +{ + struct sw64_msi_chip_data *data; + int node; + + node = irq_data_get_node(irq_data); + data = kzalloc_node(sizeof(*data), GFP_KERNEL, node); + if (!data) + return NULL; + spin_lock_init(&data->cdata_lock); + return data; +} + +static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct sw64_msi_chip_data *chip_data; + + chip_data = irq_data_get_irq_chip_data(data->parent_data); + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = MSI_ADDR_BASE_LO; + msg->data = chip_data->msi_config_index; +} + +static bool find_free_core_vector(const struct cpumask *search_mask, int *found_coreid, int *found_vector) +{ + int vector, coreid; + bool found = false, find_once_global = false; + + coreid = cpumask_first(search_mask); +try_again: + for (vector = 0; vector < 256; vector++) { + while (per_cpu(vector_irq, coreid)[vector]) { + coreid = cpumask_next(coreid, search_mask); + if (coreid >= nr_cpu_ids) { + if (vector == 255) { + if (find_once_global) { + printk("No global free vector\n"); + return found; + } + printk("No local free vector\n"); + search_mask = cpu_online_mask; + coreid = cpumask_first(search_mask); + find_once_global = true; + goto try_again; + } + coreid = cpumask_first(search_mask); + break; + } + } + if (!per_cpu(vector_irq, coreid)[vector]) + break; + } + + found = true; + *found_coreid = coreid; + *found_vector = vector; + return found; +} + +static unsigned long set_piu_msi_config(struct pci_controller *hose, int found_coreid, + int msiconf_index, int found_vector) +{ + unsigned int reg; + unsigned long msi_config; + int phy_coreid; + + msi_config = (1UL << 62) | ((unsigned long)found_vector << 10); + phy_coreid = cpu_to_rcid(found_coreid); + msi_config |= ((phy_coreid >> 5) << 6) | (phy_coreid & 0x1f); + reg = MSICONFIG0 + (unsigned long)(msiconf_index << 7); + write_piu_ior0(hose->node, hose->index, reg, msi_config); + msi_config = read_piu_ior0(hose->node, hose->index, reg); + set_bit(msiconf_index, hose->piu_msiconfig); + + return msi_config; +} + +static int sw64_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force) +{ + struct sw64_msi_chip_data *cdata; + struct pci_controller *hose; + struct irq_data *irqd; + struct msi_desc *entry; + struct cpumask searchmask; + unsigned long flags, msi_config; + int found_vector, found_coreid; + + /* Is this valid ? 
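+	 * The new mask must intersect the online CPUs: set_piu_msi_config()
+	 * retargets the PIU MSICONFIG entry at a physical core id, so
+	 * pointing it at an offline core would leave the vector unserviced;
+	 * hence the -EINVAL below.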
*/ + if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) + return -EINVAL; + + irqd = irq_domain_get_irq_data(msi_default_domain->parent, d->irq); + /* Don't do anything if the interrupt isn't started */ + if (!irqd_is_started(irqd)) + return IRQ_SET_MASK_OK; + + cdata = irqd->chip_data; + if (!cdata) + return -ENOMEM; + + /* If existing target coreid is already in the new mask, and is online then do nothing.*/ + if (cpu_online(cdata->dst_coreid) && cpumask_test_cpu(cdata->dst_coreid, cpumask)) + return IRQ_SET_MASK_OK; + + raw_spin_lock_irqsave(&vector_lock, flags); + + cpumask_and(&searchmask, cpumask, cpu_online_mask); + if (!find_free_core_vector(&searchmask, &found_coreid, &found_vector)) { + raw_spin_unlock_irqrestore(&vector_lock, flags); + return -ENOSPC; + } + + /* update new setting */ + entry = irq_get_msi_desc(irqd->irq); + hose = (struct pci_controller *)msi_desc_to_pci_sysdata(entry); + spin_lock(&cdata->cdata_lock); + per_cpu(vector_irq, found_coreid)[found_vector] = irqd->irq; + msi_config = set_piu_msi_config(hose, found_coreid, cdata->msi_config_index, found_vector); + cdata->prev_vector = cdata->vector; + cdata->prev_coreid = cdata->dst_coreid; + cdata->dst_coreid = found_coreid; + cdata->vector = found_vector; + cdata->msi_config = msi_config; + cdata->move_in_progress = true; + spin_unlock(&cdata->cdata_lock); + cpumask_copy(irq_data_get_affinity_mask(irqd), &searchmask); + + raw_spin_unlock_irqrestore(&vector_lock, flags); + + return 0; +} + +static void chip_irq_ack(struct irq_data *data) +{ +} + +static struct irq_chip pci_msi_controller = { + .name = "PCI-MSI", + .irq_unmask = pci_msi_unmask_irq, + .irq_mask = pci_msi_mask_irq, + .irq_ack = chip_irq_ack, + .irq_compose_msi_msg = irq_msi_compose_msg, + .flags = IRQCHIP_SKIP_SET_WAKE, + .irq_set_affinity = sw64_set_affinity, +}; + +static int __assign_irq_vector(int virq, unsigned int nr_irqs, + struct irq_domain *domain, struct pci_controller *hose) +{ + struct irq_data *irq_data; + const struct cpumask *mask; + struct cpumask searchmask; + struct sw64_msi_chip_data *cdata; + int msiconf_index, coreid, node; + int i, found_vector, found_coreid; + unsigned long msi_config; + int start_index; + + if (unlikely((nr_irqs > 1) && (!is_power_of_2(nr_irqs)))) + nr_irqs = __roundup_pow_of_two(nr_irqs); + + msiconf_index = bitmap_find_next_zero_area(hose->piu_msiconfig, 256, 0, + nr_irqs, nr_irqs - 1); + + if (msiconf_index >= 256) { + printk("No free msi on PIU!\n"); + return -ENOSPC; + } + + start_index = msiconf_index; + irq_data = irq_domain_get_irq_data(domain, virq); + BUG_ON(!irq_data); + irq_data->chip = &pci_msi_controller; + + if (irqd_affinity_is_managed(irq_data)) { + mask = irq_data_get_affinity_mask(irq_data); + cpumask_and(&searchmask, mask, cpu_online_mask); + } else { + node = irq_data_get_node(irq_data); + cpumask_copy(&searchmask, cpumask_of_node(node)); + } + + coreid = cpumask_first(&searchmask); + if (coreid >= nr_cpu_ids) + cpumask_copy(&searchmask, cpu_online_mask); + + for (i = 0; i < nr_irqs; i++) { + if (!find_free_core_vector(&searchmask, &found_coreid, &found_vector)) + return -ENOSPC; + + per_cpu(vector_irq, found_coreid)[found_vector] = virq + i; + + if (i) { + irq_data = irq_domain_get_irq_data(domain, virq + i); + irq_data->chip = &pci_msi_controller; + } + + cdata = alloc_sw_msi_chip_data(irq_data); + if (!cdata) { + printk("error alloc irq chip data\n"); + return -ENOMEM; + } + + irq_data->chip_data = cdata; + msiconf_index = start_index + i; + msi_config = set_piu_msi_config(hose, 
found_coreid, msiconf_index, found_vector); + + cdata->dst_coreid = found_coreid; + cdata->vector = found_vector; + cdata->rc_index = hose->index; + cdata->rc_node = hose->node; + cdata->msi_config = msi_config; + cdata->msi_config_index = msiconf_index; + cdata->prev_coreid = found_coreid; + cdata->prev_vector = found_vector; + cdata->move_in_progress = false; + } + return 0; +} + +static int assign_irq_vector(int irq, unsigned int nr_irqs, + struct irq_domain *domain, struct pci_controller *hose) +{ + int err; + unsigned long flags; + + raw_spin_lock_irqsave(&vector_lock, flags); + err = __assign_irq_vector(irq, nr_irqs, domain, hose); + raw_spin_unlock_irqrestore(&vector_lock, flags); + return err; +} + +static void sw64_vector_free_irqs(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + int i; + struct irq_data *irq_data; + unsigned long flags; + + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(domain, virq + i); + if (irq_data && irq_data->chip_data) { + struct sw64_msi_chip_data *cdata; + struct msi_desc *entry; + struct pci_controller *hose; + + raw_spin_lock_irqsave(&vector_lock, flags); + cdata = irq_data->chip_data; + entry = irq_get_msi_desc(virq + i); + if (entry) { + hose = (struct pci_controller *)msi_desc_to_pci_sysdata(entry); + clear_bit(cdata->msi_config_index, hose->piu_msiconfig); + } + irq_domain_reset_irq_data(irq_data); + per_cpu(vector_irq, cdata->dst_coreid)[cdata->vector] = 0; + kfree(cdata); + raw_spin_unlock_irqrestore(&vector_lock, flags); + } + } +} + +static void sw64_irq_free_descs(unsigned int virq, unsigned int nr_irqs) +{ + if (is_guest_or_emul()) + return irq_free_descs(virq, nr_irqs); + + return irq_domain_free_irqs(virq, nr_irqs); +} + +void arch_teardown_msi_irqs(struct pci_dev *dev) +{ + struct msi_desc *desc; + int i; + + for_each_pci_msi_entry(desc, dev) { + if (desc->irq) { + for (i = 0; i < desc->nvec_used; i++) + sw64_irq_free_descs(desc->irq + i, 1); + desc->irq = 0; + } + } +} + +static int sw64_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + int err; + struct irq_alloc_info *info = arg; + struct pci_controller *hose; + + if (arg == NULL) + return -ENODEV; + hose = info->msi_dev->sysdata; + err = assign_irq_vector(virq, nr_irqs, domain, hose); + if (err) + goto error; + return 0; +error: + sw64_vector_free_irqs(domain, virq, nr_irqs); + return err; +} + +static int pci_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct msi_desc *desc = first_pci_msi_entry(pdev); + + memset(arg, 0, sizeof(*arg)); + arg->msi_dev = pdev; + if (desc->msi_attrib.is_msix) + arg->type = IRQ_ALLOC_TYPE_MSIX; + else + arg->type = IRQ_ALLOC_TYPE_MSI; + return 0; +} + +static struct msi_domain_ops pci_msi_domain_ops = { + .msi_prepare = pci_msi_prepare, +}; + +static struct msi_domain_info pci_msi_domain_info = { + .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX, + .ops = &pci_msi_domain_ops, + .chip = &pci_msi_controller, + .handler = handle_edge_irq, + .handler_name = "edge", +}; + +static int sw64_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw) +{ + irq_set_chip_and_handler(virq, &sw64_irq_chip, handle_level_irq); + irq_set_status_flags(virq, IRQ_LEVEL); + return 0; +} + +const struct irq_domain_ops sw64_msi_domain_ops = { + .map = sw64_irq_map, + .alloc = sw64_vector_alloc_irqs, + .free = 
sw64_vector_free_irqs, +}; + +int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + struct irq_domain *domain; + int err; + + if (is_guest_or_emul()) + return sw64_setup_vt_msi_irqs(pdev, nvec, type); + + domain = msi_default_domain; + if (domain == NULL) + return -ENOSYS; + err = msi_domain_alloc_irqs(domain, &pdev->dev, nvec); + return err; +} + +void arch_init_msi_domain(struct irq_domain *parent) +{ + struct irq_domain *sw64_irq_domain; + + if (is_guest_or_emul()) + return; + + sw64_irq_domain = irq_domain_add_tree(NULL, &sw64_msi_domain_ops, NULL); + BUG_ON(sw64_irq_domain == NULL); + irq_set_default_host(sw64_irq_domain); + msi_default_domain = pci_msi_create_irq_domain(NULL, + &pci_msi_domain_info, sw64_irq_domain); + if (!msi_default_domain) + pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); +} + +static void irq_move_complete(struct sw64_msi_chip_data *cdata, int coreid, int vector) +{ + if (likely(!cdata->move_in_progress)) + return; + if (vector == cdata->vector && cdata->dst_coreid == coreid) { + raw_spin_lock(&vector_lock); + cdata->move_in_progress = 0; + per_cpu(vector_irq, cdata->prev_coreid)[cdata->prev_vector] = 0; + raw_spin_unlock(&vector_lock); + } +} + +void handle_pci_msi_interrupt(unsigned long type, unsigned long vector, unsigned long pci_msi1_addr) +{ + int i, msi_index = 0; + int vector_index = 0, logical_cid; + unsigned long value = 0; + unsigned long int_pci_msi[3]; + unsigned long *ptr; + struct irq_data *irq_data; + struct sw64_msi_chip_data *cdata; + + if (is_guest_or_emul()) { + handle_irq(vector); + return; + } + + ptr = (unsigned long *)pci_msi1_addr; + int_pci_msi[0] = *ptr; + int_pci_msi[1] = *(ptr + 1); + int_pci_msi[2] = *(ptr + 2); + + logical_cid = smp_processor_id(); + + for (i = 0; i < 4; i++) { + vector_index = i * 64; + while (vector != 0) { + int irq = 0; + int piu_index = 0; + + msi_index = find_next_bit(&vector, 64, msi_index); + if (msi_index == 64) { + msi_index = 0; + continue; + } + + irq = per_cpu(vector_irq, logical_cid)[vector_index + msi_index]; + irq_data = irq_domain_get_irq_data(msi_default_domain->parent, irq); + cdata = irq_data_get_irq_chip_data(irq_data); + spin_lock(&cdata->cdata_lock); + irq_move_complete(cdata, logical_cid, vector_index + msi_index); + piu_index = cdata->msi_config_index; + value = cdata->msi_config | (1UL << 63); + write_piu_ior0(cdata->rc_node, cdata->rc_index, MSICONFIG0 + (piu_index << 7), value); + spin_unlock(&cdata->cdata_lock); + handle_irq(irq); + + vector = vector & (~(1UL << msi_index)); + } + + vector = int_pci_msi[i % 3]; + } +} + +MODULE_LICENSE("GPL v2"); diff --git a/arch/sw_64/chip/chip3/pci-quirks.c b/arch/sw_64/chip/chip3/pci-quirks.c new file mode 100644 index 000000000000..e70c211df68f --- /dev/null +++ b/arch/sw_64/chip/chip3/pci-quirks.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/pci.h> +#include <linux/delay.h> +#include <asm/pci.h> +#include <asm/sw64io.h> +#include <asm/hw_init.h> + +static int handshake(void __iomem *ptr, u32 mask, u32 done, + int wait_usec, int delay_usec) +{ + u32 result; + + do { + result = readl(ptr); + result &= mask; + if (result == done) + return 0; + udelay(delay_usec); + wait_usec -= delay_usec; + } while (wait_usec > 0); + return -ETIMEDOUT; +} + +#define XHCI_HCC_EXT_CAPS(p) (((p) >> 16) & 0xffff) +#define XHCI_EXT_CAPS_ID(p) (((p) >> 0) & 0xff) +#define XHCI_EXT_CAPS_NEXT(p) (((p) >> 8) & 0xff) +#define XHCI_HC_LENGTH(p) (((p) >> 0) & 0x00ff) +#define XHCI_CMD_OFFSET (0x00) +#define 
XHCI_STS_OFFSET (0x04) +#define XHCI_EXT_CAPS_LEGACY (1) +#define XHCI_HCC_PARAMS_OFFSET (0x10) +#define XHCI_LEGACY_CONTROL_OFFSET (0x04) +#define XHCI_LEGACY_DISABLE_SMI ((0x7 << 1) + (0xff << 5) + (0x7 << 17)) +#define XHCI_LEGACY_SMI_EVENTS (0x7 << 29) +#define XHCI_HC_BIOS_OWNED (1 << 16) +#define XHCI_HC_OS_OWNED (1 << 24) +#define XHCI_CMD_RUN (1 << 0) +#define XHCI_STS_HALT (1 << 0) +#define XHCI_MAX_HALT_USEC (16 * 1000) +#define XHCI_CMD_EIE (1 << 2) +#define XHCI_CMD_HSEIE (1 << 3) +#define XHCI_CMD_EWE (1 << 10) +#define XHCI_IRQS (XHCI_CMD_EIE | XHCI_CMD_HSEIE | XHCI_CMD_EWE) +#define XHCI_STS_CNR (1 << 11) +#define STS_FATAL (1 << 2) +#define STS_EINT (1 << 3) +#define STS_PORT (1 << 4) +#define STS_SRE (1 << 10) +#define STS_RW1C_BITS (STS_FATAL | STS_EINT | STS_PORT | STS_SRE) + +static inline int xhci_find_next_ext_cap(void __iomem *base, u32 start, int id) +{ + u32 val; + u32 next; + u32 offset; + + offset = start; + if (!start || start == XHCI_HCC_PARAMS_OFFSET) { + val = readl(base + XHCI_HCC_PARAMS_OFFSET); + if (val == ~0) + return 0; + offset = XHCI_HCC_EXT_CAPS(val) << 2; + if (!offset) + return 0; + } + do { + val = readl(base + offset); + if (val == ~0) + return 0; + if (offset != start && (id == 0 || XHCI_EXT_CAPS_ID(val) == id)) + return offset; + + next = XHCI_EXT_CAPS_NEXT(val); + offset += next << 2; + } while (next); + + return 0; +} + +extern void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev); + +static void +fixup_usb_xhci_reset(struct pci_dev *dev) +{ + void __iomem *op_reg_base; + int timeout; + u32 xhci_command; + u32 tmp, val; + void __iomem *base; + struct pci_controller *hose = dev->sysdata; + unsigned long offset; + int ext_cap_offset; + int retries = 3; + + pci_read_config_dword(dev, PCI_COMMAND, &tmp); + tmp |= (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + pci_write_config_dword(dev, PCI_COMMAND, tmp); + + pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &tmp); + if (tmp & PCI_BASE_ADDRESS_MEM_TYPE_MASK) { + pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &val); + offset = (unsigned long)(val) << 32 | (tmp & (~0xf)); + } else + offset = (unsigned long)(tmp & (~0xf)); + + if (offset == 0) + return; + + base = (void *)__va(SW64_PCI_IO_BASE(hose->node, hose->index) | offset); + + ext_cap_offset = xhci_find_next_ext_cap(base, 0, XHCI_EXT_CAPS_LEGACY); + if (!ext_cap_offset) + goto hc_init; + + val = readl(base + ext_cap_offset); + + if ((dev->vendor == PCI_VENDOR_ID_TI && dev->device == 0x8241) || + (dev->vendor == PCI_VENDOR_ID_RENESAS + && dev->device == 0x0014)) { + val = (val | XHCI_HC_OS_OWNED) & ~XHCI_HC_BIOS_OWNED; + writel(val, base + ext_cap_offset); + } + + if (val & XHCI_HC_BIOS_OWNED) { + writel(val | XHCI_HC_OS_OWNED, base + ext_cap_offset); + + timeout = handshake(base + ext_cap_offset, XHCI_HC_BIOS_OWNED, + 0, 1000000, 10); + if (timeout) { + pr_err("xHCI BIOS handoff failed (BIOS bug ?) %08x\n", val); + writel(val & ~XHCI_HC_BIOS_OWNED, base + ext_cap_offset); + } + } + + val = readl(base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET); + val &= XHCI_LEGACY_DISABLE_SMI; + val |= XHCI_LEGACY_SMI_EVENTS; + writel(val, base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET); + +hc_init: + if (dev->vendor == PCI_VENDOR_ID_INTEL) + usb_enable_intel_xhci_ports(dev); + + op_reg_base = base + XHCI_HC_LENGTH(readl(base)); + + timeout = handshake(op_reg_base + XHCI_STS_OFFSET, XHCI_STS_CNR, 0, + 5000000, 10); + if (timeout) { + val = readl(op_reg_base + XHCI_STS_OFFSET); + pr_err("xHCI HW not ready after 5 sec (HC bug?) 
status = 0x%x\n", val); + } + + xhci_command = readl(op_reg_base + XHCI_CMD_OFFSET); + xhci_command |= 0x2; + writel(xhci_command, op_reg_base + XHCI_CMD_OFFSET); + + timeout = handshake(op_reg_base + XHCI_CMD_OFFSET, + 0x2, 0, 10 * 1000 * 1000, 125); + if (timeout) + pr_err("xHCI BIOS handoff time out\n"); + +retry: + val = readl(op_reg_base + XHCI_STS_OFFSET); + val |= STS_RW1C_BITS; + writel(val, op_reg_base + XHCI_STS_OFFSET); + val = readl(op_reg_base + XHCI_STS_OFFSET); + + if ((val & STS_RW1C_BITS) && retries--) { + pr_err("clear USB Status Register (status = %#x) failed, retry\n", val); + goto retry; + } + + val = readl(op_reg_base + XHCI_CMD_OFFSET); + val &= ~(XHCI_CMD_RUN | XHCI_IRQS); + writel(val, op_reg_base + XHCI_CMD_OFFSET); + timeout = handshake(op_reg_base + XHCI_STS_OFFSET, XHCI_STS_HALT, 1, + XHCI_MAX_HALT_USEC, 125); + if (timeout) { + val = readl(op_reg_base + XHCI_STS_OFFSET); + pr_err("xHCI HW did not halt within %d usec status = 0x%x\n", + XHCI_MAX_HALT_USEC, val); + } + + xhci_command = readl(op_reg_base + XHCI_CMD_OFFSET); + xhci_command |= 0x2; + writel(xhci_command, op_reg_base + XHCI_CMD_OFFSET); + + timeout = handshake(op_reg_base + XHCI_CMD_OFFSET, + 0x2, 0, 10 * 1000 * 1000, 125); + if (timeout) + pr_err("xHCI BIOS handoff time out\n"); + + pci_read_config_dword(dev, PCI_COMMAND, &tmp); + tmp &= ~(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + pci_write_config_dword(dev, PCI_COMMAND, tmp); +} +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_SERIAL_USB_XHCI, 0, fixup_usb_xhci_reset); + +#ifdef CONFIG_DCA +static void enable_sw_dca(struct pci_dev *dev) +{ + struct pci_controller *hose = (struct pci_controller *)dev->sysdata; + unsigned long node, rc_index, dca_ctl, dca_conf; + int i; + + if (dev->class >> 8 != PCI_CLASS_NETWORK_ETHERNET) + return; + node = hose->node; + rc_index = hose->index; + for (i = 0; i < 256; i++) { + dca_conf = read_piu_ior1(node, rc_index, DEVICEID0 + (i << 7)); + if (dca_conf >> 63) + continue; + else { + dca_conf = (1UL << 63) | (dev->bus->number << 8) | dev->devfn; + printk("dca device index %d, dca_conf = %#lx\n", i, dca_conf); + write_piu_ior1(node, rc_index, DEVICEID0 + (i << 7), dca_conf); + break; + } + } + dca_ctl = read_piu_ior1(node, rc_index, DCACONTROL); + if (dca_ctl & 0x1) { + dca_ctl = 0x2; + write_piu_ior1(node, rc_index, DCACONTROL, dca_ctl); + printk("Node %ld RC %ld enable DCA 1.0\n", node, rc_index); + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, enable_sw_dca); +#endif + +void __init reserve_mem_for_pci(void) +{ + int ret; + unsigned long base; + + base = is_in_host() ? 
PCI_32BIT_MEMIO : PCI_32BIT_VT_MEMIO; + + ret = add_memmap_region(base, PCI_32BIT_MEMIO_SIZE, memmap_pci); + if (ret) { + pr_err("reserved pages for pcie memory space failed\n"); + return; + } + + pr_info("reserved pages for pcie memory space %lx:%lx\n", base >> PAGE_SHIFT, + (base + PCI_32BIT_MEMIO_SIZE) >> PAGE_SHIFT); +} diff --git a/arch/sw_64/chip/chip3/vt_msi.c b/arch/sw_64/chip/chip3/vt_msi.c new file mode 100644 index 000000000000..31f49d3c3511 --- /dev/null +++ b/arch/sw_64/chip/chip3/vt_msi.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/pci.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/cpumask.h> +#include <linux/module.h> +#include <linux/msi.h> +#include <asm/irq_impl.h> +#include <asm/msi.h> +#include <asm/pci.h> +#include <asm/sw64io.h> + +#define QEMU_MSIX_MSG_ADDR (0x8000fee00000UL) + +static DEFINE_RAW_SPINLOCK(vector_lock); + +static struct irq_chip msi_chip = { + .name = "PCI-MSI", + .irq_unmask = pci_msi_unmask_irq, + .irq_mask = pci_msi_mask_irq, + .irq_ack = sw64_irq_noop, +}; + +static int qemu_msi_compose_msg(unsigned int irq, struct msi_msg *msg) +{ + msg->address_hi = (unsigned int)(QEMU_MSIX_MSG_ADDR >> 32); + msg->address_lo = (unsigned int)(QEMU_MSIX_MSG_ADDR & 0xffffffff); + msg->data = irq; + return irq; +} + +int chip_setup_vt_msix_irq(struct pci_dev *dev, struct msi_desc *desc) +{ + struct msi_msg msg; + int virq, val_node = 0; + struct irq_data *irq_data; + struct sw6_msi_chip_data *cdata; + struct pci_controller *hose = (struct pci_controller *)dev->sysdata; + unsigned long flags, node, rc_index; + const struct cpumask *mask; + + node = hose->node; + rc_index = hose->index; + mask = cpumask_of_node(node); + + raw_spin_lock_irqsave(&vector_lock, flags); + /* Find unused msi config reg in PIU-IOR0 */ + if (!node_online(node)) + val_node = next_node_in(node, node_online_map); + else + val_node = node; + + virq = irq_alloc_descs_from(NR_IRQS_LEGACY, desc->nvec_used, val_node); + if (virq < 0) { + pr_debug("cannot allocate IRQ(base 16, count %d)\n", desc->nvec_used); + raw_spin_unlock_irqrestore(&vector_lock, flags); + return virq; + } + + qemu_msi_compose_msg(virq, &msg); + irq_set_msi_desc(virq, desc); + pci_write_msi_msg((virq), &msg); + irq_set_chip_and_handler_name(virq, &msi_chip, handle_edge_irq, "edge"); + irq_data = irq_get_irq_data(virq); + cdata = kzalloc(sizeof(*cdata), GFP_KERNEL); + if (!cdata) + return -ENOMEM; + irq_data->chip_data = cdata; + raw_spin_unlock_irqrestore(&vector_lock, flags); + return 0; +} +EXPORT_SYMBOL(chip_setup_vt_msix_irq); + +int chip_setup_vt_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + struct msi_desc *desc; + struct msi_msg msg; + struct pci_controller *hose = (struct pci_controller *)dev->sysdata; + struct irq_data *irq_data; + struct sw6_msi_chip_data *cdata; + int i = 0; + unsigned long node, rc_index; + int virq = -1, val_node = 0; + unsigned long flags; + + if (type == PCI_CAP_ID_MSI && nvec > 32) + return 1; + + node = hose->node; + rc_index = hose->index; + raw_spin_lock_irqsave(&vector_lock, flags); + for_each_msi_entry(desc, &(dev->dev)) { + /* Find unused msi config reg in PIU-IOR0 */ + if (!node_online(node)) + val_node = next_node_in(node, node_online_map); + else + val_node = node; + virq = irq_alloc_descs_from(NR_IRQS_LEGACY, desc->nvec_used, val_node); + if (virq < 0) { + pr_debug("cannot allocate IRQ(base 16, count %d)\n", desc->nvec_used); + raw_spin_unlock_irqrestore(&vector_lock, flags); + return virq; + } + qemu_msi_compose_msg(virq, 
&msg); + for (i = 0; i < desc->nvec_used; i++) { + irq_set_msi_desc_off(virq, i, desc); + pci_write_msi_msg((virq + i), &msg); + desc->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec)); + irq_set_chip_and_handler_name(virq + i, &msi_chip, handle_edge_irq, "edge"); + irq_data = irq_get_irq_data(virq + i); + cdata = kzalloc(sizeof(*cdata), GFP_KERNEL); + if (!cdata) + return -ENOMEM; + irq_data->chip_data = cdata; + } + } + + raw_spin_unlock_irqrestore(&vector_lock, flags); + return 0; +} +EXPORT_SYMBOL(chip_setup_vt_msi_irqs); + +int __arch_setup_vt_msix_irqs(struct pci_dev *dev, int nvec, int type) +{ + struct msi_desc *entry; + int ret; + + list_for_each_entry(entry, &dev->dev.msi_list, list) { + ret = chip_setup_vt_msix_irq(dev, entry); + if (ret) + return ret; + } + + return 0; +} + +int sw64_setup_vt_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int ret = 0; + + if (type == PCI_CAP_ID_MSI) + ret = chip_setup_vt_msi_irqs(dev, nvec, type); + else if (type == PCI_CAP_ID_MSIX) + ret = __arch_setup_vt_msix_irqs(dev, nvec, type); + else + pr_info("SW arch do not identify ID:%d\n", type); + + return ret; +} +MODULE_LICENSE("GPL v2"); diff --git a/arch/sw_64/defconfig b/arch/sw_64/defconfig new file mode 100644 index 000000000000..d641ca0c108a --- /dev/null +++ b/arch/sw_64/defconfig @@ -0,0 +1,73 @@ +CONFIG_EXPERIMENTAL=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_KALLSYMS_ALL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_VERBOSE_MCHECK=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +# CONFIG_IPV6 is not set +CONFIG_NETFILTER=y +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_FILTER=m +CONFIG_VLAN_8021Q=m +CONFIG_PNP=y +CONFIG_ISAPNP=y +CONFIG_BLK_DEV_FD=y +CONFIG_BLK_DEV_LOOP=m +CONFIG_IDE=y +CONFIG_BLK_DEV_IDECD=y +CONFIG_IDE_GENERIC=y +CONFIG_BLK_DEV_GENERIC=y +CONFIG_BLK_DEV_ALI15X3=y +CONFIG_BLK_DEV_CMD64X=y +CONFIG_BLK_DEV_CY82C693=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_SCSI_AIC7XXX=m +CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 +# CONFIG_AIC7XXX_DEBUG_ENABLE is not set +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +CONFIG_NET_ETHERNET=y +CONFIG_NET_VENDOR_3COM=y +CONFIG_VORTEX=y +CONFIG_NET_TULIP=y +CONFIG_DE2104X=m +CONFIG_TULIP=y +CONFIG_TULIP_MMIO=y +CONFIG_NET_PCI=y +CONFIG_YELLOWFIN=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_RTC=y +CONFIG_EXT2_FS=y +CONFIG_REISERFS_FS=m +CONFIG_ISO9660_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_INFO=y +CONFIG_SW64_LEGACY_START_ADDRESS=y +CONFIG_MATHEMU=y +CONFIG_CRYPTO_HMAC=y diff --git a/arch/sw_64/include/asm/Kbuild b/arch/sw_64/include/asm/Kbuild new file mode 100644 index 000000000000..ab266af1a06d --- /dev/null +++ b/arch/sw_64/include/asm/Kbuild @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 +header-y += compiler.h +header-y += console.h +header-y += fpu.h +header-y += gentrap.h +header-y += hmcall.h +header-y += reg.h +header-y += regdef.h +header-y += sysinfo.h +header-y += page.h +header-y += elf.h + +generated-y += syscall_table.h +generic-y += export.h +generic-y += kvm_types.h +generic-y += rwsem.h + +generic-y += qrwlock.h +generic-y += qspinlock.h +generic-y += mcs_spinlock.h +generic-y += 
clkdev.h +generic-y += scatterlist.h diff --git a/arch/sw_64/include/asm/a.out-core.h b/arch/sw_64/include/asm/a.out-core.h new file mode 100644 index 000000000000..39dc16142955 --- /dev/null +++ b/arch/sw_64/include/asm/a.out-core.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* a.out coredump register dumper + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _ASM_SW64_OUT_CORE_H +#define _ASM_SW64_OUT_CORE_H + +#ifdef __KERNEL__ + +#include <linux/user.h> + +/* + * Fill in the user structure for an ECOFF core dump. + */ +static inline void aout_dump_thread(struct pt_regs *pt, struct user *dump) +{ + /* switch stack follows right below pt_regs: */ + struct switch_stack *sw = ((struct switch_stack *) pt) - 1; + + dump->magic = CMAGIC; + dump->start_code = current->mm->start_code; + dump->start_data = current->mm->start_data; + dump->start_stack = rdusp() & ~(PAGE_SIZE - 1); + dump->u_tsize = ((current->mm->end_code - dump->start_code) + >> PAGE_SHIFT); + dump->u_dsize = ((current->mm->brk + PAGE_SIZE - 1 - dump->start_data) + >> PAGE_SHIFT); + dump->u_ssize = (current->mm->start_stack - dump->start_stack + + PAGE_SIZE - 1) >> PAGE_SHIFT; + + /* + * We store the registers in an order/format that makes life easier + * for gdb. + */ + dump->regs[EF_V0] = pt->r0; + dump->regs[EF_T0] = pt->r1; + dump->regs[EF_T1] = pt->r2; + dump->regs[EF_T2] = pt->r3; + dump->regs[EF_T3] = pt->r4; + dump->regs[EF_T4] = pt->r5; + dump->regs[EF_T5] = pt->r6; + dump->regs[EF_T6] = pt->r7; + dump->regs[EF_T7] = pt->r8; + dump->regs[EF_S0] = sw->r9; + dump->regs[EF_S1] = sw->r10; + dump->regs[EF_S2] = sw->r11; + dump->regs[EF_S3] = sw->r12; + dump->regs[EF_S4] = sw->r13; + dump->regs[EF_S5] = sw->r14; + dump->regs[EF_S6] = sw->r15; + dump->regs[EF_A3] = pt->r19; + dump->regs[EF_A4] = pt->r20; + dump->regs[EF_A5] = pt->r21; + dump->regs[EF_T8] = pt->r22; + dump->regs[EF_T9] = pt->r23; + dump->regs[EF_T10] = pt->r24; + dump->regs[EF_T11] = pt->r25; + dump->regs[EF_RA] = pt->r26; + dump->regs[EF_T12] = pt->r27; + dump->regs[EF_AT] = pt->r28; + dump->regs[EF_SP] = rdusp(); + dump->regs[EF_PS] = pt->ps; + dump->regs[EF_PC] = pt->pc; + dump->regs[EF_GP] = pt->gp; + dump->regs[EF_A0] = pt->r16; + dump->regs[EF_A1] = pt->r17; + dump->regs[EF_A2] = pt->r18; + memcpy((char *)dump->regs + EF_SIZE, sw->fp, 32 * 8); +} + +#endif /* __KERNEL__ */ +#endif /* _ASM_SW64_OUT_CORE_H */ diff --git a/arch/sw_64/include/asm/a.out.h b/arch/sw_64/include/asm/a.out.h new file mode 100644 index 000000000000..4f2004a7fa8e --- /dev/null +++ b/arch/sw_64/include/asm/a.out.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_A_OUT_H +#define _ASM_SW64_A_OUT_H + +#include <uapi/asm/a.out.h> + +/* Assume that start addresses below 4G belong to a TASO application. + * Unfortunately, there is no proper bit in the exec header to check. + * Worse, we have to notice the start address before swapping to use + * /sbin/loader, which of course is _not_ a TASO application. + */ +#define SET_AOUT_PERSONALITY(BFPM, EX) \ + set_personality(((BFPM->taso || EX.ah.entry < 0x100000000L \ + ? 
ADDR_LIMIT_32BIT : 0) | PER_OSF4)) + +#endif /* _ASM_SW64_A_OUT_H */ diff --git a/arch/sw_64/include/asm/acenv.h b/arch/sw_64/include/asm/acenv.h new file mode 100644 index 000000000000..53b2898718fe --- /dev/null +++ b/arch/sw_64/include/asm/acenv.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_SW64_ACENV_H +#define _ASM_SW64_ACENV_H + +#define COMPILER_DEPENDENT_INT64 long +#define COMPILER_DEPENDENT_UINT64 unsigned long + +/* + * Calling conventions: + * + * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads) + * ACPI_EXTERNAL_XFACE - External ACPI interfaces + * ACPI_INTERNAL_XFACE - Internal ACPI interfaces + * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces + */ +#define ACPI_SYSTEM_XFACE +#define ACPI_EXTERNAL_XFACE +#define ACPI_INTERNAL_XFACE +#define ACPI_INTERNAL_VAR_XFACE + +/* Asm macros */ +#define ACPI_FLUSH_CPU_CACHE() + +int __acpi_acquire_global_lock(unsigned int *lock); +int __acpi_release_global_lock(unsigned int *lock); + +#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = __acpi_acquire_global_lock(&facs->global_lock)) + +#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = __acpi_release_global_lock(&facs->global_lock)) + +/* + * Math helper asm macros + */ +#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) + +#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) +#endif /* _ASM_SW64_ACENV_H */ diff --git a/arch/sw_64/include/asm/acpi.h b/arch/sw_64/include/asm/acpi.h new file mode 100644 index 000000000000..38615b969555 --- /dev/null +++ b/arch/sw_64/include/asm/acpi.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_SW64_ACPI_H +#define _ASM_SW64_ACPI_H + +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/numa.h> + +#ifdef CONFIG_ACPI +extern int acpi_noirq; +extern int acpi_strict; +extern int acpi_disabled; +extern int acpi_pci_disabled; + +/* _ASM_SW64_PDC_H */ +#define ACPI_PDC_P_FFH (0x0001) +#define ACPI_PDC_C_C1_HALT (0x0002) +#define ACPI_PDC_T_FFH (0x0004) +#define ACPI_PDC_SMP_C1PT (0x0008) +#define ACPI_PDC_SMP_C2C3 (0x0010) +#define ACPI_PDC_SMP_P_SWCOORD (0x0020) +#define ACPI_PDC_SMP_C_SWCOORD (0x0040) +#define ACPI_PDC_SMP_T_SWCOORD (0x0080) +#define ACPI_PDC_C_C1_FFH (0x0100) +#define ACPI_PDC_C_C2C3_FFH (0x0200) +#define ACPI_PDC_SMP_P_HWCOORD (0x0800) + +#define ACPI_PDC_EST_CAPABILITY_SMP (ACPI_PDC_SMP_C1PT | \ + ACPI_PDC_C_C1_HALT | \ + ACPI_PDC_P_FFH) + +#define ACPI_PDC_EST_CAPABILITY_SWSMP (ACPI_PDC_SMP_C1PT | \ + ACPI_PDC_C_C1_HALT | \ + ACPI_PDC_SMP_P_SWCOORD | \ + ACPI_PDC_SMP_P_HWCOORD | \ + ACPI_PDC_P_FFH) + +#define ACPI_PDC_C_CAPABILITY_SMP (ACPI_PDC_SMP_C2C3 | \ + ACPI_PDC_SMP_C1PT | \ + ACPI_PDC_C_C1_HALT | \ + ACPI_PDC_C_C1_FFH | \ + ACPI_PDC_C_C2C3_FFH) + +#define ACPI_TABLE_UPGRADE_MAX_PHYS (max_low_pfn_mapped << PAGE_SHIFT) +static inline void disable_acpi(void) +{ + acpi_disabled = 1; + acpi_pci_disabled = 1; + acpi_noirq = 1; +} + +static inline void acpi_noirq_set(void) { acpi_noirq = 1; } +static inline void acpi_disable_pci(void) +{ + acpi_pci_disabled = 1; + acpi_noirq_set(); +} + +static inline bool acpi_has_cpu_in_madt(void) +{ + return true; +} + +/* Low-level suspend routine. 
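For reference, the global-lock handshake that __acpi_acquire_global_lock() and __acpi_release_global_lock() (declared in acenv.h above) are expected to implement can be modelled in portable C. The bit layout (bit 0 = pending waiter, bit 1 = owned) follows the ACPI specification's FACS global lock; this is only an illustrative sketch, not the assembly this patch ships.

#include <stdatomic.h>

#define GL_PENDING	0x1u	/* bit 0: a waiter is pending (ACPI spec) */
#define GL_OWNED	0x2u	/* bit 1: the lock is currently held */

/* Returns 1 if the lock was acquired outright, 0 if the caller set the
 * pending bit and must wait for the owner's release notification. */
static int model_acquire_global_lock(_Atomic unsigned int *lock)
{
	unsigned int old, val;

	do {
		old = atomic_load(lock);
		val = (old & ~GL_PENDING) | GL_OWNED;
		if (old & GL_OWNED)
			val |= GL_PENDING;
	} while (!atomic_compare_exchange_weak(lock, &old, val));

	return !(val & GL_PENDING);
}

/* Returns 1 if a waiter was pending, i.e. a release must be signalled. */
static int model_release_global_lock(_Atomic unsigned int *lock)
{
	unsigned int old, val;

	do {
		old = atomic_load(lock);
		val = old & ~(GL_PENDING | GL_OWNED);
	} while (!atomic_compare_exchange_weak(lock, &old, val));

	return !!(old & GL_PENDING);
}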
*/ +extern int (*acpi_suspend_lowlevel)(void); +extern unsigned long long arch_acpi_wakeup_start; + +/* Physical address to resume after wakeup */ +#define acpi_wakeup_address arch_acpi_wakeup_start + +/* + * Check if the CPU can handle C2 and deeper + */ +static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) +{ + return max_cstate; +} + +static inline bool arch_has_acpi_pdc(void) +{ + return false; +} + +static inline void arch_acpi_set_pdc_bits(u32 *buf) +{ +} +#else /* !CONFIG_ACPI */ + +static inline void acpi_noirq_set(void) { } +static inline void acpi_disable_pci(void) { } +static inline void disable_acpi(void) { } + +#endif /* !CONFIG_ACPI */ + +#define acpi_unlazy_tlb(x) +#endif /* _ASM_SW64_ACPI_H */ diff --git a/arch/sw_64/include/asm/agp.h b/arch/sw_64/include/asm/agp.h new file mode 100644 index 000000000000..e9d16888910e --- /dev/null +++ b/arch/sw_64/include/asm/agp.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_AGP_H +#define _ASM_SW64_AGP_H 1 + +#include <asm/io.h> + +/* dummy for now */ + +#define map_page_into_agp(page) +#define unmap_page_from_agp(page) +#define flush_agp_cache() mb() + +/* GATT allocation. Returns/accepts GATT kernel virtual address. */ +#define alloc_gatt_pages(order) \ + ((char *)__get_free_pages(GFP_KERNEL, (order))) +#define free_gatt_pages(table, order) \ + free_pages((unsigned long)(table), (order)) + +#endif diff --git a/arch/sw_64/include/asm/asm-offsets.h b/arch/sw_64/include/asm/asm-offsets.h new file mode 100644 index 000000000000..72cd408a9c6f --- /dev/null +++ b/arch/sw_64/include/asm/asm-offsets.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_ASM_OFFSETS_H +#define _ASM_SW64_ASM_OFFSETS_H + +#include <generated/asm-offsets.h> + +#endif diff --git a/arch/sw_64/include/asm/asm-prototypes.h b/arch/sw_64/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..21f4f494d74d --- /dev/null +++ b/arch/sw_64/include/asm/asm-prototypes.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_ASM_PROTOTYPES_H +#define _ASM_SW64_ASM_PROTOTYPES_H + +#include <linux/spinlock.h> +#include <asm/checksum.h> +#include <asm/console.h> +#include <asm/page.h> +#include <asm/string.h> +#include <linux/uaccess.h> + +#include <asm-generic/asm-prototypes.h> + +extern void __divl(void); +extern void __reml(void); +extern void __divw(void); +extern void __remw(void); +extern void __divlu(void); +extern void __remlu(void); +extern void __divwu(void); +extern void __remwu(void); + +#endif diff --git a/arch/sw_64/include/asm/ast2400.h b/arch/sw_64/include/asm/ast2400.h new file mode 100644 index 000000000000..5f4cc84ff3a8 --- /dev/null +++ b/arch/sw_64/include/asm/ast2400.h @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015 Weiqiang Su David.suwq@gmail.com + * + * Both AST2400D and AST2400F package variants are supported. + */ + +#ifndef _ASM_SW64_AST2400_H +#define _ASM_SW64_AST2400_H + +#include <linux/device.h> + +/* Logical Device Numbers (LDN). 
*/ +#define AST2400_FDC 0x00 /* Floppy */ +#define AST2400_PP 0x01 /* Parallel port */ +#define AST2400_SP1 0x02 /* Com1 */ +#define AST2400_SP2 0x03 /* Com2 & IR */ +#define AST2400_KBC 0x05 /* PS/2 keyboard and mouse */ +#define AST2400_CIR 0x06 +#define AST2400_GPIO6789_V 0x07 +#define AST2400_WDT1_GPIO01A_V 0x08 +#define AST2400_GPIO1234567_V 0x09 +#define AST2400_ACPI 0x0A +#define AST2400_HWM_FPLED 0x0B /* Hardware monitor & front LED */ +#define AST2400_VID 0x0D +#define AST2400_CIRWKUP 0x0E /* CIR wakeup */ +#define AST2400_GPIO_PP_OD 0x0F /* GPIO Push-Pull/Open drain select */ +#define AST2400_SVID 0x14 +#define AST2400_DSLP 0x16 /* Deep sleep */ +#define AST2400_GPIOA_LDN 0x17 + +/* virtual LDN for GPIO and WDT */ +#define AST2400_WDT1 ((0 << 8) | AST2400_WDT1_GPIO01A_V) + +#define AST2400_GPIOBASE ((0 << 8) | AST2400_WDT1_GPIO01A_V) //? + +#define AST2400_GPIO0 ((1 << 8) | AST2400_WDT1_GPIO01A_V) +#define AST2400_GPIO1 ((1 << 8) | AST2400_GPIO1234567_V) +#define AST2400_GPIO2 ((2 << 8) | AST2400_GPIO1234567_V) +#define AST2400_GPIO3 ((3 << 8) | AST2400_GPIO1234567_V) +#define AST2400_GPIO4 ((4 << 8) | AST2400_GPIO1234567_V) +#define AST2400_GPIO5 ((5 << 8) | AST2400_GPIO1234567_V) +#define AST2400_GPIO6 ((6 << 8) | AST2400_GPIO1234567_V) +#define AST2400_GPIO7 ((7 << 8) | AST2400_GPIO1234567_V) +#define AST2400_GPIO8 ((0 << 8) | AST2400_GPIO6789_V) +#define AST2400_GPIO9 ((1 << 8) | AST2400_GPIO6789_V) +#define AST2400_GPIOA ((2 << 8) | AST2400_WDT1_GPIO01A_V) + +#define SUPERIO_PNP_PORT 0x2E +#define SUPERIO_CHIPID 0xC333 + +struct device_operations; +typedef struct pnp_device { + unsigned int port; + unsigned int device; + + struct device_operations *ops; +} *device_t; + +struct pnp_mode_ops { + void (*enter_conf_mode)(device_t dev); + void (*exit_conf_mode)(device_t dev); +}; + + +struct device_operations { + void (*read_resources)(device_t dev); + void (*set_resources)(device_t dev); + void (*enable_resources)(device_t dev); + void (*init)(device_t dev); + void (*final)(device_t dev); + void (*enable)(device_t dev); + void (*disable)(device_t dev); + + const struct pnp_mode_ops *ops_pnp_mode; +}; + +/* PNP helper operations */ +struct io_info { + unsigned int mask, set; +}; + +struct pnp_info { + bool enabled; /* set if we should enable the device */ + struct pnp_device pnp_device; + unsigned int function; /* Must be at least 16 bits (virtual LDNs)! 
*/ +}; + +/* Chip operations */ +struct chip_operations { + void (*enable_dev)(struct device *dev); + void (*init)(void *chip_info); + void (*final)(void *chip_info); + unsigned int initialized : 1; + unsigned int finalized : 1; + const char *name; +}; + +typedef struct superio_ast2400_device { + struct device *dev; + const char *name; + unsigned int enabled : 1; /* set if we should enable the device */ + unsigned int superio_ast2400_efir; /* extended function index register */ + unsigned int superio_ast2400_efdr; /* extended function data register */ + struct chip_operations *chip_ops; + const void *chip_info; +} *superio_device_t; + + +static inline void pnp_enter_conf_mode_a5a5(device_t dev) +{ + outb(0xa5, dev->port); + outb(0xa5, dev->port); +} + +static inline void pnp_exit_conf_mode_aa(device_t dev) +{ + outb(0xaa, dev->port); +} + +/* PNP config mode wrappers */ + +static inline void pnp_enter_conf_mode(device_t dev) +{ + if (dev->ops->ops_pnp_mode) + dev->ops->ops_pnp_mode->enter_conf_mode(dev); +} + +static inline void pnp_exit_conf_mode(device_t dev) +{ + if (dev->ops->ops_pnp_mode) + dev->ops->ops_pnp_mode->exit_conf_mode(dev); +} + +/* PNP device operations */ +static inline u8 pnp_read_config(device_t dev, u8 reg) +{ + outb(reg, dev->port); + return inb(dev->port + 1); +} + +static inline void pnp_write_config(device_t dev, u8 reg, u8 value) +{ + outb(reg, dev->port); + outb(value, dev->port + 1); +} + +static inline void pnp_set_logical_device(device_t dev) +{ + pnp_write_config(dev, 0x07, dev->device & 0xff); +// pnp_write_config(dev, 0x07, 0x3); +} + +static inline void pnp_set_enable(device_t dev, int enable) +{ + u8 tmp; + + tmp = pnp_read_config(dev, 0x30); + + if (enable) + tmp |= 1; + else + tmp &= ~1; + + pnp_write_config(dev, 0x30, tmp); +} + +#endif /* _ASM_SW64_AST2400_H */ diff --git a/arch/sw_64/include/asm/atomic.h b/arch/sw_64/include/asm/atomic.h new file mode 100644 index 000000000000..126417a1aeee --- /dev/null +++ b/arch/sw_64/include/asm/atomic.h @@ -0,0 +1,373 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_ATOMIC_H +#define _ASM_SW64_ATOMIC_H + +#include <linux/types.h> +#include <asm/barrier.h> +#include <asm/cmpxchg.h> + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc... + * + * But use these as seldom as possible since they are much slower + * than regular operations. + */ + +#define ATOMIC_INIT(i) { (i) } +#define ATOMIC64_INIT(i) { (i) } + +#define atomic_read(v) READ_ONCE((v)->counter) +#define atomic64_read(v) READ_ONCE((v)->counter) + +#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i)) +#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) + +/* + * To get proper branch prediction for the main line, we must branch + * forward to code at the end of this object's .text section, then + * branch back to restart the operation. + */ +#define atomic64_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new)) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new)) +#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) + + +/** + * atomic_fetch_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns the old value of @v. 
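As a usage illustration for the AST2400 Super-I/O PNP helpers defined in ast2400.h above, a hypothetical sequence (not code added by this patch): unlock the config space, select a logical device through register 0x07, flip its enable bit in register 0x30, then relock. The sketch assumes <asm/ast2400.h> is included and port I/O works.

/* Hypothetical example built on the ast2400.h helpers above. */
static void example_enable_ast2400_com1(void)
{
	struct pnp_device uart = {
		.port	= SUPERIO_PNP_PORT,	/* 0x2E index/data pair */
		.device	= AST2400_SP1,		/* LDN 0x02: COM1 */
	};

	pnp_enter_conf_mode_a5a5(&uart);	/* unlock: 0xA5, 0xA5 to 0x2E */
	pnp_set_logical_device(&uart);		/* select LDN via register 0x07 */
	pnp_set_enable(&uart, 1);		/* set bit 0 of register 0x30 */
	pnp_exit_conf_mode_aa(&uart);		/* relock: 0xAA to 0x2E */
}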
+ */ +static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) +{ + int old, new, c; + unsigned long addr; + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %3, %2\n" + "1: lldw %0, 0(%3)\n" + " cmpeq %0, %5, %4\n" + " seleq %4, 1, $31, %4\n" + " wr_f %4\n" + " addw %0, %6, %1\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstw %1, 0(%3)\n" + " rd_f %1\n" + " beq %4, 2f\n" + " beq %1, 3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (old), "=&r" (new), "=m" (v->counter), "=&r" (addr), "=&r" (c) + : "Ir" (u), "Ir" (a), "m" (v->counter)); + return old; +} +#define atomic_fetch_add_unless atomic_fetch_add_unless +/** + * atomic64_fetch_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns the old value of @v. + */ +static inline long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) +{ + long old, new, c; + unsigned long addr; + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %3, %2\n" + "1: lldl %0, 0(%3)\n" + " cmpeq %0, %5, %4\n" + " seleq %4, 1, $31, %4\n" + " wr_f %4\n" + " addl %0, %6, %1\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstl %1, 0(%3)\n" + " rd_f %1\n" + " beq %4, 2f\n" + " beq %1, 3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (old), "=&r" (new), "=m" (v->counter), "=&r" (addr), "=&r" (c) + : "Ir" (u), "Ir" (a), "m" (v->counter)); + return old; +} +#define atomic64_fetch_add_unless atomic64_fetch_add_unless +/* + * atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. 
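The lldw/lldl and lstw/lstl sequences with wr_f/rd_f are easiest to read next to an equivalent compare-and-swap loop. A portable C11 model of the two helpers documented here, matching their stated semantics only (an illustration, not the code this patch installs):

#include <stdatomic.h>

/* atomic64_fetch_add_unless(): add @a unless the value equals @u,
 * returning the old value either way. */
static long model_fetch_add_unless(_Atomic long *v, long a, long u)
{
	long old = atomic_load(v);

	while (old != u &&
	       !atomic_compare_exchange_weak(v, &old, old + a))
		;	/* a failed CAS refreshes 'old' for the retry */
	return old;
}

/* atomic64_dec_if_positive(): store the decremented value only when the
 * old value was positive, but always return old - 1. */
static long model_dec_if_positive(_Atomic long *v)
{
	long old = atomic_load(v);

	while (old > 0 &&
	       !atomic_compare_exchange_weak(v, &old, old - 1))
		;
	return old - 1;
}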
+ */ +static inline long atomic64_dec_if_positive(atomic64_t *v) +{ + unsigned long old, temp1, addr, temp2; + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %3, %2\n" + "1: lldl %4, 0(%3)\n" + " cmple %4, 0, %0\n" + " seleq %0, 1, $31, %0\n" + " wr_f %0\n" + " subl %4, 1, %1\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstl %1, 0(%3)\n" + " rd_f %1\n" + " beq %0, 2f\n" + " beq %1, 3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (temp1), "=&r" (temp2), "=m" (v->counter), "=&r" (addr), "=&r" (old) + : "m" (v->counter)); + return old - 1; +} + + +#define atomic64_dec_if_positive atomic64_dec_if_positive + +#ifdef CONFIG_LOCK_MEMB +#define LOCK_MEMB "memb\n" +#else +#define LOCK_MEMB +#endif + +#ifdef CONFIG_LOCK_FIXUP +#define LOCK_FIXUP "memb\n" +#else +#define LOCK_FIXUP +#endif + + +#define ATOMIC_OP(op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned long temp1, temp2, addr; \ + __asm__ __volatile__( \ + LOCK_MEMB \ + " ldi %3, %2\n" \ + "1: lldw %0, 0(%3)\n" \ + " ldi %1, 1\n" \ + " wr_f %1\n" \ + " " #asm_op " %0, %4, %0\n" \ + LOCK_FIXUP \ + " lstw %0, 0(%3)\n" \ + " rd_f %0\n" \ + " beq %0, 2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + : "=&r" (temp1), "=&r" (temp2), "=m" (v->counter), "=&r" (addr) \ + : "Ir" (i), "m" (v->counter)); \ +} \ + + +#define ATOMIC_OP_RETURN(op, asm_op) \ +static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ +{ \ + int temp1, temp2; \ + unsigned long addr; \ + __asm__ __volatile__( \ + LOCK_MEMB \ + " ldi %3, %2\n" \ + "1: lldw %0, 0(%3)\n" \ + " ldi %1, 1\n" \ + " wr_f %1\n" \ + " " #asm_op " %0, %4, %1\n" \ + " " #asm_op " %0, %4, %0\n" \ + LOCK_FIXUP \ + " lstw %1, 0(%3)\n" \ + " rd_f %1\n" \ + " beq %1, 2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + : "=&r" (temp1), "=&r" (temp2), "=m" (v->counter), "=&r" (addr) \ + : "Ir" (i), "m" (v->counter)); \ + return temp1; \ +} \ + + + +#define ATOMIC_FETCH_OP(op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +{ \ + int temp1, temp2; \ + unsigned long addr; \ + __asm__ __volatile__( \ + LOCK_MEMB \ + " ldi %3, %2\n" \ + "1: lldw %0, 0(%3)\n" \ + " ldi %1, 1\n" \ + " wr_f %1\n" \ + " " #asm_op " %0, %4, %1\n" \ + LOCK_FIXUP \ + " lstw %1, 0(%3)\n" \ + " rd_f %1\n" \ + " beq %1, 2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + : "=&r" (temp1), "=&r" (temp2), "=m" (v->counter), "=&r" (addr) \ + : "Ir" (i), "m" (v->counter)); \ + return temp1; \ +} \ + + +#define ATOMIC64_OP(op, asm_op) \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + unsigned long temp1, temp2, addr; \ + __asm__ __volatile__( \ + LOCK_MEMB \ + " ldi %3, %2\n" \ + "1: lldl %0, 0(%3)\n" \ + " ldi %1, 1\n" \ + " wr_f %1\n" \ + " " #asm_op " %0, %4, %0\n" \ + LOCK_FIXUP \ + " lstl %0, 0(%3)\n" \ + " rd_f %0\n" \ + " beq %0, 2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + : "=&r" (temp1), "=&r" (temp2), "=m" (v->counter), "=&r" (addr) \ + : "Ir" (i), "m" (v->counter)); \ +} \ + + +#define ATOMIC64_OP_RETURN(op, asm_op) \ +static inline long atomic64_##op##_return_relaxed(long i, atomic64_t *v)\ +{ \ + long temp1, temp2; \ + unsigned long addr; \ + __asm__ __volatile__( \ + LOCK_MEMB \ + " ldi %3, %2\n" \ + "1: lldl %0, 0(%3)\n" \ + " ldi %1, 1\n" \ + " wr_f %1\n" \ + " " #asm_op " %0, %4, %1\n" \ + " " #asm_op " %0, %4, %0\n" \ + LOCK_FIXUP \ + " lstl %1, 0(%3)\n" \ + " rd_f %1\n" \ + " beq %1, 2f\n" \ + ".subsection 2\n" 
\ + "2: br 1b\n" \ + ".previous" \ + : "=&r" (temp1), "=&r" (temp2), "=m" (v->counter), "=&r" (addr) \ + : "Ir" (i), "m" (v->counter)); \ + return temp1; \ +} + +#define ATOMIC64_FETCH_OP(op, asm_op) \ +static inline long atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \ +{ \ + long temp1, temp2; \ + unsigned long addr; \ + __asm__ __volatile__( \ + LOCK_MEMB \ + " ldi %3, %2\n" \ + "1: lldl %0, 0(%3)\n" \ + " ldi %1, 1\n" \ + " wr_f %1\n" \ + " " #asm_op " %0, %4, %1\n" \ + LOCK_FIXUP \ + " lstl %1, 0(%3)\n" \ + " rd_f %1\n" \ + " beq %1, 2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + : "=&r" (temp1), "=&r" (temp2), "=m" (v->counter), "=&r" (addr) \ + : "Ir" (i), "m" (v->counter)); \ + return temp1; \ +} \ + +#define ATOMIC_OPS(op) \ + ATOMIC_OP(op, op##w) \ + ATOMIC_OP_RETURN(op, op##w) \ + ATOMIC_FETCH_OP(op, op##w) \ + ATOMIC64_OP(op, op##l) \ + ATOMIC64_OP_RETURN(op, op##l) \ + ATOMIC64_FETCH_OP(op, op##l) \ + +ATOMIC_OPS(add) +ATOMIC_OPS(sub) + +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC_OPS + +#define ATOMIC_OPS(op, asm) \ + ATOMIC_OP(op, asm) \ + ATOMIC_FETCH_OP(op, asm) \ + ATOMIC64_OP(op, asm) \ + ATOMIC64_FETCH_OP(op, asm) \ + +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, bis) +ATOMIC_OPS(xor, xor) + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + +#undef ATOMIC_OPS +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +#define atomic_andnot atomic_andnot +#define atomic64_andnot atomic64_andnot + +#endif /* _ASM_SW64_ATOMIC_H */ diff --git a/arch/sw_64/include/asm/barrier.h b/arch/sw_64/include/asm/barrier.h new file mode 100644 index 000000000000..c691038919cd --- /dev/null +++ b/arch/sw_64/include/asm/barrier.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_BARRIER_H +#define _ASM_SW64_BARRIER_H + +#include <asm/compiler.h> + +#define mb() __asm__ __volatile__("memb" : : : "memory") + +#define rmb() __asm__ __volatile__("memb" : : : "memory") + +#define wmb() __asm__ __volatile__("memb" : : : "memory") + +#ifdef CONFIG_SMP +#define __ASM_SMP_MB "\tmemb\n" +#else +#define __ASM_SMP_MB +#endif + +#define __smp_mb__before_atomic() barrier() +#define __smp_mb__after_atomic() barrier() + +#include <asm-generic/barrier.h> + +#endif /* _ASM_SW64_BARRIER_H */ diff --git a/arch/sw_64/include/asm/bitops.h b/arch/sw_64/include/asm/bitops.h new file mode 100644 index 000000000000..8c4844e8d067 --- /dev/null +++ b/arch/sw_64/include/asm/bitops.h @@ -0,0 +1,470 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_BITOPS_H 
+#define _ASM_SW64_BITOPS_H + +#ifndef _LINUX_BITOPS_H +#error only <linux/bitops.h> can be included directly +#endif + +#include <asm/compiler.h> +#include <asm/barrier.h> + +/* + * Copyright 1994, Linus Torvalds. + */ + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * To get proper branch prediction for the main line, we must branch + * forward to code at the end of this object's .text section, then + * branch back to restart the operation. + * + * bit 0 is the LSB of addr; bit 64 is the LSB of (addr+1). + */ + +static inline void +set_bit(unsigned long nr, volatile void *addr) +{ + unsigned long temp1, temp2, base; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %3, %5\n" + "1: lldw %0, 0(%3)\n" + " ldi %1, 1\n" + " wr_f %1\n" + " bis %0, %4, %0\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstw %0, 0(%3)\n" + " rd_f %1\n" + " beq %1, 2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (temp1), "=&r" (temp2), "=m" (*m), "=&r" (base) + : "Ir" (1UL << (nr & 31)), "m" (*m)); +} + +/* + * WARNING: non atomic version. + */ +static inline void +__set_bit(unsigned long nr, volatile void *addr) +{ + int *m = ((int *) addr) + (nr >> 5); + + *m |= 1 << (nr & 31); +} + +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() smp_mb() + +static inline void +clear_bit(unsigned long nr, volatile void *addr) +{ + unsigned long temp1, temp2, base; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %3, %5\n" + "1: lldw %0, 0(%3)\n" + " ldi %1, 1\n" + " wr_f %1\n" + " bic %0, %4, %0\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstw %0, 0(%3)\n" + " rd_f %1\n" + " beq %1, 2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (temp1), "=&r" (temp2), "=m" (*m), "=&r" (base) + : "Ir" (1UL << (nr & 31)), "m" (*m)); +} + +static inline void +clear_bit_unlock(unsigned long nr, volatile void *addr) +{ + smp_mb(); + clear_bit(nr, addr); +} + +/* + * WARNING: non atomic version. + */ +static inline void +__clear_bit(unsigned long nr, volatile void *addr) +{ + int *m = ((int *) addr) + (nr >> 5); + + *m &= ~(1 << (nr & 31)); +} + +static inline void +__clear_bit_unlock(unsigned long nr, volatile void *addr) +{ + smp_mb(); + __clear_bit(nr, addr); +} + +static inline void +change_bit(unsigned long nr, volatile void *addr) +{ + unsigned long temp1, temp2, base; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %3, %5\n" + "1: lldw %0, 0(%3)\n" + " ldi %1, 1\n" + " wr_f %1\n" + " xor %0, %4, %0\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstw %0, 0(%3)\n" + " rd_f %1\n" + " beq %1, 2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (temp1), "=&r" (temp2), "=m" (*m), "=&r" (base) + : "Ir" (1UL << (nr & 31)), "m" (*m)); +} + +/* + * WARNING: non atomic version. 
+ */ +static inline void +__change_bit(unsigned long nr, volatile void *addr) +{ + int *m = ((int *) addr) + (nr >> 5); + + *m ^= 1 << (nr & 31); +} + + +static inline int +test_and_set_bit(unsigned long nr, volatile void *addr) +{ + unsigned long oldbit; + unsigned long temp1, temp2, base; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %4, %6\n" + "1: lldw %0, 0(%4)\n" + " and %0, %5, %3\n" + " seleq %3, 1, $31, %1\n" + " wr_f %1\n" + " bis %0, %5, %0\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstw %0, 0(%4)\n" + " rd_f %0\n" + " bne %3, 2f\n" // %3 is not zero, no need to set, return + " beq %0, 3f\n" // failed to set, try again. + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (temp1), "=&r" (temp2), "=m" (*m), "=&r" (oldbit), "=&r" (base) + : "Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); + + return oldbit != 0; +} + +static inline int +test_and_set_bit_lock(unsigned long nr, volatile void *addr) +{ + unsigned long oldbit; + unsigned long temp1, temp2, base; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %4, %6\n" + "1: lldw %0, 0(%4)\n" + " and %0, %5, %3\n" + " seleq %3, 1, $31, %1\n" + " wr_f %1\n" + " bis %0, %5, %0\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstw %0, 0(%4)\n" + " rd_f %0\n" + " bne %3, 2f\n" // %3 is not zero, no need to set, return + " beq %0, 3f\n" // failed to set, try again. + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (temp1), "=&r" (temp2), "=m" (*m), "=&r" (oldbit), "=&r" (base) + : "Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); + + return oldbit != 0; +} + +/* + * WARNING: non atomic version. + */ +static inline int +__test_and_set_bit(unsigned long nr, volatile void *addr) +{ + unsigned long mask = 1 << (nr & 0x1f); + int *m = ((int *) addr) + (nr >> 5); + int old = *m; + + *m = old | mask; + return (old & mask) != 0; +} + +static inline int +test_and_clear_bit(unsigned long nr, volatile void *addr) +{ + unsigned long oldbit; + unsigned long temp1, temp2, base; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %4, %6\n" + "1: lldw %0, 0(%4)\n" + " and %0, %5, %3\n" + " selne %3, 1, $31, %1\n" //Note: here is SELNE!!! + " wr_f %1\n" + " bic %0, %5, %0\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstw %0, 0(%4)\n" + " rd_f %0\n" + " beq %3, 2f\n" // %3 is zero, no need to set, return + " beq %0, 3f\n" // failed to set, try again. + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (temp1), "=&r" (temp2), "=m" (*m), "=&r" (oldbit), "=&r" (base) + : "Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); + + return oldbit != 0; +} + +/* + * WARNING: non atomic version. 
+ */ +static inline int +__test_and_clear_bit(unsigned long nr, volatile void *addr) +{ + unsigned long mask = 1 << (nr & 0x1f); + int *m = ((int *) addr) + (nr >> 5); + int old = *m; + + *m = old & ~mask; + return (old & mask) != 0; +} + +static inline int +test_and_change_bit(unsigned long nr, volatile void *addr) +{ + unsigned long oldbit; + unsigned long temp, base; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %3, %5\n" + "1: lldw %0, 0(%3)\n" + " ldi %2, 1\n" + " wr_f %2\n" + " and %0, %4, %2\n" + " xor %0, %4, %0\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstw %0, 0(%3)\n" + " rd_f %0\n" + " beq %0, 3f\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (temp), "=m" (*m), "=&r" (oldbit), "=&r" (base) + : "Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); + + return oldbit != 0; +} + +/* + * WARNING: non atomic version. + */ +static inline int +__test_and_change_bit(unsigned long nr, volatile void *addr) +{ + unsigned long mask = 1 << (nr & 0x1f); + int *m = ((int *) addr) + (nr >> 5); + int old = *m; + + *m = old ^ mask; + return (old & mask) != 0; +} + +static inline int +test_bit(int nr, const volatile void *addr) +{ + return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; +} + +/* + * ffz = Find First Zero in word. Undefined if no zero exists, + * so code should check against ~0UL first.. + * + * Do a binary search on the bits. Due to the nature of large + * constants on the sw64, it is worthwhile to split the search. + */ +static inline unsigned long ffz_b(unsigned long x) +{ + unsigned long sum, x1, x2, x4; + + x = ~x & -~x; /* set first 0 bit, clear others */ + x1 = x & 0xAA; + x2 = x & 0xCC; + x4 = x & 0xF0; + sum = x2 ? 2 : 0; + sum += (x4 != 0) * 4; + sum += (x1 != 0); + + return sum; +} + +static inline unsigned long ffz(unsigned long word) +{ + return __kernel_cttz(~word); +} + +/* + * __ffs = Find First set bit in word. Undefined if no set bit exists. + */ +static inline unsigned long __ffs(unsigned long word) +{ + return __kernel_cttz(word); +} + +#ifdef __KERNEL__ + +/* + * ffs: find first bit set. This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from the above __ffs. + */ + +static inline int ffs(int word) +{ + int result = __ffs(word) + 1; + + return word ? result : 0; +} + +/* + * fls: find last bit set. + */ +static inline int fls64(unsigned long word) +{ + return 64 - __kernel_ctlz(word); +} + +static inline unsigned long __fls(unsigned long x) +{ + return fls64(x) - 1; +} + +static inline int fls(int x) +{ + return fls64((unsigned int) x); +} + +/* + * hweightN: returns the hamming weight (i.e. the number + * of bits set) of a N-bit word + */ + +static inline unsigned long __arch_hweight64(unsigned long w) +{ + return __kernel_ctpop(w); +} + +static inline unsigned int __arch_hweight32(unsigned int w) +{ + return __arch_hweight64(w); +} + +static inline unsigned int __arch_hweight16(unsigned int w) +{ + return __arch_hweight64(w & 0xffff); +} + +static inline unsigned int __arch_hweight8(unsigned int w) +{ + return __arch_hweight64(w & 0xff); +} + +#include <asm-generic/bitops/const_hweight.h> + +#endif /* __KERNEL__ */ + +#include <asm-generic/bitops/find.h> + +#ifdef __KERNEL__ + +/* + * Every architecture must define this function. It's the fastest + * way of searching a 100-bit bitmap. It's guaranteed that at least + * one of the 100 bits is cleared. 
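The conventions used by the bit-search helpers above are easy to get wrong: bit 0 is the least significant bit, __ffs()/ffz() return 0-based bit indices, fls()/fls64() return a 1-based position, and the hweight*() family counts set bits. A small host-side sanity check with compiler builtins (purely illustrative; the header itself maps these onto __kernel_cttz/__kernel_ctlz/__kernel_ctpop):

#include <assert.h>

int main(void)
{
	unsigned long x = 0x90;			/* bits 4 and 7 set */

	assert(__builtin_ctzl(x) == 4);		/* __ffs(0x90)  == 4 */
	assert(__builtin_ctzl(~0xffUL) == 8);	/* ffz(0xff)    == 8 */
	assert(64 - __builtin_clzl(x) == 8);	/* fls64(0x90)  == 8 */
	assert(__builtin_popcountl(x) == 2);	/* hweight64    == 2 */
	return 0;
}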
+ */ +static inline unsigned long +sched_find_first_bit(const unsigned long b[2]) +{ + unsigned long b0, b1, ofs, tmp; + + b0 = b[0]; + b1 = b[1]; + ofs = (b0 ? 0 : 64); + tmp = (b0 ? b0 : b1); + + return __ffs(tmp) + ofs; +} + +#include <asm-generic/bitops/le.h> + +#include <asm-generic/bitops/ext2-atomic-setbit.h> + +#endif /* __KERNEL__ */ + +#endif /* _ASM_SW64_BITOPS_H */ diff --git a/arch/sw_64/include/asm/bug.h b/arch/sw_64/include/asm/bug.h new file mode 100644 index 000000000000..4a179f236ccf --- /dev/null +++ b/arch/sw_64/include/asm/bug.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_BUG_H +#define _ASM_SW64_BUG_H + +#include <linux/linkage.h> +#include <asm-generic/bug.h> + +#endif /* _ASM_SW64_BUG_H */ diff --git a/arch/sw_64/include/asm/bugs.h b/arch/sw_64/include/asm/bugs.h new file mode 100644 index 000000000000..c4a336fe04a2 --- /dev/null +++ b/arch/sw_64/include/asm/bugs.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_BUGS_H +#define _ASM_SW64_BUGS_H + +static void check_bugs(void) +{ +} + +#endif diff --git a/arch/sw_64/include/asm/cache.h b/arch/sw_64/include/asm/cache.h new file mode 100644 index 000000000000..a59a74110884 --- /dev/null +++ b/arch/sw_64/include/asm/cache.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/asm/cache.h + */ +#ifndef _ASM_SW64_CACHE_H +#define _ASM_SW64_CACHE_H + +#define L1_CACHE_BYTES 128 +#define L1_CACHE_SHIFT 7 + +#define SMP_CACHE_BYTES L1_CACHE_BYTES + +#endif diff --git a/arch/sw_64/include/asm/cacheflush.h b/arch/sw_64/include/asm/cacheflush.h new file mode 100644 index 000000000000..985161896f71 --- /dev/null +++ b/arch/sw_64/include/asm/cacheflush.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_CACHEFLUSH_H +#define _ASM_SW64_CACHEFLUSH_H + +#include <linux/mm.h> +#include <asm/hw_init.h> + +/* Caches aren't brain-dead on the sw64. */ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + +/* Note that the following two definitions are _highly_ dependent + * on the contexts in which they are used in the kernel. I personally + * think it is criminal how loosely defined these macros are. + */ + +/* We need to flush the kernel's icache after loading modules. The + * only other use of this macro is in load_aout_interp which is not + * used on sw64. + + * Note that this definition should *not* be used for userspace + * icache flushing. While functional, it is _way_ overkill. The + * icache is tagged with ASNs and it suffices to allocate a new ASN + * for the process. 
+ */ +#ifndef CONFIG_SMP +static inline void +flush_icache_range(unsigned long start, unsigned long end) +{ + if (icache_is_vivt_no_ictag()) + imb(); +} +#define flush_icache_range flush_icache_range +#else +extern void smp_imb(void); +static inline void +flush_icache_range(unsigned long start, unsigned long end) +{ + if (icache_is_vivt_no_ictag()) + smp_imb(); +} +#define flush_icache_range flush_icache_range +#endif + +/* We need to flush the userspace icache after setting breakpoints in + * ptrace. + + * Instead of indiscriminately using imb, take advantage of the fact + * that icache entries are tagged with the ASN and load a new mm context. + */ +/* ??? Ought to use this in arch/sw_64/kernel/signal.c too. */ + +#ifndef CONFIG_SMP +#include <linux/sched.h> + +extern void __load_new_mm_context(struct mm_struct *); +static inline void +flush_icache_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + if ((vma->vm_flags & VM_EXEC) && icache_is_vivt_no_ictag()) + imb(); +} +#define flush_icache_user_page flush_icache_user_page +#else +extern void flush_icache_user_page(struct vm_area_struct *vma, + struct page *page, + unsigned long addr, int len); +#define flush_icache_user_page flush_icache_user_page +#endif + +/* This is used only in __do_fault and do_swap_page. */ +#define flush_icache_page(vma, page) \ + flush_icache_user_page((vma), (page), 0, 0) + +#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ +do { \ + memcpy(dst, src, len); \ + flush_icache_user_page(vma, page, vaddr, len); \ +} while (0) +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len) + +#include <asm-generic/cacheflush.h> + +#endif /* _ASM_SW64_CACHEFLUSH_H */ diff --git a/arch/sw_64/include/asm/checksum.h b/arch/sw_64/include/asm/checksum.h new file mode 100644 index 000000000000..0bb933350dc6 --- /dev/null +++ b/arch/sw_64/include/asm/checksum.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_CHECKSUM_H +#define _ASM_SW64_CHECKSUM_H + +#include <linux/in6.h> + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. 
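The csum_* helpers in this header compute the Internet checksum of RFC 1071: 16-bit words are summed into a wider accumulator, the carries are folded back in (the csum_fold() step), and the result is complemented. A compact host-side reference, handy for checking the optimized routines against known vectors (illustration only, not part of the patch):

#include <stdint.h>
#include <stddef.h>

static uint16_t ref_ip_checksum(const void *buf, size_t len)
{
	const uint8_t *p = buf;
	uint32_t sum = 0;

	while (len > 1) {		/* sum 16-bit big-endian words */
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)			/* pad an odd trailing byte */
		sum += (uint32_t)p[0] << 8;

	while (sum >> 16)		/* the csum_fold() step */
		sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)~sum;
}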
+ */ +extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +__sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum); + +__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum); + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER +#define _HAVE_ARCH_CSUM_AND_COPY +__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len); + +__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len); + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +extern __sum16 ip_compute_csum(const void *buff, int len); + +/* + * Fold a partial checksum without adding pseudo headers + */ + +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __sum16)~sum; +} + +#define _HAVE_ARCH_IPV6_CSUM +extern __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, __u32 len, + __u8 proto, __wsum sum); +#endif diff --git a/arch/sw_64/include/asm/chip3_io.h b/arch/sw_64/include/asm/chip3_io.h new file mode 100644 index 000000000000..1028842f7a81 --- /dev/null +++ b/arch/sw_64/include/asm/chip3_io.h @@ -0,0 +1,315 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_CHIP3_IO_H +#define _ASM_SW64_CHIP3_IO_H + +#include <asm/platform.h> + +#define IO_BASE (0x1UL << 47) +#define PCI_BASE (0x1UL << 43) +#define PCI_IOR0_BASE (0x2UL << 32) +#define PCI_IOR1_BASE (0x3UL << 32) + +#ifdef CONFIG_SW64_FPGA +#define PCI_RC_CFG (0x4UL << 32) +#else +#define PCI_RC_CFG (0x5UL << 32) +#endif + +#define PCI_EP_CFG (0x3UL << 33) +#define PCI_LEGACY_IO (0x1UL << 32) +#define PCI_LEGACY_IO_SIZE (0x100000000UL) +#define PCI_MEM_UNPRE 0x0UL +#define PCI_32BIT_VT_MEMIO (0xc0000000UL) +#define PCI_32BIT_MEMIO (0xe0000000UL) +#define PCI_32BIT_MEMIO_SIZE (0x20000000UL) +#define PCI_64BIT_MEMIO (0x1UL << 39) +#define PCI_64BIT_MEMIO_SIZE (0x8000000000UL) + +#define IO_RC_SHIFT 40 +#define IO_NODE_SHIFT 44 +#define IO_MARK_BIT 47 + +/* MSIConfig */ +#define MSICONFIG_VALID (0x1UL << 63) +#define MSICONFIG_EN (0x1UL << 62) +#define MSICONFIG_VECTOR_SHIFT 10 + +#define SW64_PCI_IO_BASE(m, n) \ + (IO_BASE | ((m) << IO_NODE_SHIFT) | PCI_BASE | ((n) << IO_RC_SHIFT)) +#define SW64_IO_BASE(x) (IO_BASE | ((x) << IO_NODE_SHIFT)) + +#define SW64_PCI0_BUS 0 +#define PCI0_BUS SW64_PCI0_BUS + +#define MAX_NR_NODES 0x2 +#define MAX_NR_RCS 0x6 + +#define SW64_PCI_DEBUG 0 +#if SW64_PCI_DEBUG +#define PCIINFO(fmt, args...) printk(fmt, ##args) +#else +#define PCIINFO(fmt, args...) 
+#endif + +#define MCU_BASE (0x3UL << 36) +#define CAB0_BASE (0x10UL << 32) +#define INTPU_BASE (0x2aUL << 32) +#define IIC0_BASE (0x31UL << 32) +#define SPI_BASE (0x32UL << 32) +#define UART_BASE (0x33UL << 32) +#define IIC1_BASE (0x34UL << 32) +#define IIC2_BASE (0x35UL << 32) +#define GPIO_BASE (0x36UL << 32) +#define LPC_BASE (0x37UL << 32) +#define LPC_LEGACY_IO (0x1UL << 28 | IO_BASE | LPC_BASE) +#define LPC_MEM_IO (0x2UL << 28 | IO_BASE | LPC_BASE) +#define LPC_FIRMWARE_IO (0x3UL << 28 | IO_BASE | LPC_BASE) +#define DLIA_BASE (0x20UL << 32) +#define DLIB_BASE (0x21UL << 32) +#define DLIC_BASE (0x22UL << 32) +#define DLI_PHY_CTL (0x10UL << 24) +#define PCI_VT_LEGACY_IO (IO_BASE | PCI_BASE | PCI_LEGACY_IO) + +/*-----------------------addr-----------------------*/ +/* CAB0 REG */ +enum { + TRKMODE = CAB0_BASE | 0x80UL, +}; + +/* DLIA IO REG */ +enum { + DLIA_BWTEST_PAT = DLIA_BASE | 0x100980UL, + DLIA_PHY_VLDLANE = DLIA_BASE | DLI_PHY_CTL | 0x300UL, +}; + +/* DLIB IO REG */ +enum { + DLIB_BWTEST_PAT = DLIB_BASE | 0x100980UL, + DLIB_PHY_VLDLANE = DLIB_BASE | DLI_PHY_CTL | 0x300UL, +}; + +/* DLIC IO REG */ +enum { + DLIC_BWTEST_PAT = DLIC_BASE | 0x100980UL, + DLIC_PHY_VLDLANE = DLIC_BASE | DLI_PHY_CTL | 0x300UL, +}; +/* INTPU REG */ +enum { + LCORE_SLEEPY = INTPU_BASE | 0x0UL, + LCORE_SLEEP = INTPU_BASE | 0x80UL, + DEVICE_MISS = INTPU_BASE | 0x100UL, + LONG_TIME = INTPU_BASE | 0x180UL, + LCORE_IDLE = INTPU_BASE | 0x280UL, + MT_INT_CONFIG = INTPU_BASE | 0x300UL, + DEV_INT_CONFIG = INTPU_BASE | 0x480UL, + FMT_ERR = INTPU_BASE | 0x700UL, + FAULT_INT_CONFIG = INTPU_BASE | 0x780UL, + SERR_CNTTH = INTPU_BASE | 0x880UL, + MCUSERR_CNT = INTPU_BASE | 0x900UL, + IRUSERR_CNT = INTPU_BASE | 0xa80UL, + ERRRPT_EN = INTPU_BASE | 0xb00UL, + IINT_MISS_VECTOR = INTPU_BASE | 0x1100UL, + IINT_MIS = INTPU_BASE | 0x1180UL, + IINT_MISS_RPTEN = INTPU_BASE | 0x1200UL, + DEVINT_MISS_RPTEN = INTPU_BASE | 0x1280UL, + ECCSERR = INTPU_BASE | 0x1300UL, + ECCSERR_RPTEN = INTPU_BASE | 0x1380UL, + ECCMERR = INTPU_BASE | 0x1400UL, + ECCMERR_RPTEN = INTPU_BASE | 0x1480UL, + DEVINT_WKEN = INTPU_BASE | 0x1500UL, + NMI_INT_CONFIG = INTPU_BASE | 0x1580UL, + DEVINTWK_INTEN = INTPU_BASE | 0x1600UL, +}; + +/* MC IO REG */ +enum { + CFGDEC = 0x400UL, + CFGCR = 0x480UL, + INIT_CTRL = 0x580UL, + CFGERR = 0xd00UL, + FSMSTAT = 0xe00UL, + PUB_INTERFACE = 0x1000UL, + POWERCTRL = 0x1080UL, + CFGMR0 = 0x1280UL, + CFGMR1 = 0x1300UL, + CFGMR2 = 0x1380UL, + CFGMR3 = 0x1400UL, + PERF_CTRL = 0x1480UL, + MC_PERF0 = 0x1500UL, + CFGMR4 = 0x1800UL, + CFGMR5 = 0x1880UL, + CFGMR6 = 0x1900UL, + MC_CTRL = 0x1c00UL, + MEMSERR_P = 0x1c80UL, + MEMSERR = 0x1d00UL, +}; + +/* MCU CSR */ +enum { + INIT_CTL = MCU_BASE | 0x680UL, + MT_STATE = MCU_BASE | 0x700UL, + CORE_ONLINE = MCU_BASE | 0x780UL, + MT_INT = MCU_BASE | 0x800UL, + MT_INT_END = MCU_BASE | 0x880UL, + CPU_ID = MCU_BASE | 0x900UL, + DLI_RLTD_FAULT = MCU_BASE | 0x980UL, + DLI_RLTD_FAULT_EN = MCU_BASE | 0xa00UL, + DLI_RLTD_FAULT_INTEN = MCU_BASE | 0xa80UL, + FAULT_SOURCE = MCU_BASE | 0xb00UL, + INT_SOURCE = MCU_BASE | 0xb80UL, + CORE_STATE0 = MCU_BASE | 0xc00UL, + CORE_STATE1 = MCU_BASE | 0xc80UL, + CFG_INFO = MCU_BASE | 0x1100UL, + MC_CAP_CFG = MCU_BASE | 0x1180UL, + IO_START = MCU_BASE | 0x1300UL, + UART_ONLINE = MCU_BASE | 0x1780UL, + MCU_DVC_INT = MCU_BASE | 0x3000UL, + MCU_DVC_INT_EN = MCU_BASE | 0x3080UL, + SI_FAULT_STAT = MCU_BASE | 0x3100UL, + SI_FAULT_EN = MCU_BASE | 0x3180UL, + SI_FAULT_INT_EN = MCU_BASE | 0x3200UL, + FIFO_SYNSEL = MCU_BASE | 0x3400UL, + CPU_INFO = MCU_BASE | 0x3480UL, + 
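The windowed address layout used throughout this header is easiest to see with a worked value: SW64_PCI_IO_BASE() (defined earlier in chip3_io.h) sets the I/O mark at bit 47, places the node number at bit 44, the PCI space bit at bit 43 and the root-complex number at bit 40. A tiny host-side check with the constants copied from the header (hypothetical node/RC values, illustration only):

#include <stdio.h>

#define IO_BASE		(0x1UL << 47)
#define PCI_BASE	(0x1UL << 43)
#define IO_NODE_SHIFT	44
#define IO_RC_SHIFT	40

#define SW64_PCI_IO_BASE(m, n) \
	(IO_BASE | ((m) << IO_NODE_SHIFT) | PCI_BASE | ((n) << IO_RC_SHIFT))

int main(void)
{
	/* node 1, root complex 2 -> 0x9a0000000000 */
	printf("%#lx\n", SW64_PCI_IO_BASE(1UL, 2UL));
	return 0;
}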
WAKEUP_CTL = MCU_BASE | 0x3500UL, + FLAGREG = MCU_BASE | 0x3580UL, + NMI_CTL = MCU_BASE | 0x3600UL, + PIUPLL_CNT = MCU_BASE | 0x3680UL, + MC_ONLINE = MCU_BASE | 0x3780UL, + FLASH_INFO = MCU_BASE | 0x3800UL, + RTPUSROMCNT = MCU_BASE | 0x3880UL, + CLU_LV1_SEL = MCU_BASE | 0x3a80UL, + CLU_LV2_SEL = MCU_BASE | 0x3b00UL, + CLK_CTL = MCU_BASE | 0x3b80UL, + SLEEP_WAIT_CNT = MCU_BASE | 0x4980UL, + CHIP_ID = MCU_BASE | 0x4b00UL, + PIU_TOP0_CONFIG = MCU_BASE | 0x4c80UL, + PIU_TOP1_CONFIG = MCU_BASE | 0x4d00UL, + LVDS_CTL = MCU_BASE | 0x4d80UL, + LPC_DMAREQ_TOTH = MCU_BASE | 0x5100UL, + DLI_ONLINE = MCU_BASE | 0x6180UL, + LPC_DMAREQ_HADR = MCU_BASE | 0x6200UL, + PIU_PHY_SRST_H = MCU_BASE | 0x6280UL, + CLK_SEL_PCIE0 = MCU_BASE | 0x6280UL, + CLK_SEL_PCIE1 = MCU_BASE | 0x6300UL, + CLK_SEL_PCIE2 = MCU_BASE | 0x6380UL, + CLK_SEL_PCIE3 = MCU_BASE | 0x6400UL, + CLK_SEL_PCIE4 = MCU_BASE | 0x6480UL, + CLK_SEL_PCIE5 = MCU_BASE | 0x6500UL, + PERST_N_PCIE0 = MCU_BASE | 0x6680UL, + PERST_N_PCIE1 = MCU_BASE | 0x6700UL, + PERST_N_PCIE2 = MCU_BASE | 0x6780UL, + PERST_N_PCIE3 = MCU_BASE | 0x6800UL, + PERST_N_PCIE4 = MCU_BASE | 0x6880UL, + PERST_N_PCIE5 = MCU_BASE | 0x6900UL, + BUTTON_RST_N_PCIE0 = MCU_BASE | 0x6a80UL, + BUTTON_RST_N_PCIE1 = MCU_BASE | 0x6b00UL, + BUTTON_RST_N_PCIE2 = MCU_BASE | 0x6b80UL, + BUTTON_RST_N_PCIE3 = MCU_BASE | 0x6c00UL, + BUTTON_RST_N_PCIE4 = MCU_BASE | 0x6c80UL, + BUTTON_RST_N_PCIE5 = MCU_BASE | 0x6d00UL, + DUAL_CG0_FAULT = MCU_BASE | 0x6d80UL, + DUAL_CG1_FAULT = MCU_BASE | 0x6e00UL, + DUAL_CG2_FAULT = MCU_BASE | 0x6e80UL, + DUAL_CG3_FAULT = MCU_BASE | 0x6f00UL, + DUAL_CG4_FAULT = MCU_BASE | 0x6f80UL, + DUAL_CG5_FAULT = MCU_BASE | 0x7000UL, + DUAL_CG6_FAULT = MCU_BASE | 0x7080UL, + DUAL_CG7_FAULT = MCU_BASE | 0x7100UL, + DUAL_CG0_FAULT_EN = MCU_BASE | 0x7180UL, + DUAL_CG1_FAULT_EN = MCU_BASE | 0x7200UL, + DUAL_CG2_FAULT_EN = MCU_BASE | 0x7280UL, + DUAL_CG3_FAULT_EN = MCU_BASE | 0x7300UL, + DUAL_CG4_FAULT_EN = MCU_BASE | 0x7380UL, + DUAL_CG5_FAULT_EN = MCU_BASE | 0x7400UL, + DUAL_CG6_FAULT_EN = MCU_BASE | 0x7480UL, + DUAL_CG7_FAULT_EN = MCU_BASE | 0x7500UL, + DUAL_CG0_FAULT_INTEN = MCU_BASE | 0x7580UL, + DUAL_CG1_FAULT_INTEN = MCU_BASE | 0x7600UL, + DUAL_CG2_FAULT_INTEN = MCU_BASE | 0x7680UL, + DUAL_CG3_FAULT_INTEN = MCU_BASE | 0x7700UL, + DUAL_CG4_FAULT_INTEN = MCU_BASE | 0x7780UL, + DUAL_CG5_FAULT_INTEN = MCU_BASE | 0x7800UL, + DUAL_CG6_FAULT_INTEN = MCU_BASE | 0x7880UL, + DUAL_CG7_FAULT_INTEN = MCU_BASE | 0x7900UL, + SOFT_INFO0 = MCU_BASE | 0x7f00UL, + LONG_TIME_START_EN = MCU_BASE | 0x9000UL, +}; + +/*--------------------------offset-----------------------------------*/ +/* PIU IOR0 */ +enum { + PIUCONFIG0 = 0x0UL, + EPDMABAR = 0x80UL, + IOMMUSEGITEM0 = 0x100UL, + IOMMUEXCPT_CTRL = 0x2100UL, + MSIADDR = 0x2180UL, + MSICONFIG0 = 0x2200UL, + INTACONFIG = 0xa200UL, + INTBCONFIG = 0xa280UL, + INTCCONFIG = 0xa300UL, + INTDCONFIG = 0xa380UL, + AERERRINTCONFIG = 0xa400UL, + AERERRMSICONFIG = 0xa480UL, + PMEINTCONFIG = 0xa500UL, + PMEMSICONFIG = 0xa580UL, + HPINTCONFIG = 0xa600UL, + HPMSICONFIG = 0xa680UL, + DTBASEADDR = 0xb000UL, + DTLB_FLUSHALL = 0xb080UL, + DTLB_FLUSHDEV = 0xb100UL, + PTLB_FLUSHALL = 0xb180UL, + PTLB_FLUSHDEV = 0xb200UL, + PTLB_FLUSHVADDR = 0xb280UL, + PCACHE_FLUSHALL = 0xb300UL, + PCACHE_FLUSHDEV = 0xb380UL, + PCACHE_FLUSHPADDR = 0xb400UL, + TIMEOUT_CONFIG = 0xb480UL, + IOMMUEXCPT_STATUS = 0xb500UL, + IOMMUPAGE_PADDR1 = 0xb580UL, + IOMMUPAGE_PADDR2 = 0xb600UL, + IOMMUPAGE_PADDR3 = 0xb680UL, + PTLB_ACCESS = 0xb700UL, + PTLB_ITEM_TAG = 0xb780UL, + PTLB_ITEM_DATA = 0xb800UL, 
+ PCACHE_ACCESS = 0xb880UL, + PCACHE_ITEM_TAG = 0xb900UL, + PCACHE_ITEM_DATA0 = 0xb980UL, +}; + +/* PIU IOR1 */ +enum { + PIUCONFIG1 = 0x0UL, + ERRENABLE = 0x880UL, + RCDEBUGINF1 = 0xc80UL, + DCACONTROL = 0x1a00UL, + DEVICEID0 = 0x1a80UL, +}; + +/* RC */ +enum { + RC_VENDOR_ID = 0x0UL, + RC_COMMAND = 0x80UL, + RC_REVISION_ID = 0x100UL, + RC_PRIMARY_BUS = 0x300UL, + RC_MSI_CONTROL = 0xa00UL, + RC_EXP_DEVCAP = 0xe80UL, + RC_EXP_DEVCTL = 0xf00UL, + RC_SLOT_CTRL = 0x1100UL, + RC_LINK_STAT = 0x1000UL, + RC_CONTROL = 0X1180UL, + RC_STATUS = 0X1200UL, + RC_EXP_DEVCTL2 = 0x1300UL, + RC_PORT_LINK_CTL = 0xe200UL, + RC_ORDER_RULE_CTL = 0x11680UL, + RC_MISC_CONTROL_1 = 0x11780UL, + RC_PHY_INT_REG = 0x80000UL, + RC_PHY_EXT_GEN1 = 0x82400UL, + RC_PHY_EXT_GEN2 = 0x82480UL, +}; +/* GPIO */ +enum { + GPIO_SWPORTA_DR = GPIO_BASE | 0x0UL, + GPIO_SWPORTA_DDR = GPIO_BASE | 0x200UL, +}; +/*--------------------------------------------------------------------------*/ +#endif diff --git a/arch/sw_64/include/asm/cmpxchg.h b/arch/sw_64/include/asm/cmpxchg.h new file mode 100644 index 000000000000..e07abc47c7dd --- /dev/null +++ b/arch/sw_64/include/asm/cmpxchg.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_CMPXCHG_H +#define _ASM_SW64_CMPXCHG_H + +/* + * Atomic exchange routines. + */ + +#define __ASM__MB +#define ____xchg(type, args...) __xchg ## type ## _local(args) +#define ____cmpxchg(type, args...) __cmpxchg ## type ## _local(args) +#include <asm/xchg.h> + +#define xchg_local(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_local((ptr), (unsigned long)_x_, \ + sizeof(*(ptr))); \ +}) + +#define cmpxchg_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, \ + sizeof(*(ptr))); \ +}) + +#define cmpxchg64_local(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_local((ptr), (o), (n)); \ +}) + +#ifdef CONFIG_SMP +#undef __ASM__MB +#define __ASM__MB "\tmemb\n" +#endif +#undef ____xchg +#undef ____cmpxchg +#define ____xchg(type, args...) __xchg ##type(args) +#define ____cmpxchg(type, args...) 
__cmpxchg ##type(args) +#include <asm/xchg.h> + +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, \ + sizeof(*(ptr))); \ +}) + +#define cmpxchg(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr)));\ +}) + +#define cmpxchg64(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg((ptr), (o), (n)); \ +}) + +#undef __ASM__MB +#undef ____cmpxchg + +#define __HAVE_ARCH_CMPXCHG 1 + +#endif /* _ASM_SW64_CMPXCHG_H */ diff --git a/arch/sw_64/include/asm/compiler.h b/arch/sw_64/include/asm/compiler.h new file mode 100644 index 000000000000..9a80aa6a0ba8 --- /dev/null +++ b/arch/sw_64/include/asm/compiler.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_COMPILER_H +#define _ASM_SW64_COMPILER_H + +#include <uapi/asm/compiler.h> + +#endif /* _ASM_SW64_COMPILER_H */ diff --git a/arch/sw_64/include/asm/console.h b/arch/sw_64/include/asm/console.h new file mode 100644 index 000000000000..0c01cb740bce --- /dev/null +++ b/arch/sw_64/include/asm/console.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_CONSOLE_H +#define _ASM_SW64_CONSOLE_H + +#include <uapi/asm/console.h> +#ifndef __ASSEMBLY__ +struct crb_struct; +extern int callback_init_done; +extern void callback_init(void); +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_SW64_CONSOLE_H */ diff --git a/arch/sw_64/include/asm/core.h b/arch/sw_64/include/asm/core.h new file mode 100644 index 000000000000..72d752c87412 --- /dev/null +++ b/arch/sw_64/include/asm/core.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_CORE_H +#define _ASM_SW64_CORE_H + +#define II_II0 0 +#define II_II1 1 +#define II_SLEEP 2 +#define II_WAKE 3 +#define II_NMII 6 + +#ifdef CONFIG_SW64_CHIP3 +#define II_RESET II_NMII +#define CORES_PER_NODE_SHIFT 5 +#endif +#define CORES_PER_NODE (1UL << CORES_PER_NODE_SHIFT) + +/* + * 0x00 ~ 0xff for hardware mm fault + */ + +#define MMCSR__TNV 0x0 +#define MMCSR__IACV 0x1 +#define MMCSR__FOR 0x2 +#define MMCSR__FOE 0x3 +#define MMCSR__FOW 0x4 + +#define MMCSR__BAD_DVA 0x6 +#define MMCSR__ACV1 0x7 +#define MMCSR__ACV0 0xc +#define MMCSR__BAD_IVA 0xf + +/* 0x100 ~ 0x1ff for match debug */ +#define MMCSR__DA_MATCH 0x100 +#define MMCSR__DV_MATCH 0x101 +#define MMCSR__DAV_MATCH 0x102 +#define MMCSR__IA_MATCH 0x103 +#define MMCSR__IDA_MATCH 0x104 + + /* entry.S */ +extern void entArith(void); +extern void entIF(void); +extern void entInt(void); +extern void entMM(void); +extern void entSys(void); +extern void entUna(void); +/* head.S */ +extern void __smp_callin(unsigned long); +#endif diff --git a/arch/sw_64/include/asm/cpu.h b/arch/sw_64/include/asm/cpu.h new file mode 100644 index 000000000000..ea32a7d3cf1b --- /dev/null +++ b/arch/sw_64/include/asm/cpu.h @@ -0,0 +1 @@ +/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/sw_64/include/asm/cputime.h b/arch/sw_64/include/asm/cputime.h new file mode 100644 index 000000000000..bada5a01d887 --- /dev/null +++ b/arch/sw_64/include/asm/cputime.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_CPUTIME_H +#define _ASM_SW64_CPUTIME_H + +#include <asm-generic/cputime.h> + +#endif /* _ASM_SW64_CPUTIME_H */ diff --git a/arch/sw_64/include/asm/current.h b/arch/sw_64/include/asm/current.h new file mode 100644 index 000000000000..219b5ce9f4fc --- /dev/null +++ 
b/arch/sw_64/include/asm/current.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_CURRENT_H +#define _ASM_SW64_CURRENT_H + +#include <linux/thread_info.h> + +#define get_current() (current_thread_info()->task) +#define current get_current() + +#endif /* _ASM_SW64_CURRENT_H */ diff --git a/arch/sw_64/include/asm/debug.h b/arch/sw_64/include/asm/debug.h new file mode 100644 index 000000000000..f0507acc31a7 --- /dev/null +++ b/arch/sw_64/include/asm/debug.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Mao Minkai + * Author: Mao Minkai + * + * This code is taken from arch/mips/include/asm/debug.h + * Copyright (C) 2015 Imagination Technologies + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef _ASM_SW64_DEBUG_H +#define _ASM_SW64_DEBUG_H + +#include <linux/debugfs.h> + +/* + * sw64_debugfs_dir corresponds to the "sw_64" directory at the top level + * of the DebugFS hierarchy. SW64-specific DebugFS entries should be + * placed beneath this directory. + */ +extern struct dentry *sw64_debugfs_dir; + +#endif /* _ASM_SW64_DEBUG_H */ diff --git a/arch/sw_64/include/asm/delay.h b/arch/sw_64/include/asm/delay.h new file mode 100644 index 000000000000..45112c7c3c01 --- /dev/null +++ b/arch/sw_64/include/asm/delay.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_DELAY_H +#define _ASM_SW64_DELAY_H + +extern void __delay(unsigned long loops); +extern void udelay(unsigned long usecs); + +extern void ndelay(unsigned long nsecs); +#define ndelay ndelay + +#endif /* defined(_ASM_SW64_DELAY_H) */ diff --git a/arch/sw_64/include/asm/device.h b/arch/sw_64/include/asm/device.h new file mode 100644 index 000000000000..dadd756d6934 --- /dev/null +++ b/arch/sw_64/include/asm/device.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_DEVICE_H +#define _ASM_SW64_DEVICE_H + +struct dev_archdata { +#if defined(CONFIG_SUNWAY_IOMMU) + void *iommu; +#endif +}; + +struct pdev_archdata { +}; +#endif diff --git a/arch/sw_64/include/asm/div64.h b/arch/sw_64/include/asm/div64.h new file mode 100644 index 000000000000..306581407ba5 --- /dev/null +++ b/arch/sw_64/include/asm/div64.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_DIV64_H +#define _ASM_SW64_DIV64_H + +#include <asm-generic/div64.h> + +#endif diff --git a/arch/sw_64/include/asm/dma-direct.h b/arch/sw_64/include/asm/dma-direct.h new file mode 100644 index 000000000000..dee1680b8f6d --- /dev/null +++ b/arch/sw_64/include/asm/dma-direct.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_DMA_DIRECT_H +#define _ASM_SW64_DMA_DIRECT_H + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr; +} + +#endif /* _ASM_SW64_DMA_DIRECT_H */ diff --git a/arch/sw_64/include/asm/dma-mapping.h b/arch/sw_64/include/asm/dma-mapping.h new file mode 100644 index 000000000000..bb84690eabfe --- /dev/null +++ b/arch/sw_64/include/asm/dma-mapping.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_DMA_MAPPING_H +#define _ASM_SW64_DMA_MAPPING_H + +extern const struct dma_map_ops *dma_ops; + +static inline const struct dma_map_ops 
*get_arch_dma_ops(struct bus_type *bus) +{ + return dma_ops; +} + +#endif /* _ASM_SW64_DMA_MAPPING_H */ diff --git a/arch/sw_64/include/asm/dma.h b/arch/sw_64/include/asm/dma.h new file mode 100644 index 000000000000..1211b71f347e --- /dev/null +++ b/arch/sw_64/include/asm/dma.h @@ -0,0 +1,356 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/asm-sw_64/dma.h + * + * This is essentially the same as the i386 DMA stuff, as the SW64PCs + * use ISA-compatible dma. The only extension is support for high-page + * registers that allow to set the top 8 bits of a 32-bit DMA address. + * This register should be written last when setting up a DMA address + * as this will also enable DMA across 64 KB boundaries. + */ + +/* $Id: dma.h,v 1.7 1992/12/14 00:29:34 root Exp root $ + * linux/include/asm/dma.h: Defines for using and allocating dma channels. + * Written by Hennus Bergman, 1992. + * High DMA channel support & info by Hannu Savolainen + * and John Boyd, Nov. 1992. + */ + +#ifndef _ASM_SW64_DMA_H +#define _ASM_SW64_DMA_H + +#include <linux/spinlock.h> +#include <asm/io.h> + +#define dma_outb outb +#define dma_inb inb + +/* + * NOTES about DMA transfers: + * + * controller 1: channels 0-3, byte operations, ports 00-1F + * controller 2: channels 4-7, word operations, ports C0-DF + * + * - ALL registers are 8 bits only, regardless of transfer size + * - channel 4 is not used - cascades 1 into 2. + * - channels 0-3 are byte - addresses/counts are for physical bytes + * - channels 5-7 are word - addresses/counts are for physical words + * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries + * - transfer count loaded to registers is 1 less than actual count + * - controller 2 offsets are all even (2x offsets for controller 1) + * - page registers for 5-7 don't use data bit 0, represent 128K pages + * - page registers for 0-3 use bit 0, represent 64K pages + * + * DMA transfers are limited to the lower 16MB of _physical_ memory. + * Note that addresses loaded into registers must be _physical_ addresses, + * not logical addresses (which may differ if paging is active). + * + * Address mapping for channels 0-3: + * + * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses) + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * P7 ... P0 A7 ... A0 A7 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Address mapping for channels 5-7: + * + * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses) + * | ... | \ \ ... \ \ \ ... \ \ + * | ... | \ \ ... \ \ \ ... \ (not used) + * | ... | \ \ ... \ \ \ ... \ + * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses + * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at + * the hardware level, so odd-byte transfers aren't possible). + * + * Transfer count (_not # bytes_) is limited to 64K, represented as actual + * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more, + * and up to 128K bytes may be transferred on channels 5-7 in one operation. + * + */ + +#define MAX_DMA_CHANNELS 8 + +/* + * ISA DMA limitations on sw64 platforms, + + * These may be due to SIO (PCI<->ISA bridge) chipset limitation, or + * just a wiring limit. + */ + +/* + * Maximum address for all the others is the complete 32-bit bus + * address space. 
+ */ +#define MAX_ISA_DMA_ADDRESS 0x100000000UL + +/* + * If we have the iommu, we don't have any address limitations on DMA. + * Otherwise (Nautilus, RX164), we have to have 0-16 Mb DMA zone + * like i386. + */ +#ifdef CONFIG_IOMMU_SUPPORT +#ifndef CONFIG_SWICH_GPU +#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x100000000) +#else +#define MAX_DMA_ADDRESS (~0UL) +#endif /* CONFIG_IOMMU_SUPPORT */ +#else +#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x80000000) +#endif + +/* 8237 DMA controllers */ +#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ +#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ + +/* DMA controller registers */ +#define DMA1_CMD_REG 0x08 /* command register (w) */ +#define DMA1_STAT_REG 0x08 /* status register (r) */ +#define DMA1_REQ_REG 0x09 /* request register (w) */ +#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ +#define DMA1_MODE_REG 0x0B /* mode register (w) */ +#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ +#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ +#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ +#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ +#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ +#define DMA1_EXT_MODE_REG (0x400 | DMA1_MODE_REG) + +#define DMA2_CMD_REG 0xD0 /* command register (w) */ +#define DMA2_STAT_REG 0xD0 /* status register (r) */ +#define DMA2_REQ_REG 0xD2 /* request register (w) */ +#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ +#define DMA2_MODE_REG 0xD6 /* mode register (w) */ +#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ +#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ +#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ +#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ +#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */ +#define DMA2_EXT_MODE_REG (0x400 | DMA2_MODE_REG) + +#define DMA_ADDR_0 0x00 /* DMA address registers */ +#define DMA_ADDR_1 0x02 +#define DMA_ADDR_2 0x04 +#define DMA_ADDR_3 0x06 +#define DMA_ADDR_4 0xC0 +#define DMA_ADDR_5 0xC4 +#define DMA_ADDR_6 0xC8 +#define DMA_ADDR_7 0xCC + +#define DMA_CNT_0 0x01 /* DMA count registers */ +#define DMA_CNT_1 0x03 +#define DMA_CNT_2 0x05 +#define DMA_CNT_3 0x07 +#define DMA_CNT_4 0xC2 +#define DMA_CNT_5 0xC6 +#define DMA_CNT_6 0xCA +#define DMA_CNT_7 0xCE + +#define DMA_PAGE_0 0x87 /* DMA page registers */ +#define DMA_PAGE_1 0x83 +#define DMA_PAGE_2 0x81 +#define DMA_PAGE_3 0x82 +#define DMA_PAGE_5 0x8B +#define DMA_PAGE_6 0x89 +#define DMA_PAGE_7 0x8A + +#define DMA_HIPAGE_0 (0x400 | DMA_PAGE_0) +#define DMA_HIPAGE_1 (0x400 | DMA_PAGE_1) +#define DMA_HIPAGE_2 (0x400 | DMA_PAGE_2) +#define DMA_HIPAGE_3 (0x400 | DMA_PAGE_3) +#define DMA_HIPAGE_4 (0x400 | DMA_PAGE_4) +#define DMA_HIPAGE_5 (0x400 | DMA_PAGE_5) +#define DMA_HIPAGE_6 (0x400 | DMA_PAGE_6) +#define DMA_HIPAGE_7 (0x400 | DMA_PAGE_7) + +#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */ +#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */ +#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */ + +#define DMA_AUTOINIT 0x10 + +extern spinlock_t dma_spin_lock; + +static inline unsigned long claim_dma_lock(void) +{ + unsigned long flags; + + spin_lock_irqsave(&dma_spin_lock, flags); + return flags; +} + +static inline void release_dma_lock(unsigned long flags) +{ + spin_unlock_irqrestore(&dma_spin_lock, flags); +} + +/* enable/disable a specific DMA channel */ +static inline void enable_dma(unsigned int dmanr) +{ + if 
(dmanr <= 3) + dma_outb(dmanr, DMA1_MASK_REG); + else + dma_outb(dmanr & 3, DMA2_MASK_REG); +} + +static inline void disable_dma(unsigned int dmanr) +{ + if (dmanr <= 3) + dma_outb(dmanr | 4, DMA1_MASK_REG); + else + dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); +} + +/* Clear the 'DMA Pointer Flip Flop'. + * Write 0 for LSB/MSB, 1 for MSB/LSB access. + * Use this once to initialize the FF to a known state. + * After that, keep track of it. :-) + * --- In order to do that, the DMA routines below should --- + * --- only be used while interrupts are disabled! --- + */ +static inline void clear_dma_ff(unsigned int dmanr) +{ + if (dmanr <= 3) + dma_outb(0, DMA1_CLEAR_FF_REG); + else + dma_outb(0, DMA2_CLEAR_FF_REG); +} + +/* set mode (above) for a specific DMA channel */ +static inline void set_dma_mode(unsigned int dmanr, char mode) +{ + if (dmanr <= 3) + dma_outb(mode | dmanr, DMA1_MODE_REG); + else + dma_outb(mode | (dmanr & 3), DMA2_MODE_REG); +} + +/* set extended mode for a specific DMA channel */ +static inline void set_dma_ext_mode(unsigned int dmanr, char ext_mode) +{ + if (dmanr <= 3) + dma_outb(ext_mode | dmanr, DMA1_EXT_MODE_REG); + else + dma_outb(ext_mode | (dmanr & 3), DMA2_EXT_MODE_REG); +} + +/* Set only the page register bits of the transfer address. + * This is used for successive transfers when we know the contents of + * the lower 16 bits of the DMA current address register. + */ +static inline void set_dma_page(unsigned int dmanr, unsigned int pagenr) +{ + switch (dmanr) { + case 0: + dma_outb(pagenr, DMA_PAGE_0); + dma_outb((pagenr >> 8), DMA_HIPAGE_0); + break; + case 1: + dma_outb(pagenr, DMA_PAGE_1); + dma_outb((pagenr >> 8), DMA_HIPAGE_1); + break; + case 2: + dma_outb(pagenr, DMA_PAGE_2); + dma_outb((pagenr >> 8), DMA_HIPAGE_2); + break; + case 3: + dma_outb(pagenr, DMA_PAGE_3); + dma_outb((pagenr >> 8), DMA_HIPAGE_3); + break; + case 5: + dma_outb(pagenr & 0xfe, DMA_PAGE_5); + dma_outb((pagenr >> 8), DMA_HIPAGE_5); + break; + case 6: + dma_outb(pagenr & 0xfe, DMA_PAGE_6); + dma_outb((pagenr >> 8), DMA_HIPAGE_6); + break; + case 7: + dma_outb(pagenr & 0xfe, DMA_PAGE_7); + dma_outb((pagenr >> 8), DMA_HIPAGE_7); + break; + } +} + + +/* Set transfer address & page bits for specific DMA channel. + * Assumes dma flipflop is clear. + */ +static inline void set_dma_addr(unsigned int dmanr, unsigned int a) +{ + if (dmanr <= 3) { + dma_outb(a & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE); + dma_outb((a >> 8) & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE); + } else { + dma_outb((a >> 1) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE); + dma_outb((a >> 9) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE); + } + set_dma_page(dmanr, a >> 16); /* set hipage last to enable 32-bit mode */ +} + + +/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for + * a specific DMA channel. + * You must ensure the parameters are valid. + * NOTE: from a manual: "the number of transfers is one more + * than the initial word count"! This is taken into account. + * Assumes dma flip-flop is clear. + * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. + */ +static inline void set_dma_count(unsigned int dmanr, unsigned int count) +{ + count--; + if (dmanr <= 3) { + dma_outb(count & 0xff, ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE); + dma_outb((count >> 8) & 0xff, ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE); + } else { + dma_outb((count >> 1) & 0xff, ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); + dma_outb((count >> 9) & 0xff, ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); + } +} + + +/* Get DMA residue count. 
After a DMA transfer, this + * should return zero. Reading this while a DMA transfer is + * still in progress will return unpredictable results. + * If called before the channel has been used, it may return 1. + * Otherwise, it returns the number of _bytes_ left to transfer. + * + * Assumes DMA flip-flop is clear. + */ +static inline int get_dma_residue(unsigned int dmanr) +{ + unsigned int io_port = (dmanr <= 3) ? + ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE : + ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE; + + /* using short to get 16-bit wrap around */ + unsigned short count; + + count = 1 + dma_inb(io_port); + count += dma_inb(io_port) << 8; + + return (dmanr <= 3) ? count : (count << 1); +} + + +/* These are in kernel/dma.c: */ +extern int request_dma(unsigned int dmanr, const char *device_id); /* reserve a DMA channel */ +extern void free_dma(unsigned int dmanr); /* release it again */ +#define KERNEL_HAVE_CHECK_DMA +extern int check_dma(unsigned int dmanr); + +/* From PCI */ + +#ifdef CONFIG_PCI +extern int isa_dma_bridge_buggy; +#else +#define isa_dma_bridge_buggy (0) +#endif + + +#endif /* _ASM_SW64_DMA_H */ diff --git a/arch/sw_64/include/asm/dmi.h b/arch/sw_64/include/asm/dmi.h new file mode 100644 index 000000000000..5142aa66ea45 --- /dev/null +++ b/arch/sw_64/include/asm/dmi.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/sw_64/include/asm/dmi.h + * + * Copyright (C) 2019 Deepin Limited. + * Porting by: Deepin Kernel Team (kernel@deepin.com) + * + * based on arch/x86/include/asm/dmi.h + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#ifndef _ASM_SW64_DMI_H +#define _ASM_SW64_DMI_H + +#include <linux/io.h> +#include <linux/slab.h> +#include <asm/io.h> +#include <asm/early_ioremap.h> + +/* Use early IO mappings for DMI because it's initialized early */ +#define dmi_early_remap(x, l) early_ioremap(x, l) +#define dmi_early_unmap(x, l) early_iounmap(x, l) +#define dmi_remap(x, l) early_ioremap(x, l) +#define dmi_unmap(x) early_iounmap(x, 0) +#define dmi_alloc(l) kzalloc(l, GFP_KERNEL) + +#endif diff --git a/arch/sw_64/include/asm/early_ioremap.h b/arch/sw_64/include/asm/early_ioremap.h new file mode 100644 index 000000000000..6f6fc6218cb3 --- /dev/null +++ b/arch/sw_64/include/asm/early_ioremap.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_EARLY_IOREMAP_H +#define _ASM_SW64_EARLY_IOREMAP_H + +#include <asm/page.h> +#include <asm/io.h> + +static inline void __iomem * +early_ioremap(unsigned long phys_addr, unsigned long size) +{ + unsigned long y = 0; + + if (phys_addr >= __START_KERNEL_map) { + y = (unsigned long) phys_to_virt(__pa(phys_addr)); + } else { + y = phys_addr; + y += PAGE_OFFSET; + } + + return (void __iomem *) y; +} +#define early_memremap(phys_addr, size) early_ioremap(phys_addr, size) + +static inline void early_iounmap(volatile void __iomem *addr, unsigned long size) +{ + return; +} +#define early_memunmap(addr, size) early_iounmap(addr, size) + +#endif diff --git a/arch/sw_64/include/asm/efi.h b/arch/sw_64/include/asm/efi.h new file mode 100644 index 000000000000..2bc863e3b836 --- /dev/null +++ b/arch/sw_64/include/asm/efi.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_EFI_H +#define _ASM_SW64_EFI_H + +#include <asm/io.h> +#include <asm/early_ioremap.h> +#ifdef CONFIG_EFI +extern void efi_init(void); +#else +#define efi_init() +#define efi_idmap_init() 
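Note that early_ioremap() above builds no page-table entries: a plain physical address simply has PAGE_OFFSET added so it is reached through the boot-time direct mapping, while an address already inside the kernel-image window (>= __START_KERNEL_map) is folded back to its direct-map alias via __pa()/phys_to_virt(), which is also why early_iounmap() is an empty stub. A tiny sketch of the common direct-map case, with a made-up PAGE_OFFSET since the real constant comes from asm/page.h and is not part of this hunk:

#include <assert.h>
#include <stdint.h>

#define FAKE_PAGE_OFFSET 0xfff0000000000000UL  /* placeholder, not the real sw_64 value */

/* Common case of early_ioremap(): physical address -> direct-mapped virtual address. */
static uint64_t early_remap_direct(uint64_t phys_addr)
{
        return phys_addr + FAKE_PAGE_OFFSET;
}

int main(void)
{
        /* e.g. a low firmware-provided table address */
        assert(early_remap_direct(0x2040) == FAKE_PAGE_OFFSET + 0x2040);
        return 0;
}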
+#endif + +#define arch_efi_call_virt_setup() +#define arch_efi_call_virt_teardown() + +#define arch_efi_call_virt(p, f, args...) \ +({ \ + efi_##f##_t * __f; \ + __f = p->f; \ + __f(args); \ +}) + +#define ARCH_EFI_IRQ_FLAGS_MASK 0x00000001 + +/* arch specific definitions used by the stub code */ + +/* + * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from + * start of kernel and may not cross a 2MiB boundary. We set alignment to + * 2MiB so we know it won't cross a 2MiB boundary. + */ +#define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */ +#define MAX_FDT_OFFSET SZ_512M + +#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) + +#endif /* _ASM_SW64_EFI_H */ diff --git a/arch/sw_64/include/asm/elf.h b/arch/sw_64/include/asm/elf.h new file mode 100644 index 000000000000..150629b0b615 --- /dev/null +++ b/arch/sw_64/include/asm/elf.h @@ -0,0 +1,170 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_ELF_H +#define _ASM_SW64_ELF_H +#ifdef __KERNEL__ +#include <asm/auxvec.h> +#include <asm/special_insns.h> +#endif +/* Special values for the st_other field in the symbol table. */ + + +#define STO_SW64_NOPV 0x80 +#define STO_SW64_STD_GPLOAD 0x88 + +/* + * SW-64 ELF relocation types + */ +#define R_SW64_NONE 0 /* No reloc */ +#define R_SW64_REFLONG 1 /* Direct 32 bit */ +#define R_SW64_REFQUAD 2 /* Direct 64 bit */ +#define R_SW64_GPREL32 3 /* GP relative 32 bit */ +#define R_SW64_LITERAL 4 /* GP relative 16 bit w/optimization */ +#define R_SW64_LITUSE 5 /* Optimization hint for LITERAL */ +#define R_SW64_GPDISP 6 /* Add displacement to GP */ +#define R_SW64_BRADDR 7 /* PC+4 relative 23 bit shifted */ +#define R_SW64_HINT 8 /* PC+4 relative 16 bit shifted */ +#define R_SW64_SREL16 9 /* PC relative 16 bit */ +#define R_SW64_SREL32 10 /* PC relative 32 bit */ +#define R_SW64_SREL64 11 /* PC relative 64 bit */ +#define R_SW64_GPRELHIGH 17 /* GP relative 32 bit, high 16 bits */ +#define R_SW64_GPRELLOW 18 /* GP relative 32 bit, low 16 bits */ +#define R_SW64_GPREL16 19 /* GP relative 16 bit */ +#define R_SW64_COPY 24 /* Copy symbol at runtime */ +#define R_SW64_GLOB_DAT 25 /* Create GOT entry */ +#define R_SW64_JMP_SLOT 26 /* Create PLT entry */ +#define R_SW64_RELATIVE 27 /* Adjust by program base */ +#define R_SW64_BRSGP 28 +#define R_SW64_TLSGD 29 +#define R_SW64_TLS_LDM 30 +#define R_SW64_DTPMOD64 31 +#define R_SW64_GOTDTPREL 32 +#define R_SW64_DTPREL64 33 +#define R_SW64_DTPRELHI 34 +#define R_SW64_DTPRELLO 35 +#define R_SW64_DTPREL16 36 +#define R_SW64_GOTTPREL 37 +#define R_SW64_TPREL64 38 +#define R_SW64_TPRELHI 39 +#define R_SW64_TPRELLO 40 +#define R_SW64_TPREL16 41 +#define R_SW64_LITERAL_GOT 43 /* GP relative */ + +#define SHF_SW64_GPREL 0x10000000 + +/* Legal values for e_flags field of Elf64_Ehdr. */ + +#define EF_SW64_32BIT 1 /* All addresses are below 2GB */ + +/* + * ELF register definitions.. + */ + +/* + * The legacy version of <sys/procfs.h> makes gregset_t 46 entries long. + * I have no idea why that is so. For now, we just leave it at 33 + * (32 general regs + processor status word). + */ +#define ELF_NGREG 33 +#define ELF_NFPREG 32 + + +typedef unsigned long elf_greg_t; +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef double elf_fpreg_t; +typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) ((x)->e_machine == EM_SW64) + +/* + * These are used to set parameters in the core dumps. 
+ */ +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_SW64 + +#define ELF_EXEC_PAGESIZE 8192 + +/* + * This is the location that an ET_DYN program is loaded if exec'ed. Typical + * use of this is to invoke "./ld.so someprog" to test out a new version of + * the loader. We need to make sure that it is out of the way of the program + * that it will "exec", and that there is sufficient room for the brk. + */ + +#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) + +/* + * $0 is set by ld.so to a pointer to a function which might be + * registered using atexit. This provides a means for the dynamic + * linker to call DT_FINI functions for shared libraries that have + * been loaded before the code runs. + + * So that we can use the same startup file with static executables, + * we start programs with a value of 0 to indicate that there is no + * such function. + */ + +#define ELF_PLAT_INIT(_r, load_addr) (_r->r0 = 0) + +/* + * The registers are laid out in pt_regs for HMCODE and syscall + * convenience. Re-order them for the linear elf_gregset_t. + */ + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); + +#ifdef __KERNEL__ +struct pt_regs; +struct thread_info; +struct task_struct; +extern void dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, + struct thread_info *ti); +#define ELF_CORE_COPY_REGS(DEST, REGS) \ + dump_elf_thread(DEST, REGS, current_thread_info()); + +/* Similar, but for a thread other than current. */ + +extern int dump_elf_task(elf_greg_t *dest, struct task_struct *task); +#define ELF_CORE_COPY_TASK_REGS(TASK, DEST) dump_elf_task(*(DEST), TASK) + +/* Similar, but for the FP registers. */ + +extern int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task); +#define ELF_CORE_COPY_FPREGS(TASK, DEST) dump_elf_task_fp(*(DEST), TASK) + +/* + * This yields a mask that user programs can use to figure out what + * instruction set this CPU supports. This is trivial on SW-64, + * but not so on other machines. + */ + +#define ELF_HWCAP (~amask(-1)) + +/* + * This yields a string that ld.so will use to load implementation + * specific libraries for optimization. This is more specific in + * intent than poking at uname or /proc/cpuinfo. 
+ */ + +#define ELF_PLATFORM ("sw_64") + +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ +#define ARCH_DLINFO \ +do { \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (elf_addr_t)current->mm->context.vdso); \ +} while (0) + +struct mm_struct; +extern unsigned long arch_randomize_brk(struct mm_struct *mm); +#define arch_randomize_brk arch_randomize_brk +#endif + +#endif /* _ASM_SW64_ELF_H */ diff --git a/arch/sw_64/include/asm/emergency-restart.h b/arch/sw_64/include/asm/emergency-restart.h new file mode 100644 index 000000000000..fabb33ebf0eb --- /dev/null +++ b/arch/sw_64/include/asm/emergency-restart.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_EMERGENCY_RESTART_H +#define _ASM_SW64_EMERGENCY_RESTART_H + +#include <asm-generic/emergency-restart.h> + +#endif /* _ASM_SW64_EMERGENCY_RESTART_H */ diff --git a/arch/sw_64/include/asm/exec.h b/arch/sw_64/include/asm/exec.h new file mode 100644 index 000000000000..4a9cb71c5c47 --- /dev/null +++ b/arch/sw_64/include/asm/exec.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_EXEC_H +#define _ASM_SW64_EXEC_H + +#define arch_align_stack(x) (x) + +#endif /* _ASM_SW64_EXEC_H */ diff --git a/arch/sw_64/include/asm/extable.h b/arch/sw_64/include/asm/extable.h new file mode 100644 index 000000000000..12b50b68a0d2 --- /dev/null +++ b/arch/sw_64/include/asm/extable.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_EXTABLE_H +#define _ASM_SW64_EXTABLE_H + +/* + * About the exception table: + * + * - insn is a 32-bit pc-relative offset from the faulting insn. + * - nextinsn is a 16-bit offset off of the faulting instruction + * (not off of the *next* instruction as branches are). + * - errreg is the register in which to place -EFAULT. + * - valreg is the final target register for the load sequence + * and will be zeroed. + * + * Either errreg or valreg may be $31, in which case nothing happens. + * + * The exception fixup information "just so happens" to be arranged + * as in a MEM format instruction. This lets us emit our three + * values like so: + * + * lda valreg, nextinsn(errreg) + * + */ + +struct exception_table_entry { + signed int insn; + union exception_fixup { + unsigned int unit; + struct { + signed int nextinsn : 16; + unsigned int errreg : 5; + unsigned int valreg : 5; + } bits; + } fixup; +}; + +/* Returns the new pc */ +#define fixup_exception(map_reg, _fixup, pc) \ +({ \ + if ((_fixup)->fixup.bits.valreg != 31) \ + map_reg((_fixup)->fixup.bits.valreg) = 0; \ + if ((_fixup)->fixup.bits.errreg != 31) \ + map_reg((_fixup)->fixup.bits.errreg) = -EFAULT; \ + (pc) + (_fixup)->fixup.bits.nextinsn; \ +}) + +#define ARCH_HAS_RELATIVE_EXTABLE + +#define swap_ex_entry_fixup(a, b, tmp, delta) \ + do { \ + (a)->fixup.unit = (b)->fixup.unit; \ + (b)->fixup.unit = (tmp).fixup.unit; \ + } while (0) + +#endif diff --git a/arch/sw_64/include/asm/floppy.h b/arch/sw_64/include/asm/floppy.h new file mode 100644 index 000000000000..f4646d99d80c --- /dev/null +++ b/arch/sw_64/include/asm/floppy.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Architecture specific parts of the Floppy driver + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * Copyright (C) 1995 + */ +#ifndef _ASM_SW64_FLOPPY_H +#define _ASM_SW64_FLOPPY_H + +#define fd_inb(port) inb_p(port) +#define fd_outb(value, port) outb_p(value, port) + +#define fd_enable_dma() enable_dma(FLOPPY_DMA) +#define fd_disable_dma() disable_dma(FLOPPY_DMA) +#define fd_request_dma() request_dma(FLOPPY_DMA, "floppy") +#define fd_free_dma() free_dma(FLOPPY_DMA) +#define fd_clear_dma_ff() clear_dma_ff(FLOPPY_DMA) +#define fd_set_dma_mode(mode) set_dma_mode(FLOPPY_DMA, mode) +#define fd_set_dma_addr(addr) set_dma_addr(FLOPPY_DMA, virt_to_bus(addr)) +#define fd_set_dma_count(count) set_dma_count(FLOPPY_DMA, count) +#define fd_enable_irq() enable_irq(FLOPPY_IRQ) +#define fd_disable_irq() disable_irq(FLOPPY_IRQ) +#define fd_cacheflush(addr, size) /* nothing */ +#define fd_request_irq() \ + request_irq(FLOPPY_IRQ, floppy_interrupt, 0, "floppy", NULL) +#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL) + +#ifdef CONFIG_PCI + +#include <linux/pci.h> + +#define fd_dma_setup(addr, size, mode, io) \ + sw64_fd_dma_setup(addr, size, mode, io) + +static inline int +sw64_fd_dma_setup(char *addr, unsigned long size, int mode, int io) +{ + static unsigned long prev_size; + static dma_addr_t bus_addr; + static char *prev_addr; + static int prev_dir; + int dir; + + dir = (mode != DMA_MODE_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE; + + if (bus_addr + && (addr != prev_addr || size != prev_size || dir != prev_dir)) { + /* different from last time -- unmap prev */ + bus_addr = 0; + } + + if (!bus_addr) /* need to map it */ + bus_addr = virt_to_bus(addr); + + /* remember this one as prev */ + prev_addr = addr; + prev_size = size; + prev_dir = dir; + + fd_clear_dma_ff(); + fd_cacheflush(addr, size); + fd_set_dma_mode(mode); + set_dma_addr(FLOPPY_DMA, bus_addr); + fd_set_dma_count(size); + virtual_dma_port = io; + fd_enable_dma(); + + return 0; +} + +#endif /* CONFIG_PCI */ + +inline void virtual_dma_init(void) +{ + /* Nothing to do on an sw64 */ +} + +static int FDC1 = 0x3f0; +static int FDC2 = -1; + +/* + * Again, the CMOS information doesn't work on the sw64.. + */ +#define FLOPPY0_TYPE 6 +#define FLOPPY1_TYPE 0 + +#define N_FDC 2 +#define N_DRIVE 8 + +/* + * Most sw64s have no problems with floppy DMA crossing 64k borders, + * except for certain ones, like XL and RUFFIAN. + * + * However, the test is simple and fast, and this *is* floppy, after all, + * so we do it for all platforms, just to make sure. + * + * This is advantageous in other circumstances as well, as in moving + * about the PCI DMA windows and forcing the floppy to start doing + * scatter-gather when it never had before, and there *is* a problem + * on that platform... ;-} + */ + +static inline unsigned long CROSS_64KB(void *a, unsigned long s) +{ + unsigned long p = (unsigned long)a; + + return ((p + s - 1) ^ p) & ~0xffffUL; +} + +#define EXTRA_FLOPPY_PARAMS + +#endif /* __ASM_SW64_FLOPPY_H */ diff --git a/arch/sw_64/include/asm/fpu.h b/arch/sw_64/include/asm/fpu.h new file mode 100644 index 000000000000..a0b0ff5af168 --- /dev/null +++ b/arch/sw_64/include/asm/fpu.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_FPU_H +#define _ASM_SW64_FPU_H + +#include <uapi/asm/fpu.h> +#ifdef __KERNEL__ + +/* + * The following two functions don't need trapb/excb instructions + * around the mf_fpcr/mt_fpcr instructions because (a) the kernel + * never generates arithmetic faults and (b) sys_call instructions + * are implied trap barriers. 
+ */ + +static inline unsigned long +rdfpcr(void) +{ + unsigned long ret; + unsigned long fp[4] __aligned(32); + + __asm__ __volatile__ ( + " vstd $f0, %0\n\t" + " rfpcr $f0\n\t" + " fimovd $f0, %1\n\t" + " vldd $f0, %0\n\t" + : "=m"(*fp), "=r"(ret)); + + return ret; +} + +static inline void +wrfpcr(unsigned long val) +{ + unsigned long tmp; + unsigned long fp[4] __aligned(32); + + __asm__ __volatile__ ( + " vstd $f0, %0\n\t" + " ifmovd %2, $f0\n\t" + " wfpcr $f0\n\t" + " and %2, 0x3, %1\n\t" + " beq %1, 1f\n\t" + " subl %1, 1, %1\n\t" + " beq %1, 2f\n\t" + " subl %1, 1, %1\n\t" + " beq %1, 3f\n\t" + " setfpec3\n\t" + " br 6f\n\t" + "1: setfpec0\n\t" + " br 6f\n\t" + "2: setfpec1\n\t" + " br 6f\n\t" + "3: setfpec2\n\t" + "6: vldd $f0, %0\n\t" + : "=m"(*fp), "=&r"(tmp) : "r"(val)); +} + +static inline unsigned long +swcr_update_status(unsigned long swcr, unsigned long fpcr) +{ + /* + * SW64 implements most of the bits in hardware. Collect + * the acrued exception bits from the real fpcr. + */ + swcr &= ~(IEEE_STATUS_MASK0 | IEEE_STATUS_MASK1 + | IEEE_STATUS_MASK2 | IEEE_STATUS_MASK3); + swcr |= (fpcr >> 35) & IEEE_STATUS_MASK0; + swcr |= (fpcr >> 13) & IEEE_STATUS_MASK1; + swcr |= (fpcr << 14) & IEEE_STATUS_MASK2; + swcr |= (fpcr << 36) & IEEE_STATUS_MASK3; + return swcr; +} + +extern unsigned long sw64_read_fp_reg(unsigned long reg); +extern void sw64_write_fp_reg(unsigned long reg, unsigned long val); +extern unsigned long sw64_read_fp_reg_s(unsigned long reg); +extern void sw64_write_fp_reg_s(unsigned long reg, unsigned long val); + + +extern void sw64_write_simd_fp_reg_s(unsigned long reg, + unsigned long f0, unsigned long f1); +extern void sw64_write_simd_fp_reg_d(unsigned long reg, + unsigned long f0, unsigned long f1, + unsigned long f2, unsigned long f3); +extern void sw64_write_simd_fp_reg_ldwe(unsigned long reg, int a); +extern void sw64_read_simd_fp_m_s(unsigned long reg, unsigned long *fp_value); +extern void sw64_read_simd_fp_m_d(unsigned long reg, unsigned long *fp_value); + +#endif /* __KERNEL__ */ + +#endif /* _ASM_SW64_FPU_H */ diff --git a/arch/sw_64/include/asm/ftrace.h b/arch/sw_64/include/asm/ftrace.h new file mode 100644 index 000000000000..ea82224e5826 --- /dev/null +++ b/arch/sw_64/include/asm/ftrace.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/sw_64/include/asm/ftrace.h + * + * Copyright (C) 2019, serveros, linyue + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _ASM_SW64_FTRACE_H +#define _ASM_SW64_FTRACE_H +#include <asm/insn.h> + +#define MCOUNT_ADDR ((unsigned long)_mcount) +#define MCOUNT_INSN_SIZE SW64_INSN_SIZE + +#ifndef __ASSEMBLY__ +#include <linux/compat.h> + +extern void _mcount(unsigned long); + +struct dyn_arch_ftrace { + /* No extra data needed for sw64 */ +}; + +extern unsigned long ftrace_graph_call; + + +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + /* + * addr is the address of the mcount call instruction. + * recordmcount does the necessary offset calculation. 
+ */ + return addr; +} + +#endif /* ifndef __ASSEMBLY__ */ +#endif /* _ASM_SW64_FTRACE_H */ diff --git a/arch/sw_64/include/asm/futex.h b/arch/sw_64/include/asm/futex.h new file mode 100644 index 000000000000..50324470173f --- /dev/null +++ b/arch/sw_64/include/asm/futex.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_FUTEX_H +#define _ASM_SW64_FUTEX_H + +#ifdef __KERNEL__ + +#include <linux/futex.h> +#include <linux/uaccess.h> +#include <asm/errno.h> +#include <asm/barrier.h> + +#ifndef LOCK_MEMB +#ifdef CONFIG_LOCK_MEMB +#define LOCK_MEMB "memb\n" +#else +#define LOCK_MEMB +#endif +#endif + +#ifndef LOCK_FIXUP +#ifdef CONFIG_LOCK_FIXUP +#define LOCK_FIXUP "memb\n" +#else +#define LOCK_FIXUP +#endif +#endif + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg, tmp) \ + __asm__ __volatile__( \ + LOCK_MEMB \ + "1: lldw %0, 0(%3)\n" \ + " ldi %2, 1\n" \ + " wr_f %2\n" \ + insn \ + LOCK_FIXUP \ + "2: lstw %1, 0(%3)\n" \ + " rd_f %2\n" \ + " beq %2, 4f\n" \ + " bis $31, $31, %1\n" \ + "3: .subsection 2\n" \ + "4: br 1b\n" \ + " .previous\n" \ + " .section __ex_table, "a"\n" \ + " .long 1b-.\n" \ + " ldi $31, 3b-1b(%1)\n" \ + " .long 2b-.\n" \ + " ldi $31, 3b-2b(%1)\n" \ + " .previous\n" \ + : "=&r" (oldval), "=&r"(ret), "=&r"(tmp) \ + : "r" (uaddr), "r"(oparg) \ + : "memory") + +static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, + u32 __user *uaddr) +{ + int oldval = 0, ret; + unsigned long tmp; + + pagefault_disable(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("mov %4, %1\n", ret, oldval, uaddr, oparg, tmp); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("addw %0, %4, %1\n", ret, oldval, uaddr, oparg, tmp); + break; + case FUTEX_OP_OR: + __futex_atomic_op("or %0, %4, %1\n", ret, oldval, uaddr, oparg, tmp); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("andnot %0, %4, %1\n", ret, oldval, uaddr, oparg, tmp); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("xor %0, %4, %1\n", ret, oldval, uaddr, oparg, tmp); + break; + default: + ret = -ENOSYS; + } + + pagefault_enable(); + + if (!ret) + *oval = oldval; + + return ret; +} + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret = 0, cmp; + u32 prev, tmp; + + if (!access_ok(uaddr, sizeof(u32))) + return -EFAULT; + + __asm__ __volatile__ ( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + "1: lldw %1, 0(%4)\n" + " cmpeq %1, %5, %2\n" + " wr_f %2\n" + " bis $31, %6, %3\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + "2: lstw %3, 0(%4)\n" + " rd_f %3\n" + " beq %2, 3f\n" + " beq %3, 4f\n" + "3: .subsection 2\n" + "4: br 1b\n" + " .previous\n" + " .section __ex_table, "a"\n" + " .long 1b-.\n" + " ldi $31, 3b-1b(%0)\n" + " .long 2b-.\n" + " ldi $31, 3b-2b(%0)\n" + " .previous\n" + : "+r"(ret), "=&r"(prev), "=&r"(cmp), "=&r"(tmp) + : "r"(uaddr), "r"((long)(int)oldval), "r"(newval) + : "memory"); + + *uval = prev; + return ret; +} + +#endif /* __KERNEL__ */ + +#endif /* _ASM_SW64_FUTEX_H */ diff --git a/arch/sw_64/include/asm/hardirq.h b/arch/sw_64/include/asm/hardirq.h new file mode 100644 index 000000000000..03368c3659dd --- /dev/null +++ b/arch/sw_64/include/asm/hardirq.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_HARDIRQ_H +#define _ASM_SW64_HARDIRQ_H + +void ack_bad_irq(unsigned int irq); +#define ack_bad_irq ack_bad_irq + +#include <linux/irq.h> + +#define __ARCH_IRQ_STAT +typedef struct { + u16 __softirq_pending; + unsigned int timer_irqs_event; +} 
____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); + +#define inc_irq_stat(member) this_cpu_inc(irq_stat.member) +#define arch_irq_stat_cpu arch_irq_stat_cpu +#define arch_irq_stat arch_irq_stat +extern u64 arch_irq_stat_cpu(unsigned int cpu); +extern u64 arch_irq_stat(void); + +#endif /* _ASM_SW64_HARDIRQ_H */ diff --git a/arch/sw_64/include/asm/hcall.h b/arch/sw_64/include/asm/hcall.h new file mode 100644 index 000000000000..8117752b657e --- /dev/null +++ b/arch/sw_64/include/asm/hcall.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_HCALL_H +#define _ASM_SW64_HCALL_H + +#define HMC_hcall 0x32 +/* HCALL must > 0 */ +enum HCALL_TYPE { + HCALL_HALT = 10, + HCALL_NOTIFY = 11, + HCALL_SHUTDOWN = 12, + HCALL_SET_CLOCKEVENT = 13, + HCALL_IVI = 14, /* interrupt between virtual cpu */ + HCALL_TBI = 15, /* tlb flush for virtual cpu */ + HCALL_STOP = 16, /* indicate virtual cpu stopped */ + HCALL_RESTART = 17, /* indicate virtual cpu restarted */ + HCALL_MSI = 18, /* guest request msi intr */ + HCALL_MSIX = 19, /* guest request msix intr */ + HCALL_SWNET = 20, /* guest request swnet service */ + HCALL_SWNET_IRQ = 21, /* guest request swnet intr */ + HCALL_FATAL_ERROR = 22, /* guest fatal error, issued by hmcode */ + NR_HCALL +}; + +static inline unsigned long hcall(unsigned long hcall, unsigned long arg0, + unsigned long arg1, unsigned long arg2) +{ + register unsigned long __r0 __asm__("$0"); + register unsigned long __r16 __asm__("$16") = hcall; + register unsigned long __r17 __asm__("$17") = arg0; + register unsigned long __r18 __asm__("$18") = arg1; + register unsigned long __r19 __asm__("$19") = arg2; + + __asm__ __volatile__( + "sys_call %5 " + : "=r"(__r16), "=r"(__r17), "=r"(__r18), "=r"(__r19), "=r"(__r0) + : "i"(HMC_hcall), "0"(__r16), "1"(__r17), "2"(__r18), "3"(__r19) + : "$1", "$22", "$23", "$24", "$25"); + return __r0; +} + +#endif /* _ASM_SW64_HCALL_H */ diff --git a/arch/sw_64/include/asm/hmcall.h b/arch/sw_64/include/asm/hmcall.h new file mode 100644 index 000000000000..310cc61a5a34 --- /dev/null +++ b/arch/sw_64/include/asm/hmcall.h @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_HMC_H +#define _ASM_SW64_HMC_H + +/* + * Common HMC-code + */ +/* 0x0 - 0x3F : Kernel Level HMC routine */ +#define HMC_halt 0x00 +#define HMC_rdio64 0x01 +#define HMC_rdio32 0x02 +#define HMC_cpuid 0x03 +#define HMC_sleepen 0x05 +#define HMC_rdksp 0x06 +#define HMC_rdptbr 0x0B +#define HMC_wrptbr 0x0C +#define HMC_wrksp 0x0E +#define HMC_mtinten 0x0F +#define HMC_load_mm 0x11 +#define HMC_rdpcbb 0x12 +#define HMC_wrpcbb 0x13 +#define HMC_tbisasn 0x14 +#define HMC_tbivpn 0x19 +#define HMC_ret 0x1A +#define HMC_wrvpcr 0x29 +#define HMC_wrfen 0x2B +#define HMC_kvcpucb 0x2C +#define HMC_sflush 0x2F +#define HMC_swpctx 0x30 +#define HMC_entervm 0x31 +#define HMC_hcall 0x32 +#define HMC_tbi 0x33 +#define HMC_wrent 0x34 +#define HMC_swpipl 0x35 +#define HMC_rdps 0x36 +#define HMC_wrkgp 0x37 +#define HMC_wrusp 0x38 +#define HMC_rvpcr 0x39 +#define HMC_rdusp 0x3A +#define HMC_wrtimer 0x3B +#define HMC_whami 0x3C +#define HMC_retsys 0x3D +#define HMC_sendii 0x3E +#define HMC_rti 0x3F + + +/* 0x80 - 0xBF : User Level HMC routine */ +#define HMC_bpt 0x80 +#define HMC_callsys 0x83 +#define HMC_imb 0x86 +#define HMC_rwreg 0x87 +#define HMC_rdunique 0x9E +#define HMC_wrunique 0x9F +#define HMC_sz_uflush 0xA8 +#define HMC_gentrap 0xAA +#define HMC_wrperfmon 0xB0 +#define HMC_longtime 0xB1 + +#ifdef __KERNEL__ 
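The hcall() helper above is the guest-to-hypervisor entry point: the call number is pinned to register $16, up to three arguments go in $17..$19, and the result comes back in $0 after the sys_call. A hypothetical guest-side wrapper (the name and call site are illustrative, not part of this patch) would look like:

/* Ask the hypervisor to program the next clock event (illustrative use of hcall()). */
static inline unsigned long guest_set_clockevent(unsigned long delta_cycles)
{
        return hcall(HCALL_SET_CLOCKEVENT, delta_cycles, 0, 0);
}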
+#ifndef __ASSEMBLY__ + +extern void halt(void) __attribute__((noreturn)); +#define __halt() __asm__ __volatile__ ("sys_call %0 #halt" : : "i" (HMC_halt)) + +#define imb() \ + __asm__ __volatile__ ("sys_call %0 #imb" : : "i" (HMC_imb) : "memory") + +#define __CALL_HMC_R0(NAME, TYPE) \ +static inline TYPE NAME(void) \ +{ \ + register TYPE __r0 __asm__("$0"); \ + __asm__ __volatile__( \ + "sys_call %1 # " #NAME \ + : "=r" (__r0) \ + : "i" (HMC_ ## NAME) \ + : "$1", "$16", "$22", "$23", "$24", "$25"); \ + return __r0; \ +} + +#define __CALL_HMC_W1(NAME, TYPE0) \ +static inline void NAME(TYPE0 arg0) \ +{ \ + register TYPE0 __r16 __asm__("$16") = arg0; \ + __asm__ __volatile__( \ + "sys_call %1 # "#NAME \ + : "=r"(__r16) \ + : "i"(HMC_ ## NAME), "0"(__r16) \ + : "$1", "$22", "$23", "$24", "$25"); \ +} + +#define __CALL_HMC_W2(NAME, TYPE0, TYPE1) \ +static inline void NAME(TYPE0 arg0, TYPE1 arg1) \ +{ \ + register TYPE0 __r16 __asm__("$16") = arg0; \ + register TYPE1 __r17 __asm__("$17") = arg1; \ + __asm__ __volatile__( \ + "sys_call %2 # "#NAME \ + : "=r"(__r16), "=r"(__r17) \ + : "i"(HMC_ ## NAME), "0"(__r16), "1"(__r17) \ + : "$1", "$22", "$23", "$24", "$25"); \ +} + +#define __CALL_HMC_RW1(NAME, RTYPE, TYPE0) \ +static inline RTYPE NAME(TYPE0 arg0) \ +{ \ + register RTYPE __r0 __asm__("$0"); \ + register TYPE0 __r16 __asm__("$16") = arg0; \ + __asm__ __volatile__( \ + "sys_call %2 # "#NAME \ + : "=r"(__r16), "=r"(__r0) \ + : "i"(HMC_ ## NAME), "0"(__r16) \ + : "$1", "$22", "$23", "$24", "$25"); \ + return __r0; \ +} + +#define __CALL_HMC_RW2(NAME, RTYPE, TYPE0, TYPE1) \ +static inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1) \ +{ \ + register RTYPE __r0 __asm__("$0"); \ + register TYPE0 __r16 __asm__("$16") = arg0; \ + register TYPE1 __r17 __asm__("$17") = arg1; \ + __asm__ __volatile__( \ + "sys_call %3 # "#NAME \ + : "=r"(__r16), "=r"(__r17), "=r"(__r0) \ + : "i"(HMC_ ## NAME), "0"(__r16), "1"(__r17) \ + : "$1", "$22", "$23", "$24", "$25"); \ + return __r0; \ +} + +#define __CALL_HMC_RW3(NAME, RTYPE, TYPE0, TYPE1, TYPE2) \ +static inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1, TYPE2 arg2) \ +{ \ + register RTYPE __r0 __asm__("$0"); \ + register TYPE0 __r16 __asm__("$16") = arg0; \ + register TYPE1 __r17 __asm__("$17") = arg1; \ + register TYPE2 __r18 __asm__("$18") = arg2; \ + __asm__ __volatile__( \ + "sys_call %4 # "#NAME \ + : "=r"(__r16), "=r"(__r17), "=r"(__r18), "=r"(__r0) \ + : "i"(HMC_ ## NAME), "0"(__r16), "1"(__r17), "2"(__r18) \ + : "$1", "$22", "$23", "$24", "$25"); \ + return __r0; \ +} + +#define sflush() \ +{ \ + __asm__ __volatile__("sys_call 0x2f"); \ +} + +__CALL_HMC_R0(rdps, unsigned long); + +__CALL_HMC_R0(rdusp, unsigned long); +__CALL_HMC_W1(wrusp, unsigned long); + +__CALL_HMC_R0(rdksp, unsigned long); +__CALL_HMC_W1(wrksp, unsigned long); + +__CALL_HMC_W2(load_mm, unsigned long, unsigned long); +__CALL_HMC_R0(rdpcbb, unsigned long); +__CALL_HMC_W1(wrpcbb, unsigned long); + +__CALL_HMC_R0(rdptbr, unsigned long); +__CALL_HMC_W1(wrptbr, unsigned long); + +__CALL_HMC_RW1(swpipl, unsigned long, unsigned long); +__CALL_HMC_R0(whami, unsigned long); +__CALL_HMC_RW1(rdio64, unsigned long, unsigned long); +__CALL_HMC_RW1(rdio32, unsigned int, unsigned long); +__CALL_HMC_R0(kvcpucb, unsigned long); +__CALL_HMC_R0(sleepen, unsigned long); +__CALL_HMC_R0(mtinten, unsigned long); +__CALL_HMC_W2(wrent, void*, unsigned long); +__CALL_HMC_W2(tbisasn, unsigned long, unsigned long); +__CALL_HMC_W1(wrkgp, unsigned long); +__CALL_HMC_RW2(wrperfmon, unsigned long, unsigned long, unsigned long); 
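Each __CALL_HMC_* line above is a generator: it emits a static inline wrapper that marshals its arguments into $16..$18, issues sys_call with the matching HMC_* number, and returns whatever the hmcode leaves in $0. For instance, __CALL_HMC_RW1(swpipl, unsigned long, unsigned long) expands to roughly the following (reconstructed from the macro definition above, shown only for illustration):

static inline unsigned long swpipl(unsigned long arg0)
{
        register unsigned long __r0 __asm__("$0");
        register unsigned long __r16 __asm__("$16") = arg0;

        __asm__ __volatile__(
                "sys_call %2 # swpipl"
                : "=r"(__r16), "=r"(__r0)
                : "i"(HMC_swpipl), "0"(__r16)
                : "$1", "$22", "$23", "$24", "$25");
        return __r0;
}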
+__CALL_HMC_RW3(sendii, unsigned long, unsigned long, unsigned long, unsigned long); +__CALL_HMC_W1(wrtimer, unsigned long); +__CALL_HMC_RW3(tbivpn, unsigned long, unsigned long, unsigned long, unsigned long); +__CALL_HMC_RW2(cpuid, unsigned long, unsigned long, unsigned long); + +/* + * TB routines.. + */ +#define __tbi(nr, arg, arg1...) \ +({ \ + register unsigned long __r16 __asm__("$16") = (nr); \ + register unsigned long __r17 __asm__("$17"); arg; \ + __asm__ __volatile__( \ + "sys_call %3 #__tbi" \ + : "=r" (__r16), "=r" (__r17) \ + : "0" (__r16), "i" (HMC_tbi), ##arg1 \ + : "$0", "$1", "$22", "$23", "$24", "$25"); \ +}) + +#define tbi(x, y) __tbi(x, __r17 = (y), "1" (__r17)) +#define tbisi(x) __tbi(1, __r17 = (x), "1" (__r17)) +#define tbisd(x) __tbi(2, __r17 = (x), "1" (__r17)) +#define tbis(x) __tbi(3, __r17 = (x), "1" (__r17)) +#define tbiap() __tbi(-1, /* no second argument */) +#define tbia() __tbi(-2, /* no second argument */) + +#endif /* !__ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#endif /* _ASM_SW64_HMC_H */ diff --git a/arch/sw_64/include/asm/hugetlb.h b/arch/sw_64/include/asm/hugetlb.h new file mode 100644 index 000000000000..11565a8f86cb --- /dev/null +++ b/arch/sw_64/include/asm/hugetlb.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_HUGETLB_H +#define _ASM_SW64_HUGETLB_H + +#include <asm/page.h> +#include <asm-generic/hugetlb.h> + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, + unsigned long end, unsigned long floor, + unsigned long ceiling); + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +#endif /* _ASM_SW64_HUGETLB_H */ diff --git a/arch/sw_64/include/asm/hw_init.h b/arch/sw_64/include/asm/hw_init.h new file mode 100644 index 000000000000..9a56590ef653 --- /dev/null +++ b/arch/sw_64/include/asm/hw_init.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_HW_INIT_H +#define _ASM_SW64_HW_INIT_H +#include <linux/numa.h> + +#define MMSIZE __va(0x2040) + +/* + * Descriptor for a cache + */ +struct cache_desc { + unsigned int size; /* Bytes per way */ + unsigned int sets; /* Number of lines per set */ + unsigned char ways; /* Number of ways */ + unsigned char linesz; /* Size of line in bytes */ + unsigned char flags; /* Flags describing cache properties */ +}; + +struct cpuinfo_sw64 { + unsigned long loops_per_jiffy; + unsigned long last_asn; + int need_new_asn; + int asn_lock; + unsigned long ipi_count; + unsigned long prof_multiplier; + unsigned long prof_counter; + unsigned char mcheck_expected; + unsigned char mcheck_taken; + unsigned char mcheck_extra; + struct cache_desc icache; /* Primary I-cache */ + struct cache_desc dcache; /* Primary D or combined I/D cache */ + struct cache_desc scache; /* Secondary cache */ + struct cache_desc tcache; /* Tertiary/split secondary cache */ +} __attribute__((aligned(64))); + +struct cpu_desc_t { + __u8 model; + __u8 family; + __u8 chip_var; + __u8 arch_var; + __u8 arch_rev; + __u8 pa_bits; + __u8 va_bits; + char vendor_id[16]; + char model_id[64]; + unsigned long frequency; + __u8 run_mode; +} __randomize_layout; + +#define MAX_NUMSOCKETS 8 +struct socket_desc_t { + bool is_online; /* 1 for online, 0 for offline */ + int numcores; + unsigned long socket_mem; +}; + +enum memmap_types { + memmap_reserved, + memmap_pci, + memmap_initrd, + memmap_kvm, + memmap_crashkernel, 
+ memmap_acpi, + memmap_use, + memmap_protected, +}; + +#define MAX_NUMMEMMAPS 64 +struct memmap_entry { + u64 addr; /* start of memory segment */ + u64 size; /* size of memory segment */ + enum memmap_types type; +}; + +extern struct cpuinfo_sw64 cpu_data[NR_CPUS]; +extern struct cpu_desc_t cpu_desc; +extern struct socket_desc_t socket_desc[MAX_NUMSOCKETS]; +extern int memmap_nr; +extern struct memmap_entry memmap_map[MAX_NUMMEMMAPS]; +extern cpumask_t cpu_offline; +extern bool memblock_initialized; + +int __init add_memmap_region(u64 addr, u64 size, enum memmap_types type); +void __init process_memmap(void); + +static inline unsigned long get_cpu_freq(void) +{ + return cpu_desc.frequency; +} + +static inline bool icache_is_vivt_no_ictag(void) +{ + /* + * Icache of C3B is vivt with ICtag. C4 will be vipt. + */ + return (cpu_desc.arch_var == 0x3 && cpu_desc.arch_rev == 0x1); +} + +enum RUNMODE { + HOST_MODE = 0, + GUEST_MODE = 1, + EMUL_MODE = 2, +}; + +static inline bool is_in_host(void) +{ + return !cpu_desc.run_mode; +} + +static inline bool is_in_guest(void) +{ + return cpu_desc.run_mode == GUEST_MODE; +} + +static inline bool is_guest_or_emul(void) +{ + return !!cpu_desc.run_mode; +} + +#define CPU_SW3231 0x31 +#define CPU_SW831 0x32 + +#define GET_TABLE_ENTRY 1 +#define GET_VENDOR_ID 2 +#define GET_MODEL 3 +#define GET_CPU_FREQ 4 +#define GET_CACHE_INFO 5 + +#define TABLE_ENTRY_MAX 32 +#define VENDOR_ID_MAX 2 +#define MODEL_MAX 8 +#define CACHE_INFO_MAX 4 + +#define L1_ICACHE 0 +#define L1_DCACHE 1 +#define L2_CACHE 2 +#define L3_CACHE 3 + +#define CPUID_ARCH_REV_MASK 0xf +#define CPUID_ARCH_REV(val) ((val) & CPUID_ARCH_REV_MASK) +#define CPUID_ARCH_VAR_SHIFT 4 +#define CPUID_ARCH_VAR_MASK (0xf << CPUID_ARCH_VAR_SHIFT) +#define CPUID_ARCH_VAR(val) \ + (((val) & CPUID_ARCH_VAR_MASK) >> CPUID_ARCH_VAR_SHIFT) +#define CPUID_CHIP_VAR_SHIFT 8 +#define CPUID_CHIP_VAR_MASK (0xf << CPUID_CHIP_VAR_SHIFT) +#define CPUID_CHIP_VAR(val) \ + (((val) & CPUID_CHIP_VAR_MASK) >> CPUID_CHIP_VAR_SHIFT) +#define CPUID_FAMILY_SHIFT 12 +#define CPUID_FAMILY_MASK (0xf << CPUID_FAMILY_SHIFT) +#define CPUID_FAMILY(val) \ + (((val) & CPUID_FAMILY_MASK) >> CPUID_FAMILY_SHIFT) +#define CPUID_MODEL_SHIFT 24 +#define CPUID_MODEL_MASK (0xff << CPUID_MODEL_SHIFT) +#define CPUID_MODEL(val) \ + (((val) & CPUID_MODEL_MASK) >> CPUID_MODEL_SHIFT) +#define CPUID_PA_BITS_SHIFT 32 +#define CPUID_PA_BITS_MASK (0x7fUL << CPUID_PA_BITS_SHIFT) +#define CPUID_PA_BITS(val) \ + (((val) & CPUID_PA_BITS_MASK) >> CPUID_PA_BITS_SHIFT) +#define CPUID_VA_BITS_SHIFT 39 +#define CPUID_VA_BITS_MASK (0x7fUL << CPUID_VA_BITS_SHIFT) +#define CPUID_VA_BITS(val) \ + (((val) & CPUID_VA_BITS_MASK) >> CPUID_VA_BITS_SHIFT) + + +#define CACHE_SIZE_SHIFT 0 +#define CACHE_SIZE_MASK (0xffffffffUL << CACHE_SIZE_SHIFT) +#define CACHE_SIZE(val) \ + (((val) & CACHE_SIZE_MASK) >> CACHE_SIZE_SHIFT) +#define CACHE_LINE_BITS_SHIFT 32 +#define CACHE_LINE_BITS_MASK (0xfUL << CACHE_LINE_BITS_SHIFT) +#define CACHE_LINE_BITS(val) \ + (((val) & CACHE_LINE_BITS_MASK) >> CACHE_LINE_BITS_SHIFT) +#define CACHE_INDEX_BITS_SHIFT 36 +#define CACHE_INDEX_BITS_MASK (0x3fUL << CACHE_INDEX_BITS_SHIFT) +#define CACHE_INDEX_BITS(val) \ + (((val) & CACHE_INDEX_BITS_MASK) >> CACHE_INDEX_BITS_SHIFT) + +#endif /* HW_INIT_H */ diff --git a/arch/sw_64/include/asm/hw_irq.h b/arch/sw_64/include/asm/hw_irq.h new file mode 100644 index 000000000000..f6fd1d802abd --- /dev/null +++ b/arch/sw_64/include/asm/hw_irq.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef 
_ASM_SW64_HW_IRQ_H +#define _ASM_SW64_HW_IRQ_H + +#include<asm/msi.h> + +extern volatile unsigned long irq_err_count; +DECLARE_PER_CPU(unsigned long, irq_pmi_count); + +#define ACTUAL_NR_IRQS NR_IRQS + +#ifdef CONFIG_PCI_MSI +typedef unsigned int vector_irq_t[PERCPU_MSI_IRQS]; +DECLARE_PER_CPU(vector_irq_t, vector_irq); +#endif +#endif diff --git a/arch/sw_64/include/asm/insn.h b/arch/sw_64/include/asm/insn.h new file mode 100644 index 000000000000..54a9a2026784 --- /dev/null +++ b/arch/sw_64/include/asm/insn.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019, serveros, linyue + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ +#ifndef _ASM_SW64_INSN_H +#define _ASM_SW64_INSN_H +#include <linux/types.h> + +/* Register numbers */ +enum { + R26 = 26, + R27, + R28, + R31 = 31, +}; + +#define BR_MAX_DISP 0xfffff +/* SW64 instructions are always 32 bits. */ +#define SW64_INSN_SIZE 4 + +#define ___SW64_RA(a) (((a) & 0x1f) << 21) +#define ___SW64_RB(b) (((b) & 0x1f) << 16) +#define ___SW64_SIMP_RC(c) (((c) & 0x1f)) +#define ___SW64_ST_DISP(disp) (((disp) & 0xffff)) +#define ___SW64_SYSCALL_FUNC(func) ((func) & 0xff) +#define ___SW64_BR_DISP(disp) (((disp) & 0x1fffff)) + + +#define SW64_INSN_BIS 0x40000740 +#define SW64_INSN_CALL 0x04000000 +#define SW64_INSN_SYS_CALL 0x02000000 +#define SW64_INSN_BR 0x10000000 + +#define SW64_BIS(a, b, c) (SW64_INSN_BIS | ___SW64_RA(a) | ___SW64_RB(b) | ___SW64_SIMP_RC(c)) +#define SW64_CALL(a, b, disp) (SW64_INSN_CALL | ___SW64_RA(a) | ___SW64_RB(b) | ___SW64_ST_DISP(disp)) +#define SW64_SYS_CALL(func) (SW64_INSN_SYS_CALL | ___SW64_SYSCALL_FUNC(func)) +#define SW64_BR(a, disp) (SW64_INSN_BR | ___SW64_RA(a) | ___SW64_BR_DISP(disp)) + +extern int sw64_insn_read(void *addr, u32 *insnp); +extern int sw64_insn_write(void *addr, u32 insn); +extern int sw64_insn_double_write(void *addr, u64 insn); +extern unsigned int sw64_insn_nop(void); +extern unsigned int sw64_insn_call(unsigned int ra, unsigned int rb); +extern unsigned int sw64_insn_sys_call(unsigned int num); +extern unsigned int sw64_insn_br(unsigned int ra, unsigned long pc, unsigned long new_pc); + +#define SW64_OPCODE_RA(opcode) ((opcode >> 21) & 0x1f) + +#define SW64_INSN(name, opcode, mask) \ +static inline bool sw64_insn_is_##name(u32 insn) \ +{ \ + return (insn & mask) == opcode; \ +} + +SW64_INSN(sys_call_b, 0x00000000, 0xfc000000); +SW64_INSN(sys_call, 0x00000001, 0xfc000000); +SW64_INSN(call, 0x04000000, 0xfc000000); +SW64_INSN(ret, 0x08000000, 0xfc000000); +SW64_INSN(jmp, 0x0c000000, 0xfc000000); +SW64_INSN(br, 0x10000000, 0xfc000000); +SW64_INSN(bsr, 0x14000000, 0xfc000000); +SW64_INSN(memb, 0x18000000, 0xfc00ffff); +SW64_INSN(imemb, 0x18000001, 0xfc00ffff); +SW64_INSN(rtc, 0x18000020, 0xfc00ffff); +SW64_INSN(halt, 0x18000080, 0xfc00ffff); +SW64_INSN(rd_f, 0x18001000, 0xfc00ffff); +SW64_INSN(beq, 0xc0000000, 0xfc000000); +SW64_INSN(bne, 0xc4000000, 0xfc000000); +SW64_INSN(blt, 0xc8000000, 0xfc000000); +SW64_INSN(ble, 0xcc000000, 
0xfc000000); +SW64_INSN(bgt, 0xd0000000, 0xfc000000); +SW64_INSN(bge, 0xd4000000, 0xfc000000); +SW64_INSN(blbc, 0xd8000000, 0xfc000000); +SW64_INSN(blbs, 0xdc000000, 0xfc000000); +SW64_INSN(fbeq, 0xe0000000, 0xfc000000); +SW64_INSN(fbne, 0xe4000000, 0xfc000000); +SW64_INSN(fblt, 0xe8000000, 0xfc000000); +SW64_INSN(fble, 0xec000000, 0xfc000000); +SW64_INSN(fbgt, 0xf0000000, 0xfc000000); +SW64_INSN(fbge, 0xf4000000, 0xfc000000); +SW64_INSN(lldw, 0x20000000, 0xfc00f000); +SW64_INSN(lldl, 0x20001000, 0xfc00f000); + +#endif /* _ASM_SW64_INSN_H */ diff --git a/arch/sw_64/include/asm/io.h b/arch/sw_64/include/asm/io.h new file mode 100644 index 000000000000..6796c64f94ae --- /dev/null +++ b/arch/sw_64/include/asm/io.h @@ -0,0 +1,291 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_IO_H +#define _ASM_SW64_IO_H + +#ifdef __KERNEL__ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <asm/compiler.h> +#include <asm/pgtable.h> + +/* The generic header contains only prototypes. Including it ensures that + * the implementation we have here matches that interface. + */ +#include <asm-generic/iomap.h> + +/* We don't use IO slowdowns on the sw64, but.. */ +#define __SLOW_DOWN_IO do { } while (0) +#define SLOW_DOWN_IO do { } while (0) + +/* + * Change virtual addresses to physical addresses and vv. + */ +static inline unsigned long virt_to_phys(void *address) +{ + return __pa(address); +} + +static inline void *phys_to_virt(unsigned long address) +{ + return __va(address); +} + +#define page_to_phys(page) page_to_pa(page) + +/* Maximum PIO space address supported? */ +#define IO_SPACE_LIMIT 0xffffffffffffffff + +/* + * Change addresses as seen by the kernel (virtual) to addresses as + * seen by a device (bus), and vice versa. + * + * Note that this only works for a limited range of kernel addresses, + * and very well may not span all memory. Consider this interface + * deprecated in favour of the DMA-mapping API. + */ + +static inline unsigned long __deprecated virt_to_bus(void *address) +{ + return virt_to_phys(address); +} +#define isa_virt_to_bus virt_to_bus + +static inline void * __deprecated bus_to_virt(unsigned long address) +{ + void *virt; + + /* This check is a sanity check but also ensures that bus address 0 + * maps to virtual address 0 which is useful to detect null pointers + * (the NCR driver is much simpler if NULL pointers are preserved). + */ + virt = phys_to_virt(address); + return (long)address <= 0 ? NULL : virt; +} +#define isa_bus_to_virt bus_to_virt + +/* + * There are different chipsets to interface the sw64 CPUs to the world. + */ + +#define IO_CONCAT(a, b) _IO_CONCAT(a, b) +#define _IO_CONCAT(a, b) a ## _ ## b + +#include <asm/sw64io.h> + +/* + * Generic IO read/write. These perform native-endian accesses. 
+ */ + +#define __raw_writeb __raw_writeb +static inline void __raw_writeb(u8 val, volatile void __iomem *addr) +{ + asm volatile("stb %0, 0(%1)" : : "r" (val), "r" (addr)); +} + +#define __raw_writew __raw_writew +static inline void __raw_writew(u16 val, volatile void __iomem *addr) +{ + asm volatile("sth %0, 0(%1)" : : "r" (val), "r" (addr)); +} + +#define __raw_writel __raw_writel +static inline void __raw_writel(u32 val, volatile void __iomem *addr) +{ + asm volatile("stw %0, 0(%1)" : : "r" (val), "r" (addr)); +} + +#define __raw_writeq __raw_writeq +static inline void __raw_writeq(u64 val, volatile void __iomem *addr) +{ + asm volatile("stl %0, 0(%1)" : : "r" (val), "r" (addr)); +} + +#define __raw_readb __raw_readb +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + u8 val; + + asm volatile("ldbu %0, 0(%1)" : "=r" (val) : "r" (addr)); + return val; +} + +#define __raw_readw __raw_readw +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + u16 val; + + asm volatile("ldhu %0, 0(%1)" : "=r" (val) : "r" (addr)); + return val; +} + +#define __raw_readl __raw_readl +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + u32 val; + + asm volatile("ldw %0, 0(%1)\n" + "zapnot %0, 0xf, %0\n" + : "=r" (val) : "r" (addr)); + return val; +} + +#define __raw_readq __raw_readq +static inline u64 __raw_readq(const volatile void __iomem *addr) +{ + u64 val; + + asm volatile("ldl %0, 0(%1)" : "=r" (val) : "r" (addr)); + return val; +} + +/* IO barriers */ + +#define __iormb() rmb() +#define __iowmb() wmb() +#define mmiowb() do { } while (0) + +/* + * Relaxed I/O memory access primitives. These follow the Device memory + * ordering rules but do not guarantee any ordering relative to Normal memory + * accesses. + */ +#define readb_relaxed(c) __raw_readb(c) +#define readw_relaxed(c) __raw_readw(c) +#define readl_relaxed(c) __raw_readl(c) +#define readq_relaxed(c) __raw_readq(c) + +#define writeb_relaxed(v, c) __raw_writeb((v), (c)) +#define writew_relaxed(v, c) __raw_writew((v), (c)) +#define writel_relaxed(v, c) __raw_writel((v), (c)) +#define writeq_relaxed(v, c) __raw_writeq((v), (c)) + +/* + * I/O memory access primitives. Reads are ordered relative to any + * following Normal memory access. Writes are ordered relative to any prior + * Normal memory access. + */ +#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) +#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) +#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; }) +#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(); __v; }) + +#define writeb(v, c) ({ __iowmb(); writeb_relaxed((v), (c)); }) +#define writew(v, c) ({ __iowmb(); writew_relaxed((v), (c)); }) +#define writel(v, c) ({ __iowmb(); writel_relaxed((v), (c)); }) +#define writeq(v, c) ({ __iowmb(); writeq_relaxed((v), (c)); }) +/* + * We always have external versions of these routines. + */ +extern u8 inb(unsigned long port); +extern u16 inw(unsigned long port); +extern u32 inl(unsigned long port); +extern void outb(u8 b, unsigned long port); +extern void outw(u16 b, unsigned long port); +extern void outl(u32 b, unsigned long port); + +/* + * Mapping from port numbers to __iomem space is pretty easy. 
+ */ +static inline void __iomem *ioportmap(unsigned long addr) +{ + return sw64_platform->ioportmap(addr); +} + +static inline void __iomem *__ioremap(phys_addr_t addr, size_t size, + pgprot_t prot) +{ + unsigned long tmp = addr | PAGE_OFFSET; + + return (void __iomem *)(tmp); +} + +#define ioremap(addr, size) __ioremap((addr), (size), PAGE_KERNEL) +#define ioremap_nocache(addr, size) __ioremap((addr), (size), PAGE_KERNEL) +#define ioremap_cache(addr, size) __ioremap((addr), (size), PAGE_KERNEL) +#define ioremap_uc ioremap_nocache + +static inline void __iounmap(volatile void __iomem *addr) +{ +} + +#define iounmap __iounmap + +static inline int __is_ioaddr(unsigned long addr) +{ + return addr >= (PAGE_OFFSET | IO_BASE); +} + +#define __is_ioaddr(a) __is_ioaddr((unsigned long)(a)) + +static inline int __is_mmio(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr; + + return (addr & 0x100000000UL) == 0; +} + + + +#define ioread16be(p) be16_to_cpu(ioread16(p)) +#define ioread32be(p) be32_to_cpu(ioread32(p)) +#define iowrite16be(v, p) iowrite16(cpu_to_be16(v), (p)) +#define iowrite32be(v, p) iowrite32(cpu_to_be32(v), (p)) + +#define inb_p inb +#define inw_p inw +#define inl_p inl +#define outb_p outb +#define outw_p outw +#define outl_p outl + + +/* + * String version of IO memory access ops: + */ +extern void memcpy_fromio(void *, const volatile void __iomem *, long); +extern void memcpy_toio(volatile void __iomem *, const void *, long); +extern void _memset_c_io(volatile void __iomem *, unsigned long, long); + +static inline void memset_io(volatile void __iomem *addr, u8 c, long len) +{ + _memset_c_io(addr, 0x0101010101010101UL * c, len); +} + +#define __HAVE_ARCH_MEMSETW_IO +static inline void memsetw_io(volatile void __iomem *addr, u16 c, long len) +{ + _memset_c_io(addr, 0x0001000100010001UL * c, len); +} + +/* + * String versions of in/out ops: + */ +extern void insb(unsigned long port, void *dst, unsigned long count); +extern void insw(unsigned long port, void *dst, unsigned long count); +extern void insl(unsigned long port, void *dst, unsigned long count); +extern void outsb(unsigned long port, const void *src, unsigned long count); +extern void outsw(unsigned long port, const void *src, unsigned long count); +extern void outsl(unsigned long port, const void *src, unsigned long count); + +/* + * These defines will override the defaults when doing RTC queries + */ + +#define RTC_PORT(x) (0x70 + (x)) +#define RTC_ALWAYS_BCD 0 + +/* + * Convert a physical pointer to a virtual kernel pointer for /dev/mem + * access + */ +#define xlate_dev_mem_ptr(p) __va(p) + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +#endif /* __KERNEL__ */ + +#endif /* _ASM_SW64_IO_H */ diff --git a/arch/sw_64/include/asm/irq.h b/arch/sw_64/include/asm/irq.h new file mode 100644 index 000000000000..be98132ce340 --- /dev/null +++ b/arch/sw_64/include/asm/irq.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_IRQ_H +#define _ASM_SW64_IRQ_H + +/* + * arch/sw/include/asm/irq.h + * + * (C) 2012 OSKernel JN + */ + +#include <linux/linkage.h> + +#define NR_VECTORS_PERCPU 256 +#define NR_IRQS_LEGACY 16 +#define NR_IRQS ((NR_VECTORS_PERCPU + NR_IRQS_LEGACY) * NR_CPUS) + +static inline int irq_canonicalize(int irq) +{ + /* + * XXX is this true for all Sw? The old serial driver + * did it this way for years without any complaints, so.... + */ + return ((irq == 2) ? 
9 : irq); +} + +struct pt_regs; +extern void (*perf_irq)(unsigned long, struct pt_regs *); +extern void fixup_irqs(void); +extern void sw64_timer_interrupt(void); + +#endif /* _ASM_SW64_IRQ_H */ diff --git a/arch/sw_64/include/asm/irq_impl.h b/arch/sw_64/include/asm/irq_impl.h new file mode 100644 index 000000000000..713267077142 --- /dev/null +++ b/arch/sw_64/include/asm/irq_impl.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file contains declarations and inline functions for interfacing + * with the IRQ handling routines in irq.c. + */ + +#ifndef _ASM_SW64_IRQ_IMPL_H +#define _ASM_SW64_IRQ_IMPL_H + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/profile.h> + +#define SW64_PCIE0_INT_BASE 17 +#define SW64_PCIE0_MSI_BASE 21 + +#define SW64_PCIE1_INT_BASE 277 +#define SW64_PCIE1_MSI_BASE 281 + +#define RTC_IRQ 8 +#define SWI2C_IRQ 14 + +enum sw64_irq_type { + INT_IPI = 1, + INT_PC0 = 2, + INT_PC1 = 3, + INT_INTx = 5, + INT_MSI = 6, + INT_MT = 7, + INT_RTC = 9, + INT_FAULT = 10, + INT_VT_SERIAL = 12, + INT_DEV = 17, + INT_NMI = 18, + INT_LEGACY = 31, +}; + +extern struct irqaction timer_irqaction; +extern void init_rtc_irq(irq_handler_t handler); +extern void handle_irq(int irq); +extern void handle_ipi(struct pt_regs *); +extern void __init sw64_init_irq(void); +extern irqreturn_t timer_interrupt(int irq, void *dev); +extern void handle_chip_irq(unsigned long type, unsigned long vector, + unsigned long irq_arg, struct pt_regs *regs); + +#endif diff --git a/arch/sw_64/include/asm/irq_regs.h b/arch/sw_64/include/asm/irq_regs.h new file mode 100644 index 000000000000..bba48f36a40f --- /dev/null +++ b/arch/sw_64/include/asm/irq_regs.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_IRQ_REGS_H +#define _ASM_SW64_IRQ_REGS_H + +#include <asm-generic/irq_regs.h> + +#endif diff --git a/arch/sw_64/include/asm/irqflags.h b/arch/sw_64/include/asm/irqflags.h new file mode 100644 index 000000000000..6101b6ad2e99 --- /dev/null +++ b/arch/sw_64/include/asm/irqflags.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_IRQFLAGS_H +#define _ASM_SW64_IRQFLAGS_H + +#include <asm/hmcall.h> + +#define IPL_MIN 0 +#define IPL_SW0 1 +#define IPL_SW1 2 +#define IPL_DEV0 3 +#define IPL_DEV1 4 +#define IPL_TIMER 5 +#define IPL_PERF 6 +#define IPL_POWERFAIL 6 +#define IPL_MCHECK 7 +#define IPL_MAX 7 + +#define getipl() (rdps() & 7) +#define setipl(ipl) ((void) swpipl(ipl)) + +static inline unsigned long arch_local_save_flags(void) +{ + return rdps(); +} + +static inline void arch_local_irq_disable(void) +{ + setipl(IPL_MAX); + barrier(); +} + +static inline unsigned long arch_local_irq_save(void) +{ + unsigned long flags = swpipl(IPL_MAX); + + barrier(); + return flags; +} + +static inline void arch_local_irq_enable(void) +{ + barrier(); + setipl(IPL_MIN); +} + +static inline void arch_local_irq_restore(unsigned long flags) +{ + barrier(); + setipl(flags); + barrier(); +} + +static inline bool arch_irqs_disabled_flags(unsigned long flags) +{ + return flags > IPL_MIN; +} + +static inline bool arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(getipl()); +} + +#endif /* _ASM_SW64_IRQFLAGS_H */ diff --git a/arch/sw_64/include/asm/jump_label.h b/arch/sw_64/include/asm/jump_label.h new file mode 100644 index 000000000000..78d3fb6246f0 --- /dev/null +++ b/arch/sw_64/include/asm/jump_label.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_SW64_JUMP_LABEL_H +#define __ASM_SW64_JUMP_LABEL_H + 
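A minimal usage sketch for the IPL-based interrupt-flag primitives defined in irqflags.h above (illustrative only, not taken from the patch itself; it assumes nothing beyond those definitions, and in-tree code normally reaches them through the generic local_irq_save()/local_irq_restore() wrappers):

static void sketch_critical_section(void)
{
	unsigned long flags;

	/* Raise the priority level to IPL_MAX so no interrupt is taken on this CPU. */
	flags = arch_local_irq_save();

	/* ... per-CPU work that must not be interrupted ... */

	/* Drop back to the priority level captured in flags. */
	arch_local_irq_restore(flags);
}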
+#ifndef __ASSEMBLY__ + +#include <linux/types.h> +#include <asm/insn.h> + +#define JUMP_LABEL_NOP_SIZE SW64_INSN_SIZE + +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +{ + asm_volatile_goto("1: nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 3\n\t" + ".quad 1b, %l[l_yes], %0\n\t" + ".popsection\n\t" + : : "i"(&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1: br %l[l_yes]\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 3\n\t" + ".quad 1b, %l[l_yes], %0\n\t" + ".popsection\n\t" + : : "i"(&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +typedef u64 jump_label_t; + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_SW64_JUMP_LABEL_H */ diff --git a/arch/sw_64/include/asm/kdebug.h b/arch/sw_64/include/asm/kdebug.h new file mode 100644 index 000000000000..73793057c3e8 --- /dev/null +++ b/arch/sw_64/include/asm/kdebug.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_KDEBUG_H +#define _ASM_SW64_KDEBUG_H + +#include <linux/notifier.h> + +enum die_val { + DIE_OOPS = 1, + DIE_BREAK, + DIE_SSTEPBP, + DIE_UPROBE, + DIE_UPROBE_XOL, +}; + +#endif /* _ASM_SW64_KDEBUG_H */ diff --git a/arch/sw_64/include/asm/kexec.h b/arch/sw_64/include/asm/kexec.h new file mode 100644 index 000000000000..a99aba9638e6 --- /dev/null +++ b/arch/sw_64/include/asm/kexec.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_KEXEC_H +#define _ASM_SW64_KEXEC_H + +#ifdef CONFIG_KEXEC + +/* Maximum physical address we can use pages from */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) +/* Maximum address we can use for the control code buffer */ +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) + +#define KEXEC_CONTROL_PAGE_SIZE 8192 + +#define KEXEC_ARCH KEXEC_ARCH_SW64 + +#define KEXEC_SW64_ATAGS_OFFSET 0x1000 +#define KEXEC_SW64_ZIMAGE_OFFSET 0x8000 + +#ifndef __ASSEMBLY__ + +/** + * crash_setup_regs() - save registers for the panic kernel + * @newregs: registers are saved here + * @oldregs: registers to be saved (may be %NULL) + * + * Function copies machine registers from @oldregs to @newregs. If @oldregs is + * %NULL then current registers are stored there.
+ */ +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ + if (oldregs) { + memcpy(newregs, oldregs, sizeof(*newregs)); + } else { + __asm__ __volatile__ ("stl $0, %0" : "=m" (newregs->r0)); + __asm__ __volatile__ ("stl $1, %0" : "=m" (newregs->r1)); + __asm__ __volatile__ ("stl $2, %0" : "=m" (newregs->r2)); + __asm__ __volatile__ ("stl $3, %0" : "=m" (newregs->r3)); + __asm__ __volatile__ ("stl $4, %0" : "=m" (newregs->r4)); + __asm__ __volatile__ ("stl $5, %0" : "=m" (newregs->r5)); + __asm__ __volatile__ ("stl $6, %0" : "=m" (newregs->r6)); + __asm__ __volatile__ ("stl $7, %0" : "=m" (newregs->r7)); + __asm__ __volatile__ ("stl $8, %0" : "=m" (newregs->r8)); + __asm__ __volatile__ ("stl $19, %0" : "=m" (newregs->r19)); + __asm__ __volatile__ ("stl $20, %0" : "=m" (newregs->r20)); + __asm__ __volatile__ ("stl $21, %0" : "=m" (newregs->r21)); + __asm__ __volatile__ ("stl $22, %0" : "=m" (newregs->r22)); + __asm__ __volatile__ ("stl $23, %0" : "=m" (newregs->r23)); + __asm__ __volatile__ ("stl $24, %0" : "=m" (newregs->r24)); + __asm__ __volatile__ ("stl $25, %0" : "=m" (newregs->r25)); + __asm__ __volatile__ ("stl $26, %0" : "=m" (newregs->r26)); + __asm__ __volatile__ ("stl $27, %0" : "=m" (newregs->r27)); + __asm__ __volatile__ ("stl $28, %0" : "=m" (newregs->r28)); + newregs->pc = (unsigned long)current_text_addr(); + } +} + +/* Function pointer to optional machine-specific reinitialization */ +extern void (*kexec_reinit)(void); + +#endif /* __ASSEMBLY__ */ + +struct kimage; +extern unsigned long kexec_args[4]; + +#endif /* CONFIG_KEXEC */ + +#endif /* _ASM_SW64_KEXEC_H */ diff --git a/arch/sw_64/include/asm/kgdb.h b/arch/sw_64/include/asm/kgdb.h new file mode 100644 index 000000000000..1d807362e867 --- /dev/null +++ b/arch/sw_64/include/asm/kgdb.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * sw64 KGDB support + * + * Based on arch/arm64/include/kgdb.h + * + * Copyright (C) Xia Bin + * Author: Xia Bin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ +#ifndef _ASM_SW64_KGDB_H +#define _ASM_SW64_KGDB_H + +#include <asm/ptrace.h> +#include <linux/sched.h> + +#ifndef __ASSEMBLY__ + + +#define GDB_ADJUSTS_BREAK_OFFSET +#define BREAK_INSTR_SIZE 4 +#define CACHE_FLUSH_IS_SAFE 0 + +static inline void arch_kgdb_breakpoint(void) +{ + asm __volatile__ ("sys_call/b 0x80"); +} + +void sw64_task_to_gdb_regs(struct task_struct *task, unsigned long *regs); + +extern void kgdb_handle_bus_error(void); +extern int kgdb_fault_expected; +extern unsigned long get_reg(struct task_struct *task, unsigned long regno); + +#endif /* !__ASSEMBLY__ */ + +/* + * general purpose registers size in bytes. + */ +#define DBG_MAX_REG_NUM (67) + +/* + * Size of I/O buffer for gdb packet. + * considering to hold all register contents, size is set + */ + +#define BUFMAX 4096 + +/* + * Number of bytes required for gdb_regs buffer. 
+ * _GP_REGS: 8 bytes, _FP_REGS: 16 bytes and _EXTRA_REGS: 4 bytes each + * GDB fails to connect for size beyond this with error + * "'g' packet reply is too long" + */ +#define NUMREGBYTES (DBG_MAX_REG_NUM * 8) + +#endif /* _ASM_SW64_KGDB_H */ diff --git a/arch/sw_64/include/asm/kmap_types.h b/arch/sw_64/include/asm/kmap_types.h new file mode 100644 index 000000000000..8e86b08dee94 --- /dev/null +++ b/arch/sw_64/include/asm/kmap_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_KMAP_TYPES_H +#define _ASM_SW64_KMAP_TYPES_H + +/* Dummy header just to define km_type. */ + +#ifdef CONFIG_DEBUG_HIGHMEM +#define __WITH_KM_FENCE +#endif + +#include <asm-generic/kmap_types.h> + +#undef __WITH_KM_FENCE + +#endif diff --git a/arch/sw_64/include/asm/kprobes.h b/arch/sw_64/include/asm/kprobes.h new file mode 100644 index 000000000000..c19b961a19da --- /dev/null +++ b/arch/sw_64/include/asm/kprobes.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Kernel Probes (KProbes) + * Based on arch/mips/include/asm/kprobes.h + */ + +#ifndef _ASM_SW64_KPROBES_H +#define _ASM_SW64_KPROBES_H + +#include <asm-generic/kprobes.h> + +#define BREAK_KPROBE 0x40ffffff +#define BREAK_KPROBE_SS 0x40fffeff + +#ifdef CONFIG_KPROBES +#include <linux/ptrace.h> +#include <linux/types.h> + +#include <asm/cacheflush.h> +#include <asm/kdebug.h> + +#define __ARCH_WANT_KPROBES_INSN_SLOT + +struct kprobe; +struct pt_regs; + +typedef u32 kprobe_opcode_t; + +#define MAX_INSN_SIZE 2 + +#define flush_insn_slot(p) \ +do { \ + if (p->addr) \ + flush_icache_range((unsigned long)p->addr, \ + (unsigned long)p->addr + \ + (MAX_INSN_SIZE * sizeof(kprobe_opcode_t))); \ +} while (0) + + +#define kretprobe_blacklist_size 0 + +void arch_remove_kprobe(struct kprobe *p); + +/* Architecture specific copy of original instruction*/ +struct arch_specific_insn { + /* copy of the original instruction */ + kprobe_opcode_t *insn; +}; + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; +}; + +#define SKIP_DELAYSLOT 0x0001 + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned long kprobe_status; + /* Per-thread fields, used while emulating branches */ + unsigned long flags; + unsigned long target_pc; + struct prev_kprobe prev_kprobe; +}; +extern int kprobe_handler(struct pt_regs *regs); +extern int post_kprobe_handler(struct pt_regs *regs); +extern int kprobe_fault_handler(struct pt_regs *regs, unsigned long mmcsr); + + +#endif /* CONFIG_KPROBES */ +#endif /* _ASM_SW64_KPROBES_H */ diff --git a/arch/sw_64/include/asm/kvm_asm.h b/arch/sw_64/include/asm/kvm_asm.h new file mode 100644 index 000000000000..4b851682188c --- /dev/null +++ b/arch/sw_64/include/asm/kvm_asm.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_KVM_ASM_H +#define _ASM_SW64_KVM_ASM_H + +#define SW64_KVM_EXIT_HOST_INTR 0 +#define SW64_KVM_EXIT_IO 1 +#define SW64_KVM_EXIT_HALT 10 +#define SW64_KVM_EXIT_SHUTDOWN 12 +#define SW64_KVM_EXIT_TIMER 13 +#define SW64_KVM_EXIT_IPI 14 +#define SW64_KVM_EXIT_RESTART 17 +#define SW64_KVM_EXIT_FATAL_ERROR 22 + +#endif /* _ASM_SW64_KVM_ASM_H */ diff --git a/arch/sw_64/include/asm/kvm_cma.h b/arch/sw_64/include/asm/kvm_cma.h new file mode 100644 index 000000000000..192bca436380 --- /dev/null +++ b/arch/sw_64/include/asm/kvm_cma.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_KVM_CMA_H__ +#define _ASM_SW64_KVM_CMA_H__ + +#include <linux/cma.h> + +extern int __init kvm_cma_declare_contiguous(phys_addr_t base, + 
phys_addr_t size, phys_addr_t limit, + phys_addr_t alignment, unsigned int order_per_bit, + const char *name, struct cma **res_cma); +#endif diff --git a/arch/sw_64/include/asm/kvm_emulate.h b/arch/sw_64/include/asm/kvm_emulate.h new file mode 100644 index 000000000000..d842008f189a --- /dev/null +++ b/arch/sw_64/include/asm/kvm_emulate.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_KVM_EMULATE_H +#define _ASM_SW64_KVM_EMULATE_H + +#include <linux/kvm_host.h> +#include <asm/kvm_asm.h> + +#define R(x) ((size_t) &((struct kvm_regs *)0)->x) + +static int reg_offsets[32] = { + R(r0), R(r1), R(r2), R(r3), R(r4), R(r5), R(r6), R(r7), R(r8), + R(r9), R(r10), R(r11), R(r12), R(r13), R(r14), R(r15), + R(r16), R(r17), R(r18), + R(r19), R(r20), R(r21), R(r22), R(r23), R(r24), R(r25), R(r26), + R(r27), R(r28), R(gp), + 0, 0, +}; + + +static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, + unsigned long val) +{ + void *regs_ptr = (void *)&vcpu->arch.regs; + + regs_ptr += reg_offsets[reg_num]; + *(unsigned long *)regs_ptr = val; +} + +static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu, u8 reg_num) +{ + void *regs_ptr = (void *)&vcpu->arch.regs; + + if (reg_num == 31) + return 0; + regs_ptr += reg_offsets[reg_num]; + return *(unsigned long *)regs_ptr; +} + +void sw64_decode(struct kvm_vcpu *vcpu, unsigned int insn, + struct kvm_run *run); + +unsigned int interrupt_pending(struct kvm_vcpu *vcpu, bool *more); +void clear_vcpu_irq(struct kvm_vcpu *vcpu); +void inject_vcpu_irq(struct kvm_vcpu *vcpu, unsigned int irq); +void try_deliver_interrupt(struct kvm_vcpu *vcpu, unsigned int irq, bool more); +#endif diff --git a/arch/sw_64/include/asm/kvm_host.h b/arch/sw_64/include/asm/kvm_host.h new file mode 100644 index 000000000000..913a2e9789c1 --- /dev/null +++ b/arch/sw_64/include/asm/kvm_host.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_KVM_HOST_H +#define _ASM_SW64_KVM_HOST_H + +#include <linux/types.h> +#include <linux/hardirq.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/bug.h> +#include <linux/mm.h> +#include <linux/mmu_notifier.h> +#include <linux/preempt.h> +#include <linux/msi.h> +#include <linux/slab.h> +#include <linux/rcupdate.h> +#include <linux/ratelimit.h> +#include <linux/err.h> +#include <linux/bitmap.h> +#include <linux/compiler.h> +#include <asm/signal.h> +#include <asm/vcpu.h> + +#include <generated/autoconf.h> +#include <asm/ptrace.h> + +#include <asm/kvm_mmio.h> + +#define KVM_MAX_VCPUS 64 +#define KVM_USER_MEM_SLOTS 512 + +#define KVM_HALT_POLL_NS_DEFAULT 0 +#define KVM_IRQCHIP_NUM_PINS 256 +/* KVM Hugepage definitions for sw64 */ +#define KVM_NR_PAGE_SIZES 3 +#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) +#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) +#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) +#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) +#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) + +struct kvm_arch_memory_slot { + +}; + +struct kvm_arch { + struct swvm_mem mem; +}; + + +struct kvm_vcpu_arch { + struct kvm_regs regs __attribute__((__aligned__(32))); + struct vcpucb vcb; + struct task_struct *tsk; + unsigned int pcpu_id; /* current running pcpu id */ + + /* Virtual clock device */ + struct hrtimer hrt; + unsigned long timer_next_event; + int first_run; + int halted; + int stopped; + int restart; + + /* Pending virtual interrupts */ + 
DECLARE_BITMAP(irqs_pending, SWVM_IRQS); + unsigned long vpnc[NR_CPUS]; + + /* WAIT executed */ + int wait; + + /* vcpu power-off state */ + bool power_off; + + /* Don't run the guest (internal implementation need) */ + bool pause; + + struct kvm_decode mmio_decode; +}; + +struct vmem_info { + unsigned long start; + size_t size; + atomic_t refcnt; +}; + +struct kvm_vm_stat { + u32 remote_tlb_flush; +}; + +struct kvm_vcpu_stat { + u64 halt_successful_poll; + u64 halt_attempted_poll; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; + u64 halt_wakeup; + u64 halt_poll_invalid; +}; + +int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index, struct hcall_args *hargs); +void vcpu_send_ipi(struct kvm_vcpu *vcpu, int target_vcpuid); +static inline void kvm_arch_hardware_disable(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} + +#endif /* _ASM_SW64_KVM_HOST_H */ diff --git a/arch/sw_64/include/asm/kvm_mmio.h b/arch/sw_64/include/asm/kvm_mmio.h new file mode 100644 index 000000000000..9ba31c91902f --- /dev/null +++ b/arch/sw_64/include/asm/kvm_mmio.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_KVM_MMIO_H +#define _ASM_SW64_KVM_MMIO_H + +#include <linux/kvm_host.h> +#include <asm/kvm_asm.h> + +struct kvm_decode { + unsigned long rt; + bool sign_extend; +}; + +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct hcall_args *hargs); + +#endif /* _ASM_SW64_KVM_MMIO_H */ diff --git a/arch/sw_64/include/asm/kvm_para.h b/arch/sw_64/include/asm/kvm_para.h new file mode 100644 index 000000000000..ba78c5371570 --- /dev/null +++ b/arch/sw_64/include/asm/kvm_para.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_KVM_PARA_H +#define _ASM_SW64_KVM_PARA_H + +#include <uapi/asm/kvm_para.h> + +#define HMC_hcall 0x32 + +static inline unsigned long kvm_hypercall3(unsigned long num, + unsigned long arg0, + unsigned long arg1, + unsigned long arg2) +{ + register unsigned long __r0 __asm__("$0"); + register unsigned long __r16 __asm__("$16") = num; + register unsigned long __r17 __asm__("$17") = arg0; + register unsigned long __r18 __asm__("$18") = arg1; + register unsigned long __r19 __asm__("$19") = arg2; + __asm__ __volatile__( + "sys_call %5" + : "=r"(__r16), "=r"(__r17), "=r"(__r18), "=r"(__r19), "=r"(__r0) + : "i"(HMC_hcall), "0"(__r16), "1"(__r17), "2"(__r18), "3"(__r19) + : "$1", "$22", "$23", "$24", "$25"); + return __r0; +} +#endif diff --git a/arch/sw_64/include/asm/kvm_timer.h b/arch/sw_64/include/asm/kvm_timer.h new file mode 100644 index 000000000000..be50bba9c4c6 --- /dev/null +++ b/arch/sw_64/include/asm/kvm_timer.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_KVM_TIMER_H +#define _ASM_SW64_KVM_TIMER_H 
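An illustrative sketch of the guest-side hypercall convention implemented by kvm_hypercall3() in kvm_para.h above (the call number used here is hypothetical, chosen only for the example): the call number travels in $16, up to three arguments in $17-$19, sys_call with HMC_hcall traps to the host, and the result comes back in $0.

#define HCALL_EXAMPLE	0x10	/* hypothetical call number, for illustration only */

static inline unsigned long example_hcall(unsigned long arg)
{
	/* $16 = HCALL_EXAMPLE, $17 = arg, $18 = $19 = 0; host result returned in $0 */
	return kvm_hypercall3(HCALL_EXAMPLE, arg, 0, 0);
}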
+ +void set_timer(struct kvm_vcpu *vcpu, unsigned long delta); +void set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); +enum hrtimer_restart clockdev_fn(struct hrtimer *timer); + +#endif diff --git a/arch/sw_64/include/asm/linkage.h b/arch/sw_64/include/asm/linkage.h new file mode 100644 index 000000000000..96c83663d9e8 --- /dev/null +++ b/arch/sw_64/include/asm/linkage.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_LINKAGE_H +#define _ASM_SW64_LINKAGE_H + +#define cond_syscall(x) asm(".weak\t" #x "\n" #x " = sys_ni_syscall") +#define SYSCALL_ALIAS(alias, name) \ + asm(#alias " = " #name "\n\t.globl " #alias) + +#endif diff --git a/arch/sw_64/include/asm/local.h b/arch/sw_64/include/asm/local.h new file mode 100644 index 000000000000..9144600f641d --- /dev/null +++ b/arch/sw_64/include/asm/local.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_LOCAL_H +#define _ASM_SW64_LOCAL_H + +#include <linux/percpu.h> +#include <linux/atomic.h> + +typedef struct { + atomic_long_t a; +} local_t; + +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l, i) atomic_long_set(&(l)->a, (i)) +#define local_inc(l) atomic_long_inc(&(l)->a) +#define local_dec(l) atomic_long_dec(&(l)->a) +#define local_add(i, l) atomic_long_add((i), (&(l)->a)) +#define local_sub(i, l) atomic_long_sub((i), (&(l)->a)) + +static inline long local_add_return(long i, local_t *l) +{ + long temp1, temp2, result, addr; + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %4, %2\n" + "1: lldl %0, 0(%4)\n" + " ldi %1, 1\n" + " wr_f %1\n" + " addl %0, %5, %3\n" + " addl %0, %5, %0\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstl %0, 0(%4)\n" + " rd_f %0\n" + " beq %0, 2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (temp1), "=&r" (temp2), "=m" (l->a.counter), + "=&r" (result), "=&r" (addr) + : "Ir" (i), "m" (l->a.counter) : "memory"); + return result; +} + +static inline long local_sub_return(long i, local_t *l) +{ + long temp1, temp2, result, addr; + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %4, %2\n" + "1: lldl %0, 0(%4)\n" + " ldi %1, 1\n" + " wr_f %1\n" + " subl %0, %5, %3\n" + " subl %0, %5, %0\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstl %0, 0(%4)\n" + " rd_f %0\n" + " beq %0, 2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (temp1), "=&r" (temp2), "=m" (l->a.counter), + "=&r" (result), "=&r" (addr) + : "Ir" (i), "m" (l->a.counter) : "memory"); + return result; +} + +#define local_cmpxchg(l, o, n) \ + (cmpxchg_local(&((l)->a.counter), (o), (n))) +#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n))) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. 
+ */ +#define local_add_unless(l, a, u) \ +({ \ + long c, old; \ + c = local_read(l); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((l), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) + +#define local_dec_return(l) local_sub_return(1, (l)) + +#define local_inc_return(l) local_add_return(1, (l)) + +#define local_sub_and_test(i, l) (local_sub_return((i), (l)) == 0) + +#define local_inc_and_test(l) (local_add_return(1, (l)) == 0) + +#define local_dec_and_test(l) (local_sub_return(1, (l)) == 0) + +/* Verify if faster than atomic ops */ +#define __local_inc(l) ((l)->a.counter++) +#define __local_dec(l) ((l)->a.counter--) +#define __local_add(i, l) ((l)->a.counter += (i)) +#define __local_sub(i, l) ((l)->a.counter -= (i)) + +#endif /* _ASM_SW64_LOCAL_H */ diff --git a/arch/sw_64/include/asm/local64.h b/arch/sw_64/include/asm/local64.h new file mode 100644 index 000000000000..4278133cd8fa --- /dev/null +++ b/arch/sw_64/include/asm/local64.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_LOCAL64_H +#define _ASM_SW64_LOCAL64_H + +#include <asm-generic/local64.h> + +#endif diff --git a/arch/sw_64/include/asm/memory.h b/arch/sw_64/include/asm/memory.h new file mode 100644 index 000000000000..d3191165c7b5 --- /dev/null +++ b/arch/sw_64/include/asm/memory.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_MEMORY_H +#define _ASM_SW64_MEMORY_H + +#ifdef CONFIG_NUMA +#include <linux/numa.h> +#endif + +#define NODE0_START (_TEXT_START - __START_KERNEL_map) + +#define MAX_PHYSMEM_BITS 48 + +struct mem_desc_t { + unsigned long phys_base; /* start address of physical memory */ + unsigned long phys_size; /* size of physical memory */ + phys_addr_t base; /* start address of memory managed by kernel */ + phys_addr_t size; /* size of memory managed by kernel */ +}; +extern struct mem_desc_t mem_desc; + +struct numa_node_desc_t { + phys_addr_t base; + phys_addr_t size; +}; +extern struct numa_node_desc_t numa_nodes_desc[]; + +void __init callback_init(void); +void __init mem_detect(void); +void __init sw64_memblock_init(void); +void __init zone_sizes_init(void); +void __init sw64_numa_init(void); +void __init sw64_memory_present(void); + +#endif /* _ASM_SW64_MEMORY_H */ diff --git a/arch/sw_64/include/asm/mmu.h b/arch/sw_64/include/asm/mmu.h new file mode 100644 index 000000000000..548c73b318cb --- /dev/null +++ b/arch/sw_64/include/asm/mmu.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_MMU_H +#define _ASM_SW64_MMU_H + +/* The sw64 MMU context is one "unsigned long" bitmap per CPU */ +typedef struct { + unsigned long asid[NR_CPUS]; + void *vdso; +} mm_context_t; +#endif diff --git a/arch/sw_64/include/asm/mmu_context.h b/arch/sw_64/include/asm/mmu_context.h new file mode 100644 index 000000000000..6b2ab3224ec9 --- /dev/null +++ b/arch/sw_64/include/asm/mmu_context.h @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_MMU_CONTEXT_H +#define _ASM_SW64_MMU_CONTEXT_H + +/* + * get a new mmu context.. + * + * Copyright (C) 1996, Linus Torvalds + */ +#include <linux/mm_types.h> + +#include <asm/compiler.h> +#include <asm/io.h> + +/* + * Force a context reload. This is needed when we change the page + * table pointer or when we update the ASN of the current process.
+ */ + +static inline unsigned long +__reload_thread(struct pcb_struct *pcb) +{ + register unsigned long a0 __asm__("$16"); + register unsigned long v0 __asm__("$0"); + + a0 = virt_to_phys(pcb); + __asm__ __volatile__( + "sys_call %2 #__reload_thread" + : "=r"(v0), "=r"(a0) + : "i"(HMC_swpctx), "r"(a0) + : "$1", "$22", "$23", "$24", "$25"); + + return v0; +} + +#define load_asn_ptbr load_mm + +/* + * The maximum ASN's the processor supports. + * + * If a processor implements address space numbers (ASNs), and the old + * PTE has the Address Space Match (ASM) bit clear (ASNs in use) and + * the Valid bit set, then entries can also effectively be made coherent + * by assigning a new, unused ASN to the currently running process and + * not reusing the previous ASN before calling the appropriate HMcode + * routine to invalidate the translation buffer (TB). + * + */ + +#ifdef CONFIG_SUBARCH_C3B +#define MAX_ASN 1023 +#define WIDTH_HARDWARE_ASN 10 +#endif + +/* + * cpu_last_asn(processor): + * 63 0 + * +-------------+----------------+--------------+ + * | asn version | this processor | hardware asn | + * +-------------+----------------+--------------+ + */ + +#include <asm/hw_init.h> +#ifdef CONFIG_SMP +#define cpu_last_asn(cpuid) (cpu_data[cpuid].last_asn) +#else +extern unsigned long last_asn; +#define cpu_last_asn(cpuid) last_asn +#endif /* CONFIG_SMP */ + +#define ASN_FIRST_VERSION (1UL << WIDTH_HARDWARE_ASN) +#define HARDWARE_ASN_MASK ((1UL << WIDTH_HARDWARE_ASN) - 1) + +/* + * NOTE! The way this is set up, the high bits of the "asn_cache" (and + * the "mm->context") are the ASN _version_ code. A version of 0 is + * always considered invalid, so to invalidate another process you only + * need to do "p->mm->context = 0". + * + * If we need more ASN's than the processor has, we invalidate the old + * user TLB's (tbiap()) and start a new ASN version. That will automatically + * force a new asn for any other processes the next time they want to + * run. + */ + +static inline unsigned long +__get_new_mm_context(struct mm_struct *mm, long cpu) +{ + unsigned long asn = cpu_last_asn(cpu); + unsigned long next = asn + 1; + + if ((asn & HARDWARE_ASN_MASK) >= MAX_ASN) { + tbiap(); + next = (asn & ~HARDWARE_ASN_MASK) + ASN_FIRST_VERSION; + } + cpu_last_asn(cpu) = next; + return next; +} + +static inline void +switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, + struct task_struct *next) +{ + /* Check if our ASN is of an older version, and thus invalid. */ + unsigned long asn; + unsigned long mmc; + long cpu = smp_processor_id(); + +#ifdef CONFIG_SMP + cpu_data[cpu].asn_lock = 1; + barrier(); +#endif + asn = cpu_last_asn(cpu); + mmc = next_mm->context.asid[cpu]; + if ((mmc ^ asn) & ~HARDWARE_ASN_MASK) { + /* Check if mmc and cpu asn is in the same version */ + mmc = __get_new_mm_context(next_mm, cpu); + next_mm->context.asid[cpu] = mmc; + } +#ifdef CONFIG_SMP + else + cpu_data[cpu].need_new_asn = 1; +#endif + + /* + * Always update the PCB ASN. Another thread may have allocated + * a new mm->context (via flush_tlb_mm) without the ASN serial + * number wrapping. We have no way to detect when this is needed. + */ + task_thread_info(next)->pcb.asn = mmc & HARDWARE_ASN_MASK; + /* + * Always update the PCB PTBR. If next is kernel thread, it must + * update PTBR. If next is user process, it's ok to update PTBR. 
+ */ + task_thread_info(next)->pcb.ptbr = (__pa(next_mm->pgd)) >> PAGE_SHIFT; + load_asn_ptbr(task_thread_info(next)->pcb.asn, task_thread_info(next)->pcb.ptbr); +} + +extern void __load_new_mm_context(struct mm_struct *); + +#ifdef CONFIG_SMP +#define check_mmu_context() \ +do { \ + int cpu = smp_processor_id(); \ + cpu_data[cpu].asn_lock = 0; \ + barrier(); \ + if (cpu_data[cpu].need_new_asn) { \ + struct mm_struct *mm = current->active_mm; \ + cpu_data[cpu].need_new_asn = 0; \ + if (!mm->context.asid[cpu]) \ + __load_new_mm_context(mm); \ + } \ +} while (0) +#else +#define check_mmu_context() do { } while (0) +#endif + +static inline void activate_mm(struct mm_struct *prev_mm, + struct mm_struct *next_mm) +{ + __load_new_mm_context(next_mm); +} + +#define deactivate_mm(tsk, mm) do { } while (0) + +static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) +{ + int i; + + for_each_possible_cpu(i) + mm->context.asid[i] = 0; + if (tsk != current) + task_thread_info(tsk)->pcb.ptbr + = (__pa(mm->pgd)) >> PAGE_SHIFT; + return 0; +} + +static inline void destroy_context(struct mm_struct *mm) +{ + /* Nothing to do. */ +} + +static inline void enter_lazy_tlb(struct mm_struct *mm, + struct task_struct *tsk) +{ + task_thread_info(tsk)->pcb.ptbr + = (__pa(mm->pgd)) >> PAGE_SHIFT; +} + +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ + return 0; +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ +} + +static inline void arch_unmap(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ +} + +static inline void arch_bprm_mm_init(struct mm_struct *mm, + struct vm_area_struct *vma) +{ +} + +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool execute, + bool foreign) +{ + /* by default, allow everything */ + return true; +} +#endif /* _ASM_SW64_MMU_CONTEXT_H */ diff --git a/arch/sw_64/include/asm/mmzone.h b/arch/sw_64/include/asm/mmzone.h new file mode 100644 index 000000000000..924e33f6d326 --- /dev/null +++ b/arch/sw_64/include/asm/mmzone.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_MMZONE_H +#define _ASM_SW64_MMZONE_H + +#include <asm/smp.h> + +/* + * Following are macros that are specific to this numa platform. 
+ */ + +extern pg_data_t *node_data[]; + +#ifdef CONFIG_NEED_MULTIPLE_NODES +#define NODE_DATA(nid) (node_data[(nid)]) +#endif + +#ifdef CONFIG_DISCONTIGMEM +extern int pa_to_nid(unsigned long pa); +extern int pfn_valid(unsigned long pfn); + +#define mk_pte(page, pgprot) \ +({ \ + pte_t pte; \ + unsigned long pfn; \ + \ + pfn = page_to_pfn(page) << _PTE_FLAGS_BITS; \ + pte_val(pte) = pfn | pgprot_val(pgprot); \ + \ + pte; \ +}) + +#define pte_page(x) \ +({ \ + unsigned long kvirt; \ + struct page *__xx; \ + \ + kvirt = (unsigned long)__va(pte_val(x) >> (_PTE_FLAGS_BITS-PAGE_SHIFT));\ + __xx = virt_to_page(kvirt); \ + \ + __xx; \ +}) + +#define page_to_pa(page) (page_to_pfn(page) << PAGE_SHIFT) +#define pfn_to_nid(pfn) pa_to_nid(((u64)(pfn) << PAGE_SHIFT)) +#endif /* CONFIG_DISCONTIGMEM */ + +#endif /* _ASM_SW64_MMZONE_H */ diff --git a/arch/sw_64/include/asm/module.h b/arch/sw_64/include/asm/module.h new file mode 100644 index 000000000000..55e6e333585f --- /dev/null +++ b/arch/sw_64/include/asm/module.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_MODULE_H +#define _ASM_SW64_MODULE_H + +struct mod_arch_specific { + unsigned int gotsecindex; +}; + +#define Elf_Sym Elf64_Sym +#define Elf_Shdr Elf64_Shdr +#define Elf_Ehdr Elf64_Ehdr +#define Elf_Phdr Elf64_Phdr +#define Elf_Dyn Elf64_Dyn +#define Elf_Rel Elf64_Rel +#define Elf_Rela Elf64_Rela + +#define ARCH_SHF_SMALL SHF_SW64_GPREL + +#ifdef MODULE +asm(".section .got, \"aw\", @progbits; .align 3; .previous"); +#endif + +#endif /* _ASM_SW64_MODULE_H */ diff --git a/arch/sw_64/include/asm/msi.h b/arch/sw_64/include/asm/msi.h new file mode 100644 index 000000000000..079fac0d128e --- /dev/null +++ b/arch/sw_64/include/asm/msi.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_MSI_H +#define _ASM_SW64_MSI_H + +#define NR_VECTORS NR_IRQS +#define NR_IRQ_VECTORS NR_IRQS + +#define AUTO_ASSIGN 0 + +#define LAST_DEVICE_VECTOR 31 + +#define MSI_OFFSET 0x44 + +#define NUM_MSI_IRQS 256 + +#define PERCPU_MSI_IRQS 256 + +#define MSIX_MSG_ADDR (0x91abc0UL) + +#ifdef CONFIG_PCI_MSI +extern int sw64_setup_vt_msi_irqs(struct pci_dev *dev, int nvec, int type); +extern int msi_compose_msg(unsigned int irq, struct msi_msg *msg); +extern void sw64_irq_noop(struct irq_data *d); +extern struct irq_chip sw64_irq_chip; + +#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +#define MSI_ADDR_BASE_HI 0 +#define MSI_ADDR_BASE_LO 0x91abc0 +struct sw6_msi_chip_data { + unsigned int msi_config_index; +}; +extern void arch_init_msi_domain(struct irq_domain *domain); +enum irq_alloc_type { + IRQ_ALLOC_TYPE_MSI, + IRQ_ALLOC_TYPE_MSIX, + IRQ_ALLOC_TYPE_INTX, +}; +struct irq_alloc_info { + struct msi_desc *desc; + enum irq_alloc_type type; + struct pci_dev *msi_dev; + irq_hw_number_t hwirq; +}; +typedef struct irq_alloc_info msi_alloc_info_t; +#endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ +#endif /* CONFIG_PCI_MSI */ +#endif /* _ASM_SW64_MSI_H */ diff --git a/arch/sw_64/include/asm/numa.h b/arch/sw_64/include/asm/numa.h new file mode 100644 index 000000000000..47071007e8ff --- /dev/null +++ b/arch/sw_64/include/asm/numa.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_SW64_NUMA_H +#define _ASM_SW64_NUMA_H + +#include <linux/nodemask.h> + +#ifdef CONFIG_NUMA +extern nodemask_t numa_nodes_parsed __initdata; +struct numa_memblk { + u64 start; + u64 end; + int nid; +}; + +#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) +struct numa_meminfo { + int nr_blks; + struct numa_memblk blk[NR_NODE_MEMBLKS]; +}; +extern int __init 
numa_add_memblk(int nodeid, u64 start, u64 end); +extern s16 __cpuid_to_node[CONFIG_NR_CPUS]; +static inline void numa_clear_node(int cpu) +{ +} + +static inline void set_cpuid_to_node(int cpuid, s16 node) +{ + __cpuid_to_node[cpuid] = node; +} + +#endif /* CONFIG_NUMA */ + +#endif /* _ASM_SW64_NUMA_H */ diff --git a/arch/sw_64/include/asm/page.h b/arch/sw_64/include/asm/page.h new file mode 100644 index 000000000000..6e17d5e437c5 --- /dev/null +++ b/arch/sw_64/include/asm/page.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_PAGE_H +#define _ASM_SW64_PAGE_H + +#include <linux/const.h> +#include <asm/hmcall.h> + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 13 +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +#define HUGE_MAX_HSTATE 2 + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +extern void clear_page(void *page); +#define clear_user_page(page, vaddr, pg) clear_page(page) + +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + +extern void copy_page(void *_to, void *_from); +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +typedef struct page *pgtable_t; + +extern unsigned long __phys_addr(unsigned long); +#endif /* !__ASSEMBLY__ */ + +#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) + +#include <asm/pgtable-4level.h> + +#if defined(CONFIG_SW64_LEGACY_KTEXT_ADDRESS) +#define __START_KERNEL_map PAGE_OFFSET +#else +#define __START_KERNEL_map 0xffffffff80000000 +#endif + +#define __pa(x) __phys_addr((unsigned long)(x)) +#define __va(x) ((void *)((unsigned long) (x) + PAGE_OFFSET)) +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) + +#ifdef CONFIG_FLATMEM +#define pfn_valid(pfn) ((pfn) < max_mapnr) +#endif /* CONFIG_FLATMEM */ + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#include <asm-generic/memory_model.h> +#include <asm-generic/getorder.h> +#endif + +#endif /* _ASM_SW64_PAGE_H */ diff --git a/arch/sw_64/include/asm/param.h b/arch/sw_64/include/asm/param.h new file mode 100644 index 000000000000..49c5d03a3370 --- /dev/null +++ b/arch/sw_64/include/asm/param.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_PARAM_H +#define _ASM_SW64_PARAM_H + +#include <uapi/asm/param.h> + +#undef HZ +#define HZ CONFIG_HZ +#define USER_HZ 100 +#define CLOCKS_PER_SEC USER_HZ /* frequency at which times() counts */ +#endif /* _ASM_SW64_PARAM_H */ diff --git a/arch/sw_64/include/asm/parport.h b/arch/sw_64/include/asm/parport.h new file mode 100644 index 000000000000..82b9a219b797 --- /dev/null +++ b/arch/sw_64/include/asm/parport.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * parport.h: platform-specific PC-style parport initialisation + * + * Copyright (C) 1999, 2000 Tim Waugh tim@cyberelk.demon.co.uk + * + * This file should only be included by drivers/parport/parport_pc.c. 
+ */ + +#ifndef _ASM_SW64_PARPORT_H +#define _ASM_SW64_PARPORT_H + +static int parport_pc_find_isa_ports(int autoirq, int autodma); +static int parport_pc_find_nonpci_ports(int autoirq, int autodma) +{ + return parport_pc_find_isa_ports(autoirq, autodma); +} + +#endif /* !(_ASM_SW64_PARPORT_H) */ diff --git a/arch/sw_64/include/asm/pci.h b/arch/sw_64/include/asm/pci.h new file mode 100644 index 000000000000..7e0c03da1d17 --- /dev/null +++ b/arch/sw_64/include/asm/pci.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_PCI_H +#define _ASM_SW64_PCI_H + +#ifdef __KERNEL__ + +#include <linux/spinlock.h> +#include <linux/dma-mapping.h> +#include <linux/scatterlist.h> + +/* + * The following structure is used to manage multiple PCI busses. + */ + +struct pci_dev; +struct pci_bus; +struct resource; +struct pci_iommu_arena; +struct sunway_iommu; +struct page; + + +/* A controller. Used to manage multiple PCI busses. */ + +struct pci_controller { + struct pci_controller *next; + struct pci_bus *bus; + struct resource *io_space; + struct resource *mem_space; + struct resource *pre_mem_space; + struct resource *busn_space; + unsigned long sparse_mem_base; + unsigned long dense_mem_base; + unsigned long sparse_io_base; + unsigned long dense_io_base; + + /* This one's for the kernel only. It's in KSEG somewhere. */ + unsigned long ep_config_space_base; + unsigned long rc_config_space_base; + + unsigned long index; + unsigned long node; + DECLARE_BITMAP(piu_msiconfig, 256); + int int_irq; + /* For compatibility with current (as of July 2003) pciutils + and XFree86. Eventually will be removed. */ + unsigned int need_domain_info; + + struct pci_iommu_arena *sg_pci; + struct pci_iommu_arena *sg_isa; + + bool iommu_enable; + struct sunway_iommu *pci_iommu; + int first_busno; + int last_busno; + int self_busno; + void *sysdata; +}; + +/* Override the logic in pci_scan_bus for skipping already-configured + * bus numbers. + */ + +#define pcibios_assign_all_busses() 1 + +#define PCIBIOS_MIN_IO 0 +#define PCIBIOS_MIN_MEM 0 + +extern void pcibios_set_master(struct pci_dev *dev); +extern void __init sw64_init_pci(void); +extern void __init sw64_device_interrupt(unsigned long vector); +extern void __init sw64_init_irq(void); +extern void __init sw64_init_arch(void); +extern unsigned char sw64_swizzle(struct pci_dev *dev, u8 *pinp); +extern struct pci_ops sw64_pci_ops; +extern int sw64_map_irq(const struct pci_dev *dev, u8 slot, u8 pin); +extern struct pci_controller *hose_head; + +/* TODO: integrate with include/asm-generic/pci.h ? */ +static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) +{ + return channel ? 15 : 14; +} + +#ifdef CONFIG_SUNWAY_IOMMU +extern struct syscore_ops iommu_cpu_syscore_ops; +#endif + +#define pci_domain_nr(bus) 0 + +static inline int pci_proc_domain(struct pci_bus *bus) +{ + struct pci_controller *hose = bus->sysdata; + + return hose->need_domain_info; +} + +#ifdef CONFIG_NUMA +static inline int __pcibus_to_node(const struct pci_bus *bus) +{ + struct pci_controller *hose; + + hose = bus->sysdata; + if (!node_online(hose->node)) + return next_node_in(hose->node, node_online_map); + else + return hose->node; +} +#define pcibus_to_node(bus) __pcibus_to_node(bus) +#endif + +#endif /* __KERNEL__ */ + +/* Values for the `which' argument to sys_pciconfig_iobase. 
*/ +#define IOBASE_HOSE 0 +#define IOBASE_SPARSE_MEM 1 +#define IOBASE_DENSE_MEM 2 +#define IOBASE_SPARSE_IO 3 +#define IOBASE_DENSE_IO 4 +#define IOBASE_ROOT_BUS 5 +#define IOBASE_FROM_HOSE 0x10000 + +extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, + size_t count); +extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, + size_t count); +extern int pci_mmap_legacy_page_range(struct pci_bus *bus, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state); +extern void pci_adjust_legacy_attr(struct pci_bus *bus, + enum pci_mmap_state mmap_type); +#define HAVE_PCI_LEGACY 1 + +extern int pci_create_resource_files(struct pci_dev *dev); +extern void pci_remove_resource_files(struct pci_dev *dev); +extern void __init reserve_mem_for_pci(void); +extern int chip_pcie_configure(struct pci_controller *hose); + +#define PCI_VENDOR_ID_JN 0x5656 +#define PCI_DEVICE_ID_CHIP3 0x3231 +#define PCI_DEVICE_ID_JN_PCIESW 0x1000 +#define PCI_DEVICE_ID_JN_PCIEUSIP 0x1200 +#define PCI_DEVICE_ID_JN_PCIE2PCI 0x1314 + +#define NR_IRQ_VECTORS NR_IRQS + +#define LAST_DEVICE_VECTOR 31 + +#define PCITODMA_OFFSET 0x0 /*0 offset*/ + +#endif /* _ASM_SW64_PCI_H */ diff --git a/arch/sw_64/include/asm/percpu.h b/arch/sw_64/include/asm/percpu.h new file mode 100644 index 000000000000..3acdf36bcf55 --- /dev/null +++ b/arch/sw_64/include/asm/percpu.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_PERCPU_H +#define _ASM_SW64_PERCPU_H + +/* + * To calculate addresses of locally defined variables, GCC uses + * 32-bit displacement from the GP. Which doesn't work for per cpu + * variables in modules, as an offset to the kernel per cpu area is + * way above 4G. + * + * Always use weak definitions for percpu variables in modules. + */ +#if defined(MODULE) && defined(CONFIG_SMP) +#define ARCH_NEEDS_WEAK_PER_CPU +#endif + +#include <asm-generic/percpu.h> + +#endif /* _ASM_SW64_PERCPU_H */ diff --git a/arch/sw_64/include/asm/perf_event.h b/arch/sw_64/include/asm/perf_event.h new file mode 100644 index 000000000000..5f5a45217544 --- /dev/null +++ b/arch/sw_64/include/asm/perf_event.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_PERF_EVENT_H +#define _ASM_SW64_PERF_EVENT_H + +#include <asm/wrperfmon.h> + +#endif /* _ASM_SW64_PERF_EVENT_H */ diff --git a/arch/sw_64/include/asm/pgalloc.h b/arch/sw_64/include/asm/pgalloc.h new file mode 100644 index 000000000000..3cfdcbef7ef8 --- /dev/null +++ b/arch/sw_64/include/asm/pgalloc.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_PGALLOC_H +#define _ASM_SW64_PGALLOC_H + +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */ + +/* + * Allocate and free page tables. The xxx_kernel() versions are + * used to allocate a kernel page table - this turns on ASN bits + * if any. 
+ */ + +static inline void +pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) +{ + pmd_set(pmd, (pte_t *)(page_to_pa(pte) + PAGE_OFFSET)); +} +#define pmd_pgtable(pmd) pmd_page(pmd) + +static inline void +pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) +{ + pmd_set(pmd, pte); +} + +static inline void +p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) +{ + p4d_set(p4d, pud); +} + +static inline void +pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + pud_set(pud, pmd); +} + +extern pgd_t *pgd_alloc(struct mm_struct *mm); + +#define check_pgt_cache() do { } while (0) + +#endif /* _ASM_SW64_PGALLOC_H */ diff --git a/arch/sw_64/include/asm/pgtable-4level.h b/arch/sw_64/include/asm/pgtable-4level.h new file mode 100644 index 000000000000..8c45f441c520 --- /dev/null +++ b/arch/sw_64/include/asm/pgtable-4level.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_PGTABLE_4LEVEL_H +#define _ASM_SW64_PGTABLE_4LEVEL_H + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +/* + * These are used to make use of C type-checking.. + */ +typedef struct { unsigned long pte; } pte_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pgd; } pgd_t; +typedef struct { unsigned long pud; } pud_t; +typedef struct { unsigned long pgprot; } pgprot_t; + +#define pte_val(x) ((x).pte) +#define pmd_val(x) ((x).pmd) +#define pgd_val(x) ((x).pgd) +#define pud_val(x) ((x).pud) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) }) +#define __pmd(x) ((pmd_t) { (x) }) +#define __pud(x) ((pud_t) { (x) }) +#define __pgd(x) ((pgd_t) { (x) }) +#define __pgprot(x) ((pgprot_t) { (x) }) +#endif /* !__ASSEMBLY__ */ + +#define PAGE_OFFSET 0xfff0000000000000 + +#endif +#endif /* _ASM_SW64_PGTABLE_4LEVEL_H */ diff --git a/arch/sw_64/include/asm/pgtable.h b/arch/sw_64/include/asm/pgtable.h new file mode 100644 index 000000000000..32fde38a2be0 --- /dev/null +++ b/arch/sw_64/include/asm/pgtable.h @@ -0,0 +1,634 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_PGTABLE_H +#define _ASM_SW64_PGTABLE_H + + +#include <asm-generic/pgtable-nop4d.h> + +/* + * This file contains the functions and defines necessary to modify and use + * the sw64 page table tree. + * + * This hopefully works with any standard sw64 page-size, as defined + * in <asm/page.h> (currently 8192). + */ +#include <linux/mmzone.h> + +#include <asm/page.h> +#include <asm/processor.h> /* For TASK_SIZE */ +#include <asm/setup.h> + +struct mm_struct; +struct vm_area_struct; + +/* Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. 
+ */ +#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval)) +#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval) + +#define set_pmd(pmdptr, pmdval) ((*(pmdptr)) = (pmdval)) +#define set_pmd_at(mm, addr, pmdp, pmdval) set_pmd(pmdp, pmdval) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PAGE_SHIFT + 3 * (PAGE_SHIFT - 3)) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE - 1)) + +/* PUD_SHIFT determines the size of the area a third-level page table can map */ +#define PUD_SHIFT (PAGE_SHIFT + 2 * (PAGE_SHIFT - 3)) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* PMD_SHIFT determines the size of the area a second-level page table can map */ +#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT - 3)) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE - 1)) + +/* + * Entries per page directory level: the sw64 is four-level, with + * all levels having a one-page page table. + */ +#define PTRS_PER_PTE (1UL << (PAGE_SHIFT - 3)) +#define PTRS_PER_PMD (1UL << (PAGE_SHIFT - 3)) +#define PTRS_PER_PGD (1UL << (PAGE_SHIFT - 3)) +#define PTRS_PER_PUD (1UL << (PAGE_SHIFT - 3)) + +#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) +#define FIRST_USER_ADDRESS 0UL + +/* Number of pointers that fit on a page: this will go away. */ +#define PTRS_PER_PAGE (1UL << (PAGE_SHIFT - 3)) + +#define VMALLOC_START (-2 * PGDIR_SIZE) +#ifndef CONFIG_SPARSEMEM_VMEMMAP +#define VMALLOC_END (-PGDIR_SIZE) +#else +#define VMEMMAP_END (-PGDIR_SIZE) +#define vmemmap ((struct page *)VMEMMAP_END - (1UL << (3 * (PAGE_SHIFT - 3)))) +#define VMALLOC_END ((unsigned long)vmemmap) +#endif + +/* + * HMcode-imposed page table bits + */ +#define _PAGE_VALID 0x0001 +#define _PAGE_FOR 0x0002 /* used for page protection (fault on read) */ +#define _PAGE_FOW 0x0004 /* used for page protection (fault on write) */ +#define _PAGE_FOE 0x0008 /* used for page protection (fault on exec) */ +#define _PAGE_ASM 0x0010 +#define _PAGE_PHU 0x0020 /* used for 256M page size bit */ +#define _PAGE_PSE 0x0040 /* used for 8M page size bit */ +#define _PAGE_PROTNONE 0x0080 /* used for numa page balancing */ +#define _PAGE_KRE 0x0400 /* xxx - see below on the "accessed" bit */ +#define _PAGE_URE 0x0800 /* xxx */ +#define _PAGE_KWE 0x4000 /* used to do the dirty bit in software */ +#define _PAGE_UWE 0x8000 /* used to do the dirty bit in software */ + +/* .. and these are ours ... */ +#define _PAGE_DIRTY 0x20000 +#define _PAGE_ACCESSED 0x40000 + +#define _PAGE_BIT_ACCESSED 18 /* bit of _PAGE_ACCESSED */ +#define _PAGE_BIT_FOW 2 /* bit of _PAGE_FOW */ +#define _PAGE_SPLITTING 0x200000 /* For Transparent Huge Page */ +#define _PAGE_BIT_SPLITTING 21 /* bit of _PAGE_SPLITTING */ + +/* + * NOTE! The "accessed" bit isn't necessarily exact: it can be kept exactly + * by software (use the KRE/URE/KWE/UWE bits appropriately), but I'll fake it. + * Under Linux/sw64, the "accessed" bit just means "read", and I'll just use + * the KRE/URE bits to watch for it. That way we don't need to overload the + * KWE/UWE bits with both handling dirty and accessed. + * + * Note that the kernel uses the accessed bit just to check whether to page + * out a page or not, so it doesn't have to be exact anyway.
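The level shifts above follow directly from PAGE_SHIFT. A minimal userspace sketch of the resulting geometry; the numbers are derived only from the constants in this patch (TASK_SIZE is taken from processor.h later in this series), not from hardware documentation:

#include <stdio.h>

int main(void)
{
	const unsigned long page_shift = 13;
	const unsigned long ptrs_per_level = 1UL << (page_shift - 3);		/* 1024 eight-byte entries fill one 8 KiB page */
	const unsigned long pmd_shift = page_shift + (page_shift - 3);		/* 23: a PMD entry maps 8 MiB */
	const unsigned long pud_shift = page_shift + 2 * (page_shift - 3);	/* 33: a PUD entry maps 8 GiB */
	const unsigned long pgdir_shift = page_shift + 3 * (page_shift - 3);	/* 43: a PGD entry maps 8 TiB */
	const unsigned long task_size = 0x10000000000000UL;			/* 2^52, from processor.h */

	printf("entries per table:  %lu\n", ptrs_per_level);
	printf("PMD_SIZE   = %lu MiB\n", (1UL << pmd_shift) >> 20);
	printf("PUD_SIZE   = %lu GiB\n", (1UL << pud_shift) >> 30);
	printf("PGDIR_SIZE = %lu TiB\n", (1UL << pgdir_shift) >> 40);
	printf("USER_PTRS_PER_PGD = %lu\n", task_size >> pgdir_shift);		/* 512 */
	return 0;
}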
+ */ + +#define __DIRTY_BITS (_PAGE_DIRTY | _PAGE_KWE | _PAGE_UWE) +#define __ACCESS_BITS (_PAGE_ACCESSED | _PAGE_KRE | _PAGE_URE) + + +#define _PFN_MASK 0xFFFFFFFFF0000000UL +#define _PFN_BITS 36 +#define _PTE_FLAGS_BITS (64 - _PFN_BITS) + +#define _PAGE_TABLE (_PAGE_VALID | __DIRTY_BITS | __ACCESS_BITS) +#define _PAGE_CHG_MASK (_PFN_MASK | __DIRTY_BITS | __ACCESS_BITS) +#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PHU) + +/* + * All the normal masks have the "page accessed" bits on, as any time they are used, + * the page is accessed. They are cleared only by the page-out routines + */ +#define PAGE_NONE __pgprot(__ACCESS_BITS | _PAGE_FOR | _PAGE_FOW | _PAGE_FOE | _PAGE_PROTNONE) +#define PAGE_SHARED __pgprot(_PAGE_VALID | __ACCESS_BITS) +#define PAGE_COPY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) +#define PAGE_READONLY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) +#define PAGE_KERNEL __pgprot(_PAGE_VALID | _PAGE_ASM | _PAGE_KRE | _PAGE_KWE) +#define _PAGE_NORMAL(x) __pgprot(_PAGE_VALID | __ACCESS_BITS | (x)) + +#define _PAGE_P(x) _PAGE_NORMAL((x) | _PAGE_FOW) +#define _PAGE_S(x) _PAGE_NORMAL(x) + +/* + * The hardware can handle write-only mappings, but as the sw64 + * architecture does byte-wide writes with a read-modify-write + * sequence, it's not practical to have write-without-read privs. + * Thus the "-w- -> rw-" and "-wx -> rwx" mapping here (and in + * arch/sw_64/mm/fault.c) + */ + /* xwr */ +#define __P000 _PAGE_P(_PAGE_FOE | _PAGE_FOW | _PAGE_FOR) +#define __P001 _PAGE_P(_PAGE_FOE | _PAGE_FOW) +#define __P010 _PAGE_P(_PAGE_FOE) +#define __P011 _PAGE_P(_PAGE_FOE) +#define __P100 _PAGE_P(_PAGE_FOW | _PAGE_FOR) +#define __P101 _PAGE_P(_PAGE_FOW) +#define __P110 _PAGE_P(0) +#define __P111 _PAGE_P(0) + +#define __S000 _PAGE_S(_PAGE_FOE | _PAGE_FOW | _PAGE_FOR) +#define __S001 _PAGE_S(_PAGE_FOE | _PAGE_FOW) +#define __S010 _PAGE_S(_PAGE_FOE) +#define __S011 _PAGE_S(_PAGE_FOE) +#define __S100 _PAGE_S(_PAGE_FOW | _PAGE_FOR) +#define __S101 _PAGE_S(_PAGE_FOW) +#define __S110 _PAGE_S(0) +#define __S111 _PAGE_S(0) + +/* + * pgprot_noncached() is only for infiniband pci support, and a real + * implementation for RAM would be more complicated. + */ +#define pgprot_noncached(prot) (prot) + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ + +extern struct page *empty_zero_page; +#define ZERO_PAGE(vaddr) (empty_zero_page) + +/* number of bits that fit into a memory pointer */ +#define BITS_PER_PTR (8 * sizeof(unsigned long)) + +/* to align the pointer to a pointer address */ +#define PTR_MASK (~(sizeof(void *) - 1)) + +/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */ +#define SIZEOF_PTR_LOG2 3 + +/* to find an entry in a page-table */ +#define PAGE_PTR(address) \ + ((unsigned long)(address) >> (PAGE_SHIFT - SIZEOF_PTR_LOG2) & PTR_MASK & ~PAGE_MASK) + +#define PHYS_TWIDDLE(pfn) (pfn) + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. 
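The conversion helpers below pack the page frame number above the 28 flag bits (_PTE_FLAGS_BITS = 64 - _PFN_BITS). A minimal userspace sketch of that packing; the pfn and protection values are made up for illustration:

#include <assert.h>
#include <stdio.h>

#define EXAMPLE_PTE_FLAGS_BITS	28			/* 64 - _PFN_BITS(36) */
#define EXAMPLE_PFN_MASK	0xFFFFFFFFF0000000UL	/* _PFN_MASK */

int main(void)
{
	unsigned long pfn = 0x12345;	/* hypothetical page frame number */
	unsigned long prot = 0x1111;	/* hypothetical protection bits (must fit in the low 28 bits) */
	unsigned long pte = (pfn << EXAMPLE_PTE_FLAGS_BITS) | prot;	/* what pfn_pte()/mk_pte() build */

	assert((pte >> EXAMPLE_PTE_FLAGS_BITS) == pfn);			/* pte_pfn() recovers the pfn */
	assert((pte & EXAMPLE_PFN_MASK) == (pfn << EXAMPLE_PTE_FLAGS_BITS));
	assert((pte & ~EXAMPLE_PFN_MASK) == prot);			/* flag bits stay below bit 28 */
	printf("pte = %#lx\n", pte);
	return 0;
}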
+ */ +#define page_to_pa(page) (page_to_pfn(page) << PAGE_SHIFT) + +#define pmd_pfn(pmd) (pmd_val(pmd) >> _PTE_FLAGS_BITS) +#define pte_pfn(pte) (pte_val(pte) >> _PTE_FLAGS_BITS) +#ifndef CONFIG_DISCONTIGMEM +#define pte_page(pte) pfn_to_page(pte_pfn(pte)) +#define mk_pte(page, pgprot) \ +({ \ + pte_t pte; \ + \ + pte_val(pte) = (page_to_pfn(page) << _PTE_FLAGS_BITS) | pgprot_val(pgprot); \ + pte; \ +}) +#endif + +static inline pte_t pfn_pte(unsigned long physpfn, pgprot_t pgprot) +{ + pte_t pte; + + pte_val(pte) = (PHYS_TWIDDLE(physpfn) << _PTE_FLAGS_BITS) | pgprot_val(pgprot); + return pte; +} + +static inline pmd_t pfn_pmd(unsigned long physpfn, pgprot_t pgprot) +{ + pmd_t pmd; + + pmd_val(pmd) = (PHYS_TWIDDLE(physpfn) << _PTE_FLAGS_BITS) | pgprot_val(pgprot); + return pmd; +} + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); + return pte; +} + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + pmd_val(pmd) = (pmd_val(pmd) & _HPAGE_CHG_MASK) | pgprot_val(newprot); + return pmd; +} + +static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) +{ + pmd_val(*pmdp) = _PAGE_TABLE | (__pa(ptep) << (_PTE_FLAGS_BITS - PAGE_SHIFT)); +} + +static inline void pud_set(pud_t *pudp, pmd_t *pmdp) +{ + pud_val(*pudp) = _PAGE_TABLE | (__pa(pmdp) << (_PTE_FLAGS_BITS - PAGE_SHIFT)); +} + +static inline void p4d_set(p4d_t *p4dp, pud_t *pudp) +{ + p4d_val(*p4dp) = _PAGE_TABLE | (__pa(pudp) << (_PTE_FLAGS_BITS - PAGE_SHIFT)); +} + +static inline unsigned long +pmd_page_vaddr(pmd_t pmd) +{ + return ((pmd_val(pmd) & _PFN_MASK) >> (_PTE_FLAGS_BITS-PAGE_SHIFT)) + PAGE_OFFSET; +} + +#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> _PTE_FLAGS_BITS)) +#define pud_page(pud) (pfn_to_page(pud_val(pud) >> _PTE_FLAGS_BITS)) +#define p4d_page(p4d) (pfn_to_page(p4d_val(p4d) >> _PTE_FLAGS_BITS)) + +static inline pud_t *p4d_pgtable(p4d_t p4d) +{ + return (pud_t *)(PAGE_OFFSET + ((p4d_val(p4d) & _PFN_MASK) >> (_PTE_FLAGS_BITS-PAGE_SHIFT))); +} + +static inline pmd_t *pud_pgtable(pud_t pud) +{ + return (pmd_t *)(PAGE_OFFSET + ((pud_val(pud) & _PFN_MASK) >> (_PTE_FLAGS_BITS-PAGE_SHIFT))); +} + +static inline int pte_none(pte_t pte) +{ + return !pte_val(pte); +} + +static inline int pte_present(pte_t pte) +{ + return pte_val(pte) & (_PAGE_VALID | _PAGE_PROTNONE); +} + +static inline int pte_huge(pte_t pte) +{ + return pte_val(pte) & _PAGE_PSE; +} + +static inline void pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_val(*ptep) = 0; +} + +#define pte_accessible pte_accessible +static inline bool pte_accessible(struct mm_struct *mm, pte_t a) +{ + if (pte_val(a) & _PAGE_VALID) + return true; + + if ((pte_val(a) & _PAGE_PROTNONE) && + mm_tlb_flush_pending(mm)) + return true; + + return false; +} + +static inline int pmd_none(pmd_t pmd) +{ + return !pmd_val(pmd); +} + +static inline int pmd_bad(pmd_t pmd) +{ + return (pmd_val(pmd) & ~_PFN_MASK) != _PAGE_TABLE; +} + +static inline int pmd_present(pmd_t pmd) +{ + return pmd_val(pmd) & (_PAGE_VALID | _PAGE_PROTNONE); +} + +static inline void pmd_clear(pmd_t *pmdp) +{ + pmd_val(*pmdp) = 0; +} + +static inline int pmd_dirty(pmd_t pmd) +{ + return pmd_val(pmd) & _PAGE_DIRTY; +} + +static inline int pmd_young(pmd_t pmd) +{ + return pmd_val(pmd) & _PAGE_ACCESSED; +} + +#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write +static inline int pmd_write(pmd_t pmd) +{ + return !(pmd_val(pmd) & _PAGE_FOW); +} + +static inline pmd_t pmd_wrprotect(pmd_t pmd) +{ + 
pmd_val(pmd) |= _PAGE_FOW; + return pmd; +} + +static inline pmd_t pmd_mkinvalid(pmd_t pmd) +{ + pmd_val(pmd) &= ~(_PAGE_VALID | _PAGE_PROTNONE); + return pmd; +} + +static inline pmd_t pmd_mkclean(pmd_t pmd) +{ + pmd_val(pmd) &= ~(__DIRTY_BITS); + pmd_val(pmd) |= _PAGE_FOW; + return pmd; +} + +static inline pmd_t pmd_mkold(pmd_t pmd) +{ + pmd_val(pmd) &= ~(__ACCESS_BITS); + return pmd; +} + +static inline pmd_t pmd_mkwrite(pmd_t pmd) +{ + pmd_val(pmd) &= ~_PAGE_FOW; + return pmd; +} + +static inline pmd_t pmd_mkdirty(pmd_t pmd) +{ + pmd_val(pmd) |= __DIRTY_BITS; + return pmd; +} + +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ + pmd_val(pmd) |= __ACCESS_BITS; + return pmd; +} + +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + pmd_val(pmd) |= _PAGE_PSE; + return pmd; +} + +static inline int pud_none(pud_t pud) +{ + return !pud_val(pud); +} + +static inline int pud_bad(pud_t pud) +{ + return (pud_val(pud) & ~_PFN_MASK) != _PAGE_TABLE; +} + +static inline int pud_present(pud_t pud) +{ + return pud_val(pud) & _PAGE_VALID; +} + +static inline void pud_clear(pud_t *pudp) +{ + pud_val(*pudp) = 0; +} + +static inline int p4d_none(p4d_t p4d) +{ + return !p4d_val(p4d); +} + +static inline int p4d_bad(p4d_t p4d) +{ + return (p4d_val(p4d) & ~_PFN_MASK) != _PAGE_TABLE; +} + +static inline int p4d_present(p4d_t p4d) +{ + return p4d_val(p4d) & _PAGE_VALID; +} + +static inline void p4d_clear(p4d_t *p4dp) +{ + p4d_val(*p4dp) = 0; +} + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_write(pte_t pte) +{ + return !(pte_val(pte) & _PAGE_FOW); +} + +static inline int pte_dirty(pte_t pte) +{ + return pte_val(pte) & _PAGE_DIRTY; +} + +static inline int pte_young(pte_t pte) +{ + return pte_val(pte) & _PAGE_ACCESSED; +} + +static inline pte_t pte_wrprotect(pte_t pte) +{ + pte_val(pte) |= _PAGE_FOW; + return pte; +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + pte_val(pte) &= ~(__DIRTY_BITS); + pte_val(pte) |= _PAGE_FOW; + return pte; +} + +static inline pte_t pte_mkold(pte_t pte) +{ + pte_val(pte) &= ~(__ACCESS_BITS); + return pte; +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_FOW; + return pte; +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + pte_val(pte) |= __DIRTY_BITS; + return pte; +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + pte_val(pte) |= __ACCESS_BITS; + return pte; +} + +static inline pte_t pte_mkhuge(pte_t pte) +{ + pte_val(pte) |= _PAGE_PSE; + return pte; +} + +#ifdef CONFIG_NUMA_BALANCING +/* + * See the comment in include/asm-generic/pgtable.h + */ +static inline int pte_protnone(pte_t pte) +{ + return (pte_val(pte) & (_PAGE_PROTNONE | _PAGE_VALID)) + == _PAGE_PROTNONE; +} + +static inline int pmd_protnone(pmd_t pmd) +{ + return (pmd_val(pmd) & (_PAGE_PROTNONE | _PAGE_VALID)) + == _PAGE_PROTNONE; +} +#endif + + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +/* We don't have hardware dirty/accessed bits, generic_pmdp_establish is fine.*/ +#define pmdp_establish generic_pmdp_establish + +static inline int pmd_trans_splitting(pmd_t pmd) +{ + return pmd_val(pmd) & _PAGE_SPLITTING; +} + +static inline int pmd_trans_huge(pmd_t pmd) +{ + return pmd_val(pmd) & _PAGE_PSE; +} + +static inline int has_transparent_hugepage(void) +{ + return 1; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR +static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + unsigned long pmd_val = xchg(&pmdp->pmd, 0); + pmd_t pmd = 
(pmd_t){pmd_val}; + return pmd; +} + +#define __HAVE_ARCH_PMDP_SET_WRPROTECT +static inline void pmdp_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + set_bit(_PAGE_BIT_FOW, (unsigned long *)pmdp); +} + +#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) + +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +extern int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty); + +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +extern int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + + +#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH +extern void pmdp_splitting_flush(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp); + +#define PAGE_DIR_OFFSET(tsk, address) pgd_offset((tsk), (address)) + +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(address) pgd_offset(&init_mm, (address)) + +/* to find an entry in a page-table-directory. */ +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) +#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) + +extern pgd_t swapper_pg_dir[1024]; + +/* + * The sw64 doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. + */ +#define update_mmu_cache(vma, address, ptep) do { } while (0) +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) + +/* + * Encode and decode a swap entry: + * + * Format of swap PTE: + * bit 0: _PAGE_VALID (must be zero) + * bit 6: _PAGE_PSE (must be zero) + * bit 7: _PAGE_PROTNONE (must be zero) + * bits 8-15: swap type + * bits 16-63: swap offset + */ +#define __SWP_TYPE_SHIFT 8 +#define __SWP_TYPE_BITS 8 +#define __SWP_OFFSET_BITS 48 +#define __SWP_TYPE_MASK ((1UL << __SWP_TYPE_BITS) - 1) +#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) +#define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1) + +#define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK) +#define __swp_offset(x) (((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK) +#define __swp_entry(type, offset) \ + ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) }) + +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +#if defined(CONFIG_FLATMEM) +#define kern_addr_valid(addr) (1) +#elif defined(CONFIG_DISCONTIGMEM) +/* XXX: FIXME -- wli */ +#define kern_addr_valid(kaddr) (0) +#elif defined(CONFIG_SPARSEMEM) +#define kern_addr_valid(addr) (1) +#endif + +#define pte_ERROR(e) \ + pr_err("%s: %d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) +#define pmd_ERROR(e) \ + pr_err("%s: %d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pud_ERROR(e) \ + pr_err("%s: %d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) +#define pgd_ERROR(e) \ + pr_err("%s: %d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) +extern void paging_init(void); + +/* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. 
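The swap-entry layout above can be checked with a small userspace sketch; the type and offset values are arbitrary examples, and the bit positions are the ones documented in the comment:

#include <assert.h>

int main(void)
{
	unsigned long type = 3, offset = 0x1234;		/* arbitrary swap area and slot */
	unsigned long val = (type << 8) | (offset << 16);	/* __swp_entry(type, offset) */

	assert(((val >> 8) & 0xff) == type);			/* __swp_type() */
	assert(((val >> 16) & ((1UL << 48) - 1)) == offset);	/* __swp_offset() */
	assert((val & 0xc1) == 0);	/* bits 0, 6, 7 (_PAGE_VALID/_PAGE_PSE/_PAGE_PROTNONE) stay clear */
	return 0;
}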
*/ +#define HAVE_ARCH_UNMAPPED_AREA + +#endif /* _ASM_SW64_PGTABLE_H */ diff --git a/arch/sw_64/include/asm/platform.h b/arch/sw_64/include/asm/platform.h new file mode 100644 index 000000000000..318b6ca732cd --- /dev/null +++ b/arch/sw_64/include/asm/platform.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_PLATFORM_H +#define _ASM_SW64_PLATFORM_H + +struct sw64_platform_ops { + void (*kill_arch)(int mode); + void __iomem *(*ioportmap)(unsigned long); + void (*register_platform_devices)(void); + void (*ops_fixup)(void); +}; + + +extern struct sw64_platform_ops *sw64_platform; + +extern struct sw64_platform_ops xuelang_ops; + +#endif /* _ASM_SW64_PLATFORM_H */ diff --git a/arch/sw_64/include/asm/preempt.h b/arch/sw_64/include/asm/preempt.h new file mode 100644 index 000000000000..dc6643a43766 --- /dev/null +++ b/arch/sw_64/include/asm/preempt.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_PREEMPT_H +#define _ASM_SW64_PREEMPT_H + +#include <asm-generic/preempt.h> + +#endif /* _ASM_SW64_PREEMPT_H */ diff --git a/arch/sw_64/include/asm/processor.h b/arch/sw_64/include/asm/processor.h new file mode 100644 index 000000000000..645c33a596ff --- /dev/null +++ b/arch/sw_64/include/asm/processor.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/asm-sw64/processor.h + * + * Copyright (C) 1994 Linus Torvalds + */ + +#ifndef _ASM_SW64_PROCESSOR_H +#define _ASM_SW64_PROCESSOR_H + +#include <linux/personality.h> /* for ADDR_LIMIT_32BIT */ + +/* + * Returns current instruction pointer ("program counter"). + */ +#define current_text_addr() \ + ({ void *__pc; __asm__ ("br %0, .+4" : "=r"(__pc)); __pc; }) + +/* + * SW64 does have an arch_pick_mmap_layout() + */ +#define HAVE_ARCH_PICK_MMAP_LAYOUT 1 + +/* + * We have a 52-bit user address space: 4PB user VM... + */ +#define TASK_SIZE (0x10000000000000UL) +#define UNMAPPED_BASE (TASK_SIZE >> 6) +#define STACK_TOP \ + (current->personality & ADDR_LIMIT_32BIT ? 0x80000000 : 0x00120000000UL) + +#define STACK_TOP_MAX 0x00120000000UL + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE \ + ((current->personality & ADDR_LIMIT_32BIT) ? 0x40000000 : UNMAPPED_BASE) + +typedef struct { + unsigned long seg; +} mm_segment_t; + +struct context_fpregs { + unsigned long f0[4]; + unsigned long f1[4]; + unsigned long f2[4]; + unsigned long f3[4]; + unsigned long f4[4]; + unsigned long f5[4]; + unsigned long f6[4]; + unsigned long f7[4]; + unsigned long f8[4]; + unsigned long f9[4]; + unsigned long f10[4]; + unsigned long f11[4]; + unsigned long f12[4]; + unsigned long f13[4]; + unsigned long f14[4]; + unsigned long f15[4]; + unsigned long f16[4]; + unsigned long f17[4]; + unsigned long f18[4]; + unsigned long f19[4]; + unsigned long f20[4]; + unsigned long f21[4]; + unsigned long f22[4]; + unsigned long f23[4]; + unsigned long f24[4]; + unsigned long f25[4]; + unsigned long f26[4]; + unsigned long f27[4]; + unsigned long f28[4]; + unsigned long f29[4]; + unsigned long f30[4]; +} __aligned(32); /* 256 bits aligned for simd */ + +struct thread_struct { + struct context_fpregs ctx_fp; + unsigned long fpcr; +}; +#define INIT_THREAD { } + +/* Return saved PC of a blocked thread. */ +struct task_struct; +extern unsigned long thread_saved_pc(struct task_struct *); + +/* Do necessary setup to start up a newly executed thread. 
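A few compile-time checks of the user address-space constants above; the EXAMPLE_ names are local stand-ins for TASK_SIZE and UNMAPPED_BASE, used only for this sketch:

#define EXAMPLE_TASK_SIZE	0x10000000000000UL		/* 2^52 = 4 PiB */
#define EXAMPLE_UNMAPPED_BASE	(EXAMPLE_TASK_SIZE >> 6)	/* 2^46 = 64 TiB */

_Static_assert(EXAMPLE_TASK_SIZE == 1UL << 52, "52-bit user address space");
_Static_assert(EXAMPLE_UNMAPPED_BASE == 1UL << 46, "default mmap base at 64 TiB");
_Static_assert(0x00120000000UL < EXAMPLE_TASK_SIZE, "STACK_TOP lies well inside the user range");

int main(void)
{
	return 0;
}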
*/ +struct pt_regs; +extern void start_thread(struct pt_regs *, unsigned long, unsigned long); + +/* Free all resources held by a thread. */ +extern void release_thread(struct task_struct *); + +unsigned long get_wchan(struct task_struct *p); + +#define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc) + +#define KSTK_ESP(tsk) \ + ((tsk) == current ? rdusp() : task_thread_info(tsk)->pcb.usp) + +#define cpu_relax() barrier() + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +#ifndef CONFIG_SMP +/* Nothing to prefetch. */ +#define spin_lock_prefetch(lock) do { } while (0) +#endif + +static inline void prefetch(const void *ptr) +{ + __builtin_prefetch(ptr, 0, 3); +} + +static inline void prefetchw(const void *ptr) +{ + __builtin_prefetch(ptr, 1, 3); +} + +#ifdef CONFIG_SMP +static inline void spin_lock_prefetch(const void *ptr) +{ + __builtin_prefetch(ptr, 1, 3); +} +#endif + +#endif /* _ASM_SW64_PROCESSOR_H */ diff --git a/arch/sw_64/include/asm/ptrace.h b/arch/sw_64/include/asm/ptrace.h new file mode 100644 index 000000000000..1dde5e6cba8a --- /dev/null +++ b/arch/sw_64/include/asm/ptrace.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_PTRACE_H +#define _ASM_SW64_PTRACE_H + +#include <uapi/asm/ptrace.h> + + +#define arch_has_single_step() (1) +#define user_mode(regs) (((regs)->ps & 8) != 0) +#define instruction_pointer(regs) ((regs)->pc) +#define profile_pc(regs) instruction_pointer(regs) +#define current_user_stack_pointer() rdusp() +#define user_stack_pointer(regs) rdusp() +#define kernel_stack_pointer(regs) (((regs->ps) >> 4) & (TASK_SIZE - 1)) +#define instruction_pointer_set(regs, val) ((regs)->pc = val) + +#define task_pt_regs(task) \ + ((struct pt_regs *) (task_stack_page(task) + 2 * PAGE_SIZE) - 1) + +#define current_pt_regs() \ + ((struct pt_regs *) ((char *)current_thread_info() + 2 * PAGE_SIZE) - 1) +#define signal_pt_regs current_pt_regs + +#define force_successful_syscall_return() (current_pt_regs()->r0 = 0) + +#define MAX_REG_OFFSET (offsetof(struct pt_regs, r18)) +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten + * @offset: offset of the register. + * + * regs_get_register returns the value of a register whose offset from @regs. + * The @offset is the offset of the register in struct pt_regs. + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline u64 regs_get_register(struct pt_regs *regs, unsigned int offset) +{ + if (unlikely(offset > MAX_REG_OFFSET)) + return 0; + + return *(unsigned long *)((unsigned long)regs + offset); +} +extern int regs_query_register_offset(const char *name); + +static inline unsigned long regs_return_value(struct pt_regs *regs) +{ + return regs->r0; +} +#endif diff --git a/arch/sw_64/include/asm/seccomp.h b/arch/sw_64/include/asm/seccomp.h new file mode 100644 index 000000000000..db2f298862c3 --- /dev/null +++ b/arch/sw_64/include/asm/seccomp.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/sw_64/include/asm/seccomp.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef _ASM_SW64_SECCOMP_H +#define _ASM_SW64_SECCOMP_H + +#include <asm/unistd.h> +#include <asm-generic/seccomp.h> + +#endif /* _ASM_SW64_SECCOMP_H */ diff --git a/arch/sw_64/include/asm/sections.h b/arch/sw_64/include/asm/sections.h new file mode 100644 index 000000000000..37dab4fde720 --- /dev/null +++ b/arch/sw_64/include/asm/sections.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SECTIONS_H +#define _ASM_SW64_SECTIONS_H + +/* nothing to see, move along */ +#include <asm-generic/sections.h> + +#endif diff --git a/arch/sw_64/include/asm/segment.h b/arch/sw_64/include/asm/segment.h new file mode 100644 index 000000000000..dc90357765e5 --- /dev/null +++ b/arch/sw_64/include/asm/segment.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SEGMENT_H +#define _ASM_SW64_SEGMENT_H + +/* Only here because we have some old header files that expect it.. */ + +#endif diff --git a/arch/sw_64/include/asm/serial.h b/arch/sw_64/include/asm/serial.h new file mode 100644 index 000000000000..059e603642b9 --- /dev/null +++ b/arch/sw_64/include/asm/serial.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SERIAL_H +#define _ASM_SW64_SERIAL_H + +#define BASE_BAUD (1843200 / 16) + +/* Standard COM flags (except for COM4, because of the 8514 problem) */ +#ifdef CONFIG_SERIAL_8250_DETECT_IRQ +#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ) +#define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | UPF_AUTO_IRQ) +#else +#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST) +#define STD_COM4_FLAGS UPF_BOOT_AUTOCONF +#endif + +#endif /* _ASM_SW64_SERIAL_H */ diff --git a/arch/sw_64/include/asm/setup.h b/arch/sw_64/include/asm/setup.h new file mode 100644 index 000000000000..c0fb4e8bd80c --- /dev/null +++ b/arch/sw_64/include/asm/setup.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SETUP_H +#define _ASM_SW64_SETUP_H + +#include <uapi/asm/setup.h> + +/* + * We leave one page for the initial stack page, and one page for + * the initial process structure. Also, the console eats 3 MB for + * the initial bootloader (one of which we can reclaim later). + */ +#define BOOT_PCB 0x20000000 +#define BOOT_ADDR 0x20000000 +/* Remove when official MILO sources have ELF support: */ +#define BOOT_SIZE (16 * 1024) + +#define KERNEL_START_PHYS CONFIG_PHYSICAL_START +#define KERNEL_START (__START_KERNEL_map + CONFIG_PHYSICAL_START) + +/* INIT_STACK may be used for merging lwk to kernel*/ +#define INIT_STACK (KERNEL_START + 0x02000) + +/* + * This is setup by the secondary bootstrap loader. Because + * the zero page is zeroed out as soon as the vm system is + * initialized, we need to copy things out into a more permanent + * place. 
+ */ +#define PARAM (KERNEL_START + 0x0A000) +#define COMMAND_LINE ((char *)(KERNEL_START + 0x0B000)) +#define INITRD_START (*(unsigned long *)(PARAM + 0x100)) +#define INITRD_SIZE (*(unsigned long *)(PARAM + 0x108)) +#define DTB_START (*(unsigned long *)(PARAM + 0x118)) + +#define _TEXT_START (KERNEL_START + 0x10000) + +#define COMMAND_LINE_OFF (0x10000UL - 0xB000UL) +#define INITRD_START_OFF (0x10000UL - 0xA100UL) +#define INITRD_SIZE_OFF (0x10000UL - 0xA108UL) + +#ifndef __ASSEMBLY__ +#include <asm/bootparam.h> +extern struct boot_params *sunway_boot_params; +#endif + +#endif diff --git a/arch/sw_64/include/asm/sfp-machine.h b/arch/sw_64/include/asm/sfp-machine.h new file mode 100644 index 000000000000..9b3e8688feee --- /dev/null +++ b/arch/sw_64/include/asm/sfp-machine.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Machine-dependent software floating-point definitions. + * sw64 kernel version. + * Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. + * This file is part of the GNU C Library. + * Contributed by Richard Henderson (rth@cygnus.com), + * Jakub Jelinek (jakub@redhat.com) and + * David S. Miller (davem@redhat.com). + */ + +#ifndef _ASM_SW64_SFP_MACHINE_H +#define _ASM_SW64_SFP_MACHINE_H + +#define _FP_W_TYPE_SIZE 64 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#define _FP_MUL_MEAT_S(R, X, Y) \ + _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S, R, X, Y) +#define _FP_MUL_MEAT_D(R, X, Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm) +#define _FP_MUL_MEAT_Q(R, X, Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q, R, X, Y, umul_ppmm) + +#define _FP_DIV_MEAT_S(R, X, Y) _FP_DIV_MEAT_1_imm(S, R, X, Y, _FP_DIV_HELP_imm) +#define _FP_DIV_MEAT_D(R, X, Y) _FP_DIV_MEAT_1_udiv(D, R, X, Y) +#define _FP_DIV_MEAT_Q(R, X, Y) _FP_DIV_MEAT_2_udiv(Q, R, X, Y) + +#define _FP_NANFRAC_S _FP_QNANBIT_S +#define _FP_NANFRAC_D _FP_QNANBIT_D +#define _FP_NANFRAC_Q _FP_QNANBIT_Q +#define _FP_NANSIGN_S 1 +#define _FP_NANSIGN_D 1 +#define _FP_NANSIGN_Q 1 + +#define _FP_KEEPNANFRACP 1 + +/* sw64 Architecture Handbook, 4.7.10.4 says that + * we should prefer any type of NaN in Fb, then Fa. + */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ +do { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R, X); \ + R##_c = FP_CLS_NAN; \ +} while (0) + +/* Obtain the current rounding mode. */ +#define FP_ROUNDMODE mode +#define FP_RND_NEAREST (FPCR_DYN_NORMAL >> FPCR_DYN_SHIFT) +#define FP_RND_ZERO (FPCR_DYN_CHOPPED >> FPCR_DYN_SHIFT) +#define FP_RND_PINF (FPCR_DYN_PLUS >> FPCR_DYN_SHIFT) +#define FP_RND_MINF (FPCR_DYN_MINUS >> FPCR_DYN_SHIFT) + +/* Exception flags.
*/ +#define FP_EX_INVALID IEEE_TRAP_ENABLE_INV +#define FP_EX_OVERFLOW IEEE_TRAP_ENABLE_OVF +#define FP_EX_UNDERFLOW IEEE_TRAP_ENABLE_UNF +#define FP_EX_DIVZERO IEEE_TRAP_ENABLE_DZE +#define FP_EX_INEXACT IEEE_TRAP_ENABLE_INE +#define FP_EX_DENORM IEEE_TRAP_ENABLE_DNO + +#define FP_DENORM_ZERO (swcr & IEEE_MAP_DMZ) + +/* We write the results always */ +#define FP_INHIBIT_RESULTS 0 + +#endif diff --git a/arch/sw_64/include/asm/shmparam.h b/arch/sw_64/include/asm/shmparam.h new file mode 100644 index 000000000000..15f71533b1ed --- /dev/null +++ b/arch/sw_64/include/asm/shmparam.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SHMPARAM_H +#define _ASM_SW64_SHMPARAM_H + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* _ASM_SW64_SHMPARAM_H */ diff --git a/arch/sw_64/include/asm/signal.h b/arch/sw_64/include/asm/signal.h new file mode 100644 index 000000000000..3e91b72c0b0a --- /dev/null +++ b/arch/sw_64/include/asm/signal.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SIGNAL_H +#define _ASM_SW64_SIGNAL_H + +#include <uapi/asm/signal.h> + +#define _NSIG 64 +#define _NSIG_BPW 64 +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +#ifdef CONFIG_OLD_SIGACTION +#define __ARCH_HAS_SA_RESTORER +#endif + +#include <asm/sigcontext.h> +#endif diff --git a/arch/sw_64/include/asm/smp.h b/arch/sw_64/include/asm/smp.h new file mode 100644 index 000000000000..e7aa742f73f0 --- /dev/null +++ b/arch/sw_64/include/asm/smp.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SMP_H +#define _ASM_SW64_SMP_H + +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/bitops.h> +#include <asm/hmcall.h> +#include <asm/hcall.h> +#include <asm/core.h> +#include <asm/hw_init.h> + +/* HACK: Cabrio WHAMI return value is bogus if more than 8 bits used.. :-( */ + +extern cpumask_t core_start; + +static inline unsigned char +__hard_smp_processor_id(void) +{ + register unsigned char __r0 __asm__("$0"); + __asm__ __volatile__( + "sys_call %1 #whami" + : "=r"(__r0) + : "i" (HMC_whami) + : "$1", "$22", "$23", "$24", "$25"); + return __r0; +} + +static inline unsigned long +read_vpcr(void) +{ + register unsigned long __r0 __asm__("$0"); + __asm__ __volatile__( + "sys_call %1 #rvpcr" + : "=r"(__r0) + : "i" (0x39) + : "$1", "$22", "$23", "$24", "$25"); + return __r0; +} + +#ifdef CONFIG_SMP +/* SMP initialization hook for setup_arch */ +void __init setup_smp(void); + +#include <asm/irq.h> + +/* smp reset control block */ +struct smp_rcb_struct { + void (*restart_entry)(unsigned long args); + unsigned long restart_args; + unsigned long ready; + unsigned long init_done; +}; + +#define INIT_SMP_RCB ((struct smp_rcb_struct *) __va(0x820000UL)) + +#define hard_smp_processor_id() __hard_smp_processor_id() +#define raw_smp_processor_id() (current_thread_info()->cpu) + +/* The map from sequential logical cpu number to hard cid. */ +extern int __cpu_to_rcid[NR_CPUS]; +#define cpu_to_rcid(cpu) __cpu_to_rcid[cpu] + +/* + * Map from hard cid to sequential logical cpu number. This will only + * not be idempotent when cpus failed to come on-line. 
+ */ +extern int __rcid_to_cpu[NR_CPUS]; +#define rcid_to_cpu(cpu) __rcid_to_cpu[cpu] +#define cpu_physical_id(cpu) __cpu_to_rcid[cpu] + +extern unsigned long tidle_pcb[NR_CPUS]; + +struct smp_ops { + void (*smp_prepare_boot_cpu)(void); + void (*smp_prepare_cpus)(unsigned int max_cpus); + void (*smp_cpus_done)(unsigned int max_cpus); + + void (*stop_other_cpus)(int wait); + void (*smp_send_reschedule)(int cpu); + + int (*cpu_up)(unsigned int cpu, struct task_struct *tidle); + int (*cpu_disable)(void); + void (*cpu_die)(unsigned int cpu); + void (*play_dead)(void); + + void (*send_call_func_ipi)(const struct cpumask *mask); + void (*send_call_func_single_ipi)(int cpu); +}; + +extern struct smp_ops smp_ops; + +static inline void smp_send_stop(void) +{ + smp_ops.stop_other_cpus(0); +} + +static inline void stop_other_cpus(void) +{ + smp_ops.stop_other_cpus(1); +} + +static inline void smp_prepare_boot_cpu(void) +{ + smp_ops.smp_prepare_boot_cpu(); +} + +static inline void smp_prepare_cpus(unsigned int max_cpus) +{ + smp_ops.smp_prepare_cpus(max_cpus); +} + +static inline void smp_cpus_done(unsigned int max_cpus) +{ + smp_ops.smp_cpus_done(max_cpus); +} + +static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + return smp_ops.cpu_up(cpu, tidle); +} + +static inline int __cpu_disable(void) +{ + return smp_ops.cpu_disable(); +} + +static inline void __cpu_die(unsigned int cpu) +{ + smp_ops.cpu_die(cpu); +} + +static inline void play_dead(void) +{ + smp_ops.play_dead(); +} + +static inline void smp_send_reschedule(int cpu) +{ + smp_ops.smp_send_reschedule(cpu); +} + +static inline void arch_send_call_function_single_ipi(int cpu) +{ + smp_ops.send_call_func_single_ipi(cpu); +} + +static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + smp_ops.send_call_func_ipi(mask); +} + + +#else /* CONFIG_SMP */ +static inline void play_dead(void) +{ + BUG(); /*Fixed me*/ +} +#define hard_smp_processor_id() 0 +#define smp_call_function_on_cpu(func, info, wait, cpu) ({ 0; }) +#define cpu_to_rcid(cpu) ((int)whami()) +#define rcid_to_cpu(rcid) 0 +#endif /* CONFIG_SMP */ + +#define NO_PROC_ID (-1) + +static inline void send_ipi(int cpu, unsigned long type) +{ + int rcid; + + rcid = cpu_to_rcid(cpu); + + if (is_in_guest()) + hcall(HCALL_IVI, rcid, type, 0); + else + sendii(rcid, type, 0); +} + +#define reset_cpu(cpu) send_ipi((cpu), II_RESET) + +#endif diff --git a/arch/sw_64/include/asm/socket.h b/arch/sw_64/include/asm/socket.h new file mode 100644 index 000000000000..e87043467775 --- /dev/null +++ b/arch/sw_64/include/asm/socket.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SOCKET_H +#define _ASM_SW64_SOCKET_H + +#include <uapi/asm/socket.h> + +/* O_NONBLOCK clashes with the bits used for socket types. Therefore we + * have to define SOCK_NONBLOCK to a different value here. 
+ */ +#define SOCK_NONBLOCK 0x40000000 +#endif /* _ASM_SW64_SOCKET_H */ diff --git a/arch/sw_64/include/asm/sparsemem.h b/arch/sw_64/include/asm/sparsemem.h new file mode 100644 index 000000000000..a60e757f3838 --- /dev/null +++ b/arch/sw_64/include/asm/sparsemem.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SPARSEMEM_H +#define _ASM_SW64_SPARSEMEM_H + +#include <asm/memory.h> + +#define SECTION_SIZE_BITS 28 + +#endif /* _ASM_SW64_SPARSEMEM_H */ diff --git a/arch/sw_64/include/asm/special_insns.h b/arch/sw_64/include/asm/special_insns.h new file mode 100644 index 000000000000..7f5a52b20444 --- /dev/null +++ b/arch/sw_64/include/asm/special_insns.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SPECIAL_INSNS_H +#define _ASM_SW64_SPECIAL_INSNS_H + +enum amask_enum { + AMASK_BWX = (1UL << 0), + AMASK_FIX = (1UL << 1), + AMASK_CIX = (1UL << 2), + AMASK_MAX = (1UL << 8), + AMASK_PRECISE_TRAP = (1UL << 9), +}; + +#define amask(mask) \ +({ \ + unsigned long __amask, __input = (mask); \ + __asm__ ("mov %1, %0" : "=r"(__amask) : "rI"(__input)); \ + __amask; \ +}) + +#endif /* _ASM_SW64_SPECIAL_INSNS_H */ diff --git a/arch/sw_64/include/asm/spinlock.h b/arch/sw_64/include/asm/spinlock.h new file mode 100644 index 000000000000..a05afb9af496 --- /dev/null +++ b/arch/sw_64/include/asm/spinlock.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ +#ifndef _ASM_SW64_SPINLOCK_H +#define _ASM_SW64_SPINLOCK_H + +#include <asm/qrwlock.h> +#include <asm/qspinlock.h> + +/* See include/linux/spinlock.h */ +#define smp_mb__after_spinlock() smp_mb() + +#endif /* _ASM_SW64_SPINLOCK_H */ diff --git a/arch/sw_64/include/asm/spinlock_types.h b/arch/sw_64/include/asm/spinlock_types.h new file mode 100644 index 000000000000..28f2183ced74 --- /dev/null +++ b/arch/sw_64/include/asm/spinlock_types.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SPINLOCK_TYPES_H +#define _ASM_SW64_SPINLOCK_TYPES_H + +#include <asm-generic/qspinlock_types.h> +#include <asm-generic/qrwlock_types.h> + +#endif diff --git a/arch/sw_64/include/asm/string.h b/arch/sw_64/include/asm/string.h new file mode 100644 index 000000000000..4f4a4687d8d0 --- /dev/null +++ b/arch/sw_64/include/asm/string.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_STRING_H +#define _ASM_SW64_STRING_H + +#ifdef __KERNEL__ + +/* + * GCC of any recent vintage doesn't do stupid things with bcopy. + * EGCS 1.1 knows all about expanding memcpy inline, others don't. + * + * Similarly for a memset with data = 0. + */ + +#define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *dest, const void *src, size_t n); +/* For backward compatibility with modules. Unused otherwise. 
*/ +extern void *__memcpy(void *dest, const void *src, size_t n); + +#define __HAVE_ARCH_MEMMOVE +extern void *memmove(void *dest, const void *src, size_t n); + +#define __HAVE_ARCH_MEMSET +extern void *__constant_c_memset(void *s, unsigned long c, size_t n); +extern void *___memset(void *s, int c, size_t n); +extern void *__memset(void *s, int c, size_t n); +extern void *memset(void *s, int c, size_t n); + +#define __HAVE_ARCH_STRCPY +extern char *strcpy(char *dest, const char *src); + +#define __HAVE_ARCH_STRNCPY +extern char *strncpy(char *dest, const char *src, size_t n); + +/* + * The following routine is like memset except that it writes 16-bit + * aligned values. The DEST and COUNT parameters must be even for + * correct operation. + */ + +#define __HAVE_ARCH_MEMSETW +extern void *__memsetw(void *dest, unsigned short c, size_t count); + +#define memsetw(s, c, n) \ +(__builtin_constant_p(c) \ + ? __constant_c_memset((s), 0x0001000100010001UL * (unsigned short)(c), (n)) \ + : __memsetw((s), (c), (n))) + +#endif /* __KERNEL__ */ + +#endif /* _ASM_SW64_STRING_H */ diff --git a/arch/sw_64/include/asm/suspend.h b/arch/sw_64/include/asm/suspend.h new file mode 100644 index 000000000000..83fd413fd6e2 --- /dev/null +++ b/arch/sw_64/include/asm/suspend.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SLEEP_H +#define _ASM_SW64_SLEEP_H + +#include <asm/hmcall.h> +#include <asm/ptrace.h> +#include <asm/processor.h> +#define SOFTINF_SLEEP_MAGIC 0x0123456789ABCDEFUL + +#ifdef CONFIG_HIBERNATION +#include <asm/vcpu.h> +#include <asm/thread_info.h> +#endif + +struct callee_saved_regs { + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; + unsigned long ra; +}; + +struct callee_saved_fpregs { + unsigned long f2[4]; + unsigned long f3[4]; + unsigned long f4[4]; + unsigned long f5[4]; + unsigned long f6[4]; + unsigned long f7[4]; + unsigned long f8[4]; + unsigned long f9[4]; +} __aligned(32); /* 256 bits aligned for simd */ + +struct processor_state { + struct callee_saved_regs regs; + struct callee_saved_fpregs fpregs; + unsigned long fpcr; +#ifdef CONFIG_HIBERNATION + struct pcb_struct pcb; + struct vcpucb vcb; +#endif +}; + +extern void sw64_suspend_deep_sleep(struct processor_state *state); +#endif /* _ASM_SW64_SLEEP_H */ diff --git a/arch/sw_64/include/asm/sw64_init.h b/arch/sw_64/include/asm/sw64_init.h new file mode 100644 index 000000000000..15842d22e5ba --- /dev/null +++ b/arch/sw_64/include/asm/sw64_init.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_INIT_H +#define _ASM_SW64_INIT_H + +#include <linux/cpu.h> +#include <linux/pci.h> + + +struct sw64_early_init_ops { + void (*setup_core_start)(struct cpumask *cpumask); + unsigned long (*get_node_mem)(int nodeid); +}; + +struct sw64_pci_init_ops { + int (*map_irq)(const struct pci_dev *dev, u8 slot, u8 pin); + unsigned long (*get_rc_enable)(unsigned long node); + void (*hose_init)(struct pci_controller *hose); + void (*set_rc_piu)(unsigned long node, unsigned long index); + int (*check_pci_linkup)(unsigned long node, unsigned long index); + void (*set_intx)(unsigned long node, unsigned long index, + unsigned long int_conf); +}; + + +struct sw64_chip_init_ops { + struct sw64_early_init_ops early_init; + struct sw64_pci_init_ops pci_init; + void (*fixup)(void); +}; + +struct sw64_chip_ops { + int (*get_cpu_num)(void); + void (*device_interrupt)(unsigned long irq_info); + void (*suspend)(int 
wake); + void (*fixup)(void); +}; + +extern void sw64_init_noop(void); +extern void sw64_setup_chip_ops(void); +extern struct sw64_chip_ops *sw64_chip; +extern struct sw64_chip_init_ops *sw64_chip_init; + +DECLARE_PER_CPU(unsigned long, hard_node_id); + +#endif /* _ASM_SW64_INIT_H */ diff --git a/arch/sw_64/include/asm/sw64io.h b/arch/sw_64/include/asm/sw64io.h new file mode 100644 index 000000000000..7c032070acf0 --- /dev/null +++ b/arch/sw_64/include/asm/sw64io.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SW64IO_H +#define _ASM_SW64_SW64IO_H + +#include <asm/page.h> + +extern void setup_chip_clocksource(void); + +#if defined(CONFIG_SW64_CHIP3) +#include <asm/chip3_io.h> +#endif + +#define MK_RC_CFG(nid, idx) \ + (PAGE_OFFSET | SW64_PCI_IO_BASE((nid), (idx)) | PCI_RC_CFG) +#define MK_PIU_IOR0(nid, idx) \ + (PAGE_OFFSET | SW64_PCI_IO_BASE((nid), (idx)) | PCI_IOR0_BASE) +#define MK_PIU_IOR1(nid, idx) \ + (PAGE_OFFSET | SW64_PCI_IO_BASE((nid), (idx)) | PCI_IOR1_BASE) + +static inline unsigned int +read_rc_conf(unsigned long node, unsigned long rc_index, + unsigned int conf_offset) +{ + unsigned long addr; + unsigned int value; + + addr = MK_RC_CFG(node, rc_index) | conf_offset; + value = *(volatile unsigned int *)addr; + mb(); + + return value; +} + +static inline void +write_rc_conf(unsigned long node, unsigned long rc_index, + unsigned int conf_offset, unsigned int data) +{ + unsigned long addr; + + addr = MK_RC_CFG(node, rc_index) | conf_offset; + *(unsigned int *)addr = data; + mb(); +} + +static inline unsigned long +read_piu_ior0(unsigned long node, unsigned long rc_index, + unsigned int reg) +{ + unsigned long addr; + unsigned long value; + + addr = MK_PIU_IOR0(node, rc_index) + reg; + value = *(volatile unsigned long __iomem *)addr; + mb(); + + return value; +} + +static inline void +write_piu_ior0(unsigned long node, unsigned long rc_index, + unsigned int reg, unsigned long data) +{ + unsigned long addr; + + addr = MK_PIU_IOR0(node, rc_index) + reg; + *(unsigned long __iomem *)addr = data; + mb(); +} + +static inline unsigned long +read_piu_ior1(unsigned long node, unsigned long rc_index, + unsigned int reg) +{ + unsigned long addr, value; + + addr = MK_PIU_IOR1(node, rc_index) + reg; + value = *(volatile unsigned long __iomem *)addr; + mb(); + + return value; +} + +static inline void +write_piu_ior1(unsigned long node, unsigned long rc_index, + unsigned int reg, unsigned long data) +{ + unsigned long addr; + + addr = MK_PIU_IOR1(node, rc_index) + reg; + *(volatile unsigned long __iomem *)addr = data; + mb(); +} + +static inline unsigned long +sw64_io_read(unsigned long node, unsigned long reg) +{ + unsigned long addr, value; + + addr = PAGE_OFFSET | SW64_IO_BASE(node) | reg; + value = *(volatile unsigned long __iomem *)addr; + mb(); + + return value; +} + +static inline void +sw64_io_write(unsigned long node, unsigned long reg, unsigned long data) +{ + unsigned long addr; + + addr = PAGE_OFFSET | SW64_IO_BASE(node) | reg; + *(volatile unsigned long __iomem *)addr = data; + mb(); +} +#endif diff --git a/arch/sw_64/include/asm/switch_to.h b/arch/sw_64/include/asm/switch_to.h new file mode 100644 index 000000000000..22045b247557 --- /dev/null +++ b/arch/sw_64/include/asm/switch_to.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SWITCH_TO_H +#define _ASM_SW64_SWITCH_TO_H + +struct task_struct; +extern struct task_struct *__switch_to(unsigned long, struct task_struct *); +extern void restore_da_match_after_sched(void); 
+#define switch_to(P, N, L) \ +do { \ + (L) = __switch_to(virt_to_phys(&task_thread_info(N)->pcb), (P));\ + check_mmu_context(); \ +} while (0) + + +/* TODO: finish_arch_switch has been removed from arch-independent code. */ + +/* + * finish_arch_switch will be called after switch_to + */ +#define finish_arch_post_lock_switch() \ +do { \ + restore_da_match_after_sched(); \ +} while (0) + + +#endif /* _ASM_SW64_SWITCH_TO_H */ diff --git a/arch/sw_64/include/asm/syscall.h b/arch/sw_64/include/asm/syscall.h new file mode 100644 index 000000000000..4b784c3d846b --- /dev/null +++ b/arch/sw_64/include/asm/syscall.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_SYSCALL_H +#define _ASM_SW64_SYSCALL_H + +#include <uapi/linux/audit.h> + +extern void *sys_call_table[]; +static inline int syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r0; +} + +static inline long +syscall_get_error(struct task_struct *task, struct pt_regs *regs) +{ + return regs->r19 ? -regs->r0 : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r0; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + if (error) { + regs->r0 = -error; + regs->r19 = -1; + } else { + regs->r0 = val; + regs->r19 = 0; + } +} + + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + /* Do nothing */ +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned long *args) +{ + *args++ = regs->r16; + *args++ = regs->r17; + *args++ = regs->r18; + *args++ = regs->r19; + *args++ = regs->r20; + *args = regs->r21; +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + const unsigned long *args) +{ + regs->r16 = *args++; + regs->r17 = *args++; + regs->r18 = *args++; + regs->r19 = *args++; + regs->r20 = *args++; + regs->r21 = *args; +} + +static inline int syscall_get_arch(struct task_struct *task) +{ + return AUDIT_ARCH_SW64; +} + +#endif /* _ASM_SW64_SYSCALL_H */ diff --git a/arch/sw_64/include/asm/tc.h b/arch/sw_64/include/asm/tc.h new file mode 100644 index 000000000000..f995a2a75f85 --- /dev/null +++ b/arch/sw_64/include/asm/tc.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_TC_H +#define _ASM_SW64_TC_H + +static inline unsigned long rdtc(void) +{ + unsigned long ret; + + __asm__ __volatile__ ("rtc %0" : "=r"(ret)); + return ret; +} + +extern void tc_sync_clear(void); +extern void tc_sync_ready(void *ignored); +extern void tc_sync_set(void); +#endif diff --git a/arch/sw_64/include/asm/termios.h b/arch/sw_64/include/asm/termios.h new file mode 100644 index 000000000000..ef509946675a --- /dev/null +++ b/arch/sw_64/include/asm/termios.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_TERMIOS_H +#define _ASM_SW64_TERMIOS_H + +#include <uapi/asm/termios.h> + +/* eof=^D eol=\0 eol2=\0 erase=del + * werase=^W kill=^U reprint=^R sxtc=\0 + * intr=^C quit=^\ susp=^Z <VDSUSP> + * start=^Q stop=^S lnext=^V discard=^U + * vmin=\1 vtime=\0 + */ +#define INIT_C_CC "\004\000\000\177\027\025\022\000\003\034\032\000\021\023\026\025\001\000" + +/* + * Translate a "termio" structure into a "termios". Ugh. 
+ */ + +#define user_termio_to_kernel_termios(a_termios, u_termio) \ +({ \ + struct ktermios *k_termios = (a_termios); \ + struct termio k_termio; \ + int canon, ret; \ + \ + ret = copy_from_user(&k_termio, u_termio, sizeof(k_termio)); \ + if (!ret) { \ + /* Overwrite only the low bits. */ \ + *(unsigned short *)&k_termios->c_iflag = k_termio.c_iflag; \ + *(unsigned short *)&k_termios->c_oflag = k_termio.c_oflag; \ + *(unsigned short *)&k_termios->c_cflag = k_termio.c_cflag; \ + *(unsigned short *)&k_termios->c_lflag = k_termio.c_lflag; \ + canon = k_termio.c_lflag & ICANON; \ + \ + k_termios->c_cc[VINTR] = k_termio.c_cc[_VINTR]; \ + k_termios->c_cc[VQUIT] = k_termio.c_cc[_VQUIT]; \ + k_termios->c_cc[VERASE] = k_termio.c_cc[_VERASE]; \ + k_termios->c_cc[VKILL] = k_termio.c_cc[_VKILL]; \ + k_termios->c_cc[VEOL2] = k_termio.c_cc[_VEOL2]; \ + k_termios->c_cc[VSWTC] = k_termio.c_cc[_VSWTC]; \ + k_termios->c_cc[canon ? VEOF : VMIN] = k_termio.c_cc[_VEOF]; \ + k_termios->c_cc[canon ? VEOL : VTIME] = k_termio.c_cc[_VEOL]; \ + } \ + ret; \ +}) + +/* + * Translate a "termios" structure into a "termio". Ugh. + * + * Note the "fun" _VMIN overloading. + */ +#define kernel_termios_to_user_termio(u_termio, a_termios) \ +({ \ + struct ktermios *k_termios = (a_termios); \ + struct termio k_termio; \ + int canon; \ + \ + k_termio.c_iflag = k_termios->c_iflag; \ + k_termio.c_oflag = k_termios->c_oflag; \ + k_termio.c_cflag = k_termios->c_cflag; \ + canon = (k_termio.c_lflag = k_termios->c_lflag) & ICANON; \ + \ + k_termio.c_line = k_termios->c_line; \ + k_termio.c_cc[_VINTR] = k_termios->c_cc[VINTR]; \ + k_termio.c_cc[_VQUIT] = k_termios->c_cc[VQUIT]; \ + k_termio.c_cc[_VERASE] = k_termios->c_cc[VERASE]; \ + k_termio.c_cc[_VKILL] = k_termios->c_cc[VKILL]; \ + k_termio.c_cc[_VEOF] = k_termios->c_cc[canon ? VEOF : VMIN]; \ + k_termio.c_cc[_VEOL] = k_termios->c_cc[canon ? 
VEOL : VTIME]; \ + k_termio.c_cc[_VEOL2] = k_termios->c_cc[VEOL2]; \ + k_termio.c_cc[_VSWTC] = k_termios->c_cc[VSWTC]; \ + \ + copy_to_user(u_termio, &k_termio, sizeof(k_termio)); \ +}) + +#define user_termios_to_kernel_termios(k, u) \ + copy_from_user(k, u, sizeof(struct termios)) + +#define kernel_termios_to_user_termios(u, k) \ + copy_to_user(u, k, sizeof(struct termios)) + +#endif /* _ASM_SW64_TERMIOS_H */ diff --git a/arch/sw_64/include/asm/thread_info.h b/arch/sw_64/include/asm/thread_info.h new file mode 100644 index 000000000000..cffb09fc6262 --- /dev/null +++ b/arch/sw_64/include/asm/thread_info.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_THREAD_INFO_H +#define _ASM_SW64_THREAD_INFO_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ +#include <asm/processor.h> +#include <asm/types.h> +#include <asm/sysinfo.h> + +struct pcb_struct { + unsigned long ksp; + unsigned long usp; + unsigned long ptbr; + unsigned int pcc; + unsigned int asn; + unsigned long unique; + unsigned long flags; + unsigned long da_match, da_mask; + unsigned long dv_match, dv_mask; + unsigned long dc_ctl; +}; + +struct thread_info { + struct pcb_struct pcb; /* hmcode state */ + + struct task_struct *task; /* main task structure */ + unsigned int flags; /* low level flags */ + unsigned int ieee_state; /* see fpu.h */ + + mm_segment_t addr_limit; /* thread address space */ + unsigned int cpu; /* current CPU */ + int preempt_count; /* 0 => preemptible, <0 => BUG */ + unsigned int status; /* thread-synchronous flags */ + + int bpt_nsaved; + unsigned long bpt_addr[2]; /* breakpoint handling */ + unsigned int bpt_insn[2]; +#ifdef CONFIG_DYNAMIC_FTRACE + unsigned long dyn_ftrace_addr; +#endif +}; + +/* + * Macros/functions for gaining access to the thread information structure. + */ +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .addr_limit = KERNEL_DS, \ + .preempt_count = INIT_PREEMPT_COUNT, \ +} + +/* How to get the thread information struct from C. */ +register struct thread_info *__current_thread_info __asm__("$8"); +#define current_thread_info() __current_thread_info + +#endif /* __ASSEMBLY__ */ + +/* Thread information allocation. */ +#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE (2 * PAGE_SIZE) + +/* + * Thread information flags: + * - these are process state flags and are used from assembly + * - pending work-to-be-done flags come first and must be assigned to be + * within bits 0 to 7 to fit in an immediate operand. + * + * TIF_SYSCALL_TRACE is known to be 0 via blbs.
+ */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_SYSCALL_AUDIT 4 /* syscall audit active */ +#define TIF_UPROBE 5 /* uprobe breakpoint or singlestep */ +#define TIF_DIE_IF_KERNEL 9 /* dik recursion lock */ +#define TIF_SYSCALL_TRACEPOINT 10 +#define TIF_SECCOMP 11 /* secure computing */ +#define TIF_MEMDIE 13 /* is terminating due to OOM killer */ +#define TIF_POLLING_NRFLAG 14 /* idle is polling for TIF_NEED_RESCHED */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) +#define _TIF_UPROBE (1 << TIF_UPROBE) + +/* Work to do on interrupt/exception return. */ +#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ + _TIF_NOTIFY_RESUME | _TIF_UPROBE) + +#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP) + +/* Work to do on any return to userspace. */ +#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK | _TIF_SYSCALL_TRACE) + +#define TS_UAC_NOPRINT 0x0001 /* ! Preserve the following three */ +#define TS_UAC_NOFIX 0x0002 /* ! flags as they match */ +#define TS_UAC_SIGBUS 0x0004 /* ! userspace part of 'prctl' */ + +#define SET_UNALIGN_CTL(task, value) ({ \ + __u32 status = task_thread_info(task)->status & ~UAC_BITMASK; \ + if (value & PR_UNALIGN_NOPRINT) \ + status |= TS_UAC_NOPRINT; \ + if (value & PR_UNALIGN_SIGBUS) \ + status |= TS_UAC_SIGBUS; \ + if (value & PR_NOFIX) /* sw-specific */ \ + status |= TS_UAC_NOFIX; \ + task_thread_info(task)->status = status; \ + 0; }) + +#define GET_UNALIGN_CTL(task, value) ({ \ + __u32 status = task_thread_info(task)->status & ~UAC_BITMASK; \ + __u32 res = 0; \ + if (status & TS_UAC_NOPRINT) \ + res |= PR_UNALIGN_NOPRINT; \ + if (status & TS_UAC_SIGBUS) \ + res |= PR_UNALIGN_SIGBUS; \ + if (status & TS_UAC_NOFIX) \ + res |= PR_NOFIX; \ + put_user(res, (int __user *)(value)); \ + }) + +#endif /* __KERNEL__ */ +#endif /* _ASM_SW64_THREAD_INFO_H */ diff --git a/arch/sw_64/include/asm/timex.h b/arch/sw_64/include/asm/timex.h new file mode 100644 index 000000000000..9065e39a0466 --- /dev/null +++ b/arch/sw_64/include/asm/timex.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_TIMEX_H +#define _ASM_SW64_TIMEX_H + +#include <asm/tc.h> + +/* + * With only one or two oddballs, we use the RTC as the ticker, selecting + * the 32.768kHz reference clock, which nicely divides down to our HZ. + */ +#define CLOCK_TICK_RATE 32768 + +/* + * Standard way to access the cycle counter. 
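+ *
+ * Usage sketch (caller code assumed, do_something() is a stand-in):
+ *
+ *	cycles_t t0 = get_cycles();
+ *	do_something();
+ *	cycles_t delta = get_cycles() - t0;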
+ */ + +typedef unsigned long cycles_t; + +static inline cycles_t get_cycles(void) +{ + return rdtc(); +} + +#endif diff --git a/arch/sw_64/include/asm/tlb.h b/arch/sw_64/include/asm/tlb.h new file mode 100644 index 000000000000..4902624dba88 --- /dev/null +++ b/arch/sw_64/include/asm/tlb.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_TLB_H +#define _ASM_SW64_TLB_H + +#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) +#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0) + +#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) + +#include <asm-generic/tlb.h> + +#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) +#define __pmd_free_tlb(tlb, pmd, address) pmd_free((tlb)->mm, pmd) + +#define __pud_free_tlb(tlb, pud, address) pud_free((tlb)->mm, pud) + +#endif diff --git a/arch/sw_64/include/asm/tlbflush.h b/arch/sw_64/include/asm/tlbflush.h new file mode 100644 index 000000000000..7805bb287257 --- /dev/null +++ b/arch/sw_64/include/asm/tlbflush.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_TLBFLUSH_H +#define _ASM_SW64_TLBFLUSH_H + +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/compiler.h> +#include <asm/pgalloc.h> +#include <asm/hw_init.h> +#include <asm/hmcall.h> + +extern void __load_new_mm_context(struct mm_struct *); + + +static inline void flush_tlb_current(struct mm_struct *mm) +{ + __load_new_mm_context(mm); +} + +/* + * Flush just one page in the current TLB set. We need to be very + * careful about the icache here, there is no way to invalidate a + * specific icache page. + */ + +static inline void flush_tlb_current_page(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long addr) +{ + if (vma->vm_flags & VM_EXEC) { + tbi(3, addr); + if (icache_is_vivt_no_ictag()) + imb(); + } else + tbi(2, addr); +} + + +/* Flush current user mapping. */ +static inline void flush_tlb(void) +{ + flush_tlb_current(current->active_mm); +} + +/* Flush someone else's user mapping. */ +static inline void flush_tlb_other(struct mm_struct *mm) +{ + unsigned long *mmc; + + if (mm) { + mmc = &mm->context.asid[smp_processor_id()]; + /* + * Check it's not zero first to avoid cacheline ping pong + * when possible. + */ + if (*mmc) + *mmc = 0; + } +} + +#ifndef CONFIG_SMP +/* + * Flush everything (kernel mapping may also have changed + * due to vmalloc/vfree). + */ +static inline void flush_tlb_all(void) +{ + tbia(); +} + +/* Flush a specified user mapping. */ +static inline void +flush_tlb_mm(struct mm_struct *mm) +{ + if (mm == current->mm) + flush_tlb_current(mm); + else + flush_tlb_other(mm); +} + +/* Page-granular tlb flush. */ +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long addr) +{ + struct mm_struct *mm = vma->vm_mm; + + if (mm == current->mm) + flush_tlb_current_page(mm, vma, addr); + else + flush_tlb_other(mm); +} + +/* + * Flush a specified range of user mapping. On the sw64 we flush + * the whole user tlb. 
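+ *
+ * Illustration: generic mm code calls
+ *
+ *	flush_tlb_range(vma, start, end);
+ *
+ * after changing mappings; in the !SMP implementation below this
+ * simply degenerates to flush_tlb_mm(vma->vm_mm).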
+ */ +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + flush_tlb_mm(vma->vm_mm); +} + +#else /* CONFIG_SMP */ + +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *); +extern void flush_tlb_page(struct vm_area_struct *, unsigned long); +extern void flush_tlb_range(struct vm_area_struct *, unsigned long, + unsigned long); + +#endif /* CONFIG_SMP */ + +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + flush_tlb_all(); +} + +#endif /* _ASM_SW64_TLBFLUSH_H */ diff --git a/arch/sw_64/include/asm/topology.h b/arch/sw_64/include/asm/topology.h new file mode 100644 index 000000000000..79af6349fe80 --- /dev/null +++ b/arch/sw_64/include/asm/topology.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_TOPOLOGY_H +#define _ASM_SW64_TOPOLOGY_H + +#include <linux/smp.h> +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <asm/core.h> +#include <asm/smp.h> + +#define THREAD_ID_SHIFT 5 +#define THREAD_ID_MASK 1 +#define CORE_ID_MASK ((1 << THREAD_ID_SHIFT) - 1) + +extern struct cpu_topology cpu_topology[NR_CPUS]; + +#define topology_physical_package_id(cpu) (cpu_topology[cpu].package_id) +#define topology_core_id(cpu) (cpu_topology[cpu].core_id) +#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) +#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) +#define topology_llc_cpumask(cpu) (&cpu_topology[cpu].llc_sibling) + +void init_cpu_topology(void); +void store_cpu_topology(int cpuid); +void remove_cpu_topology(int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); + +static inline int rcid_to_package(int rcid) +{ + return rcid >> CORES_PER_NODE_SHIFT; +} + +#ifdef CONFIG_NUMA + +#ifndef CONFIG_USE_PERCPU_NUMA_NODE_ID +extern int cpuid_to_nid(int cpuid); +static inline int cpu_to_node(int cpu) +{ + int node; + + node = cpuid_to_nid(cpu); + +#ifdef DEBUG_NUMA + BUG_ON(node < 0); +#endif + + return node; +} + +static inline void set_cpu_numa_node(int cpu, int node) { } +#endif /* CONFIG_USE_PERCPU_NUMA_NODE_ID */ + +extern const struct cpumask *cpumask_of_node(int node); +extern void numa_add_cpu(unsigned int cpu); +extern void numa_remove_cpu(unsigned int cpu); +extern void numa_store_cpu_info(unsigned int cpu); +#define parent_node(node) (node) +#define cpumask_of_pcibus(bus) (cpu_online_mask) +#else /* !CONFIG_NUMA */ +static inline void numa_add_cpu(unsigned int cpu) { } +static inline void numa_remove_cpu(unsigned int cpu) { } +static inline void numa_store_cpu_info(unsigned int cpu) { } +#endif /* CONFIG_NUMA */ +#include <asm-generic/topology.h> + +static inline void arch_fix_phys_package_id(int num, u32 slot) { } + +#endif /* _ASM_SW64_TOPOLOGY_H */ diff --git a/arch/sw_64/include/asm/trace_clock.h b/arch/sw_64/include/asm/trace_clock.h new file mode 100644 index 000000000000..57324215a837 --- /dev/null +++ b/arch/sw_64/include/asm/trace_clock.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_TRACE_CLOCK_H +#define _ASM_SW64_TRACE_CLOCK_H + +#include <linux/compiler.h> +#include <linux/types.h> + +#define ARCH_TRACE_CLOCKS + +#endif /* _ASM_SW64_TRACE_CLOCK_H */ diff --git a/arch/sw_64/include/asm/types.h b/arch/sw_64/include/asm/types.h new file mode 100644 index 000000000000..37d626269a02 --- /dev/null +++ b/arch/sw_64/include/asm/types.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_TYPES_H +#define _ASM_SW64_TYPES_H + +#include 
<asm-generic/int-ll64.h> + +#endif /* _ASM_SW64_TYPES_H */ diff --git a/arch/sw_64/include/asm/uaccess.h b/arch/sw_64/include/asm/uaccess.h new file mode 100644 index 000000000000..ceacfaa07cfb --- /dev/null +++ b/arch/sw_64/include/asm/uaccess.h @@ -0,0 +1,313 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_UACCESS_H +#define _ASM_SW64_UACCESS_H + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * Or at least it did once upon a time. Nowadays it is a mask that + * defines which bits of the address space are off limits. This is a + * wee bit faster than the above. + * + * For historical reasons, these macros are grossly misnamed. + */ + +#define KERNEL_DS ((mm_segment_t) { 0UL }) +#define USER_DS ((mm_segment_t) { -0x10000000000000UL }) + +#define get_fs() (current_thread_info()->addr_limit) +#define get_ds() (KERNEL_DS) +#define set_fs(x) (current_thread_info()->addr_limit = (x)) + +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) + +/* + * Is a address valid? This does a straightforward calculation rather + * than tests. + * + * Address valid if: + * - "addr" doesn't have any high-bits set + * - AND "size" doesn't have any high-bits set + * - AND "addr+size-(size != 0)" doesn't have any high-bits set + * - OR we are in kernel mode. + */ +#define __access_ok(addr, size) ({ \ + unsigned long __ao_a = (addr), __ao_b = (size); \ + unsigned long __ao_end = __ao_a + __ao_b - !!__ao_b; \ + (get_fs().seg & (__ao_a | __ao_b | __ao_end)) == 0; }) + +#define access_ok(addr, size) \ +({ \ + __chk_user_ptr(addr); \ + __access_ok(((unsigned long)(addr)), (size)); \ +}) + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + * + * As the sw64 uses the same address space for kernel and user + * data, we can just do these as direct assignments. (Of course, the + * exception handling means that it's no longer "just"...) + * + * Careful to not + * (a) re-use the arguments for side effects (sizeof/typeof is ok) + * (b) require any knowledge of processes at this stage + */ +#define put_user(x, ptr) \ + __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) +#define get_user(x, ptr) \ + __get_user_check((x), (ptr), sizeof(*(ptr))) + +/* + * The "__xxx" versions do not do address space checking, useful when + * doing multiple accesses to the same area (the programmer has to do the + * checks by hand with "access_ok()") + */ +#define __put_user(x, ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) +#define __get_user(x, ptr) \ + __get_user_nocheck((x), (ptr), sizeof(*(ptr))) +/* + * The "ldi %1, 2b-1b(%0)" bits are magic to get the assembler to + * encode the bits we need for resolving the exception. See the + * more extensive comments with fixup_inline_exception below for + * more information. 
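+ *
+ * For illustration, a typical caller of the checked variants (uptr is
+ * a placeholder user pointer):
+ *
+ *	int val;
+ *
+ *	if (get_user(val, (int __user *)uptr))
+ *		return -EFAULT;
+ *	val++;
+ *	if (put_user(val, (int __user *)uptr))
+ *		return -EFAULT;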
+ */ + +extern void __get_user_unknown(void); + +#define __get_user_nocheck(x, ptr, size) \ +({ \ + long __gu_err = 0; \ + unsigned long __gu_val; \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __get_user_8(ptr); \ + break; \ + case 2: \ + __get_user_16(ptr); \ + break; \ + case 4: \ + __get_user_32(ptr); \ + break; \ + case 8: \ + __get_user_64(ptr); \ + break; \ + default: \ + __get_user_unknown(); \ + break; \ + } \ + (x) = (__force __typeof__(*(ptr))) __gu_val; \ + __gu_err; \ +}) + +#define __get_user_check(x, ptr, size) \ +({ \ + long __gu_err = -EFAULT; \ + unsigned long __gu_val = 0; \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + if (__access_ok((unsigned long)__gu_addr, size)) { \ + __gu_err = 0; \ + switch (size) { \ + case 1: \ + __get_user_8(__gu_addr); \ + break; \ + case 2: \ + __get_user_16(__gu_addr); \ + break; \ + case 4: \ + __get_user_32(__gu_addr); \ + break; \ + case 8: \ + __get_user_64(__gu_addr); \ + break; \ + default: \ + __get_user_unknown(); \ + break; \ + } \ + } \ + (x) = (__force __typeof__(*(ptr))) __gu_val; \ + __gu_err; \ +}) + +struct __large_struct { unsigned long buf[100]; }; +#define __m(x) (*(struct __large_struct __user *)(x)) + +#define __get_user_64(addr) \ + __asm__("1: ldl %0,%2\n" \ + "2:\n" \ + ".section __ex_table,"a"\n" \ + " .long 1b - .\n" \ + " ldi %0, 2b-1b(%1)\n" \ + ".previous" \ + : "=r"(__gu_val), "=r"(__gu_err) \ + : "m"(__m(addr)), "1"(__gu_err)) + +#define __get_user_32(addr) \ + __asm__("1: ldw %0,%2\n" \ + "2:\n" \ + ".section __ex_table,"a"\n" \ + " .long 1b - .\n" \ + " ldi %0, 2b-1b(%1)\n" \ + ".previous" \ + : "=r"(__gu_val), "=r"(__gu_err) \ + : "m"(__m(addr)), "1"(__gu_err)) + +#define __get_user_16(addr) \ + __asm__("1: ldhu %0,%2\n" \ + "2:\n" \ + ".section __ex_table,"a"\n" \ + " .long 1b - .\n" \ + " ldi %0, 2b-1b(%1)\n" \ + ".previous" \ + : "=r"(__gu_val), "=r"(__gu_err) \ + : "m"(__m(addr)), "1"(__gu_err)) + +#define __get_user_8(addr) \ + __asm__("1: ldbu %0,%2\n" \ + "2:\n" \ + ".section __ex_table,"a"\n" \ + " .long 1b - .\n" \ + " ldi %0, 2b-1b(%1)\n" \ + ".previous" \ + : "=r"(__gu_val), "=r"(__gu_err) \ + : "m"(__m(addr)), "1"(__gu_err)) + +extern void __put_user_unknown(void); + +#define __put_user_nocheck(x, ptr, size) \ +({ \ + long __pu_err = 0; \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __put_user_8(x, ptr); \ + break; \ + case 2: \ + __put_user_16(x, ptr); \ + break; \ + case 4: \ + __put_user_32(x, ptr); \ + break; \ + case 8: \ + __put_user_64(x, ptr); \ + break; \ + default: \ + __put_user_unknown(); \ + break; \ + } \ + __pu_err; \ +}) + +#define __put_user_check(x, ptr, size) \ +({ \ + long __pu_err = -EFAULT; \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + if (__access_ok((unsigned long)__pu_addr, size)) { \ + __pu_err = 0; \ + switch (size) { \ + case 1: \ + __put_user_8(x, __pu_addr); \ + break; \ + case 2: \ + __put_user_16(x, __pu_addr); \ + break; \ + case 4: \ + __put_user_32(x, __pu_addr); \ + break; \ + case 8: \ + __put_user_64(x, __pu_addr); \ + break; \ + default: \ + __put_user_unknown(); \ + break; \ + } \ + } \ + __pu_err; \ +}) + +/* + * The "__put_user_xx()" macros tell gcc they read from memory + * instead of writing: this is because they do not write to + * any memory gcc knows about, so there are no aliasing issues + */ +#define __put_user_64(x, addr) \ +__asm__ __volatile__("1: stl %r2, %1\n" \ + "2:\n" \ + ".section __ex_table, "a"\n" \ + " .long 1b - .\n" \ + " ldi $31, 2b-1b(%0)\n" \ + ".previous" \ + : "=r"(__pu_err) \ + : 
"m" (__m(addr)), "rJ" (x), "0"(__pu_err)) + +#define __put_user_32(x, addr) \ +__asm__ __volatile__("1: stw %r2, %1\n" \ + "2:\n" \ + ".section __ex_table, "a"\n" \ + " .long 1b - .\n" \ + " ldi $31, 2b-1b(%0)\n" \ + ".previous" \ + : "=r"(__pu_err) \ + : "m"(__m(addr)), "rJ"(x), "0"(__pu_err)) + +#define __put_user_16(x, addr) \ +__asm__ __volatile__("1: sth %r2, %1\n" \ + "2:\n" \ + ".section __ex_table, "a"\n" \ + " .long 1b - .\n" \ + " ldi $31, 2b-1b(%0)\n" \ + ".previous" \ + : "=r"(__pu_err) \ + : "m"(__m(addr)), "rJ"(x), "0"(__pu_err)) + +#define __put_user_8(x, addr) \ +__asm__ __volatile__("1: stb %r2, %1\n" \ + "2:\n" \ + ".section __ex_table, "a"\n" \ + " .long 1b - .\n" \ + " ldi $31, 2b-1b(%0)\n" \ + ".previous" \ + : "=r"(__pu_err) \ + : "m"(__m(addr)), "rJ"(x), "0"(__pu_err)) + +/* + * Complex access routines + */ + +extern long __copy_user(void *to, const void *from, long len); + +static inline unsigned long +raw_copy_from_user(void *to, const void __user *from, unsigned long len) +{ + return __copy_user(to, (__force const void *)from, len); +} + +static inline unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long len) +{ + return __copy_user((__force void *)to, from, len); +} + +extern long __clear_user(void __user *to, long len); + +static inline long +clear_user(void __user *to, long len) +{ + if (__access_ok((unsigned long)to, len)) + len = __clear_user(to, len); + return len; +} + +#define user_addr_max() (uaccess_kernel() ? ~0UL : TASK_SIZE) + +extern long strncpy_from_user(char *dest, const char __user *src, long count); +extern __must_check long strlen_user(const char __user *str); +extern __must_check long strnlen_user(const char __user *str, long n); + +#include <asm/extable.h> +#endif /* _ASM_SW64_UACCESS_H */ diff --git a/arch/sw_64/include/asm/ucontext.h b/arch/sw_64/include/asm/ucontext.h new file mode 100644 index 000000000000..d40eebe988ef --- /dev/null +++ b/arch/sw_64/include/asm/ucontext.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_UCONTEXT_H +#define _ASM_SW64_UCONTEXT_H + +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + old_sigset_t uc_old_sigmask; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#endif /* _ASM_SW64_UCONTEXT_H */ diff --git a/arch/sw_64/include/asm/unaligned.h b/arch/sw_64/include/asm/unaligned.h new file mode 100644 index 000000000000..91fdff923ce5 --- /dev/null +++ b/arch/sw_64/include/asm/unaligned.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_UNALIGNED_H +#define _ASM_SW64_UNALIGNED_H + +#include <linux/unaligned/le_struct.h> +#include <linux/unaligned/be_byteshift.h> +#include <linux/unaligned/generic.h> + +#define get_unaligned __get_unaligned_le +#define put_unaligned __put_unaligned_le + +#endif /* _ASM_SW64_UNALIGNED_H */ diff --git a/arch/sw_64/include/asm/unistd.h b/arch/sw_64/include/asm/unistd.h new file mode 100644 index 000000000000..c1778adf4fba --- /dev/null +++ b/arch/sw_64/include/asm/unistd.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_UNISTD_H +#define _ASM_SW64_UNISTD_H + +#include <uapi/asm/unistd.h> + +#define NR_SYSCALLS 519 +#define NR_syscalls NR_SYSCALLS + +#define __ARCH_WANT_NEW_STAT +#define __ARCH_WANT_OLD_READDIR +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_FADVISE64 +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_OLD_GETRLIMIT 
+#define __ARCH_WANT_SYS_OLDUMOUNT +#define __ARCH_WANT_SYS_SIGPENDING +#define __ARCH_WANT_SYS_UTIME +#define __ARCH_WANT_SYS_FORK +#define __ARCH_WANT_SYS_VFORK +#define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_SOCKETCALL +#define __ARCH_WANT_SYS_SIGPROCMASK + +#endif /* _ASM_SW64_UNISTD_H */ diff --git a/arch/sw_64/include/asm/uprobes.h b/arch/sw_64/include/asm/uprobes.h new file mode 100644 index 000000000000..97b67af25bce --- /dev/null +++ b/arch/sw_64/include/asm/uprobes.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#ifndef _ASM_SW64_UPROBES_H +#define _ASM_SW64_UPROBES_H + +#include <linux/notifier.h> +#include <linux/types.h> +#include <asm/insn.h> + +/* + * We want this to be defined as union sw64_instruction but that makes the + * generic code blow up. + */ +typedef u32 uprobe_opcode_t; + +#define MAX_UINSN_BYTES SW64_INSN_SIZE +#define UPROBE_XOL_SLOT_BYTES SW64_INSN_SIZE + +#define UPROBE_BRK_UPROBE 0x000d000d /* break 13 */ +#define UPROBE_BRK_UPROBE_XOL 0x000e000d /* break 14 */ + +#define UPROBE_SWBP_INSN UPROBE_BRK_UPROBE +#define UPROBE_SWBP_INSN_SIZE MAX_UINSN_BYTES + +struct arch_uprobe { + u32 insn; + u32 ixol[2]; +}; + +struct arch_uprobe_task { + unsigned long saved_trap_nr; +}; + +#endif /* _ASM_SW64_UPROBES_H */ diff --git a/arch/sw_64/include/asm/user.h b/arch/sw_64/include/asm/user.h new file mode 100644 index 000000000000..a6ff58097ea3 --- /dev/null +++ b/arch/sw_64/include/asm/user.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_USER_H +#define _ASM_SW64_USER_H + +#include <linux/sched.h> +#include <linux/ptrace.h> + +#include <asm/page.h> +#include <asm/reg.h> + +/* + * Core file format: The core file is written in such a way that gdb + * can understand it and provide useful information to the user (under + * linux we use the `trad-core' bfd). The file contents are as follows: + * + * upage: 1 page consisting of a user struct that tells gdb + * what is present in the file. Directly after this is a + * copy of the task_struct, which is currently not used by gdb, + * but it may come in handy at some point. All of the registers + * are stored as part of the upage. The upage should always be + * only one page long. + * data: The data segment follows next. We use current->end_text to + * current->brk to pick up all of the user variables, plus any memory + * that may have been sbrk'ed. No attempt is made to determine if a + * page is demand-zero or if a page is totally unused, we just cover + * the entire range. All of the addresses are rounded in such a way + * that an integral number of pages is written. + * stack: We need the stack information in order to get a meaningful + * backtrace. We need to write the data from usp to + * current->start_stack, so we round each of these in order to be able + * to write an integer number of pages. 
+ */ +struct user { + unsigned long regs[EF_SIZE/8+32]; /* integer and fp regs */ + size_t u_tsize; /* text size (pages) */ + size_t u_dsize; /* data size (pages) */ + size_t u_ssize; /* stack size (pages) */ + unsigned long start_code; /* text starting address */ + unsigned long start_data; /* data starting address */ + unsigned long start_stack; /* stack starting address */ + long signal; /* signal causing core dump */ + unsigned long u_ar0; /* help gdb find registers */ + unsigned long magic; /* identifies a core file */ + char u_comm[32]; /* user command name */ +}; + +#define NBPG PAGE_SIZE +#define UPAGES 1 +#define HOST_TEXT_START_ADDR (u.start_code) +#define HOST_DATA_START_ADDR (u.start_data) +#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) + +#endif /* _ASM_SW64_USER_H */ diff --git a/arch/sw_64/include/asm/vcpu.h b/arch/sw_64/include/asm/vcpu.h new file mode 100644 index 000000000000..5b3fe80aed1b --- /dev/null +++ b/arch/sw_64/include/asm/vcpu.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_VCPU_H +#define _ASM_SW64_VCPU_H + +#ifndef __ASSEMBLY__ + +struct vcpucb { + unsigned long go_flag; + unsigned long pcbb; + unsigned long ksp; + unsigned long usp; + unsigned long kgp; + unsigned long ent_arith; + unsigned long ent_if; + unsigned long ent_int; + unsigned long ent_mm; + unsigned long ent_sys; + unsigned long ent_una; + unsigned long stack_pc; + unsigned long new_a0; + unsigned long new_a1; + unsigned long new_a2; + unsigned long whami; + unsigned long csr_save; + unsigned long wakeup_magic; + unsigned long host_vcpucb; + unsigned long upcr; + unsigned long vpcr; + unsigned long dtb_pcr; + unsigned long guest_ksp; + unsigned long guest_usp; + unsigned long vcpu_irq_disabled; + unsigned long vcpu_irq; + unsigned long ptbr; + unsigned long int_stat0; + unsigned long int_stat1; + unsigned long int_stat2; + unsigned long int_stat3; + unsigned long reset_entry; + unsigned long pvcpu; + unsigned long exit_reason; + unsigned long ipaddr; + unsigned long vcpu_irq_vector; +}; + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_SW64_VCPU_H */ diff --git a/arch/sw_64/include/asm/vdso.h b/arch/sw_64/include/asm/vdso.h new file mode 100644 index 000000000000..8ecd5add42ad --- /dev/null +++ b/arch/sw_64/include/asm/vdso.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 SW64 Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ +#ifndef _ASM_SW64_VDSO_H +#define _ASM_SW64_VDSO_H + +#ifdef __KERNEL__ + +/* + * Default link address for the vDSO. + * Since we randomise the VDSO mapping, there's little point in trying + * to prelink this. 
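+ *
+ * Sketch only: addresses of individual vDSO entries are computed
+ * relative to the mapped base, e.g.
+ *
+ *	addr = VDSO_SYMBOL(base, foo);
+ *
+ * where "foo" stands for any exported __vdso_foo symbol and "base" is
+ * the randomised mapping address.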
+ */ +#define VDSO_LBASE 0x0 + +#ifndef __ASSEMBLY__ + +#include <asm/page.h> +#include <asm/sw64io.h> +#include <asm/processor.h> +#define VDSO_SYMBOL(base, name) \ +({ \ + extern const unsigned long __vdso_##name; \ + ((unsigned long)(base) + __vdso_##name); \ +}) + + +struct vdso_data { + u64 xtime_sec; + u64 xtime_nsec; + u32 wall_to_mono_sec; + u32 wall_to_mono_nsec; + u32 cs_shift; + u32 cs_mult; + u64 cs_cycle_last; + u64 cs_mask; + s32 tz_minuteswest; + s32 tz_dsttime; + u32 seq_count; +}; + +static inline unsigned long get_vdso_base(void) +{ + unsigned long addr, tmp; + __asm__ __volatile__( + " br %1, 1f\n" + "1: ldi %0, 0(%1)\n" + : "=r" (addr), "=&r" (tmp) + ::); + + addr &= ~(PAGE_SIZE - 1); + return addr; +} + +static inline const struct vdso_data *get_vdso_data(void) +{ + return (const struct vdso_data *)(get_vdso_base() - PAGE_SIZE); +} + +static inline u32 vdso_data_read_begin(const struct vdso_data *data) +{ + u32 seq; + + while (true) { + seq = READ_ONCE(data->seq_count); + if (likely(!(seq & 1))) { + /* Paired with smp_wmb() in vdso_data_write_*(). */ + smp_rmb(); + return seq; + } + + cpu_relax(); + } +} + +static inline bool vdso_data_read_retry(const struct vdso_data *data, + u32 start_seq) +{ + /* Paired with smp_wmb() in vdso_data_write_*(). */ + smp_rmb(); + return unlikely(data->seq_count != start_seq); +} + +static inline void vdso_data_write_begin(struct vdso_data *data) +{ + ++data->seq_count; + + /* Ensure sequence update is written before other data page values. */ + smp_wmb(); +} + +static inline void vdso_data_write_end(struct vdso_data *data) +{ + /* Ensure data values are written before updating sequence again. */ + smp_wmb(); + ++data->seq_count; +} + + +#endif /* !__ASSEMBLY__ */ + +#endif /* __KERNEL__ */ +#endif /* _ASM_SW64_VDSO_H */ diff --git a/arch/sw_64/include/asm/vga.h b/arch/sw_64/include/asm/vga.h new file mode 100644 index 000000000000..3ca5c397b946 --- /dev/null +++ b/arch/sw_64/include/asm/vga.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Access to VGA videoram + * + * (c) 1998 Martin Mares mj@ucw.cz + */ + +#ifndef _ASM_SW64_VGA_H +#define _ASM_SW64_VGA_H + +#include <asm/io.h> + +#define VT_BUF_HAVE_RW +#define VT_BUF_HAVE_MEMSETW +#define VT_BUF_HAVE_MEMCPYW + +static inline void scr_writew(u16 val, volatile u16 *addr) +{ + if (__is_ioaddr(addr)) + __raw_writew(val, (volatile u16 __iomem *) addr); + else + *addr = val; +} + +static inline u16 scr_readw(volatile const u16 *addr) +{ + if (__is_ioaddr(addr)) + return __raw_readw((volatile const u16 __iomem *) addr); + else + return *addr; +} + +static inline void scr_memsetw(u16 *s, u16 c, unsigned int count) +{ + if (__is_ioaddr(s)) + memsetw_io((u16 __iomem *) s, c, count); + else + memsetw(s, c, count); +} + +/* Do not trust that the usage will be correct; analyze the arguments. */ +extern void scr_memcpyw(u16 *d, const u16 *s, unsigned int count); + +/* + * ??? These are currently only used for downloading character sets. As + * such, they don't need memory barriers. Is this all they are intended + * to be used for? 
+ */ +#define vga_readb(a) readb((u8 __iomem *)(a)) +#define vga_writeb(v, a) writeb(v, (u8 __iomem *)(a)) + +#ifdef CONFIG_VGA_HOSE +#include <linux/ioport.h> +#include <linux/pci.h> + +extern struct pci_controller *pci_vga_hose; + +# define __is_port_vga(a) \ + (((a) >= 0x3b0) && ((a) < 0x3e0) && \ + ((a) != 0x3b3) && ((a) != 0x3d3)) + +# define __is_mem_vga(a) \ + (((a) >= 0xa0000) && ((a) <= 0xc0000)) + +# define FIXUP_IOADDR_VGA(a) do { \ + if (pci_vga_hose && __is_port_vga(a)) \ + (a) += pci_vga_hose->io_space->start; \ +} while (0) + +# define FIXUP_MEMADDR_VGA(a) do { \ + if (pci_vga_hose && __is_mem_vga(a)) \ + (a) += pci_vga_hose->mem_space->start; \ +} while (0) + +#else /* CONFIG_VGA_HOSE */ +# define pci_vga_hose 0 +# define __is_port_vga(a) 0 +# define __is_mem_vga(a) 0 +# define FIXUP_IOADDR_VGA(a) +# define FIXUP_MEMADDR_VGA(a) +#endif /* CONFIG_VGA_HOSE */ + +#define VGA_MAP_MEM(x, s) ((unsigned long)ioremap(x, s)) + +#endif diff --git a/arch/sw_64/include/asm/vmalloc.h b/arch/sw_64/include/asm/vmalloc.h new file mode 100644 index 000000000000..a76d1133d6c6 --- /dev/null +++ b/arch/sw_64/include/asm/vmalloc.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_VMALLOC_H +#define _ASM_SW64_VMALLOC_H + +#endif /* _ASM_SW64_VMALLOC_H */ diff --git a/arch/sw_64/include/asm/word-at-a-time.h b/arch/sw_64/include/asm/word-at-a-time.h new file mode 100644 index 000000000000..623efbec4429 --- /dev/null +++ b/arch/sw_64/include/asm/word-at-a-time.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_WORD_AT_A_TIME_H +#define _ASM_SW64_WORD_AT_A_TIME_H + +#include <asm/compiler.h> + +/* + * word-at-a-time interface for SW64. + */ + +/* + * We do not use the word_at_a_time struct on SW64, but it needs to be + * implemented to humour the generic code. + */ +struct word_at_a_time { + const unsigned long unused; +}; + +#define WORD_AT_A_TIME_CONSTANTS { 0 } + +/* Return nonzero if val has a zero */ +static inline unsigned long has_zero(unsigned long val, unsigned long *bits, const struct word_at_a_time *c) +{ + unsigned long zero_locations = __kernel_cmpgeb(0, val); + *bits = zero_locations; + return zero_locations; +} + +static inline unsigned long prep_zero_mask(unsigned long val, unsigned long bits, const struct word_at_a_time *c) +{ + return bits; +} + +#define create_zero_mask(bits) (bits) + +static inline unsigned long find_zero(unsigned long bits) +{ + return __kernel_cttz(bits); +} + +#define zero_bytemask(mask) ((2ul << (find_zero(mask) * 8)) - 1) + +#endif /* _ASM_SW64_WORD_AT_A_TIME_H */ diff --git a/arch/sw_64/include/asm/wrperfmon.h b/arch/sw_64/include/asm/wrperfmon.h new file mode 100644 index 000000000000..eaa6735b5a25 --- /dev/null +++ b/arch/sw_64/include/asm/wrperfmon.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Definitions for use with the sw64 wrperfmon HMCODE call. 
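+ *
+ * Illustrative sequence only; the wrperfmon() wrapper name is assumed
+ * here rather than defined in this header.  A perf backend would
+ * typically select an event and then enable the counter:
+ *
+ *	wrperfmon(PERFMON_CMD_EVENT_PC0, PC0_CPU_CYCLES);
+ *	wrperfmon(PERFMON_CMD_ENABLE, PERFMON_ENABLE_ARGS_PC0);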
+ */ + +#ifndef _ASM_SW64_WRPERFMON_H +#define _ASM_SW64_WRPERFMON_H + +#define PERFMON_PC0 0 +#define PERFMON_PC1 1 + +/* Following commands are implemented on all CPUs */ +#define PERFMON_CMD_DISABLE 0 +#define PERFMON_CMD_ENABLE 1 +#define PERFMON_CMD_EVENT_PC0 2 +#define PERFMON_CMD_EVENT_PC1 3 +#define PERFMON_CMD_PM 4 +#define PERFMON_CMD_READ 5 +#define PERFMON_CMD_READ_CLEAR 6 +#define PERFMON_CMD_WRITE_PC0 7 +#define PERFMON_CMD_WRITE_PC1 8 + +#define PERFMON_DISABLE_ARGS_PC0 1 +#define PERFMON_DISABLE_ARGS_PC1 2 +#define PERFMON_DISABLE_ARGS_PC 3 + +#define PERFMON_ENABLE_ARGS_PC0 1 +#define PERFMON_ENABLE_ARGS_PC1 2 +#define PERFMON_ENABLE_ARGS_PC 3 + +#define PERFMON_READ_PC0 0 +#define PERFMON_READ_PC1 1 + +#define PC0_RAW_BASE 0x0 +#define PC1_RAW_BASE 0x100 +#define PC0_MIN 0x0 +#define PC0_MAX 0xF +#define PC1_MIN 0x0 +#define PC1_MAX 0x37 + +/* pc0 events */ +#define PC0_INSTRUCTIONS 0x0 +#define PC0_BRANCH_INSTRUCTIONS 0x3 +#define PC0_CPU_CYCLES 0x8 +#define PC0_ITB_READ 0x9 +#define PC0_DTB_READ 0xA +#define PC0_ICACHE_READ 0xB +#define PC0_DCACHE_READ 0xC +#define PC0_SCACHE_REFERENCES 0xD + +/* pc1 events */ +#define PC1_BRANCH_MISSES 0xB +#define PC1_SCACHE_MISSES 0x10 +#define PC1_ICACHE_READ_MISSES 0x16 +#define PC1_ITB_MISSES 0x17 +#define PC1_DTB_SINGLE_MISSES 0x30 +#define PC1_DCACHE_MISSES 0x32 + +#define MAX_HWEVENTS 2 +#define PMC_COUNT_MASK ((1UL << 58) - 1) + +#endif diff --git a/arch/sw_64/include/asm/xchg.h b/arch/sw_64/include/asm/xchg.h new file mode 100644 index 000000000000..bac67623da91 --- /dev/null +++ b/arch/sw_64/include/asm/xchg.h @@ -0,0 +1,328 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_CMPXCHG_H +#error Do not include xchg.h directly! +#else +/* + * xchg/xchg_local and cmpxchg/cmpxchg_local share the same code + * except that local version do not have the expensive memory barrier. + * So this file is included twice from asm/cmpxchg.h. + */ + +/* + * Atomic exchange. + * Since it can be used to implement critical sections + * it must clobber "memory" (also for interrupts in UP). 
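+ *
+ * For example (illustration only, lock_word is a placeholder), a
+ * minimal test-and-set loop built on xchg() looks like:
+ *
+ *	while (xchg(&lock_word, 1))
+ *		cpu_relax();
+ *	... critical section ...
+ *	smp_store_release(&lock_word, 0);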
+ */ + +static inline unsigned long +____xchg(_u8, volatile char *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + + " andnot %4, 7, %3\n" + " inslb %1, %4, %1\n" + "1: lldl %2, 0(%3)\n" + " ldi %0, 1\n" + " wr_f %0\n" + " extlb %2, %4, %0\n" + " masklb %2, %4, %2\n" + " or %1, %2, %2\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstl %2, 0(%3)\n" + " rd_f %2\n" + " beq %2, 2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +____xchg(_u16, volatile short *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " andnot %4, 7, %3\n" + " inslh %1, %4, %1\n" + "1: lldl %2, 0(%3)\n" + " ldi %0, 1\n" + " wr_f %0\n" + " extlh %2, %4, %0\n" + " masklh %2, %4, %2\n" + " or %1, %2, %2\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstl %2, 0(%3)\n" + " rd_f %2\n" + " beq %2, 2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +____xchg(_u32, volatile int *m, unsigned long val) +{ + unsigned long dummy, addr; + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %3, %5\n" + "1: lldw %0, 0(%3)\n" + " ldi %1, 1\n" + " wr_f %1\n" + " bis $31, %4, %1\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstw %1, 0(%3)\n" + " rd_f %1\n" + " beq %1, 2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m), "=&r"(addr) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +static inline unsigned long +____xchg(_u64, volatile long *m, unsigned long val) +{ + unsigned long dummy, addr; + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %3, %5\n" + "1: lldl %0, 0(%3)\n" + " ldi %1, 1\n" + " wr_f %1\n" + " bis $31, %4, %1\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstl %1, 0(%3)\n" + " rd_f %1\n" + " beq %1, 2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m), "=&r"(addr) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +/* + * This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid xchg(). + */ +extern void __xchg_called_with_bad_pointer(void); + +static __always_inline unsigned long +____xchg(, volatile void *ptr, unsigned long x, int size) +{ + switch (size) { + case 1: + return ____xchg(_u8, ptr, x); + case 2: + return ____xchg(_u16, ptr, x); + case 4: + return ____xchg(_u32, ptr, x); + case 8: + return ____xchg(_u64, ptr, x); + } + __xchg_called_with_bad_pointer(); + return x; +} + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + * + * The memory barrier should be placed in SMP only when we actually + * make the change. If we don't change anything (so if the returned + * prev is equal to old) then we aren't acquiring anything new and + * we don't need any memory barrier as far I can tell. 
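+ *
+ * Typical caller loop, shown for illustration only (counter and old
+ * are placeholders):
+ *
+ *	do {
+ *		old = READ_ONCE(*counter);
+ *	} while (cmpxchg(counter, old, old + 1) != old);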
+ */ + +static inline unsigned long +____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " andnot %5, 7, %4\n" + " inslb %1, %5, %1\n" + "1: lldl %2, 0(%4)\n" + " extlb %2, %5, %0\n" + " cmpeq %0, %6, %3\n" + " wr_f %3\n" + " masklb %2, %5, %2\n" + " or %1, %2, %2\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstl %2, 0(%4)\n" + " rd_f %2\n" + " beq %3, 2f\n" + " beq %2, 3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " andnot %5, 7, %4\n" + " inslh %1, %5, %1\n" + "1: lldl %2, 0(%4)\n" + " extlh %2, %5, %0\n" + " cmpeq %0, %6, %3\n" + " wr_f %3\n" + " masklh %2, %5, %2\n" + " or %1, %2, %2\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstl %2, 0(%4)\n" + " rd_f %2\n" + " beq %3, 2f\n" + " beq %2, 3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg(_u32, volatile int *m, int old, int new) +{ + unsigned long prev, cmp, addr, tmp; + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %3, %7\n" + "1: lldw %0, 0(%3)\n" + " cmpeq %0, %5, %1\n" + " wr_f %1\n" + " bis $31, %6, %4\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstw %4, 0(%3)\n" + " rd_f %4\n" + " beq %1, 2f\n" + " beq %4, 3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m), "=&r"(addr), "=&r"(tmp) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new) +{ + unsigned long prev, cmp, addr, tmp; + + __asm__ __volatile__( +#ifdef CONFIG_LOCK_MEMB + " memb\n" +#endif + " ldi %3, %7\n" + "1: lldl %0, 0(%3)\n" + " cmpeq %0, %5, %1\n" + " wr_f %1\n" + " bis $31, %6, %4\n" +#ifdef CONFIG_LOCK_FIXUP + " memb\n" +#endif + " lstl %4, 0(%3)\n" + " rd_f %4\n" + " beq %1, 2f\n" + " beq %4, 3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m), "=&r"(addr), "=&r"(tmp) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +/* + * This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid cmpxchg(). 
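+ *
+ * E.g. a cmpxchg() on a 3-byte object falls through the size switch,
+ * leaves this reference unresolved and therefore fails at link time
+ * instead of silently misbehaving.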
+ */ +extern void __cmpxchg_called_with_bad_pointer(void); + +static __always_inline unsigned long ____cmpxchg(, volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 1: + return ____cmpxchg(_u8, ptr, old, new); + case 2: + return ____cmpxchg(_u16, ptr, old, new); + case 4: + return ____cmpxchg(_u32, ptr, old, new); + case 8: + return ____cmpxchg(_u64, ptr, old, new); + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#endif diff --git a/arch/sw_64/include/asm/xor.h b/arch/sw_64/include/asm/xor.h new file mode 100644 index 000000000000..af95259ed8ef --- /dev/null +++ b/arch/sw_64/include/asm/xor.h @@ -0,0 +1,847 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Optimized RAID-5 checksumming functions. + */ + +#ifndef _ASM_SW64_XOR_H +#define _ASM_SW64_XOR_H + +extern void xor_sw64_2(unsigned long, unsigned long *, unsigned long *); +extern void xor_sw64_3(unsigned long, unsigned long *, unsigned long *, + unsigned long *); +extern void xor_sw64_4(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *); +extern void xor_sw64_5(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *, unsigned long *); + +extern void xor_sw64_prefetch_2(unsigned long, unsigned long *, + unsigned long *); +extern void xor_sw64_prefetch_3(unsigned long, unsigned long *, + unsigned long *, unsigned long *); +extern void xor_sw64_prefetch_4(unsigned long, unsigned long *, + unsigned long *, unsigned long *, + unsigned long *); +extern void xor_sw64_prefetch_5(unsigned long, unsigned long *, + unsigned long *, unsigned long *, + unsigned long *, unsigned long *); + +asm(" \n\ + .text \n\ + .align 3 \n\ + .ent xor_sw64_2 \n\ +xor_sw64_2: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +2: \n\ + ldl $0, 0($17) \n\ + ldl $1, 0($18) \n\ + ldl $2, 8($17) \n\ + ldl $3, 8($18) \n\ + \n\ + ldl $4, 16($17) \n\ + ldl $5, 16($18) \n\ + ldl $6, 24($17) \n\ + ldl $7, 24($18) \n\ + \n\ + ldl $19, 32($17) \n\ + ldl $20, 32($18) \n\ + ldl $21, 40($17) \n\ + ldl $22, 40($18) \n\ + \n\ + ldl $23, 48($17) \n\ + ldl $24, 48($18) \n\ + ldl $25, 56($17) \n\ + xor $0, $1, $0 # 7 cycles from $1 load \n\ + \n\ + ldl $27, 56($18) \n\ + xor $2, $3, $2 \n\ + stl $0, 0($17) \n\ + xor $4, $5, $4 \n\ + \n\ + stl $2, 8($17) \n\ + xor $6, $7, $6 \n\ + stl $4, 16($17) \n\ + xor $19, $20, $19 \n\ + \n\ + stl $6, 24($17) \n\ + xor $21, $22, $21 \n\ + stl $19, 32($17) \n\ + xor $23, $24, $23 \n\ + \n\ + stl $21, 40($17) \n\ + xor $25, $27, $25 \n\ + stl $23, 48($17) \n\ + subl $16, 1, $16 \n\ + \n\ + stl $25, 56($17) \n\ + addl $17, 64, $17 \n\ + addl $18, 64, $18 \n\ + bgt $16, 2b \n\ + \n\ + ret \n\ + .end xor_sw64_2 \n\ + \n\ + .align 3 \n\ + .ent xor_sw64_3 \n\ +xor_sw64_3: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +3: \n\ + ldl $0, 0($17) \n\ + ldl $1, 0($18) \n\ + ldl $2, 0($19) \n\ + ldl $3, 8($17) \n\ + \n\ + ldl $4, 8($18) \n\ + ldl $6, 16($17) \n\ + ldl $7, 16($18) \n\ + ldl $21, 24($17) \n\ + \n\ + ldl $22, 24($18) \n\ + ldl $24, 32($17) \n\ + ldl $25, 32($18) \n\ + ldl $5, 8($19) \n\ + \n\ + ldl $20, 16($19) \n\ + ldl $23, 24($19) \n\ + ldl $27, 32($19) \n\ + \n\ + xor $0, $1, $1 # 8 cycles from $0 load \n\ + xor $3, $4, $4 # 6 cycles from $4 load \n\ + xor $6, $7, $7 # 6 cycles from $7 load \n\ + xor $21, $22, $22 # 5 cycles from $22 load \n\ + \n\ + xor $1, $2, $2 # 9 cycles from $2 load \n\ + xor $24, $25, $25 # 5 cycles from $25 load \n\ + stl $2, 0($17) \n\ + xor $4, $5, $5 # 6 cycles from $5 
load \n\ + \n\ + stl $5, 8($17) \n\ + xor $7, $20, $20 # 7 cycles from $20 load \n\ + stl $20, 16($17) \n\ + xor $22, $23, $23 # 7 cycles from $23 load \n\ + \n\ + stl $23, 24($17) \n\ + xor $25, $27, $27 # 7 cycles from $27 load \n\ + stl $27, 32($17) \n\ + \n\ + ldl $0, 40($17) \n\ + ldl $1, 40($18) \n\ + ldl $3, 48($17) \n\ + ldl $4, 48($18) \n\ + \n\ + ldl $6, 56($17) \n\ + ldl $7, 56($18) \n\ + ldl $2, 40($19) \n\ + ldl $5, 48($19) \n\ + \n\ + ldl $20, 56($19) \n\ + xor $0, $1, $1 # 4 cycles from $1 load \n\ + xor $3, $4, $4 # 5 cycles from $4 load \n\ + xor $6, $7, $7 # 5 cycles from $7 load \n\ + \n\ + xor $1, $2, $2 # 4 cycles from $2 load \n\ + xor $4, $5, $5 # 5 cycles from $5 load \n\ + stl $2, 40($17) \n\ + xor $7, $20, $20 # 4 cycles from $20 load \n\ + \n\ + stl $5, 48($17) \n\ + subl $16, 1, $16 \n\ + stl $20, 56($17) \n\ + addl $19, 64, $19 \n\ + \n\ + addl $18, 64, $18 \n\ + addl $17, 64, $17 \n\ + bgt $16, 3b \n\ + ret \n\ + .end xor_sw64_3 \n\ + \n\ + .align 3 \n\ + .ent xor_sw64_4 \n\ +xor_sw64_4: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +4: \n\ + ldl $0, 0($17) \n\ + ldl $1, 0($18) \n\ + ldl $2, 0($19) \n\ + ldl $3, 0($20) \n\ + \n\ + ldl $4, 8($17) \n\ + ldl $5, 8($18) \n\ + ldl $6, 8($19) \n\ + ldl $7, 8($20) \n\ + \n\ + ldl $21, 16($17) \n\ + ldl $22, 16($18) \n\ + ldl $23, 16($19) \n\ + ldl $24, 16($20) \n\ + \n\ + ldl $25, 24($17) \n\ + xor $0, $1, $1 # 6 cycles from $1 load \n\ + ldl $27, 24($18) \n\ + xor $2, $3, $3 # 6 cycles from $3 load \n\ + \n\ + ldl $0, 24($19) \n\ + xor $1, $3, $3 \n\ + ldl $1, 24($20) \n\ + xor $4, $5, $5 # 7 cycles from $5 load \n\ + \n\ + stl $3, 0($17) \n\ + xor $6, $7, $7 \n\ + xor $21, $22, $22 # 7 cycles from $22 load \n\ + xor $5, $7, $7 \n\ + \n\ + stl $7, 8($17) \n\ + xor $23, $24, $24 # 7 cycles from $24 load \n\ + ldl $2, 32($17) \n\ + xor $22, $24, $24 \n\ + \n\ + ldl $3, 32($18) \n\ + ldl $4, 32($19) \n\ + ldl $5, 32($20) \n\ + xor $25, $27, $27 # 8 cycles from $27 load \n\ + \n\ + ldl $6, 40($17) \n\ + ldl $7, 40($18) \n\ + ldl $21, 40($19) \n\ + ldl $22, 40($20) \n\ + \n\ + stl $24, 16($17) \n\ + xor $0, $1, $1 # 9 cycles from $1 load \n\ + xor $2, $3, $3 # 5 cycles from $3 load \n\ + xor $27, $1, $1 \n\ + \n\ + stl $1, 24($17) \n\ + xor $4, $5, $5 # 5 cycles from $5 load \n\ + ldl $23, 48($17) \n\ + ldl $24, 48($18) \n\ + \n\ + ldl $25, 48($19) \n\ + xor $3, $5, $5 \n\ + ldl $27, 48($20) \n\ + ldl $0, 56($17) \n\ + \n\ + ldl $1, 56($18) \n\ + ldl $2, 56($19) \n\ + xor $6, $7, $7 # 8 cycles from $6 load \n\ + ldl $3, 56($20) \n\ + \n\ + stl $5, 32($17) \n\ + xor $21, $22, $22 # 8 cycles from $22 load \n\ + xor $7, $22, $22 \n\ + xor $23, $24, $24 # 5 cycles from $24 load \n\ + \n\ + stl $22, 40($17) \n\ + xor $25, $27, $27 # 5 cycles from $27 load \n\ + xor $24, $27, $27 \n\ + xor $0, $1, $1 # 5 cycles from $1 load \n\ + \n\ + stl $27, 48($17) \n\ + xor $2, $3, $3 # 4 cycles from $3 load \n\ + xor $1, $3, $3 \n\ + subl $16, 1, $16 \n\ + \n\ + stl $3, 56($17) \n\ + addl $20, 64, $20 \n\ + addl $19, 64, $19 \n\ + addl $18, 64, $18 \n\ + \n\ + addl $17, 64, $17 \n\ + bgt $16, 4b \n\ + ret \n\ + .end xor_sw64_4 \n\ + \n\ + .align 3 \n\ + .ent xor_sw64_5 \n\ +xor_sw64_5: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +5: \n\ + ldl $0, 0($17) \n\ + ldl $1, 0($18) \n\ + ldl $2, 0($19) \n\ + ldl $3, 0($20) \n\ + \n\ + ldl $4, 0($21) \n\ + ldl $5, 8($17) \n\ + ldl $6, 8($18) \n\ + ldl $7, 8($19) \n\ + \n\ + ldl $22, 8($20) \n\ + ldl $23, 8($21) \n\ + ldl $24, 16($17) \n\ + ldl $25, 16($18) \n\ + \n\ + 
ldl $27, 16($19) \n\ + xor $0, $1, $1 # 6 cycles from $1 load \n\ + ldl $28, 16($20) \n\ + xor $2, $3, $3 # 6 cycles from $3 load \n\ + \n\ + ldl $0, 16($21) \n\ + xor $1, $3, $3 \n\ + ldl $1, 24($17) \n\ + xor $3, $4, $4 # 7 cycles from $4 load \n\ + \n\ + stl $4, 0($17) \n\ + xor $5, $6, $6 # 7 cycles from $6 load \n\ + xor $7, $22, $22 # 7 cycles from $22 load \n\ + xor $6, $23, $23 # 7 cycles from $23 load \n\ + \n\ + ldl $2, 24($18) \n\ + xor $22, $23, $23 \n\ + ldl $3, 24($19) \n\ + xor $24, $25, $25 # 8 cycles from $25 load \n\ + \n\ + stl $23, 8($17) \n\ + xor $25, $27, $27 # 8 cycles from $27 load \n\ + ldl $4, 24($20) \n\ + xor $28, $0, $0 # 7 cycles from $0 load \n\ + \n\ + ldl $5, 24($21) \n\ + xor $27, $0, $0 \n\ + ldl $6, 32($17) \n\ + ldl $7, 32($18) \n\ + \n\ + stl $0, 16($17) \n\ + xor $1, $2, $2 # 6 cycles from $2 load \n\ + ldl $22, 32($19) \n\ + xor $3, $4, $4 # 4 cycles from $4 load \n\ + \n\ + ldl $23, 32($20) \n\ + xor $2, $4, $4 \n\ + ldl $24, 32($21) \n\ + ldl $25, 40($17) \n\ + \n\ + ldl $27, 40($18) \n\ + ldl $28, 40($19) \n\ + ldl $0, 40($20) \n\ + xor $4, $5, $5 # 7 cycles from $5 load \n\ + \n\ + stl $5, 24($17) \n\ + xor $6, $7, $7 # 7 cycles from $7 load \n\ + ldl $1, 40($21) \n\ + ldl $2, 48($17) \n\ + \n\ + ldl $3, 48($18) \n\ + xor $7, $22, $22 # 7 cycles from $22 load \n\ + ldl $4, 48($19) \n\ + xor $23, $24, $24 # 6 cycles from $24 load \n\ + \n\ + ldl $5, 48($20) \n\ + xor $22, $24, $24 \n\ + ldl $6, 48($21) \n\ + xor $25, $27, $27 # 7 cycles from $27 load \n\ + \n\ + stl $24, 32($17) \n\ + xor $27, $28, $28 # 8 cycles from $28 load \n\ + ldl $7, 56($17) \n\ + xor $0, $1, $1 # 6 cycles from $1 load \n\ + \n\ + ldl $22, 56($18) \n\ + ldl $23, 56($19) \n\ + ldl $24, 56($20) \n\ + ldl $25, 56($21) \n\ + \n\ + xor $28, $1, $1 \n\ + xor $2, $3, $3 # 9 cycles from $3 load \n\ + xor $3, $4, $4 # 9 cycles from $4 load \n\ + xor $5, $6, $6 # 8 cycles from $6 load \n\ + \n\ + stl $1, 40($17) \n\ + xor $4, $6, $6 \n\ + xor $7, $22, $22 # 7 cycles from $22 load \n\ + xor $23, $24, $24 # 6 cycles from $24 load \n\ + \n\ + stl $6, 48($17) \n\ + xor $22, $24, $24 \n\ + subl $16, 1, $16 \n\ + xor $24, $25, $25 # 8 cycles from $25 load \n\ + \n\ + stl $25, 56($17) \n\ + addl $21, 64, $21 \n\ + addl $20, 64, $20 \n\ + addl $19, 64, $19 \n\ + \n\ + addl $18, 64, $18 \n\ + addl $17, 64, $17 \n\ + bgt $16, 5b \n\ + ret \n\ + .end xor_sw64_5 \n\ + \n\ + .align 3 \n\ + .ent xor_sw64_prefetch_2 \n\ +xor_sw64_prefetch_2: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + fillde 0($17) \n\ + fillde 0($18) \n\ + \n\ + fillde 64($17) \n\ + fillde 64($18) \n\ + \n\ + fillde 128($17) \n\ + fillde 128($18) \n\ + \n\ + fillde 192($17) \n\ + fillde 192($18) \n\ + .align 4 \n\ +2: \n\ + ldl $0, 0($17) \n\ + ldl $1, 0($18) \n\ + ldl $2, 8($17) \n\ + ldl $3, 8($18) \n\ + \n\ + ldl $4, 16($17) \n\ + ldl $5, 16($18) \n\ + ldl $6, 24($17) \n\ + ldl $7, 24($18) \n\ + \n\ + ldl $19, 32($17) \n\ + ldl $20, 32($18) \n\ + ldl $21, 40($17) \n\ + ldl $22, 40($18) \n\ + \n\ + ldl $23, 48($17) \n\ + ldl $24, 48($18) \n\ + ldl $25, 56($17) \n\ + ldl $27, 56($18) \n\ + \n\ + fillde 256($17) \n\ + xor $0, $1, $0 # 8 cycles from $1 load \n\ + fillde 256($18) \n\ + xor $2, $3, $2 \n\ + \n\ + stl $0, 0($17) \n\ + xor $4, $5, $4 \n\ + stl $2, 8($17) \n\ + xor $6, $7, $6 \n\ + \n\ + stl $4, 16($17) \n\ + xor $19, $20, $19 \n\ + stl $6, 24($17) \n\ + xor $21, $22, $21 \n\ + \n\ + stl $19, 32($17) \n\ + xor $23, $24, $23 \n\ + stl $21, 40($17) \n\ + xor $25, $27, $25 \n\ + \n\ + stl $23, 48($17) \n\ + subl 
$16, 1, $16 \n\ + stl $25, 56($17) \n\ + addl $17, 64, $17 \n\ + \n\ + addl $18, 64, $18 \n\ + bgt $16, 2b \n\ + ret \n\ + .end xor_sw64_prefetch_2 \n\ + \n\ + .align 3 \n\ + .ent xor_sw64_prefetch_3 \n\ +xor_sw64_prefetch_3: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + fillde 0($17) \n\ + fillde 0($18) \n\ + fillde 0($19) \n\ + \n\ + fillde 64($17) \n\ + fillde 64($18) \n\ + fillde 64($19) \n\ + \n\ + fillde 128($17) \n\ + fillde 128($18) \n\ + fillde 128($19) \n\ + \n\ + fillde 192($17) \n\ + fillde 192($18) \n\ + fillde 192($19) \n\ + .align 4 \n\ +3: \n\ + ldl $0, 0($17) \n\ + ldl $1, 0($18) \n\ + ldl $2, 0($19) \n\ + ldl $3, 8($17) \n\ + \n\ + ldl $4, 8($18) \n\ + ldl $6, 16($17) \n\ + ldl $7, 16($18) \n\ + ldl $21, 24($17) \n\ + \n\ + ldl $22, 24($18) \n\ + ldl $24, 32($17) \n\ + ldl $25, 32($18) \n\ + ldl $5, 8($19) \n\ + \n\ + ldl $20, 16($19) \n\ + ldl $23, 24($19) \n\ + ldl $27, 32($19) \n\ + \n\ + xor $0, $1, $1 # 8 cycles from $0 load \n\ + xor $3, $4, $4 # 7 cycles from $4 load \n\ + xor $6, $7, $7 # 6 cycles from $7 load \n\ + xor $21, $22, $22 # 5 cycles from $22 load \n\ + \n\ + xor $1, $2, $2 # 9 cycles from $2 load \n\ + xor $24, $25, $25 # 5 cycles from $25 load \n\ + stl $2, 0($17) \n\ + xor $4, $5, $5 # 6 cycles from $5 load \n\ + \n\ + stl $5, 8($17) \n\ + xor $7, $20, $20 # 7 cycles from $20 load \n\ + stl $20, 16($17) \n\ + xor $22, $23, $23 # 7 cycles from $23 load \n\ + \n\ + stl $23, 24($17) \n\ + xor $25, $27, $27 # 7 cycles from $27 load \n\ + stl $27, 32($17) \n\ + \n\ + ldl $0, 40($17) \n\ + ldl $1, 40($18) \n\ + ldl $3, 48($17) \n\ + ldl $4, 48($18) \n\ + \n\ + ldl $6, 56($17) \n\ + ldl $7, 56($18) \n\ + ldl $2, 40($19) \n\ + ldl $5, 48($19) \n\ + \n\ + ldl $20, 56($19) \n\ + fillde 256($17) \n\ + fillde 256($18) \n\ + fillde 256($19) \n\ + \n\ + xor $0, $1, $1 # 6 cycles from $1 load \n\ + xor $3, $4, $4 # 5 cycles from $4 load \n\ + xor $6, $7, $7 # 5 cycles from $7 load \n\ + xor $1, $2, $2 # 4 cycles from $2 load \n\ + \n\ + xor $4, $5, $5 # 5 cycles from $5 load \n\ + xor $7, $20, $20 # 4 cycles from $20 load \n\ + stl $2, 40($17) \n\ + subl $16, 1, $16 \n\ + \n\ + stl $5, 48($17) \n\ + addl $19, 64, $19 \n\ + stl $20, 56($17) \n\ + addl $18, 64, $18 \n\ + \n\ + addl $17, 64, $17 \n\ + bgt $16, 3b \n\ + ret \n\ + .end xor_sw64_prefetch_3 \n\ + \n\ + .align 3 \n\ + .ent xor_sw64_prefetch_4 \n\ +xor_sw64_prefetch_4: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + fillde 0($17) \n\ + fillde 0($18) \n\ + fillde 0($19) \n\ + fillde 0($20) \n\ + \n\ + fillde 64($17) \n\ + fillde 64($18) \n\ + fillde 64($19) \n\ + fillde 64($20) \n\ + \n\ + fillde 128($17) \n\ + fillde 128($18) \n\ + fillde 128($19) \n\ + fillde 128($20) \n\ + \n\ + fillde 192($17) \n\ + fillde 192($18) \n\ + fillde 192($19) \n\ + fillde 192($20) \n\ + .align 4 \n\ +4: \n\ + ldl $0, 0($17) \n\ + ldl $1, 0($18) \n\ + ldl $2, 0($19) \n\ + ldl $3, 0($20) \n\ + \n\ + ldl $4, 8($17) \n\ + ldl $5, 8($18) \n\ + ldl $6, 8($19) \n\ + ldl $7, 8($20) \n\ + \n\ + ldl $21, 16($17) \n\ + ldl $22, 16($18) \n\ + ldl $23, 16($19) \n\ + ldl $24, 16($20) \n\ + \n\ + ldl $25, 24($17) \n\ + xor $0, $1, $1 # 6 cycles from $1 load \n\ + ldl $27, 24($18) \n\ + xor $2, $3, $3 # 6 cycles from $3 load \n\ + \n\ + ldl $0, 24($19) \n\ + xor $1, $3, $3 \n\ + ldl $1, 24($20) \n\ + xor $4, $5, $5 # 7 cycles from $5 load \n\ + \n\ + stl $3, 0($17) \n\ + xor $6, $7, $7 \n\ + xor $21, $22, $22 # 7 cycles from $22 load \n\ + xor $5, $7, $7 \n\ + \n\ + stl $7, 8($17) \n\ + xor $23, $24, $24 # 7 cycles from $24 load \n\ 
+ ldl $2, 32($17) \n\ + xor $22, $24, $24 \n\ + \n\ + ldl $3, 32($18) \n\ + ldl $4, 32($19) \n\ + ldl $5, 32($20) \n\ + xor $25, $27, $27 # 8 cycles from $27 load \n\ + \n\ + ldl $6, 40($17) \n\ + ldl $7, 40($18) \n\ + ldl $21, 40($19) \n\ + ldl $22, 40($20) \n\ + \n\ + stl $24, 16($17) \n\ + xor $0, $1, $1 # 9 cycles from $1 load \n\ + xor $2, $3, $3 # 5 cycles from $3 load \n\ + xor $27, $1, $1 \n\ + \n\ + stl $1, 24($17) \n\ + xor $4, $5, $5 # 5 cycles from $5 load \n\ + ldl $23, 48($17) \n\ + xor $3, $5, $5 \n\ + \n\ + ldl $24, 48($18) \n\ + ldl $25, 48($19) \n\ + ldl $27, 48($20) \n\ + ldl $0, 56($17) \n\ + \n\ + ldl $1, 56($18) \n\ + ldl $2, 56($19) \n\ + ldl $3, 56($20) \n\ + xor $6, $7, $7 # 8 cycles from $6 load \n\ + \n\ + fillde 256($17) \n\ + xor $21, $22, $22 # 8 cycles from $22 load \n\ + fillde 256($18) \n\ + xor $7, $22, $22 \n\ + \n\ + fillde 256($19) \n\ + xor $23, $24, $24 # 6 cycles from $24 load \n\ + fillde 256($20) \n\ + xor $25, $27, $27 # 6 cycles from $27 load \n\ + \n\ + stl $5, 32($17) \n\ + xor $24, $27, $27 \n\ + xor $0, $1, $1 # 7 cycles from $1 load \n\ + xor $2, $3, $3 # 6 cycles from $3 load \n\ + \n\ + stl $22, 40($17) \n\ + xor $1, $3, $3 \n\ + stl $27, 48($17) \n\ + subl $16, 1, $16 \n\ + \n\ + stl $3, 56($17) \n\ + addl $20, 64, $20 \n\ + addl $19, 64, $19 \n\ + addl $18, 64, $18 \n\ + \n\ + addl $17, 64, $17 \n\ + bgt $16, 4b \n\ + ret \n\ + .end xor_sw64_prefetch_4 \n\ + \n\ + .align 3 \n\ + .ent xor_sw64_prefetch_5 \n\ +xor_sw64_prefetch_5: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + fillde 0($17) \n\ + fillde 0($18) \n\ + fillde 0($19) \n\ + fillde 0($20) \n\ + fillde 0($21) \n\ + \n\ + fillde 64($17) \n\ + fillde 64($18) \n\ + fillde 64($19) \n\ + fillde 64($20) \n\ + fillde 64($21) \n\ + \n\ + fillde 128($17) \n\ + fillde 128($18) \n\ + fillde 128($19) \n\ + fillde 128($20) \n\ + fillde 128($21) \n\ + \n\ + fillde 192($17) \n\ + fillde 192($18) \n\ + fillde 192($19) \n\ + fillde 192($20) \n\ + fillde 192($21) \n\ + .align 4 \n\ +5: \n\ + ldl $0, 0($17) \n\ + ldl $1, 0($18) \n\ + ldl $2, 0($19) \n\ + ldl $3, 0($20) \n\ + \n\ + ldl $4, 0($21) \n\ + ldl $5, 8($17) \n\ + ldl $6, 8($18) \n\ + ldl $7, 8($19) \n\ + \n\ + ldl $22, 8($20) \n\ + ldl $23, 8($21) \n\ + ldl $24, 16($17) \n\ + ldl $25, 16($18) \n\ + \n\ + ldl $27, 16($19) \n\ + xor $0, $1, $1 # 6 cycles from $1 load \n\ + ldl $28, 16($20) \n\ + xor $2, $3, $3 # 6 cycles from $3 load \n\ + \n\ + ldl $0, 16($21) \n\ + xor $1, $3, $3 \n\ + ldl $1, 24($17) \n\ + xor $3, $4, $4 # 7 cycles from $4 load \n\ + \n\ + stl $4, 0($17) \n\ + xor $5, $6, $6 # 7 cycles from $6 load \n\ + xor $7, $22, $22 # 7 cycles from $22 load \n\ + xor $6, $23, $23 # 7 cycles from $23 load \n\ + \n\ + ldl $2, 24($18) \n\ + xor $22, $23, $23 \n\ + ldl $3, 24($19) \n\ + xor $24, $25, $25 # 8 cycles from $25 load \n\ + \n\ + stl $23, 8($17) \n\ + xor $25, $27, $27 # 8 cycles from $27 load \n\ + ldl $4, 24($20) \n\ + xor $28, $0, $0 # 7 cycles from $0 load \n\ + \n\ + ldl $5, 24($21) \n\ + xor $27, $0, $0 \n\ + ldl $6, 32($17) \n\ + ldl $7, 32($18) \n\ + \n\ + stl $0, 16($17) \n\ + xor $1, $2, $2 # 6 cycles from $2 load \n\ + ldl $22, 32($19) \n\ + xor $3, $4, $4 # 4 cycles from $4 load \n\ + \n\ + ldl $23, 32($20) \n\ + xor $2, $4, $4 \n\ + ldl $24, 32($21) \n\ + ldl $25, 40($17) \n\ + \n\ + ldl $27, 40($18) \n\ + ldl $28, 40($19) \n\ + ldl $0, 40($20) \n\ + xor $4, $5, $5 # 7 cycles from $5 load \n\ + \n\ + stl $5, 24($17) \n\ + xor $6, $7, $7 # 7 cycles from $7 load \n\ + ldl $1, 40($21) \n\ + ldl $2, 48($17) \n\ + 
\n\ + ldl $3, 48($18) \n\ + xor $7, $22, $22 # 7 cycles from $22 load \n\ + ldl $4, 48($19) \n\ + xor $23, $24, $24 # 6 cycles from $24 load \n\ + \n\ + ldl $5, 48($20) \n\ + xor $22, $24, $24 \n\ + ldl $6, 48($21) \n\ + xor $25, $27, $27 # 7 cycles from $27 load \n\ + \n\ + stl $24, 32($17) \n\ + xor $27, $28, $28 # 8 cycles from $28 load \n\ + ldl $7, 56($17) \n\ + xor $0, $1, $1 # 6 cycles from $1 load \n\ + \n\ + ldl $22, 56($18) \n\ + ldl $23, 56($19) \n\ + ldl $24, 56($20) \n\ + ldl $25, 56($21) \n\ + \n\ + fillde 256($17) \n\ + xor $28, $1, $1 \n\ + fillde 256($18) \n\ + xor $2, $3, $3 # 9 cycles from $3 load \n\ + \n\ + fillde 256($19) \n\ + xor $3, $4, $4 # 9 cycles from $4 load \n\ + fillde 256($20) \n\ + xor $5, $6, $6 # 8 cycles from $6 load \n\ + \n\ + stl $1, 40($17) \n\ + xor $4, $6, $6 \n\ + xor $7, $22, $22 # 7 cycles from $22 load \n\ + xor $23, $24, $24 # 6 cycles from $24 load \n\ + \n\ + stl $6, 48($17) \n\ + xor $22, $24, $24 \n\ + fillde 256($21) \n\ + xor $24, $25, $25 # 8 cycles from $25 load \n\ + \n\ + stl $25, 56($17) \n\ + subl $16, 1, $16 \n\ + addl $21, 64, $21 \n\ + addl $20, 64, $20 \n\ + \n\ + addl $19, 64, $19 \n\ + addl $18, 64, $18 \n\ + addl $17, 64, $17 \n\ + bgt $16, 5b \n\ + \n\ + ret \n\ + .end xor_sw64_prefetch_5 \n\ +"); + +static struct xor_block_template xor_block_sw64 = { + .name = "sw64", + .do_2 = xor_sw64_2, + .do_3 = xor_sw64_3, + .do_4 = xor_sw64_4, + .do_5 = xor_sw64_5, +}; + +static struct xor_block_template xor_block_sw64_prefetch = { + .name = "sw64 prefetch", + .do_2 = xor_sw64_prefetch_2, + .do_3 = xor_sw64_prefetch_3, + .do_4 = xor_sw64_prefetch_4, + .do_5 = xor_sw64_prefetch_5, +}; + +/* For grins, also test the generic routines. */ +#include <asm-generic/xor.h> + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_32regs); \ + xor_speed(&xor_block_sw64); \ + xor_speed(&xor_block_sw64_prefetch); \ + } while (0) + +/* + * Force the use of sw64_prefetch as it is significantly + * faster in the cold cache case. + */ +#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sw64_prefetch) + +#endif diff --git a/arch/sw_64/include/uapi/asm/Kbuild b/arch/sw_64/include/uapi/asm/Kbuild new file mode 100644 index 000000000000..a01bfb9600ec --- /dev/null +++ b/arch/sw_64/include/uapi/asm/Kbuild @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +# UAPI Header export list + +generated-y += unistd_64.h diff --git a/arch/sw_64/include/uapi/asm/a.out.h b/arch/sw_64/include/uapi/asm/a.out.h new file mode 100644 index 000000000000..addb648b8ed6 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/a.out.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_A_OUT_H +#define _UAPI_ASM_SW64_A_OUT_H + +#include <linux/types.h> + +/* + * ECOFF header structs. ECOFF files consist of: + * - a file header (struct filehdr), + * - an a.out header (struct aouthdr), + * - one or more section headers (struct scnhdr). + * The filhdr's "f_nscns" field contains the + * number of section headers. + */ + +struct filehdr { + /* "file" header */ + __u16 f_magic, f_nscns; + __u32 f_timdat; + __u64 f_symptr; + __u32 f_nsyms; + __u16 f_opthdr, f_flags; +}; + +struct aouthdr { + __u64 info; /* after that it looks quite normal.. 
*/ + __u64 tsize; + __u64 dsize; + __u64 bsize; + __u64 entry; + __u64 text_start; /* with a few additions that actually make sense */ + __u64 data_start; + __u64 bss_start; + __u32 gprmask, fprmask; /* bitmask of general & floating point regs used in binary */ + __u64 gpvalue; +}; + +struct scnhdr { + char s_name[8]; + __u64 s_paddr; + __u64 s_vaddr; + __u64 s_size; + __u64 s_scnptr; + __u64 s_relptr; + __u64 s_lnnoptr; + __u16 s_nreloc; + __u16 s_nlnno; + __u32 s_flags; +}; + +struct exec { + /* "file" header */ + struct filehdr fh; + struct aouthdr ah; +}; + +/* + * Define's so that the kernel exec code can access the a.out header + * fields... + */ +#define a_info ah.info +#define a_text ah.tsize +#define a_data ah.dsize +#define a_bss ah.bsize +#define a_entry ah.entry +#define a_textstart ah.text_start +#define a_datastart ah.data_start +#define a_bssstart ah.bss_start +#define a_gprmask ah.gprmask +#define a_fprmask ah.fprmask +#define a_gpvalue ah.gpvalue + +#define N_TXTADDR(x) ((x).a_textstart) +#define N_DATADDR(x) ((x).a_datastart) +#define N_BSSADDR(x) ((x).a_bssstart) +#define N_DRSIZE(x) 0 +#define N_TRSIZE(x) 0 +#define N_SYMSIZE(x) 0 + +#define AOUTHSZ sizeof(struct aouthdr) +#define SCNHSZ sizeof(struct scnhdr) +#define SCNROUND 16 + +#define N_TXTOFF(x) \ + ((long) N_MAGIC(x) == ZMAGIC ? 0 : \ + (sizeof(struct exec) + (x).fh.f_nscns * SCNHSZ + SCNROUND - 1) & ~(SCNROUND - 1)) + +#endif /* _UAPI_ASM_SW64_A_OUT_H */ diff --git a/arch/sw_64/include/uapi/asm/auxvec.h b/arch/sw_64/include/uapi/asm/auxvec.h new file mode 100644 index 000000000000..59854f3ac501 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/auxvec.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_AUXVEC_H +#define _UAPI_ASM_SW64_AUXVEC_H + +/* Reserve these numbers for any future use of a VDSO. */ +#if 1 +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 +#endif + +/* + * More complete cache descriptions than AT_[DIU]CACHEBSIZE. If the + * value is -1, then the cache doesn't exist. Otherwise: + * + * bit 0-3: Cache set-associativity; 0 means fully associative. + * bit 4-7: Log2 of cacheline size. + * bit 8-31: Size of the entire cache >> 8. + * bit 32-63: Reserved. 
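For illustration, the cache-shape encoding described above can be decoded in user space roughly as follows. This is a sketch only: it assumes a glibc-style getauxval() and restates the AT_L1D_CACHESHAPE number (35) defined just below in this header.

#include <stdio.h>
#include <sys/auxv.h>

#ifndef AT_L1D_CACHESHAPE
#define AT_L1D_CACHESHAPE 35            /* value from this header */
#endif

int main(void)
{
        unsigned long shape = getauxval(AT_L1D_CACHESHAPE);

        if (shape == 0 || shape == (unsigned long)-1)
                return 0;               /* not reported, or cache absent */

        unsigned long assoc = shape & 0xf;                   /* 0 = fully associative */
        unsigned long line  = 1UL << ((shape >> 4) & 0xf);   /* line size in bytes */
        unsigned long size  = ((shape >> 8) & 0xffffff) << 8;

        printf("L1D: %lu bytes, %lu-byte lines, assoc %lu\n", size, line, assoc);
        return 0;
}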
+ */ + +#define AT_L1I_CACHESHAPE 34 +#define AT_L1D_CACHESHAPE 35 +#define AT_L2_CACHESHAPE 36 +#define AT_L3_CACHESHAPE 37 + +#define AT_VECTOR_SIZE_ARCH 4 /* entries in ARCH_DLINFO */ + +#endif /* _UAPI_ASM_SW64_AUXVEC_H */ diff --git a/arch/sw_64/include/uapi/asm/bitsperlong.h b/arch/sw_64/include/uapi/asm/bitsperlong.h new file mode 100644 index 000000000000..5d2c677a86b8 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/bitsperlong.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_BITSPERLONG_H +#define _UAPI_ASM_SW64_BITSPERLONG_H + +#define __BITS_PER_LONG 64 + +#include <asm-generic/bitsperlong.h> + +#endif /* _UAPI_ASM_SW64_BITSPERLONG_H */ diff --git a/arch/sw_64/include/uapi/asm/bootparam.h b/arch/sw_64/include/uapi/asm/bootparam.h new file mode 100644 index 000000000000..6ce75d65e86e --- /dev/null +++ b/arch/sw_64/include/uapi/asm/bootparam.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_SW64_BOOTPARAM_H +#define _UAPI_ASM_SW64_BOOTPARAM_H + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> + +struct boot_params { + __u64 initrd_start; /* logical address of initrd */ + __u64 initrd_size; /* size of initrd */ + __u64 dtb_start; /* logical address of dtb */ + __u64 efi_systab; /* logical address of EFI system table */ + __u64 efi_memmap; /* logical address of EFI memory map */ + __u64 efi_memmap_size; /* size of EFI memory map */ + __u64 efi_memdesc_size; /* size of an EFI memory map descriptor */ + __u64 efi_memdesc_version; /* memory descriptor version */ + __u64 cmdline; /* logical address of cmdline */ +}; +#endif + +#endif /* _UAPI_ASM_SW64_BOOTPARAM_H */ diff --git a/arch/sw_64/include/uapi/asm/byteorder.h b/arch/sw_64/include/uapi/asm/byteorder.h new file mode 100644 index 000000000000..1b1698df58ca --- /dev/null +++ b/arch/sw_64/include/uapi/asm/byteorder.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_BYTEORDER_H +#define _UAPI_ASM_SW64_BYTEORDER_H + +#include <linux/byteorder/little_endian.h> + +#endif /* _UAPI_ASM_SW64_BYTEORDER_H */ diff --git a/arch/sw_64/include/uapi/asm/compiler.h b/arch/sw_64/include/uapi/asm/compiler.h new file mode 100644 index 000000000000..e5cf0fb170fa --- /dev/null +++ b/arch/sw_64/include/uapi/asm/compiler.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_COMPILER_H +#define _UAPI_ASM_SW64_COMPILER_H + +/* + * Herein are macros we use when describing various patterns we want to give GCC. + * In all cases we can get better schedules out of the compiler if we hide + * as little as possible inside inline assembly. However, we want to be + * able to know what we'll get out before giving up inline assembly. Thus + * these tests and macros. 
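The __kernel_cttz/ctlz/ctpop wrappers that follow each expose a single sw64 instruction. As a rough portable analogue (not this header's API), the same operations are available through GCC builtins, as in this small sketch:

#include <stdio.h>

int main(void)
{
        unsigned long x = 0x00f0UL;

        /* __builtin_ctzl()/__builtin_clzl() are undefined for 0, so guard them */
        printf("cttz  = %d\n", x ? __builtin_ctzl(x) : 64);
        printf("ctlz  = %d\n", x ? __builtin_clzl(x) : 64);
        printf("ctpop = %d\n", __builtin_popcountl(x));
        return 0;
}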
+ */ + +#define __kernel_inslb(val, shift) \ +({ \ + unsigned long __kir; \ + __asm__("inslb %2, %1, %0" : "=r"(__kir) : "rI"(shift), "r"(val));\ + __kir; \ +}) + +#define __kernel_inslh(val, shift) \ +({ \ + unsigned long __kir; \ + __asm__("inslh %2, %1, %0" : "=r"(__kir) : "rI"(shift), "r"(val));\ + __kir; \ +}) + +#define __kernel_insll(val, shift) \ +({ \ + unsigned long __kir; \ + __asm__("insll %2, %1, %0" : "=r"(__kir) : "rI"(shift), "r"(val));\ + __kir; \ +}) + +#define __kernel_inshw(val, shift) \ +({ \ + unsigned long __kir; \ + __asm__("inshw %2, %1, %0" : "=r"(__kir) : "rI"(shift), "r"(val));\ + __kir; \ +}) + +#define __kernel_extlb(val, shift) \ +({ \ + unsigned long __kir; \ + __asm__("extlb %2, %1, %0" : "=r"(__kir) : "rI"(shift), "r"(val));\ + __kir; \ +}) + +#define __kernel_extlh(val, shift) \ +({ \ + unsigned long __kir; \ + __asm__("extlh %2, %1, %0" : "=r"(__kir) : "rI"(shift), "r"(val));\ + __kir; \ +}) + +#define __kernel_cmpgeb(a, b) \ +({ \ + unsigned long __kir; \ + __asm__("cmpgeb %r2, %1, %0" : "=r"(__kir) : "rI"(b), "rJ"(a)); \ + __kir; \ +}) + +#define __kernel_cttz(x) \ +({ \ + unsigned long __kir; \ + __asm__("cttz %1, %0" : "=r"(__kir) : "r"(x)); \ + __kir; \ +}) + +#define __kernel_ctlz(x) \ +({ \ + unsigned long __kir; \ + __asm__("ctlz %1, %0" : "=r"(__kir) : "r"(x)); \ + __kir; \ +}) + +#define __kernel_ctpop(x) \ +({ \ + unsigned long __kir; \ + __asm__("ctpop %1, %0" : "=r"(__kir) : "r"(x)); \ + __kir; \ +}) + +#endif /* _UAPI_ASM_SW64_COMPILER_H */ diff --git a/arch/sw_64/include/uapi/asm/console.h b/arch/sw_64/include/uapi/asm/console.h new file mode 100644 index 000000000000..91246b759ecf --- /dev/null +++ b/arch/sw_64/include/uapi/asm/console.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_CONSOLE_H +#define _UAPI_ASM_SW64_CONSOLE_H + +/* + * Console callback routine numbers + */ +#define CCB_GETC 0x01 +#define CCB_PUTS 0x02 +#define CCB_RESET_TERM 0x03 +#define CCB_SET_TERM_INT 0x04 +#define CCB_SET_TERM_CTL 0x05 +#define CCB_PROCESS_KEYCODE 0x06 +#define CCB_OPEN_CONSOLE 0x07 +#define CCB_CLOSE_CONSOLE 0x08 + +#define CCB_OPEN 0x10 +#define CCB_CLOSE 0x11 +#define CCB_IOCTL 0x12 +#define CCB_READ 0x13 +#define CCB_WRITE 0x14 + +#define CCB_SET_ENV 0x20 +#define CCB_RESET_ENV 0x21 +#define CCB_GET_ENV 0x22 +#define CCB_SAVE_ENV 0x23 + +#define CCB_PSWITCH 0x30 +#define CCB_BIOS_EMUL 0x32 + +/* + * Environment variable numbers + */ +#define ENV_AUTO_ACTION 0x01 +#define ENV_BOOT_DEV 0x02 +#define ENV_BOOTDEF_DEV 0x03 +#define ENV_BOOTED_DEV 0x04 +#define ENV_BOOT_FILE 0x05 +#define ENV_BOOTED_FILE 0x06 +#define ENV_BOOT_OSFLAGS 0x07 +#define ENV_BOOTED_OSFLAGS 0x08 +#define ENV_BOOT_RESET 0x09 +#define ENV_DUMP_DEV 0x0A +#define ENV_ENABLE_AUDIT 0x0B +#define ENV_LICENSE 0x0C +#define ENV_CHAR_SET 0x0D +#define ENV_LANGUAGE 0x0E +#define ENV_TTY_DEV 0x0F + + +#endif /* _UAPI_ASM_SW64_CONSOLE_H */ diff --git a/arch/sw_64/include/uapi/asm/errno.h b/arch/sw_64/include/uapi/asm/errno.h new file mode 100644 index 000000000000..04b07f30c787 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/errno.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_ERRNO_H +#define _UAPI_ASM_SW64_ERRNO_H + +#include <asm-generic/errno-base.h> + +#undef EAGAIN /* 11 in errno-base.h */ + +#define EDEADLK 11 /* Resource deadlock would occur */ + +#define EAGAIN 35 /* Try again */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define EINPROGRESS 36 /* Operation now in progress */ +#define EALREADY 
37 /* Operation already in progress */ +#define ENOTSOCK 38 /* Socket operation on non-socket */ +#define EDESTADDRREQ 39 /* Destination address required */ +#define EMSGSIZE 40 /* Message too long */ +#define EPROTOTYPE 41 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 42 /* Protocol not available */ +#define EPROTONOSUPPORT 43 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 44 /* Socket type not supported */ +#define EOPNOTSUPP 45 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 46 /* Protocol family not supported */ +#define EAFNOSUPPORT 47 /* Address family not supported by protocol */ +#define EADDRINUSE 48 /* Address already in use */ +#define EADDRNOTAVAIL 49 /* Cannot assign requested address */ +#define ENETDOWN 50 /* Network is down */ +#define ENETUNREACH 51 /* Network is unreachable */ +#define ENETRESET 52 /* Network dropped connection because of reset */ +#define ECONNABORTED 53 /* Software caused connection abort */ +#define ECONNRESET 54 /* Connection reset by peer */ +#define ENOBUFS 55 /* No buffer space available */ +#define EISCONN 56 /* Transport endpoint is already connected */ +#define ENOTCONN 57 /* Transport endpoint is not connected */ +#define ESHUTDOWN 58 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 59 /* Too many references: cannot splice */ +#define ETIMEDOUT 60 /* Connection timed out */ +#define ECONNREFUSED 61 /* Connection refused */ +#define ELOOP 62 /* Too many symbolic links encountered */ +#define ENAMETOOLONG 63 /* File name too long */ +#define EHOSTDOWN 64 /* Host is down */ +#define EHOSTUNREACH 65 /* No route to host */ +#define ENOTEMPTY 66 /* Directory not empty */ + +#define EUSERS 68 /* Too many users */ +#define EDQUOT 69 /* Quota exceeded */ +#define ESTALE 70 /* Stale NFS file handle */ +#define EREMOTE 71 /* Object is remote */ + +#define ENOLCK 77 /* No record locks available */ +#define ENOSYS 78 /* Function not implemented */ + +#define ENOMSG 80 /* No message of desired type */ +#define EIDRM 81 /* Identifier removed */ +#define ENOSR 82 /* Out of streams resources */ +#define ETIME 83 /* Timer expired */ +#define EBADMSG 84 /* Not a data message */ +#define EPROTO 85 /* Protocol error */ +#define ENODATA 86 /* No data available */ +#define ENOSTR 87 /* Device not a stream */ + +#define ENOPKG 92 /* Package not installed */ + +#define EILSEQ 116 /* Illegal byte sequence */ + +/* The following are just random noise.. 
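Because sw64 keeps the Alpha-style errno layout above (EAGAIN is 35, EDEADLK is 11, ENAMETOOLONG is 63, and so on), software that forwards raw errno values across architecture boundaries has to translate them rather than pass the integers through. A minimal sketch, using only values visible in this header and the asm-generic numbering:

static int sw64_to_generic_errno(int err)
{
        switch (err) {
        case 11: return 35;     /* EDEADLK: 11 here, 35 in asm-generic */
        case 35: return 11;     /* EAGAIN: 35 here, 11 in asm-generic */
        case 63: return 36;     /* ENAMETOOLONG */
        case 77: return 37;     /* ENOLCK */
        case 78: return 38;     /* ENOSYS */
        default: return err;    /* a real table must cover every divergent value */
        }
}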
*/ +#define ECHRNG 88 /* Channel number out of range */ +#define EL2NSYNC 89 /* Level 2 not synchronized */ +#define EL3HLT 90 /* Level 3 halted */ +#define EL3RST 91 /* Level 3 reset */ + +#define ELNRNG 93 /* Link number out of range */ +#define EUNATCH 94 /* Protocol driver not attached */ +#define ENOCSI 95 /* No CSI structure available */ +#define EL2HLT 96 /* Level 2 halted */ +#define EBADE 97 /* Invalid exchange */ +#define EBADR 98 /* Invalid request descriptor */ +#define EXFULL 99 /* Exchange full */ +#define ENOANO 100 /* No anode */ +#define EBADRQC 101 /* Invalid request code */ +#define EBADSLT 102 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 104 /* Bad font file format */ +#define ENONET 105 /* Machine is not on the network */ +#define ENOLINK 106 /* Link has been severed */ +#define EADV 107 /* Advertise error */ +#define ESRMNT 108 /* Srmount error */ +#define ECOMM 109 /* Communication error on send */ +#define EMULTIHOP 110 /* Multihop attempted */ +#define EDOTDOT 111 /* RFS specific error */ +#define EOVERFLOW 112 /* Value too large for defined data type */ +#define ENOTUNIQ 113 /* Name not unique on network */ +#define EBADFD 114 /* File descriptor in bad state */ +#define EREMCHG 115 /* Remote address changed */ + +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ + +#define ELIBACC 122 /* Can not access a needed shared library */ +#define ELIBBAD 123 /* Accessing a corrupted shared library */ +#define ELIBSCN 124 /* .lib section in a.out corrupted */ +#define ELIBMAX 125 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 126 /* Cannot exec a shared library directly */ +#define ERESTART 127 /* Interrupted system call should be restarted */ +#define ESTRPIPE 128 /* Streams pipe error */ + +#define ENOMEDIUM 129 /* No medium found */ +#define EMEDIUMTYPE 130 /* Wrong medium type */ +#define ECANCELED 131 /* Operation Cancelled */ +#define ENOKEY 132 /* Required key not available */ +#define EKEYEXPIRED 133 /* Key has expired */ +#define EKEYREVOKED 134 /* Key has been revoked */ +#define EKEYREJECTED 135 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 136 /* Owner died */ +#define ENOTRECOVERABLE 137 /* State not recoverable */ + +#define ERFKILL 138 /* Operation not possible due to RF-kill */ + +#define EHWPOISON 139 /* Memory page has hardware error */ + +#endif diff --git a/arch/sw_64/include/uapi/asm/fcntl.h b/arch/sw_64/include/uapi/asm/fcntl.h new file mode 100644 index 000000000000..29c3aece8b55 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/fcntl.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_FCNTL_H +#define _UAPI_ASM_SW64_FCNTL_H + +#define O_CREAT 01000 /* not fcntl */ +#define O_TRUNC 02000 /* not fcntl */ +#define O_EXCL 04000 /* not fcntl */ +#define O_NOCTTY 010000 /* not fcntl */ + +#define O_NONBLOCK 00004 +#define O_APPEND 00010 +#define O_DSYNC 040000 /* used to be O_SYNC, see below */ +#define O_DIRECTORY 0100000 /* must be a directory */ +#define O_NOFOLLOW 0200000 /* don't follow links */ +#define O_LARGEFILE 0400000 /* will be set by the kernel on every open */ +#define O_DIRECT 02000000 /* direct disk access */ +#define O_NOATIME 04000000 +#define O_CLOEXEC 010000000 /* set close_on_exec */ +/* + * Before Linux 2.6.33 only O_DSYNC 
semantics were implemented, but using + * the O_SYNC flag. We continue to use the existing numerical value + * for O_DSYNC semantics now, but using the correct symbolic name for it. + * This new value is used to request true Posix O_SYNC semantics. It is + * defined in this strange way to make sure applications compiled against + * new headers get at least O_DSYNC semantics on older kernels. + * + * This has the nice side-effect that we can simply test for O_DSYNC + * wherever we do not care if O_DSYNC or O_SYNC is used. + * + * Note: __O_SYNC must never be used directly. + */ +#define __O_SYNC 020000000 +#define O_SYNC (__O_SYNC|O_DSYNC) + +#define O_PATH 040000000 +#define __O_TMPFILE 0100000000 + +#define F_GETLK 7 +#define F_SETLK 8 +#define F_SETLKW 9 + +#define F_SETOWN 5 /* for sockets. */ +#define F_GETOWN 6 /* for sockets. */ +#define F_SETSIG 10 /* for sockets. */ +#define F_GETSIG 11 /* for sockets. */ + +/* for posix fcntl() and lockf() */ +#define F_RDLCK 1 +#define F_WRLCK 2 +#define F_UNLCK 8 + +/* for old implementation of bsd flock () */ +#define F_EXLCK 16 /* or 3 */ +#define F_SHLCK 32 /* or 4 */ + +#include <asm-generic/fcntl.h> + +#endif diff --git a/arch/sw_64/include/uapi/asm/fpu.h b/arch/sw_64/include/uapi/asm/fpu.h new file mode 100644 index 000000000000..9b25f97e6a3a --- /dev/null +++ b/arch/sw_64/include/uapi/asm/fpu.h @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_FPU_H +#define _UAPI_ASM_SW64_FPU_H + +/* + * SW-64 floating-point control register defines: + */ +#define FPCR_DNOD (1UL << 47) /* denorm INV trap disable */ +#define FPCR_DNZ (1UL << 48) /* denorms to zero */ +#define FPCR_INVD (1UL << 49) /* invalid op disable (opt.) */ +#define FPCR_DZED (1UL << 50) /* division by zero disable (opt.) */ +#define FPCR_OVFD (1UL << 51) /* overflow disable (optional) */ +#define FPCR_INV (1UL << 52) /* invalid operation */ +#define FPCR_DZE (1UL << 53) /* division by zero */ +#define FPCR_OVF (1UL << 54) /* overflow */ +#define FPCR_UNF (1UL << 55) /* underflow */ +#define FPCR_INE (1UL << 56) /* inexact */ +#define FPCR_IOV (1UL << 57) /* integer overflow */ +#define FPCR_UNDZ (1UL << 60) /* underflow to zero (opt.) */ +#define FPCR_UNFD (1UL << 61) /* underflow disable (opt.) */ +#define FPCR_INED (1UL << 62) /* inexact disable (opt.) */ +#define FPCR_SUM (1UL << 63) /* summary bit */ + +#define FPCR_DYN_SHIFT 58 /* first dynamic rounding mode bit */ +#define FPCR_DYN_CHOPPED (0x0UL << FPCR_DYN_SHIFT) /* towards 0 */ +#define FPCR_DYN_MINUS (0x1UL << FPCR_DYN_SHIFT) /* towards -INF */ +#define FPCR_DYN_NORMAL (0x2UL << FPCR_DYN_SHIFT) /* towards nearest */ +#define FPCR_DYN_PLUS (0x3UL << FPCR_DYN_SHIFT) /* towards +INF */ +#define FPCR_DYN_MASK (0x3UL << FPCR_DYN_SHIFT) + +#define FPCR_MASK 0xffff800000000000L + +/* status bit coming from hardware fpcr . 
definde by fire3 */ +#define FPCR_STATUS_INV0 (1UL << 52) +#define FPCR_STATUS_DZE0 (1UL << 53) +#define FPCR_STATUS_OVF0 (1UL << 54) +#define FPCR_STATUS_UNF0 (1UL << 55) +#define FPCR_STATUS_INE0 (1UL << 56) +#define FPCR_STATUS_OVI0 (1UL << 57) + +#define FPCR_STATUS_INV1 (1UL << 36) +#define FPCR_STATUS_DZE1 (1UL << 37) +#define FPCR_STATUS_OVF1 (1UL << 38) +#define FPCR_STATUS_UNF1 (1UL << 39) +#define FPCR_STATUS_INE1 (1UL << 40) +#define FPCR_STATUS_OVI1 (1UL << 41) + +#define FPCR_STATUS_INV2 (1UL << 20) +#define FPCR_STATUS_DZE2 (1UL << 21) +#define FPCR_STATUS_OVF2 (1UL << 22) +#define FPCR_STATUS_UNF2 (1UL << 23) +#define FPCR_STATUS_INE2 (1UL << 24) +#define FPCR_STATUS_OVI2 (1UL << 25) + +#define FPCR_STATUS_INV3 (1UL << 4) +#define FPCR_STATUS_DZE3 (1UL << 5) +#define FPCR_STATUS_OVF3 (1UL << 6) +#define FPCR_STATUS_UNF3 (1UL << 7) +#define FPCR_STATUS_INE3 (1UL << 8) +#define FPCR_STATUS_OVI3 (1UL << 9) + +#define FPCR_STATUS_MASK0 (FPCR_STATUS_INV0 | FPCR_STATUS_DZE0 | \ + FPCR_STATUS_OVF0 | FPCR_STATUS_UNF0 | \ + FPCR_STATUS_INE0 | FPCR_STATUS_OVI0) + +#define FPCR_STATUS_MASK1 (FPCR_STATUS_INV1 | FPCR_STATUS_DZE1 | \ + FPCR_STATUS_OVF1 | FPCR_STATUS_UNF1 | \ + FPCR_STATUS_INE1 | FPCR_STATUS_OVI1) + +#define FPCR_STATUS_MASK2 (FPCR_STATUS_INV2 | FPCR_STATUS_DZE2 | \ + FPCR_STATUS_OVF2 | FPCR_STATUS_UNF2 | \ + FPCR_STATUS_INE2 | FPCR_STATUS_OVI2) + +#define FPCR_STATUS_MASK3 (FPCR_STATUS_INV3 | FPCR_STATUS_DZE3 | \ + FPCR_STATUS_OVF3 | FPCR_STATUS_UNF3 | \ + FPCR_STATUS_INE3 | FPCR_STATUS_OVI3) + + +/* + * IEEE trap enables are implemented in software. These per-thread + * bits are stored in the "ieee_state" field of "struct thread_info". + * Thus, the bits are defined so as not to conflict with the + * floating-point enable bit (which is architected). + */ +#define IEEE_TRAP_ENABLE_INV (1UL << 1) /* invalid op */ +#define IEEE_TRAP_ENABLE_DZE (1UL << 2) /* division by zero */ +#define IEEE_TRAP_ENABLE_OVF (1UL << 3) /* overflow */ +#define IEEE_TRAP_ENABLE_UNF (1UL << 4) /* underflow */ +#define IEEE_TRAP_ENABLE_INE (1UL << 5) /* inexact */ +#define IEEE_TRAP_ENABLE_DNO (1UL << 6) /* denorm */ +#define IEEE_TRAP_ENABLE_MASK (IEEE_TRAP_ENABLE_INV | IEEE_TRAP_ENABLE_DZE |\ + IEEE_TRAP_ENABLE_OVF | IEEE_TRAP_ENABLE_UNF |\ + IEEE_TRAP_ENABLE_INE | IEEE_TRAP_ENABLE_DNO) + +/* Denorm and Underflow flushing */ +#define IEEE_MAP_DMZ (1UL << 12) /* Map denorm inputs to zero */ +#define IEEE_MAP_UMZ (1UL << 13) /* Map underflowed outputs to zero */ + +#define IEEE_MAP_MASK (IEEE_MAP_DMZ | IEEE_MAP_UMZ) + +/* status bits coming from fpcr: */ +#define IEEE_STATUS_INV (1UL << 17) +#define IEEE_STATUS_DZE (1UL << 18) +#define IEEE_STATUS_OVF (1UL << 19) +#define IEEE_STATUS_UNF (1UL << 20) +#define IEEE_STATUS_INE (1UL << 21) +#define IEEE_STATUS_DNO (1UL << 22) + + +#define IEEE_STATUS_MASK (IEEE_STATUS_INV | IEEE_STATUS_DZE | \ + IEEE_STATUS_OVF | IEEE_STATUS_UNF | \ + IEEE_STATUS_INE | IEEE_STATUS_DNO) + +#define IEEE_SW_MASK (IEEE_TRAP_ENABLE_MASK | \ + IEEE_STATUS_MASK | IEEE_MAP_MASK) + +#define IEEE_CURRENT_RM_SHIFT 32 +#define IEEE_CURRENT_RM_MASK (3UL << IEEE_CURRENT_RM_SHIFT) + +#define IEEE_STATUS_TO_EXCSUM_SHIFT 16 + +#define IEEE_INHERIT (1UL << 63) /* inherit on thread create? 
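As an illustration of the two control-word layouts above, the following sketch composes a hardware FPCR value that selects round-to-nearest and a software IEEE word that enables invalid-operation and overflow traps and flushes denormal inputs. The constants are restated from this header; actually installing the values in hardware is arch-specific and outside its scope.

#include <stdio.h>

#define FPCR_DYN_SHIFT          58
#define FPCR_DYN_NORMAL         (0x2UL << FPCR_DYN_SHIFT)    /* round to nearest */
#define IEEE_TRAP_ENABLE_INV    (1UL << 1)
#define IEEE_TRAP_ENABLE_OVF    (1UL << 3)
#define IEEE_MAP_DMZ            (1UL << 12)                   /* flush denormal inputs */

int main(void)
{
        unsigned long fpcr = FPCR_DYN_NORMAL;
        unsigned long swcr = IEEE_TRAP_ENABLE_INV | IEEE_TRAP_ENABLE_OVF | IEEE_MAP_DMZ;

        printf("fpcr = %#lx, swcr = %#lx\n", fpcr, swcr);
        return 0;
}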
*/ + +/* ieee_state expand to surport simd added by fire3 */ + +#define IEEE_STATUS_INV0 (1UL << 17) +#define IEEE_STATUS_DZE0 (1UL << 18) +#define IEEE_STATUS_OVF0 (1UL << 19) +#define IEEE_STATUS_UNF0 (1UL << 20) +#define IEEE_STATUS_INE0 (1UL << 21) +#define IEEE_STATUS_DNO0 (1UL << 22) +#define IEEE_STATUS_MASK0 (IEEE_STATUS_INV0 | IEEE_STATUS_DZE0 | \ + IEEE_STATUS_OVF0 | IEEE_STATUS_UNF0 | \ + IEEE_STATUS_INE0 | IEEE_STATUS_DNO0) + +#define IEEE_STATUS0_TO_EXCSUM_SHIFT 16 + +#define IEEE_STATUS_INV1 (1UL << 23) +#define IEEE_STATUS_DZE1 (1UL << 24) +#define IEEE_STATUS_OVF1 (1UL << 25) +#define IEEE_STATUS_UNF1 (1UL << 26) +#define IEEE_STATUS_INE1 (1UL << 27) +#define IEEE_STATUS_DNO1 (1UL << 28) +#define IEEE_STATUS_MASK1 (IEEE_STATUS_INV1 | IEEE_STATUS_DZE1 | \ + IEEE_STATUS_OVF1 | IEEE_STATUS_UNF1 | \ + IEEE_STATUS_INE1 | IEEE_STATUS_DNO1) + +#define IEEE_STATUS1_TO_EXCSUM_SHIFT 22 + +#define IEEE_STATUS_INV2 (1UL << 34) +#define IEEE_STATUS_DZE2 (1UL << 35) +#define IEEE_STATUS_OVF2 (1UL << 36) +#define IEEE_STATUS_UNF2 (1UL << 37) +#define IEEE_STATUS_INE2 (1UL << 38) +#define IEEE_STATUS_DNO2 (1UL << 39) +#define IEEE_STATUS_MASK2 (IEEE_STATUS_INV2 | IEEE_STATUS_DZE2 | \ + IEEE_STATUS_OVF2 | IEEE_STATUS_UNF2 | \ + IEEE_STATUS_INE2 | IEEE_STATUS_DNO2) + +#define IEEE_STATUS2_TO_EXCSUM_SHIFT 33 + +#define IEEE_STATUS_INV3 (1UL << 40) +#define IEEE_STATUS_DZE3 (1UL << 41) +#define IEEE_STATUS_OVF3 (1UL << 42) +#define IEEE_STATUS_UNF3 (1UL << 43) +#define IEEE_STATUS_INE3 (1UL << 44) +#define IEEE_STATUS_DNO3 (1UL << 45) +#define IEEE_STATUS_MASK3 (IEEE_STATUS_INV3 | IEEE_STATUS_DZE3 | \ + IEEE_STATUS_OVF3 | IEEE_STATUS_UNF3 | \ + IEEE_STATUS_INE3 | IEEE_STATUS_DNO3) + +#define IEEE_STATUS3_TO_EXCSUM_SHIFT 39 + + +/* + * Convert the software IEEE trap enable and status bits into the + * hardware fpcr format. + */ +static inline unsigned long +ieee_swcr_to_fpcr(unsigned long sw) +{ + unsigned long fp; + + fp = (sw & IEEE_STATUS_MASK0) << 35; + fp |= (sw & IEEE_STATUS_MASK1) << 13; + fp |= (sw & IEEE_STATUS_MASK2) >> 14; + fp |= (sw & IEEE_STATUS_MASK3) >> 36; + + fp |= (sw & IEEE_MAP_DMZ) << 36; + fp |= (sw & IEEE_STATUS_MASK0 ? FPCR_SUM : 0); + fp |= (sw & IEEE_STATUS_MASK1 ? FPCR_SUM : 0); + fp |= (sw & IEEE_STATUS_MASK2 ? FPCR_SUM : 0); + fp |= (sw & IEEE_STATUS_MASK3 ? FPCR_SUM : 0); + fp |= (~sw & (IEEE_TRAP_ENABLE_INV + | IEEE_TRAP_ENABLE_DZE + | IEEE_TRAP_ENABLE_OVF)) << 48; + fp |= (~sw & (IEEE_TRAP_ENABLE_UNF | IEEE_TRAP_ENABLE_INE)) << 57; + fp |= (sw & IEEE_MAP_UMZ ? FPCR_UNDZ | FPCR_UNFD : 0); + fp |= (~sw & IEEE_TRAP_ENABLE_DNO) << 41; + return fp; +} + +static inline unsigned long +ieee_fpcr_to_swcr(unsigned long fp) +{ + unsigned long sw; + + sw = (fp >> 35) & IEEE_STATUS_MASK; + sw |= (fp >> 36) & IEEE_MAP_DMZ; + sw |= (~fp >> 48) & (IEEE_TRAP_ENABLE_INV + | IEEE_TRAP_ENABLE_DZE + | IEEE_TRAP_ENABLE_OVF); + sw |= (~fp >> 57) & (IEEE_TRAP_ENABLE_UNF | IEEE_TRAP_ENABLE_INE); + sw |= (fp >> 47) & IEEE_MAP_UMZ; + sw |= (~fp >> 41) & IEEE_TRAP_ENABLE_DNO; + return sw; +} +#endif /* _UAPI_ASM_SW64_FPU_H */ diff --git a/arch/sw_64/include/uapi/asm/gentrap.h b/arch/sw_64/include/uapi/asm/gentrap.h new file mode 100644 index 000000000000..4345058291fb --- /dev/null +++ b/arch/sw_64/include/uapi/asm/gentrap.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_GENTRAP_H +#define _UAPI_ASM_SW64_GENTRAP_H + +/* + * Definitions for gentrap causes. 
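A usage sketch for the two conversion helpers above. It assumes this file is visible to user space as <asm/fpu.h> on an sw_64 toolchain, which is an assumption about the installed headers rather than something this patch guarantees.

#include <stdio.h>
#include <asm/fpu.h>

int main(void)
{
        unsigned long sw = IEEE_TRAP_ENABLE_INV | IEEE_TRAP_ENABLE_DZE;
        unsigned long fp = ieee_swcr_to_fpcr(sw);

        /* round-trip back through ieee_fpcr_to_swcr() */
        printf("swcr %#lx -> fpcr %#lx -> swcr %#lx\n",
               sw, fp, ieee_fpcr_to_swcr(fp));
        return 0;
}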
They are generated by user-level + * programs and therefore should be compatible with the corresponding + * legacy definitions. + */ +#define GEN_INTOVF -1 /* integer overflow */ +#define GEN_INTDIV -2 /* integer division by zero */ +#define GEN_FLTOVF -3 /* fp overflow */ +#define GEN_FLTDIV -4 /* fp division by zero */ +#define GEN_FLTUND -5 /* fp underflow */ +#define GEN_FLTINV -6 /* invalid fp operand */ +#define GEN_FLTINE -7 /* inexact fp operand */ +#define GEN_DECOVF -8 /* decimal overflow (for COBOL??) */ +#define GEN_DECDIV -9 /* decimal division by zero */ +#define GEN_DECINV -10 /* invalid decimal operand */ +#define GEN_ROPRAND -11 /* reserved operand */ +#define GEN_ASSERTERR -12 /* assertion error */ +#define GEN_NULPTRERR -13 /* null pointer error */ +#define GEN_STKOVF -14 /* stack overflow */ +#define GEN_STRLENERR -15 /* string length error */ +#define GEN_SUBSTRERR -16 /* substring error */ +#define GEN_RANGERR -17 /* range error */ +#define GEN_SUBRNG -18 +#define GEN_SUBRNG1 -19 +#define GEN_SUBRNG2 -20 +#define GEN_SUBRNG3 -21 /* these report range errors for */ +#define GEN_SUBRNG4 -22 /* subscripting (indexing) at levels 0..7 */ +#define GEN_SUBRNG5 -23 +#define GEN_SUBRNG6 -24 +#define GEN_SUBRNG7 -25 + +/* the remaining codes (-26..-1023) are reserved. */ + +#endif /* _UAPI_ASM_SW64_GENTRAP_H */ diff --git a/arch/sw_64/include/uapi/asm/hmcall.h b/arch/sw_64/include/uapi/asm/hmcall.h new file mode 100644 index 000000000000..524101102fb8 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/hmcall.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_HMCALL_H +#define _UAPI_ASM_SW64_HMCALL_H + +/* hmcall may be used in user mode */ + +#define HMC_bpt 0x80 +#define HMC_callsys 0x83 +#define HMC_imb 0x86 +#define HMC_rdunique 0x9E +#define HMC_wrunique 0x9F +#define HMC_gentrap 0xAA +#define HMC_wrperfmon 0xB0 + +#endif diff --git a/arch/sw_64/include/uapi/asm/ioctl.h b/arch/sw_64/include/uapi/asm/ioctl.h new file mode 100644 index 000000000000..d62f10a6fa64 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/ioctl.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_IOCTL_H +#define _UAPI_ASM_SW64_IOCTL_H + +#define _IOC_SIZEBITS 13 +#define _IOC_DIRBITS 3 + +/* + * Direction bits _IOC_NONE could be 0, but legacy version gives it a bit. + * And this turns out useful to catch old ioctl numbers in header files for + * us. 
+ */ +#define _IOC_NONE 1U +#define _IOC_READ 2U +#define _IOC_WRITE 4U + +#include <asm-generic/ioctl.h> + +#endif /* _UAPI_ASM_SW64_IOCTL_H */ diff --git a/arch/sw_64/include/uapi/asm/ioctls.h b/arch/sw_64/include/uapi/asm/ioctls.h new file mode 100644 index 000000000000..eab34173f222 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/ioctls.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_IOCTLS_H +#define _UAPI_ASM_SW64_IOCTLS_H + +#include <asm/ioctl.h> + +#define FIOCLEX _IO('f', 1) +#define FIONCLEX _IO('f', 2) +#define FIOASYNC _IOW('f', 125, int) +#define FIONBIO _IOW('f', 126, int) +#define FIONREAD _IOR('f', 127, int) +#define TIOCINQ FIONREAD +#define FIOQSIZE _IOR('f', 128, loff_t) + +#define TIOCGETP _IOR('t', 8, struct sgttyb) +#define TIOCSETP _IOW('t', 9, struct sgttyb) +#define TIOCSETN _IOW('t', 10, struct sgttyb) /* TIOCSETP wo flush */ + +#define TIOCSETC _IOW('t', 17, struct tchars) +#define TIOCGETC _IOR('t', 18, struct tchars) +#define TCGETS _IOR('t', 19, struct termios) +#define TCSETS _IOW('t', 20, struct termios) +#define TCSETSW _IOW('t', 21, struct termios) +#define TCSETSF _IOW('t', 22, struct termios) + +#define TCGETA _IOR('t', 23, struct termio) +#define TCSETA _IOW('t', 24, struct termio) +#define TCSETAW _IOW('t', 25, struct termio) +#define TCSETAF _IOW('t', 28, struct termio) + +#define TCSBRK _IO('t', 29) +#define TCXONC _IO('t', 30) +#define TCFLSH _IO('t', 31) + +#define TIOCSWINSZ _IOW('t', 103, struct winsize) +#define TIOCGWINSZ _IOR('t', 104, struct winsize) +#define TIOCSTART _IO('t', 110) /* start output, like ^Q */ +#define TIOCSTOP _IO('t', 111) /* stop output, like ^S */ +#define TIOCOUTQ _IOR('t', 115, int) /* output queue size */ + +#define TIOCGLTC _IOR('t', 116, struct ltchars) +#define TIOCSLTC _IOW('t', 117, struct ltchars) +#define TIOCSPGRP _IOW('t', 118, int) +#define TIOCGPGRP _IOR('t', 119, int) + +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define TIOCSCTTY 0x540E + +#define TIOCSTI 0x5412 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +# define TIOCM_LE 0x001 +# define TIOCM_DTR 0x002 +# define TIOCM_RTS 0x004 +# define TIOCM_ST 0x008 +# define TIOCM_SR 0x010 +# define TIOCM_CTS 0x020 +# define TIOCM_CAR 0x040 +# define TIOCM_RNG 0x080 +# define TIOCM_DSR 0x100 +# define TIOCM_CD TIOCM_CAR +# define TIOCM_RI TIOCM_RNG +# define TIOCM_OUT1 0x2000 +# define TIOCM_OUT2 0x4000 +# define TIOCM_LOOP 0x8000 + +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 +# define TIOCPKT_DATA 0 +# define TIOCPKT_FLUSHREAD 1 +# define TIOCPKT_FLUSHWRITE 2 +# define TIOCPKT_STOP 4 +# define TIOCPKT_START 8 +# define TIOCPKT_NOSTOP 16 +# define TIOCPKT_DOSTOP 32 +# define TIOCPKT_IOCTL 64 + + +#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +#define TIOCSBRK 0x5427 /* BSD compatibility */ +#define TIOCCBRK 0x5428 /* BSD compatibility */ +#define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TIOCGRS485 _IOR('T', 0x2E, struct serial_rs485) +#define TIOCSRS485 _IOWR('T', 0x2F, struct serial_rs485) +#define TIOCGPTN _IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ +#define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */ +#define TIOCGDEV _IOR('T', 0x32, unsigned int) /* Get primary device node of 
/dev/console */ +#define TIOCSIG _IOW('T', 0x36, int) /* Generate signal on Pty slave */ +#define TIOCVHANGUP 0x5437 +#define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */ +#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ +#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ +#define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ +#define TIOCGISO7816 _IOR('T', 0x42, struct serial_iso7816) +#define TIOCSISO7816 _IOWR('T', 0x43, struct serial_iso7816) + +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ +# define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ +#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ + +#endif /* _UAPI_ASM_SW64_IOCTLS_H */ diff --git a/arch/sw_64/include/uapi/asm/ipcbuf.h b/arch/sw_64/include/uapi/asm/ipcbuf.h new file mode 100644 index 000000000000..f063105ba09f --- /dev/null +++ b/arch/sw_64/include/uapi/asm/ipcbuf.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_IPCBUF_H +#define _UAPI_ASM_SW64_IPCBUF_H + +#include <asm-generic/ipcbuf.h> + +#endif diff --git a/arch/sw_64/include/uapi/asm/kvm.h b/arch/sw_64/include/uapi/asm/kvm.h new file mode 100644 index 000000000000..47877b56e980 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/kvm.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_KVM_H +#define _UAPI_ASM_SW64_KVM_H + +/* + * KVM SW specific structures and definitions. 
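This header defines __KVM_HAVE_IRQ_LINE and, just below, enumerates the virtual interrupt lines (IPI, timer, keyboard, mouse). A hedged sketch of how a user-space VMM might assert one of them through the generic KVM_IRQ_LINE ioctl follows; vm_fd is assumed to come from an earlier KVM_CREATE_VM call (not shown), and the number 9 is SW64_KVM_IRQ_TIMER from the enum below.

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int assert_timer_irq(int vm_fd)
{
        struct kvm_irq_level irq = {
                .irq   = 9,     /* SW64_KVM_IRQ_TIMER in the enum below */
                .level = 1,     /* assert the line; 0 would deassert it */
        };

        return ioctl(vm_fd, KVM_IRQ_LINE, &irq);
}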
+ */ +#define SWVM_IRQS 64 +enum SW64_KVM_IRQ { + SW64_KVM_IRQ_IPI = 27, + SW64_KVM_IRQ_TIMER = 9, + SW64_KVM_IRQ_KBD = 29, + SW64_KVM_IRQ_MOUSE = 30, +}; + +#define SWVM_VM_TYPE_DEFAULT 0 +#define SWVM_VM_TYPE_PHYVCPU 1 +#define __KVM_HAVE_IRQ_LINE + +#define SWVM_NUM_NUMA_MEMBANKS 1 +#define KVM_NR_IRQCHIPS 1 +/* + * for KVM_GET_REGS and KVM_SET_REGS + */ +struct kvm_regs { + unsigned long r0; + unsigned long r1; + unsigned long r2; + unsigned long r3; + + unsigned long r4; + unsigned long r5; + unsigned long r6; + unsigned long r7; + + unsigned long r8; + unsigned long r9; + unsigned long r10; + unsigned long r11; + + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; + + unsigned long r19; + unsigned long r20; + unsigned long r21; + unsigned long r22; + + unsigned long r23; + unsigned long r24; + unsigned long r25; + unsigned long r26; + + unsigned long r27; + unsigned long r28; + unsigned long __padding0; + unsigned long fpcr; + + unsigned long fp[124]; + /* These are saved by HMcode: */ + unsigned long ps; + unsigned long pc; + unsigned long gp; + unsigned long r16; + unsigned long r17; + unsigned long r18; +}; + + +/* + * return stack for __sw64_vcpu_run + */ +struct vcpu_run_ret_stack { + unsigned long ra; + unsigned long r0; +}; + +struct host_int_args { + unsigned long r18; + unsigned long r17; + unsigned long r16; +}; + +/* + * for KVM_GET_FPU and KVM_SET_FPU + */ +struct kvm_fpu { +}; + +struct hcall_args { + unsigned long arg0, arg1, arg2; +}; + +struct phyvcpu_hcall_args { + unsigned long call; + struct hcall_args args; +}; + +struct kvm_debug_exit_arch { + unsigned long epc; +}; + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { +}; + +/* definition of registers in kvm_run */ +struct kvm_sync_regs { +}; + +/* dummy definition */ +struct kvm_sregs { +}; + + +struct swvm_mem_bank { + unsigned long guest_phys_addr; + unsigned long host_phys_addr; + unsigned long host_addr; + unsigned long size; +}; + +struct swvm_mem { + struct swvm_mem_bank membank[SWVM_NUM_NUMA_MEMBANKS]; +}; + +#endif /* _UAPI_ASM_SW64_KVM_H */ diff --git a/arch/sw_64/include/uapi/asm/kvm_para.h b/arch/sw_64/include/uapi/asm/kvm_para.h new file mode 100644 index 000000000000..405840b0e1d8 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/kvm_para.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_KVM_PARA_H +#define _UAPI_ASM_SW64_KVM_PARA_H + +#include <asm-generic/kvm_para.h> + +#endif diff --git a/arch/sw_64/include/uapi/asm/mman.h b/arch/sw_64/include/uapi/asm/mman.h new file mode 100644 index 000000000000..f9ac285702a5 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/mman.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_MMAN_H +#define _UAPI_ASM_SW64_MMAN_H + +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_SEM 0x8 /* page may be used for atomic ops */ +#define PROT_NONE 0x0 /* page can not be accessed */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +#define MAP_TYPE 0x0f /* Mask for type of mapping */ +#define MAP_FIXED 0x100 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x10 /* don't use a file */ + +/* not used by linux, may be deprecated */ +#define _MAP_HASSEMAPHORE 0x0200 +#define _MAP_INHERIT 0x0400 +#define _MAP_UNALIGNED 0x0800 + +/* 
These are linux-specific */ +#define MAP_GROWSDOWN 0x01000 /* stack-like segment */ +#define MAP_DENYWRITE 0x02000 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x04000 /* mark it as an executable */ +#define MAP_LOCKED 0x08000 /* lock the mapping */ +#define MAP_NORESERVE 0x10000 /* don't check for reservations */ +#define MAP_POPULATE 0x20000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x40000 /* do not block on IO */ +#define MAP_STACK 0x80000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x100000 /* create a huge page mapping */ +#define MAP_FIXED_NOREPLACE 0x200000 /* MAP_FIXED which doesn't unmap underlying mapping */ +#define MAP_PA32BIT 0x400000 /* physical address is within 4G */ +#define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */ + +#define MS_ASYNC 1 /* sync memory asynchronously */ +#define MS_SYNC 2 /* synchronous memory sync */ +#define MS_INVALIDATE 4 /* invalidate the caches */ + +#define MCL_CURRENT 8192 /* lock all currently mapped pages */ +#define MCL_FUTURE 16384 /* lock all additions to address space */ +#define MCL_ONFAULT 32768 /* lock all pages that are faulted in */ + +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ + +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_SPACEAVAIL 5 /* ensure resources are available */ +#define MADV_DONTNEED 6 /* don't need these pages */ + +/* common/generic parameters */ +#define MADV_FREE 8 /* free pages only if memory pressure */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ + +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ + +#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ +#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ + +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + +#define MADV_COLD 20 /* deactivate these pages */ +#define MADV_PAGEOUT 21 /* reclaim these pages */ + +/* compatibility flags */ +#define MAP_FILE 0 + + +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) + +#endif /* _UAPI_ASM_SW64_MMAN_H */ diff --git a/arch/sw_64/include/uapi/asm/msgbuf.h b/arch/sw_64/include/uapi/asm/msgbuf.h new file mode 100644 index 000000000000..d61eea10813d --- /dev/null +++ b/arch/sw_64/include/uapi/asm/msgbuf.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_MSGBUF_H +#define _UAPI_ASM_SW64_MSGBUF_H + +/* + * The msqid64_ds structure for sw64 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. 
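Several of the mmap/mlock constants above differ from the asm-generic values used on x86 and arm64 (MAP_ANONYMOUS is 0x10 rather than 0x20, MCL_CURRENT is 8192 rather than 1, and so on), so the only portable approach is to take them from the system headers, as in this minimal sketch:

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
        void *p = mmap(NULL, 8192, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED) {
                perror("mmap");
                return 1;
        }
        printf("MAP_ANONYMOUS = %#x\n", MAP_ANONYMOUS);     /* 0x10 on sw64 */
        munmap(p, 8192);
        return 0;
}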
+ * + * Pad space is left for: + * - 2 miscellaneous 64-bit values + */ + +struct msqid64_ds { + struct ipc64_perm msg_perm; + long msg_stime; /* last msgsnd time */ + long msg_rtime; /* last msgrcv time */ + long msg_ctime; /* last change time */ + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* _UAPI_ASM_SW64_MSGBUF_H */ diff --git a/arch/sw_64/include/uapi/asm/param.h b/arch/sw_64/include/uapi/asm/param.h new file mode 100644 index 000000000000..75eeac6a7dc8 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/param.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_PARAM_H +#define _UAPI_ASM_SW64_PARAM_H + +#define HZ 100 + +#define EXEC_PAGESIZE 8192 + +#ifndef NOGROUP +#define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 /* max length of hostname */ + + +#endif /* _UAPI_ASM_SW64_PARAM_H */ diff --git a/arch/sw_64/include/uapi/asm/perf_regs.h b/arch/sw_64/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..426ae642fcc8 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/perf_regs.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _ASM_SW64_PERF_REGS_H +#define _ASM_SW64_PERF_REGS_H + +enum perf_event_sw64_regs { + PERF_REG_SW64_R0, + PERF_REG_SW64_R1, + PERF_REG_SW64_R2, + PERF_REG_SW64_R3, + PERF_REG_SW64_R4, + PERF_REG_SW64_R5, + PERF_REG_SW64_R6, + PERF_REG_SW64_R7, + PERF_REG_SW64_R8, + PERF_REG_SW64_R19, + PERF_REG_SW64_R20, + PERF_REG_SW64_R21, + PERF_REG_SW64_R22, + PERF_REG_SW64_R23, + PERF_REG_SW64_R24, + PERF_REG_SW64_R25, + PERF_REG_SW64_R26, + PERF_REG_SW64_R27, + PERF_REG_SW64_R28, + PERF_REG_SW64_HAE, + PERF_REG_SW64_TRAP_A0, + PERF_REG_SW64_TRAP_A1, + PERF_REG_SW64_TRAP_A2, + PERF_REG_SW64_PS, + PERF_REG_SW64_PC, + PERF_REG_SW64_GP, + PERF_REG_SW64_R16, + PERF_REG_SW64_R17, + PERF_REG_SW64_R18, + PERF_REG_SW64_MAX, +}; +#endif /* _ASM_SW64_PERF_REGS_H */ diff --git a/arch/sw_64/include/uapi/asm/poll.h b/arch/sw_64/include/uapi/asm/poll.h new file mode 100644 index 000000000000..5e2de3182050 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/poll.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_POLL_H +#define _UAPI_ASM_SW64_POLL_H + +#include <asm-generic/poll.h> + +#endif diff --git a/arch/sw_64/include/uapi/asm/posix_types.h b/arch/sw_64/include/uapi/asm/posix_types.h new file mode 100644 index 000000000000..fb7badf78c3c --- /dev/null +++ b/arch/sw_64/include/uapi/asm/posix_types.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_POSIX_TYPES_H +#define _UAPI_ASM_SW64_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. 
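The perf_event_sw64_regs enum above fixes the indices perf uses when sampling user registers. A sketch of requesting the PC and the return-address register through perf_event_attr follows; the two index values are restated from the enum, and opening the event with perf_event_open() plus decoding the ring buffer are omitted.

#include <string.h>
#include <linux/perf_event.h>

#define PERF_REG_SW64_R26 16    /* index from the enum above */
#define PERF_REG_SW64_PC  24    /* index from the enum above */

static void setup_attr(struct perf_event_attr *attr)
{
        memset(attr, 0, sizeof(*attr));
        attr->size = sizeof(*attr);
        attr->type = PERF_TYPE_HARDWARE;
        attr->config = PERF_COUNT_HW_CPU_CYCLES;
        attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_REGS_USER;
        attr->sample_regs_user = (1ULL << PERF_REG_SW64_PC) |
                                 (1ULL << PERF_REG_SW64_R26);
}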
+ */ + +typedef unsigned long __kernel_ino_t; +#define __kernel_ino_t __kernel_ino_t + +typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ + +#include <asm-generic/posix_types.h> + +#endif /* _UAPI_ASM_SW64_POSIX_TYPES_H */ diff --git a/arch/sw_64/include/uapi/asm/ptrace.h b/arch/sw_64/include/uapi/asm/ptrace.h new file mode 100644 index 000000000000..7cf7bf5a75b4 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/ptrace.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_PTRACE_H +#define _UAPI_ASM_SW64_PTRACE_H + + +/* + * This struct defines the way the registers are stored on the + * kernel stack during a system call or other kernel entry + * + * NOTE! I want to minimize the overhead of system calls, so this + * struct has as little information as possible. I does not have + * + * - floating point regs: the kernel doesn't change those + * - r9-15: saved by the C compiler + * + * This makes "fork()" and "exec()" a bit more complex, but should + * give us low system call latency. + */ + +struct pt_regs { + unsigned long r0; + unsigned long r1; + unsigned long r2; + unsigned long r3; + unsigned long r4; + unsigned long r5; + unsigned long r6; + unsigned long r7; + unsigned long r8; + unsigned long r19; + unsigned long r20; + unsigned long r21; + unsigned long r22; + unsigned long r23; + unsigned long r24; + unsigned long r25; + unsigned long r26; + unsigned long r27; + unsigned long r28; + unsigned long hae; +/* JRP - These are the values provided to a0-a2 by HMcode */ + unsigned long trap_a0; + unsigned long trap_a1; + unsigned long trap_a2; +/* These are saved by HMcode: */ + unsigned long ps; + unsigned long pc; + unsigned long gp; + unsigned long r16; + unsigned long r17; + unsigned long r18; +}; + +/* + * This is the extended stack used by signal handlers and the context + * switcher: it's pushed after the normal "struct pt_regs". + */ +struct switch_stack { + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; + unsigned long r26; +}; + +#define PTRACE_GETREGS 12 /* get general purpose registers */ +#define PTRACE_SETREGS 13 /* set general purpose registers */ +#define PTRACE_GETFPREGS 14 /* get floating-point registers */ +#define PTRACE_SETFPREGS 15 /* set floating-point registers */ +/* PTRACE_ATTACH is 16 */ +/* PTRACE_DETACH is 17 */ + +#define REG_BASE 0 +#define REG_END 29 +#define USP 30 +#define FPREG_BASE 32 +#define FPREG_END 62 +#define FPCR 63 +#define PC 64 +#define UNIQUE 65 +#define VECREG_BASE 67 +#define VECREG_END 161 +#define F31_V1 98 +#define F31_V2 130 +#define DA_MATCH 163 +#define DA_MASK 164 +#define DV_MATCH 165 +#define DV_MASK 166 +#define DC_CTL 167 + +#endif /* _UAPI_ASM_SW64_PTRACE_H */ diff --git a/arch/sw_64/include/uapi/asm/reg.h b/arch/sw_64/include/uapi/asm/reg.h new file mode 100644 index 000000000000..a19dc4cbf744 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/reg.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_REG_H +#define _UAPI_ASM_SW64_REG_H + +/* + * Exception frame offsets. 
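The register offsets above (REG_BASE..PC, FPREG_BASE, UNIQUE, the DA_/DV_ debug registers) are what a debugger passes as the address argument of PTRACE_PEEKUSER/POKEUSER. A sketch reading a stopped child's PC follows; it assumes the Alpha-style user-area addressing applies here and keeps error handling minimal.

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

#define SW64_PC_OFFSET 64       /* "PC" from this header */

int main(void)
{
        pid_t pid = fork();

        if (pid == 0) {
                ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                execlp("true", "true", (char *)NULL);
                _exit(127);
        }
        waitpid(pid, NULL, 0);  /* child stops on exec */
        long pc = ptrace(PTRACE_PEEKUSER, pid, (void *)SW64_PC_OFFSET, NULL);
        printf("child pc = %#lx\n", pc);
        ptrace(PTRACE_CONT, pid, NULL, NULL);
        return 0;
}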
+ */ +#define EF_V0 0 +#define EF_T0 1 +#define EF_T1 2 +#define EF_T2 3 +#define EF_T3 4 +#define EF_T4 5 +#define EF_T5 6 +#define EF_T6 7 +#define EF_T7 8 +#define EF_S0 9 +#define EF_S1 10 +#define EF_S2 11 +#define EF_S3 12 +#define EF_S4 13 +#define EF_S5 14 +#define EF_S6 15 +#define EF_A3 16 +#define EF_A4 17 +#define EF_A5 18 +#define EF_T8 19 +#define EF_T9 20 +#define EF_T10 21 +#define EF_T11 22 +#define EF_RA 23 +#define EF_T12 24 +#define EF_AT 25 +#define EF_SP 26 +#define EF_PS 27 +#define EF_PC 28 +#define EF_GP 29 +#define EF_A0 30 +#define EF_A1 31 +#define EF_A2 32 + +#define EF_SIZE (33*8) +#define HWEF_SIZE (6*8) /* size of HMCODE frame (PS-A2) */ + +#define EF_SSIZE (EF_SIZE - HWEF_SIZE) + +/* + * Map register number into core file offset. + */ +#define CORE_REG(reg, ubase) \ + (((unsigned long *)((unsigned long)(ubase)))[reg]) + +#endif /* _UAPI_ASM_SW64_REG_H */ diff --git a/arch/sw_64/include/uapi/asm/regdef.h b/arch/sw_64/include/uapi/asm/regdef.h new file mode 100644 index 000000000000..5031abc0947a --- /dev/null +++ b/arch/sw_64/include/uapi/asm/regdef.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_REGDEF_H +#define _UAPI_ASM_SW64_REGDEF_H + +#define v0 $0 /* function return value */ + +#define t0 $1 /* temporary registers (caller-saved) */ +#define t1 $2 +#define t2 $3 +#define t3 $4 +#define t4 $5 +#define t5 $6 +#define t6 $7 +#define t7 $8 + +#define s0 $9 /* saved-registers (callee-saved registers) */ +#define s1 $10 +#define s2 $11 +#define s3 $12 +#define s4 $13 +#define s5 $14 +#define s6 $15 +#define fp s6 /* frame-pointer (s6 in frame-less procedures) */ + +#define a0 $16 /* argument registers (caller-saved) */ +#define a1 $17 +#define a2 $18 +#define a3 $19 +#define a4 $20 +#define a5 $21 + +#define t8 $22 /* more temps (caller-saved) */ +#define t9 $23 +#define t10 $24 +#define t11 $25 +#define ra $26 /* return address register */ +#define t12 $27 + +#define pv t12 /* procedure-variable register */ +#define AT $at /* assembler temporary */ +#define gp $29 /* global pointer */ +#define sp $30 /* stack pointer */ +#define zero $31 /* reads as zero, writes are noops */ + +#endif /* _UAPI_ASM_SW64_REGDEF_H */ diff --git a/arch/sw_64/include/uapi/asm/resource.h b/arch/sw_64/include/uapi/asm/resource.h new file mode 100644 index 000000000000..ff7dc683c195 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/resource.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_RESOURCE_H +#define _UAPI_ASM_SW64_RESOURCE_H + +/* + * SW-64/Linux-specific ordering of these four resource limit IDs, + * the rest comes from the generic header: + */ +#define RLIMIT_NOFILE 6 /* max number of open files */ +#define RLIMIT_AS 7 /* address space limit */ +#define RLIMIT_NPROC 8 /* max number of processes */ +#define RLIMIT_MEMLOCK 9 /* max locked-in-memory address space */ + +/* + * SuS says limits have to be unsigned. Fine, it's unsigned, but + * we retain the old value for compatibility, especially with DU. + * When you run into the 2^63 barrier, you call me. 
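The RLIMIT_* numbers above keep the historical ordering (RLIMIT_NOFILE is 6 rather than the asm-generic 7), one more place where only the symbolic names are portable, as in this minimal sketch:

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
        struct rlimit rl;

        if (getrlimit(RLIMIT_NOFILE, &rl) == 0)
                printf("soft fd limit: %llu\n", (unsigned long long)rl.rlim_cur);
        return 0;
}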
+ */ +#define RLIM_INFINITY 0x7ffffffffffffffful + +#include <asm-generic/resource.h> + +#endif /* _UAPI_ASM_SW64_RESOURCE_H */ diff --git a/arch/sw_64/include/uapi/asm/sembuf.h b/arch/sw_64/include/uapi/asm/sembuf.h new file mode 100644 index 000000000000..f574390bcd57 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/sembuf.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_SEMBUF_H +#define _UAPI_ASM_SW64_SEMBUF_H + +/* + * The semid64_ds structure for sw64 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 2 miscellaneous 64-bit values + */ + +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ + long sem_otime; /* last semop time */ + long sem_ctime; /* last change time */ + unsigned long sem_nsems; /* no. of semaphores in array */ + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* _UAPI_ASM_SW64_SEMBUF_H */ diff --git a/arch/sw_64/include/uapi/asm/setup.h b/arch/sw_64/include/uapi/asm/setup.h new file mode 100644 index 000000000000..fefd57415a3b --- /dev/null +++ b/arch/sw_64/include/uapi/asm/setup.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_SETUP_H +#define _UAPI_ASM_SW64_SETUP_H + +#define COMMAND_LINE_SIZE 2048 + +#endif diff --git a/arch/sw_64/include/uapi/asm/shmbuf.h b/arch/sw_64/include/uapi/asm/shmbuf.h new file mode 100644 index 000000000000..66d8cb5b2ba3 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/shmbuf.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_SHMBUF_H +#define _UAPI_ASM_SW64_SHMBUF_H + +/* + * The shmid64_ds structure for sw64 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 2 miscellaneous 64-bit values + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + long shm_atime; /* last attach time */ + long shm_dtime; /* last detach time */ + long shm_ctime; /* last change time */ + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned long shm_nattch; /* no. of current attaches */ + unsigned long __unused1; + unsigned long __unused2; +}; + +struct shminfo64 { + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* _UAPI_ASM_SW64_SHMBUF_H */ diff --git a/arch/sw_64/include/uapi/asm/sigcontext.h b/arch/sw_64/include/uapi/asm/sigcontext.h new file mode 100644 index 000000000000..c2b7cff884eb --- /dev/null +++ b/arch/sw_64/include/uapi/asm/sigcontext.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_SIGCONTEXT_H +#define _UAPI_ASM_SW64_SIGCONTEXT_H + +struct sigcontext { + /* + * What should we have here? I'd probably better use the same + * stack layout as DEC Unix, just in case we ever want to try + * running their binaries.. + * + * This is the basic layout, but I don't know if we'll ever + * actually fill in all the values.. 
+ */ + long sc_onstack; + long sc_mask; + long sc_pc; + long sc_ps; + long sc_regs[32]; + long sc_ownedfp; + long sc_fpregs[128]; /* SIMD-FP */ + unsigned long sc_fpcr; + unsigned long sc_fp_control; + unsigned long sc_reserved1, sc_reserved2; + unsigned long sc_ssize; + char *sc_sbase; + unsigned long sc_traparg_a0; + unsigned long sc_traparg_a1; + unsigned long sc_traparg_a2; + unsigned long sc_fp_trap_pc; + unsigned long sc_fp_trigger_sum; + unsigned long sc_fp_trigger_inst; +}; + + +#endif diff --git a/arch/sw_64/include/uapi/asm/siginfo.h b/arch/sw_64/include/uapi/asm/siginfo.h new file mode 100644 index 000000000000..b50afbf15f7c --- /dev/null +++ b/arch/sw_64/include/uapi/asm/siginfo.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_SIGINFO_H +#define _UAPI_ASM_SW64_SIGINFO_H + +#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +#define __ARCH_SI_TRAPNO + +#include <asm-generic/siginfo.h> + + +#endif diff --git a/arch/sw_64/include/uapi/asm/signal.h b/arch/sw_64/include/uapi/asm/signal.h new file mode 100644 index 000000000000..71471c8c7624 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/signal.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_SIGNAL_H +#define _UAPI_ASM_SW64_SIGNAL_H + +#include <linux/types.h> + +/* Avoid too many header ordering problems. */ +struct siginfo; + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ + + +/* + * Linux/sw64 different signal numbers that Linux/i386. + */ +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGEMT 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGBUS 10 +#define SIGSEGV 11 +#define SIGSYS 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGURG 16 +#define SIGSTOP 17 +#define SIGTSTP 18 +#define SIGCONT 19 +#define SIGCHLD 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGIO 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGINFO 29 +#define SIGUSR1 30 +#define SIGUSR2 31 + +#define SIGPOLL SIGIO +#define SIGPWR SIGINFO +#define SIGIOT SIGABRT + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX _NSIG + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. 
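The numbering above follows the Alpha layout (SIGBUS is 10, SIGCHLD is 20, SIGUSR1 is 30), so numeric signal values are not interchangeable with the x86 set. A minimal sketch that stays with the symbolic names and the SA_SIGINFO flag described above:

#include <stdio.h>
#include <signal.h>
#include <unistd.h>

static void on_usr1(int sig, siginfo_t *info, void *ctx)
{
        (void)sig; (void)info; (void)ctx;
        /* write() is async-signal-safe; printf() is not */
        (void)write(STDOUT_FILENO, "got SIGUSR1\n", 12);
}

int main(void)
{
        struct sigaction sa = { 0 };

        sa.sa_sigaction = on_usr1;
        sa.sa_flags = SA_SIGINFO;
        sigemptyset(&sa.sa_mask);
        sigaction(SIGUSR1, &sa, NULL);

        printf("SIGUSR1 = %d\n", SIGUSR1);      /* 30 on sw64, 10 on x86 */
        raise(SIGUSR1);
        return 0;
}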
+ */ + +#define SA_ONSTACK 0x00000001 +#define SA_RESTART 0x00000002 +#define SA_NOCLDSTOP 0x00000004 +#define SA_NODEFER 0x00000008 +#define SA_RESETHAND 0x00000010 +#define SA_NOCLDWAIT 0x00000020 +#define SA_SIGINFO 0x00000040 + +#define SA_ONESHOT SA_RESETHAND +#define SA_NOMASK SA_NODEFER + +#define MINSIGSTKSZ 4096 +#define SIGSTKSZ 16384 + +#define SIG_BLOCK 1 /* for blocking signals */ +#define SIG_UNBLOCK 2 /* for unblocking signals */ +#define SIG_SETMASK 3 /* for setting the signal mask */ + +#include <asm-generic/signal-defs.h> + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; + sigset_t sa_mask; + int sa_flags; +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +#endif /* __KERNEL__ */ + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +#endif /* _UAPI_ASM_SW64_SIGNAL_H */ diff --git a/arch/sw_64/include/uapi/asm/socket.h b/arch/sw_64/include/uapi/asm/socket.h new file mode 100644 index 000000000000..abfa2108522c --- /dev/null +++ b/arch/sw_64/include/uapi/asm/socket.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_SOCKET_H +#define _UAPI_ASM_SW64_SOCKET_H + +#include <asm/sockios.h> + +/* For setsockopt(2) */ +/* + * Note: we only bother about making the SOL_SOCKET options + * same as legacy, as that's all that "normal" programs are + * likely to set. We don't necessarily want to be binary + * compatible with _everything_. + */ +#define SOL_SOCKET 0xffff + +#define SO_DEBUG 0x0001 +#define SO_REUSEADDR 0x0004 +#define SO_KEEPALIVE 0x0008 +#define SO_DONTROUTE 0x0010 +#define SO_BROADCAST 0x0020 +#define SO_LINGER 0x0080 +#define SO_OOBINLINE 0x0100 +#define SO_REUSEPORT 0x0200 + +#define SO_TYPE 0x1008 +#define SO_ERROR 0x1007 +#define SO_SNDBUF 0x1001 +#define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b +#define SO_RCVLOWAT 0x1010 +#define SO_SNDLOWAT 0x1011 +#define SO_RCVTIMEO_OLD 0x1012 +#define SO_SNDTIMEO_OLD 0x1013 +#define SO_ACCEPTCONN 0x1014 +#define SO_PROTOCOL 0x1028 +#define SO_DOMAIN 0x1029 + +/* linux-specific, might as well be the same as on i386 */ +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 +#define SO_BSDCOMPAT 14 + +#define SO_PASSCRED 17 +#define SO_PEERCRED 18 +#define SO_BINDTODEVICE 25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER + +#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP + +#define SO_PEERSEC 30 +#define SO_PASSSEC 34 +#define SO_TIMESTAMPNS 35 +#define SCM_TIMESTAMPNS SO_TIMESTAMPNS + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 19 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 20 +#define SO_SECURITY_ENCRYPTION_NETWORK 21 + +#define SO_MARK 36 + +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + +#define SO_RXQ_OVFL 40 + +#define SO_WIFI_STATUS 41 +#define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 + +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + +#define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 +#define SO_BUSY_POLL 46 +#define SO_MAX_PACING_RATE 47 +#define SO_BPF_EXTENSIONS 48 +#define SO_INCOMING_CPU 49 +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER 
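The SOL_SOCKET option values above keep the legacy numbering noted in the header comment rather than the asm-generic one, so portable programs must rely on the symbolic names exported here. A minimal userspace sketch of the usual pattern follows (hypothetical example, assuming a glibc toolchain built against these uapi headers); the caller needs nothing sw_64-specific.

/*
 * Hypothetical userspace example: only the symbolic names are used, so the
 * sw_64-specific values (SOL_SOCKET 0xffff, SO_REUSEADDR 0x0004) stay
 * transparent to the program.
 */
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int one = 1;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)))
		perror("SO_REUSEADDR");
	else
		printf("SO_REUSEADDR enabled (0x%x on sw_64)\n", SO_REUSEADDR);
	return 0;
}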
+ +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + +#define SO_CNX_ADVICE 53 + +#define SCM_TIMESTAMPING_OPT_STATS 54 + +#define SO_MEMINFO 55 + +#define SO_INCOMING_NAPI_ID 56 + +#define SO_COOKIE 57 + +#define SCM_TIMESTAMPING_PKTINFO 58 + +#define SO_PEERGROUPS 59 + +#define SO_ZEROCOPY 60 + +#define SO_TXTIME 61 +#define SCM_TXTIME SO_TXTIME + +#define SO_BINDTOIFINDEX 62 + +#define SO_TIMESTAMP_OLD 29 +#define SO_TIMESTAMPNS_OLD 35 +#define SO_TIMESTAMPING_OLD 37 + +#define SO_TIMESTAMP_NEW 63 +#define SO_TIMESTAMPNS_NEW 64 +#define SO_TIMESTAMPING_NEW 65 + +#define SO_RCVTIMEO_NEW 66 +#define SO_SNDTIMEO_NEW 67 + +#define SO_DETACH_REUSEPORT_BPF 68 + +#endif /* _UAPI_ASM_SW64_SOCKET_H */ diff --git a/arch/sw_64/include/uapi/asm/sockios.h b/arch/sw_64/include/uapi/asm/sockios.h new file mode 100644 index 000000000000..1f30fb881065 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/sockios.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_SOCKIOS_H +#define _UAPI_ASM_SW64_SOCKIOS_H + +/* Socket-level I/O control calls. */ + +#define FIOGETOWN _IOR('f', 123, int) +#define FIOSETOWN _IOW('f', 124, int) + +#define SIOCATMARK _IOR('s', 7, int) +#define SIOCSPGRP _IOW('s', 8, pid_t) +#define SIOCGPGRP _IOR('s', 9, pid_t) + +#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */ + +#endif /* _UAPI_ASM_SW64_SOCKIOS_H */ diff --git a/arch/sw_64/include/uapi/asm/stat.h b/arch/sw_64/include/uapi/asm/stat.h new file mode 100644 index 000000000000..b1c1c5e3db22 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/stat.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_STAT_H +#define _UAPI_ASM_SW64_STAT_H + +struct stat { + unsigned int st_dev; + unsigned int st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned int st_rdev; + long st_size; + unsigned long st_atime; + unsigned long st_mtime; + unsigned long st_ctime; + unsigned int st_blksize; + unsigned int st_blocks; + unsigned int st_flags; + unsigned int st_gen; +}; + +/* + * The stat64 structure increases the size of dev_t, blkcnt_t, adds + * nanosecond resolution times, and padding for expansion. 
+ */ + +struct stat64 { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_rdev; + long st_size; + unsigned long st_blocks; + + unsigned int st_mode; + unsigned int st_uid; + unsigned int st_gid; + unsigned int st_blksize; + unsigned int st_nlink; + unsigned int __pad0; + + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + long __unused[3]; +}; + +#endif diff --git a/arch/sw_64/include/uapi/asm/statfs.h b/arch/sw_64/include/uapi/asm/statfs.h new file mode 100644 index 000000000000..3b8d1e3300a9 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/statfs.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_STATFS_H +#define _UAPI_ASM_SW64_STATFS_H + +#include <linux/types.h> + +#include <asm-generic/statfs.h> + +#endif diff --git a/arch/sw_64/include/uapi/asm/swab.h b/arch/sw_64/include/uapi/asm/swab.h new file mode 100644 index 000000000000..a3d67645aa52 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/swab.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_SWAB_H +#define _UAPI_ASM_SW64_SWAB_H + +#include <linux/types.h> +#include <linux/compiler.h> +#include <asm/compiler.h> + +#ifdef __GNUC__ + +static inline __attribute_const__ __u32 __arch_swab32(__u32 x) +{ + /* + * Unfortunately, we can't use the 6 instruction sequence + * on sw64 since the latency of the UNPKBW is 3, which is + * pretty hard to hide. Just in case a future implementation + * has a lower latency, here's the sequence (also by Mike Burrows) + * + * UNPKBW a0, v0 v0: 00AA00BB00CC00DD + * SLL v0, 24, a0 a0: BB00CC00DD000000 + * BIS v0, a0, a0 a0: BBAACCBBDDCC00DD + * EXTWL a0, 6, v0 v0: 000000000000BBAA + * ZAP a0, 0xf3, a0 a0: 00000000DDCC0000 + * ADDL a0, v0, v0 v0: ssssssssDDCCBBAA + */ + + __u64 t0, t1, t2, t3; + + t0 = __kernel_inshw(x, 7); /* t0 : 0000000000AABBCC */ + t1 = __kernel_inslh(x, 3); /* t1 : 000000CCDD000000 */ + t1 |= t0; /* t1 : 000000CCDDAABBCC */ + t2 = t1 >> 16; /* t2 : 0000000000CCDDAA */ + t0 = t1 & 0xFF00FF00; /* t0 : 00000000DD00BB00 */ + t3 = t2 & 0x00FF00FF; /* t3 : 0000000000CC00AA */ + t1 = t0 + t3; /* t1 : ssssssssDDCCBBAA */ + + return t1; +} +#define __arch_swab32 __arch_swab32 + +#endif /* __GNUC__ */ + +#endif /* _UAPI_ASM_SW64_SWAB_H */ diff --git a/arch/sw_64/include/uapi/asm/sysinfo.h b/arch/sw_64/include/uapi/asm/sysinfo.h new file mode 100644 index 000000000000..9d2112f8bc4d --- /dev/null +++ b/arch/sw_64/include/uapi/asm/sysinfo.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/asm/sysinfo.h + */ + +#ifndef _UAPI_ASM_SW64_SYSINFO_H +#define _UAPI_ASM_SW64_SYSINFO_H + +#define GSI_IEEE_FP_CONTROL 45 + +#define SSI_IEEE_FP_CONTROL 14 +#define SSI_IEEE_RAISE_EXCEPTION 1001 /* linux specific */ + +#define UAC_BITMASK 7 +#define UAC_NOPRINT 1 +#define UAC_NOFIX 2 +#define UAC_SIGBUS 4 +#define PR_NOFIX 4 /* do not fix up unaligned accesses */ + +#endif /* _UAPI_ASM_SW64_SYSINFO_H */ diff --git a/arch/sw_64/include/uapi/asm/termbits.h b/arch/sw_64/include/uapi/asm/termbits.h new file mode 100644 index 000000000000..bcb9adb11e81 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/termbits.h @@ -0,0 +1,202 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_TERMBITS_H +#define _UAPI_ASM_SW64_TERMBITS_H + +#include <linux/posix_types.h> + +typedef unsigned char cc_t; +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; + +/* + * termios type and macro 
definitions. Be careful about adding stuff + * to this file since it's used in GNU libc and there are strict rules + * concerning namespace pollution. + */ + +#define NCCS 19 +struct termios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_cc[NCCS]; /* control characters */ + cc_t c_line; /* line discipline (== c_cc[19]) */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +/* SW-64 has matching termios and ktermios */ + +struct ktermios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_cc[NCCS]; /* control characters */ + cc_t c_line; /* line discipline (== c_cc[19]) */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +/* c_cc characters */ +#define VEOF 0 +#define VEOL 1 +#define VEOL2 2 +#define VERASE 3 +#define VWERASE 4 +#define VKILL 5 +#define VREPRINT 6 +#define VSWTC 7 +#define VINTR 8 +#define VQUIT 9 +#define VSUSP 10 +#define VSTART 12 +#define VSTOP 13 +#define VLNEXT 14 +#define VDISCARD 15 +#define VMIN 16 +#define VTIME 17 + +/* c_iflag bits */ +#define IGNBRK 0000001 +#define BRKINT 0000002 +#define IGNPAR 0000004 +#define PARMRK 0000010 +#define INPCK 0000020 +#define ISTRIP 0000040 +#define INLCR 0000100 +#define IGNCR 0000200 +#define ICRNL 0000400 +#define IXON 0001000 +#define IXOFF 0002000 +#define IXANY 0004000 +#define IUCLC 0010000 +#define IMAXBEL 0020000 +#define IUTF8 0040000 + +/* c_oflag bits */ +#define OPOST 0000001 +#define ONLCR 0000002 +#define OLCUC 0000004 + +#define OCRNL 0000010 +#define ONOCR 0000020 +#define ONLRET 0000040 + +#define OFILL 00000100 +#define OFDEL 00000200 +#define NLDLY 00001400 +#define NL0 00000000 +#define NL1 00000400 +#define NL2 00001000 +#define NL3 00001400 +#define TABDLY 00006000 +#define TAB0 00000000 +#define TAB1 00002000 +#define TAB2 00004000 +#define TAB3 00006000 +#define CRDLY 00030000 +#define CR0 00000000 +#define CR1 00010000 +#define CR2 00020000 +#define CR3 00030000 +#define FFDLY 00040000 +#define FF0 00000000 +#define FF1 00040000 +#define BSDLY 00100000 +#define BS0 00000000 +#define BS1 00100000 +#define VTDLY 00200000 +#define VT0 00000000 +#define VT1 00200000 +#define XTABS 01000000 /* Hmm.. Linux/i386 considers this part of TABDLY.. 
*/ + +/* c_cflag bit meaning */ +#define CBAUD 0000037 +#define B0 0000000 /* hang up */ +#define B50 0000001 +#define B75 0000002 +#define B110 0000003 +#define B134 0000004 +#define B150 0000005 +#define B200 0000006 +#define B300 0000007 +#define B600 0000010 +#define B1200 0000011 +#define B1800 0000012 +#define B2400 0000013 +#define B4800 0000014 +#define B9600 0000015 +#define B19200 0000016 +#define B38400 0000017 +#define EXTA B19200 +#define EXTB B38400 +#define CBAUDEX 0000000 +#define B57600 00020 +#define B115200 00021 +#define B230400 00022 +#define B460800 00023 +#define B500000 00024 +#define B576000 00025 +#define B921600 00026 +#define B1000000 00027 +#define B1152000 00030 +#define B1500000 00031 +#define B2000000 00032 +#define B2500000 00033 +#define B3000000 00034 +#define B3500000 00035 +#define B4000000 00036 + +#define CSIZE 00001400 +#define CS5 00000000 +#define CS6 00000400 +#define CS7 00001000 +#define CS8 00001400 + +#define CSTOPB 00002000 +#define CREAD 00004000 +#define PARENB 00010000 +#define PARODD 00020000 +#define HUPCL 00040000 + +#define CLOCAL 00100000 +#define CMSPAR 010000000000 /* mark or space (stick) parity */ +#define CRTSCTS 020000000000 /* flow control */ + +/* c_lflag bits */ +#define ISIG 0x00000080 +#define ICANON 0x00000100 +#define XCASE 0x00004000 +#define ECHO 0x00000008 +#define ECHOE 0x00000002 +#define ECHOK 0x00000004 +#define ECHONL 0x00000010 +#define NOFLSH 0x80000000 +#define TOSTOP 0x00400000 +#define ECHOCTL 0x00000040 +#define ECHOPRT 0x00000020 +#define ECHOKE 0x00000001 +#define FLUSHO 0x00800000 +#define PENDIN 0x20000000 +#define IEXTEN 0x00000400 +#define EXTPROC 0x10000000 + +/* Values for the ACTION argument to `tcflow'. */ +#define TCOOFF 0 +#define TCOON 1 +#define TCIOFF 2 +#define TCION 3 + +/* Values for the QUEUE_SELECTOR argument to `tcflush'. */ +#define TCIFLUSH 0 +#define TCOFLUSH 1 +#define TCIOFLUSH 2 + +/* Values for the OPTIONAL_ACTIONS argument to `tcsetattr'. */ +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +#endif /* _UAPI_ASM_SW64_TERMBITS_H */ diff --git a/arch/sw_64/include/uapi/asm/termios.h b/arch/sw_64/include/uapi/asm/termios.h new file mode 100644 index 000000000000..d44e218b29b5 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/termios.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_TERMIOS_H +#define _UAPI_ASM_SW64_TERMIOS_H + +#include <asm/ioctls.h> +#include <asm/termbits.h> + +struct sgttyb { + char sg_ispeed; + char sg_ospeed; + char sg_erase; + char sg_kill; + short sg_flags; +}; + +struct tchars { + char t_intrc; + char t_quitc; + char t_startc; + char t_stopc; + char t_eofc; + char t_brkc; +}; + +struct ltchars { + char t_suspc; + char t_dsuspc; + char t_rprntc; + char t_flushc; + char t_werasc; + char t_lnextc; +}; + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* + * c_cc characters in the termio structure. Oh, how I love being + * backwardly compatible. Notice that character 4 and 5 are + * interpreted differently depending on whether ICANON is set in + * c_lflag. 
If it's set, they are used as _VEOF and _VEOL, otherwise + * as _VMIN and V_TIME. This is for compatibility with sysV)... + */ +#define _VINTR 0 +#define _VQUIT 1 +#define _VERASE 2 +#define _VKILL 3 +#define _VEOF 4 +#define _VMIN 4 +#define _VEOL 5 +#define _VTIME 5 +#define _VEOL2 6 +#define _VSWTC 7 + + +#endif /* _UAPI_ASM_SW64_TERMIOS_H */ diff --git a/arch/sw_64/include/uapi/asm/types.h b/arch/sw_64/include/uapi/asm/types.h new file mode 100644 index 000000000000..9c605ea7bba9 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/types.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_TYPES_H +#define _UAPI_ASM_SW64_TYPES_H + +/* + * This file is never included by application software unless + * explicitly requested (e.g., via linux/types.h) in which case the + * application is Linux specific so (user-) name space pollution is + * not a major issue. However, for interoperability, libraries still + * need to be careful to avoid a name clashes. + */ + +/* + * This is here because we used to use l64 for sw64 and we don't want + * to impact user mode with our change to ll64 in the kernel. + * + * However, some user programs are fine with this. They can + * flag __SANE_USERSPACE_TYPES__ to get int-ll64.h here. + */ +#ifndef __KERNEL__ +#ifndef __SANE_USERSPACE_TYPES__ +#include <asm-generic/int-l64.h> +#else +#include <asm-generic/int-ll64.h> +#endif /* __SANE_USERSPACE_TYPES__ */ +#endif /* __KERNEL__ */ + +#endif /* _UAPI_ASM_SW64_TYPES_H */ diff --git a/arch/sw_64/include/uapi/asm/unistd.h b/arch/sw_64/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..225358536dc9 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/unistd.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_ASM_SW64_UNISTD_H +#define _UAPI_ASM_SW64_UNISTD_H + +/* + * These are traditionally the names uses for generic system calls + */ +#define __NR_umount __NR_umount2 + +#include <asm/unistd_64.h> + +/* sw64 doesn't have protection keys. */ +#define __IGNORE_pkey_mprotect +#define __IGNORE_pkey_alloc +#define __IGNORE_pkey_free + +#endif /* _UAPI_ASM_SW64_UNISTD_H */ diff --git a/arch/sw_64/kernel/.gitignore b/arch/sw_64/kernel/.gitignore new file mode 100644 index 000000000000..46c9537c5551 --- /dev/null +++ b/arch/sw_64/kernel/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +vmlinux.lds diff --git a/arch/sw_64/kernel/Makefile b/arch/sw_64/kernel/Makefile new file mode 100644 index 000000000000..d9e2fcbc1e91 --- /dev/null +++ b/arch/sw_64/kernel/Makefile @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the linux kernel. 
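The <asm/types.h> fragment above keeps the historical l64 integer types for userspace by default; a program opts in to the ll64 convention by defining __SANE_USERSPACE_TYPES__ before the include. A small sketch (hypothetical user program, assuming the sw_64 uapi headers are installed):

/*
 * Hypothetical user program: defining __SANE_USERSPACE_TYPES__ before the
 * include selects <asm-generic/int-ll64.h>, so __u64 is unsigned long long
 * rather than unsigned long (the size is 8 bytes either way on sw_64).
 */
#define __SANE_USERSPACE_TYPES__
#include <asm/types.h>
#include <stdio.h>

int main(void)
{
	__u64 v = ~0ULL;

	printf("sizeof(__u64) = %zu, v = %llu\n", sizeof(v), (unsigned long long)v);
	return 0;
}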
+# + +extra-y := head.o vmlinux.lds +asflags-y := $(KBUILD_CFLAGS) +ccflags-y := -Wno-sign-compare + +ifdef CONFIG_FTRACE +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_insn.o = -pg +CFLAGS_REMOVE_printk.o = -pg +endif + +obj-y := entry.o traps.o process.o sys_sw64.o irq.o \ + irq_sw64.o signal.o setup.o ptrace.o time.o \ + systbls.o dup_print.o tc.o \ + insn.o early_init.o topology.o cacheinfo.o \ + vdso.o vdso/ + +obj-$(CONFIG_ACPI) += acpi.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_PCI) += pci.o pci-sysfs.o +obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_PCI_MSI) += msi.o +obj-$(CONFIG_SUSPEND) += suspend_asm.o suspend.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_HIBERNATION) += hibernate_asm.o hibernate.o +obj-$(CONFIG_AUDIT) += audit.o +obj-$(CONFIG_DIRECT_DMA) += pci_common.o +obj-$(CONFIG_SWIOTLB) += dma_swiotlb.o +obj-$(CONFIG_RELOCATABLE) += relocate.o +obj-$(CONFIG_DEBUG_FS) += unaligned.o segvdbg.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o + +ifndef CONFIG_PCI +obj-y += pci-noop.o +endif + +ifdef CONFIG_KVM +obj-y += kvm_cma.o +endif + +# Core logic support +obj-$(CONFIG_SW64) += core.o timer.o + +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o +obj-$(CONFIG_KPROBES) += kprobes/ +obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_KGDB) += kgdb.o +obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o diff --git a/arch/sw_64/kernel/acpi.c b/arch/sw_64/kernel/acpi.c new file mode 100644 index 000000000000..1c1afe8e812e --- /dev/null +++ b/arch/sw_64/kernel/acpi.c @@ -0,0 +1,396 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/init.h> +#include <linux/acpi.h> +#include <linux/acpi_pmtmr.h> +#include <linux/efi.h> +#include <linux/stddef.h> +#include <linux/cpumask.h> +#include <linux/module.h> +#include <linux/dmi.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/slab.h> +#include <linux/ioport.h> +#include <linux/pci.h> +#include <linux/memblock.h> +#include <acpi/actbl.h> +#include <acpi/actbl2.h> + +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/numa.h> +#include <asm/early_ioremap.h> + +int acpi_disabled = 1; +EXPORT_SYMBOL(acpi_disabled); +int acpi_noirq; /* skip ACPI IRQ initialization */ +int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */ +EXPORT_SYMBOL(acpi_pci_disabled); +int acpi_strict; +u64 arch_acpi_wakeup_start; +u64 acpi_saved_sp_s3; + +#define MAX_LOCAL_APIC 256 + +#define PREFIX "ACPI: " +/* + * The default interrupt routing model is PIC (8259). This gets + * overridden if IOAPICs are enumerated (below). + */ +enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC; +void __iomem *__init __acpi_map_table(unsigned long phys, unsigned long size) +{ + if (!phys || !size) + return NULL; + + return early_ioremap(phys, size); +} +void __init __acpi_unmap_table(void __iomem *map, unsigned long size) +{ + if (!map || !size) + return; + + early_iounmap(map, size); +} +/* + * Following __acpi_xx functions should be implemented for sepecific cpu. 
+ */ +int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) +{ + if (irqp != NULL) + *irqp = acpi_register_gsi(NULL, gsi, -1, -1); + + return 0; +} +EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); + +int acpi_isa_irq_to_gsi(unsigned int isa_irq, u32 *gsi) +{ + if (gsi) + *gsi = isa_irq; + + return 0; +} + +int (*acpi_suspend_lowlevel)(void); + +/* + * success: return IRQ number (>=0) + * failure: return < 0 + */ +static struct irq_domain *irq_default_domain; +int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) +{ + u32 irq; + + irq = irq_find_mapping(irq_default_domain, gsi); + + return irq; +} +EXPORT_SYMBOL_GPL(acpi_register_gsi); + +void acpi_unregister_gsi(u32 gsi) +{ + +} +EXPORT_SYMBOL_GPL(acpi_unregister_gsi); +/* + * ACPI based hotplug support for CPU + */ +#ifdef CONFIG_ACPI_HOTPLUG_CPU +#include <acpi/processor.h> + +/* wrapper to silence section mismatch warning */ +int __ref acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) +{ + return 0; +} +EXPORT_SYMBOL(acpi_map_lsapic); + +int acpi_unmap_lsapic(int cpu) +{ + return 0; +} +EXPORT_SYMBOL(acpi_unmap_lsapic); +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + +u8 acpi_checksum(u8 *table, u32 length) +{ + u8 ret = 0; + + while (length--) { + ret += *table; + table++; + } + return -ret; +} + +static int __init parse_acpi(char *arg) +{ + if (!arg) + return -EINVAL; + + /* "acpi=off" disables both ACPI table parsing and interpreter */ + if (strcmp(arg, "off") == 0) { + disable_acpi(); + } else { + /* Core will printk when we return error. */ + return -EINVAL; + } + return 0; +} +early_param("acpi", parse_acpi); + +/* + * __acpi_acquire_global_lock + * will always return -1 indicating owning the lock. + * + * __acpi_release_global_lock will always return 0 indicating + * no acquring request pending. + */ +int __acpi_acquire_global_lock(unsigned int *lock) +{ + return -1; +} + +int __acpi_release_global_lock(unsigned int *lock) +{ + return 0; +} + +#ifdef CONFIG_ACPI_NUMA +static __init int setup_node(int pxm) +{ + return acpi_map_pxm_to_node(pxm); +} + +/* + * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for + * I/O localities since SRAT does not list them. I/O localities are + * not supported at this point. 
+ */ +extern unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; +unsigned int numa_distance_cnt; + +static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit) +{ + return slit->locality_count; +} + +void __init numa_set_distance(int from, int to, int distance) +{ + unsigned char *numa_distance = (unsigned char *)__node_distances; + + if ((u8)distance != distance || + (from == to && distance != LOCAL_DISTANCE)) { + pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n", + from, to, distance); + return; + } + + numa_distance[from * numa_distance_cnt + to] = distance; +} + +void __init acpi_numa_slit_init(struct acpi_table_slit *slit) +{ + int i, j; + + numa_distance_cnt = get_numa_distances_cnt(slit); + + for (i = 0; i < slit->locality_count; i++) { + const int from_node = pxm_to_node(i); + + if (from_node == NUMA_NO_NODE) + continue; + + for (j = 0; j < slit->locality_count; j++) { + const int to_node = pxm_to_node(j); + + if (to_node == NUMA_NO_NODE) + continue; + + numa_set_distance(from_node, to_node, + slit->entry[slit->locality_count * i + j]); + } + } +} + +extern cpumask_t possible_cpu_per_node; +/* Callback for Proximity Domain -> CPUID mapping */ +void __init +acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) +{ + int pxm, node; + + if (srat_disabled()) + return; + if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) { + bad_srat(); + return; + } + if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) + return; + pxm = pa->proximity_domain_lo; + if (acpi_srat_revision >= 2) { + pxm |= (pa->proximity_domain_hi[0] << 8); + pxm |= (pa->proximity_domain_hi[1] << 16); + pxm |= (pa->proximity_domain_hi[2] << 24); + } + node = setup_node(pxm); + if (node < 0) { + pr_err("SRAT: Too many proximity domains %x\n", pxm); + bad_srat(); + return; + } + + if (pa->apic_id >= CONFIG_NR_CPUS) { + pr_err("SRAT: PXM %u -> CPU 0x%02x -> Node %u skipped apicid that is too big\n", pxm, pa->apic_id, node); + return; + } + + if (!cpu_guestmode) + numa_add_cpu(__cpu_number_map[pa->apic_id], node); + else + numa_add_cpu(pa->apic_id, node); + + set_cpuid_to_node(pa->apic_id, node); + node_set(node, numa_nodes_parsed); + acpi_numa = 1; + pr_err("SRAT: PXM %u -> CPU 0x%02x -> Node %u\n", + pxm, pa->apic_id, node); +} + +#ifdef CONFIG_MEMORY_HOTPLUG +static inline int save_add_info(void) { return 1; } +#else +static inline int save_add_info(void) { return 0; } +#endif + +/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ +int __init +acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) +{ + u64 start, end; + u32 hotpluggable; + int node, pxm; + + if (srat_disabled()) + goto out_err; + if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) + goto out_err_bad_srat; + if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) + goto out_err; + hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE; + if (hotpluggable && !save_add_info()) + goto out_err; + + start = ma->base_address; + end = start + ma->length; + pxm = ma->proximity_domain; + if (acpi_srat_revision <= 1) + pxm &= 0xff; + + node = setup_node(pxm); + if (node < 0) { + pr_err("SRAT: Too many proximity domains.\n"); + goto out_err_bad_srat; + } + if (numa_add_memblk(node, start, end) < 0) + goto out_err_bad_srat; + + node_set(node, numa_nodes_parsed); + + pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n", + node, pxm, + (unsigned long long) start, (unsigned long long) end - 1, + hotpluggable ? 
" hotplug" : "", + ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : ""); + + /* Mark hotplug range in memblock. */ + if (hotpluggable && memblock_mark_hotplug(start, ma->length)) + pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n", + (unsigned long long)start, (unsigned long long)end - 1); + + max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1)); + + return 0; +out_err_bad_srat: + bad_srat(); +out_err: + return -1; +} + +void __init acpi_numa_arch_fixup(void) {} +#endif + +#ifdef CONFIG_ACPI_HOTPLUG_CPU +#include <acpi/processor.h> +static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +{ +#ifdef CONFIG_ACPI_NUMA + int nid; + + nid = acpi_get_node(handle); + if (nid != NUMA_NO_NODE) { + set_cpuid_to_node(cpu, nid); + node_set(nid, numa_nodes_parsed); + } +#endif + return 0; +} + +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, + int *pcpu) +{ + int cpu; + struct acpi_madt_local_apic *processor; + + processor = kzalloc(sizeof(struct acpi_madt_local_apic), GFP_KERNEL); + processor->id = physid; + processor->processor_id = acpi_id; + processor->lapic_flags = ACPI_MADT_ENABLED; + + cpu = set_processor_mask(processor); + if (cpu < 0) { + pr_info(PREFIX "Unable to map lapic to logical cpu number\n"); + return cpu; + } + + acpi_map_cpu2node(handle, cpu, physid); + + *pcpu = cpu; + return 0; +} +EXPORT_SYMBOL(acpi_map_cpu); + +int acpi_unmap_cpu(int cpu) +{ +#ifdef CONFIG_ACPI_NUMA + set_cpuid_to_node(cpu, NUMA_NO_NODE); +#endif + set_cpu_present(cpu, false); + num_processors--; + + pr_info("cpu%d hot remove!\n", cpu); + + return 0; +} +EXPORT_SYMBOL(acpi_unmap_cpu); +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + +void __init acpi_boot_table_init(void) + +{ + /* + * If acpi_disabled, bail out + */ + if (!acpi_disabled) { + if (acpi_table_init()) { + pr_err("Failed to init ACPI tables\n"); + disable_acpi(); + } + pr_info("Enable ACPI support\n"); + } +} diff --git a/arch/sw_64/kernel/asm-offsets.c b/arch/sw_64/kernel/asm-offsets.c new file mode 100644 index 000000000000..44e7fa77265e --- /dev/null +++ b/arch/sw_64/kernel/asm-offsets.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed to extract + * and format the required data. 
+ */ + +#include <linux/types.h> +#include <linux/stddef.h> +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <linux/kbuild.h> +#include <linux/suspend.h> +#include <asm/io.h> +#include <asm/suspend.h> +#include "traps.c" + +#include <asm/kvm.h> +void foo(void) +{ + DEFINE(TI_TASK, offsetof(struct thread_info, task)); + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); + BLANK(); + + DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked)); + DEFINE(TASK_CRED, offsetof(struct task_struct, cred)); + DEFINE(TASK_REAL_PARENT, offsetof(struct task_struct, real_parent)); + DEFINE(TASK_GROUP_LEADER, offsetof(struct task_struct, group_leader)); + DEFINE(TASK_TGID, offsetof(struct task_struct, tgid)); + BLANK(); + + OFFSET(PSTATE_REGS, processor_state, regs); + OFFSET(PSTATE_FPREGS, processor_state, fpregs); + OFFSET(PSTATE_FPCR, processor_state, fpcr); +#ifdef CONFIG_HIBERNATION + OFFSET(PSTATE_PCB, processor_state, pcb); +#endif + OFFSET(PCB_KSP, pcb_struct, ksp); + OFFSET(PBE_ADDR, pbe, address); + OFFSET(PBE_ORIG_ADDR, pbe, orig_address); + OFFSET(PBE_NEXT, pbe, next); + OFFSET(CALLEE_R9, callee_saved_regs, r9); + OFFSET(CALLEE_R10, callee_saved_regs, r10); + OFFSET(CALLEE_R11, callee_saved_regs, r11); + OFFSET(CALLEE_R12, callee_saved_regs, r12); + OFFSET(CALLEE_R13, callee_saved_regs, r13); + OFFSET(CALLEE_R14, callee_saved_regs, r14); + OFFSET(CALLEE_R15, callee_saved_regs, r15); + OFFSET(CALLEE_RA, callee_saved_regs, ra); + OFFSET(CALLEE_F2, callee_saved_fpregs, f2); + OFFSET(CALLEE_F3, callee_saved_fpregs, f3); + OFFSET(CALLEE_F4, callee_saved_fpregs, f4); + OFFSET(CALLEE_F5, callee_saved_fpregs, f5); + OFFSET(CALLEE_F6, callee_saved_fpregs, f6); + OFFSET(CALLEE_F7, callee_saved_fpregs, f7); + OFFSET(CALLEE_F8, callee_saved_fpregs, f8); + OFFSET(CALLEE_F9, callee_saved_fpregs, f9); + BLANK(); + DEFINE(CRED_UID, offsetof(struct cred, uid)); + DEFINE(CRED_EUID, offsetof(struct cred, euid)); + DEFINE(CRED_GID, offsetof(struct cred, gid)); + DEFINE(CRED_EGID, offsetof(struct cred, egid)); + BLANK(); + + DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); + DEFINE(PT_REGS_R0, offsetof(struct pt_regs, r0)); + DEFINE(PT_REGS_R1, offsetof(struct pt_regs, r1)); + DEFINE(PT_REGS_R2, offsetof(struct pt_regs, r2)); + DEFINE(PT_REGS_R3, offsetof(struct pt_regs, r3)); + DEFINE(PT_REGS_R4, offsetof(struct pt_regs, r4)); + DEFINE(PT_REGS_R5, offsetof(struct pt_regs, r5)); + DEFINE(PT_REGS_R6, offsetof(struct pt_regs, r6)); + DEFINE(PT_REGS_R7, offsetof(struct pt_regs, r7)); + DEFINE(PT_REGS_R8, offsetof(struct pt_regs, r8)); + DEFINE(PT_REGS_R19, offsetof(struct pt_regs, r19)); + DEFINE(PT_REGS_R20, offsetof(struct pt_regs, r20)); + DEFINE(PT_REGS_R21, offsetof(struct pt_regs, r21)); + DEFINE(PT_REGS_R22, offsetof(struct pt_regs, r22)); + DEFINE(PT_REGS_R23, offsetof(struct pt_regs, r23)); + DEFINE(PT_REGS_R24, offsetof(struct pt_regs, r24)); + DEFINE(PT_REGS_R25, offsetof(struct pt_regs, r25)); + DEFINE(PT_REGS_R26, offsetof(struct pt_regs, r26)); + DEFINE(PT_REGS_R27, offsetof(struct pt_regs, r27)); + DEFINE(PT_REGS_R28, offsetof(struct pt_regs, r28)); + DEFINE(PT_REGS_TRAP_A0, offsetof(struct pt_regs, trap_a0)); + DEFINE(PT_REGS_TRAP_A1, offsetof(struct pt_regs, trap_a1)); + DEFINE(PT_REGS_TRAP_A2, offsetof(struct pt_regs, trap_a2)); + DEFINE(PT_REGS_PS, offsetof(struct pt_regs, ps)); + DEFINE(PT_REGS_PC, offsetof(struct pt_regs, pc)); + DEFINE(PT_REGS_GP, offsetof(struct pt_regs, gp)); + DEFINE(PT_REGS_R16, offsetof(struct pt_regs, 
r16)); + DEFINE(PT_REGS_R17, offsetof(struct pt_regs, r17)); + DEFINE(PT_REGS_R18, offsetof(struct pt_regs, r18)); + BLANK(); + + DEFINE(SWITCH_STACK_SIZE, sizeof(struct switch_stack)); + DEFINE(SWITCH_STACK_R9, offsetof(struct switch_stack, r9)); + DEFINE(SWITCH_STACK_R10, offsetof(struct switch_stack, r10)); + DEFINE(SWITCH_STACK_R11, offsetof(struct switch_stack, r11)); + DEFINE(SWITCH_STACK_R12, offsetof(struct switch_stack, r12)); + DEFINE(SWITCH_STACK_R13, offsetof(struct switch_stack, r13)); + DEFINE(SWITCH_STACK_R14, offsetof(struct switch_stack, r14)); + DEFINE(SWITCH_STACK_R15, offsetof(struct switch_stack, r15)); + DEFINE(SWITCH_STACK_RA, offsetof(struct switch_stack, r26)); + BLANK(); + + DEFINE(ALLREGS_SIZE, sizeof(struct allregs)); + DEFINE(ALLREGS_R0, offsetof(struct allregs, regs[0])); + DEFINE(ALLREGS_R1, offsetof(struct allregs, regs[1])); + DEFINE(ALLREGS_R2, offsetof(struct allregs, regs[2])); + DEFINE(ALLREGS_R3, offsetof(struct allregs, regs[3])); + DEFINE(ALLREGS_R4, offsetof(struct allregs, regs[4])); + DEFINE(ALLREGS_R5, offsetof(struct allregs, regs[5])); + DEFINE(ALLREGS_R6, offsetof(struct allregs, regs[6])); + DEFINE(ALLREGS_R7, offsetof(struct allregs, regs[7])); + DEFINE(ALLREGS_R8, offsetof(struct allregs, regs[8])); + DEFINE(ALLREGS_R9, offsetof(struct allregs, regs[9])); + DEFINE(ALLREGS_R10, offsetof(struct allregs, regs[10])); + DEFINE(ALLREGS_R11, offsetof(struct allregs, regs[11])); + DEFINE(ALLREGS_R12, offsetof(struct allregs, regs[12])); + DEFINE(ALLREGS_R13, offsetof(struct allregs, regs[13])); + DEFINE(ALLREGS_R14, offsetof(struct allregs, regs[14])); + DEFINE(ALLREGS_R15, offsetof(struct allregs, regs[15])); + DEFINE(ALLREGS_R16, offsetof(struct allregs, regs[16])); + DEFINE(ALLREGS_R17, offsetof(struct allregs, regs[17])); + DEFINE(ALLREGS_R18, offsetof(struct allregs, regs[18])); + DEFINE(ALLREGS_R19, offsetof(struct allregs, regs[19])); + DEFINE(ALLREGS_R20, offsetof(struct allregs, regs[20])); + DEFINE(ALLREGS_R21, offsetof(struct allregs, regs[21])); + DEFINE(ALLREGS_R22, offsetof(struct allregs, regs[22])); + DEFINE(ALLREGS_R23, offsetof(struct allregs, regs[23])); + DEFINE(ALLREGS_R24, offsetof(struct allregs, regs[24])); + DEFINE(ALLREGS_R25, offsetof(struct allregs, regs[25])); + DEFINE(ALLREGS_R26, offsetof(struct allregs, regs[26])); + DEFINE(ALLREGS_R27, offsetof(struct allregs, regs[27])); + DEFINE(ALLREGS_R28, offsetof(struct allregs, regs[28])); + DEFINE(ALLREGS_R29, offsetof(struct allregs, regs[29])); + DEFINE(ALLREGS_R30, offsetof(struct allregs, regs[30])); + DEFINE(ALLREGS_R31, offsetof(struct allregs, regs[31])); + DEFINE(ALLREGS_PS, offsetof(struct allregs, ps)); + DEFINE(ALLREGS_PC, offsetof(struct allregs, pc)); + DEFINE(ALLREGS_GP, offsetof(struct allregs, gp)); + DEFINE(ALLREGS_A0, offsetof(struct allregs, a0)); + DEFINE(ALLREGS_A1, offsetof(struct allregs, a1)); + DEFINE(ALLREGS_A2, offsetof(struct allregs, a2)); + BLANK(); + + DEFINE(KVM_REGS_SIZE, sizeof(struct kvm_regs)); + DEFINE(KVM_REGS_R0, offsetof(struct kvm_regs, r0)); + DEFINE(KVM_REGS_R1, offsetof(struct kvm_regs, r1)); + DEFINE(KVM_REGS_R2, offsetof(struct kvm_regs, r2)); + DEFINE(KVM_REGS_R3, offsetof(struct kvm_regs, r3)); + DEFINE(KVM_REGS_R4, offsetof(struct kvm_regs, r4)); + DEFINE(KVM_REGS_R5, offsetof(struct kvm_regs, r5)); + DEFINE(KVM_REGS_R6, offsetof(struct kvm_regs, r6)); + DEFINE(KVM_REGS_R7, offsetof(struct kvm_regs, r7)); + DEFINE(KVM_REGS_R8, offsetof(struct kvm_regs, r8)); + DEFINE(KVM_REGS_R9, offsetof(struct kvm_regs, r9)); + 
DEFINE(KVM_REGS_R10, offsetof(struct kvm_regs, r10)); + DEFINE(KVM_REGS_R11, offsetof(struct kvm_regs, r11)); + DEFINE(KVM_REGS_R12, offsetof(struct kvm_regs, r12)); + DEFINE(KVM_REGS_R13, offsetof(struct kvm_regs, r13)); + DEFINE(KVM_REGS_R14, offsetof(struct kvm_regs, r14)); + DEFINE(KVM_REGS_R15, offsetof(struct kvm_regs, r15)); + DEFINE(KVM_REGS_R19, offsetof(struct kvm_regs, r19)); + DEFINE(KVM_REGS_R20, offsetof(struct kvm_regs, r20)); + DEFINE(KVM_REGS_R21, offsetof(struct kvm_regs, r21)); + DEFINE(KVM_REGS_R22, offsetof(struct kvm_regs, r22)); + DEFINE(KVM_REGS_R23, offsetof(struct kvm_regs, r23)); + DEFINE(KVM_REGS_R24, offsetof(struct kvm_regs, r24)); + DEFINE(KVM_REGS_R25, offsetof(struct kvm_regs, r25)); + DEFINE(KVM_REGS_R26, offsetof(struct kvm_regs, r26)); + DEFINE(KVM_REGS_R27, offsetof(struct kvm_regs, r27)); + DEFINE(KVM_REGS_R28, offsetof(struct kvm_regs, r28)); + DEFINE(KVM_REGS_FPCR, offsetof(struct kvm_regs, fpcr)); + DEFINE(KVM_REGS_F0, offsetof(struct kvm_regs, fp[0 * 4])); + DEFINE(KVM_REGS_F1, offsetof(struct kvm_regs, fp[1 * 4])); + DEFINE(KVM_REGS_F2, offsetof(struct kvm_regs, fp[2 * 4])); + DEFINE(KVM_REGS_F3, offsetof(struct kvm_regs, fp[3 * 4])); + DEFINE(KVM_REGS_F4, offsetof(struct kvm_regs, fp[4 * 4])); + DEFINE(KVM_REGS_F5, offsetof(struct kvm_regs, fp[5 * 4])); + DEFINE(KVM_REGS_F6, offsetof(struct kvm_regs, fp[6 * 4])); + DEFINE(KVM_REGS_F7, offsetof(struct kvm_regs, fp[7 * 4])); + DEFINE(KVM_REGS_F8, offsetof(struct kvm_regs, fp[8 * 4])); + DEFINE(KVM_REGS_F9, offsetof(struct kvm_regs, fp[9 * 4])); + DEFINE(KVM_REGS_F10, offsetof(struct kvm_regs, fp[10 * 4])); + DEFINE(KVM_REGS_F11, offsetof(struct kvm_regs, fp[11 * 4])); + DEFINE(KVM_REGS_F12, offsetof(struct kvm_regs, fp[12 * 4])); + DEFINE(KVM_REGS_F13, offsetof(struct kvm_regs, fp[13 * 4])); + DEFINE(KVM_REGS_F14, offsetof(struct kvm_regs, fp[14 * 4])); + DEFINE(KVM_REGS_F15, offsetof(struct kvm_regs, fp[15 * 4])); + DEFINE(KVM_REGS_F16, offsetof(struct kvm_regs, fp[16 * 4])); + DEFINE(KVM_REGS_F17, offsetof(struct kvm_regs, fp[17 * 4])); + DEFINE(KVM_REGS_F18, offsetof(struct kvm_regs, fp[18 * 4])); + DEFINE(KVM_REGS_F19, offsetof(struct kvm_regs, fp[19 * 4])); + DEFINE(KVM_REGS_F20, offsetof(struct kvm_regs, fp[20 * 4])); + DEFINE(KVM_REGS_F21, offsetof(struct kvm_regs, fp[21 * 4])); + DEFINE(KVM_REGS_F22, offsetof(struct kvm_regs, fp[22 * 4])); + DEFINE(KVM_REGS_F23, offsetof(struct kvm_regs, fp[23 * 4])); + DEFINE(KVM_REGS_F24, offsetof(struct kvm_regs, fp[24 * 4])); + DEFINE(KVM_REGS_F25, offsetof(struct kvm_regs, fp[25 * 4])); + DEFINE(KVM_REGS_F26, offsetof(struct kvm_regs, fp[26 * 4])); + DEFINE(KVM_REGS_F27, offsetof(struct kvm_regs, fp[27 * 4])); + DEFINE(KVM_REGS_F28, offsetof(struct kvm_regs, fp[28 * 4])); + DEFINE(KVM_REGS_F29, offsetof(struct kvm_regs, fp[29 * 4])); + DEFINE(KVM_REGS_F30, offsetof(struct kvm_regs, fp[30 * 4])); + DEFINE(KVM_REGS_PS, offsetof(struct kvm_regs, ps)); + DEFINE(KVM_REGS_PC, offsetof(struct kvm_regs, pc)); + DEFINE(KVM_REGS_GP, offsetof(struct kvm_regs, gp)); + DEFINE(KVM_REGS_R16, offsetof(struct kvm_regs, r16)); + DEFINE(KVM_REGS_R17, offsetof(struct kvm_regs, r17)); + DEFINE(KVM_REGS_R18, offsetof(struct kvm_regs, r18)); + BLANK(); + + DEFINE(VCPU_RET_SIZE, sizeof(struct vcpu_run_ret_stack)); + DEFINE(VCPU_RET_RA, offsetof(struct vcpu_run_ret_stack, ra)); + DEFINE(VCPU_RET_R0, offsetof(struct vcpu_run_ret_stack, r0)); + BLANK(); + + DEFINE(HOST_INT_SIZE, sizeof(struct host_int_args)); + DEFINE(HOST_INT_R18, offsetof(struct host_int_args, r18)); + 
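The DEFINE()/OFFSET() entries in this file never become object code in the kernel image; kbuild compiles asm-offsets.c to assembly and post-processes the embedded markers into include/generated/asm-offsets.h, which entry.S and the other assembly sources then include. A rough sketch of the mechanism (the macro lives in <linux/kbuild.h>; the offset shown is illustrative, not the real sw_64 value):

/* Simplified from <linux/kbuild.h>: each DEFINE() emits an ".ascii" marker
 * into the generated assembly, which the build system turns back into a
 * preprocessor constant in include/generated/asm-offsets.h.
 */
#define DEFINE(sym, val) \
	asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))

/*
 * DEFINE(PT_REGS_R0, offsetof(struct pt_regs, r0)) therefore becomes
 * something like "#define PT_REGS_R0 0" in the generated header, so that
 * entry.S can write "stl $0, PT_REGS_R0($sp)" without hard-coding the
 * pt_regs layout in assembly.
 */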
DEFINE(HOST_INT_R17, offsetof(struct host_int_args, r17)); + DEFINE(HOST_INT_R16, offsetof(struct host_int_args, r16)); + BLANK(); + + DEFINE(TASK_THREAD, offsetof(struct task_struct, thread)); + DEFINE(THREAD_CTX_FP, offsetof(struct thread_struct, ctx_fp)); + DEFINE(THREAD_FPCR, offsetof(struct thread_struct, fpcr)); + DEFINE(CTX_FP_F0, offsetof(struct context_fpregs, f0)); + DEFINE(CTX_FP_F1, offsetof(struct context_fpregs, f1)); + DEFINE(CTX_FP_F2, offsetof(struct context_fpregs, f2)); + DEFINE(CTX_FP_F3, offsetof(struct context_fpregs, f3)); + DEFINE(CTX_FP_F4, offsetof(struct context_fpregs, f4)); + DEFINE(CTX_FP_F5, offsetof(struct context_fpregs, f5)); + DEFINE(CTX_FP_F6, offsetof(struct context_fpregs, f6)); + DEFINE(CTX_FP_F7, offsetof(struct context_fpregs, f7)); + DEFINE(CTX_FP_F8, offsetof(struct context_fpregs, f8)); + DEFINE(CTX_FP_F9, offsetof(struct context_fpregs, f9)); + DEFINE(CTX_FP_F10, offsetof(struct context_fpregs, f10)); + DEFINE(CTX_FP_F11, offsetof(struct context_fpregs, f11)); + DEFINE(CTX_FP_F12, offsetof(struct context_fpregs, f12)); + DEFINE(CTX_FP_F13, offsetof(struct context_fpregs, f13)); + DEFINE(CTX_FP_F14, offsetof(struct context_fpregs, f14)); + DEFINE(CTX_FP_F15, offsetof(struct context_fpregs, f15)); + DEFINE(CTX_FP_F16, offsetof(struct context_fpregs, f16)); + DEFINE(CTX_FP_F17, offsetof(struct context_fpregs, f17)); + DEFINE(CTX_FP_F18, offsetof(struct context_fpregs, f18)); + DEFINE(CTX_FP_F19, offsetof(struct context_fpregs, f19)); + DEFINE(CTX_FP_F20, offsetof(struct context_fpregs, f20)); + DEFINE(CTX_FP_F21, offsetof(struct context_fpregs, f21)); + DEFINE(CTX_FP_F22, offsetof(struct context_fpregs, f22)); + DEFINE(CTX_FP_F23, offsetof(struct context_fpregs, f23)); + DEFINE(CTX_FP_F24, offsetof(struct context_fpregs, f24)); + DEFINE(CTX_FP_F25, offsetof(struct context_fpregs, f25)); + DEFINE(CTX_FP_F26, offsetof(struct context_fpregs, f26)); + DEFINE(CTX_FP_F27, offsetof(struct context_fpregs, f27)); + DEFINE(CTX_FP_F28, offsetof(struct context_fpregs, f28)); + DEFINE(CTX_FP_F29, offsetof(struct context_fpregs, f29)); + DEFINE(CTX_FP_F30, offsetof(struct context_fpregs, f30)); + BLANK(); +} diff --git a/arch/sw_64/kernel/audit.c b/arch/sw_64/kernel/audit.c new file mode 100644 index 000000000000..adc4622211d2 --- /dev/null +++ b/arch/sw_64/kernel/audit.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/init.h> +#include <linux/types.h> +#include <linux/audit.h> +#include <asm/unistd.h> + +static unsigned int dir_class[] = { +#include <asm-generic/audit_dir_write.h> +~0U +}; + +static unsigned int read_class[] = { +#include <asm-generic/audit_read.h> +~0U +}; + +static unsigned int write_class[] = { +#include <asm-generic/audit_write.h> +~0U +}; + +static unsigned int chattr_class[] = { +#include <asm-generic/audit_change_attr.h> +~0U +}; + +static unsigned int signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int audit_classify_arch(int arch) +{ + return 0; +} + +int audit_classify_syscall(int abi, unsigned int syscall) +{ + switch (syscall) { + case __NR_open: + return 2; + case __NR_openat: + return 3; + case __NR_execve: + return 5; + default: + return 0; + } +} + +static int __init audit_classes_init(void) +{ + audit_register_class(AUDIT_CLASS_WRITE, write_class); + audit_register_class(AUDIT_CLASS_READ, read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); + audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL, signal_class); + return 0; 
+} + +device_initcall(audit_classes_init); diff --git a/arch/sw_64/kernel/cacheinfo.c b/arch/sw_64/kernel/cacheinfo.c new file mode 100644 index 000000000000..5193d7544b59 --- /dev/null +++ b/arch/sw_64/kernel/cacheinfo.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SW64 cacheinfo support + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ +#include <linux/arch_topology.h> +#include <linux/cacheinfo.h> +#include <asm/hw_init.h> +#include <asm/topology.h> + +/* Populates leaf and increments to next leaf */ +#define populate_cache(cache, leaf, c_level, c_type, c_id) \ +do { \ + leaf->id = c_id; \ + leaf->attributes = CACHE_ID; \ + leaf->type = c_type; \ + leaf->level = c_level; \ + leaf->coherency_line_size = c->cache.linesz; \ + leaf->number_of_sets = c->cache.sets; \ + leaf->ways_of_associativity = c->cache.ways; \ + leaf->size = c->cache.size; \ + leaf++; \ +} while (0) + +int init_cache_level(unsigned int cpu) +{ + struct cpuinfo_sw64 *c = &cpu_data[cpu]; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + int levels = 0, leaves = 0; + + /* + * If Dcache is not set, we assume the cache structures + * are not properly initialized. + */ + if (c->dcache.size) + levels += 1; + else + return -ENOENT; + + + leaves += (c->icache.size) ? 
2 : 1; + + if (c->scache.size) { + levels++; + leaves++; + } + + if (c->tcache.size) { + levels++; + leaves++; + } + + this_cpu_ci->num_levels = levels; + this_cpu_ci->num_leaves = leaves; + return 0; +} + +int populate_cache_leaves(unsigned int cpu) +{ + struct cpuinfo_sw64 *c = &cpu_data[cpu]; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf = this_cpu_ci->info_list; + struct cpu_topology *topo = &cpu_topology[cpu]; + + if (c->icache.size) { + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); + populate_cache(dcache, this_leaf, 1, CACHE_TYPE_DATA, cpu); + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); + populate_cache(icache, this_leaf, 1, CACHE_TYPE_INST, cpu); + + } else { + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); + populate_cache(dcache, this_leaf, 1, CACHE_TYPE_UNIFIED, cpu); + } + + if (c->scache.size) { + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); + populate_cache(scache, this_leaf, 2, CACHE_TYPE_UNIFIED, cpu); + } + + if (c->tcache.size) { + cpumask_copy(&this_leaf->shared_cpu_map, cpu_online_mask); + populate_cache(tcache, this_leaf, 3, CACHE_TYPE_UNIFIED, topo->package_id); + } + + this_cpu_ci->cpu_map_populated = true; + + return 0; +} diff --git a/arch/sw_64/kernel/core.c b/arch/sw_64/kernel/core.c new file mode 100644 index 000000000000..4a35c1dc1e19 --- /dev/null +++ b/arch/sw_64/kernel/core.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/memblock.h> +#include <linux/pfn.h> +#include <linux/export.h> +#include <asm/core.h> +#include <asm/tlbflush.h> +#include <asm/smp.h> +#include <asm/compiler.h> +#include <asm/mmu_context.h> +#include <asm/bitops.h> +#include <asm/sw64_init.h> +#include <asm/hw_init.h> +#ifdef CONFIG_NUMA +#include <asm/memory.h> +#endif +#include "pci_impl.h" + +#ifdef CONFIG_NUMA +#ifdef CONFIG_DISCONTIGMEM +int pa_to_nid(unsigned long pa) +{ + int i = 0; + phys_addr_t pfn_base, pfn_size, pfn; + + pfn = pa >> PAGE_SHIFT; + for (i = 0; i < MAX_NUMNODES; i++) { + if (!NODE_DATA(i)) + continue; + + pfn_base = NODE_DATA(i)->node_start_pfn; + pfn_size = NODE_DATA(i)->node_spanned_pages; + + if (pfn >= pfn_base && pfn < pfn_base + pfn_size) + return i; + } + + pr_err("%s: pa %#lx does not belong to any node, return node 0\n", __func__, pa); + return 0; +} +EXPORT_SYMBOL(pa_to_nid); +#endif /* CONFIG_DISCONTIGMEM */ + +#ifndef CONFIG_USE_PERCPU_NUMA_NODE_ID +extern int cpu_to_node_map[NR_CPUS]; +int cpuid_to_nid(int cpuid) +{ + return cpu_to_node_map[cpuid]; +} +EXPORT_SYMBOL(cpuid_to_nid); +#endif /* CONFIG_USE_PERCPU_NUMA_NODE_ID */ +#else /* !CONFIG_NUMA */ +#ifdef CONFIG_DISCONTIGMEM +int pa_to_nid(unsigned long pa) +{ + return 0; +} +EXPORT_SYMBOL(pa_to_nid); +#endif /* CONFIG_DISCONTIGMEM */ + +#ifndef CONFIG_USE_PERCPU_NUMA_NODE_ID +int cpuid_to_nid(int cpuid) +{ + return 0; +} +EXPORT_SYMBOL(cpuid_to_nid); +#endif /* CONFIG_USE_PERCPU_NUMA_NODE_ID */ +#endif /* CONFIG_NUMA */ diff --git a/arch/sw_64/kernel/crash_dump.c b/arch/sw_64/kernel/crash_dump.c new file mode 100644 index 000000000000..f3836afe3e25 --- /dev/null +++ b/arch/sw_64/kernel/crash_dump.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/sw_64/kernel/crash_dump.c + * + * Copyright (C) 2019 JN + * Author: He Sheng + * + * This code is taken from arch/x86/kernel/crash_dump_64.c + * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) + * Copyright (C) IBM 
Corporation, 2004. All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/errno.h> +#include <linux/crash_dump.h> +#include <linux/uaccess.h> +#include <linux/io.h> + +/** + * copy_oldmem_page() - copy one page from old kernel memory + * @pfn: page frame number to be copied + * @buf: buffer where the copied page is placed + * @csize: number of bytes to copy + * @offset: offset in bytes into the page + * @userbuf: if set, @buf is int he user address space + * + * This function copies one page from old kernel memory into buffer pointed by + * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes + * copied or negative error in case of failure. + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, + int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + + vaddr = ioremap(__pfn_to_phys(pfn), PAGE_SIZE); + if (!vaddr) + return -ENOMEM; + + if (userbuf) { + if (copy_to_user(buf, vaddr + offset, csize)) { + iounmap(vaddr); + return -EFAULT; + } + } else { + memcpy(buf, vaddr + offset, csize); + } + + iounmap(vaddr); + return csize; +} diff --git a/arch/sw_64/kernel/dma_swiotlb.c b/arch/sw_64/kernel/dma_swiotlb.c new file mode 100644 index 000000000000..a665e022fad8 --- /dev/null +++ b/arch/sw_64/kernel/dma_swiotlb.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/sw_64/kernel/pci_iommu.c + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/pci.h> +#include <linux/gfp.h> +#include <linux/export.h> +#include <linux/scatterlist.h> +#include <linux/log2.h> +#include <linux/dma-mapping.h> +#include <linux/iommu-helper.h> +#include <linux/slab.h> + +#include <linux/swiotlb.h> +#include <linux/cache.h> +#include <linux/module.h> +#include <asm/dma.h> + +#include <asm/io.h> + +const struct dma_map_ops *dma_ops = NULL; +EXPORT_SYMBOL(dma_ops); diff --git a/arch/sw_64/kernel/dup_print.c b/arch/sw_64/kernel/dup_print.c new file mode 100644 index 000000000000..ac0a95d4d30b --- /dev/null +++ b/arch/sw_64/kernel/dup_print.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/delay.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> + +#ifdef CONFIG_SW64_RRK + +#define KERNEL_PRINTK_BUFF_BASE (0x700000UL + __START_KERNEL_map) + +static DEFINE_SPINLOCK(printk_lock); + +unsigned long sw64_printk_offset; +#define PRINTK_SIZE 0x100000UL + +/* + * For output the kernel message on the console + * with full-system emulator. 
+ */ +#define QEMU_PRINTF_BUFF_BASE (0x805000040000ULL | PAGE_OFFSET) + +int sw64_printk(const char *fmt, va_list args) +{ + char *sw64_printk_buf; + int printed_len = 0; + unsigned long flags; + + spin_lock_irqsave(&printk_lock, flags); + + sw64_printk_buf = (char *)(KERNEL_PRINTK_BUFF_BASE + sw64_printk_offset); + + if (sw64_printk_offset >= (PRINTK_SIZE-1024)) { //printk wrapped + sw64_printk_offset = 0; + sw64_printk_buf = (char *)(KERNEL_PRINTK_BUFF_BASE + sw64_printk_offset); + memset(sw64_printk_buf, 0, PRINTK_SIZE); + printed_len += vscnprintf(sw64_printk_buf, 1024, fmt, args); + } else { + printed_len += vscnprintf(sw64_printk_buf, 1024, fmt, args); + if (is_guest_or_emul()) { + unsigned long write_addr = QEMU_PRINTF_BUFF_BASE; + *(unsigned long *)write_addr = (unsigned long)((((unsigned long)sw64_printk_buf) & 0xffffffffUL) + | ((unsigned long)printed_len << 32)); + } + } + sw64_printk_offset += printed_len; + spin_unlock_irqrestore(&printk_lock, flags); + return printed_len; +} +#endif + +#ifdef CONFIG_SW64_RRU +static DEFINE_SPINLOCK(printf_lock); +#define USER_PRINT_BUFF_BASE (0x600000UL + __START_KERNEL_map) +#define USER_PRINT_BUFF_LEN 0x100000UL +#define USER_MESSAGE_MAX_LEN 0x100000UL +unsigned long sw64_printf_offset; +int sw64_user_printf(const char __user *buf, int len) +{ + static char *user_printf_buf; + unsigned long flags; + + if (current->pid <= 0) + return 0; + + /* + * do not write large (fake) message which may not be from + * STDOUT/STDERR any more as file descriptor could be duplicated + * in a pipe. + */ + if (len > USER_MESSAGE_MAX_LEN) + return 0; + + spin_lock_irqsave(&printf_lock, flags); + user_printf_buf = (char *)(USER_PRINT_BUFF_BASE + sw64_printf_offset); + + if (sw64_printf_offset == 0) + memset(user_printf_buf, 0, USER_PRINT_BUFF_LEN); + + if ((sw64_printf_offset + len) > USER_PRINT_BUFF_LEN) { + sw64_printf_offset = 0; + user_printf_buf = (char *)(USER_PRINT_BUFF_BASE + sw64_printf_offset); + memset(user_printf_buf, 0, USER_PRINT_BUFF_LEN); + } + copy_from_user(user_printf_buf, buf, len); + sw64_printf_offset += len; + spin_unlock_irqrestore(&printf_lock, flags); + return 0; +} +#endif diff --git a/arch/sw_64/kernel/early_init.c b/arch/sw_64/kernel/early_init.c new file mode 100644 index 000000000000..392627bef8bb --- /dev/null +++ b/arch/sw_64/kernel/early_init.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/start_kernel.h> + +#include <asm/sw64_init.h> + +void sw64_init_noop(void) { } +struct sw64_platform_ops *sw64_platform; +EXPORT_SYMBOL(sw64_platform); +struct sw64_chip_ops *sw64_chip; +struct sw64_chip_init_ops *sw64_chip_init; + +static void __init sw64_setup_platform_ops(void) +{ + /* + * FIXME: set platform operation depending on CONFIG now. + * SMBIOS will help use to determin actual board. 
+ */ +#ifdef CONFIG_PLATFORM_XUELANG + sw64_platform = &xuelang_ops; +#endif +} + + +asmlinkage __visible void __init sw64_start_kernel(void) +{ + sw64_setup_chip_ops(); + sw64_setup_platform_ops(); + sw64_platform->ops_fixup(); + start_kernel(); +} diff --git a/arch/sw_64/kernel/early_printk.c b/arch/sw_64/kernel/early_printk.c new file mode 100644 index 000000000000..f4d5f2d5c876 --- /dev/null +++ b/arch/sw_64/kernel/early_printk.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/console.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <asm/io.h> + +static unsigned long early_serial_base; /* ttyS0 */ + +#define XMTRDY 0x20 + +#define DLAB 0x80 + +#define TXR 0 /* Transmit register (WRITE) */ +#define RXR 0 /* Receive register (READ) */ +#define IER 1 /* Interrupt Enable */ +#define IIR 2 /* Interrupt ID */ +#define FCR 2 /* FIFO control */ +#define LCR 3 /* Line control */ +#define MCR 4 /* Modem control */ +#define LSR 5 /* Line Status */ +#define MSR 6 /* Modem Status */ +#define DLL 0 /* Divisor Latch Low */ +#define DLH 1 /* Divisor latch High */ + +static void mem32_serial_out(unsigned long addr, int offset, int value) +{ + void __iomem *vaddr = (void __iomem *)addr; + + offset = offset << 9; + + writel(value, vaddr + offset); +} + +static unsigned int mem32_serial_in(unsigned long addr, int offset) +{ + void __iomem *vaddr = (void __iomem *)addr; + + offset = offset << 9; + + return readl(vaddr + offset); +} + +static unsigned int (*serial_in)(unsigned long addr, int offset) = mem32_serial_in; +static void (*serial_out)(unsigned long addr, int offset, int value) = mem32_serial_out; + +static int early_serial_putc(unsigned char ch) +{ + unsigned int timeout = 0xffff; + + while ((serial_in(early_serial_base, LSR) & XMTRDY) == 0 && --timeout) + cpu_relax(); + serial_out(early_serial_base, TXR, ch); + + return timeout ? 
0 : -1; +} + +static void early_serial_write(struct console *con, const char *s, unsigned int n) +{ + while (*s && n-- > 0) { + if (*s == '\n') + early_serial_putc('\r'); + early_serial_putc(*s); + s++; + } +} + +static unsigned int uart_get_refclk(void) +{ + return 24000000UL; +} + +static unsigned int uart_calculate_baudrate_divisor(unsigned long baudrate) +{ + unsigned int refclk = uart_get_refclk(); + + return (1 + (2 * refclk) / (baudrate * 16)) / 2; +} + +static __init void early_serial_hw_init(unsigned long baud) +{ + unsigned char c; + unsigned long divisor = uart_calculate_baudrate_divisor(baud); + + serial_out(early_serial_base, LCR, 0x3); /* 8n1 */ + serial_out(early_serial_base, IER, 0); /* no interrupt */ + serial_out(early_serial_base, FCR, 0); /* no fifo */ + serial_out(early_serial_base, MCR, 0x3); /* DTR + RTS */ + + c = serial_in(early_serial_base, LCR); + serial_out(early_serial_base, LCR, c | DLAB); + serial_out(early_serial_base, DLL, divisor & 0xff); + serial_out(early_serial_base, DLH, (divisor >> 8) & 0xff); + serial_out(early_serial_base, LCR, c & ~DLAB); +} + +#define DEFAULT_BAUD 115200 + +static __init void early_serial_init(char *s) +{ + unsigned long baud = DEFAULT_BAUD; + char *e; + + if (*s == ',') + ++s; + + if (*s) { + unsigned int port; + static const long bases[] __initconst = { 0xfff0803300000000ULL, + 0xfff0903300000000ULL }; + + if (!strncmp(s, "ttyS", 4)) + s += 4; + port = simple_strtoul(s, &e, 10); + if (port > 1 || s == e) + port = 0; + early_serial_base = bases[port]; + s += strcspn(s, ","); + if (*s == ',') + s++; + } + + if (*s) { + baud = simple_strtoull(s, &e, 0); + + if (baud == 0 || s == e) + baud = DEFAULT_BAUD; + } + + /* These will always be IO based ports */ + serial_in = mem32_serial_in; + serial_out = mem32_serial_out; + + /* Set up the HW */ + early_serial_hw_init(baud); +} + +static struct console early_serial_console = { + .name = "early", + .write = early_serial_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + +static void early_console_register(struct console *con, int keep_early) +{ + if (con->index != -1) { + pr_crit("ERROR: earlyprintk= %s already used\n", + con->name); + return; + } + early_console = con; + + if (keep_early) + early_console->flags &= ~CON_BOOT; + else + early_console->flags |= CON_BOOT; + + register_console(early_console); +} + +static int __init setup_early_printk(char *buf) +{ + int keep; + + if (!buf) + return 0; + + if (early_console) + return 0; + + keep = (strstr(buf, "keep") != NULL); + + if (!strncmp(buf, "serial", 6)) { + buf += 6; + early_serial_init(buf); + early_console_register(&early_serial_console, keep); + if (!strncmp(buf, ",ttyS", 5)) + buf += 5; + } + + return 0; +} + +early_param("earlyprintk", setup_early_printk); diff --git a/arch/sw_64/kernel/entry-ftrace.S b/arch/sw_64/kernel/entry-ftrace.S new file mode 100644 index 000000000000..3f88a9fe2e3e --- /dev/null +++ b/arch/sw_64/kernel/entry-ftrace.S @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/sw_64/kernel/entry-ftrace.S + * + * Author: linyue + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ +#include <linux/linkage.h> + + .text + .set noat + .align 4 + +#define FTRACE_SP_OFF 0x50 + .macro mcount_enter + subl $sp, FTRACE_SP_OFF, $sp + stl $16, 0($sp) + stl $17, 0x8($sp) + stl $18, 0x10($sp) + stl $26, 0x18($sp) + stl $27, 0x20($sp) + stl $28, 0x28($sp) + stl $29, 0x30($sp) + stl $19, 0x38($sp) + stl $20, 0x40($sp) + stl $21, 0x48($sp) + .endm + + .macro mcount_end + ldl $16, 0($sp) + ldl $17, 0x8($sp) + ldl $18, 0x10($sp) + ldl $26, 0x18($sp) + ldl $27, 0x20($sp) + ldl $28, 0x28($sp) + ldl $29, 0x30($sp) + ldl $19, 0x38($sp) + ldl $20, 0x40($sp) + ldl $21, 0x48($sp) + addl $sp, FTRACE_SP_OFF, $sp + .endm + +#ifdef CONFIG_DYNAMIC_FTRACE + .global _mcount + .ent _mcount +_mcount: + ret $31, ($28), 1 + .end _mcount + + + .global ftrace_caller + .ent ftrace_caller +ftrace_caller: + mcount_enter + + br $27, 2f +2: ldgp $29, 0($27) + + bis $28, $31, $16 + subl $16, 8, $16 + bis $26, $31, $17 + + ldi $4, current_tracer + ldl $27, 0($4) + + .global ftrace_call +ftrace_call: /* tracer(pc, lr); call 26, 27 , 1 */ + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ldi $27, prepare_ftrace_return /* prepare_ftrace_return(&lr, pc, fp) */ + .global ftrace_graph_call +ftrace_graph_call: /* ftrace_graph_caller(); */ + nop /* If enabled, this will be replaced */ + /* "br ftrace_graph_caller" */ +#endif + mcount_end + ret $31, ($28), 1 + .end ftrace_caller +#else /* !CONFIG_DYNAMIC_FTRACE */ + + .global _mcount + .ent _mcount +_mcount: + mcount_enter + + br $27, 1f +1: ldgp $29, 0($27) + + ldi $4, ftrace_trace_function + ldl $27, 0($4) + ldi $5, ftrace_stub + cmpeq $4, $5, $6 + bne $6, skip_ftrace + + bis $28, $31, $16 + subl $16, 8, $16 + bis $26, $31, $17 + call $26, ($27), 1 + +skip_ftrace: +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ldi $4, ftrace_graph_return + ldl $4, 0($4) + ldi $5, ftrace_stub + cmpeq $4, $5, $6 + beq $6, ftrace_graph_caller + + + ldi $4, ftrace_graph_entry + ldl $4, 0($4) + ldi $5, ftrace_graph_entry_stub + cmpeq $4, $5, $6 + beq $6, ftrace_graph_caller +#endif + mcount_end + ret $31, ($28), 1 + .end _mcount + +#endif /* CONFIG_DYNAMIC_FTRACE */ + + .global ftrace_stub + .ent ftrace_stub +ftrace_stub: + ret $31, ($26), 1 + .end ftrace_stub + + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .macro RESTORE_GRAPH_ARGS + ldl $26, 0x18($sp) + ldl $28, 0x28($sp) + .endm + + /* save return value regs*/ + .macro save_return_regs + subl $sp, 0x8, $sp + stl $0, 0x0($sp) + .endm + + /* restore return value regs*/ + .macro restore_return_regs + ldl $0, 0x0($sp) + addl $sp, 0x8, $sp + .endm + + +/* + * void ftrace_graph_caller(void) + * + * Called from _mcount() or ftrace_caller() when function_graph tracer is + * selected. + * This function w/ prepare_ftrace_return() fakes link register's value on + * the call stack in order to intercept instrumented function's return path + * and run return_to_handler() later on its exit. + */ + .global ftrace_graph_caller + .ent ftrace_graph_caller +ftrace_graph_caller: + memb /* need memb, otherwise it'll go wrong */ + RESTORE_GRAPH_ARGS + addl $sp, 0x18, $16 + bis $28, $31, $17 + subl $17, 8, $17 + bis $15, $31, $18 /* parent's fp */ + + call $26, ($27) /* prepare_ftrace_return() */ + + mcount_end + ret $31, ($28), 1 + .end ftrace_graph_caller + +/* + * void return_to_handler(void) + * + * Run ftrace_return_to_handler() before going back to parent. + * @fp is checked against the value passed by ftrace_graph_caller() + * only when HAVE_FUNCTION_GRAPH_FP_TEST is enabled. 
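+ * + * Flow, as read from the code below: save_return_regs preserves the traced + * function's return value in $0; ftrace_return_to_handler() then hands back + * the original return address in $0, which is copied into $26 before $0 is + * restored, so the final "ret $31, ($26), 1" lands back in the real caller.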
+ */ +ENTRY(return_to_handler) + save_return_regs + br $27, 3f +3: ldgp $29, 0($27) + ldi $27, ftrace_return_to_handler + call $26, ($27) + bis $0, $31, $26 + restore_return_regs + ret $31, ($26), 1 +END(return_to_handler) + +#endif diff --git a/arch/sw_64/kernel/entry.S b/arch/sw_64/kernel/entry.S new file mode 100644 index 000000000000..753eb31a76c6 --- /dev/null +++ b/arch/sw_64/kernel/entry.S @@ -0,0 +1,706 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Kernel entry-points. + */ + +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/hmcall.h> +#include <asm/errno.h> +#include <asm/unistd.h> + + .text + .set noat +/* + * This defines the normal kernel pt-regs layout. + * + * regs 9-15 preserved by C code + * regs 16-18 saved by HMcode + * regs 29-30 saved and set up by HMcode + * JRP - Save regs 16-18 in a special area of the stack, so that + * the hmcode-provided values are available to the signal handler. + */ + +#define SAVE_ALL \ + subl $sp, PT_REGS_PS, $sp; \ + stl $0, PT_REGS_R0($sp); \ + stl $1, PT_REGS_R1($sp); \ + stl $2, PT_REGS_R2($sp); \ + stl $3, PT_REGS_R3($sp); \ + stl $4, PT_REGS_R4($sp); \ + stl $28, PT_REGS_R28($sp); \ + stl $5, PT_REGS_R5($sp); \ + stl $6, PT_REGS_R6($sp); \ + stl $7, PT_REGS_R7($sp); \ + stl $8, PT_REGS_R8($sp); \ + stl $19, PT_REGS_R19($sp); \ + stl $20, PT_REGS_R20($sp); \ + stl $21, PT_REGS_R21($sp); \ + stl $22, PT_REGS_R22($sp); \ + stl $23, PT_REGS_R23($sp); \ + stl $24, PT_REGS_R24($sp); \ + stl $25, PT_REGS_R25($sp); \ + stl $26, PT_REGS_R26($sp); \ + stl $27, PT_REGS_R27($sp); \ + stl $16, PT_REGS_TRAP_A0($sp); \ + stl $17, PT_REGS_TRAP_A1($sp); \ + stl $18, PT_REGS_TRAP_A2($sp) + +#define RESTORE_ALL \ + ldl $0, PT_REGS_R0($sp); \ + ldl $1, PT_REGS_R1($sp); \ + ldl $2, PT_REGS_R2($sp); \ + ldl $3, PT_REGS_R3($sp); \ + ldl $4, PT_REGS_R4($sp); \ + ldl $5, PT_REGS_R5($sp); \ + ldl $6, PT_REGS_R6($sp); \ + ldl $7, PT_REGS_R7($sp); \ + ldl $8, PT_REGS_R8($sp); \ + ldl $19, PT_REGS_R19($sp); \ + ldl $20, PT_REGS_R20($sp); \ + ldl $21, PT_REGS_R21($sp); \ + ldl $22, PT_REGS_R22($sp); \ + ldl $23, PT_REGS_R23($sp); \ + ldl $24, PT_REGS_R24($sp); \ + ldl $25, PT_REGS_R25($sp); \ + ldl $26, PT_REGS_R26($sp); \ + ldl $27, PT_REGS_R27($sp); \ + ldl $28, PT_REGS_R28($sp); \ + addl $sp, PT_REGS_PS, $sp + +/* + * Non-syscall kernel entry points. + */ + + .align 4 + .globl entInt + .ent entInt +entInt: + SAVE_ALL + ldi $8, 0x3fff + ldi $26, ret_from_sys_call + bic $sp, $8, $8 + mov $sp, $19 + call $31, do_entInt + .end entInt + + .align 4 + .globl entArith + .ent entArith +entArith: + SAVE_ALL + ldi $8, 0x3fff + ldi $26, ret_from_sys_call + bic $sp, $8, $8 + mov $sp, $18 + call $31, do_entArith + .end entArith + + .align 4 + .globl entMM + .ent entMM +entMM: + SAVE_ALL +/* save $9 - $15 so the inline exception code can manipulate them. */ + subl $sp, SWITCH_STACK_RA, $sp + stl $9, SWITCH_STACK_R9($sp) + stl $10, SWITCH_STACK_R10($sp) + stl $11, SWITCH_STACK_R11($sp) + stl $12, SWITCH_STACK_R12($sp) + stl $13, SWITCH_STACK_R13($sp) + stl $14, SWITCH_STACK_R14($sp) + stl $15, SWITCH_STACK_R15($sp) + addl $sp, SWITCH_STACK_RA, $19 +/* handle the fault */ + ldi $8, 0x3fff + bic $sp, $8, $8 + call $26, do_page_fault +/* reload the registers after the exception code played. 
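do_page_fault() and any inline fixup code run against the switch-stack copies of $9-$15 saved above, so those slots are read back here to pick up any changes before the frame is popped.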
*/ + ldl $9, SWITCH_STACK_R9($sp) + ldl $10, SWITCH_STACK_R10($sp) + ldl $11, SWITCH_STACK_R11($sp) + ldl $12, SWITCH_STACK_R12($sp) + ldl $13, SWITCH_STACK_R13($sp) + ldl $14, SWITCH_STACK_R14($sp) + ldl $15, SWITCH_STACK_R15($sp) + addl $sp, SWITCH_STACK_RA, $sp +/* finish up the syscall as normal. */ + br ret_from_sys_call + .end entMM + + .align 4 + .globl entIF + .ent entIF +entIF: + SAVE_ALL + ldi $8, 0x3fff + ldi $26, ret_from_sys_call + bic $sp, $8, $8 + mov $sp, $17 + call $31, do_entIF + .end entIF + + .align 4 + .globl entUna + .ent entUna +entUna: + ldi $sp, -ALLREGS_PS($sp) + stl $0, ALLREGS_R0($sp) + ldl $0, ALLREGS_PS($sp) /* get PS */ + stl $1, ALLREGS_R1($sp) + stl $2, ALLREGS_R2($sp) + stl $3, ALLREGS_R3($sp) + and $0, 8, $0 /* user mode? */ + stl $4, ALLREGS_R4($sp) + bne $0, entUnaUser /* yup -> do user-level unaligned fault */ + stl $5, ALLREGS_R5($sp) + stl $6, ALLREGS_R6($sp) + stl $7, ALLREGS_R7($sp) + stl $8, ALLREGS_R8($sp) + stl $9, ALLREGS_R9($sp) + stl $10, ALLREGS_R10($sp) + stl $11, ALLREGS_R11($sp) + stl $12, ALLREGS_R12($sp) + stl $13, ALLREGS_R13($sp) + stl $14, ALLREGS_R14($sp) + stl $15, ALLREGS_R15($sp) + /* 16-18 HMCODE-saved */ + stl $19, ALLREGS_R19($sp) + stl $20, ALLREGS_R20($sp) + stl $21, ALLREGS_R21($sp) + stl $22, ALLREGS_R22($sp) + stl $23, ALLREGS_R23($sp) + stl $24, ALLREGS_R24($sp) + stl $25, ALLREGS_R25($sp) + stl $26, ALLREGS_R26($sp) + stl $27, ALLREGS_R27($sp) + stl $28, ALLREGS_R28($sp) + mov $sp, $19 + stl $gp, ALLREGS_R29($sp) + ldi $8, 0x3fff + stl $31, ALLREGS_R31($sp) + bic $sp, $8, $8 + call $26, do_entUna + ldl $0, ALLREGS_R0($sp) + ldl $1, ALLREGS_R1($sp) + ldl $2, ALLREGS_R2($sp) + ldl $3, ALLREGS_R3($sp) + ldl $4, ALLREGS_R4($sp) + ldl $5, ALLREGS_R5($sp) + ldl $6, ALLREGS_R6($sp) + ldl $7, ALLREGS_R7($sp) + ldl $8, ALLREGS_R8($sp) + ldl $9, ALLREGS_R9($sp) + ldl $10, ALLREGS_R10($sp) + ldl $11, ALLREGS_R11($sp) + ldl $12, ALLREGS_R12($sp) + ldl $13, ALLREGS_R13($sp) + ldl $14, ALLREGS_R14($sp) + ldl $15, ALLREGS_R15($sp) + /* 16-18 HMCODE-saved */ + ldl $19, ALLREGS_R19($sp) + ldl $20, ALLREGS_R20($sp) + ldl $21, ALLREGS_R21($sp) + ldl $22, ALLREGS_R22($sp) + ldl $23, ALLREGS_R23($sp) + ldl $24, ALLREGS_R24($sp) + ldl $25, ALLREGS_R25($sp) + ldl $26, ALLREGS_R26($sp) + ldl $27, ALLREGS_R27($sp) + ldl $28, ALLREGS_R28($sp) + ldl $gp, ALLREGS_R29($sp) + ldi $sp, ALLREGS_PS($sp) + sys_call HMC_rti + .end entUna + + .align 4 + .ent entUnaUser +entUnaUser: + ldl $0, ALLREGS_R0($sp) /* restore original $0 */ + ldi $sp, ALLREGS_PS($sp) /* pop entUna's stack frame */ + SAVE_ALL /* setup normal kernel stack */ + ldi $sp, -SWITCH_STACK_RA($sp) + stl $9, SWITCH_STACK_R9($sp) + stl $10, SWITCH_STACK_R10($sp) + stl $11, SWITCH_STACK_R11($sp) + stl $12, SWITCH_STACK_R12($sp) + stl $13, SWITCH_STACK_R13($sp) + stl $14, SWITCH_STACK_R14($sp) + stl $15, SWITCH_STACK_R15($sp) + ldi $8, 0x3fff + addl $sp, SWITCH_STACK_RA, $19 + bic $sp, $8, $8 + call $26, do_entUnaUser + ldl $9, SWITCH_STACK_R9($sp) + ldl $10, SWITCH_STACK_R10($sp) + ldl $11, SWITCH_STACK_R11($sp) + ldl $12, SWITCH_STACK_R12($sp) + ldl $13, SWITCH_STACK_R13($sp) + ldl $14, SWITCH_STACK_R14($sp) + ldl $15, SWITCH_STACK_R15($sp) + ldi $sp, SWITCH_STACK_RA($sp) + br ret_from_sys_call + .end entUnaUser + + +/* + * The system call entry point is special. Most importantly, it looks + * like a function call to userspace as far as clobbered registers. We + * do preserve the argument registers (for syscall restarts) and $26 + * (for leaf syscall functions). + * + * So much for theory. 
We don't take advantage of this yet. + * + * Note that a0-a2 are not saved by HMcode as with the other entry points. + */ + + .align 4 + .globl entSys + .globl ret_from_sys_call + .ent entSys +entSys: + + SAVE_ALL + ldi $8, 0x3fff + bic $sp, $8, $8 + ldi $4, NR_SYSCALLS($31) + stl $16, PT_REGS_R16($sp) + ldi $5, sys_call_table + ldi $27, sys_ni_syscall + cmpult $0, $4, $4 + ldw $3, TI_FLAGS($8) + stl $17, PT_REGS_R17($sp) + s8addl $0, $5, $5 + stl $18, PT_REGS_R18($sp) + ldi $6, _TIF_SYSCALL_WORK + and $3, $6, $3 + bne $3, strace + + beq $4, 1f + ldl $27, 0($5) +1: call $26, ($27), ni_syscall + ldgp $gp, 0($26) + blt $0, $syscall_error /* the call failed */ + stl $0, PT_REGS_R0($sp) + stl $31, PT_REGS_R19($sp) /* a3=0 => no error */ + + .align 4 +ret_from_sys_call: +#ifdef CONFIG_SUBARCH_C3B + fillcs 0($sp) /* prefetch */ + fillcs 128($sp) /* prefetch */ +#endif + selne $26, 0, $18, $18 /* $18 = 0 => non-restartable */ + ldl $0, PT_REGS_PS($sp) + and $0, 8, $0 + beq $0, ret_to_kernel +ret_to_user: + /* Make sure need_resched and sigpending don't change between + sampling and the rti. */ + ldi $16, 7 + sys_call HMC_swpipl + ldw $17, TI_FLAGS($8) + and $17, _TIF_WORK_MASK, $2 + bne $2, work_pending +restore_all: + RESTORE_ALL + sys_call HMC_rti + +ret_to_kernel: + ldi $16, 7 + sys_call HMC_swpipl + br restore_all + + + .align 3 +$syscall_error: + /* + * Some system calls (e.g., ptrace) can return arbitrary + * values which might normally be mistaken as error numbers. + * Those functions must zero $0 (v0) directly in the stack + * frame to indicate that a negative return value wasn't an + * error number.. + */ + ldl $18, PT_REGS_R0($sp) /* old syscall nr (zero if success) */ + beq $18, $ret_success + + ldl $19, PT_REGS_R19($sp) /* .. and this a3 */ + subl $31, $0, $0 /* with error in v0 */ + addl $31, 1, $1 /* set a3 for errno return */ + stl $0, PT_REGS_R0($sp) + mov $31, $26 /* tell "ret_from_sys_call" we can restart */ + stl $1, PT_REGS_R19($sp) /* a3 for return */ + br ret_from_sys_call + + +$ret_success: + stl $0, PT_REGS_R0($sp) + stl $31, PT_REGS_R19($sp) /* a3=0 => no error */ + br ret_from_sys_call + .end entSys + +/* + * Do all cleanup when returning from all interrupts and system calls. + * + * Arguments: + * $8: current. + * $17: TI_FLAGS. + * $18: The old syscall number, or zero if this is not a return + * from a syscall that errored and is possibly restartable. + * $19: The old a3 value + */ + + .align 4 + .ent work_pending +work_pending: + and $17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_UPROBE, $2 + bne $2, $work_notifysig + +$work_resched: + /* + * We can get here only if we returned from syscall without SIGPENDING + * or got through work_notifysig already. Either case means no syscall + * restarts for us, so let $18 and $19 burn. + */ + call $26, schedule + mov 0, $18 + br ret_to_user + +$work_notifysig: + mov $sp, $16 + bsr $1, do_switch_stack + call $26, do_work_pending + bsr $1, undo_switch_stack + br restore_all + .end work_pending + + + +/* + * PTRACE syscall handler + */ + + .align 4 + .ent strace +strace: + /* set up signal stack, call syscall_trace */ + bsr $1, do_switch_stack + mov $0, $9 + mov $19, $10 + call $26, syscall_trace_enter + mov $9, $18 + mov $10, $19 + bsr $1, undo_switch_stack + + blt $0, $syscall_trace_failed + + /* get the system call number and the arguments back.. 
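A ptrace tracer may have changed them while the task was stopped in syscall_trace_enter(), so the arguments are reloaded from their pt_regs slots and the (possibly updated) syscall number comes back in $0.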
*/ + ldl $16, PT_REGS_R16($sp) + ldl $17, PT_REGS_R17($sp) + ldl $18, PT_REGS_R18($sp) + ldl $19, PT_REGS_R19($sp) + ldl $20, PT_REGS_R20($sp) + ldl $21, PT_REGS_R21($sp) + + /* get the system call pointer.. */ + ldi $1, NR_SYSCALLS($31) + ldi $2, sys_call_table + ldi $27, ni_syscall + + cmpult $0, $1, $1 + s8addl $0, $2, $2 + beq $1, 1f + ldl $27, 0($2) +1: call $26, ($27), sys_gettimeofday +ret_from_straced: + ldgp $gp, 0($26) + + /* check return.. */ + blt $0, $strace_error /* the call failed */ + stl $31, PT_REGS_R19($sp) /* a3=0 => no error */ +$strace_success: + stl $0, PT_REGS_R0($sp) /* save return value */ + + bsr $1, do_switch_stack + call $26, syscall_trace_leave + bsr $1, undo_switch_stack + br $31, ret_from_sys_call + + .align 3 +$strace_error: + ldl $18, PT_REGS_R0($sp) /* old syscall nr (zero if success) */ + + beq $18, $strace_success + ldl $19, PT_REGS_R19($sp) /* .. and this a3 */ + + subl $31, $0, $0 /* with error in v0 */ + addl $31, 1, $1 /* set a3 for errno return */ + stl $0, PT_REGS_R0($sp) + stl $1, PT_REGS_R19($sp) /* a3 for return */ + + bsr $1, do_switch_stack + mov $18, $9 /* save old syscall number */ + mov $19, $10 /* save old a3 */ + call $26, syscall_trace_leave + mov $9, $18 + mov $10, $19 + bsr $1, undo_switch_stack + + mov $31, $26 /* tell "ret_from_sys_call" we can restart */ + br ret_from_sys_call + +$syscall_trace_failed: + bsr $1, do_switch_stack + mov $18, $9 + mov $19, $10 + call $26, syscall_trace_leave + mov $9, $18 + mov $10, $19 + bsr $1, undo_switch_stack + mov $31, $26 /* tell "ret_from_sys_call" we can restart */ + br ret_from_sys_call + .end strace + + .align 4 + .ent do_switch_stack +do_switch_stack: + ldi $sp, -SWITCH_STACK_SIZE($sp) + flds $f31, 0($sp) /* fillde hint */ + stl $9, SWITCH_STACK_R9($sp) + stl $10, SWITCH_STACK_R10($sp) + stl $11, SWITCH_STACK_R11($sp) + stl $12, SWITCH_STACK_R12($sp) + stl $13, SWITCH_STACK_R13($sp) + stl $14, SWITCH_STACK_R14($sp) + stl $15, SWITCH_STACK_R15($sp) + stl $26, SWITCH_STACK_RA($sp) + // SIMD-FP + ldl $9, TI_TASK($8) + ldi $9, TASK_THREAD($9) + ldi $10, THREAD_CTX_FP($9) + vstd $f0, CTX_FP_F0($10) + vstd $f1, CTX_FP_F1($10) + vstd $f2, CTX_FP_F2($10) + vstd $f3, CTX_FP_F3($10) + vstd $f4, CTX_FP_F4($10) + vstd $f5, CTX_FP_F5($10) + vstd $f6, CTX_FP_F6($10) + vstd $f7, CTX_FP_F7($10) + vstd $f8, CTX_FP_F8($10) + vstd $f9, CTX_FP_F9($10) + vstd $f10, CTX_FP_F10($10) + vstd $f11, CTX_FP_F11($10) + vstd $f12, CTX_FP_F12($10) + vstd $f13, CTX_FP_F13($10) + vstd $f14, CTX_FP_F14($10) + vstd $f15, CTX_FP_F15($10) + vstd $f16, CTX_FP_F16($10) + vstd $f17, CTX_FP_F17($10) + vstd $f18, CTX_FP_F18($10) + vstd $f19, CTX_FP_F19($10) + vstd $f20, CTX_FP_F20($10) + vstd $f21, CTX_FP_F21($10) + vstd $f22, CTX_FP_F22($10) + vstd $f23, CTX_FP_F23($10) + vstd $f24, CTX_FP_F24($10) + vstd $f25, CTX_FP_F25($10) + vstd $f26, CTX_FP_F26($10) + vstd $f27, CTX_FP_F27($10) + rfpcr $f0 + vstd $f28, CTX_FP_F28($10) + vstd $f29, CTX_FP_F29($10) + vstd $f30, CTX_FP_F30($10) + fstd $f0, THREAD_FPCR($9) + vldd $f0, CTX_FP_F0($10) + ldl $9, SWITCH_STACK_R9($sp) + ldl $10, SWITCH_STACK_R10($sp) + ret $31, ($1), 1 + .end do_switch_stack + + .align 4 + .ent undo_switch_stack +undo_switch_stack: +#ifdef CONFIG_SUBARCH_C3B + fillcs 0($sp) /* prefetch */ +#endif + ldl $11, SWITCH_STACK_R11($sp) + ldl $12, SWITCH_STACK_R12($sp) + ldl $13, SWITCH_STACK_R13($sp) + ldl $14, SWITCH_STACK_R14($sp) + ldl $15, SWITCH_STACK_R15($sp) + ldl $26, SWITCH_STACK_RA($sp) + // SIMD-FP + ldl $9, TI_TASK($8) + ldi $9, TASK_THREAD($9) + fldd $f0, 
THREAD_FPCR($9) + wfpcr $f0 + fimovd $f0, $10 + and $10, 0x3, $10 + beq $10, $setfpec_0 + subl $10, 0x1, $10 + beq $10, $setfpec_1 + subl $10, 0x1, $10 + beq $10, $setfpec_2 + setfpec3 + br $setfpec_over +$setfpec_0: + setfpec0 + br $setfpec_over +$setfpec_1: + setfpec1 + br $setfpec_over +$setfpec_2: + setfpec2 +$setfpec_over: + ldi $10, THREAD_CTX_FP($9) + vldd $f0, CTX_FP_F0($10) + vldd $f1, CTX_FP_F1($10) + vldd $f2, CTX_FP_F2($10) + vldd $f3, CTX_FP_F3($10) + vldd $f4, CTX_FP_F4($10) + vldd $f5, CTX_FP_F5($10) + vldd $f6, CTX_FP_F6($10) + vldd $f7, CTX_FP_F7($10) + vldd $f8, CTX_FP_F8($10) + vldd $f9, CTX_FP_F9($10) + vldd $f10, CTX_FP_F10($10) + vldd $f11, CTX_FP_F11($10) + vldd $f12, CTX_FP_F12($10) + vldd $f13, CTX_FP_F13($10) + vldd $f14, CTX_FP_F14($10) + vldd $f15, CTX_FP_F15($10) + vldd $f16, CTX_FP_F16($10) + vldd $f17, CTX_FP_F17($10) + vldd $f18, CTX_FP_F18($10) + vldd $f19, CTX_FP_F19($10) + vldd $f20, CTX_FP_F20($10) + vldd $f21, CTX_FP_F21($10) + vldd $f22, CTX_FP_F22($10) + vldd $f23, CTX_FP_F23($10) + vldd $f24, CTX_FP_F24($10) + vldd $f25, CTX_FP_F25($10) + vldd $f26, CTX_FP_F26($10) + vldd $f27, CTX_FP_F27($10) + vldd $f28, CTX_FP_F28($10) + vldd $f29, CTX_FP_F29($10) + vldd $f30, CTX_FP_F30($10) + ldl $9, SWITCH_STACK_R9($sp) + ldl $10, SWITCH_STACK_R10($sp) + ldi $sp, SWITCH_STACK_SIZE($sp) + ret $31, ($1), 1 + .end undo_switch_stack + +/* + * The meat of the context switch code. + */ + + .align 4 + .globl __switch_to + .ent __switch_to +__switch_to: + .prologue 0 + bsr $1, do_switch_stack + sys_call HMC_swpctx + ldi $8, 0x3fff + bic $sp, $8, $8 + bsr $1, undo_switch_stack + mov $17, $0 + ret + .end __switch_to + +/* + * New processes begin life here. + */ + + .globl ret_from_fork + .align 4 + .ent ret_from_fork +ret_from_fork: + ldi $26, ret_from_sys_call + mov $17, $16 + jmp $31, schedule_tail + .end ret_from_fork + +/* + * ... and new kernel threads - here + */ + .align 4 + .globl ret_from_kernel_thread + .ent ret_from_kernel_thread +ret_from_kernel_thread: + mov $17, $16 + call $26, schedule_tail + mov $9, $27 + mov $10, $16 + call $26, ($9) + mov $31, $19 /* to disable syscall restarts */ + br $31, ret_to_user + .end ret_from_kernel_thread + +/* + * Special system calls. Most of these are special in that they either + * have to play switch_stack games or in some way use the pt_regs struct. + */ + +.macro fork_like name + .align 4 + .globl sw64_\name + .ent sw64_\name +sw64_\name: + .prologue 0 + bsr $1, do_switch_stack + call $26, sys_\name + ldl $26, SWITCH_STACK_RA($sp) + ldi $sp, SWITCH_STACK_SIZE($sp) + ret + .end sw64_\name + .endm + +fork_like fork +fork_like vfork +fork_like clone + + .align 4 + .globl sys_sigreturn + .ent sys_sigreturn +sys_sigreturn: + .prologue 0 + ldi $9, ret_from_straced + cmpult $26, $9, $9 + ldi $sp, -SWITCH_STACK_SIZE($sp) + call $26, do_sigreturn + bne $9, 1f + call $26, syscall_trace_leave +1: br $1, undo_switch_stack + br ret_from_sys_call + .end sys_sigreturn + + .align 4 + .globl sys_rt_sigreturn + .ent sys_rt_sigreturn +sys_rt_sigreturn: + .prologue 0 + ldi $9, ret_from_straced + cmpult $26, $9, $9 + ldi $sp, -SWITCH_STACK_SIZE($sp) + call $26, do_rt_sigreturn + bne $9, 1f + call $26, syscall_trace_leave +1: br $1, undo_switch_stack + br ret_from_sys_call + .end sys_rt_sigreturn + + .align 4 + .globl ni_syscall + .ent ni_syscall +ni_syscall: + .prologue 0 + /* Special because it also implements overflow handling via + * syscall number 0. And if you recall, zero is a special + * trigger for "not an error". 
Store large non-zero there. + */ + ldi $0, -ENOSYS + unop + stl $0, PT_REGS_R0($sp) + ret + .end ni_syscall diff --git a/arch/sw_64/kernel/ftrace.c b/arch/sw_64/kernel/ftrace.c new file mode 100644 index 000000000000..413562b5d9be --- /dev/null +++ b/arch/sw_64/kernel/ftrace.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Based on arch/arm64/kernel/ftrace.c + * + * Copyright (C) 2019 os kernel team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ftrace.h> +#include <linux/module.h> +#include <linux/swab.h> +#include <linux/uaccess.h> + +#include <asm/cacheflush.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE + +unsigned long current_tracer = (unsigned long)ftrace_stub; + +/* + * Replace two instructions, which may be branches or NOPs. + */ +static int ftrace_modify_double_code(unsigned long pc, u64 new) +{ + if (sw64_insn_double_write((void *)pc, new)) + return -EPERM; + return 0; +} + +/* + * Replace a single instruction, which may be a branch or NOP. + */ +static int ftrace_modify_code(unsigned long pc, u32 new) +{ + if (sw64_insn_write((void *)pc, new)) + return -EPERM; + return 0; +} + +/* + * Replace the tracer function called from ftrace_caller() + */ +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long pc; + int ret; + u32 new; + + current_tracer = (unsigned long)func; + + pc = (unsigned long)&ftrace_call; + + new = sw64_insn_call(R26, R27); + ret = ftrace_modify_code(pc, new); + if (ret) + return ret; + return 0; +} + +/* + * Turn on the call to ftrace_caller() in the instrumented function + */ +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 new; + int ret; + + /* ldl r28,(ftrace_addr_offset)(r8) */ + new = (0x23U << 26) | (28U << 21) | (8U << 16) | offsetof(struct thread_info, dyn_ftrace_addr); + ret = ftrace_modify_code(pc, new); + if (ret) + return ret; + pc = pc + 4; + new = sw64_insn_call(R28, R28); + ret = ftrace_modify_code(pc, new); + if (ret) + return ret; + return 0; +} + +/* + * Turn off the call to ftrace_caller() in the instrumented function + */ +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + unsigned long pc = rec->ip; + unsigned long insn; + int ret; + + insn = sw64_insn_nop(); + insn = (insn << 32) | insn; + ret = ftrace_modify_double_code(pc, insn); + return ret; +} + +void arch_ftrace_update_code(int command) +{ + ftrace_modify_all_code(command); +} + +/* tracer_addr must be the same as syscall_ftrace */ +int __init ftrace_dyn_arch_init(void) +{ + init_thread_info.dyn_ftrace_addr = FTRACE_ADDR; + return 0; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * function_graph tracer expects ftrace_return_to_handler() to be called + * on the way back to parent. For this purpose, this function is called + * in _mcount() or ftrace_caller() to replace the return address (*parent) on + * the call stack with return_to_handler. + * + * Note that @frame_pointer is used only for the sanity check later.
+ */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long old; + + if (unlikely(atomic_read(&current->tracing_graph_pause))) + return; + + /* + * Note: + * No protection against faulting at *parent, which may be seen + * on other archs. It's unlikely on SW64. + */ + old = *parent; + + if (!function_graph_enter(old, self_addr, frame_pointer, NULL)) + *parent = return_hooker; +} + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Turn on/off the call to ftrace_graph_caller() in ftrace_caller() + * depending on @enable. + */ +static int ftrace_modify_graph_caller(bool enable) +{ + unsigned long pc = (unsigned long)&ftrace_graph_call; + u32 branch, nop; + + branch = sw64_insn_br(R31, pc, (unsigned long)ftrace_graph_caller); + nop = sw64_insn_nop(); + + if (enable) + return ftrace_modify_code(pc, branch); + else + return ftrace_modify_code(pc, nop); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/sw_64/kernel/head.S b/arch/sw_64/kernel/head.S new file mode 100644 index 000000000000..5fff0f33c9e2 --- /dev/null +++ b/arch/sw_64/kernel/head.S @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * initial boot stuff.. At this point, the bootloader has already + * switched into HMcode, and loaded us at the correct address + * (START_ADDR). So there isn't much left for us to do: just set up + * the kernel global pointer and jump to the kernel entry-point. + */ + +#include <linux/init.h> +#include <asm/asm-offsets.h> +#include <asm/hmcall.h> +#include <asm/setup.h> + +__HEAD + .globl _stext + .set noreorder + .globl __start + .ent __start +_stext: +__start: + .prologue 0 + br $27, 1f +1: ldgp $29, 0($27) + /* We need to get current_task_info loaded up... */ + ldi $8, init_thread_union + /* ... and find our stack ... */ + ldi $30, 0x4000 - PT_REGS_SIZE($8) + /* ... and then we can clear bss data. */ + ldi $2, __bss_start + ldi $3, __bss_stop + /* 8 bytes alignment */ +1: and $2, 0x7, $1 # align check + bne $1, 3f +2: subl $3, $2, $1 # align clear + ble $1, 4f + subl $1, 0x8, $1 + ble $1, 3f + stl $31, 0($2) + addl $2, 8, $2 + br $31, 2b +3: stb $31, 0($2) # non align clear + addl $2, 1, $2 + subl $3, $2, $1 + bgt $1, 1b +4: # finish clear +#ifdef CONFIG_RELOCATABLE + ldi $30, -8($30) + stl $29, 0($30) + /* Copy kernel and apply the relocations */ + call $26, relocate_kernel + ldl $29, 0($30) + addl $29, $0, $29 + /* Repoint the sp into the new kernel image */ + ldi $30, 0x4000 - PT_REGS_SIZE($8) +#endif + /* ... and then we can start the kernel. */ + call $26, sw64_start_kernel + sys_call HMC_halt + .end __start + +#ifdef CONFIG_SMP + .align 3 + .globl __smp_callin + .ent __smp_callin + /* On entry here the PCB of the idle task for this processor + * has been loaded. We've arranged for the tilde_pcb[x] for + * this process to contain the PCBB of the target idle task. + */ +__smp_callin: + .prologue 1 + br $27, 2f # we copy this from above "br $27 1f" +2: ldgp $29, 0($27) # First order of business, load the GP.
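+ # The bring-up sequence below: flush the TLB (HMC_tbi), read the hardware + # core id (HMC_whami), map it to a logical cpu number via __rcid_to_cpu, + # fetch that cpu's idle-task PCB from tidle_pcb, switch context onto it + # with HMC_swpctx, recover "current" from the stack, then enter smp_callin().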
+ + subl $31, 2, $16 + sys_call HMC_tbi + + sys_call HMC_whami # Get hard cid + + sll $0, 2, $0 + ldi $1, __rcid_to_cpu + addl $1, $0, $1 + ldw $0, 0($1) # Get logical cpu number + + sll $0, 3, $0 + ldi $1, tidle_pcb + addl $1, $0, $1 + ldl $16, 0($1) # Get PCBB of idle thread + + sys_call HMC_swpctx + ldi $8, 0x3fff # Find "current". + bic $30, $8, $8 + + call $26, smp_callin + sys_call HMC_halt + .end __smp_callin +#endif /* CONFIG_SMP */ + # + # It is handy, on occasion, to make halt actually just loop. + # Putting it here means we dont have to recompile the whole + # kernel. + # + + .align 3 + .globl halt + .ent halt +halt: + .prologue 0 + sys_call HMC_halt + .end halt diff --git a/arch/sw_64/kernel/hibernate.c b/arch/sw_64/kernel/hibernate.c new file mode 100644 index 000000000000..33426e3ed305 --- /dev/null +++ b/arch/sw_64/kernel/hibernate.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/suspend.h> +#include <asm/hmcall.h> +#include <asm/suspend.h> + +struct processor_state hibernate_state; +/* Defined in hibernate_asm.S */ +extern int restore_image(void); + +void save_processor_state(void) +{ + struct vcpucb *vcb = &(hibernate_state.vcb); + + vcb->ksp = rdksp(); + vcb->usp = rdusp(); + vcb->pcbb = rdpcbb(); + vcb->ptbr = rdptbr(); +} + +void restore_processor_state(void) +{ + struct vcpucb *vcb = &(hibernate_state.vcb); + + wrksp(vcb->ksp); + wrusp(vcb->usp); + wrpcbb(vcb->pcbb); + wrptbr(vcb->ptbr); + sflush(); + tbia(); + imb(); +} + +int swsusp_arch_resume(void) +{ + restore_image(); + return 0; +} +/* References to section boundaries */ +extern const void __nosave_begin, __nosave_end; +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); + unsigned long nosave_end_pfn = PFN_UP(__pa(&__nosave_end)); + + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} + +struct restore_data_record { + unsigned long magic; +}; + +#define RESTORE_MAGIC 0x0123456789ABCDEFUL + +/** + * arch_hibernation_header_save - populate the architecture specific part + * of a hibernation image header + * @addr: address to save the data at + */ +int arch_hibernation_header_save(void *addr, unsigned int max_size) +{ + struct restore_data_record *rdr = addr; + + if (max_size < sizeof(struct restore_data_record)) + return -EOVERFLOW; + rdr->magic = RESTORE_MAGIC; + return 0; +} + +/** + * arch_hibernation_header_restore - read the architecture specific data + * from the hibernation image header + * @addr: address to read the data from + */ +int arch_hibernation_header_restore(void *addr) +{ + struct restore_data_record *rdr = addr; + + return (rdr->magic == RESTORE_MAGIC) ? 
0 : -EINVAL; +} diff --git a/arch/sw_64/kernel/hibernate_asm.S b/arch/sw_64/kernel/hibernate_asm.S new file mode 100644 index 000000000000..3acbcdbae0b3 --- /dev/null +++ b/arch/sw_64/kernel/hibernate_asm.S @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/page.h> +#include <asm/regdef.h> + + .text + .set noat +ENTRY(swsusp_arch_suspend) + ldi $16, hibernate_state + ldi $1, PSTATE_REGS($16) + stl $9, CALLEE_R9($1) + stl $10, CALLEE_R10($1) + stl $11, CALLEE_R11($1) + stl $12, CALLEE_R12($1) + stl $13, CALLEE_R13($1) + stl $14, CALLEE_R14($1) + stl $15, CALLEE_R15($1) + stl $26, CALLEE_RA($1) + /* SIMD-FP */ + ldi $1, PSTATE_FPREGS($16) + vstd $f2, CALLEE_F2($1) + vstd $f3, CALLEE_F3($1) + vstd $f4, CALLEE_F4($1) + vstd $f5, CALLEE_F5($1) + vstd $f6, CALLEE_F6($1) + vstd $f7, CALLEE_F7($1) + vstd $f8, CALLEE_F8($1) + vstd $f9, CALLEE_F9($1) + rfpcr $f0 + fstd $f0, PSTATE_FPCR($16) + + ldi $1, PSTATE_PCB($16) + stl sp, PCB_KSP($1) + call swsusp_save + ldi $16, hibernate_state + ldi $1, PSTATE_REGS($16) + ldl $26, CALLEE_RA($1) + + /* save current_thread_info()->pcbb */ + ret +END(swsusp_arch_suspend) + +ENTRY(restore_image) + /* prepare to copy image data to their original locations */ + ldi t0, restore_pblist + ldl t0, 0(t0) +$loop: + beq t0, $done + + /* get addresses from the pbe and copy the page */ + ldl t1, PBE_ADDR(t0) /* source */ + ldl t2, PBE_ORIG_ADDR(t0) /* destination */ + ldi t3, PAGE_SIZE + addl t1, t3, t3 +$cpyloop: + ldl t8, 0(t1) + stl t8, 0(t2) + addl t1, 8, t1 + addl t2, 8, t2 + cmpeq t1, t3, t4 + beq t4, $cpyloop + + /* progress to the next pbe */ + ldl t0, PBE_NEXT(t0) + bne t0, $loop +$done: + + /* tell the hibernation core that we've just restored the memory */ + ldi $0, in_suspend + stl $31, 0($0) + + ldi $16, hibernate_state + ldi $1, PSTATE_REGS($16) + + ldl $9, CALLEE_R9($1) + ldl $10, CALLEE_R10($1) + ldl $11, CALLEE_R11($1) + ldl $12, CALLEE_R12($1) + ldl $13, CALLEE_R13($1) + ldl $14, CALLEE_R14($1) + ldl $15, CALLEE_R15($1) + ldl $26, CALLEE_RA($1) + /* SIMD-FP */ + fldd $f0, PSTATE_FPCR($16) + wfpcr $f0 + fimovd $f0, $2 + and $2, 0x3, $2 + beq $2, $hibernate_setfpec_0 + subl $2, 0x1, $2 + beq $2, $hibernate_setfpec_1 + subl $2, 0x1, $2 + beq $2, $hibernate_setfpec_2 + setfpec3 + br $hibernate_setfpec_over +$hibernate_setfpec_0: + setfpec0 + br $hibernate_setfpec_over +$hibernate_setfpec_1: + setfpec1 + br $hibernate_setfpec_over +$hibernate_setfpec_2: + setfpec2 +$hibernate_setfpec_over: + ldi $1, PSTATE_FPREGS($16) + vldd $f2, CALLEE_F2($1) + vldd $f3, CALLEE_F3($1) + vldd $f4, CALLEE_F4($1) + vldd $f5, CALLEE_F5($1) + vldd $f6, CALLEE_F6($1) + vldd $f7, CALLEE_F7($1) + vldd $f8, CALLEE_F8($1) + vldd $f9, CALLEE_F9($1) + + ldi $1, PSTATE_PCB($16) + ldl sp, PCB_KSP($1) + + ldi $8, 0x3fff + bic sp, $8, $8 + + ldi $0, 0($31) + + ret +END(restore_image) diff --git a/arch/sw_64/kernel/insn.c b/arch/sw_64/kernel/insn.c new file mode 100644 index 000000000000..71d3832d1fe3 --- /dev/null +++ b/arch/sw_64/kernel/insn.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019, serveros, linyue + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ +#include <linux/bitops.h> +#include <linux/bug.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/stop_machine.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/kprobes.h> + +#include <asm/cacheflush.h> +#include <asm/insn.h> + + +//static DEFINE_RAW_SPINLOCK(patch_lock); + +int __kprobes sw64_insn_read(void *addr, u32 *insnp) +{ + int ret; + __le32 val; + + ret = copy_from_kernel_nofault(&val, addr, SW64_INSN_SIZE); + if (!ret) + *insnp = le32_to_cpu(val); + + return ret; +} + +static int __kprobes __sw64_insn_write(void *addr, __le32 insn) +{ + void *waddr = addr; + int ret; + + //raw_spin_lock_irqsave(&patch_lock, flags); + + ret = copy_to_kernel_nofault(waddr, &insn, SW64_INSN_SIZE); + + //raw_spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} + +static int __kprobes __sw64_insn_double_write(void *addr, __le64 insn) +{ + void *waddr = addr; + //unsigned long flags = 0; + int ret; + + //raw_spin_lock_irqsave(&patch_lock, flags); + + ret = copy_to_kernel_nofault(waddr, &insn, 2 * SW64_INSN_SIZE); + + //raw_spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} + +int __kprobes sw64_insn_write(void *addr, u32 insn) +{ + u32 *tp = addr; + /* SW64 instructions must be word aligned */ + if ((uintptr_t)tp & 0x3) + return -EINVAL; + return __sw64_insn_write(addr, cpu_to_le32(insn)); +} + +int __kprobes sw64_insn_double_write(void *addr, u64 insn) +{ + u32 *tp = addr; + /* SW64 instructions must be word aligned */ + if ((uintptr_t)tp & 0x3) + return -EINVAL; + return __sw64_insn_double_write(addr, cpu_to_le64(insn)); +} +unsigned int __kprobes sw64_insn_nop(void) +{ + return SW64_BIS(R31, R31, R31); +} + +unsigned int __kprobes sw64_insn_call(unsigned int ra, unsigned int rb) +{ + return SW64_CALL(ra, rb, 1); +} + +unsigned int __kprobes sw64_insn_sys_call(unsigned int num) +{ + return SW64_SYS_CALL(num); +} + +/* 'pc' is the address of br instruction, not the +4 PC. 'new_pc' is the target address. */ +unsigned int __kprobes sw64_insn_br(unsigned int ra, unsigned long pc, unsigned long new_pc) +{ + int offset = new_pc - pc; + unsigned int disp, minus = 0x1fffff; + + if (!(offset <= BR_MAX_DISP && offset >= -BR_MAX_DISP)) + return -1; + if (offset > 0) + disp = (offset - 4) / 4; + else + disp = ~(-offset / 4) & minus; + + return SW64_BR(ra, disp); + +} diff --git a/arch/sw_64/kernel/irq.c b/arch/sw_64/kernel/irq.c new file mode 100644 index 000000000000..6cd26af15b23 --- /dev/null +++ b/arch/sw_64/kernel/irq.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/sw_64/kernel/irq.c + * + * Copyright (C) 1995 Linus Torvalds + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. 
+ */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/kernel_stat.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <linux/interrupt.h> +#include <linux/random.h> +#include <linux/irq.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/profile.h> +#include <linux/bitops.h> +#include <linux/uaccess.h> + +#include <asm/hw_init.h> +#include <asm/hardirq.h> +#include <asm/io.h> + +volatile unsigned long irq_err_count; +DEFINE_PER_CPU(unsigned long, irq_pmi_count); +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + +void ack_bad_irq(unsigned int irq) +{ + irq_err_count++; + pr_crit("Unexpected IRQ trap at vector %u\n", irq); +} + +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = per_cpu(irq_stat, cpu).timer_irqs_event; + + return sum; +} + +u64 arch_irq_stat(void) +{ + return 0; +} + +int arch_show_interrupts(struct seq_file *p, int prec) +{ + int j; + + seq_printf(p, "%*s: ", prec, "TIMER"); + for_each_online_cpu(j) + seq_printf(p, "%10u", per_cpu(irq_stat, j).timer_irqs_event); + seq_puts(p, "\n"); + +#ifdef CONFIG_SMP + seq_printf(p, "%*s: ", prec, "IPI"); + for_each_online_cpu(j) + seq_printf(p, "%10lu ", cpu_data[j].ipi_count); + seq_puts(p, "\n"); +#endif + seq_printf(p, "%*s: ", prec, "PMI"); + for_each_online_cpu(j) + seq_printf(p, "%10lu ", per_cpu(irq_pmi_count, j)); + seq_puts(p, "\n"); + + seq_printf(p, "ERR: %10lu\n", irq_err_count); + return 0; +} + +/* + * handle_irq handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ + +#define MAX_ILLEGAL_IRQS 16 + +void +handle_irq(int irq) +{ + /* + * We ack quickly, we don't want the irq controller + * thinking we're snobs just because some other CPU has + * disabled global interrupts (we have already done the + * INT_ACK cycles, it's too late to try to pretend to the + * controller that we aren't taking the interrupt). + * + * 0 return value means that this irq is already being + * handled by some other CPU. (or is disabled) + */ + static unsigned int illegal_count; + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || ((unsigned int) irq > ACTUAL_NR_IRQS && + illegal_count < MAX_ILLEGAL_IRQS)) { + irq_err_count++; + illegal_count++; + pr_crit("device_interrupt: invalid interrupt %d\n", irq); + return; + } + + irq_enter(); + generic_handle_irq_desc(desc); + irq_exit(); +} + +#ifdef CONFIG_HOTPLUG_CPU +void fixup_irqs(void) +{ + irq_migrate_all_off_this_cpu(); +} +#endif diff --git a/arch/sw_64/kernel/irq_sw64.c b/arch/sw_64/kernel/irq_sw64.c new file mode 100644 index 000000000000..376e8397ba35 --- /dev/null +++ b/arch/sw_64/kernel/irq_sw64.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SW64 specific irq code. 
+ */ + +#include <linux/sched.h> +#include <linux/irq.h> +#include <linux/kernel_stat.h> +#include <linux/module.h> +#include <linux/irqchip.h> +#include <linux/irqdesc.h> +#include <linux/irqdomain.h> +#include <asm/dma.h> +#include <asm/irq_impl.h> +#include <asm/core.h> +#include <asm/perf_event.h> +#include <asm/hmcall.h> + +asmlinkage void +do_entInt(unsigned long type, unsigned long vector, + unsigned long irq_arg, struct pt_regs *regs) +{ + local_irq_disable(); + handle_chip_irq(type, vector, irq_arg, regs); +} +EXPORT_SYMBOL(do_entInt); + +void __init +init_IRQ(void) +{ + /* + * Just in case the platform init_irq() causes interrupts/mchecks + * (as is the case with RAWHIDE, at least). + */ + wrent(entInt, 0); + + sw64_init_irq(); + irqchip_init(); +} + +DEFINE_SPINLOCK(irq_lock); + +static void +__enable_irq(struct irq_data *d) +{ +} + +static void +__disable_irq(struct irq_data *d) +{ +} + +static unsigned int +__startup_irq(struct irq_data *d) +{ + __enable_irq(d); + return 0; +} + +static void +__mask_and_ack_irq(struct irq_data *d) +{ + spin_lock(&irq_lock); + __disable_irq(d); + spin_unlock(&irq_lock); +} + +struct irq_chip sw64_irq_chip = { + .name = "SW64_NODE", + .irq_startup = __startup_irq, + .irq_unmask = __enable_irq, + .irq_mask = __disable_irq, + .irq_mask_ack = __mask_and_ack_irq, +}; + +void __weak arch_init_msi_domain(struct irq_domain *parent) {} + +int __init arch_early_irq_init(void) +{ + int i; + + for (i = 0; i < NR_IRQS; ++i) { + irq_set_chip_and_handler(i, &sw64_irq_chip, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + arch_init_msi_domain(NULL); + return 0; +} + +int __init arch_probe_nr_irqs(void) +{ + return NR_IRQS_LEGACY; +} diff --git a/arch/sw_64/kernel/jump_label.c b/arch/sw_64/kernel/jump_label.c new file mode 100644 index 000000000000..a67d16eb3076 --- /dev/null +++ b/arch/sw_64/kernel/jump_label.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/kernel.h> +#include <linux/jump_label.h> +#include <asm/insn.h> +#include <asm/bug.h> +#include <asm/cacheflush.h> + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + u32 *insnp = (u32 *)entry->code; + u32 insn; + + if (type == JUMP_LABEL_JMP) { + insn = sw64_insn_br(R31, (entry->code), entry->target); + BUG_ON(insn == -1); + } else { + insn = sw64_insn_nop(); + } + + *insnp = insn; + + flush_icache_range(entry->code, entry->code + SW64_INSN_SIZE); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + /* + * no need to rewrite NOP + */ +} diff --git a/arch/sw_64/kernel/kgdb.c b/arch/sw_64/kernel/kgdb.c new file mode 100644 index 000000000000..c1100ef8fcdd --- /dev/null +++ b/arch/sw_64/kernel/kgdb.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * sw64 KGDB support + * + * Based on arch/arm64/kernel/kgdb.c + * + * Copyright (C) Xia Bin + * Author: Xia Bin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. 
+ */ + +#include <linux/irq.h> +#include <linux/kdebug.h> +#include <linux/kgdb.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> + +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { + { "r0", 8, offsetof(struct pt_regs, r0)}, + { "r1", 8, offsetof(struct pt_regs, r1)}, + { "r2", 8, offsetof(struct pt_regs, r2)}, + { "r3", 8, offsetof(struct pt_regs, r3)}, + { "r4", 8, offsetof(struct pt_regs, r4)}, + { "r5", 8, offsetof(struct pt_regs, r5)}, + { "r6", 8, offsetof(struct pt_regs, r6)}, + { "r7", 8, offsetof(struct pt_regs, r7)}, + { "r8", 8, offsetof(struct pt_regs, r8)}, + + { "r9", 8, -1 }, + { "r10", 8, -1 }, + { "r11", 8, -1 }, + { "r12", 8, -1 }, + { "r13", 8, -1 }, + { "r14", 8, -1 }, + { "r15", 8, -1 }, + + { "r16", 8, offsetof(struct pt_regs, r16)}, + { "r17", 8, offsetof(struct pt_regs, r17)}, + { "r18", 8, offsetof(struct pt_regs, r18)}, + + { "r19", 8, offsetof(struct pt_regs, r19)}, + { "r20", 8, offsetof(struct pt_regs, r20)}, + { "r21", 8, offsetof(struct pt_regs, r21)}, + { "r22", 8, offsetof(struct pt_regs, r22)}, + { "r23", 8, offsetof(struct pt_regs, r23)}, + { "r24", 8, offsetof(struct pt_regs, r24)}, + { "r25", 8, offsetof(struct pt_regs, r25)}, + { "r26", 8, offsetof(struct pt_regs, r26)}, + { "r27", 8, offsetof(struct pt_regs, r27)}, + { "at", 8, offsetof(struct pt_regs, r28)}, + { "gp", 8, offsetof(struct pt_regs, gp)}, + { "sp", 8, -1 }, + { "zero", 8, -1 }, + + { "f0", 8, -1 }, + { "f1", 8, -1 }, + { "f2", 8, -1 }, + { "f3", 8, -1 }, + { "f4", 8, -1 }, + { "f5", 8, -1 }, + { "f6", 8, -1 }, + { "f7", 8, -1 }, + { "f8", 8, -1 }, + { "f9", 8, -1 }, + { "f10", 8, -1 }, + { "f11", 8, -1 }, + { "f12", 8, -1 }, + { "f13", 8, -1 }, + { "f14", 8, -1 }, + { "f15", 8, -1 }, + { "f16", 8, -1 }, + { "f17", 8, -1 }, + { "f18", 8, -1 }, + { "f19", 8, -1 }, + { "f20", 8, -1 }, + { "f21", 8, -1 }, + { "f22", 8, -1 }, + { "f23", 8, -1 }, + { "f24", 8, -1 }, + { "f25", 8, -1 }, + { "f26", 8, -1 }, + { "f27", 8, -1 }, + { "f28", 8, -1 }, + { "f29", 8, -1 }, + { "f30", 8, -1 }, + { "fpcr", 8, -1 }, + + { "pc", 8, offsetof(struct pt_regs, pc)}, + { "", 8, -1 }, + { "unique", 8, -1}, +}; + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return NULL; + + if (dbg_reg_def[regno].offset != -1) + memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + else + memset(mem, 0, dbg_reg_def[regno].size); + return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return -EINVAL; + + if (dbg_reg_def[regno].offset != -1) + memcpy((void *)regs + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + return 0; +} + +void +sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) +{ + int i; + /* Initialize to zero */ + memset((char *)gdb_regs, 0, NUMREGBYTES); + for (i = 0; i < DBG_MAX_REG_NUM; i++) + gdb_regs[i] = get_reg(task, i); +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + pr_info("BEFORE SET PC WITH %lx\n", pc); + instruction_pointer(regs) = pc; + pr_info("AFTER SET PC IS %lx\n", instruction_pointer(regs)); +} + +static void kgdb_call_nmi_hook(void *ignored) +{ + kgdb_nmicallback(raw_smp_processor_id(), NULL); +} + +void kgdb_roundup_cpus(unsigned long flags) +{ + local_irq_enable(); + smp_call_function(kgdb_call_nmi_hook, NULL, 0); + local_irq_disable(); +} + +int kgdb_arch_handle_exception(int exception_vector, int signo, + int err_code, char 
*remcom_in_buffer, + char *remcom_out_buffer, + struct pt_regs *linux_regs) +{ + char *ptr; + unsigned long address = -1; + + switch (remcom_in_buffer[0]) { + case 'c': + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &address)) + kgdb_arch_set_pc(linux_regs, address); + return 0; + } + return -1; +} + +static int __kgdb_notify(struct die_args *args, unsigned long cmd) +{ + struct pt_regs *regs = args->regs; + + /* Userspace events, ignore. */ + if (user_mode(regs)) + return NOTIFY_DONE; + + if (kgdb_handle_exception(1, args->signr, cmd, regs)) + return NOTIFY_DONE; + + return NOTIFY_STOP; +} + +static int +kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = __kgdb_notify(ptr, cmd); + local_irq_restore(flags); + + return ret; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, +}; + +/* + * kgdb_arch_init - Perform any architecture specific initalization. + * This function will handle the initalization of any architecture + * specific callbacks. + */ +int kgdb_arch_init(void) +{ + int ret = register_die_notifier(&kgdb_notifier); + + if (ret != 0) + return ret; + return 0; +} + +/* + * kgdb_arch_exit - Perform any architecture specific uninitalization. + * This function will handle the uninitalization of any architecture + * specific callbacks, for dynamic registration and unregistration. + */ +void kgdb_arch_exit(void) +{ + unregister_die_notifier(&kgdb_notifier); +} + +/* + * sw64 instructions are always in LE. + * Break instruction is encoded in LE format + */ +struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = {0x80, 00, 00, 00} +}; diff --git a/arch/sw_64/kernel/kprobes/Makefile b/arch/sw_64/kernel/kprobes/Makefile new file mode 100644 index 000000000000..b3b1d849a63a --- /dev/null +++ b/arch/sw_64/kernel/kprobes/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o +obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o diff --git a/arch/sw_64/kernel/kprobes/common.h b/arch/sw_64/kernel/kprobes/common.h new file mode 100644 index 000000000000..de10058f0376 --- /dev/null +++ b/arch/sw_64/kernel/kprobes/common.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SW64_KERNEL_KPROBES_COMMON_H +#define _SW64_KERNEL_KPROBES_COMMON_H + + +extern bool sw64_insn_can_kprobe(kprobe_opcode_t *addr); + + +#endif /* _SW64_KERNEL_KPROBES_COMMON_H */ diff --git a/arch/sw_64/kernel/kprobes/decode-insn.c b/arch/sw_64/kernel/kprobes/decode-insn.c new file mode 100644 index 000000000000..e3ab856d6084 --- /dev/null +++ b/arch/sw_64/kernel/kprobes/decode-insn.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Based on arch/arm64/kernel/probes/decode-insn.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <asm/insn.h> +#include <asm/sections.h> +#include "common.h" + +static bool __kprobes sw64_insn_is_steppable(u32 insn) +{ + /* + * Branch instructions will write a new value into the PC which is + * likely to be relative to the XOL address and therefore invalid. + * Deliberate generation of an exception during stepping is also not + * currently safe. Lastly, MSR instructions can do any number of nasty + * things we can't handle during single-stepping. + */ + if (sw64_insn_is_sys_call_b(insn) || + sw64_insn_is_sys_call(insn) || + sw64_insn_is_call(insn) || + sw64_insn_is_ret(insn) || + sw64_insn_is_jmp(insn) || + sw64_insn_is_br(insn) || + sw64_insn_is_bsr(insn) || + sw64_insn_is_memb(insn) || + sw64_insn_is_imemb(insn) || + sw64_insn_is_rtc(insn) || + sw64_insn_is_lldl(insn) || + sw64_insn_is_lldw(insn) || + sw64_insn_is_beq(insn) || + sw64_insn_is_bne(insn) || + sw64_insn_is_blt(insn) || + sw64_insn_is_ble(insn) || + sw64_insn_is_bgt(insn) || + sw64_insn_is_bge(insn) || + sw64_insn_is_blbc(insn) || + sw64_insn_is_blbs(insn) || + sw64_insn_is_fbeq(insn) || + sw64_insn_is_fbne(insn) || + sw64_insn_is_fblt(insn) || + sw64_insn_is_fble(insn) || + sw64_insn_is_fbgt(insn) || + sw64_insn_is_fbge(insn)) + return false; + + return true; +} + + +#ifdef CONFIG_KPROBES +// lldl rd_f +static bool __kprobes is_probed_between_atomic(kprobe_opcode_t *addr) +{ + int count = 0; + unsigned long size = 0, offset = 0; + kprobe_opcode_t *scan_start = NULL; + + if (kallsyms_lookup_size_offset((unsigned long)addr, &size, &offset)) + scan_start = addr - (offset / sizeof(kprobe_opcode_t)); + + while (scan_start < addr) { + if (sw64_insn_is_lldl(le32_to_cpu(*scan_start)) || + sw64_insn_is_lldw(le32_to_cpu(*scan_start))) + count++; + if (sw64_insn_is_rd_f(le32_to_cpu(*scan_start))) + count--; + scan_start++; + } + if (count) + return false; + + return true; +} + +bool __kprobes sw64_insn_can_kprobe(kprobe_opcode_t *addr) +{ + u32 insn = le32_to_cpu(*addr); + + if (!sw64_insn_is_steppable(insn)) { + printk("addr can't steppable\n"); + return false; + } + if (!is_probed_between_atomic(addr)) { + printk("addr between atomic cant probe\n"); + return false; + } + return true; +} +#endif diff --git a/arch/sw_64/kernel/kprobes/kprobes.c b/arch/sw_64/kernel/kprobes/kprobes.c new file mode 100644 index 000000000000..85400f96f991 --- /dev/null +++ b/arch/sw_64/kernel/kprobes/kprobes.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Kernel Probes (KProbes) + * arch/sw_64/kernel/kprobes.c + */ + +#include <linux/kprobes.h> +#include <linux/preempt.h> +#include <linux/uaccess.h> +#include <linux/kdebug.h> +#include <linux/slab.h> + +#include <asm/ptrace.h> +#include <asm/insn.h> +#include "common.h" + +static u32 breakpoint_insn = BREAK_KPROBE; +static u32 breakpoint2_insn = BREAK_KPROBE_SS; + +int post_kprobe_handler(struct pt_regs *regs); + +DEFINE_PER_CPU(struct kprobe *, current_kprobe); +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + int ret = 0; + extern char __start_rodata[]; + extern char __end_rodata[]; + unsigned long probe_addr = (unsigned long)p->addr; + + if (probe_addr & 0x3) + return -EINVAL; + + if (!sw64_insn_can_kprobe(p->addr)) + return -EINVAL; + /* copy instruction */ + p->opcode = le32_to_cpu(*p->addr); + + + if (probe_addr >= (unsigned long) __start_rodata && + probe_addr <= (unsigned long) 
__end_rodata) + return -EINVAL; + + + /* insn: must be on special executable page on mips. */ + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) { + ret = -ENOMEM; + goto out; + } + /* + * In the kprobe->ainsn.insn[] array we store the original + * instruction at index zero and a break trap instruction at + * index one. + */ + p->ainsn.insn[0] = p->opcode; + p->ainsn.insn[1] = breakpoint2_insn; +out: + return ret; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + sw64_insn_write(p->addr, breakpoint_insn); + flush_insn_slot(p); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + sw64_insn_write(p->addr, p->opcode); + flush_insn_slot(p); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } +} + +static void save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; +} + +static void restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; +} + +static void __kprobes set_current_kprobe(struct kprobe *p) +{ + __this_cpu_write(current_kprobe, p); +} + + +static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) +{ + if (reenter) { + save_previous_kprobe(kcb); + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_REENTER; + } else { + kcb->kprobe_status = KPROBE_HIT_SS; + } + + /* insn simulation */ + kcb->target_pc = regs->pc; + regs->pc = (unsigned long)&p->ainsn.insn[0]; +} + +static int __kprobes reenter_kprobe(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + setup_singlestep(p, regs, kcb, 1); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + pr_warn("Unrecoverable kprobe detected.\n"); + dump_kprobe(p); + BUG(); + break; + default: + WARN_ON(1); + return 0; + } + return 1; +} + +int __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long addr = instruction_pointer(regs); + + if (user_mode(regs)) + return 0; + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + p = get_kprobe((kprobe_opcode_t *)(addr - 4)); + + if (p) { + if (kprobe_running()) { + if (reenter_kprobe(p, regs, kcb)) + return 1; + } else { + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + /* + * If we have no pre-handler or it returned 0, we + * continue with normal processing. If we have a + * pre-handler and it returned non-zero, that means + * user handler setup registers to exit to another + * instruction, we must skip the single stepping. 
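+ * + * Single-stepping here is simulated rather than hardware-assisted: + * setup_singlestep() saves the resume address in kcb->target_pc and points + * the PC at the copied instruction in p->ainsn.insn[], whose second slot is + * BREAK_KPROBE_SS, so execution traps again and post_kprobe_handler() + * restores target_pc.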
+ */ + if (!p->pre_handler || !p->pre_handler(p, regs)) + setup_singlestep(p, regs, kcb, 0); + else + reset_current_kprobe(); + return 1; + } + } + return 0; + +} +int __kprobes post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) + return 0; + + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + // resume_execution(cur, regs, kcb); + regs->pc = kcb->target_pc; + + + /* Restore back the original saved kprobes variables and continue. */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + goto out; + } + reset_current_kprobe(); +out: + preempt_enable_no_resched(); + + return 1; +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned long mmcsr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (cur->fault_handler && cur->fault_handler(cur, regs, mmcsr)) + return 1; + + if (kcb->kprobe_status & KPROBE_HIT_SS) { + regs->pc = kcb->target_pc; + + reset_current_kprobe(); + preempt_enable_no_resched(); + } + return 0; +} + +/* + * Wrapper routine for handling exceptions. + */ +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch (val) { + case DIE_BREAK: + if (kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEPBP: + if (post_kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; +} +/* + * Function return probe trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe causes the + * handlers to fire + */ +static void __used kretprobe_trampoline_holder(void) +{ + asm volatile( + /* Keep the assembler from reordering and placing JR here. 
*/ + ".set noreorder\n\t" + "nop\n\t" + ".global kretprobe_trampoline\n" + "kretprobe_trampoline:\n\t" + "nop\n\t" + : : : "memory"); +} + +void kretprobe_trampoline(void); + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *) regs->r26; + ri->fp = NULL; + + /* Replace the return addr with trampoline addr */ + regs->r26 = (unsigned long)kretprobe_trampoline; +} + +/* + * Called when the probe at kretprobe trampoline is hit + */ +static int __kprobes trampoline_probe_handler(struct kprobe *p, + struct pt_regs *regs) +{ + unsigned long orig_ret_address; + + orig_ret_address = __kretprobe_trampoline_handler(regs, kretprobe_trampoline, NULL); + instruction_pointer(regs) = orig_ret_address; + + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ + return 1; +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + if (p->addr == (kprobe_opcode_t *)kretprobe_trampoline) + return 1; + + return 0; +} + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *)kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline_p); +} diff --git a/arch/sw_64/kernel/kvm_cma.c b/arch/sw_64/kernel/kvm_cma.c new file mode 100644 index 000000000000..dc61e2e369e8 --- /dev/null +++ b/arch/sw_64/kernel/kvm_cma.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Contiguous Memory Allocator for KVM + * + * This program is modified on the basis of CMA, to achieve cross-node + * memory reservation, as well as reserved memory information statistics. + */ + +#define pr_fmt(fmt) "kvm_cma: " fmt + +#include <linux/memblock.h> +#include <linux/err.h> +#include <linux/mm.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <linux/log2.h> +#include <linux/highmem.h> +#include <linux/io.h> +#include <linux/cma.h> +#include <linux/page-isolation.h> + +#include "../../../mm/cma.h" +#include "../../../mm/internal.h" + +struct cma kvm_cma_areas[MAX_CMA_AREAS]; +unsigned int kvm_cma_area_count; + +static void __init init_kvm_cma_reserved_pageblock(struct page *page) +{ + unsigned int i = pageblock_nr_pages; + struct page *p = page; + + do { + __ClearPageReserved(p); + set_page_count(p, 0); + } while (++p, --i); + + set_pageblock_migratetype(page, MIGRATE_ISOLATE); + + if (pageblock_order >= MAX_ORDER) { + i = pageblock_nr_pages; + p = page; + do { + set_page_refcounted(p); + __free_pages(p, MAX_ORDER - 1); + p += MAX_ORDER_NR_PAGES; + } while (i -= MAX_ORDER_NR_PAGES); + } else { + set_page_refcounted(page); + __free_pages(page, pageblock_order); + } + + adjust_managed_page_count(page, pageblock_nr_pages); +} + +static int __init kvm_cma_activate_area(struct cma *cma) +{ + int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long); + unsigned long base_pfn = cma->base_pfn, pfn = base_pfn; + unsigned int i = cma->count >> pageblock_order; + + cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + + if (!cma->bitmap) { + cma->count = 0; + return -ENOMEM; + } + + WARN_ON_ONCE(!pfn_valid(pfn)); + + do { + unsigned int j; + + base_pfn = pfn; + + for (j = pageblock_nr_pages; j; --j, pfn++) + WARN_ON_ONCE(!pfn_valid(pfn)); + + init_kvm_cma_reserved_pageblock(pfn_to_page(base_pfn)); + } while (--i); + + spin_lock_init(&cma->lock); + + return 0; +} + +static int __init kvm_cma_init_reserved_areas(void) +{ + int i; + + for 
(i = 0; i < kvm_cma_area_count; i++) { + int ret = kvm_cma_activate_area(&kvm_cma_areas[i]); + + if (ret) + return ret; + } + + return 0; +} +core_initcall(kvm_cma_init_reserved_areas); + +/** + * kvm_cma_init_reserved_mem() - create custom contiguous area + * from reserved memory + * @base: Base address of the reserved area + * @size: Size of the reserved area (in bytes), + * @order_per_bit: Order of pages represented by one bit on bitmap. + * @name: The name of the area. If this parameter is NULL, the name of + * the area will be set to "cmaN", where N is a running counter of + * used areas. + * @res_cma: Pointer to store the created cma region. + * + * This function creates custom contiguous area from already reserved memory. + */ +int __init kvm_cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, + unsigned int order_per_bit, const char *name, + struct cma **res_cma) +{ + struct cma *cma; + phys_addr_t alignment; + + /* Sanity checks */ + if (kvm_cma_area_count == ARRAY_SIZE(kvm_cma_areas)) { + pr_err("Not enough slots for CMA reserved regions!\n"); + return -ENOSPC; + } + + if (!size || !memblock_is_region_reserved(base, size)) + return -EINVAL; + + /* ensure minimal alignment required by mm core */ + alignment = PAGE_SIZE << + max_t(unsigned long, MAX_ORDER - 1, pageblock_order); + + /* alignment should be aligned with order_per_bit */ + if (!IS_ALIGNED(alignment >> PAGE_SHIFT, 1 << order_per_bit)) + return -EINVAL; + + if (ALIGN(base, alignment) != base || ALIGN(size, alignment) != size) + return -EINVAL; + + /* + * Each reserved area must be initialised later, when more kernel + * subsystems (like slab allocator) are available. + */ + cma = &kvm_cma_areas[kvm_cma_area_count]; + + if (name) + snprintf(cma->name, CMA_MAX_NAME, name); + else + snprintf(cma->name, CMA_MAX_NAME, "cma%d\n", cma_area_count); + + cma->base_pfn = PFN_DOWN(base); + cma->count = size >> PAGE_SHIFT; + cma->order_per_bit = order_per_bit; + *res_cma = cma; + kvm_cma_area_count++; + totalcma_pages += (size / PAGE_SIZE); + + return 0; +} + +/** + * kvm_cma_declare_contiguous() - reserve contiguous area for VM + * @base: Base address of the reserved area optional, + * @size: Size of the reserved area (in bytes), + * @limit: End address of the reserved memory (optional, 0 for any). + * @alignment: Alignment for the CMA area, should be power of 2 or zero + * @order_per_bit: Order of pages represented by one bit on bitmap. + * @name: The name of the area. See function cma_init_reserved_mem() + * @res_cma: Pointer to store the created cma region. + * + * This function reserves memory from early allocator. It should be + * called by arch specific code once the early allocator (memblock or bootmem) + * has been activated and all other subsystems have already allocated/reserved + * memory. This function allows to create custom reserved areas. + */ +int __init kvm_cma_declare_contiguous(phys_addr_t base, + phys_addr_t size, phys_addr_t limit, + phys_addr_t alignment, unsigned int order_per_bit, + const char *name, struct cma **res_cma) +{ + phys_addr_t memblock_end = memblock_end_of_DRAM(); + phys_addr_t highmem_start; + int ret = 0; + + /* + * We can't use __pa(high_memory) directly, since high_memory + * isn't a valid direct map VA, and DEBUG_VIRTUAL will (validly) + * complain. Find the boundary by adding one to the last valid + * address. 
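As the kernel-doc above says, this reservation is meant to be made from early, architecture-specific setup code once memblock is available. A hypothetical call site, only to show how the parameters fit together; the wrapper name, base address and size are made up, and the prototype is assumed to come from the header this patch exports it through:

#include <linux/init.h>
#include <linux/printk.h>
#include <linux/types.h>

static struct cma *sw64_kvm_cma;	/* illustrative only */

static void __init sw64_kvm_cma_reserve_example(void)
{
	phys_addr_t base = 0x200000000UL;	/* assumed free and suitably aligned */
	phys_addr_t size = 512UL << 20;		/* assume 512 MiB for guest memory */
	int ret;

	/* limit, alignment and order_per_bit of 0 take the defaults computed below */
	ret = kvm_cma_declare_contiguous(base, size, 0, 0, 0,
					 "kvm_cma", &sw64_kvm_cma);
	if (ret)
		pr_warn("kvm_cma: reservation at %pa failed (%d)\n", &base, ret);
}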
+ */ + highmem_start = __pa(high_memory - 1) + 1; + + if (!size) + return -EINVAL; + + if (alignment && !is_power_of_2(alignment)) + return -EINVAL; + + /* + * Sanitise input arguments. + * Pages both ends in CMA area could be merged into adjacent unmovable + * migratetype page by page allocator's buddy algorithm. In the case, + * you couldn't get a contiguous memory, which is not what we want. + */ + alignment = max(alignment, (phys_addr_t)PAGE_SIZE << + max_t(unsigned long, MAX_ORDER - 1, pageblock_order)); + if (base & (alignment - 1)) { + ret = -EINVAL; + pr_err("Region at %pa must be aligned to %pa bytes\n", + &base, &alignment); + goto err; + } + base = ALIGN(base, alignment); + size = ALIGN(size, alignment); + limit &= ~(alignment - 1); + + if (!base) { + pr_err("Base address of region must be needed!\n"); + goto err; + } + + /* size should be aligned with order_per_bit */ + if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit)) + return -EINVAL; + + /* + * The request region must not cross the low/high memory boundary. + */ + if (base < highmem_start && base + size > highmem_start) { + ret = -EINVAL; + pr_err("Region at %pa defined on low/high memory boundary (%pa)\n", + &base, &highmem_start); + goto err; + } + + /* + * If the limit is unspecified or above the memblock end, its effective + * value will be the memblock end. Set it explicitly to simplify further + * checks. + */ + if (limit == 0 || limit > memblock_end) + limit = memblock_end; + + if (base + size > limit) { + ret = -EINVAL; + pr_err("Size (%pa) of region at %pa exceeds limit (%pa)\n", + &size, &base, &limit); + goto err; + } + + /* Reserve memory */ + if (memblock_is_region_reserved(base, size) || + memblock_reserve(base, size) < 0) { + ret = -EBUSY; + goto err; + } + ret = kvm_cma_init_reserved_mem(base, size, order_per_bit, + name, res_cma); + if (ret) + goto free_mem; + + pr_info("Reserved %ld MiB at %pa\n", (unsigned long)size / SZ_1M, + &base); + return 0; + +free_mem: + memblock_free(base, size); +err: + pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); + return ret; +} diff --git a/arch/sw_64/kernel/machine_kexec.c b/arch/sw_64/kernel/machine_kexec.c new file mode 100644 index 000000000000..c778bc1374af --- /dev/null +++ b/arch/sw_64/kernel/machine_kexec.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * machine_kexec.c for kexec + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ +#include <linux/compiler.h> +#include <linux/kexec.h> +#include <linux/mm.h> +#include <linux/device.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/reboot.h> +#include <asm/cacheflush.h> +#include <asm/page.h> +#include <asm/io.h> +#include <linux/cpu.h> +#include <linux/smp.h> + +extern void *kexec_control_page; +extern const unsigned char relocate_new_kernel[]; +extern const size_t relocate_new_kernel_size; + +extern unsigned long kexec_start_address; +extern unsigned long kexec_indirection_page; + +static atomic_t waiting_for_crash_ipi; + +#ifdef CONFIG_SMP +extern struct smp_rcb_struct *smp_rcb; + +/* + * Wait for relocation code is prepared and send + * secondary CPUs to spin until kernel is relocated. 
+ */ +static void kexec_smp_down(void *ignored) +{ + int cpu = smp_processor_id(); + + local_irq_disable(); + while (READ_ONCE(smp_rcb->ready) != 0) + mdelay(1); + set_cpu_online(cpu, false); + reset_cpu(cpu); +} +#endif + +int machine_kexec_prepare(struct kimage *kimage) +{ + return 0; +} + +void machine_kexec_cleanup(struct kimage *kimage) +{ +} + +void machine_shutdown(void) +{ +#ifdef CONFIG_SMP + WRITE_ONCE(smp_rcb->ready, 0); + smp_call_function(kexec_smp_down, NULL, 0); + smp_wmb(); + while (num_online_cpus() > 1) { + cpu_relax(); + mdelay(1); + } +#endif +} + +#ifdef CONFIG_SMP +static void machine_crash_nonpanic_core(void *unused) +{ + int cpu; + struct pt_regs regs; + + cpu = smp_processor_id(); + + local_irq_disable(); + crash_setup_regs(&regs, NULL); + pr_debug("CPU %u will stop doing anything useful since another CPU has crashed\n", cpu); + crash_save_cpu(&regs, cpu); + flush_cache_all(); + + set_cpu_online(cpu, false); + atomic_dec(&waiting_for_crash_ipi); + while (READ_ONCE(smp_rcb->ready) != 0) + mdelay(1); + if (cpu != 0) + reset_cpu(cpu); + else + machine_kexec(kexec_crash_image); +} +#else +static inline void machine_crash_nonpanic_core(void *unused) { } +#endif + +static void machine_kexec_mask_interrupts(void) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) + chip->irq_eoi(&desc->irq_data); + + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } +} + +void machine_crash_shutdown(struct pt_regs *regs) +{ + int cpu; + unsigned long msecs; + + cpu = smp_processor_id(); + local_irq_disable(); + kernel_restart_prepare(NULL); + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + smp_call_function(machine_crash_nonpanic_core, NULL, false); + msecs = 1000; /* Wait at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + mdelay(1); + msecs--; + } + if (atomic_read(&waiting_for_crash_ipi) > 0) + pr_warn("Non-crashing CPUs did not react to IPI\n"); + + crash_save_cpu(regs, cpu); + machine_kexec_mask_interrupts(); + pr_info("Loading crashdump kernel...\n"); +#ifdef CONFIG_SMP + WRITE_ONCE(smp_rcb->ready, 0); + if (cpu != 0) + reset_cpu(cpu); +#endif +} + +#define phys_to_ktext(pa) (__START_KERNEL_map + (pa)) + +typedef void (*noretfun_t)(void) __noreturn; + +void machine_kexec(struct kimage *image) +{ + void *reboot_code_buffer; + unsigned long entry; + unsigned long *ptr; + struct boot_params *params = sunway_boot_params; + + + reboot_code_buffer = kexec_control_page; + pr_info("reboot_code_buffer = %px\n", reboot_code_buffer); + kexec_start_address = phys_to_ktext(image->start); + pr_info("kexec_start_address = %#lx\n", kexec_start_address); + if (image->type == KEXEC_TYPE_DEFAULT) + kexec_indirection_page = + (unsigned long) phys_to_virt(image->head & PAGE_MASK); + else + kexec_indirection_page = (unsigned long)&image->head; + + pr_info("kexec_indirection_page = %#lx, image->head=%#lx\n", + kexec_indirection_page, image->head); + + params->cmdline = kexec_start_address - COMMAND_LINE_OFF; + params->initrd_start = *(__u64 *)(kexec_start_address - INITRD_START_OFF); + params->initrd_size = *(__u64 *)(kexec_start_address - INITRD_SIZE_OFF); + + pr_info("initrd_start = %#llx, initrd_size = %#llx\n" + "dtb_start = %#llx, efi_systab = 
%#llx\n" + "efi_memmap = %#llx, efi_memmap_size = %#llx\n" + "efi_memdesc_size = %#llx, efi_memdesc_version = %#llx\n" + "cmdline = %#llx\n", + params->initrd_start, params->initrd_size, + params->dtb_start, params->efi_systab, + params->efi_memmap, params->efi_memmap_size, + params->efi_memdesc_size, params->efi_memdesc_version, + params->cmdline); + + memcpy(reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size); + + /* + * The generic kexec code builds a page list with physical + * addresses. they are directly accessible through KSEG0 (or + * CKSEG0 or XPHYS if on 64bit system), hence the + * phys_to_virt() call. + */ + for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); + ptr = (entry & IND_INDIRECTION) ? + phys_to_virt(entry & PAGE_MASK) : ptr + 1) { + if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION || + *ptr & IND_DESTINATION) + *ptr = (unsigned long) phys_to_virt(*ptr); + } + + /* + * we do not want to be bothered. + */ + local_irq_disable(); + + pr_info("Will call new kernel at %08lx\n", image->start); + pr_info("Bye ...\n"); + //flush_cache_all(); + //sflush(); + //tbia(); + smp_wmb(); + ((noretfun_t) reboot_code_buffer)(); +} diff --git a/arch/sw_64/kernel/module.c b/arch/sw_64/kernel/module.c new file mode 100644 index 000000000000..c75d8a2e4309 --- /dev/null +++ b/arch/sw_64/kernel/module.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/moduleloader.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/slab.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt...) +#endif + +/* Allocate the GOT at the end of the core sections. */ + +struct got_entry { + struct got_entry *next; + Elf64_Sxword r_addend; + int got_offset; +}; + +static inline void +process_reloc_for_got(Elf64_Rela *rela, + struct got_entry *chains, Elf64_Xword *poffset) +{ + unsigned long r_sym = ELF64_R_SYM(rela->r_info); + unsigned long r_type = ELF64_R_TYPE(rela->r_info); + Elf64_Sxword r_addend = rela->r_addend; + struct got_entry *g; + + if (r_type != R_SW64_LITERAL) + return; + + for (g = chains + r_sym; g ; g = g->next) + if (g->r_addend == r_addend) { + if (g->got_offset == 0) { + g->got_offset = *poffset; + *poffset += 8; + } + goto found_entry; + } + + g = kmalloc(sizeof(*g), GFP_KERNEL); + g->next = chains[r_sym].next; + g->r_addend = r_addend; + g->got_offset = *poffset; + *poffset += 8; + chains[r_sym].next = g; + + found_entry: + /* + * Trick: most of the ELF64_R_TYPE field is unused. There are + * 42 valid relocation types, and a 32-bit field. Co-opt the + * bits above 256 to store the got offset for this reloc. + */ + rela->r_info |= g->got_offset << 8; +} + +int +module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs, + char *secstrings, struct module *me) +{ + struct got_entry *chains; + Elf64_Rela *rela; + Elf64_Shdr *esechdrs, *symtab, *s, *got; + unsigned long nsyms, nrela, i; + + esechdrs = sechdrs + hdr->e_shnum; + symtab = got = NULL; + + /* Find out how large the symbol table is. Allocate one got_entry + * head per symbol. Normally this will be enough, but not always. + * We'll chain different offsets for the symbol down each head. 
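The "trick" used by process_reloc_for_got() above packs each symbol's GOT slot offset into the unused upper bits of the 32-bit relocation type field, so apply_relocate_add() can recover it later without a side table. A standalone round trip of that encoding; the numeric value chosen for R_SW64_LITERAL is assumed, only the bit layout matters:

#include <stdio.h>
#include <stdint.h>

#define ELF64_R_SYM(i)		((i) >> 32)
#define ELF64_R_TYPE(i)		((i) & 0xffffffff)
#define ELF64_R_INFO(s, t)	(((uint64_t)(s) << 32) | (t))

int main(void)
{
	uint64_t r_info = ELF64_R_INFO(42, 1);	/* symbol 42, R_SW64_LITERAL (value assumed) */
	uint32_t got_offset = 0x18;		/* byte offset of this symbol's GOT slot */

	/* Encode: only the low 8 bits carry a real type, so stash the
	 * GOT offset in bits 8..31 of the type field. */
	r_info |= (uint64_t)got_offset << 8;

	/* Decode, as apply_relocate_add() does below. */
	uint32_t r_type = ELF64_R_TYPE(r_info);
	uint32_t recovered = r_type >> 8;

	r_type &= 0xff;
	printf("sym=%llu type=%u got_offset=%u\n",
	       (unsigned long long)ELF64_R_SYM(r_info), r_type, recovered);
	return 0;
}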
+ */ + for (s = sechdrs; s < esechdrs; ++s) + if (s->sh_type == SHT_SYMTAB) + symtab = s; + else if (!strcmp(".got", secstrings + s->sh_name)) { + got = s; + me->arch.gotsecindex = s - sechdrs; + } + + if (!symtab) { + pr_err("module %s: no symbol table\n", me->name); + return -ENOEXEC; + } + if (!got) { + pr_err("module %s: no got section\n", me->name); + return -ENOEXEC; + } + + nsyms = symtab->sh_size / sizeof(Elf64_Sym); + chains = kcalloc(nsyms, sizeof(struct got_entry), GFP_KERNEL); + if (!chains) { + pr_err("module %s: no memory for symbol chain buffer\n", + me->name); + return -ENOMEM; + } + + got->sh_size = 0; + got->sh_addralign = 8; + got->sh_type = SHT_NOBITS; + + /* Examine all LITERAL relocations to find out what GOT entries + * are required. This sizes the GOT section as well. + */ + for (s = sechdrs; s < esechdrs; ++s) + if (s->sh_type == SHT_RELA) { + nrela = s->sh_size / sizeof(Elf64_Rela); + rela = (void *)hdr + s->sh_offset; + for (i = 0; i < nrela; ++i) + process_reloc_for_got(rela+i, chains, + &got->sh_size); + } + + /* Free the memory we allocated. */ + for (i = 0; i < nsyms; ++i) { + struct got_entry *g, *n; + + for (g = chains[i].next; g ; g = n) { + n = g->next; + kfree(g); + } + } + kfree(chains); + + return 0; +} + +int +apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *me) +{ + Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr; + unsigned long i, n = sechdrs[relsec].sh_size / sizeof(*rela); + Elf64_Sym *symtab, *sym; + void *base, *location; + unsigned long got, gp; + + DEBUGP("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + + base = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr; + symtab = (Elf64_Sym *)sechdrs[symindex].sh_addr; + + /* The small sections were sorted to the end of the segment. + * The following should definitely cover them. + */ + got = sechdrs[me->arch.gotsecindex].sh_addr; + if (me->core_layout.size > 0x10000) + gp = got + 0x8000; + else + gp = (u64)me->core_layout.base + me->core_layout.size - 0x8000; + + for (i = 0; i < n; i++) { + unsigned long r_sym = ELF64_R_SYM(rela[i].r_info); + unsigned long r_type = ELF64_R_TYPE(rela[i].r_info); + unsigned long r_got_offset = r_type >> 8; + unsigned long value, hi, lo; + + r_type &= 0xff; + + /* This is where to make the change. */ + location = base + rela[i].r_offset; + + /* This is the symbol it is referring to. Note that all + * unresolved symbols have been resolved. + */ + sym = symtab + r_sym; + value = sym->st_value + rela[i].r_addend; + + switch (r_type) { + case R_SW64_NONE: + break; + case R_SW64_REFLONG: + *(u32 *)location = value; + break; + case R_SW64_REFQUAD: + /* BUG() can produce misaligned relocations. 
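The pair of 32-bit stores that follows writes a 64-bit relocation value without assuming 8-byte alignment of the target. The same idea as a standalone helper, shown with memcpy() so it is safe for any alignment (the in-kernel code can rely on at least 4-byte alignment and stores the halves directly):

#include <stdint.h>
#include <string.h>

/* Little-endian store of a 64-bit value at a possibly misaligned location. */
static void store_quad_unaligned(void *location, uint64_t value)
{
	uint32_t lo = (uint32_t)value;
	uint32_t hi = (uint32_t)(value >> 32);

	memcpy(location, &lo, sizeof(lo));
	memcpy((char *)location + 4, &hi, sizeof(hi));
}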
*/ + ((u32 *)location)[0] = value; + ((u32 *)location)[1] = value >> 32; + break; + case R_SW64_GPREL32: + value -= gp; + if ((int)value != value) + goto reloc_overflow; + *(u32 *)location = value; + break; + case R_SW64_LITERAL: + hi = got + r_got_offset; + lo = hi - gp; + if ((short)lo != lo) { + unsigned long over_offset = (lo + 0x8000) >> 16; + + if ((over_offset & 0x8000) == 0) { + *(u16 *)(location - 0x4) = over_offset; + *(u16 *)location = lo - ((over_offset << 16) + gp); + *(u64 *)hi = value; + } else { + goto reloc_overflow; + } + } else { + *(u16 *)location = lo; + *(u64 *)hi = value; + } + break; + case R_SW64_LITERAL_GOT: + /* empty for now need to fill */ + break; + case R_SW64_LITUSE: + break; + case R_SW64_GPDISP: + value = gp - (u64)location; + lo = (short)value; + hi = (int)(value - lo); + if (hi + lo != value) + goto reloc_overflow; + *(u16 *)location = hi >> 16; + *(u16 *)(location + rela[i].r_addend) = lo; + break; + case R_SW64_BRSGP: + /* + * BRSGP is only allowed to bind to local symbols. + * If the section is undef, this means that the + * value was resolved from somewhere else. + */ + if (sym->st_shndx == SHN_UNDEF) + goto reloc_overflow; + if ((sym->st_other & STO_SW64_STD_GPLOAD) == + STO_SW64_STD_GPLOAD) + /* Omit the prologue. */ + value += 8; + /* FALLTHRU */ + case R_SW64_BRADDR: + value -= (u64)location + 4; + if (value & 3) + goto reloc_overflow; + value = (long)value >> 2; + if (value + (1<<21) >= 1<<22) + goto reloc_overflow; + value &= 0x1fffff; + value |= *(u32 *)location & ~0x1fffff; + *(u32 *)location = value; + break; + case R_SW64_HINT: + break; + case R_SW64_SREL32: + value -= (u64)location; + if ((int)value != value) + goto reloc_overflow; + *(u32 *)location = value; + break; + case R_SW64_SREL64: + value -= (u64)location; + *(u64 *)location = value; + break; + case R_SW64_GPRELHIGH: + value = (long)(value - gp + 0x8000) >> 16; + if ((short) value != value) + goto reloc_overflow; + *(u16 *)location = value; + break; + case R_SW64_GPRELLOW: + value -= gp; + *(u16 *)location = value; + break; + case R_SW64_GPREL16: + value -= gp; + if ((short) value != value) + goto reloc_overflow; + *(u16 *)location = value; + break; + default: + pr_err("module %s: Unknown relocation: %lu\n", me->name, r_type); + return -ENOEXEC; +reloc_overflow: + if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) + pr_err("module %s: Relocation (type %lu) overflow vs section %d\n", + me->name, r_type, sym->st_shndx); + else + pr_err("module %s: Relocation (type %lu) overflow vs %s\n", + me->name, r_type, strtab + sym->st_name); + return -ENOEXEC; + } + } + + return 0; +} diff --git a/arch/sw_64/kernel/msi.c b/arch/sw_64/kernel/msi.c new file mode 100644 index 000000000000..644e4010af8a --- /dev/null +++ b/arch/sw_64/kernel/msi.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/module.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/msi.h> +#include <linux/pci.h> +#include <linux/cpumask.h> +#include <asm/sw64io.h> +#include <asm/msi.h> +#include <asm/pci.h> + + +int msi_compose_msg(unsigned int irq, struct msi_msg *msg) +{ + msg->address_hi = (unsigned int)(MSIX_MSG_ADDR >> 32); + msg->address_lo = (unsigned int)(MSIX_MSG_ADDR & 0xffffffff); + msg->data = irq; + return irq; +} + +void sw64_irq_noop(struct irq_data *d) +{ +} + +void destroy_irq(unsigned int irq) +{ +#if 0 + int pos; + + irq_init_desc(irq); + + if (irq < RC1_FIRST_MSI_VECTOR) { + pos = irq - RC0_FIRST_MSI_VECTOR; + clear_bit(pos, msi0_irq_in_use); + } else { + pos = irq - 
RC1_FIRST_MSI_VECTOR; + clear_bit(pos, msi1_irq_in_use); + } +#endif +} + +void arch_teardown_msi_irq(unsigned int irq) +{ + destroy_irq(irq); +} + +static int __init msi_init(void) +{ + return 0; +} + +static void __exit msi_exit(void) +{ +} + +module_init(msi_init); +module_exit(msi_exit); +MODULE_LICENSE("GPL v2"); diff --git a/arch/sw_64/kernel/pci-noop.c b/arch/sw_64/kernel/pci-noop.c new file mode 100644 index 000000000000..4ef694e629e8 --- /dev/null +++ b/arch/sw_64/kernel/pci-noop.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/sw/kernel/pci-noop.c + * + * Stub PCI interfaces for NO PCI kernels. + */ + +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/memblock.h> +#include <linux/gfp.h> +#include <linux/capability.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/dma-mapping.h> +#include <linux/scatterlist.h> +#include <linux/module.h> + +/* + * The PCI controller list. + */ + +struct pci_controller *hose_head, **hose_tail = &hose_head; + +struct pci_controller * __init +alloc_pci_controller(void) +{ + struct pci_controller *hose; + + hose = memblock_alloc(sizeof(*hose), SMP_CACHE_BYTES); + + *hose_tail = hose; + hose_tail = &hose->next; + + return hose; +} + +struct resource * __init +alloc_resource(void) +{ + struct resource *res; + + res = memblock_alloc(sizeof(*res), SMP_CACHE_BYTES); + + return res; +} + +asmlinkage long +sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn) +{ + return -ENODEV; +} + +asmlinkage long +sys_pciconfig_read(unsigned long bus, unsigned long dfn, + unsigned long off, unsigned long len, void *buf) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + else + return -ENODEV; +} + +asmlinkage long +sys_pciconfig_write(unsigned long bus, unsigned long dfn, + unsigned long off, unsigned long len, void *buf) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + else + return -ENODEV; +} + +static void *sw64_noop_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + unsigned long attrs) +{ + void *ret; + + if (!dev || *dev->dma_mask >= 0xffffffffUL) + gfp &= ~GFP_DMA; + ret = (void *)__get_free_pages(gfp, get_order(size)); + if (ret) { + memset(ret, 0, size); + *dma_handle = virt_to_phys(ret); + } + return ret; +} + +static void sw64_noop_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, + unsigned long attrs) +{ + free_pages((unsigned long)cpu_addr, get_order(size)); +} + +static dma_addr_t sw64_noop_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + return page_to_pa(page) + offset; +} + +static int sw64_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + void *va; + + BUG_ON(!sg_page(sg)); + va = sg_virt(sg); + sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va); + sg_dma_len(sg) = sg->length; + } + + return nents; +} + +static int sw64_noop_supported(struct device *dev, u64 mask) +{ + return mask < 0x00ffffffUL ? 
0 : 1; +} + +const struct dma_map_ops sw64_noop_ops = { + .alloc = sw64_noop_alloc_coherent, + .free = sw64_noop_free_coherent, + .map_page = sw64_noop_map_page, + .map_sg = sw64_noop_map_sg, + .dma_supported = sw64_noop_supported, +}; + +const struct dma_map_ops *dma_ops = &sw64_noop_ops; +EXPORT_SYMBOL(dma_ops); + +void __init common_init_pci(void) +{ +} + +void __init sw64_init_arch(void) { } +void __init sw64_init_irq(void) { } diff --git a/arch/sw_64/kernel/pci-sysfs.c b/arch/sw_64/kernel/pci-sysfs.c new file mode 100644 index 000000000000..584243922df9 --- /dev/null +++ b/arch/sw_64/kernel/pci-sysfs.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/sw_64/kernel/pci-sysfs.c + * + * Copyright (C) 2009 Ivan Kokshaysky + * + * Sw_64 PCI resource files. + * + * Loosely based on generic HAVE_PCI_MMAP implementation in + * drivers/pci/pci-sysfs.c + */ + +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/slab.h> +#include <linux/pci.h> + +static int hose_mmap_page_range(struct pci_controller *hose, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_type, int sparse) +{ + unsigned long base; + + if (mmap_type == pci_mmap_mem) + base = sparse ? hose->sparse_mem_base : hose->dense_mem_base; + else + base = sparse ? hose->sparse_io_base : hose->dense_io_base; + + vma->vm_pgoff |= base >> PAGE_SHIFT; + + return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +static int __pci_mmap_fits(struct pci_dev *pdev, int num, + struct vm_area_struct *vma, int sparse) +{ + unsigned long nr, start, size; + int shift = sparse ? 5 : 0; + + nr = vma_pages(vma); + start = vma->vm_pgoff; + size = ((pci_resource_len(pdev, num) - 1) >> (PAGE_SHIFT - shift)) + 1; + + if (start < size && size - start >= nr) + return 1; + WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on %s BAR %d " + "(size 0x%08lx)\n", + current->comm, sparse ? " sparse" : "", start, start + nr, + pci_name(pdev), num, size); + return 0; +} + +/** + * pci_mmap_resource - map a PCI resource into user memory space + * @kobj: kobject for mapping + * @attr: struct bin_attribute for the file being mapped + * @vma: struct vm_area_struct passed into the mmap + * @sparse: address space type + * + * Use the bus mapping routines to map a PCI resource into userspace. + */ +static int pci_mmap_resource(struct kobject *kobj, + struct bin_attribute *attr, + struct vm_area_struct *vma, int sparse) +{ + struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj)); + struct resource *res = attr->private; + enum pci_mmap_state mmap_type; + struct pci_bus_region bar; + int i; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) + if (res == &pdev->resource[i]) + break; + if (i >= PCI_ROM_RESOURCE) + return -ENODEV; + + if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start)) + return -EINVAL; + + if (!__pci_mmap_fits(pdev, i, vma, sparse)) + return -EINVAL; + + pcibios_resource_to_bus(pdev->bus, &bar, res); + vma->vm_pgoff += bar.start >> (PAGE_SHIFT - (sparse ? 5 : 0)); + mmap_type = res->flags & IORESOURCE_MEM ? 
pci_mmap_mem : pci_mmap_io; + + return hose_mmap_page_range(pdev->sysdata, vma, mmap_type, sparse); +} + +static int pci_mmap_resource_sparse(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 1); +} + +static int pci_mmap_resource_dense(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 0); +} + +/** + * pci_remove_resource_files - cleanup resource files + * @dev: dev to cleanup + * + * If we created resource files for @dev, remove them from sysfs and + * free their resources. + */ +void pci_remove_resource_files(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + struct bin_attribute *res_attr; + + res_attr = pdev->res_attr[i]; + if (res_attr) { + sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); + kfree(res_attr); + } + + res_attr = pdev->res_attr_wc[i]; + if (res_attr) { + sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); + kfree(res_attr); + } + } +} + +static int sparse_mem_mmap_fits(struct pci_dev *pdev, int num) +{ + struct pci_bus_region bar; + struct pci_controller *hose = pdev->sysdata; + long dense_offset; + unsigned long sparse_size; + + pcibios_resource_to_bus(pdev->bus, &bar, &pdev->resource[num]); + + /* + * All core logic chips have 4G sparse address space, except + * CIA which has 16G (see xxx_SPARSE_MEM and xxx_DENSE_MEM + * definitions in asm/core_xxx.h files). This corresponds + * to 128M or 512M of the bus space. + */ + dense_offset = (long)(hose->dense_mem_base - hose->sparse_mem_base); + sparse_size = dense_offset >= 0x400000000UL ? 0x20000000 : 0x8000000; + + return bar.end < sparse_size; +} + +static int pci_create_one_attr(struct pci_dev *pdev, int num, char *name, + char *suffix, struct bin_attribute *res_attr, + unsigned long sparse) +{ + size_t size = pci_resource_len(pdev, num); + + sprintf(name, "resource%d%s", num, suffix); + res_attr->mmap = sparse ? pci_mmap_resource_sparse : + pci_mmap_resource_dense; + res_attr->attr.name = name; + res_attr->attr.mode = S_IRUSR | S_IWUSR; + res_attr->size = sparse ? 
size << 5 : size; + res_attr->private = &pdev->resource[num]; + return sysfs_create_bin_file(&pdev->dev.kobj, res_attr); +} + +static int pci_create_attr(struct pci_dev *pdev, int num) +{ + /* allocate attribute structure, piggyback attribute name */ + int retval, nlen1, nlen2 = 0, res_count = 1; + unsigned long sparse_base, dense_base; + struct bin_attribute *attr; + struct pci_controller *hose = pdev->sysdata; + char *suffix, *attr_name; + + suffix = ""; + nlen1 = 10; + + if (pdev->resource[num].flags & IORESOURCE_MEM) { + sparse_base = hose->sparse_mem_base; + dense_base = hose->dense_mem_base; + if (sparse_base && !sparse_mem_mmap_fits(pdev, num)) { + sparse_base = 0; + suffix = "_dense"; + nlen1 = 16; /* resourceN_dense */ + } + } else { + sparse_base = hose->sparse_io_base; + dense_base = hose->dense_io_base; + } + + if (sparse_base) { + suffix = "_sparse"; + nlen1 = 17; + if (dense_base) { + nlen2 = 16; /* resourceN_dense */ + res_count = 2; + } + } + + attr = kzalloc(sizeof(*attr) * res_count + nlen1 + nlen2, GFP_ATOMIC); + if (!attr) + return -ENOMEM; + + attr_name = (char *)(attr + res_count); + pdev->res_attr[num] = attr; + retval = pci_create_one_attr(pdev, num, attr_name, suffix, attr, + sparse_base); + if (retval || res_count == 1) + return retval; + + /* Create dense file */ + attr_name += nlen1; + attr++; + pdev->res_attr_wc[num] = attr; + return pci_create_one_attr(pdev, num, attr_name, "_dense", attr, 0); +} + +/** + * pci_create_resource_files - create resource files in sysfs for @dev + * @dev: dev in question + * + * Walk the resources in @dev creating files for each resource available. + */ +int pci_create_resource_files(struct pci_dev *pdev) +{ + int i; + int retval; + + /* Expose the PCI resources from this device as files */ + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + + /* skip empty resources */ + if (!pci_resource_len(pdev, i)) + continue; + + retval = pci_create_attr(pdev, i); + if (retval) { + pci_remove_resource_files(pdev); + return retval; + } + } + return 0; +} + +/* Legacy I/O bus mapping stuff. */ + +static int __legacy_mmap_fits(struct pci_controller *hose, + struct vm_area_struct *vma, + unsigned long res_size, int sparse) +{ + unsigned long nr, start, size; + + nr = vma_pages(vma); + start = vma->vm_pgoff; + size = ((res_size - 1) >> PAGE_SHIFT) + 1; + + if (start < size && size - start >= nr) + return 1; + WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on hose %ld " + "(size 0x%08lx)\n", + current->comm, sparse ? " sparse" : "", start, start + nr, + hose->index, size); + return 0; +} + +static inline int has_sparse(struct pci_controller *hose, + enum pci_mmap_state mmap_type) +{ + unsigned long base; + + base = (mmap_type == pci_mmap_mem) ? hose->sparse_mem_base : + hose->sparse_io_base; + + return base != 0; +} + +int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma, + enum pci_mmap_state mmap_type) +{ + struct pci_controller *hose = bus->sysdata; + int sparse = has_sparse(hose, mmap_type); + unsigned long res_size; + + res_size = (mmap_type == pci_mmap_mem) ? bus->legacy_mem->size : + bus->legacy_io->size; + if (!__legacy_mmap_fits(hose, vma, res_size, sparse)) + return -EINVAL; + + return hose_mmap_page_range(hose, vma, mmap_type, sparse); +} + +/** + * pci_adjust_legacy_attr - adjustment of legacy file attributes + * @b: bus to create files under + * @mmap_type: I/O port or memory + * + * Adjust file name and size for sparse mappings. 
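The recurring shift by 5 in this file comes from the sparse address-space encoding (inherited from the Alpha-style layout this code is loosely based on): each byte of PCI bus space occupies 32 bytes of CPU address space, so the sysfs file backing a sparse mapping is 32 times the size of the underlying region. A toy calculation, assuming an 8 KiB page size purely for illustration:

#include <stdio.h>

#define PAGE_SHIFT 13	/* assumed 8 KiB pages */

int main(void)
{
	unsigned long bar_len = 0x10000;	/* e.g. a 64 KiB BAR */
	int shift = 5;				/* sparse: 32 CPU bytes per bus byte */

	unsigned long file_size = bar_len << shift;
	unsigned long mmap_pages = ((bar_len - 1) >> (PAGE_SHIFT - shift)) + 1;

	printf("sysfs sparse file size: %#lx\n", file_size);			/* 0x200000 */
	printf("mappable pages (as in __pci_mmap_fits): %lu\n", mmap_pages);	/* 256 */
	return 0;
}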
+ */ +void pci_adjust_legacy_attr(struct pci_bus *bus, enum pci_mmap_state mmap_type) +{ + struct pci_controller *hose = bus->sysdata; + + if (!has_sparse(hose, mmap_type)) + return; + + if (mmap_type == pci_mmap_mem) { + bus->legacy_mem->attr.name = "legacy_mem_sparse"; + bus->legacy_mem->size <<= 5; + } else { + bus->legacy_io->attr.name = "legacy_io_sparse"; + bus->legacy_io->size <<= 5; + } +} + +/* Legacy I/O bus read/write functions */ +int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size) +{ + struct pci_controller *hose = bus->sysdata; + + port += hose->io_space->start; + + switch (size) { + case 1: + *((u8 *)val) = inb(port); + return 1; + case 2: + if (port & 1) + return -EINVAL; + *((u16 *)val) = inw(port); + return 2; + case 4: + if (port & 3) + return -EINVAL; + *((u32 *)val) = inl(port); + return 4; + } + return -EINVAL; +} + +int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size) +{ + struct pci_controller *hose = bus->sysdata; + + port += hose->io_space->start; + + switch (size) { + case 1: + outb(port, val); + return 1; + case 2: + if (port & 1) + return -EINVAL; + outw(port, val); + return 2; + case 4: + if (port & 3) + return -EINVAL; + outl(port, val); + return 4; + } + return -EINVAL; +} diff --git a/arch/sw_64/kernel/pci.c b/arch/sw_64/kernel/pci.c new file mode 100644 index 000000000000..36616d31f32f --- /dev/null +++ b/arch/sw_64/kernel/pci.c @@ -0,0 +1,733 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/sw_64/kernel/pci.c + * Modified by Suweiqiang 2013-9-30 + */ + +#include <linux/string.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/kernel.h> +#include <linux/memblock.h> +#include <linux/module.h> +#include <linux/cache.h> +#include <linux/slab.h> +#include <linux/msi.h> +#include <linux/irq.h> +#include <asm/msi.h> +#include <linux/delay.h> +#include <linux/syscore_ops.h> +#include <linux/platform_device.h> +#include <asm/sw64_init.h> +#include <asm/pci.h> + +#include "pci_impl.h" + +unsigned long rc_linkup; + +/* Indicate whether we respect the PCI setup left by console. */ +/* + * Make this long-lived so that we know when shutting down + * whether we probed only or not. + */ +int pci_probe_only; + +/* + * raw_pci_read/write - Platform-specific PCI config space access. + */ +int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 *val) +{ + struct pci_bus *bus_tmp = pci_find_bus(domain, bus); + + if (bus_tmp) + return bus_tmp->ops->read(bus_tmp, devfn, reg, len, val); + + return -EINVAL; +} + +int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 val) +{ + struct pci_bus *bus_tmp = pci_find_bus(domain, bus); + + if (bus_tmp) + return bus_tmp->ops->write(bus_tmp, devfn, reg, len, val); + + return -EINVAL; +} + +struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) +{ + struct pci_bus *bus; + + return bus; +} + +/* + * The PCI controller list. 
+ */ + +struct pci_controller *hose_head, **hose_tail = &hose_head; +static void __init pcibios_reserve_legacy_regions(struct pci_bus *bus); + +/* Quirks */ +static void quirk_isa_bridge(struct pci_dev *dev) +{ + dev->class = PCI_CLASS_BRIDGE_ISA << 8; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82378, quirk_isa_bridge); + +/* Just declaring that the power-of-ten prefixes are actually the + * power-of-two ones doesn't make it true :) + */ +#define KB 1024 +#define MB (1024*KB) +#define GB (1024*MB) + +resource_size_t pcibios_align_resource(void *data, const struct resource *res, + resource_size_t size, resource_size_t align) +{ + struct pci_dev *dev = data; + struct pci_controller *hose = dev->sysdata; + unsigned long alignto; + resource_size_t start = res->start; + + if (res->flags & IORESOURCE_IO) { + /* Make sure we start at our min on all hoses */ + if (start - hose->io_space->start < PCIBIOS_MIN_IO) + start = PCIBIOS_MIN_IO + hose->io_space->start; + /* + * Put everything into 0x00-0xff region modulo 0x400 + */ + if (start & 0x300) + start = (start + 0x3ff) & ~0x3ff; + } else if (res->flags & IORESOURCE_MEM) { + /* Make sure we start at our min on all hoses */ + if (start - hose->mem_space->start < PCIBIOS_MIN_MEM) + start = PCIBIOS_MIN_MEM + hose->mem_space->start; //0xc0000000- 0xffffffff + /* + * The following holds at least for the Low Cost + * Sw_64 implementation of the PCI interface: + * + * In sparse memory address space, the first + * octant (16MB) of every 128MB segment is + * aliased to the very first 16 MB of the + * address space (i.e., it aliases the ISA + * memory address space). Thus, we try to + * avoid allocating PCI devices in that range. + * Can be allocated in 2nd-7th octant only. + * Devices that need more than 112MB of + * address space must be accessed through + * dense memory space only! + */ + + /* Align to multiple of size of minimum base. */ + alignto = max_t(resource_size_t, 0x1000UL, align); + start = ALIGN(start, alignto); + if (hose->sparse_mem_base && size <= 7 * 16*MB) { + if (((start / (16*MB)) & 0x7) == 0) { + start &= ~(128*MB - 1); + start += 16*MB; + start = ALIGN(start, alignto); + } + if (start/(128*MB) != (start + size - 1)/(128*MB)) { + start &= ~(128*MB - 1); + start += (128 + 16)*MB; + start = ALIGN(start, alignto); + } + } + } + + return start; +} + +#undef KB +#undef MB +#undef GB + +static int __init +pcibios_init(void) +{ + sw64_init_pci(); + return 0; +} + +subsys_initcall(pcibios_init); + +char *pcibios_setup(char *str) +{ + return str; +} + +void pcibios_fixup_bus(struct pci_bus *bus) +{ + /* Propagate hose info into the subordinate devices. */ + + struct pci_controller *hose = bus->sysdata; + struct pci_dev *dev = bus->self; + + if (!dev || bus->number == hose->first_busno) { + /* Root bus. */ + unsigned long end; + + bus->resource[0] = hose->io_space; + bus->resource[1] = hose->mem_space; + bus->resource[2] = hose->pre_mem_space; + } else if (pci_probe_only && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + pci_read_bridge_bases(bus); + } +} + +void pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); +} + +/* Helper for generic DMA-mapping functions. */ +struct pci_dev *sw64_gendev_to_pci(struct device *dev) +{ + if (dev && dev->bus == &pci_bus_type) + return to_pci_dev(dev); + + return NULL; +} + +/* + * If we set up a device for bus mastering, we need to check the latency + * timer as certain firmware forgets to set it properly. 
+ */ +void pcibios_set_master(struct pci_dev *dev) +{ + u8 lat; + + pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); + if (lat >= 16) + return; + pr_info("PCI: Setting latency timer of device %s to 64\n", pci_name(dev)); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 64); +} + +void __init pcibios_claim_one_bus(struct pci_bus *b) +{ + struct pci_dev *dev; + struct pci_bus *child_bus; + + list_for_each_entry(dev, &b->devices, bus_list) { + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *r = &dev->resource[i]; + + if (r->parent || !r->start || !r->flags) + continue; + if (pci_probe_only || (r->flags & IORESOURCE_PCI_FIXED)) { + if (pci_claim_resource(dev, i) == 0) + continue; + + pci_claim_bridge_resource(dev, i); + } + } + } + + list_for_each_entry(child_bus, &b->children, node) + pcibios_claim_one_bus(child_bus); +} + +static void __init +pcibios_claim_console_setup(void) +{ + struct pci_bus *b; + + list_for_each_entry(b, &pci_root_buses, node) + pcibios_claim_one_bus(b); +} + +int __weak chip_pcie_configure(struct pci_controller *hose) +{ + return 0; +} + +unsigned char last_bus = PCI0_BUS; +void __init common_init_pci(void) +{ + struct pci_controller *hose; + struct pci_host_bridge *bridge; + struct pci_bus *bus; + unsigned int init_busnr; + int need_domain_info = 0; + int ret, iov_bus; + unsigned long offset; + + /* Scan all of the recorded PCI controllers. */ + hose = hose_head; + for (hose = hose_head; hose; hose = hose->next) { + bridge = pci_alloc_host_bridge(0); + if (!bridge) + continue; + hose->busn_space->start = last_bus; + init_busnr = (0xff << 16) + ((last_bus + 1) << 8) + (last_bus); + write_rc_conf(hose->node, hose->index, RC_PRIMARY_BUS, init_busnr); + if (is_in_host()) { + offset = hose->mem_space->start - PCI_32BIT_MEMIO; + hose->first_busno = last_bus + 1; + } else { + offset = hose->mem_space->start - PCI_32BIT_VT_MEMIO; + hose->first_busno = last_bus; + } + pci_add_resource_offset(&bridge->windows, hose->mem_space, offset); + pci_add_resource_offset(&bridge->windows, hose->io_space, hose->io_space->start); + pci_add_resource_offset(&bridge->windows, hose->pre_mem_space, 0); + pci_add_resource_offset(&bridge->windows, hose->busn_space, 0); + bridge->dev.parent = NULL; + bridge->sysdata = hose; + bridge->busnr = hose->busn_space->start; + bridge->ops = &sw64_pci_ops; + bridge->swizzle_irq = sw64_swizzle; + bridge->map_irq = sw64_map_irq; + + ret = pci_scan_root_bus_bridge(bridge); + if (ret) { + pci_free_host_bridge(bridge); + continue; + } + + bus = hose->bus = bridge->bus; + hose->need_domain_info = need_domain_info; + while (pci_find_bus(pci_domain_nr(bus), last_bus)) + last_bus++; + + if (is_in_host()) + iov_bus = chip_pcie_configure(hose); + last_bus += iov_bus; + + hose->last_busno = hose->busn_space->end = last_bus - 1; + init_busnr = read_rc_conf(hose->node, hose->index, RC_PRIMARY_BUS); + init_busnr &= ~(0xff << 16); + init_busnr |= (last_bus - 1) << 16; + write_rc_conf(hose->node, hose->index, RC_PRIMARY_BUS, init_busnr); + pci_bus_update_busn_res_end(bus, last_bus - 1); + + } + + pcibios_claim_console_setup(); +#ifdef CONFIG_SUNWAY_IOMMU + register_syscore_ops(&iommu_cpu_syscore_ops); +#endif + + if (is_in_host()) { + list_for_each_entry(bus, &pci_root_buses, node) + pcibios_reserve_legacy_regions(bus); + } + + pr_info("SW arch assign unassigned resources.\n"); + + pci_assign_unassigned_resources(); + + for (hose = hose_head; hose; hose = hose->next) { + bus = hose->bus; + if (bus) + pci_bus_add_devices(bus); + } +} + +struct 
pci_controller * __init +alloc_pci_controller(void) +{ + struct pci_controller *hose; + + hose = memblock_alloc(sizeof(*hose), SMP_CACHE_BYTES); + + *hose_tail = hose; + hose_tail = &hose->next; + + return hose; +} + +struct resource * __init +alloc_resource(void) +{ + struct resource *res; + + res = memblock_alloc(sizeof(*res), SMP_CACHE_BYTES); + + return res; +} + +static struct pci_controller *pci_bus_to_hose(unsigned long bus) +{ + struct pci_controller *hose; + + for (hose = hose_head; hose; hose = hose->next) { + if (bus >= hose->first_busno && bus <= hose->last_busno) + return hose; + } + return NULL; +} + +/* Provide information on locations of various I/O regions in physical + * memory. Do this on a per-card basis so that we choose the right hose. + */ + +asmlinkage long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn) +{ + struct pci_controller *hose; + + hose = pci_bus_to_hose(bus); + if (hose == NULL) + return -ENODEV; + + switch (which & ~IOBASE_FROM_HOSE) { + case IOBASE_HOSE: + return hose->index; + case IOBASE_SPARSE_MEM: + return hose->sparse_mem_base; + case IOBASE_DENSE_MEM: + return hose->dense_mem_base; + case IOBASE_SPARSE_IO: + return hose->sparse_io_base; + case IOBASE_DENSE_IO: + return hose->dense_io_base; + case IOBASE_ROOT_BUS: + return hose->bus->number; + } + + return -EOPNOTSUPP; +} + +/* Destroy an __iomem token. Not copied from lib/iomap.c. */ + +void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ + if (__is_mmio(addr)) + iounmap(addr); +} +EXPORT_SYMBOL(pci_iounmap); + +static void __init pcibios_reserve_legacy_regions(struct pci_bus *bus) +{ + struct pci_controller *hose = bus->sysdata; + resource_size_t offset; + struct resource *res; + + pr_debug("Reserving legacy ranges for domain %04x\n", pci_domain_nr(bus)); + + /* Check for IO */ + if (!(hose->io_space->flags & IORESOURCE_IO)) + goto no_io; + offset = (unsigned long)hose->io_space->start; + res = kzalloc(sizeof(struct resource), GFP_KERNEL); + BUG_ON(res == NULL); + res->name = "Legacy IO"; + res->flags = IORESOURCE_IO; + res->start = offset; + res->end = (offset + 0xfff) & 0xfffffffffffffffful; + pr_debug("Candidate legacy IO: %pR\n", res); + if (request_resource(hose->io_space, res)) { + pr_debug("PCI %04x:%02x Cannot reserve Legacy IO %pR\n", + pci_domain_nr(bus), bus->number, res); + kfree(res); + } + +no_io: + return; +} + +/* PCIe RC operations */ +int sw6_pcie_read_rc_cfg(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + u32 data; + struct pci_controller *hose = bus->sysdata; + void __iomem *cfg_iobase = (void *)hose->rc_config_space_base; + + if (IS_ENABLED(CONFIG_PCI_DEBUG)) + pr_debug("rc read addr:%px bus %d, devfn %#x, where %#x size=%d\t", + cfg_iobase + ((where & ~3) << 5), bus->number, devfn, where, size); + + if ((uintptr_t)where & (size - 1)) { + *val = 0; + return PCIBIOS_BAD_REGISTER_NUMBER; + } + + if (unlikely(devfn > 0)) { + *val = ~0; + return PCIBIOS_DEVICE_NOT_FOUND; + } + + data = readl(cfg_iobase + ((where & ~3) << 5)); + + switch (size) { + case 1: + *val = (data >> (8 * (where & 0x3))) & 0xff; + break; + case 2: + *val = (data >> (8 * (where & 0x2))) & 0xffff; + break; + default: + *val = data; + break; + } + + if (IS_ENABLED(CONFIG_PCI_DEBUG)) + pr_debug("*val %#x\n ", *val); + + return PCIBIOS_SUCCESSFUL; +} + +int sw6_pcie_write_rc_cfg(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + u32 data; + u32 shift = 8 * (where & 3); + struct pci_controller *hose = bus->sysdata; + void 
__iomem *cfg_iobase = (void *)hose->rc_config_space_base; + + if ((uintptr_t)where & (size - 1)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + switch (size) { + case 1: + data = readl(cfg_iobase + ((where & ~3) << 5)); + data &= ~(0xff << shift); + data |= (val & 0xff) << shift; + break; + case 2: + data = readl(cfg_iobase + ((where & ~3) << 5)); + data &= ~(0xffff << shift); + data |= (val & 0xffff) << shift; + break; + default: + data = val; + break; + } + + if (IS_ENABLED(CONFIG_PCI_DEBUG)) + pr_debug("rc write addr:%px bus %d, devfn %#x, where %#x *val %#x size %d\n", + cfg_iobase + ((where & ~3) << 5), bus->number, devfn, where, val, size); + + writel(data, cfg_iobase + ((where & ~3) << 5)); + + return PCIBIOS_SUCCESSFUL; +} + +int sw6_pcie_config_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + struct pci_controller *hose = bus->sysdata; + int ret = PCIBIOS_DEVICE_NOT_FOUND; + + if (is_guest_or_emul()) + return pci_generic_config_read(bus, devfn, where, size, val); + + hose->self_busno = hose->busn_space->start; + + if (unlikely(bus->number == hose->self_busno)) { + ret = sw6_pcie_read_rc_cfg(bus, devfn, where, size, val); + } else { + if (test_bit(hose->node * 8 + hose->index, &rc_linkup)) { + ret = pci_generic_config_read(bus, devfn, where, size, val); + } else { + return ret; + } + } + return ret; +} + +int sw6_pcie_config_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + struct pci_controller *hose = bus->sysdata; + + if (is_guest_or_emul()) + return pci_generic_config_write(bus, devfn, where, size, val); + + hose->self_busno = hose->busn_space->start; + + if (unlikely(bus->number == hose->self_busno)) + return sw6_pcie_write_rc_cfg(bus, devfn, where, size, val); + else + return pci_generic_config_write(bus, devfn, where, size, val); +} + +/* + *sw6_pcie_valid_device - Check if a valid device is present on bus + *@bus: PCI Bus structure + *@devfn: device/function + * + *Return: 'true' on success and 'false' if invalid device is found + */ +static bool sw6_pcie_valid_device(struct pci_bus *bus, unsigned int devfn) +{ + struct pci_controller *hose = bus->sysdata; + + if (is_in_host()) { + /* Only one device down on each root complex */ + if (bus->number == hose->self_busno && devfn > 0) + return false; + } + + return true; +} + +/* + *sw6_pcie_map_bus - Get configuration base + *@bus: PCI Bus structure + *@devfn: Device/function + *@where: Offset from base + * + *Return: Base address of the configuration space needed to be + *accessed. 
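sw6_pcie_map_bus() below composes the config-space offset ECAM-style, with the bus number at bit 24, devfn at bit 16 and the register offset in the low bits, before ORing in PCI_EP_CFG and the per-hose window base (both chip-specific and not reproduced here). A worked example with made-up bus and device numbers:

#include <stdio.h>

int main(void)
{
	unsigned int bus = 1;
	unsigned int devfn = (3 << 3) | 0;	/* device 3, function 0 */
	unsigned int where = 0x10;		/* BAR0 offset in config space */

	unsigned long relbus = ((unsigned long)bus << 24) |
			       ((unsigned long)devfn << 16) | where;

	printf("relative config offset: %#lx\n", relbus);	/* 0x1180010 */
	return 0;
}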
+ */ +static void __iomem *sw6_pcie_map_bus(struct pci_bus *bus, + unsigned int devfn, int where) +{ + struct pci_controller *hose = bus->sysdata; + void __iomem *cfg_iobase; + unsigned long relbus; + + if (!sw6_pcie_valid_device(bus, devfn)) + return NULL; + + relbus = (bus->number << 24) | (devfn << 16) | where; + relbus |= PCI_EP_CFG; + + cfg_iobase = (void *)(hose->ep_config_space_base | relbus); + + if (IS_ENABLED(CONFIG_PCI_DEBUG)) + pr_debug("addr:%px bus %d, devfn %d, where %d\n", + cfg_iobase, bus->number, devfn, where); + return cfg_iobase; +} + +struct pci_ops sw64_pci_ops = { + .map_bus = sw6_pcie_map_bus, + .read = sw6_pcie_config_read, + .write = sw6_pcie_config_write, +}; + +int sw64_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + return sw64_chip_init->pci_init.map_irq(dev, slot, pin); +} + +unsigned char sw64_swizzle(struct pci_dev *dev, u8 *pinp) +{ + return PCI_SLOT(dev->devfn); +} + +static void __init +sw64_init_host(unsigned long node, unsigned long index) +{ + struct pci_controller *hose; + int ret = 0; + + hose = alloc_pci_controller(); + if (!hose) { + printk("alloc NODE %ld RC %ld hose failed\n", node, index); + return; + } + hose->iommu_enable = false; + hose->io_space = alloc_resource(); + hose->mem_space = alloc_resource(); + hose->pre_mem_space = alloc_resource(); + hose->busn_space = alloc_resource(); + hose->index = index; + hose->node = node; + + sw64_chip_init->pci_init.hose_init(hose); + + if (sw64_chip_init->pci_init.set_rc_piu) + sw64_chip_init->pci_init.set_rc_piu(node, index); + + ret = sw64_chip_init->pci_init.check_pci_linkup(node, index); + if (ret == 0) { + /* Root Complex downstream port is link up */ + set_bit(node * 8 + index, &rc_linkup); //8-bit per node + } +} + +void __init sw64_init_arch(void) +{ + if (IS_ENABLED(CONFIG_PCI)) { + unsigned long node, cpu_num; + unsigned long rc_enable; + char id[8], msg[64]; + int i; + + pr_info("SW arch PCI initialize!\n"); + cpu_num = sw64_chip->get_cpu_num(); + + for (node = 0; node < cpu_num; node++) { + rc_enable = sw64_chip_init->pci_init.get_rc_enable(node); + if (rc_enable == 0) { + printk("PCIe is disabled on node %ld\n", node); + continue; + } + for (i = 0; i < MAX_NR_RCS; i++) { + if ((rc_enable >> i) & 0x1) + sw64_init_host(node, i); + } + if ((rc_linkup >> node * 8) & 0xff) { + memset(msg, 0, 64); + sprintf(msg, "Node %ld: RC [ ", node); + for (i = 0; i < MAX_NR_RCS; i++) { + if ((rc_linkup >> (i + node * 8)) & 1) { + memset(id, 0, 8); + sprintf(id, "%d ", i); + strcat(msg, id); + } + } + strcat(msg, "] link up"); + pr_info("%s\n", msg); + } else { + pr_info("Node %ld: no RC link up\n", node); + } + } + } +} + +static void __init sw64_init_intx(struct pci_controller *hose) +{ + unsigned long int_conf, node, val_node; + unsigned long index, irq; + int rcid; + + node = hose->node; + index = hose->index; + + if (!node_online(node)) + val_node = next_node_in(node, node_online_map); + else + val_node = node; + irq = irq_alloc_descs_from(NR_IRQS_LEGACY, 1, val_node); + WARN_ON(irq < 0); + irq_set_chip_and_handler(irq, &dummy_irq_chip, handle_level_irq); + irq_set_status_flags(irq, IRQ_LEVEL); + hose->int_irq = irq; + rcid = cpu_to_rcid(0); + + printk_once(KERN_INFO "INTx are directed to node %d core %d.\n", + ((rcid >> 6) & 0x3), (rcid & 0x1f)); + int_conf = 1UL << 62 | rcid; /* rebase all intx on the first logical cpu */ + if (sw64_chip_init->pci_init.set_intx) + sw64_chip_init->pci_init.set_intx(node, index, int_conf); +} + +void __init sw64_init_irq(void) +{ + struct pci_controller *hose; 
+ + /* Scan all of the recorded PCI controllers. */ + hose = hose_head; + for (hose = hose_head; hose; hose = hose->next) + sw64_init_intx(hose); +} + +void __init +sw64_init_pci(void) +{ + common_init_pci(); +} diff --git a/arch/sw_64/kernel/pci_common.c b/arch/sw_64/kernel/pci_common.c new file mode 100644 index 000000000000..c8c4bf08a458 --- /dev/null +++ b/arch/sw_64/kernel/pci_common.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/sw_64/kernel/pci_iommu.c + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/pci.h> +#include <linux/gfp.h> +#include <linux/memblock.h> +#include <linux/export.h> +#include <linux/scatterlist.h> +#include <linux/log2.h> +#include <linux/dma-mapping.h> +#include <linux/iommu-helper.h> +#include <linux/slab.h> +#include <linux/dma-direct.h> +#include <linux/swiotlb.h> +#include <linux/cache.h> +#include <linux/module.h> +#include <asm/dma.h> +#include <asm/io.h> + +#include "pci_impl.h" + +#define DEBUG_ALLOC 0 +#if DEBUG_ALLOC > 0 +# define DBGA(args...) printk(KERN_DEBUG args) +#else +# define DBGA(args...) +#endif +#if DEBUG_ALLOC > 1 +# define DBGA2(args...) printk(KERN_DEBUG args) +#else +# define DBGA2(args...) +#endif + +#define DEBUG_NODIRECT 0 + +#define ISA_DMA_MASK 0x00ffffff + +/* + * Map a single buffer of the indicated size for PCI DMA in streaming + * mode. The 32-bit PCI bus mastering address to use is returned. + * Once the device is given the dma address, the device owns this memory + * until either pci_unmap_single or pci_dma_sync_single is performed. + */ + +static dma_addr_t +pci_direct_map_single_1(struct pci_dev *pdev, void *cpu_addr) +{ + struct pci_controller *hose = pdev->sysdata; + unsigned long paddr; + unsigned long dma_offset; + + if (hose == NULL) { + pr_err("%s: hose does not exist!\n", __func__); + return 0; + } + + dma_offset = read_piu_ior0(hose->node, hose->index, EPDMABAR); + paddr = __pa(cpu_addr) + dma_offset; + return paddr; +} + +/* Helper for generic DMA-mapping functions. */ +static struct pci_dev *sw64_direct_gendev_to_pci(struct device *dev) +{ + if (dev && dev->bus == &pci_bus_type) + return to_pci_dev(dev); + + /* This assumes ISA bus master with dma_mask 0xffffff. */ + return NULL; +} + +static dma_addr_t sw64_direct_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + struct pci_dev *pdev = sw64_direct_gendev_to_pci(dev); + + if (dir == PCI_DMA_NONE) + BUG(); + + return pci_direct_map_single_1(pdev, (char *)page_address(page) + offset); +} + +/* + * Unmap a single streaming mode DMA translation. The DMA_ADDR and + * SIZE must match what was provided for in a previous pci_map_single + * call. All other usages are undefined. After this call, reads by + * the cpu to the buffer are guaranteed to see whatever the device + * wrote there. + */ + +static inline void sw64_direct_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ +} + +/* Allocate and map kernel buffer using consistent mode DMA for PCI + * device. Returns non-NULL cpu-view pointer to the buffer if + * successful and sets *DMA_ADDRP to the pci side dma address as well, + * else DMA_ADDRP is undefined. 
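Drivers normally reach this allocator through the generic DMA API rather than calling it directly. A typical consumer, for illustration only; the function name, buffer size and 32-bit mask are arbitrary:

#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <linux/sizes.h>

/* Hypothetical driver fragment: allocate one 4 KiB coherent ring buffer. */
static int example_alloc_ring(struct pci_dev *pdev, void **ring,
			      dma_addr_t *ring_dma)
{
	int ret;

	/* Routed to the .dma_supported hook of the device's dma_map_ops. */
	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
	if (ret)
		return ret;

	/* For devices using the ops in this file, this ends up in .alloc. */
	*ring = dma_alloc_coherent(&pdev->dev, SZ_4K, ring_dma, GFP_KERNEL);
	return *ring ? 0 : -ENOMEM;
}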
+ */ + +static void *sw64_direct_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addrp, gfp_t gfp, + unsigned long attrs) +{ + struct pci_dev *pdev = sw64_direct_gendev_to_pci(dev); + void *cpu_addr; + long order = get_order(size); + + gfp &= ~GFP_DMA; + +#ifdef CONFIG_ZONE_DMA + if (dev->coherent_dma_mask < DMA_BIT_MASK(32)) + gfp |= GFP_DMA; +#endif + +try_again: + cpu_addr = (void *)__get_free_pages(gfp, order); + if (!cpu_addr) { + pr_info("pci_alloc_consistent: get_free_pages failed from %ps\n", + __builtin_return_address(0)); + /* ??? Really atomic allocation? Otherwise we could play + * with vmalloc and sg if we can't find contiguous memory. + */ + return NULL; + } + memset(cpu_addr, 0, size); + + *dma_addrp = pci_direct_map_single_1(pdev, cpu_addr); + if (*dma_addrp == 0) { + free_pages((unsigned long)cpu_addr, order); + if (gfp & GFP_DMA) + return NULL; + /* The address doesn't fit required mask and we + * do not have iommu. Try again with GFP_DMA. + */ + gfp |= GFP_DMA; + goto try_again; + } + + DBGA2("pci_alloc_consistent: %zx -> [%p,%llx] from %ps\n", + size, cpu_addr, *dma_addrp, __builtin_return_address(0)); + + return cpu_addr; +} + +/* Free and unmap a consistent DMA buffer. CPU_ADDR and DMA_ADDR must + * be values that were returned from pci_alloc_consistent. SIZE must + * be the same as what as passed into pci_alloc_consistent. + * References to the memory and mappings associated with CPU_ADDR or + * DMA_ADDR past this call are illegal. + */ + +static void sw64_direct_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, + unsigned long attrs) +{ + struct pci_dev *pdev = sw64_direct_gendev_to_pci(dev); + + pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); + free_pages((unsigned long)cpu_addr, get_order(size)); + DBGA2("pci_free_consistent: [%llx,%zx] from %ps\n", + dma_addr, size, __builtin_return_address(0)); +} +#define SG_ENT_VIRT_ADDRESS(SG) (sg_virt((SG))) +#define SG_ENT_PHYS_ADDRESS(SG) __pa(SG_ENT_VIRT_ADDRESS(SG)) + +static dma_addr_t sw64_phys_to_dma(struct device *dev, phys_addr_t pa) +{ + unsigned long dma_offset; + struct pci_dev *pdev = sw64_gendev_to_pci(dev); + struct pci_controller *hose = pdev->sysdata; + + if (hose == NULL) { + pr_err("%s: hose does not exist!\n", __func__); + return 0; + } + + dma_offset = read_piu_ior0(hose->node, hose->index, EPDMABAR); + return pa + dma_offset; +} + +static bool +check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, + const char *caller) +{ + if (unlikely(dev && !dma_capable(dev, dma_addr, size, true))) { + if (!dev->dma_mask) { + dev_err(dev, + "%s: call on device without dma_mask\n", + caller); + return false; + } + + if (*dev->dma_mask >= DMA_BIT_MASK(32)) { + dev_err(dev, + "%s: overflow %pad+%zu of device mask %llx\n", + caller, &dma_addr, size, *dev->dma_mask); + } + return false; + } + return true; +} + +static int sw64_direct_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + BUG_ON(!sg_page(sg)); + + sg_dma_address(sg) = sw64_phys_to_dma(dev, sg_phys(sg)); + if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__)) + return 0; + sg_dma_len(sg) = sg->length; + } + + return nents; +} + +/* Unmap a set of streaming mode DMA translations. Again, cpu read + * rules concerning calls here are the same as for pci_unmap_single() + * above. 
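+ * In this direct-mapped scheme the bus address is simply the CPU physical
+ * address plus the per-hose EPDMABAR offset (see sw64_phys_to_dma() above),
+ * so there is no per-mapping state to tear down and the unmap routines are
+ * intentionally empty.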
+ */ + +static inline void sw64_direct_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} + +/* Return whether the given PCI device DMA address mask can be + * supported properly. + */ + +static int sw64_direct_supported(struct device *dev, u64 mask) +{ + struct pci_dev *pdev = sw64_direct_gendev_to_pci(dev); + struct pci_controller *hose; + + if ((max_low_pfn << PAGE_SHIFT) - 1 <= mask) + return 1; + + /* Check that we have a scatter-gather arena that fits. */ + hose = pdev->sysdata; + if (hose == NULL) { + pr_err("%s: hose does not exist!\n", __func__); + return 0; + } + + /* As last resort try ZONE_DMA. */ + if (MAX_DMA_ADDRESS - PAGE_OFFSET - 1 <= mask) + return 1; + + /* + * Upstream PCI/PCIe bridges or SoC interconnects may not carry + * as many DMA address bits as the device itself supports. + */ + if (dev->bus_dma_limit && mask > dev->bus_dma_limit) + return 0; + + return 0; +} + +const struct dma_map_ops sw64_dma_direct_ops = { + .alloc = sw64_direct_alloc_coherent, + .free = sw64_direct_free_coherent, + .map_page = sw64_direct_map_page, + .unmap_page = sw64_direct_unmap_page, + .map_sg = sw64_direct_map_sg, + .unmap_sg = sw64_direct_unmap_sg, + .dma_supported = sw64_direct_supported, +}; + +const struct dma_map_ops *dma_ops = &sw64_dma_direct_ops; +EXPORT_SYMBOL(dma_ops); diff --git a/arch/sw_64/kernel/pci_impl.h b/arch/sw_64/kernel/pci_impl.h new file mode 100644 index 000000000000..0cb6d1b1d1e3 --- /dev/null +++ b/arch/sw_64/kernel/pci_impl.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file contains declarations and inline functions for interfacing + * with the PCI initialization routines. + */ +#ifndef _SW64_KERNEL_PCI_IMPL_H +#define _SW64_KERNEL_PCI_IMPL_H + +struct pci_dev; +struct pci_controller; +struct pci_iommu_arena; + +/* + * We can't just blindly use 64K for machines with EISA busses; they + * may also have PCI-PCI bridges present, and then we'd configure the + * bridge incorrectly. + * + * Also, we start at 0x8000 or 0x9000, in hopes to get all devices' + * IO space areas allocated *before* 0xC000; this is because certain + * BIOSes (Millennium for one) use PCI Config space "mechanism #2" + * accesses to probe the bus. If a device's registers appear at 0xC000, + * it may see an INx/OUTx at that address during BIOS emulation of the + * VGA BIOS, and some cards, notably Adaptec 2940UW, take mortal offense. + */ + +#define EISA_DEFAULT_IO_BASE 0x9000 /* start above 8th slot */ +#define DEFAULT_IO_BASE 0x0 /* start at 8th slot */ + +/* + * We try to make the DEFAULT_MEM_BASE addresses *always* have more than + * a single bit set. This is so that devices like the broken Myrinet card + * will always have a PCI memory address that will never match a IDSEL + * address in PCI Config space, which can cause problems with early rev cards. + */ + +#define DEFAULT_MEM_BASE 0 + +/* + * A PCI IOMMU allocation arena. There are typically two of these + * regions per bus. + * ??? The 8400 has a 32-byte pte entry, and the entire table apparently + * lives directly on the host bridge (no tlb?). We don't support this + * machine, but if we ever did, we'd need to parameterize all this quite + * a bit further. Probably with per-bus operation tables. 
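+ * Note that the direct-mapping path in pci_common.c does not use these
+ * arenas at all, and the IOMMU path in pci_iommu.c keeps its own per-device
+ * bitmap (struct sw_pci_dev_iommu_area) instead, so this structure appears
+ * to be retained mainly for reference.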
+ */ + +struct pci_iommu_arena { + spinlock_t lock; + struct pci_controller *hose; +#define IOMMU_INVALID_PTE 0x2 /* 32:63 bits MBZ */ +#define IOMMU_RESERVED_PTE 0xface + unsigned long *ptes; + dma_addr_t dma_base; + unsigned int size; + unsigned int next_entry; + unsigned int align_entry; +}; + + +/* The hose list. */ +extern struct pci_controller *hose_head, **hose_tail; + +extern void common_init_pci(void); +#define common_swizzle pci_common_swizzle +extern struct pci_controller *alloc_pci_controller(void); +extern struct resource *alloc_resource(void); + +extern unsigned long size_for_memory(unsigned long max); + +extern struct pci_dev *sw64_gendev_to_pci(struct device *dev); +extern const struct dma_map_ops sw64_dma_direct_ops; + +extern struct cma *sw64_kvm_cma; +extern struct gen_pool *sw64_kvm_pool; +#endif diff --git a/arch/sw_64/kernel/pci_iommu.c b/arch/sw_64/kernel/pci_iommu.c new file mode 100644 index 000000000000..79760c4ac6fc --- /dev/null +++ b/arch/sw_64/kernel/pci_iommu.c @@ -0,0 +1,772 @@ +// SPDX-License-Identifier: GPL-2.0 +/* iommu.c: Generic sw_64 IOMMU support for 3231 + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/pci.h> +#include <linux/gfp.h> +#include <linux/memblock.h> +#include <linux/export.h> +#include <linux/scatterlist.h> +#include <linux/log2.h> +#include <linux/dma-mapping.h> +#include <linux/iommu-helper.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/syscore_ops.h> +#include <linux/swiotlb.h> +#include <linux/cache.h> +#include <linux/module.h> +#include <asm/dma.h> +#include <asm/io.h> +#include <asm/swio.h> +#include <asm/pci.h> + +#include "proto.h" +#include "pci_impl.h" +#include "sw_pci_impl.h" + +#define DEBUG_ALLOC 0 +#if DEBUG_ALLOC > 0 +# define DBGA(args...) printk(KERN_DEBUG args) +#else +# define DBGA(args...) +#endif +#if DEBUG_ALLOC > 1 +# define DBGA2(args...) printk(KERN_DEBUG args) +#else +# define DBGA2(args...) 
+#endif + +unsigned long iommu_cmd; + +static void sw_iommu_create_new(struct pci_controller *hose, unsigned int error_bus_number, + unsigned int error_devfn, unsigned int error_da) +{ + unsigned long dtbr; + u64 *paddr; + u32 ofs; + unsigned long dtbbaseaddr, dtbbasecond; + + sw_read_piu_ior0(hose->node, hose->index, DTBASEADDR, &dtbr); + dtbr += PAGE_OFFSET; + ofs = error_da >> PAGE_SHIFT; + + dtbbaseaddr = dtbr + (error_bus_number << 3); + dtbbasecond = (*(u64 *)(dtbbaseaddr)) & (~(SW_IOMMU_ENTRY_VALID)) & PAGE_MASK; + dtbbasecond += (error_devfn << 3) + PAGE_OFFSET; + + paddr = (u64 *)get_zeroed_page(GFP_DMA); + sw_iommu_map(__pa(paddr), ofs, dtbbasecond, hose, NULL); +} + +irqreturn_t iommu_interrupt(int irq, void *dev) +{ + struct pci_controller *hose = (struct pci_controller *)dev; + unsigned long iommu_status; + unsigned int type, bus_number; + unsigned int devfn, error_da; + + sw_read_piu_ior0(hose->node, hose->index, IOMMUEXCPT_STATUS, &iommu_status); + if (!(iommu_status >> 63)) + return IRQ_NONE; + + type = (iommu_status >> 59) & 0x7; + bus_number = (iommu_status >> 45) & 0xff; + devfn = (iommu_status >> 37) & 0xff; + error_da = iommu_status & 0xffffffff; + + if (type == 0x3) { + iommu_status &= ~(1UL << 62); + iommu_status = iommu_status | (1UL << 63); + sw_write_piu_ior0(hose->node, hose->index, IOMMUEXCPT_STATUS, iommu_status); + return IRQ_HANDLED; + } + + if (type == 0x2) + sw_iommu_create_new(hose, bus_number, devfn, error_da); + + udelay(100); + sw_write_piu_ior0(hose->node, hose->index, PTLB_FLUSHALL, 0); + sw_write_piu_ior0(hose->node, hose->index, PCACHE_FLUSHALL, 0); + + iommu_status = iommu_status | (3UL << 62); + sw_write_piu_ior0(hose->node, hose->index, IOMMUEXCPT_STATUS, iommu_status); + + return IRQ_HANDLED; +} + +struct irqaction iommu_irqaction = { + .handler = iommu_interrupt, + .flags = IRQF_SHARED | IRQF_NO_THREAD, + .name = "sw_iommu", +}; + +void sw_enable_iommu_func(struct pci_controller *hose) +{ + struct irqaction *action; + unsigned int iommu_irq; + unsigned long iommu_conf, iommu_ctrl; + + iommu_irq = hose->int_irq; + action = &iommu_irqaction; + action->dev_id = hose; + request_irq(iommu_irq, action.iommu_interrupt, action.flags, "sw_iommu", action->dev_id); + iommu_ctrl = (1UL << 63) | (0x100UL << 10); + sw_write_piu_ior0(hose->node, hose->index, IOMMUEXCPT_CTRL, iommu_ctrl); + sw_read_piu_ior0(hose->node, hose->index, PIUCONFIG0, &iommu_conf); + iommu_conf = iommu_conf | (0x3 << 7); + sw_write_piu_ior0(hose->node, hose->index, PIUCONFIG0, iommu_conf); + sw_write_piu_ior0(hose->node, hose->index, TIMEOUT_CONFIG, 0xf); + sw_read_piu_ior0(hose->node, hose->index, PIUCONFIG0, &iommu_conf); + pr_info("SW arch configure node %ld hose-%ld iommu_conf = %#lx\n", + hose->node, hose->index, iommu_conf); +} + +struct sw_iommu_dev *pci_to_iommu(struct pci_dev *pdev) +{ + struct sw_iommu *iommu; + struct pci_controller *hose = (struct pci_controller *)pdev->sysdata; + struct sw_iommu_dev *sw_dev; + int busnumber, devid; + + iommu = hose->pci_iommu; + + list_for_each_entry(sw_dev, &iommu->dev_list, list) { + busnumber = sw_dev->dev_id >> 8; + devid = sw_dev->dev_id & 0xff; + if ((busnumber == pdev->bus->number) && (devid == pdev->devfn)) + return sw_dev; + } + + return NULL; +} + +struct sw_iommu_dev *create_sw_iommu_dev(struct pci_dev *dev, unsigned long *pte, struct sw_iommu *iommu) +{ + struct sw_iommu_dev *sw_dev = kzalloc(sizeof(struct sw_iommu_dev), GFP_KERNEL); + + sw_dev->dev_id = (dev->bus->number << 8) + dev->devfn; + sw_dev->io_page_base = pte; + 
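+	/*
+	 * dev_id packs the device location as (bus << 8) | devfn, which is how
+	 * pci_to_iommu() above finds the device again; e.g. bus 0x03,
+	 * devfn 0x10 (slot 2, function 0) gives dev_id 0x0310.
+	 */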
sw_dev->iommu = iommu; + + list_add_tail(&sw_dev->list, &iommu->dev_list); + return sw_dev; +} + +void __sw_pci_iommu_dte_alloc(struct pci_bus *bus, struct sw_iommu *iommu) +{ + struct pci_dev *dev; + struct sw_iommu_dev *iommu_dev; + unsigned long *pte; + u64 *dte; + u64 dtebaseaddr; + u64 dtentry; + u64 dtebaseaddr2, ptentry; + + dtebaseaddr = (unsigned long)iommu->iommu_dtbr + (bus->number << 3); + dte = (u64 *)get_zeroed_page(GFP_KERNEL); + dtentry = (__pa(dte) & PAGE_MASK) | (1UL << 63); + *(u64 *)dtebaseaddr = dtentry; + + list_for_each_entry(dev, &bus->devices, bus_list) { + if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { + pte = (unsigned long *)get_zeroed_page(GFP_KERNEL); + dtebaseaddr2 = ((unsigned long)dte & PAGE_MASK) + ((dev->devfn) << 3); + iommu_dev = create_sw_iommu_dev(dev, pte, iommu); + ptentry = (__pa(pte) & PAGE_MASK) | (1UL << 63); + iommu_dev->iommu_bypass = 0; + *(u64 *)dtebaseaddr2 = ptentry; + /* legacy VGA frame buffer has occupied 0xA0000-0xBFFFF memory segment */ + iommu_dev->iommu_area = sw_iommu_area_new(iommu_dev, 0x100000UL); + } else if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + struct pci_bus *b = dev->subordinate; + + if (b) + __sw_pci_iommu_dte_alloc(b, iommu); + } + } +} + +static int iommu_cpu_suspend(void) +{ + return 0; +} + +static void iommu_cpu_resume(void) +{ +} + +struct syscore_ops iommu_cpu_syscore_ops = { + .suspend = iommu_cpu_suspend, + .resume = iommu_cpu_resume, +}; + +int sw_iommu_init(struct pci_controller *hose) +{ + struct sw_iommu *iommu; + unsigned long base; + unsigned long rc_mask = 0x1; + + rc_mask <<= (8 * hose->node + hose->index); + if (!(iommu_cmd & rc_mask)) + return 0; + sw_write_piu_ior0(hose->node, hose->index, DTLB_FLUSHALL, 0); + sw_write_piu_ior0(hose->node, hose->index, PTLB_FLUSHALL, 0); + sw_write_piu_ior0(hose->node, hose->index, PCACHE_FLUSHALL, 0); + hose->pci_iommu = kzalloc(sizeof(struct sw_iommu), GFP_KERNEL); + if (!hose->pci_iommu) { + printk("Can't alloc memory for pci_iommu!\n"); + return 0; + } + iommu = hose->pci_iommu; + spin_lock_init(&iommu->dt_lock); + iommu->index = hose->index; + iommu->enabled = true; + iommu->iommu_dtbr = (unsigned long *)get_zeroed_page(GFP_KERNEL); + base = __pa(iommu->iommu_dtbr) & PAGE_MASK; + sw_write_piu_ior0(hose->node, hose->index, DTBASEADDR, base); + INIT_LIST_HEAD(&iommu->dev_list); + __sw_pci_iommu_dte_alloc(hose->bus, iommu); + sw_write_piu_ior0(hose->node, hose->index, DTLB_FLUSHALL, 0); + sw_write_piu_ior0(hose->node, hose->index, PTLB_FLUSHALL, 0); + sw_write_piu_ior0(hose->node, hose->index, PCACHE_FLUSHALL, 0); + sw_enable_iommu_func(hose); + hose->iommu_enable = true; + + return 0; +} + +struct sw_pci_dev_iommu_area *sw_iommu_area_new(struct sw_iommu_dev *iommu_dev, dma_addr_t base) +{ + struct sw_pci_dev_iommu_area *iommu_area = kzalloc(sizeof(struct sw_pci_dev_iommu_area), GFP_KERNEL); + + if (!iommu_area) { + pr_err("SW arch could not allocate pci iommu dma_area.\n"); + return NULL; + } + + spin_lock_init(&iommu_area->lock); + iommu_area->iommu = iommu_dev->iommu; + iommu_area->dma_base = base; + iommu_area->bitmap = (void *)__get_free_pages(GFP_KERNEL, 3); + if (!iommu_area->bitmap) { + free_pages((unsigned long)iommu_area->bitmap, 3); + pr_err("SW arch could not allocate dma_area->bitmap.\n"); + return NULL; + } + memset(iommu_area->bitmap, 0, 8*PAGE_SIZE); + iommu_area->next_address = 0; + return iommu_area; +} + +/** + * sw_iommu_map - + * @paddr: buffer of the indicated size for PCI DMA + * @dma_ofs: virtual DMA buffer page frame number allocated 
from pdev private DMA zone + * @dtbaddr: Device Table Base Addr for Level 2 + * @index: PCIe host index + */ +int sw_iommu_map(unsigned long paddr, long dma_ofs, unsigned long dtbaddr, + struct pci_controller *hose, struct pci_dev *pdev) +{ + unsigned long pde, pte; /*pde means Page Table Base Addr for Level 2 pte means Page Table Entry*/ + unsigned long pdebaseaddr; + u64 *ptebasesecond, ptebaseaddr; /*ptebasesecond means Page Table Pointer for Level 2*/ + unsigned long pcache_flush_addr; + + pdebaseaddr = ((dma_ofs >> 10) & SW_IOMMU_LEVEL1_OFFSET) << 3; /* Offset of Page Table Entry for Level 1 */ + pdebaseaddr += ((*(volatile u64 *)dtbaddr) & (~(SW_IOMMU_ENTRY_VALID)) & (PAGE_MASK)) + PAGE_OFFSET; + pte = (paddr & PAGE_MASK) | SW_IOMMU_ENTRY_VALID | SW_IOMMU_GRN | SW_IOMMU_ENABLE; + + /* If pde exists, no need to allocate a new page */ + if ((*(volatile u64 *)pdebaseaddr) & SW_IOMMU_ENTRY_VALID) { + ptebaseaddr = ((*(volatile u64 *)pdebaseaddr) & (~(SW_IOMMU_ENTRY_VALID)) & (PAGE_MASK)) + PAGE_OFFSET; + ptebaseaddr += (dma_ofs & SW_IOMMU_LEVEL2_OFFSET) << 3; + + pcache_flush_addr = __pa(ptebaseaddr) & 0xffffffff80; + + *(volatile u64 *)ptebaseaddr = pte; + mb(); + sw_write_piu_ior0(hose->node, hose->index, PCACHE_FLUSHPADDR, pcache_flush_addr); + } else { + ptebasesecond = (u64 *)get_zeroed_page(GFP_ATOMIC); + + if (!ptebasesecond) { + printk("allocating pages fails.\n"); + free_page((unsigned long)ptebasesecond); + return -1; + } + pde = (__pa(ptebasesecond) & PAGE_MASK) | SW_IOMMU_ENTRY_VALID; + + pcache_flush_addr = __pa(pdebaseaddr) & 0xffffffff80; + + *(volatile u64 *)pdebaseaddr = pde; + mb(); + sw_write_piu_ior0(hose->node, hose->index, PCACHE_FLUSHPADDR, pcache_flush_addr); + + ptebaseaddr = (unsigned long)ptebasesecond + ((dma_ofs & SW_IOMMU_LEVEL2_OFFSET) << 3); + + pcache_flush_addr = __pa(ptebaseaddr) & 0xffffffff80; + + *(volatile u64 *)ptebaseaddr = pte; + mb(); + sw_write_piu_ior0(hose->node, hose->index, PCACHE_FLUSHPADDR, pcache_flush_addr); + } + + return 0; +} + +static unsigned long +sw_iommu_area_alloc(struct sw_pci_dev_iommu_area *area, unsigned int pages, + unsigned long start) +{ + unsigned long next_bit = start >> PAGE_SHIFT; + unsigned long address = -1; + unsigned long boundary_size = ((4UL << 30)) >> PAGE_SHIFT; + unsigned long limit = boundary_size - (1UL << 17) - (area->dma_base >> PAGE_SHIFT); + + address = iommu_area_alloc(area->bitmap, limit, next_bit, pages, 0, boundary_size, 0); + if (address != -1) { + address = address << PAGE_SHIFT; + area->next_address = address + (pages << PAGE_SHIFT); + } + + return address; +} + +static long +sw_iommu_alloc_da(struct sw_pci_dev_iommu_area *area, long n) +{ + unsigned long address; + + address = sw_iommu_area_alloc(area, n, area->next_address); + if (address == -1) { + area->next_address = 0; + address = sw_iommu_area_alloc(area, n, area->next_address); + if (address == -1) + pr_err("SW arch failed to allocate device address.\n"); + } + + return address; +} + +static void sw_iommu_free_da(unsigned long *map, long dma_ofs, long n) +{ + bitmap_clear(map, dma_ofs, n); +} + +static void sw_iommu_unmap(struct pci_dev *pdev, long ofs) +{ + unsigned long dtbbaseaddr, dtbbasecond; /* dtbbaseaddr means Device Table Base Addr for Level 1 */ + /* dtbbasecond means Device Table Base Addr for Level 2 */ + unsigned long pde, pte; /* pde means Page Table Base Addr for Level 2 */ + /* pte means Page Table Entry */ + unsigned long tlb_flush_addr, pcache_flush_addr; + unsigned long addr; + unsigned long pdebaseaddr; /* 
ptebasefirst means Page Table Pointer for Level 1 */ + unsigned long ptebaseaddr; /* ptebasesecond means Page Table Pointer for Level 2 */ + unsigned long ptebaseaddr_full; /* ptebasesecond means Page Table Pointer for Level 2 */ + unsigned long ptebaseaddr_offset; + struct pci_controller *hose = (struct pci_controller *)pdev->sysdata; + int i; + u64 per_pte; + struct sw_iommu *sw_pci_iommu = hose->pci_iommu; + + addr = (unsigned long)sw_pci_iommu->iommu_dtbr; + dtbbaseaddr = addr + (pdev->bus->number << 3); + + dtbbasecond = (*(volatile u64 *)dtbbaseaddr) & (~(SW_IOMMU_ENTRY_VALID)) & PAGE_MASK; + dtbbasecond += (pdev->devfn << 3) + PAGE_OFFSET; + + pdebaseaddr = ((*(volatile u64 *)dtbbasecond) & (~(SW_IOMMU_ENTRY_VALID)) & (PAGE_MASK)) + PAGE_OFFSET; + pdebaseaddr += ((ofs >> 10) & SW_IOMMU_LEVEL1_OFFSET) << 3; + + pde = *(volatile u64 *)(pdebaseaddr); + ptebaseaddr = (pde & (~(SW_IOMMU_ENTRY_VALID)) & PAGE_MASK) + PAGE_OFFSET; + ptebaseaddr_offset = ptebaseaddr + ((ofs & SW_IOMMU_LEVEL2_OFFSET) << 3); + + tlb_flush_addr = (pdev->bus->number << 8) | pdev->devfn | (ofs << 16); + sw_write_piu_ior0(hose->node, hose->index, PTLB_FLUSHVADDR, tlb_flush_addr); /* TLB FLUSH*/ + + pte = *(volatile u64 *)(ptebaseaddr_offset); + pte &= ~(SW_IOMMU_ENTRY_VALID); /*disable Page Table Entry*/ + pcache_flush_addr = __pa(ptebaseaddr_offset) & 0xffffffff80; + + *(volatile u64 *)(ptebaseaddr_offset) = pte; + mb(); + sw_write_piu_ior0(hose->node, hose->index, PCACHE_FLUSHPADDR, pcache_flush_addr); + + ptebaseaddr_full = ptebaseaddr + 0x1ff8; + if (ptebaseaddr_offset == ptebaseaddr_full) { + for (i = 0; i < 1024; i++) { + per_pte = *(volatile u64 *)(ptebaseaddr + i * 8); + if (per_pte & SW_IOMMU_ENTRY_VALID) + break; + } + if (i == 1024) { + free_page(ptebaseaddr); + pde &= ~(SW_IOMMU_ENTRY_VALID); + + pcache_flush_addr = __pa(pdebaseaddr) & 0xffffffff80; + + *(volatile u64 *)(pdebaseaddr) = pde; + mb(); + sw_write_piu_ior0(hose->node, hose->index, PCACHE_FLUSHPADDR, pcache_flush_addr); + } + } +} + +static dma_addr_t __sw_map_single(struct pci_dev *pdev, unsigned long paddr, + struct sw_pci_dev_iommu_area *iommu_area, size_t size) +{ + long npages, dma_ofs, i, ofs; + unsigned long dtbbaseaddr; /* dtbbaseaddr means Device Table Base Addr for Level 1 */ + unsigned long dtbbasecond; /* dtbbasecond means Device Table Base Addr for Level 2 */ + unsigned long addr; + dma_addr_t ret = -1; + struct pci_controller *hose = pdev->sysdata; + struct sw_iommu *sw_pci_iommu = hose->pci_iommu; + unsigned long flags; + + if (hose == NULL) { + pr_err("%s: hose does not exist!\n", __func__); + return 0; + } + + addr = (unsigned long)sw_pci_iommu->iommu_dtbr; + dtbbaseaddr = addr + (pdev->bus->number << 3); + + dtbbasecond = (*(volatile u64 *)dtbbaseaddr) & ~(SW_IOMMU_ENTRY_VALID) & PAGE_MASK; + dtbbasecond += (pdev->devfn << 3) + PAGE_OFFSET; + npages = iommu_num_pages(paddr, size, PAGE_SIZE); + + if (hose->iommu_enable) { + spin_lock_irqsave(&iommu_area->lock, flags); + + dma_ofs = sw_iommu_alloc_da(iommu_area, npages); + if (dma_ofs == -1) { + pr_warn("%s %s failed: could not allocate dma page tables\n", + pci_name(pdev), __func__); + spin_unlock_irqrestore(&iommu_area->lock, flags); + return 0; + } + + ret = iommu_area->dma_base + dma_ofs; + + for (i = 0; i < npages; ++i, paddr += PAGE_SIZE) { + ofs = (ret >> PAGE_SHIFT) + i; + sw_iommu_map(paddr, ofs, dtbbasecond, hose, pdev); + } + + spin_unlock_irqrestore(&iommu_area->lock, flags); + + ret += paddr & ~PAGE_MASK; + } + + return ret; +} + +/* + * Map a single buffer of 
the indicated size for PCI DMA in streaming + * mode. The 32-bit PCI bus mastering address to use is returned. + * Once the device is given the dma address, the device owns this memory + * until either pci_unmap_single or pci_dma_sync_single is performed. + */ + +static dma_addr_t +pci_iommu_map_single(struct pci_dev *pdev, void *cpu_addr, size_t size) +{ + struct pci_controller *hose = pdev->sysdata; + unsigned long paddr; + + if (hose == NULL) { + pr_err("%s: hose does not exist!\n", __func__); + return 0; + } + + if (!hose->iommu_enable) { + unsigned long dma_offset; + + sw_read_piu_ior0(hose->node, hose->index, EPDMABAR, &dma_offset); + paddr = __pa(cpu_addr) + dma_offset; + } else { + struct sw_pci_dev_iommu_area *iommu_area; + struct sw_iommu_dev *sw_dev = pci_to_iommu(pdev); + + paddr = __pa(cpu_addr); + iommu_area = sw_dev->iommu_area; + if (!iommu_area) { + pr_err("SW arch get iommu_area error!\n"); + return 0; + } + + paddr = __sw_map_single(pdev, paddr, iommu_area, size); + } + + return paddr; +} + +static dma_addr_t sw_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + struct pci_dev *pdev = sw_gendev_to_pci(dev); + + if (dir == PCI_DMA_NONE) + BUG(); + + return pci_iommu_map_single(pdev, (char *)page_address(page) + offset, size); +} + +/* + * Unmap a single streaming mode DMA translation. The DMA_ADDR and + * SIZE must match what was provided for in a previous pci_map_single + * call. All other usages are undefined. After this call, reads by + * the cpu to the buffer are guaranteed to see whatever the device + * wrote there. + */ + +static void sw_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct pci_dev *pdev = sw_gendev_to_pci(dev); + struct pci_controller *hose = pdev->sysdata; + struct sw_iommu_dev *sw_dev = pci_to_iommu(pdev); + struct sw_pci_dev_iommu_area *iommu_area; + long dma_ofs, npages, ofs; + unsigned long flags; + int i; + + if (hose == NULL) { + pr_err("%s: hose does not exist!\n", __func__); + return 0; + } + + if (!hose->iommu_enable) + return; + + iommu_area = sw_dev->iommu_area; + dma_ofs = (dma_addr - iommu_area->dma_base) >> PAGE_SHIFT; + npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); + + spin_lock_irqsave(&iommu_area->lock, flags); + + for (i = 0; i < npages; ++i) { + ofs = (dma_addr >> PAGE_SHIFT) + i; + sw_iommu_unmap(pdev, ofs); + } + + sw_iommu_free_da(iommu_area->bitmap, dma_ofs, npages); + spin_unlock_irqrestore(&iommu_area->lock, flags); +} + +/* + * Allocate and map kernel buffer using consistent mode DMA for PCI + * device. Returns non-NULL cpu-view pointer to the buffer if + * successful and sets *DMA_ADDRP to the pci side dma address as well, + * else DMA_ADDRP is undefined. + */ +static void *sw_iommu_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addrp, gfp_t gfp, + unsigned long attrs) +{ + struct pci_dev *pdev = sw_gendev_to_pci(dev); + void *cpu_addr; + long order = get_order(size); + + gfp &= ~GFP_DMA; + +try_again: + cpu_addr = (void *)__get_free_pages(gfp | __GFP_ZERO, order); + if (!cpu_addr) { + pr_info("pci_alloc_consistent: get_free_pages failed from %ps\n", + __builtin_return_address(0)); + /* ??? Really atomic allocation? Otherwise we could play + * with vmalloc and sg if we can't find contiguous memory. 
+ */ + return NULL; + } + memset(cpu_addr, 0, size); + + *dma_addrp = pci_iommu_map_single(pdev, cpu_addr, size); + if (*dma_addrp == 0) { + free_pages((unsigned long)cpu_addr, order); + if (gfp & GFP_DMA) + return NULL; + /* The address doesn't fit required mask and we + * do not have iommu. Try again with GFP_DMA. + */ + gfp |= GFP_DMA; + goto try_again; + } + + DBGA2("pci_alloc_consistent: %zx -> [%p,%llx] from %ps\n", + size, cpu_addr, *dma_addrp, __builtin_return_address(0)); + + return cpu_addr; +} + +/* Free and unmap a consistent DMA buffer. CPU_ADDR and DMA_ADDR must + * be values that were returned from pci_alloc_consistent. SIZE must + * be the same as what as passed into pci_alloc_consistent. + * References to the memory and mappings associated with CPU_ADDR or + * DMA_ADDR past this call are illegal. + */ + +static void sw_iommu_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, + unsigned long attrs) +{ + struct pci_dev *pdev = sw_gendev_to_pci(dev); + + pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); + free_pages((unsigned long)cpu_addr, get_order(size)); + + DBGA2("pci_free_consistent: [%llx,%zx] from %ps\n", + dma_addr, size, __builtin_return_address(0)); +} + +#define SG_ENT_VIRT_ADDRESS(SG) (sg_virt((SG))) +#define SG_ENT_PHYS_ADDRESS(SG) __pa(SG_ENT_VIRT_ADDRESS(SG)) + +static int sw_iommu_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + int i; + struct scatterlist *sg; + struct pci_dev *pdev = sw_gendev_to_pci(dev); + int out_nents = 0; + + if (dir == PCI_DMA_NONE) + BUG(); + + for_each_sg(sgl, sg, nents, i) { + BUG_ON(!sg_page(sg)); + + sg_dma_address(sg) = pci_iommu_map_single(pdev, SG_ENT_VIRT_ADDRESS(sg), sg->length); + if (sg_dma_address(sg) == 0) + goto error; + sg_dma_len(sg) = sg->length; + out_nents++; + } + + return nents; + +error: + pr_warn("pci_map_sg failed: could not allocate dma page tables\n"); + + /* Some allocation failed while mapping the scatterlist + * entries. Unmap them now. + */ + if (out_nents) + pci_unmap_sg(pdev, sgl, out_nents, dir); + return 0; +} + +/* + * Unmap a set of streaming mode DMA translations. Again, cpu read + * rules concerning calls here are the same as for pci_unmap_single() + * above. + */ +static void sw_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + struct pci_dev *pdev = sw_gendev_to_pci(dev); + struct pci_controller *hose = pdev->sysdata; + struct scatterlist *sg; + int i, j; + dma_addr_t dma_addr; + struct sw_pci_dev_iommu_area *iommu_area; + struct sw_iommu_dev *sw_dev = pci_to_iommu(pdev); + long dma_ofs, npages, ofs, size; + unsigned long flags; + + if (hose == NULL) { + pr_err("%s: hose does not exist!\n", __func__); + return 0; + } + + if (!hose->iommu_enable) + return; + + iommu_area = sw_dev->iommu_area; + for_each_sg(sgl, sg, nents, j) { + BUG_ON(!sg_page(sg)); + dma_addr = sg->dma_address; + size = sg->dma_length; + if (!size) + break; + npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); + dma_ofs = (dma_addr - iommu_area->dma_base) >> PAGE_SHIFT; + + spin_lock_irqsave(&iommu_area->lock, flags); + for (i = 0; i < npages; ++i) { + ofs = (dma_addr >> PAGE_SHIFT) + i; + sw_iommu_unmap(pdev, ofs); + } + + sw_iommu_free_da(iommu_area->bitmap, dma_ofs, npages); + + spin_unlock_irqrestore(&iommu_area->lock, flags); + } +} + +/* Return whether the given PCI device DMA address mask can be + * supported properly. 
+ */ + +static int sw_iommu_supported(struct device *dev, u64 mask) +{ + /* As last resort try ZONE_DMA. */ + if (MAX_DMA_ADDRESS - PAGE_OFFSET - 1 <= mask) + return 1; + + return 0; +} + +static int sw_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == 0; +} + +static int iommu_get_option(char **str, unsigned long *pint) +{ + char *cur = *str; + + if (!cur || !(*cur)) + return 0; + *pint = kstrtol(cur, str, 16); + + return 1; +} + +static int __init iommu_enable_setup(char *s) +{ + unsigned long rc_bitmap = 0; + + iommu_get_option(&s, &rc_bitmap); + iommu_cmd = rc_bitmap; + + return 1; +} +__setup("iommu_enable=", iommu_enable_setup); + +const struct dma_map_ops sw_iommu_dma_ops = { + .alloc = sw_iommu_alloc_coherent, + .free = sw_iommu_free_coherent, + .map_page = sw_iommu_map_page, + .unmap_page = sw_iommu_unmap_page, + .map_sg = sw_iommu_map_sg, + .unmap_sg = sw_iommu_unmap_sg, + .mapping_error = sw_iommu_mapping_error, + .dma_supported = sw_iommu_supported, +}; + +const struct dma_map_ops *dma_ops = &sw_iommu_dma_ops; +EXPORT_SYMBOL(dma_ops); diff --git a/arch/sw_64/kernel/perf_event.c b/arch/sw_64/kernel/perf_event.c new file mode 100644 index 000000000000..dac979d4b09a --- /dev/null +++ b/arch/sw_64/kernel/perf_event.c @@ -0,0 +1,763 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Performance events support for SW64 platforms. + * + * This code is based upon riscv and sparc perf event code. + */ + +#include <linux/perf_event.h> +#include <linux/kprobes.h> +#include <linux/kernel.h> +#include <linux/kdebug.h> +#include <linux/mutex.h> +#include <linux/init.h> +#include <linux/uaccess.h> + +#include <linux/atomic.h> +#include <asm/irq.h> +#include <asm/irq_regs.h> +#include <asm/hmcall.h> +#include <asm/hw_irq.h> + +/* For tracking PMCs and the hw events they monitor on each CPU. */ +struct cpu_hw_events { + /* Number of events currently scheduled onto this cpu. + * This tells how many entries in the arrays below + * are valid. + */ + int n_events; + /* Track counter usage of each counter */ +#define PMC_IN_USE 1 +#define PMC_NOT_USE 0 + int pmcs[MAX_HWEVENTS]; + /* Array of events current scheduled on this cpu. */ + struct perf_event *event[MAX_HWEVENTS]; +}; + +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +static void sw64_pmu_start(struct perf_event *event, int flags); +static void sw64_pmu_stop(struct perf_event *event, int flags); + +struct sw64_perf_event { + /* pmu index */ + int counter; + /* events selector */ + int event; +}; + +/* + * A structure to hold the description of the PMCs available on a particular + * type of SW64 CPU. + */ +struct sw64_pmu_t { + /* generic hw/cache events table */ + const struct sw64_perf_event *hw_events; + const struct sw64_perf_event (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + + /* method used to map hw/cache events */ + const struct sw64_perf_event *(*map_hw_event)(u64 config); + const struct sw64_perf_event *(*map_cache_event)(u64 config); + + /* The number of entries in the hw_event_map */ + int max_events; + + /* The number of counters on this pmu */ + int num_pmcs; + + /* + * All PMC counters reside in the IBOX register PCTR. This is the + * LSB of the counter. + */ + int pmc_count_shift[MAX_HWEVENTS]; + + /* + * The mask that isolates the PMC bits when the LSB of the counter + * is shifted to bit 0. + */ + unsigned long pmc_count_mask; + + /* The maximum period the PMC can count. 
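+	 * A counter is programmed with (pmc_max_period - left), so it
+	 * overflows and raises a PMI after roughly `left` further events;
+	 * see sw64_perf_event_set_period() below.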
*/ + unsigned long pmc_max_period; + + /* + * The maximum value that may be written to the counter due to + * hardware restrictions is pmc_max_period - pmc_left. + */ + long pmc_left; + + /* Subroutine for checking validity of a raw event for this PMU. */ + bool (*raw_event_valid)(u64 config); +}; + +/* + * The SW64 PMU description currently in operation. This is set during + * the boot process to the specific CPU of the machine. + */ +static const struct sw64_pmu_t *sw64_pmu; + +/* + * SW64 PMC event types + * + * There is no one-to-one mapping of the possible hw event types to the + * actual codes that are used to program the PMCs hence we introduce our + * own hw event type identifiers. + */ +#define SW64_OP_UNSUP {-1, -1} + +/* Mapping of the hw event types to the perf tool interface */ +static const struct sw64_perf_event core3_hw_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = {PERFMON_PC0, PC0_CPU_CYCLES}, + [PERF_COUNT_HW_INSTRUCTIONS] = {PERFMON_PC0, PC0_INSTRUCTIONS}, + [PERF_COUNT_HW_CACHE_REFERENCES] = {PERFMON_PC0, PC0_SCACHE_REFERENCES}, + [PERF_COUNT_HW_CACHE_MISSES] = {PERFMON_PC1, PC1_SCACHE_MISSES}, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = {PERFMON_PC0, PC0_BRANCH_INSTRUCTIONS}, + [PERF_COUNT_HW_BRANCH_MISSES] = {PERFMON_PC1, PC1_BRANCH_MISSES}, +}; + +/* Mapping of the hw cache event types to the perf tool interface */ +#define C(x) PERF_COUNT_HW_CACHE_##x +static const struct sw64_perf_event core3_cache_event_map + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {PERFMON_PC0, PC0_DCACHE_READ}, + [C(RESULT_MISS)] = {PERFMON_PC1, PC1_DCACHE_MISSES} + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {PERFMON_PC0, PC0_ICACHE_READ}, + [C(RESULT_MISS)] = {PERFMON_PC1, PC1_ICACHE_READ_MISSES}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {PERFMON_PC0, PC0_DTB_READ}, + [C(RESULT_MISS)] = {PERFMON_PC1, PC1_DTB_SINGLE_MISSES}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {PERFMON_PC0, PC0_ITB_READ}, + [C(RESULT_MISS)] = {PERFMON_PC1, PC1_ITB_MISSES}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 
SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + }, + +}; + +static const struct sw64_perf_event *core3_map_hw_event(u64 config) +{ + return &sw64_pmu->hw_events[config]; +} + +static const struct sw64_perf_event *core3_map_cache_event(u64 config) +{ + unsigned int cache_type, cache_op, cache_result; + const struct sw64_perf_event *perf_event; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return ERR_PTR(-EINVAL); + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return ERR_PTR(-EINVAL); + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return ERR_PTR(-EINVAL); + + perf_event = &((*sw64_pmu->cache_events)[cache_type][cache_op][cache_result]); + if (perf_event->counter == -1) /* SW64_OP_UNSUP */ + return ERR_PTR(-ENOENT); + + return perf_event; +} + +/* + * r0xx for counter0, r1yy for counter1. + * According to the datasheet, 00 <= xx <= 0F, 00 <= yy <= 37 + */ +static bool core3_raw_event_valid(u64 config) +{ + if ((config >= (PC0_RAW_BASE + PC0_MIN) && config <= (PC0_RAW_BASE + PC0_MAX)) || + (config >= (PC1_RAW_BASE + PC1_MIN) && config <= (PC1_RAW_BASE + PC1_MAX))) { + return true; + } + + pr_info("sw64 pmu: invalid raw event config %#llx\n", config); + return false; +} + +static const struct sw64_pmu_t core3_pmu = { + .max_events = ARRAY_SIZE(core3_hw_event_map), + .hw_events = core3_hw_event_map, + .map_hw_event = core3_map_hw_event, + .cache_events = &core3_cache_event_map, + .map_cache_event = core3_map_cache_event, + .num_pmcs = MAX_HWEVENTS, + .pmc_count_mask = PMC_COUNT_MASK, + .pmc_max_period = PMC_COUNT_MASK, + .pmc_left = 4, + .raw_event_valid = core3_raw_event_valid, +}; + +/* + * Low-level functions: reading/writing counters + */ +static void sw64_write_pmc(int idx, unsigned long val) +{ + if (idx == PERFMON_PC0) + wrperfmon(PERFMON_CMD_WRITE_PC0, val); + else + wrperfmon(PERFMON_CMD_WRITE_PC1, val); +} + +static unsigned long sw64_read_pmc(int idx) +{ + unsigned long val; + + if (idx == PERFMON_PC0) + val = wrperfmon(PERFMON_CMD_READ, PERFMON_READ_PC0); + else + val = wrperfmon(PERFMON_CMD_READ, PERFMON_READ_PC1); + return val; +} + +/* Set a new period to sample over */ +static int sw64_perf_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + long left = local64_read(&hwc->period_left); + long period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (left > (long)sw64_pmu->pmc_max_period) + left = sw64_pmu->pmc_max_period; + + local64_set(&hwc->prev_count, (unsigned long)(-left)); + sw64_write_pmc(idx, (unsigned long)(sw64_pmu->pmc_max_period - left)); + + perf_event_update_userpage(event); + + return ret; +} + +/* + * Calculates the count (the 'delta') since the last time the PMC was read. 
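+ * Roughly:
+ *	delta = (new_raw_count - (prev_raw_count & pmc_count_mask)) + ovf;
+ *	if (delta < 0)
+ *		delta += pmc_max_period + 1;   (counter wrapped before the PMI)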
+ * + * As the PMCs' full period can easily be exceeded within the perf system + * sampling period we cannot use any high order bits as a guard bit in the + * PMCs to detect overflow as is done by other architectures. The code here + * calculates the delta on the basis that there is no overflow when ovf is + * zero. The value passed via ovf by the interrupt handler corrects for + * overflow. + * + * This can be racey on rare occasions -- a call to this routine can occur + * with an overflowed counter just before the PMI service routine is called. + * The check for delta negative hopefully always rectifies this situation. + */ +static unsigned long sw64_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx, long ovf) +{ + long prev_raw_count, new_raw_count; + long delta; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = sw64_read_pmc(idx); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = (new_raw_count - (prev_raw_count & sw64_pmu->pmc_count_mask)) + ovf; + + /* It is possible on very rare occasions that the PMC has overflowed + * but the interrupt is yet to come. Detect and fix this situation. + */ + if (unlikely(delta < 0)) + delta += sw64_pmu->pmc_max_period + 1; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +/* + * State transition functions: + * + * add()/del() & start()/stop() + * + */ + +/* + * pmu->add: add the event to PMU. + */ +static int sw64_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int err = 0; + unsigned long irq_flags; + + perf_pmu_disable(event->pmu); + local_irq_save(irq_flags); + + if (cpuc->pmcs[hwc->idx] == PMC_IN_USE) { + err = -ENOSPC; + goto out; + } + + cpuc->pmcs[hwc->idx] = PMC_IN_USE; + cpuc->event[hwc->idx] = event; + + + cpuc->n_events++; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + sw64_pmu_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. */ + perf_event_update_userpage(event); + +out: + local_irq_restore(irq_flags); + perf_pmu_enable(event->pmu); + + return err; +} + +/* + * pmu->del: delete the event from PMU. + */ +static void sw64_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + unsigned long irq_flags; + + perf_pmu_disable(event->pmu); + local_irq_save(irq_flags); + + if (cpuc->event[hwc->idx] != event) + goto out; + + cpuc->event[hwc->idx] = NULL; + cpuc->pmcs[hwc->idx] = PMC_NOT_USE; + cpuc->n_events--; + + sw64_pmu_stop(event, PERF_EF_UPDATE); + + /* Absorb the final count and turn off the event. */ + perf_event_update_userpage(event); + +out: + local_irq_restore(irq_flags); + perf_pmu_enable(event->pmu); +} + +/* + * pmu->start: start the event. 
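+ * (With PERF_EF_RELOAD the sample period is re-armed first; the event
+ * selector for the chosen counter is then programmed and counting is
+ * enabled in all modes via wrperfmon().)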
+ */ +static void sw64_pmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + sw64_perf_event_set_period(event, hwc, hwc->idx); + } + + hwc->state = 0; + + /* counting in all modes, for both counters */ + wrperfmon(PERFMON_CMD_PM, 4); + if (hwc->idx == PERFMON_PC0) { + wrperfmon(PERFMON_CMD_EVENT_PC0, hwc->event_base); + wrperfmon(PERFMON_CMD_ENABLE, PERFMON_ENABLE_ARGS_PC0); + } else { + wrperfmon(PERFMON_CMD_EVENT_PC1, hwc->event_base); + wrperfmon(PERFMON_CMD_ENABLE, PERFMON_ENABLE_ARGS_PC1); + } +} + +/* + * pmu->stop: stop the counter + */ +static void sw64_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + hwc->state |= PERF_HES_STOPPED; + barrier(); + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + sw64_perf_event_update(event, hwc, hwc->idx, 0); + hwc->state |= PERF_HES_UPTODATE; + } + + if (hwc->idx == 0) + wrperfmon(PERFMON_CMD_DISABLE, PERFMON_DISABLE_ARGS_PC0); + else + wrperfmon(PERFMON_CMD_DISABLE, PERFMON_DISABLE_ARGS_PC1); + +} + +/* + * pmu->read: read and update the counter + */ +static void sw64_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + sw64_perf_event_update(event, hwc, hwc->idx, 0); +} + +static bool supported_cpu(void) +{ + return true; +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + /* Nothing to be done! */ +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + const struct sw64_perf_event *event_type; + + + /* SW64 do not have per-counter usr/os/guest/host bits */ + if (event->attr.exclude_user || event->attr.exclude_kernel || + event->attr.exclude_hv || event->attr.exclude_idle || + event->attr.exclude_host || event->attr.exclude_guest) + return -EINVAL; + + /* + * SW64 does not support precise ip feature, and system hang when + * detecting precise_ip by perf_event_attr__set_max_precise_ip + * in userspace + */ + if (attr->precise_ip != 0) + return -EOPNOTSUPP; + + /* SW64 has fixed counter for given event type */ + if (attr->type == PERF_TYPE_HARDWARE) { + if (attr->config >= sw64_pmu->max_events) + return -EINVAL; + event_type = sw64_pmu->map_hw_event(attr->config); + hwc->idx = event_type->counter; + hwc->event_base = event_type->event; + } else if (attr->type == PERF_TYPE_HW_CACHE) { + event_type = sw64_pmu->map_cache_event(attr->config); + if (IS_ERR(event_type)) /* */ + return PTR_ERR(event_type); + hwc->idx = event_type->counter; + hwc->event_base = event_type->event; + } else { /* PERF_TYPE_RAW */ + if (!sw64_pmu->raw_event_valid(attr->config)) + return -EINVAL; + hwc->idx = attr->config >> 8; /* counter selector */ + hwc->event_base = attr->config & 0xff; /* event selector */ + } + + hwc->config = attr->config; + + if (!is_sampling_event(event)) + pr_debug("not sampling event\n"); + + event->destroy = hw_perf_event_destroy; + + if (!hwc->sample_period) { + hwc->sample_period = sw64_pmu->pmc_max_period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + return 0; +} + +/* + * Main entry point to initialise a HW performance event. 
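+ * Only PERF_TYPE_RAW, PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE are
+ * accepted, and branch stack sampling is rejected.  For raw events the
+ * config follows the rNxx scheme described above, e.g. a raw config of
+ * 0x105 (r105 in perf terms) selects counter 1, event 0x05, since
+ * __hw_perf_event_init() splits config into (config >> 8, config & 0xff).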
+ */ +static int sw64_pmu_event_init(struct perf_event *event) +{ + int err; + + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + default: + return -ENOENT; + } + + if (!sw64_pmu) + return -ENODEV; + + /* Do the real initialisation work. */ + err = __hw_perf_event_init(event); + + return err; +} + +static struct pmu pmu = { + .name = "core3-base", + .capabilities = PERF_PMU_CAP_NO_NMI, + .event_init = sw64_pmu_event_init, + .add = sw64_pmu_add, + .del = sw64_pmu_del, + .start = sw64_pmu_start, + .stop = sw64_pmu_stop, + .read = sw64_pmu_read, +}; + +void perf_event_print_debug(void) +{ + unsigned long flags; + unsigned long pcr0, pcr1; + int cpu; + + if (!supported_cpu()) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + pcr0 = wrperfmon(PERFMON_CMD_READ, PERFMON_READ_PC0); + pcr1 = wrperfmon(PERFMON_CMD_READ, PERFMON_READ_PC1); + + pr_info("CPU#%d: PCTR0[%lx] PCTR1[%lx]\n", cpu, pcr0, pcr1); + + local_irq_restore(flags); +} + +static void sw64_perf_event_irq_handler(unsigned long perfmon_num, + struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc; + struct perf_sample_data data; + struct perf_event *event; + struct hw_perf_event *hwc; + int idx; + + __this_cpu_inc(irq_pmi_count); + cpuc = this_cpu_ptr(&cpu_hw_events); + + idx = perfmon_num; + + event = cpuc->event[idx]; + + if (unlikely(!event)) { + /* This should never occur! */ + irq_err_count++; + pr_warn("PMI: No event at index %d!\n", idx); + wrperfmon(PERFMON_CMD_ENABLE, idx == 0 ? PERFMON_DISABLE_ARGS_PC0 : PERFMON_DISABLE_ARGS_PC1); + return; + } + + hwc = &event->hw; + sw64_perf_event_update(event, hwc, idx, sw64_pmu->pmc_max_period + 1); + perf_sample_data_init(&data, 0, hwc->last_period); + + if (sw64_perf_event_set_period(event, hwc, idx)) { + if (perf_event_overflow(event, &data, regs)) { + /* Interrupts coming too quickly; "throttle" the + * counter, i.e., disable it for a little while. 
+ */ + sw64_pmu_stop(event, 0); + } + } +} + +bool valid_utext_addr(unsigned long addr) +{ + return addr >= current->mm->start_code && addr <= current->mm->end_code; +} + +bool valid_dy_addr(unsigned long addr) +{ + bool ret = false; + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + + if (addr > TASK_SIZE || addr < TASK_UNMAPPED_BASE) + return ret; + vma = find_vma(mm, addr); + if (vma && vma->vm_start <= addr && (vma->vm_flags & VM_EXEC)) + ret = true; + return ret; +} + +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + unsigned long usp = current_user_stack_pointer(); + unsigned long user_addr; + int err; + + perf_callchain_store(entry, regs->pc); + + while (entry->nr < entry->max_stack && usp < current->mm->start_stack) { + if (!access_ok(usp, 8)) + break; + pagefault_disable(); + err = __get_user(user_addr, (unsigned long *)usp); + pagefault_enable(); + if (err) + break; + if (valid_utext_addr(user_addr) || valid_dy_addr(user_addr)) + perf_callchain_store(entry, user_addr); + usp = usp + 8; + } +} + +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + unsigned long *sp = (unsigned long *)current_thread_info()->pcb.ksp; + unsigned long addr; + + perf_callchain_store(entry, regs->pc); + + while (!kstack_end(sp) && entry->nr < entry->max_stack) { + addr = *sp++; + if (__kernel_text_address(addr)) + perf_callchain_store(entry, addr); + } +} + +/* + * Init call to initialise performance events at kernel startup. + */ +int __init init_hw_perf_events(void) +{ + if (!supported_cpu()) { + pr_info("Performance events: Unsupported CPU type!\n"); + return 0; + } + + pr_info("Performance events: Supported CPU type!\n"); + + /* Override performance counter IRQ vector */ + + perf_irq = sw64_perf_event_irq_handler; + + /* And set up PMU specification */ + sw64_pmu = &core3_pmu; + + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + + return 0; +} +early_initcall(init_hw_perf_events); diff --git a/arch/sw_64/kernel/perf_regs.c b/arch/sw_64/kernel/perf_regs.c new file mode 100644 index 000000000000..8eec2179eb86 --- /dev/null +++ b/arch/sw_64/kernel/perf_regs.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/bug.h> +#include <linux/perf_regs.h> +#include <asm/ptrace.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_SW64_MAX)) + return 0; + + return ((unsigned long *)regs)[idx]; +} + +#define REG_RESERVED (~((1ULL << PERF_REG_SW64_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_64; +} + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs) +{ + regs_user->regs = NULL; + regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; +} diff --git a/arch/sw_64/kernel/proc_misc.c b/arch/sw_64/kernel/proc_misc.c new file mode 100644 index 000000000000..ca107ec1e05e --- /dev/null +++ b/arch/sw_64/kernel/proc_misc.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> + +extern const struct seq_operations cpu_active_mask_op; +static int cpu_active_mask_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &cpu_active_mask_op); +} + +static const struct file_operations 
proc_cpu_active_mask_operations = { + .open = cpu_active_mask_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init proc_cpu_active_mask_init(void) +{ + proc_create("cpu_active_mask", 0, NULL, &proc_cpu_active_mask_operations); + return 0; +} +fs_initcall(proc_cpu_active_mask_init); diff --git a/arch/sw_64/kernel/process.c b/arch/sw_64/kernel/process.c new file mode 100644 index 000000000000..8fd493776bec --- /dev/null +++ b/arch/sw_64/kernel/process.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file handles the architecture-dependent parts of process handling. + */ + +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/time.h> +#include <linux/major.h> +#include <linux/stat.h> +#include <linux/vt.h> +#include <linux/mman.h> +#include <linux/elfcore.h> +#include <linux/reboot.h> +#include <linux/tty.h> +#include <linux/console.h> +#include <linux/slab.h> +#include <linux/rcupdate.h> +#include <linux/tick.h> +#include <linux/random.h> +#include <linux/uaccess.h> + +#include <asm/reg.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/fpu.h> +#include <asm/hcall.h> + +#include "proto.h" +#include "pci_impl.h" + +/* + * Power off function, if any + */ +void (*pm_power_off)(void) = machine_power_off; +EXPORT_SYMBOL(pm_power_off); + +struct halt_info { + int mode; + char *restart_cmd; +}; + +#ifdef CONFIG_HOTPLUG_CPU +void arch_cpu_idle_dead(void) +{ + play_dead(); +} +#endif + +void arch_cpu_idle(void) +{ + int i; + + local_irq_enable(); + cpu_relax(); + + if (is_in_guest()) + hcall(HCALL_HALT, 0, 0, 0); + else { + for (i = 0; i < 16; i++) + asm("nop"); + asm("halt"); + } +} + +static void common_shutdown_1(void *generic_ptr) +{ + struct halt_info *how = (struct halt_info *)generic_ptr; + int cpuid = smp_processor_id(); + + /* No point in taking interrupts anymore. */ + local_irq_disable(); + +#ifdef CONFIG_SMP + /* Secondaries halt here. */ + if (cpuid != 0) { + set_cpu_present(cpuid, false); + set_cpu_possible(cpuid, false); + + if (is_in_guest()) { + hcall(HCALL_SET_CLOCKEVENT, 0, 0, 0); + while (1) + asm("nop"); + } else + asm("halt"); + } +#endif + if (sw64_platform->kill_arch) + sw64_platform->kill_arch(how->mode); +} + +static void common_shutdown(int mode, char *restart_cmd) +{ + struct halt_info args; + + args.mode = mode; + args.restart_cmd = restart_cmd; + on_each_cpu(common_shutdown_1, &args, 0); +} + +void machine_restart(char *restart_cmd) +{ + common_shutdown(LINUX_REBOOT_CMD_RESTART, restart_cmd); +} + + +void machine_halt(void) +{ + common_shutdown(LINUX_REBOOT_CMD_HALT, NULL); +} + + +void machine_power_off(void) +{ + common_shutdown(LINUX_REBOOT_CMD_POWER_OFF, NULL); +} + + +/* Used by sysrq-p, among others. I don't believe r9-r15 are ever + * saved in the context it's used. 
+ */ + +void +show_regs(struct pt_regs *regs) +{ + show_regs_print_info(KERN_DEFAULT); + dik_show_regs(regs, NULL); +} + +/* + * Re-start a thread when doing execve() + */ +void +start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) +{ + regs->pc = pc; + regs->ps = 8; + wrusp(sp); +} +EXPORT_SYMBOL(start_thread); + + +void +flush_thread(void) +{ + /* Arrange for each exec'ed process to start off with a clean slate + * with respect to the FPU. This is all exceptions disabled. + */ + current_thread_info()->ieee_state = 0; + wrfpcr(FPCR_DYN_NORMAL | ieee_swcr_to_fpcr(0)); + + /* Clean slate for TLS. */ + current_thread_info()->pcb.unique = 0; +} + +void +release_thread(struct task_struct *dead_task) +{ +} + +/* + * Copy architecture-specific thread state + */ + +int +copy_thread(unsigned long clone_flags, unsigned long usp, + unsigned long kthread_arg, struct task_struct *p, + unsigned long tls) +{ + extern void ret_from_fork(void); + extern void ret_from_kernel_thread(void); + + struct thread_info *childti = task_thread_info(p); + struct pt_regs *childregs = task_pt_regs(p); + struct pt_regs *regs = current_pt_regs(); + struct switch_stack *childstack, *stack; + + childstack = ((struct switch_stack *) childregs) - 1; + childti->pcb.ksp = (unsigned long) childstack; + childti->pcb.flags = 7; /* set FEN, clear everything else */ + + if (unlikely(p->flags & PF_KTHREAD)) { + /* kernel thread */ + memset(childstack, 0, + sizeof(struct switch_stack) + sizeof(struct pt_regs)); + childstack->r26 = (unsigned long) ret_from_kernel_thread; + childstack->r9 = usp; /* function */ + childstack->r10 = kthread_arg; + childti->pcb.usp = 0; + return 0; + } + /* + * Note: if CLONE_SETTLS is not set, then we must inherit the + * value from the parent, which will have been set by the block + * copy in dup_task_struct. This is non-intuitive, but is + * required for proper operation in the case of a threaded + * application calling fork. + */ + if (clone_flags & CLONE_SETTLS) + childti->pcb.unique = tls; + else + regs->r20 = 0; + childti->pcb.usp = usp ?: rdusp(); + *childregs = *regs; + childregs->r0 = 0; + childregs->r19 = 0; + stack = ((struct switch_stack *) regs) - 1; + *childstack = *stack; + p->thread = current->thread; + childstack->r26 = (unsigned long) ret_from_fork; + return 0; +} + +/* + * Fill in the user structure for a ELF core dump. + */ +void +dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, struct thread_info *ti) +{ + /* switch stack follows right below pt_regs: */ + struct switch_stack *sw = ((struct switch_stack *) pt) - 1; + + dest[0] = pt->r0; + dest[1] = pt->r1; + dest[2] = pt->r2; + dest[3] = pt->r3; + dest[4] = pt->r4; + dest[5] = pt->r5; + dest[6] = pt->r6; + dest[7] = pt->r7; + dest[8] = pt->r8; + dest[9] = sw->r9; + dest[10] = sw->r10; + dest[11] = sw->r11; + dest[12] = sw->r12; + dest[13] = sw->r13; + dest[14] = sw->r14; + dest[15] = sw->r15; + dest[16] = pt->r16; + dest[17] = pt->r17; + dest[18] = pt->r18; + dest[19] = pt->r19; + dest[20] = pt->r20; + dest[21] = pt->r21; + dest[22] = pt->r22; + dest[23] = pt->r23; + dest[24] = pt->r24; + dest[25] = pt->r25; + dest[26] = pt->r26; + dest[27] = pt->r27; + dest[28] = pt->r28; + dest[29] = pt->gp; + dest[30] = ti == current_thread_info() ? rdusp() : ti->pcb.usp; + dest[31] = pt->pc; + + /* Once upon a time this was the PS value. Which is stupid + * since that is always 8 for usermode. Usurped for the more + * useful value of the thread's UNIQUE field. 
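+ * (pcb.unique is where this port keeps the thread's TLS pointer: it is
+ * cleared in flush_thread() and set from the CLONE_SETTLS argument in
+ * copy_thread() above.)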
+ */ + dest[32] = ti->pcb.unique; +} +EXPORT_SYMBOL(dump_elf_thread); + +int +dump_elf_task(elf_greg_t *dest, struct task_struct *task) +{ + dump_elf_thread(dest, task_pt_regs(task), task_thread_info(task)); + return 1; +} +EXPORT_SYMBOL(dump_elf_task); + +int +dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task) +{ + memcpy(dest, &task->thread.ctx_fp, 32 * 8); + return 1; +} +EXPORT_SYMBOL(dump_elf_task_fp); + +/* + * Return saved PC of a blocked thread. This assumes the frame + * pointer is the 6th saved long on the kernel stack and that the + * saved return address is the first long in the frame. This all + * holds provided the thread blocked through a call to schedule() ($15 + * is the frame pointer in schedule() and $15 is saved at offset 48 by + * entry.S:do_switch_stack). + * + * Under heavy swap load I've seen this lose in an ugly way. So do + * some extra sanity checking on the ranges we expect these pointers + * to be in so that we can fail gracefully. This is just for ps after + * all. -- r~ + */ + +unsigned long +thread_saved_pc(struct task_struct *t) +{ + unsigned long base = (unsigned long)task_stack_page(t); + unsigned long fp, sp = task_thread_info(t)->pcb.ksp; + + if (sp > base && sp+6*8 < base + 16*1024) { + fp = ((unsigned long *)sp)[6]; + if (fp > sp && fp < base + 16*1024) + return *(unsigned long *)fp; + } + + return 0; +} + +unsigned long +get_wchan(struct task_struct *p) +{ + unsigned long schedule_frame; + unsigned long pc, base, sp; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + /* + * This one depends on the frame size of schedule(). Do a + * "disass schedule" in gdb to find the frame size. Also, the + * code assumes that sleep_on() follows immediately after + * interruptible_sleep_on() and that add_timer() follows + * immediately after interruptible_sleep(). Ugly, isn't it? + * Maybe adding a wchan field to task_struct would be better, + * after all... 
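+ * Concretely, the code below takes the saved $15 at sp[6] as
+ * schedule()'s frame and returns the word at slot 12 of that frame,
+ * after range-checking both pointers against the task's stack page.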
+ */ + + pc = thread_saved_pc(p); + if (in_sched_functions(pc)) { + base = (unsigned long)task_stack_page(p); + sp = task_thread_info(p)->pcb.ksp; + schedule_frame = ((unsigned long *)sp)[6]; + if (schedule_frame > sp && schedule_frame < base + 16*1024) + return ((unsigned long *)schedule_frame)[12]; + } + return pc; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + return randomize_page(mm->brk, 0x02000000); +} diff --git a/arch/sw_64/kernel/proto.h b/arch/sw_64/kernel/proto.h new file mode 100644 index 000000000000..1a729a8f21c3 --- /dev/null +++ b/arch/sw_64/kernel/proto.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SW64_KERNEL_PROTO_H +#define _SW64_KERNEL_PROTO_H + +#include <linux/interrupt.h> +#include <linux/io.h> +#include <asm/pgtable.h> + +/* ptrace.c */ +extern int ptrace_set_bpt(struct task_struct *child); +extern int ptrace_cancel_bpt(struct task_struct *child); + +/* traps.c */ +extern void dik_show_regs(struct pt_regs *regs, unsigned long *r9_15); +extern void die_if_kernel(char *str, struct pt_regs *regs, long err, unsigned long *r9_15); + +/* timer.c */ +extern void setup_timer(void); + +extern void __init setup_sched_clock(void); +#ifdef CONFIG_GENERIC_SCHED_CLOCK +extern void __init sw64_sched_clock_init(void); +#endif + +#endif /* _SW64_PROTO_H */ diff --git a/arch/sw_64/kernel/ptrace.c b/arch/sw_64/kernel/ptrace.c new file mode 100644 index 000000000000..5f29c500c8b1 --- /dev/null +++ b/arch/sw_64/kernel/ptrace.c @@ -0,0 +1,707 @@ +// SPDX-License-Identifier: GPL-2.0 +/* ptrace.c */ +/* By Ross Biro 1/23/92 */ +/* edited by Linus Torvalds */ +/* mangled further by Bob Manson (manson@santafe.edu) */ +/* more mutilation by David Mosberger (davidm@azstarnet.com) */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/security.h> +#include <linux/signal.h> +#include <linux/tracehook.h> +#include <linux/seccomp.h> +#include <linux/audit.h> +#include <linux/uaccess.h> + +#include <asm/pgtable.h> +#include <asm/fpu.h> +#include <asm/core.h> +#include "proto.h" + +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + +#define DEBUG DBG_MEM +#undef DEBUG + +#define DEBUG 0 + +#ifdef DEBUG +enum { + DBG_MEM = (1 << 0), + DBG_BPT = (1 << 1), + DBG_MEM_ALL = (1 << 2) +}; +#define DBG(fac, args) \ +{ \ + if ((fac) & DEBUG) \ + printk args; \ +} +#else +#define DBG(fac, args) +#endif + +#define BREAKINST 0x00000080 /* sys_call bpt */ + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Processes always block with the following stack-layout: + * + * +================================+ <---- task + 2*PAGE_SIZE + * | HMcode saved frame (ps, pc, | ^ + * | gp, a0, a1, a2) | | + * +================================+ | struct pt_regs + * | | | + * | frame generated by SAVE_ALL | | + * | | v + * +================================+ + * | | ^ + * | frame saved by do_switch_stack | | struct switch_stack + * | | v + * +================================+ + */ + +/* + * The following table maps a register index into the stack offset at + * which the register is saved. Register indices are 0-31 for integer + * regs, 32-63 for fp regs, and 64 for the pc. Notice that sp and + * zero have no stack-slot and need to be treated specially (see + * get_reg/put_reg below). 
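+ * For example, r9-r15 resolve to switch_stack slots (SW_REG()), r16
+ * to a pt_regs slot (PT_REG()), and floating-point register n to byte
+ * offset n * 32 within thread.ctx_fp, with its vector lanes at +8,
+ * +16 and +24 (FP_REG()).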
+ */ +enum { + REG_R0 = 0, + REG_F0 = 32, + REG_FPCR = 63, + REG_PC = 64, + REG_SP = 30, + REG_PS = 31, + REG_GP = 29 +}; + +#define PT_REG(reg) \ + (PAGE_SIZE * 2 - sizeof(struct pt_regs) + offsetof(struct pt_regs, reg)) + +#define SW_REG(reg) \ + (PAGE_SIZE * 2 - sizeof(struct pt_regs) - sizeof(struct switch_stack) \ + + offsetof(struct switch_stack, reg)) + +#define FP_REG(fp_regno, vector_regno) \ + (fp_regno * 32 + vector_regno * 8) + +static int regoff[] = { + PT_REG(r0), PT_REG(r1), PT_REG(r2), PT_REG(r3), + PT_REG(r4), PT_REG(r5), PT_REG(r6), PT_REG(r7), + PT_REG(r8), SW_REG(r9), SW_REG(r10), SW_REG(r11), + SW_REG(r12), SW_REG(r13), SW_REG(r14), SW_REG(r15), + PT_REG(r16), PT_REG(r17), PT_REG(r18), PT_REG(r19), + PT_REG(r20), PT_REG(r21), PT_REG(r22), PT_REG(r23), + PT_REG(r24), PT_REG(r25), PT_REG(r26), PT_REG(r27), + PT_REG(r28), PT_REG(gp), -1, -1 +}; + +#define PCB_OFF(var) offsetof(struct pcb_struct, var) + +static int pcboff[] = { + [USP] = PCB_OFF(usp), + [UNIQUE] = PCB_OFF(unique), + [DA_MATCH] = PCB_OFF(da_match), + [DA_MASK] = PCB_OFF(da_mask), + [DV_MATCH] = PCB_OFF(dv_match), + [DV_MASK] = PCB_OFF(dv_mask), + [DC_CTL] = PCB_OFF(dc_ctl) +}; + +static unsigned long zero; + +/* + * Get address of register REGNO in task TASK. + */ + +static unsigned long * +get_reg_addr(struct task_struct *task, unsigned long regno) +{ + unsigned long *addr; + int fp_regno, vector_regno; + + switch (regno) { + case USP: + case UNIQUE: + case DA_MATCH: + case DA_MASK: + case DV_MATCH: + case DV_MASK: + case DC_CTL: + addr = (void *)task_thread_info(task) + pcboff[regno]; + break; + case REG_BASE ... REG_END: + addr = (void *)task_thread_info(task) + regoff[regno]; + break; + case FPREG_BASE ... FPREG_END: + fp_regno = regno - FPREG_BASE; + vector_regno = 0; + addr = (void *)((unsigned long)&task->thread.ctx_fp + FP_REG(fp_regno, vector_regno)); + break; + case VECREG_BASE ... VECREG_END: + /* + * return addr for zero value if we catch vectors of f31 + * v0 and v3 of f31 are not in this range so ignore them + */ + if (regno == F31_V1 || regno == F31_V2) { + addr = &zero; + break; + } + fp_regno = (regno - VECREG_BASE) & 0x1f; + vector_regno = 1 + ((regno - VECREG_BASE) >> 5); + addr = (void *)((unsigned long)&task->thread.ctx_fp + FP_REG(fp_regno, vector_regno)); + break; + case FPCR: + addr = (void *)&task->thread.fpcr; + break; + case PC: + addr = (void *)task_thread_info(task) + PT_REG(pc); + break; + default: + addr = &zero; + } + + return addr; +} + +/* + * Get contents of register REGNO in task TASK. + */ +unsigned long +get_reg(struct task_struct *task, unsigned long regno) +{ + return *get_reg_addr(task, regno); +} + +/* + * Write contents of register REGNO in task TASK. + */ +static int +put_reg(struct task_struct *task, unsigned long regno, unsigned long data) +{ + *get_reg_addr(task, regno) = data; + return 0; +} + +static inline int +read_int(struct task_struct *task, unsigned long addr, int *data) +{ + int copied = access_process_vm(task, addr, data, sizeof(int), FOLL_FORCE); + + return (copied == sizeof(int)) ? 0 : -EIO; +} + +static inline int +write_int(struct task_struct *task, unsigned long addr, int data) +{ + int copied = access_process_vm(task, addr, &data, sizeof(int), + FOLL_FORCE | FOLL_WRITE); + return (copied == sizeof(int)) ? 0 : -EIO; +} + +/* + * Set breakpoint. 
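+ *
+ * Single-stepping is implemented with breakpoints: the possible next
+ * PCs are computed (both arms of a conditional branch, the target
+ * register of a jump, or simply pc + 4), the original instructions
+ * are saved in bpt_insn[], and BREAKINST is written in their place.
+ * The expression ((s32)(insn << 11)) >> 9 sign-extends the 21-bit
+ * branch displacement and converts it from instructions to bytes.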
+ */ +int +ptrace_set_bpt(struct task_struct *child) +{ + int displ, i, res, reg_b, nsaved = 0; + unsigned int insn, op_code; + unsigned long pc; + + pc = get_reg(child, REG_PC); + res = read_int(child, pc, (int *)&insn); + if (res < 0) + return res; + + op_code = insn >> 26; + /* br bsr beq bne blt ble bgt bge blbc blbs fbeq fbne fblt fble fbgt fbge */ + if ((1UL << op_code) & 0x3fff000000000030UL) { + /* + * It's a branch: instead of trying to figure out + * whether the branch will be taken or not, we'll put + * a breakpoint at either location. This is simpler, + * more reliable, and probably not a whole lot slower + * than the alternative approach of emulating the + * branch (emulation can be tricky for fp branches). + */ + displ = ((s32)(insn << 11)) >> 9; + task_thread_info(child)->bpt_addr[nsaved++] = pc + 4; + if (displ) /* guard against unoptimized code */ + task_thread_info(child)->bpt_addr[nsaved++] + = pc + 4 + displ; + DBG(DBG_BPT, ("execing branch\n")); + /*call ret jmp*/ + } else if (op_code >= 0x1 && op_code <= 0x3) { + reg_b = (insn >> 16) & 0x1f; + task_thread_info(child)->bpt_addr[nsaved++] = get_reg(child, reg_b); + DBG(DBG_BPT, ("execing jump\n")); + } else { + task_thread_info(child)->bpt_addr[nsaved++] = pc + 4; + DBG(DBG_BPT, ("execing normal insn\n")); + } + + /* install breakpoints: */ + for (i = 0; i < nsaved; ++i) { + res = read_int(child, task_thread_info(child)->bpt_addr[i], + (int *)&insn); + if (res < 0) + return res; + task_thread_info(child)->bpt_insn[i] = insn; + DBG(DBG_BPT, (" -> next_pc=%lx\n", + task_thread_info(child)->bpt_addr[i])); + res = write_int(child, task_thread_info(child)->bpt_addr[i], + BREAKINST); + if (res < 0) + return res; + } + task_thread_info(child)->bpt_nsaved = nsaved; + return 0; +} + +/* + * Ensure no single-step breakpoint is pending. Returns non-zero + * value if child was being single-stepped. + */ +int +ptrace_cancel_bpt(struct task_struct *child) +{ + int i, nsaved = task_thread_info(child)->bpt_nsaved; + + task_thread_info(child)->bpt_nsaved = 0; + + if (nsaved > 2) { + printk("%s: bogus nsaved: %d!\n", __func__, nsaved); + nsaved = 2; + } + + for (i = 0; i < nsaved; ++i) { + write_int(child, task_thread_info(child)->bpt_addr[i], + task_thread_info(child)->bpt_insn[i]); + } + return (nsaved != 0); +} + +void user_enable_single_step(struct task_struct *child) +{ + /* Mark single stepping. */ + task_thread_info(child)->bpt_nsaved = -1; +} + +void user_disable_single_step(struct task_struct *child) +{ + ptrace_cancel_bpt(child); +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure the single step bit is not set. 
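+ * ("Single step" here means the breakpoints planted by
+ * ptrace_set_bpt(); they are removed by ptrace_cancel_bpt().)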
+ */ +void ptrace_disable(struct task_struct *child) +{ + user_disable_single_step(child); +} + +int ptrace_getregs(struct task_struct *child, __s64 __user *data) +{ + int ret, retval = 0; + int i; + unsigned long regval; + + if (!access_ok(data, sizeof(long) * 33)) + return -EIO; + + /* r0-r15 */ + for (i = 0; i < 16; i++) { + regval = get_reg(child, i); + retval |= __put_user((long)regval, data + i); + } + /* r19-r28 */ + for (i = 19; i < 29; i++) { + regval = get_reg(child, i); + retval |= __put_user((long)regval, data + i - 3); + } + /*SP, PS ,PC,GP*/ + retval |= __put_user((long)(get_reg(child, REG_SP)), data + EF_SP); + retval |= __put_user((long)(get_reg(child, REG_PS)), data + EF_PS); + retval |= __put_user((long)(get_reg(child, REG_PC)), data + EF_PC); + retval |= __put_user((long)(get_reg(child, REG_GP)), data + EF_GP); + /* r16-r18 */ + retval |= __put_user((long)(get_reg(child, 16)), data + EF_A0); + retval |= __put_user((long)(get_reg(child, 17)), data + EF_A1); + retval |= __put_user((long)(get_reg(child, 18)), data + EF_A2); + + ret = retval ? -EIO : 0; + return ret; +} + +int ptrace_setregs(struct task_struct *child, __s64 __user *data) +{ + int ret, retval = 0; + int i; + unsigned long regval; + + if (!access_ok(data, sizeof(long) * 33)) + return -EIO; + + /* r0-r15 */ + for (i = 0; i < 16; i++) { + retval |= __get_user(regval, data + i); + ret = put_reg(child, i, regval); + } + /* r19-r28 */ + for (i = 19; i < 29; i++) { + retval |= __get_user(regval, data + i - 3); + ret = put_reg(child, i, regval); + } + /*SP, PS ,PC,GP*/ + retval |= __get_user(regval, data + EF_SP); + ret = put_reg(child, REG_SP, regval); + retval |= __get_user(regval, data + EF_PS); + ret = put_reg(child, REG_PS, regval); + retval |= __get_user(regval, data + EF_PC); + ret = put_reg(child, REG_PC, regval); + retval |= __get_user(regval, data + EF_GP); + ret = put_reg(child, REG_GP, regval); + /* r16-r18 */ + retval |= __get_user(regval, data + EF_A0); + ret = put_reg(child, 16, regval); + retval |= __get_user(regval, data + EF_A1); + ret = put_reg(child, 17, regval); + retval |= __get_user(regval, data + EF_A2); + ret = put_reg(child, 18, regval); + + ret = retval ? -EIO : 0; + return 0; +} + +int ptrace_getfpregs(struct task_struct *child, __s64 __user *data) +{ + int ret, retval = 0; + int i; + unsigned long regval; + + if (!access_ok(data, sizeof(long) * 32)) + return -EIO; + + /* fp0-fp31 */ + for (i = 0; i < 32; i++) { + regval = get_reg(child, REG_F0 + i); + retval |= __put_user((long)regval, data + i); + } + + ret = retval ? -EIO : 0; + return 0; +} + +int ptrace_setfpregs(struct task_struct *child, __s64 __user *data) +{ + int ret, retval = 0; + int i; + unsigned long regval; + + if (!access_ok(data, sizeof(long) * 32)) + return -EIO; + + /* fp0-fp31 */ + for (i = 0; i < 32; i++) { + retval |= __get_user(regval, data + i); + ret = put_reg(child, REG_F0 + i, regval); + } + + return ret; +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + unsigned long tmp; + size_t copied; + long ret; + void __user *datavp = (void __user *) data; + + switch (request) { + /* When I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. */ + case PTRACE_PEEKDATA: + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); + ret = -EIO; + if (copied != sizeof(tmp)) + break; + + force_successful_syscall_return(); + ret = tmp; + break; + + /* Read register number ADDR. 
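+ * force_successful_syscall_return() is needed because the register
+ * value is handed back in r0 and could otherwise be mistaken for an
+ * error code.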
*/ + case PTRACE_PEEKUSR: + force_successful_syscall_return(); + ret = get_reg(child, addr); + DBG(DBG_MEM, ("peek $%lu->%#lx\n", addr, ret)); + break; + + /* When I and D space are separate, this will have to be fixed. */ + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: + ret = generic_ptrace_pokedata(child, addr, data); + break; + + case PTRACE_POKEUSR: /* write the specified register */ + DBG(DBG_MEM, ("poke $%lu<-%#lx\n", addr, data)); + ret = put_reg(child, addr, data); + break; + case PTRACE_GETREGS: + ret = ptrace_getregs(child, datavp); + break; + case PTRACE_SETREGS: + ret = ptrace_setregs(child, datavp); + break; + case PTRACE_GETFPREGS: + ret = ptrace_getfpregs(child, datavp); + break; + case PTRACE_SETFPREGS: + ret = ptrace_setfpregs(child, datavp); + break; + default: + ret = ptrace_request(child, request, addr, data); + break; + } + return ret; +} + +asmlinkage unsigned long syscall_trace_enter(void) +{ + unsigned long ret = 0; + struct pt_regs *regs = current_pt_regs(); + + if (test_thread_flag(TIF_SYSCALL_TRACE) && + tracehook_report_syscall_entry(current_pt_regs())) + ret = -1UL; + +#ifdef CONFIG_SECCOMP + /* Do seccomp after ptrace, to catch any tracer changes. */ + if (secure_computing() == -1) + return -1; +#endif + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->r0); + audit_syscall_entry(regs->r0, regs->r16, regs->r17, regs->r18, regs->r19); + return ret ?: current_pt_regs()->r0; +} + +asmlinkage void +syscall_trace_leave(void) +{ + struct pt_regs *regs = current_pt_regs(); + + audit_syscall_exit(current_pt_regs()); + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(current_pt_regs(), 0); + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs_return_value(regs)); +} + +static long rwcsr(int rw, unsigned long csr, unsigned long value) +{ + register unsigned long __r0 __asm__("$0"); + register unsigned long __r16 __asm__("$16") = rw; + register unsigned long __r17 __asm__("$17") = csr; + register unsigned long __r18 __asm__("$18") = value; + + __asm__ __volatile__( + "sys_call %4" + : "=r"(__r0), "=r"(__r16), "=r"(__r17), "=r"(__r18) + : "i"(HMC_rwreg), "1"(__r16), "2"(__r17), "3"(__r18) + : "$1", "$22", "$23", "$24", "$25"); + + return __r0; +} + +#define RCSR 0 +#define WCSR 1 + +#define CSR_DA_MATCH 0 +#define CSR_DA_MASK 1 +#define CSR_IA_MATCH 2 +#define CSR_IA_MASK 3 +#define CSR_IDA_MATCH 6 +#define CSR_IDA_MASK 7 +#define CSR_DC_CTL 11 +#define CSR_DV_MATCH 15 +#define CSR_DV_MASK 16 + +#define DV_MATCH_EN_S 19 +#define DAV_MATCH_EN_S 20 + +int do_match(unsigned long address, unsigned long mmcsr, long cause, struct pt_regs *regs) +{ + unsigned long dc_ctl; + unsigned long value; + + printk("%s: pid %d, name = %s,cause = %#lx, mmcsr = %#lx, address = %#lx, pc %#lx\n", + __func__, current->pid, current->comm, cause, mmcsr, address, regs->pc); + + switch (mmcsr) { + case MMCSR__DA_MATCH: + case MMCSR__DV_MATCH: + case MMCSR__DAV_MATCH: + dik_show_regs(regs, (unsigned long *)regs-15); + + if (!(current->ptrace & PT_PTRACED)) { + printk(" pid %d %s not be ptraced, return\n", current->pid, current->comm); + if (mmcsr == MMCSR__DA_MATCH) + rwcsr(WCSR, CSR_DA_MATCH, 0); //clear da_match + if (mmcsr == MMCSR__DV_MATCH) { + value = rwcsr(RCSR, CSR_DV_MATCH, 0); + printk("value is %#lx\n", value); + value = rwcsr(RCSR, CSR_DV_MASK, 0); + printk("value is %#lx\n", value); + dc_ctl = rwcsr(RCSR, CSR_DC_CTL, 0); + dc_ctl &= ~(0x1UL << 
DV_MATCH_EN_S); + rwcsr(WCSR, CSR_DC_CTL, dc_ctl); + } + if (mmcsr == MMCSR__DAV_MATCH) { + dc_ctl = rwcsr(RCSR, CSR_DC_CTL, 0); + dc_ctl &= ~((0x1UL << DV_MATCH_EN_S) | (0x1UL << DAV_MATCH_EN_S)); + rwcsr(WCSR, CSR_DC_CTL, dc_ctl); + rwcsr(WCSR, CSR_DA_MATCH, 0); //clear da_match + } + task_thread_info(current)->pcb.da_match = 0; + task_thread_info(current)->pcb.dv_match = 0; + task_thread_info(current)->pcb.dc_ctl = 0; + return 1; + } + + if (mmcsr == MMCSR__DA_MATCH) { + rwcsr(WCSR, CSR_DA_MATCH, 0); //clear da_match + task_thread_info(current)->pcb.da_match = 0; + } + if (mmcsr == MMCSR__DV_MATCH) { + dc_ctl = rwcsr(RCSR, CSR_DC_CTL, 0); + dc_ctl &= ~(0x1UL << DV_MATCH_EN_S); + rwcsr(WCSR, CSR_DC_CTL, dc_ctl); + } + if (mmcsr == MMCSR__DAV_MATCH) { + dc_ctl = rwcsr(RCSR, CSR_DC_CTL, 0); + dc_ctl &= ~((0x1UL << DV_MATCH_EN_S) | (0x1UL << DAV_MATCH_EN_S)); + rwcsr(WCSR, CSR_DC_CTL, dc_ctl); + rwcsr(WCSR, CSR_DA_MATCH, 0); //clear da_match + } + task_thread_info(current)->pcb.dv_match = 0; + task_thread_info(current)->pcb.dc_ctl = 0; + printk("do_page_fault: want to send SIGTRAP, pid = %d\n", current->pid); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void *) address, 0); + return 1; + + case MMCSR__IA_MATCH: + rwcsr(WCSR, CSR_IA_MATCH, 0); //clear ia_match + return 1; + case MMCSR__IDA_MATCH: + rwcsr(WCSR, CSR_IDA_MATCH, 0); //clear ida_match + return 1; + } + + return 0; +} + +void restore_da_match_after_sched(void) +{ + unsigned long dc_ctl_mode; + unsigned long dc_ctl; + struct pcb_struct *pcb = &task_thread_info(current)->pcb; + + if (!(pcb->da_match || pcb->da_mask || pcb->dv_match || pcb->dv_mask || pcb->dc_ctl)) + return; + printk("Restroe MATCH status, pid: %d\n", current->pid); + rwcsr(WCSR, CSR_DA_MATCH, 0); + rwcsr(WCSR, CSR_DA_MASK, pcb->da_mask); + rwcsr(WCSR, CSR_DA_MATCH, pcb->da_match); + dc_ctl_mode = pcb->dc_ctl; + dc_ctl = rwcsr(RCSR, CSR_DC_CTL, 0); + dc_ctl &= ~((0x1UL << DV_MATCH_EN_S) | (0x1UL << DAV_MATCH_EN_S)); + dc_ctl |= ((dc_ctl_mode << DV_MATCH_EN_S) & ((0x1UL << DV_MATCH_EN_S) | (0x1UL << DAV_MATCH_EN_S))); + if (dc_ctl_mode & 0x1) { + rwcsr(WCSR, CSR_DV_MATCH, pcb->dv_match); + rwcsr(WCSR, CSR_DV_MASK, pcb->dv_mask); + rwcsr(WCSR, CSR_DC_CTL, dc_ctl); + } +} + +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(reg, r) { \ + .name = #reg, \ + .offset = offsetof(struct pt_regs, r) \ +} + +#define REG_OFFSET_END { \ + .name = NULL, \ + .offset = 0 \ +} + +static const struct pt_regs_offset regoffset_table[] = { + REG_OFFSET_NAME(r0, r0), + REG_OFFSET_NAME(r1, r1), + REG_OFFSET_NAME(r2, r2), + REG_OFFSET_NAME(r3, r3), + REG_OFFSET_NAME(r4, r4), + REG_OFFSET_NAME(r5, r5), + REG_OFFSET_NAME(r6, r6), + REG_OFFSET_NAME(r7, r7), + REG_OFFSET_NAME(r8, r8), + REG_OFFSET_NAME(r19, r19), + REG_OFFSET_NAME(r20, r20), + REG_OFFSET_NAME(r21, r21), + REG_OFFSET_NAME(r22, r22), + REG_OFFSET_NAME(r23, r23), + REG_OFFSET_NAME(r24, r24), + REG_OFFSET_NAME(r25, r25), + REG_OFFSET_NAME(r26, r26), + REG_OFFSET_NAME(r27, r27), + REG_OFFSET_NAME(r28, r28), + REG_OFFSET_NAME(hae, hae), + REG_OFFSET_NAME(trap_a0, trap_a0), + REG_OFFSET_NAME(trap_a1, trap_a1), + REG_OFFSET_NAME(trap_a2, trap_a2), + REG_OFFSET_NAME(ps, ps), + REG_OFFSET_NAME(pc, pc), + REG_OFFSET_NAME(gp, gp), + REG_OFFSET_NAME(r16, r16), + REG_OFFSET_NAME(r17, r17), + REG_OFFSET_NAME(r18, r18), + REG_OFFSET_END, +}; +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a 
register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} diff --git a/arch/sw_64/kernel/relocate.c b/arch/sw_64/kernel/relocate.c new file mode 100644 index 000000000000..36b16d84d5ab --- /dev/null +++ b/arch/sw_64/kernel/relocate.c @@ -0,0 +1,313 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Support for Kernel relocation at boot time + * + * Copyright (C) 2019 He Sheng + * Authors: He Sheng (hesheng05@gmail.com) + */ +#include <asm/hmcall.h> +#include <asm/setup.h> +#include <asm/sections.h> +#include <linux/mm_types.h> +#include <linux/elf.h> +#include <linux/kernel.h> +#include <linux/libfdt.h> +#include <linux/of_fdt.h> +#include <linux/sched.h> +#include <linux/start_kernel.h> +#include <linux/string.h> +#include <linux/printk.h> +#include <linux/notifier.h> +#include <linux/mm.h> + +#define INITRD_ADDR 0x3000000UL +#define KTEXT_MAX 0xffffffffa0000000UL +#define RELOCATED(x) ((void *)((unsigned long)x + offset)) + +extern unsigned long _got_start[]; +extern unsigned long _got_end[]; +extern char pre_start_kernel[]; + +extern unsigned int _relocation_start[]; /* End kernel image / start relocation table */ +extern unsigned int _relocation_end[]; /* End relocation table */ + +extern unsigned long __start___ex_table; /* Start exception table */ +extern unsigned long __stop___ex_table; /* End exception table */ +extern union thread_union init_thread_union; + +extern void __weak plat_fdt_relocated(void *new_location); + +/* + * This function may be defined for a platform to perform any post-relocation + * fixup necessary. 
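+ * It is called at the end of relocate_kernel(), after the image has
+ * been copied and the relocations, GOT and .bss set up at the new
+ * location.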
+ * Return non-zero to abort relocation + */ +int __weak plat_post_relocation(long offset) +{ + return 0; +} + + +static void __init sync_icache(void) +{ + // IC_FLUSH + imb(); +} + +static int __init apply_r_sw64_refquad(unsigned long *loc_orig, unsigned long *loc_new, unsigned int offset) +{ + *(unsigned long *)loc_new += offset; + + return 0; +} + +static int (*reloc_handlers_rel[]) (unsigned long *, unsigned long *, unsigned int) __initdata = { + [R_SW64_REFQUAD] = apply_r_sw64_refquad, +}; + +int __init do_relocations(void *kbase_old, void *kbase_new, unsigned int offset) +{ + unsigned int *r; + unsigned long *loc_orig; + unsigned long *loc_new; + int type; + int res; + + for (r = _relocation_start; r < _relocation_end; r++) { + /* Sentinel for last relocation */ + if (*r == 0) + break; + + type = (*r >> 24) & 0xff; + loc_orig = kbase_old + ((*r & 0x00ffffff) << 2); + loc_new = RELOCATED(loc_orig); + + if (reloc_handlers_rel[type] == NULL) { + /* Unsupported relocation */ + pr_err("Unhandled relocation type %d at 0x%pK\n", + type, loc_orig); + return -ENOEXEC; + } + + res = reloc_handlers_rel[type](loc_orig, loc_new, offset); + if (res) + return res; + } + + return 0; +} + +static int __init relocate_got(unsigned int offset) +{ + unsigned long *got_start, *got_end, *e; + + got_start = RELOCATED(&_got_start); + got_end = RELOCATED(&_got_end); + + for (e = got_start; e < got_end; e++) + *e += offset; + + return 0; +} + +#ifdef CONFIG_RANDOMIZE_BASE + +static inline __init unsigned long rotate_xor(unsigned long hash, + const void *area, size_t size) +{ + size_t i; + unsigned long start, *ptr; + /* Make sure start is 8 byte aligned */ + start = ALIGN((unsigned long)area, 8); + size -= (start - (unsigned long)area); + ptr = (unsigned long *) start; + for (i = 0; i < size / sizeof(hash); i++) { + /* Rotate by odd number of bits and XOR. */ + hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); + hash ^= ptr[i]; + } + return hash; +} + +static inline __init unsigned long get_random_boot(void) +{ + unsigned long entropy = random_get_entropy(); + unsigned long hash = 0; + + /* Attempt to create a simple but unpredictable starting entropy. */ + hash = rotate_xor(hash, linux_banner, strlen(linux_banner)); + + /* Add in any runtime entropy we can get */ + hash = rotate_xor(hash, &entropy, sizeof(entropy)); + + return hash; +} + +static inline __init bool kaslr_disabled(void) +{ + char *str; + + str = strstr(COMMAND_LINE, "nokaslr"); + if (str == COMMAND_LINE || (str > COMMAND_LINE && *(str - 1) == ' ')) + return true; + + return false; +} + +static unsigned long __init determine_relocation_offset(void) +{ + /* Choose a new address for the kernel */ + unsigned long kernel_length; + unsigned long offset; + + if (kaslr_disabled()) + return 0; + + kernel_length = (unsigned long)_end - (unsigned long)(&_text); + + /* TODO: offset is 64K align. maybe 8KB align is okay. */ + offset = get_random_boot() << 16; + offset &= (CONFIG_RANDOMIZE_BASE_MAX_OFFSET - 1); + if (offset < kernel_length) + offset += ALIGN(kernel_length, 0x10000); + + /* TODO: 119MB is for test */ + offset = (119 << 20); + if ((KTEXT_MAX - (unsigned long)_end) < offset) + offset = 0; + + // TODO:new location should not overlaps initrd + + return offset; +} + +#else + +static inline unsigned long __init determine_relocation_offset(void) +{ + /* + * Choose a new address for the kernel + * For now we'll hard code the destination offset. 
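+ * Returning 0 keeps the kernel at its link-time address;
+ * relocate_kernel() skips the copy when the offset is zero.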
+ */ + return 0; +} + +#endif + +static inline int __init relocation_offset_valid(unsigned long offset) +{ + unsigned long loc_new = (unsigned long)_text + offset; + + if (loc_new & 0x0000ffff) { + /* Inappropriately aligned new location */ + return 0; + } + if (loc_new < (unsigned long)&_end) { + /* New location overlaps original kernel */ + return 0; + } + return 1; +} + +unsigned int __init relocate_kernel(void) +{ + void *loc_new; + unsigned long kernel_length; + unsigned long bss_length; + unsigned int offset = 0; + int res = 1; + + kernel_length = (unsigned long)(&_relocation_start) - (long)(&_text); + bss_length = (unsigned long)&__bss_stop - (long)&__bss_start; + + offset = determine_relocation_offset(); + + /* Reset the command line now so we don't end up with a duplicate */ + //arcs_cmdline[0] = '\0'; + + /* Sanity check relocation address */ + if (offset && relocation_offset_valid(offset)) { + + loc_new = RELOCATED(&_text); + /* Copy the kernel to it's new location */ + memcpy(loc_new, &_text, kernel_length); + + /* Perform relocations on the new kernel */ + res = do_relocations(&_text, loc_new, offset); + if (res < 0) + goto out; + + /* Sync the caches ready for execution of new kernel */ + sync_icache(); + + res = relocate_got(offset); + if (res < 0) + goto out; + + /* + * The original .bss has already been cleared, and + * some variables such as command line parameters + * stored to it so make a copy in the new location. + */ + memcpy(RELOCATED(&__bss_start), &__bss_start, bss_length); + + /* + * Last chance for the platform to abort relocation. + * This may also be used by the platform to perform any + * initialisation required now that the new kernel is + * resident in memory and ready to be executed. + */ + if (plat_post_relocation(offset)) + goto out; + + /* The current thread is now within the relocated image */ + __current_thread_info = RELOCATED(&init_thread_union); + + /* Return the new kernel's offset */ + //printk("loc_new:%p, start_kernel: %p, gp:%p\n", loc_new, kernel_entry, kgp); + return offset; + } +out: + return 0; +} + +/* + * Show relocation information on panic. + */ +void show_kernel_relocation(const char *level) +{ + unsigned long offset; + + offset = __pa_symbol(_text) - __pa_symbol(_TEXT_START); + + if (IS_ENABLED(CONFIG_RELOCATABLE) && offset > 0) { + printk(level); + pr_cont("Kernel relocated by 0x%pK\n", (void *)offset); + pr_cont(" .text @ 0x%pK\n", _text); + pr_cont(" .data @ 0x%pK\n", _sdata); + pr_cont(" .bss @ 0x%pK\n", __bss_start); + } +} + +static int kernel_location_notifier_fn(struct notifier_block *self, + unsigned long v, void *p) +{ + show_kernel_relocation(KERN_EMERG); + return NOTIFY_DONE; +} + +static struct notifier_block kernel_location_notifier = { + .notifier_call = kernel_location_notifier_fn +}; + +static int __init register_kernel_offset_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_location_notifier); + return 0; +} +device_initcall(register_kernel_offset_dumper); diff --git a/arch/sw_64/kernel/relocate_kernel.S b/arch/sw_64/kernel/relocate_kernel.S new file mode 100644 index 000000000000..f1a160636212 --- /dev/null +++ b/arch/sw_64/kernel/relocate_kernel.S @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * relocate_kernel.S for kexec + * Created by hesheng05@gmail.com Jul 2 2019 + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
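+ *
+ * Each entry of the kexec indirection list processed below is a page
+ * address with flag bits in its low bits: 0x1 destination page, 0x2
+ * next indirection page, 0x4 done, 0x8 source page to copy.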
+ */ + +#include <asm/regdef.h> +#include <asm/page.h> + + .align 3 + .globl relocate_new_kernel + .ent relocate_new_kernel + +relocate_new_kernel: + .prologue 0 + ldl a0, arg0 + ldl a1, arg1 + ldl a2, arg2 + ldl a3, arg3 + + ldl s0, kexec_indirection_page + ldl s1, kexec_start_address + +process_entry: + ldl s2, 0(s0) + addl s0, 8, s0 + + /* + * In case of a kdump/crash kernel, the indirection page is not + * populated as the kernel is directly copied to a reserved location + */ + beq s2, done + + /* destination page */ + and s2, 0x1, s3 + beq s3, 1f + bic s2, 0x1, s4/* store destination addr in s4 */ + br $31, process_entry + +1: + /* indirection page, update s0*/ + and s2, 0x2, s3 + beq s3, 1f + bic s2, 0x2, s0 + br $31, process_entry + +1: + /* done page */ + and s2, 0x4, s3 + beq s3, 1f + br $31, done +1: + /* source page */ + and s2, 0x8, s3 + beq s3, process_entry + bic s2, 0x8, s2 + ldi s6, 0x1 + sll s6, (PAGE_SHIFT - 3), s6 + +copy_word: + /* copy page word by word */ + ldl s5, 0(s2) + stl s5, 0(s4) + addl s4, 8, s4 + addl s2, 8, s2 + subl s6, 1, s6 + beq s6, process_entry + br $31, copy_word + br $31, process_entry + +done: +#ifdef CONFIG_CRASH_SMP /* unsupported now!!!! */ + /* kexec_flag reset is signal to other CPUs what kernel + * was moved to it's location. Note - we need relocated address + * of kexec_flag. + */ + + br ra, 1f +1: mov ra, t1 + ldi t2, 1b + ldi t0, kexec_flag + subl t0, t2, t0 + addl t1, t0, t0 + stl zero, 0(t0) +#endif + memb + jmp ra, (s1) + .end relocate_new_kernel + .size relocate_new_kernel, .-relocate_new_kernel + +#ifdef CONFIG_CRASH_SMP + /* + * Other CPUs should wait until code is relocated and + * then start at entry (?) point. + */ + .align 3 + .globl kexec_smp_wait + .ent kexec_smp_wait +kexec_smp_wait: + ldl a0, s_arg0 + ldl a1, s_arg1 + ldl a2, s_arg2 + ldl a3, s_arg3 + ldl s1, kexec_start_address + + /* Non-relocated address works for args and kexec_start_address (old + * kernel is not overwritten). But we need relocated address of + * kexec_flag. + */ + + bsr ra, 1f +1: mov ra, t1 + ldi t2, 1b + ldi t0, kexec_flag + subl t0, t2, t0 + addl t1, t0, t0 + +1: stl s0, 0(t0) + bne s0, 1b + memb + jmp ra, (s1) + .end kexec_smp_wait + .size kexec_smp_wait, .-kexec_smp_wait +#endif + + .align 3 + + /* All parameters to new kernel are passed in registers a0-a3. + * kexec_args[0..3] are uses to prepare register values. + */ + +kexec_args: + .globl kexec_args +arg0: .quad 0x0 +arg1: .quad 0x0 +arg2: .quad 0x0 +arg3: .quad 0x0 + .size kexec_args, 8*4 + +#ifdef CONFIG_CRASH_SMP + /* + * Secondary CPUs may have different kernel parameters in + * their registers a0-a3. secondary_kexec_args[0..3] are used + * to prepare register values. 
+ */ +secondary_kexec_args: + .globl secondary_kexec_args +s_arg0: .quad 0x0 +s_arg1: .quad 0x0 +s_arg2: .quad 0x0 +s_arg3: .quad 0x0 + .size secondary_kexec_args, 8*4 + +kexec_flag: + .quad 0x1 +#endif + +kexec_start_address: + .globl kexec_start_address + .quad 0x0 + .size kexec_start_address, 8 + +kexec_indirection_page: + .globl kexec_indirection_page + .quad 0 + .size kexec_indirection_page, 8 + +relocate_new_kernel_end: + +relocate_new_kernel_size: + .global relocate_new_kernel_size + .quad relocate_new_kernel_end - relocate_new_kernel + .size relocate_new_kernel_size, 8 diff --git a/arch/sw_64/kernel/segvdbg.c b/arch/sw_64/kernel/segvdbg.c new file mode 100644 index 000000000000..aee4b3863072 --- /dev/null +++ b/arch/sw_64/kernel/segvdbg.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Zhi Tongze + * Author: Zhi Tongze + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include <linux/kernel.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/uaccess.h> +#include <asm/debug.h> + +extern bool segv_debug_enabled; + +static int __init segv_debug_init(void) +{ + struct dentry *segvdbg; + + if (!sw64_debugfs_dir) + return -ENODEV; + + segvdbg = debugfs_create_bool("segv_debug", 0644, + sw64_debugfs_dir, &segv_debug_enabled); + if (!segvdbg) + return -ENOMEM; + return 0; +} +late_initcall(segv_debug_init); diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c new file mode 100644 index 000000000000..cc33a6f3b4f9 --- /dev/null +++ b/arch/sw_64/kernel/setup.c @@ -0,0 +1,1047 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/sw/kernel/setup.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* + * Bootup setup stuff. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/screen_info.h> +#include <linux/delay.h> +#include <linux/kexec.h> +#include <linux/console.h> +#include <linux/cpu.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/ioport.h> +#include <linux/platform_device.h> +#include <linux/memblock.h> +#include <linux/pci.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/initrd.h> +#include <linux/eisa.h> +#include <linux/pfn.h> +#ifdef CONFIG_MAGIC_SYSRQ +#include <linux/sysrq.h> +#include <linux/reboot.h> +#endif +#ifdef CONFIG_DEBUG_FS +#include <linux/debugfs.h> +#endif +#include <linux/notifier.h> +#include <linux/log2.h> +#include <linux/export.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> +#include <linux/uaccess.h> +#include <linux/cma.h> +#include <linux/genalloc.h> +#include <linux/acpi.h> +#include <asm/setup.h> +#include <asm/smp.h> +#include <asm/sw64_init.h> +#include <asm/pgtable.h> +#include <asm/dma.h> +#include <asm/mmu_context.h> +#include <asm/console.h> +#include <asm/core.h> +#include <asm/hw_init.h> +#include <asm/mmzone.h> +#include <asm/memory.h> +#include <asm/efi.h> +#include <asm/kvm_cma.h> + +#include "proto.h" +#include "pci_impl.h" + +#undef DEBUG_DISCONTIG +#ifdef DEBUG_DISCONTIG +#define DBGDCONT(args...) pr_debug(args) +#else +#define DBGDCONT(args...) 
+#endif + +DEFINE_PER_CPU(unsigned long, hard_node_id) = { 0 }; + +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +struct cma *sw64_kvm_cma; +EXPORT_SYMBOL(sw64_kvm_cma); + +static phys_addr_t size_cmdline; +static phys_addr_t base_cmdline; + +struct gen_pool *sw64_kvm_pool; +EXPORT_SYMBOL(sw64_kvm_pool); +#endif + +static inline int phys_addr_valid(unsigned long addr) +{ + /* + * At this point memory probe has not been done such that max_pfn + * and other physical address variables cannnot be used, so let's + * roughly judge physical address based on arch specific bit. + */ + return !(addr >> (cpu_desc.pa_bits - 1)); +} + +extern struct atomic_notifier_head panic_notifier_list; +static int sw64_panic_event(struct notifier_block *, unsigned long, void *); +static struct notifier_block sw64_panic_block = { + sw64_panic_event, + NULL, + INT_MAX /* try to do it first */ +}; + +/* the value is IOR: CORE_ONLIE*/ +cpumask_t core_start = CPU_MASK_NONE; + +static struct resource data_resource = { + .name = "Kernel data", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + +static struct resource code_resource = { + .name = "Kernel code", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + +static struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + +/* A collection of per-processor data. */ +struct cpuinfo_sw64 cpu_data[NR_CPUS]; +EXPORT_SYMBOL(cpu_data); + +struct cpu_desc_t cpu_desc; +struct socket_desc_t socket_desc[MAX_NUMSOCKETS]; +int memmap_nr; +struct memmap_entry memmap_map[MAX_NUMMEMMAPS]; +bool memblock_initialized; + +cpumask_t cpu_offline = CPU_MASK_NONE; + +static char command_line[COMMAND_LINE_SIZE] __initdata; +#ifdef CONFIG_CMDLINE_BOOL +static char builtin_cmdline[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE; +#endif + +/* boot_params */ +struct boot_params *sunway_boot_params = (struct boot_params *) (PARAM + 0x100); + +/* + * The format of "screen_info" is strange, and due to early + * i386-setup code. This is just enough to make the console + * code think we're on a VGA color display. + */ + +struct screen_info screen_info = { + .orig_x = 0, + .orig_y = 25, + .orig_video_cols = 80, + .orig_video_lines = 25, + .orig_video_isVGA = 1, + .orig_video_points = 16 +}; +EXPORT_SYMBOL(screen_info); + +#ifdef CONFIG_KEXEC + +void *kexec_control_page; + +#define KTEXT_MAX KERNEL_IMAGE_SIZE + +static void __init kexec_control_page_init(void) +{ + phys_addr_t addr; + + addr = memblock_alloc_base(KEXEC_CONTROL_PAGE_SIZE, PAGE_SIZE, KTEXT_MAX); + kexec_control_page = (void *)(__START_KERNEL_map + addr); +} + +/* + * reserve_crashkernel() - reserves memory are for crash kernel + * + * This function reserves memory area given in "crashkernel=" kernel command + * line parameter. The memory reserved is used by a dump capture kernel when + * primary kernel is crashing. 
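+ *
+ * The region is requested with e.g. "crashkernel=256M" or
+ * "crashkernel=256M@64M" on the kernel command line.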
+ */ +static void __init reserve_crashkernel(void) +{ + unsigned long long crash_size, crash_base; + int ret; + + ret = parse_crashkernel(boot_command_line, mem_desc.size, + &crash_size, &crash_base); + if (ret || !crash_size) + return; + + if (!memblock_is_region_memory(crash_base, crash_size)) + memblock_add(crash_base, crash_size); + + ret = memblock_reserve(crash_base, crash_size); + if (ret < 0) { + pr_warn("crashkernel reservation failed - memory is in use [mem %#018llx-%#018llx]\n", + crash_base, crash_base + crash_size - 1); + return; + } + + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crash_base >> 20), + (unsigned long)(mem_desc.size >> 20)); + + ret = add_memmap_region(crash_base, crash_size, memmap_crashkernel); + if (ret) + pr_warn("Add crash kernel area [mem %#018llx-%#018llx] to memmap region failed.\n", + crash_base, crash_base + crash_size - 1); + + if (crash_base >= KERNEL_IMAGE_SIZE) + pr_warn("Crash base should be less than %#x\n", KERNEL_IMAGE_SIZE); + + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); +} +#else /* !defined(CONFIG_KEXEC) */ +static void __init reserve_crashkernel(void) {} +static void __init kexec_control_page_init(void) {} +#endif /* !defined(CONFIG_KEXEC) */ + +/* + * I/O resources inherited from PeeCees. Except for perhaps the + * turbochannel SWs, everyone has these on some sort of SuperIO chip. + * + * ??? If this becomes less standard, move the struct out into the + * machine vector. + */ + +static void __init +reserve_std_resources(void) +{ + static struct resource standard_io_resources[] = { + { .name = "rtc", .start = -1, .end = -1 }, + { .name = "dma1", .start = 0x00, .end = 0x1f }, + { .name = "pic1", .start = 0x20, .end = 0x3f }, + { .name = "timer", .start = 0x40, .end = 0x5f }, + { .name = "keyboard", .start = 0x60, .end = 0x6f }, + { .name = "dma page reg", .start = 0x80, .end = 0x8f }, + { .name = "pic2", .start = 0xa0, .end = 0xbf }, + { .name = "dma2", .start = 0xc0, .end = 0xdf }, + }; + + struct resource *io = &ioport_resource; + size_t i; + + if (hose_head) { + struct pci_controller *hose; + + for (hose = hose_head; hose; hose = hose->next) + if (hose->index == 0) { + io = hose->io_space; + break; + } + } + + /* Fix up for the Jensen's queer RTC placement. */ + standard_io_resources[0].start = RTC_PORT(0); + standard_io_resources[0].end = RTC_PORT(0) + 0x10; + + for (i = 0; i < ARRAY_SIZE(standard_io_resources); ++i) + request_resource(io, standard_io_resources+i); +} + +static int __init parse_memmap_one(char *p) +{ + char *oldp; + u64 start_at, mem_size; + int ret; + + if (!p) + return -EINVAL; + + if (!strncmp(p, "exactmap", 8)) { + pr_err("\"memmap=exactmap\" not valid on sw64\n"); + return 0; + } + + oldp = p; + mem_size = memparse(p, &p); + if (p == oldp) + return -EINVAL; + + if (*p == '@') { + pr_err("\"memmap=nn@ss\" invalid on sw64\n"); + } else if (*p == '#') { + pr_err("\"memmap=nn#ss\" (force ACPI data) invalid on sw64\n"); + } else if (*p == '$') { + start_at = memparse(p + 1, &p); + ret = add_memmap_region(start_at, mem_size, memmap_reserved); + if (ret) + return ret; + } else { + return -EINVAL; + } + return *p == '\0' ?
0 : -EINVAL; +} + +static int __init setup_memmap(char *str) +{ + while (str) { + char *k = strchr(str, ','); + + if (k) + *k++ = 0; + + parse_memmap_one(str); + str = k; + } + + return 0; +} +early_param("memmap", setup_memmap); + +static int __init setup_cpuoffline(char *p) +{ + cpulist_parse(p, &cpu_offline); + cpumask_clear_cpu(0, &cpu_offline); + return 0; +} +early_param("cpuoffline", setup_cpuoffline); + +#ifdef CONFIG_BLK_DEV_INITRD +static void * __init move_initrd(unsigned long mem_limit) +{ + void *start; + unsigned long size; + + size = initrd_end - initrd_start; + start = memblock_alloc_from(PAGE_ALIGN(size), PAGE_SIZE, 0); + if (!start || __pa(start) + size > mem_limit) { + initrd_start = initrd_end = 0; + return NULL; + } + memmove(start, (void *)initrd_start, size); + initrd_start = (unsigned long)start; + initrd_end = initrd_start + size; + pr_info("initrd moved to 0x%px\n", start); + return start; +} +#else +static void * __init move_initrd(unsigned long mem_limit) +{ + return NULL; +} +#endif + +static int __init memmap_range_valid(phys_addr_t base, phys_addr_t size) +{ + if (phys_to_virt(base + size - 1) < phys_to_virt(PFN_PHYS(max_low_pfn))) + return true; + else + return false; +} + +void __init process_memmap(void) +{ + static int i; // Make it static so we won't start over again every time. + int ret; + phys_addr_t base, size; + + if (!memblock_initialized) + return; + + for (; i < memmap_nr; i++) { + base = memmap_map[i].addr; + size = memmap_map[i].size; + switch (memmap_map[i].type) { + case memmap_reserved: + if (!memmap_range_valid(base, size)) { + pr_err("reserved memmap region [mem %#018llx-%#018llx] extends beyond end of memory (%#018llx)\n", + base, base + size - 1, PFN_PHYS(max_low_pfn)); + } else { + pr_info("reserved memmap region [mem %#018llx-%#018llx]\n", + base, base + size - 1); + ret = memblock_remove(base, size); + if (ret) + pr_err("reserve memmap region [mem %#018llx-%#018llx] failed\n", + base, base + size - 1); + } + break; + case memmap_pci: + if (!memmap_range_valid(base, size)) { + pr_info("pci memmap region [mem %#018llx-%#018llx] extends beyond end of memory (%#018llx)\n", + base, base + size - 1, PFN_PHYS(max_low_pfn)); + } else { + pr_info("pci memmap region [mem %#018llx-%#018llx]\n", + base, base + size - 1); + ret = memblock_remove(base, size); + if (ret) + pr_err("reserve memmap region [mem %#018llx-%#018llx] failed\n", + base, base + size - 1); + } + break; + case memmap_initrd: + if (!memmap_range_valid(base, size)) { + base = (unsigned long) move_initrd(PFN_PHYS(max_low_pfn)); + if (!base) { + pr_err("initrd memmap region [mem %#018llx-%#018llx] extends beyond end of memory (%#018llx)\n", + base, base + size - 1, PFN_PHYS(max_low_pfn)); + } else { + memmap_map[i].addr = base; + pr_info("initrd memmap region [mem %#018llx-%#018llx]\n", + base, base + size - 1); + ret = memblock_reserve(base, size); + if (ret) + pr_err("reserve memmap region [mem %#018llx-%#018llx] failed\n", + base, base + size - 1); + } + } else { + pr_info("initrd memmap region [mem %#018llx-%#018llx]\n", base, base + size - 1); + ret = memblock_reserve(base, size); + if (ret) + pr_err("reserve memmap region [mem %#018llx-%#018llx] failed\n", + base, base + size - 1); + } + break; + case memmap_kvm: + case memmap_crashkernel: + /* kvm and crashkernel are handled elsewhere, skip */ + break; + case memmap_acpi: + pr_err("ACPI memmap region is not supported.\n"); + break; + case memmap_use: + pr_err("Force usage memmap region is not supported.\n"); + break; + case 
memmap_protected: + pr_err("Protected memmap region is not supported.\n"); + break; + default: + pr_err("Unknown type of memmap region.\n"); + } + } +} + +int __init add_memmap_region(u64 addr, u64 size, enum memmap_types type) +{ + if (memmap_nr >= ARRAY_SIZE(memmap_map)) { + pr_err("Ooops! Too many entries in the memory map!\n"); + return -EPERM; + } + + if (addr + size <= addr) { + pr_warn("Trying to add an invalid memory region, skipped\n"); + return -EINVAL; + } + + memmap_map[memmap_nr].addr = addr; + memmap_map[memmap_nr].size = size; + memmap_map[memmap_nr].type = type; + memmap_nr++; + + process_memmap(); + + return 0; +} + +static struct resource* __init +insert_ram_resource(u64 start, u64 end, bool reserved) +{ + struct resource *res = + kzalloc(sizeof(struct resource), GFP_ATOMIC); + if (!res) + return NULL; + if (reserved) { + res->name = "reserved"; + res->flags = IORESOURCE_MEM; + } else { + res->name = "System RAM"; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + } + res->start = start; + res->end = end; + if (insert_resource(&iomem_resource, res)) { + kfree(res); + return NULL; + } + return res; +} + +static int __init request_standard_resources(void) +{ + int i; + struct memblock_region *mblk; + + extern char _text[], _etext[]; + extern char _sdata[], _edata[]; + extern char __bss_start[], __bss_stop[]; + + for_each_mem_region(mblk) { + insert_ram_resource(mblk->base, mblk->base + mblk->size - 1, 0); + } + + for (i = 0; i < memmap_nr; i++) { + switch (memmap_map[i].type) { + case memmap_crashkernel: + break; + default: + insert_ram_resource(memmap_map[i].addr, + memmap_map[i].addr + memmap_map[i].size - 1, 1); + } + } + + code_resource.start = __pa_symbol(_text); + code_resource.end = __pa_symbol(_etext)-1; + data_resource.start = __pa_symbol(_sdata); + data_resource.end = __pa_symbol(_edata)-1; + bss_resource.start = __pa_symbol(__bss_start); + bss_resource.end = __pa_symbol(__bss_stop)-1; + + insert_resource(&iomem_resource, &code_resource); + insert_resource(&iomem_resource, &data_resource); + insert_resource(&iomem_resource, &bss_resource); + + return 0; +} +subsys_initcall(request_standard_resources); + +#ifdef CONFIG_NUMA +extern void cpu_set_node(void); +#endif + +static void __init show_socket_mem_layout(void) +{ + int i; + phys_addr_t base, size, end; + + base = 0; + + pr_info("Socket memory layout:\n"); + for (i = 0; i < MAX_NUMSOCKETS; i++) { + if (socket_desc[i].is_online) { + size = socket_desc[i].socket_mem; + end = base + size - 1; + pr_info("Socket %d: [mem %#018llx-%#018llx], size %llu\n", + i, base, end, size); + base = end + 1; + } + } + pr_info("Reserved memory size for Socket 0: %#lx\n", NODE0_START); +} + +int page_is_ram(unsigned long pfn) +{ + pfn <<= PAGE_SHIFT; + + return pfn >= mem_desc.base && pfn < (mem_desc.base + mem_desc.size); +} + +static int __init topology_init(void) +{ + int i; + +#ifdef CONFIG_NUMA + for_each_online_node(i) + register_one_node(i); +#endif + + for_each_possible_cpu(i) { + struct cpu *p = kzalloc(sizeof(*p), GFP_KERNEL); + + if (!p) + return -ENOMEM; +#ifdef CONFIG_HOTPLUG_CPU + if (i != 0) + p->hotpluggable = 1; +#endif + register_cpu(p, i); + } + + return 0; +} +subsys_initcall(topology_init); + +static void __init setup_machine_fdt(void) +{ +#ifdef CONFIG_USE_OF + void *dt_virt; + const char *name; + unsigned long phys_addr; + + /* Give a chance to select kernel builtin DTB firstly */ + if (IS_ENABLED(CONFIG_SW64_BUILTIN_DTB)) + dt_virt = (void *)__dtb_start; + else + dt_virt = (void 
*)sunway_boot_params->dtb_start; + + phys_addr = __phys_addr((unsigned long)dt_virt); + if (!phys_addr_valid(phys_addr) || + !early_init_dt_scan(dt_virt)) { + pr_crit("\n" + "Error: invalid device tree blob at virtual address %px\n" + "The dtb must be 8-byte aligned and must not exceed 2 MB in size\n" + "\nPlease check your bootloader.", + dt_virt); + + while (true) + cpu_relax(); + } + + name = of_flat_dt_get_machine_name(); + if (!name) + return; + + pr_info("Machine model: %s\n", name); +#else + pr_info("Kernel disable device tree support.\n"); + return; +#endif +} + +void __init device_tree_init(void) +{ + unflatten_and_copy_device_tree(); + sunway_boot_params->dtb_start = (__u64)initial_boot_params; +} + +static void __init setup_cpu_info(void) +{ + int i; + struct cache_desc *c; + unsigned long val; + + val = cpuid(GET_TABLE_ENTRY, 0); + cpu_desc.model = CPUID_MODEL(val); + cpu_desc.family = CPUID_FAMILY(val); + cpu_desc.chip_var = CPUID_CHIP_VAR(val); + cpu_desc.arch_var = CPUID_ARCH_VAR(val); + cpu_desc.arch_rev = CPUID_ARCH_REV(val); + cpu_desc.pa_bits = CPUID_PA_BITS(val); + cpu_desc.va_bits = CPUID_VA_BITS(val); + cpu_desc.run_mode = HOST_MODE; + + if (*(unsigned long *)MMSIZE) + cpu_desc.run_mode = GUEST_MODE; + + for (i = 0; i < VENDOR_ID_MAX; i++) { + val = cpuid(GET_VENDOR_ID, i); + memcpy(cpu_desc.vendor_id + (i * 8), &val, 8); + } + + for (i = 0; i < MODEL_MAX; i++) { + val = cpuid(GET_MODEL, i); + memcpy(cpu_desc.model_id + (i * 8), &val, 8); + } + + cpu_desc.frequency = cpuid(GET_CPU_FREQ, 0) * 1000UL * 1000UL; + + for (i = 0; i < NR_CPUS; i++) { + c = &(cpu_data[i].icache); + val = cpuid(GET_CACHE_INFO, L1_ICACHE); + c->size = CACHE_SIZE(val); + c->linesz = 1 << (CACHE_LINE_BITS(val)); + c->sets = 1 << (CACHE_INDEX_BITS(val)); + c->ways = c->size / c->sets / c->linesz; + + c = &(cpu_data[i].dcache); + val = cpuid(GET_CACHE_INFO, L1_DCACHE); + c->size = CACHE_SIZE(val); + c->linesz = 1 << (CACHE_LINE_BITS(val)); + c->sets = 1 << (CACHE_INDEX_BITS(val)); + c->ways = c->size / c->sets / c->linesz; + + c = &(cpu_data[i].scache); + val = cpuid(GET_CACHE_INFO, L2_CACHE); + c->size = CACHE_SIZE(val); + c->linesz = 1 << (CACHE_LINE_BITS(val)); + c->sets = 1 << (CACHE_INDEX_BITS(val)); + c->ways = c->size / c->sets / c->linesz; + + c = &(cpu_data[i].tcache); + val = cpuid(GET_CACHE_INFO, L3_CACHE); + c->size = CACHE_SIZE(val); + c->linesz = 1 << (CACHE_LINE_BITS(val)); + c->sets = 1 << (CACHE_INDEX_BITS(val)); + c->ways = c->size / c->sets / c->linesz; + } +} + +static void __init setup_socket_info(void) +{ + int i; + int numsockets = sw64_chip->get_cpu_num(); + + memset(socket_desc, 0, MAX_NUMSOCKETS * sizeof(struct socket_desc_t)); + + for (i = 0; i < numsockets; i++) { + socket_desc[i].is_online = 1; + if (sw64_chip_init->early_init.get_node_mem) + socket_desc[i].socket_mem = sw64_chip_init->early_init.get_node_mem(i); + } +} + +#ifdef CONFIG_BLK_DEV_INITRD +static void __init reserve_mem_for_initrd(void) +{ + int ret; + + initrd_start = sunway_boot_params->initrd_start; + if (initrd_start) { + initrd_start = __pa(initrd_start) + PAGE_OFFSET; + initrd_end = initrd_start + sunway_boot_params->initrd_size; + pr_info("Initial ramdisk at: 0x%px (%llu bytes)\n", + (void *)initrd_start, sunway_boot_params->initrd_size); + + ret = add_memmap_region(__pa(initrd_start), initrd_end - initrd_start, memmap_initrd); + if (ret) + pr_err("Add initrd area [mem %#018lx-%#018lx] to memmap region failed.\n", + __pa(initrd_start), __pa(initrd_end - 1)); + } +} +#endif /* CONFIG_BLK_DEV_INITRD */ 
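+/*
+ * Memory for the KVM guest CMA pool is reserved with e.g.
+ * "kvm_mem=2G@0x200000000" on the kernel command line:
+ * early_kvm_reserved_mem() parses the size and base, and
+ * sw64_kvm_reserve() declares the CMA region from them.
+ */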
+ +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +static int __init early_kvm_reserved_mem(char *p) +{ + if (!p) { + pr_err("Config string not provided\n"); + return -EINVAL; + } + + size_cmdline = memparse(p, &p); + if (*p != '@') + return -EINVAL; + base_cmdline = memparse(p + 1, &p); + return 0; +} +early_param("kvm_mem", early_kvm_reserved_mem); + +void __init sw64_kvm_reserve(void) +{ + kvm_cma_declare_contiguous(base_cmdline, size_cmdline, 0, + PAGE_SIZE, 0, "sw64_kvm_cma", &sw64_kvm_cma); +} +#endif + +void __init +setup_arch(char **cmdline_p) +{ + setup_cpu_info(); + sw64_chip->fixup(); + sw64_chip_init->fixup(); + setup_socket_info(); + show_socket_mem_layout(); + sw64_chip_init->early_init.setup_core_start(&core_start); + + jump_label_init(); + setup_sched_clock(); +#ifdef CONFIG_GENERIC_SCHED_CLOCK + sw64_sched_clock_init(); +#endif + + setup_machine_fdt(); + + /* Register a call for panic conditions. */ + atomic_notifier_chain_register(&panic_notifier_list, + &sw64_panic_block); + + callback_init(); + + /* command line */ + if (!sunway_boot_params->cmdline) + sunway_boot_params->cmdline = (unsigned long)COMMAND_LINE; + + strlcpy(boot_command_line, (char *)sunway_boot_params->cmdline, COMMAND_LINE_SIZE); + +#if IS_ENABLED(CONFIG_CMDLINE_BOOL) +#if IS_ENABLED(CONFIG_CMDLINE_OVERRIDE) + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + strlcpy((char *)sunway_boot_params->cmdline, boot_command_line, COMMAND_LINE_SIZE); +#else + if (builtin_cmdline[0]) { + /* append builtin to boot loader cmdline */ + strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); + strlcat(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + } +#endif /* CMDLINE_EXTEND */ +#endif + if (IS_ENABLED(CONFIG_SW64_CHIP3_ASIC_DEBUG) && + IS_ENABLED(CONFIG_SW64_CHIP3)) { + unsigned long bmc, cpu_online, node; + + bmc = *(unsigned long *)__va(0x800000); + pr_info("bmc = %ld\n", bmc); + cpu_online = sw64_chip->get_cpu_num(); + for (node = 0; node < cpu_online; node++) + sw64_io_write(node, SI_FAULT_INT_EN, 0); + sprintf(boot_command_line, "root=/dev/sda2 ip=172.16.137.%ld::172.16.137.254:255.255.255.0::eth0:off", 180+bmc); + } + + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + + /* + * Process command-line arguments. + */ + parse_early_param(); + + /* Find our memory. */ + mem_detect(); + +#ifdef CONFIG_PCI + reserve_mem_for_pci(); +#endif + +#ifdef CONFIG_BLK_DEV_INITRD + reserve_mem_for_initrd(); +#endif + + sw64_memblock_init(); + + /* Reserve large chunks of memory for use by CMA for KVM. */ +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + sw64_kvm_reserve(); +#endif + + sw64_numa_init(); + + memblock_dump_all(); + + sparse_init(); + + zone_sizes_init(); + + paging_init(); + + kexec_control_page_init(); + + efi_init(); + + /* Parse the ACPI tables for possible boot-time configuration */ + acpi_boot_table_init(); + + /* + * Initialize the machine. Usually has to do with setting up + * DMA windows and the like. + */ + sw64_init_arch(); + + reserve_crashkernel(); + /* Reserve standard resources. */ + reserve_std_resources(); + + /* + * Give us a default console. TGA users will see nothing until + * chr_dev_init is called, rather late in the boot sequence. + */ + +#ifdef CONFIG_VT +#if defined(CONFIG_VGA_CONSOLE) + conswitchp = &vga_con; +#elif defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +#endif +#endif + + /* Default root filesystem to sda2. */ + ROOT_DEV = Root_SDA2; + + /* + * Identify the flock of penguins. 
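+ * (i.e. start the secondary CPUs and, when NUMA is enabled, bind
+ * them to their nodes.)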
+ */ + +#ifdef CONFIG_SMP + setup_smp(); +#endif +#ifdef CONFIG_NUMA + cpu_set_node(); +#endif + if (acpi_disabled) + device_tree_init(); +} + + +static int +show_cpuinfo(struct seq_file *f, void *slot) +{ + int i; + unsigned long cpu_freq; + + cpu_freq = get_cpu_freq() / 1000 / 1000; + + for_each_online_cpu(i) { + /* + * glibc reads /proc/cpuinfo to determine the number of + * online processors, looking for lines beginning with + * "processor". Give glibc what it expects. + */ + seq_printf(f, "processor\t: %u\n" + "vendor_id\t: %s\n" + "cpu family\t: %d\n" + "model\t\t: %u\n" + "model name\t: %s CPU @ %lu.%lu%luGHz\n" + "cpu variation\t: %u\n" + "cpu revision\t: %u\n", + i, cpu_desc.vendor_id, cpu_desc.family, + cpu_desc.model, cpu_desc.model_id, + cpu_freq / 1000, (cpu_freq % 1000) / 100, + (cpu_freq % 100) / 10, + cpu_desc.arch_var, cpu_desc.arch_rev); + seq_printf(f, "cpu MHz\t\t: %lu.00\n" + "cache size\t: %u KB\n" + "physical id\t: %d\n" + "bogomips\t: %lu.%02lu\n", + cpu_freq, cpu_data[i].tcache.size >> 10, + cpu_to_rcid(i), + loops_per_jiffy / (500000/HZ), + (loops_per_jiffy / (5000/HZ)) % 100); + + seq_printf(f, "flags\t\t: fpu simd vpn upn cpuid\n"); + seq_printf(f, "page size\t: %d\n", 8192); + seq_printf(f, "cache_alignment\t: %d\n", cpu_data[i].tcache.linesz); + seq_printf(f, "address sizes\t: %u bits physical, %u bits virtual\n\n", + cpu_desc.pa_bits, cpu_desc.va_bits); + } + return 0; +} + +/* + * We show only CPU #0 info. + */ +static void * +c_start(struct seq_file *f, loff_t *pos) +{ + return *pos < 1 ? (void *)1 : NULL; +} + +static void * +c_next(struct seq_file *f, void *v, loff_t *pos) +{ + return NULL; +} + +static void +c_stop(struct seq_file *f, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + + +static int +sw64_panic_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + return NOTIFY_DONE; +} + +static __init int add_pcspkr(void) +{ + struct platform_device *pd; + int ret; + + pd = platform_device_alloc("pcspkr", -1); + if (!pd) + return -ENOMEM; + + ret = platform_device_add(pd); + if (ret) + platform_device_put(pd); + + return ret; +} +device_initcall(add_pcspkr); + +#ifdef CONFIG_DEBUG_FS +struct dentry *sw64_debugfs_dir; +static int __init debugfs_sw64(void) +{ + struct dentry *d; + + d = debugfs_create_dir("sw_64", NULL); + if (!d) + return -ENOMEM; + sw64_debugfs_dir = d; + return 0; +} +arch_initcall(debugfs_sw64); +#endif + +#ifdef CONFIG_OF +static int __init sw64_of_init(void) +{ + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); + return 0; +} +core_initcall(sw64_of_init); +#endif + +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +static int __init sw64_kvm_pool_init(void) +{ + int status = 0; + unsigned long kvm_pool_virt; + struct page *base_page, *end_page, *p; + + if (!sw64_kvm_cma) + goto out; + + kvm_pool_virt = (unsigned long)base_cmdline; + + sw64_kvm_pool = gen_pool_create(PAGE_SHIFT, -1); + if (!sw64_kvm_pool) + goto out; + + status = gen_pool_add_virt(sw64_kvm_pool, kvm_pool_virt, base_cmdline, + size_cmdline, -1); + if (status < 0) { + pr_err("failed to add memory chunks to sw64 kvm pool\n"); + gen_pool_destroy(sw64_kvm_pool); + sw64_kvm_pool = NULL; + goto out; + } + gen_pool_set_algo(sw64_kvm_pool, gen_pool_best_fit, NULL); + + base_page = pfn_to_page(base_cmdline >> PAGE_SHIFT); + end_page = pfn_to_page((base_cmdline + size_cmdline) >> PAGE_SHIFT); + + p = base_page; + while (page_ref_count(p) == 0 
&& + (unsigned long)p <= (unsigned long)end_page) { + set_page_count(p, 1); + SetPageReserved(p); + p++; + } + + return status; + +out: + return -ENOMEM; +} +core_initcall_sync(sw64_kvm_pool_init); +#endif diff --git a/arch/sw_64/kernel/signal.c b/arch/sw_64/kernel/signal.c new file mode 100644 index 000000000000..74e98063c874 --- /dev/null +++ b/arch/sw_64/kernel/signal.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/sw_64/kernel/signal.c + * + * Copyright (C) 1995 Linus Torvalds + * + * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/ptrace.h> +#include <linux/unistd.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/tty.h> +#include <linux/binfmts.h> +#include <linux/bitops.h> +#include <linux/syscalls.h> +#include <linux/tracehook.h> +#include <linux/uaccess.h> + +#include <asm/sigcontext.h> +#include <asm/ucontext.h> +#include <asm/vdso.h> + +#include "proto.h" + + +#define DEBUG_SIG 0 + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +asmlinkage void ret_from_sys_call(void); + +/* + * Do a signal return; undo the signal stack. + */ + +#if _NSIG_WORDS > 1 +# error "Non SA_SIGINFO frame needs rearranging" +#endif + +struct rt_sigframe { + struct siginfo info; + struct ucontext uc; + unsigned int retcode[3]; +}; + +/* + * If this changes, userland unwinders that Know Things about our signal + * frame will break. Do not undertake lightly. It also implies an ABI + * change wrt the size of siginfo_t, which may cause some pain. + */ +extern char compile_time_assert + [offsetof(struct rt_sigframe, uc.uc_mcontext) == 176 ? 
1 : -1]; + +#define INSN_MOV_R30_R16 0x47fe0410 +#define INSN_LDI_R0 0x201f0000 +#define INSN_CALLSYS 0x00000083 + +static long +restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) +{ + unsigned long usp; + struct switch_stack *sw = (struct switch_stack *)regs - 1; + unsigned long *ctx_fp = (unsigned long *)¤t->thread.ctx_fp; + long i, err = __get_user(regs->pc, &sc->sc_pc); + + current->restart_block.fn = do_no_restart_syscall; + + sw->r26 = (unsigned long) ret_from_sys_call; + + err |= __get_user(regs->r0, sc->sc_regs+0); + err |= __get_user(regs->r1, sc->sc_regs+1); + err |= __get_user(regs->r2, sc->sc_regs+2); + err |= __get_user(regs->r3, sc->sc_regs+3); + err |= __get_user(regs->r4, sc->sc_regs+4); + err |= __get_user(regs->r5, sc->sc_regs+5); + err |= __get_user(regs->r6, sc->sc_regs+6); + err |= __get_user(regs->r7, sc->sc_regs+7); + err |= __get_user(regs->r8, sc->sc_regs+8); + err |= __get_user(sw->r9, sc->sc_regs+9); + err |= __get_user(sw->r10, sc->sc_regs+10); + err |= __get_user(sw->r11, sc->sc_regs+11); + err |= __get_user(sw->r12, sc->sc_regs+12); + err |= __get_user(sw->r13, sc->sc_regs+13); + err |= __get_user(sw->r14, sc->sc_regs+14); + err |= __get_user(sw->r15, sc->sc_regs+15); + err |= __get_user(regs->r16, sc->sc_regs+16); + err |= __get_user(regs->r17, sc->sc_regs+17); + err |= __get_user(regs->r18, sc->sc_regs+18); + err |= __get_user(regs->r19, sc->sc_regs+19); + err |= __get_user(regs->r20, sc->sc_regs+20); + err |= __get_user(regs->r21, sc->sc_regs+21); + err |= __get_user(regs->r22, sc->sc_regs+22); + err |= __get_user(regs->r23, sc->sc_regs+23); + err |= __get_user(regs->r24, sc->sc_regs+24); + err |= __get_user(regs->r25, sc->sc_regs+25); + err |= __get_user(regs->r26, sc->sc_regs+26); + err |= __get_user(regs->r27, sc->sc_regs+27); + err |= __get_user(regs->r28, sc->sc_regs+28); + err |= __get_user(regs->gp, sc->sc_regs+29); + err |= __get_user(usp, sc->sc_regs+30); + wrusp(usp); + /* simd-fp */ + for (i = 0; i < 31 * 4; i++) + err |= __get_user(ctx_fp[i], sc->sc_fpregs + i); + err |= __get_user(current->thread.fpcr, &sc->sc_fpcr); + + return err; +} + +/* + * Note that this syscall is also used by setcontext(3) to install + * a given sigcontext. This because it's impossible to set *all* + * registers and transfer control from userland. 
+ */ + +asmlinkage void +do_sigreturn(struct sigcontext __user *sc) +{ + struct pt_regs *regs = current_pt_regs(); + sigset_t set; + + /* Verify that it's a good sigcontext before using it */ + if (!access_ok(sc, sizeof(*sc))) + goto give_sigsegv; + if (__get_user(set.sig[0], &sc->sc_mask)) + goto give_sigsegv; + + set_current_blocked(&set); + + if (restore_sigcontext(sc, regs)) + goto give_sigsegv; + + /* Send SIGTRAP if we're single-stepping: */ + if (ptrace_cancel_bpt(current)) { + send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *) regs->pc, 0, + current); + } + return; + +give_sigsegv: + force_sig(SIGSEGV); +} + +asmlinkage void +do_rt_sigreturn(struct rt_sigframe __user *frame) +{ + struct pt_regs *regs = current_pt_regs(); + sigset_t set; + + /* Verify that it's a good ucontext_t before using it */ + if (!access_ok(&frame->uc, sizeof(frame->uc))) + goto give_sigsegv; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto give_sigsegv; + + set_current_blocked(&set); + + if (restore_sigcontext(&frame->uc.uc_mcontext, regs)) + goto give_sigsegv; + + if (restore_altstack(&frame->uc.uc_stack)) + goto give_sigsegv; + + /* Send SIGTRAP if we're single-stepping: */ + if (ptrace_cancel_bpt(current)) { + send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *) regs->pc, 0, + current); + } + return; + +give_sigsegv: + force_sig(SIGSEGV); +} + + +/* + * Set up a signal frame. + */ + +static inline void __user * +get_sigframe(struct ksignal *ksig, unsigned long sp, size_t frame_size) +{ + return (void __user *)((sigsp(sp, ksig) - frame_size) & -32ul); +} + +static long +setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, + unsigned long mask, unsigned long sp) +{ + struct switch_stack *sw = (struct switch_stack *)regs - 1; + unsigned long *ctx_fp = (unsigned long *)¤t->thread.ctx_fp; + long i, err = 0; + + err |= __put_user(on_sig_stack((unsigned long)sc), &sc->sc_onstack); + err |= __put_user(mask, &sc->sc_mask); + err |= __put_user(regs->pc, &sc->sc_pc); + err |= __put_user(8, &sc->sc_ps); + + err |= __put_user(regs->r0, sc->sc_regs+0); + err |= __put_user(regs->r1, sc->sc_regs+1); + err |= __put_user(regs->r2, sc->sc_regs+2); + err |= __put_user(regs->r3, sc->sc_regs+3); + err |= __put_user(regs->r4, sc->sc_regs+4); + err |= __put_user(regs->r5, sc->sc_regs+5); + err |= __put_user(regs->r6, sc->sc_regs+6); + err |= __put_user(regs->r7, sc->sc_regs+7); + err |= __put_user(regs->r8, sc->sc_regs+8); + err |= __put_user(sw->r9, sc->sc_regs+9); + err |= __put_user(sw->r10, sc->sc_regs+10); + err |= __put_user(sw->r11, sc->sc_regs+11); + err |= __put_user(sw->r12, sc->sc_regs+12); + err |= __put_user(sw->r13, sc->sc_regs+13); + err |= __put_user(sw->r14, sc->sc_regs+14); + err |= __put_user(sw->r15, sc->sc_regs+15); + err |= __put_user(regs->r16, sc->sc_regs+16); + err |= __put_user(regs->r17, sc->sc_regs+17); + err |= __put_user(regs->r18, sc->sc_regs+18); + err |= __put_user(regs->r19, sc->sc_regs+19); + err |= __put_user(regs->r20, sc->sc_regs+20); + err |= __put_user(regs->r21, sc->sc_regs+21); + err |= __put_user(regs->r22, sc->sc_regs+22); + err |= __put_user(regs->r23, sc->sc_regs+23); + err |= __put_user(regs->r24, sc->sc_regs+24); + err |= __put_user(regs->r25, sc->sc_regs+25); + err |= __put_user(regs->r26, sc->sc_regs+26); + err |= __put_user(regs->r27, sc->sc_regs+27); + err |= __put_user(regs->r28, sc->sc_regs+28); + err |= __put_user(regs->gp, sc->sc_regs+29); + err |= __put_user(sp, sc->sc_regs+30); + err |= __put_user(0, sc->sc_regs+31); + /* simd-fp */ 
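+ /* 31 SIMD FP registers, four 64-bit words each, go to sc_fpregs; the FPCR is saved just below. */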
+ for (i = 0; i < 31 * 4; i++) + err |= __put_user(ctx_fp[i], sc->sc_fpregs + i); + err |= __put_user(current->thread.fpcr, &sc->sc_fpcr); + + err |= __put_user(regs->trap_a0, &sc->sc_traparg_a0); + err |= __put_user(regs->trap_a1, &sc->sc_traparg_a1); + err |= __put_user(regs->trap_a2, &sc->sc_traparg_a2); + + return err; +} + +static int +setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) +{ + unsigned long oldsp, r26, err = 0; + struct rt_sigframe __user *frame; + + oldsp = rdusp(); + frame = get_sigframe(ksig, oldsp, sizeof(*frame)); + if (!access_ok(frame, sizeof(*frame))) + return -EFAULT; + + err |= copy_siginfo_to_user(&frame->info, &ksig->info); + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(set->sig[0], &frame->uc.uc_old_sigmask); + err |= __save_altstack(&frame->uc.uc_stack, oldsp); + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, + set->sig[0], oldsp); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + return -EFAULT; + + /* Set up to return from userspace. If provided, use a stub + * already in userspace. + */ + r26 = VDSO_SYMBOL(current->mm->context.vdso, rt_sigreturn); + + if (err) + return -EFAULT; + + /* "Return" to the handler */ + regs->r26 = r26; + regs->r27 = regs->pc = (unsigned long) ksig->ka.sa.sa_handler; + regs->r16 = ksig->sig; /* a0: signal number */ + regs->r17 = (unsigned long) &frame->info; /* a1: siginfo pointer */ + regs->r18 = (unsigned long) &frame->uc; /* a2: ucontext pointer */ + wrusp((unsigned long) frame); + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->pc, regs->r26); +#endif + + return 0; +} + +/* + * OK, we're invoking a handler. + */ +static inline void +handle_signal(struct ksignal *ksig, struct pt_regs *regs) +{ + sigset_t *oldset = sigmask_to_save(); + int ret; + + ret = setup_rt_frame(ksig, oldset, regs); + + signal_setup_done(ret, ksig, 0); +} + +static inline void +syscall_restart(unsigned long r0, unsigned long r19, + struct pt_regs *regs, struct k_sigaction *ka) +{ + switch (regs->r0) { + case ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { + regs->r0 = EINTR; + break; + } + /* else: fallthrough */ + case ERESTARTNOINTR: + regs->r0 = r0; /* reset v0 and a3 and replay syscall */ + regs->r19 = r19; + regs->pc -= 4; + break; + case ERESTART_RESTARTBLOCK: + regs->r0 = EINTR; + break; + case ERESTARTNOHAND: + regs->r0 = EINTR; + break; + } +} + + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + * + * Note that we go through the signals twice: once to check the signals that + * the kernel can handle, and then we build all the user-level signal handling + * stack-frames in one go after that. + * + * "r0" and "r19" are the registers we need to restore for system call + * restart. "r0" is also used as an indicator whether we can restart at + * all (if we get here from anything but a syscall return, it will be 0) + */ +static void +do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19) +{ + unsigned long single_stepping = ptrace_cancel_bpt(current); + struct ksignal ksig; + + /* This lets the debugger run, ... */ + if (get_signal(&ksig)) { + /* ... so re-check the single stepping. */ + single_stepping |= ptrace_cancel_bpt(current); + /* Whee! Actually deliver the signal. 
*/ + if (r0) + syscall_restart(r0, r19, regs, &ksig.ka); + handle_signal(&ksig, regs); + } else { + single_stepping |= ptrace_cancel_bpt(current); + if (r0) { + switch (regs->r0) { + case ERESTARTNOHAND: + case ERESTARTSYS: + case ERESTARTNOINTR: + /* Reset v0 and a3 and replay syscall. */ + regs->r0 = r0; + regs->r19 = r19; + regs->pc -= 4; + break; + case ERESTART_RESTARTBLOCK: + /* Set v0 to the restart_syscall and replay */ + regs->r0 = __NR_restart_syscall; + regs->pc -= 4; + break; + } + } + restore_saved_sigmask(); + } + if (single_stepping) + ptrace_set_bpt(current); /* re-set breakpoint */ +} + +void +do_work_pending(struct pt_regs *regs, unsigned long thread_flags, + unsigned long r0, unsigned long r19) +{ + do { + if (thread_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + local_irq_enable(); + + if (thread_flags & _TIF_UPROBE) + uprobe_notify_resume(regs); + + if (thread_flags & _TIF_SIGPENDING) { + do_signal(regs, r0, r19); + r0 = 0; + } else { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + } + local_irq_disable(); + thread_flags = current_thread_info()->flags; + } while (thread_flags & _TIF_WORK_MASK); +} diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c new file mode 100644 index 000000000000..7d9c5c90f1ac --- /dev/null +++ b/arch/sw_64/kernel/smp.c @@ -0,0 +1,810 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/sw_64/kernel/smp.c + */ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/module.h> +#include <linux/sched/mm.h> +#include <linux/sched/hotplug.h> +#include <linux/mm.h> +#include <linux/err.h> +#include <linux/threads.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/spinlock.h> +#include <linux/irq.h> +#include <linux/cache.h> +#include <linux/profile.h> +#include <linux/bitops.h> +#include <linux/cpu.h> + +#include <asm/ptrace.h> +#include <linux/atomic.h> + +#include <asm/core.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> +#include <asm/suspend.h> +#include <asm/hcall.h> +#include <asm/sw64io.h> +#include <asm/sw64_init.h> +#include <asm/topology.h> +#include <asm/tc.h> +#include "proto.h" + +struct smp_rcb_struct *smp_rcb; + +extern struct cpuinfo_sw64 cpu_data[NR_CPUS]; + +int smp_booted; + +#define smp_debug 0 +#define DBGS(fmt, arg...) \ + do { if (smp_debug) printk("SMP: " fmt, ## arg); } while (0) + +int __cpu_to_rcid[NR_CPUS]; /* Map logical to physical */ +EXPORT_SYMBOL(__cpu_to_rcid); + +int __rcid_to_cpu[NR_CPUS]; /* Map physical to logical */ +EXPORT_SYMBOL(__rcid_to_cpu); + +unsigned long tidle_pcb[NR_CPUS]; + +/* State of each CPU */ +DEFINE_PER_CPU(int, cpu_state) = { 0 }; + +/* A collection of single bit ipi messages. */ +static struct { + unsigned long bits ____cacheline_aligned; +} ipi_data[NR_CPUS] __cacheline_aligned; + +enum ipi_message_type { + IPI_RESCHEDULE, + IPI_CALL_FUNC, + IPI_CPU_STOP, +}; + +/* Set to a secondary's cpuid when it comes online. */ +static int smp_secondary_alive; + +int smp_num_cpus = 1; /* Number that came online. */ +EXPORT_SYMBOL(smp_num_cpus); + +#define send_sleep_interrupt(cpu) send_ipi((cpu), II_SLEEP) +#define send_wakeup_interrupt(cpu) send_ipi((cpu), II_WAKE) + +/* + * Called by both boot and secondaries to move global data into + * per-processor storage. 
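+ * For now that is just loops_per_jiffy and the per-CPU ASN bookkeeping.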
+ */ +static inline void __init +smp_store_cpu_info(int cpuid) +{ + cpu_data[cpuid].loops_per_jiffy = loops_per_jiffy; + cpu_data[cpuid].last_asn = ASN_FIRST_VERSION; + cpu_data[cpuid].need_new_asn = 0; + cpu_data[cpuid].asn_lock = 0; +} + +/* + * Ideally sets up per-cpu profiling hooks. Doesn't do much now... + */ +static inline void __init +smp_setup_percpu_timer(int cpuid) +{ + setup_timer(); + cpu_data[cpuid].prof_counter = 1; + cpu_data[cpuid].prof_multiplier = 1; +} + +static void __init wait_boot_cpu_to_stop(int cpuid) +{ + unsigned long stop = jiffies + 10*HZ; + + while (time_before(jiffies, stop)) { + if (!smp_secondary_alive) + return; + barrier(); + } + + printk("%s: FAILED on CPU %d, hanging now\n", __func__, cpuid); + for (;;) + barrier(); +} + +void __weak enable_chip_int(void) { } + +/* + * Where secondaries begin a life of C. + */ +void smp_callin(void) +{ + int cpuid = smp_processor_id(); + + local_irq_disable(); + + enable_chip_int(); + + if (cpu_online(cpuid)) { + printk("??, cpu 0x%x already present??\n", cpuid); + BUG(); + } + set_cpu_online(cpuid, true); + + /* clear ksp, usp */ + wrksp(0); + wrusp(0); + + /* Set trap vectors. */ + trap_init(); + + /* Set interrupt vector. */ + wrent(entInt, 0); + + /* Get our local ticker going. */ + smp_setup_percpu_timer(cpuid); + + /* All kernel threads share the same mm context. */ + mmgrab(&init_mm); + current->active_mm = &init_mm; + + /* inform the notifiers about the new cpu */ + notify_cpu_starting(cpuid); + + per_cpu(cpu_state, cpuid) = CPU_ONLINE; + per_cpu(hard_node_id, cpuid) = cpu_to_rcid(cpuid) >> CORES_PER_NODE_SHIFT; + + /* Must have completely accurate bogos. */ + local_irq_enable(); + + /* Wait boot CPU to stop with irq enabled before running + * calibrate_delay. + */ + wait_boot_cpu_to_stop(cpuid); + mb(); + + /* Allow master to continue only after we written loops_per_jiffy. */ + wmb(); + smp_secondary_alive = 1; + + DBGS("%s: commencing CPU %d (RCID: %d)current %p active_mm %p\n", + __func__, cpuid, cpu_to_rcid(cpuid), current, current->active_mm); + + /* Cpu0 init preempt_count at start_kernel, other smp cpus do here. */ + preempt_disable(); + + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); +} + + +/* + * Set ready for secondary cpu. + */ +static inline void set_secondary_ready(int cpuid) +{ + smp_rcb->ready = cpuid; +} + +/* + * Convince the hmcode to have a secondary cpu begin execution. + */ +static int secondary_cpu_start(int cpuid, struct task_struct *idle) +{ + struct pcb_struct *ipcb; + unsigned long timeout; + + ipcb = &task_thread_info(idle)->pcb; + + /* + * Initialize the idle's PCB to something just good enough for + * us to get started. Immediately after starting, we'll swpctx + * to the target idle task's pcb. Reuse the stack in the mean + * time. Precalculate the target PCBB. + */ + ipcb->ksp = (unsigned long)ipcb + sizeof(union thread_union) - 16; + ipcb->usp = 0; + ipcb->pcc = 0; + ipcb->asn = 0; + tidle_pcb[cpuid] = ipcb->unique = virt_to_phys(ipcb); + ipcb->dv_match = ipcb->dv_mask = 0; + + DBGS("Starting secondary cpu %d: state 0x%lx\n", cpuid, idle->state); + + set_cpu_online(cpuid, false); + wmb(); + + set_secondary_ready(cpuid); + + /* Wait 10 seconds for secondary cpu. 
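+ * The secondary marks itself online in smp_callin(); if that never happens,
+ * report the failure and give up.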
*/ + timeout = jiffies + 10*HZ; + while (time_before(jiffies, timeout)) { + if (cpu_online(cpuid)) + goto started; + udelay(10); + barrier(); + } + pr_err("SMP: Processor %d failed to start.\n", cpuid); + return -1; + +started: + DBGS("%s: SUCCESS for CPU %d!!!\n", __func__, cpuid); + store_cpu_topology(cpuid); + numa_add_cpu(cpuid); + return 0; +} + +/* + * Bring one cpu online. + */ +static int smp_boot_one_cpu(int cpuid, struct task_struct *idle) +{ + unsigned long timeout; + + /* Signal the secondary to wait a moment. */ + smp_secondary_alive = -1; + + per_cpu(cpu_state, cpuid) = CPU_UP_PREPARE; + + /* Whirrr, whirrr, whirrrrrrrrr... */ + if (secondary_cpu_start(cpuid, idle)) + return -1; + + /* Notify the secondary CPU it can run calibrate_delay. */ + mb(); + smp_secondary_alive = 0; + + /* We've been acked by the console; wait one second for + * the task to start up for real. + */ + timeout = jiffies + 1*HZ; + while (time_before(jiffies, timeout)) { + if (smp_secondary_alive == 1) + goto alive; + udelay(10); + barrier(); + } + + /* We failed to boot the CPU. */ + + pr_err("SMP: Processor %d is stuck.\n", cpuid); + return -1; + +alive: + /* Another "Red Snapper". */ + return 0; +} + +static void __init process_nr_cpu_ids(void) +{ + int i; + + for (i = nr_cpu_ids; i < NR_CPUS; i++) { + set_cpu_possible(i, false); + set_cpu_present(i, false); + } + + nr_cpu_ids = num_possible_cpus(); +} + +void __init smp_rcb_init(void) +{ + smp_rcb = INIT_SMP_RCB; + memset(smp_rcb, 0, sizeof(struct smp_rcb_struct)); + /* Setup SMP_RCB fields that uses to activate secondary CPU */ + smp_rcb->restart_entry = __smp_callin; + smp_rcb->init_done = 0xDEADBEEFUL; + mb(); +} + +/* + * Called from setup_arch. Detect an SMP system and which processors + * are present. + */ +void __init setup_smp(void) +{ + int i = 0, num = 0; /* i: physical id, num: logical id */ + + init_cpu_possible(cpu_none_mask); + + /* For unified kernel, NR_CPUS is the maximum possible value */ + for (; i < NR_CPUS; i++) { + if (cpumask_test_cpu(i, &core_start)) { + __cpu_to_rcid[num] = i; + __rcid_to_cpu[i] = num; + set_cpu_possible(num, true); + smp_store_cpu_info(num); + if (!cpumask_test_cpu(i, &cpu_offline)) + set_cpu_present(num, true); + num++; + } else + __rcid_to_cpu[i] = -1; + } + /* for sw64, the BSP must be logical core 0 */ + BUG_ON(cpu_to_rcid(0) != hard_smp_processor_id()); + + while (num < NR_CPUS) { + __cpu_to_rcid[num] = -1; + num++; + } + + process_nr_cpu_ids(); + + pr_info("Detected %u possible CPU(s), %u CPU(s) are present\n", + nr_cpu_ids, num_present_cpus()); + + smp_rcb_init(); +} +/* + * Called by smp_init prepare the secondaries + */ +void __init native_smp_prepare_cpus(unsigned int max_cpus) +{ + unsigned int cpu; + /* Take care of some initial bookkeeping. */ + memset(ipi_data, 0, sizeof(ipi_data)); + + init_cpu_topology(); + current_thread_info()->cpu = 0; + store_cpu_topology(smp_processor_id()); + numa_add_cpu(smp_processor_id()); + + for_each_possible_cpu(cpu) { + numa_store_cpu_info(cpu); + } + + /* Nothing to do on a UP box, or when told not to. 
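+ * ("maxcpus=0" on the kernel command line shows up here as max_cpus == 0.)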
*/ + if (nr_cpu_ids == 1 || max_cpus == 0) { + init_cpu_possible(cpumask_of(0)); + init_cpu_present(cpumask_of(0)); + pr_info("SMP mode deactivated.\n"); + return; + } + + pr_info("SMP starting up secondaries.\n"); +} + +void native_smp_prepare_boot_cpu(void) +{ + int me = smp_processor_id(); + + per_cpu(cpu_state, me) = CPU_ONLINE; +} + +int native_vt_cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + printk("%s: cpu = %d\n", __func__, cpu); + + wmb(); + smp_rcb->ready = 0; + smp_boot_one_cpu(cpu, tidle); + + return cpu_online(cpu) ? 0 : -ENOSYS; +} + +DECLARE_STATIC_KEY_FALSE(use_tc_as_sched_clock); +int native_cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + if (is_in_guest()) + return native_vt_cpu_up(cpu, tidle); + + wmb(); + smp_rcb->ready = 0; + +#ifdef CONFIG_SW64_SUSPEND_DEEPSLEEP_NONBOOT_CORE + /* send wake up signal */ + send_wakeup_interrupt(cpu); +#endif + /* send reset signal */ + if (smp_booted) { + if (is_in_host()) { + reset_cpu(cpu); + } else { + while (1) { + cpu_relax(); + } + } + } + smp_boot_one_cpu(cpu, tidle); + +#ifdef CONFIG_SW64_SUSPEND_DEEPSLEEP_NONBOOT_CORE + if (static_branch_likely(&use_tc_as_sched_clock)) { + if (smp_booted) { + tc_sync_clear(); + smp_call_function_single(cpu, tc_sync_ready, NULL, 0); + tc_sync_set(); + } + } +#endif + + return cpu_online(cpu) ? 0 : -ENOSYS; +} + +void __init native_smp_cpus_done(unsigned int max_cpus) +{ + int cpu; + unsigned long bogosum = 0; + + for (cpu = 0; cpu < NR_CPUS; cpu++) + if (cpu_online(cpu)) + bogosum += cpu_data[cpu].loops_per_jiffy; + + smp_booted = 1; + pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + num_online_cpus(), + (bogosum + 2500) / (500000/HZ), + ((bogosum + 2500) / (5000/HZ)) % 100); +} + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + + +static void send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation) +{ + int i; + + mb(); + for_each_cpu(i, to_whom) + set_bit(operation, &ipi_data[i].bits); + + mb(); + for_each_cpu(i, to_whom) + send_ipi(i, II_II0); +} + +void handle_ipi(struct pt_regs *regs) +{ + int this_cpu = smp_processor_id(); + unsigned long *pending_ipis = &ipi_data[this_cpu].bits; + unsigned long ops; + + mb(); /* Order interrupt and bit testing. */ + while ((ops = xchg(pending_ipis, 0)) != 0) { + mb(); /* Order bit clearing and data access. */ + do { + unsigned long which; + + which = ops & -ops; + ops &= ~which; + which = __ffs(which); + + switch (which) { + case IPI_RESCHEDULE: + scheduler_ipi(); + break; + + case IPI_CALL_FUNC: + irq_enter(); + generic_smp_call_function_interrupt(); + irq_exit(); + break; + + case IPI_CPU_STOP: + local_irq_disable(); + pr_crit("other core panic, now halt...\n"); + while (1) + asm("nop"); + halt(); + + default: + pr_crit("Unknown IPI on CPU %d: %lu\n", this_cpu, which); + break; + } + } while (ops); + + mb(); /* Order data access and bit testing. 
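+ * i.e. make sure the handlers' memory accesses have completed before the
+ * pending mask is read again.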
*/ + } + + cpu_data[this_cpu].ipi_count++; +} + +void native_smp_send_reschedule(int cpu) +{ +#ifdef DEBUG_IPI_MSG + if (cpu == hard_smp_processor_id()) + pr_warn("smp_send_reschedule: Sending IPI to self.\n"); +#endif + send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); +} + +static void native_stop_other_cpus(int wait) +{ + cpumask_t to_whom; + + cpumask_copy(&to_whom, cpu_possible_mask); + cpumask_clear_cpu(smp_processor_id(), &to_whom); +#ifdef DEBUG_IPI_MSG + if (hard_smp_processor_id() != boot_cpu_id) + pr_warn("smp_send_stop: Not on boot cpu.\n"); +#endif + send_ipi_message(&to_whom, IPI_CPU_STOP); + +} + +void native_send_call_func_ipi(const struct cpumask *mask) +{ + send_ipi_message(mask, IPI_CALL_FUNC); +} + +void native_send_call_func_single_ipi(int cpu) +{ + send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC); +} + +static void +ipi_imb(void *ignored) +{ + imb(); +} + +void smp_imb(void) +{ + /* Must wait other processors to flush their icache before continue. */ + on_each_cpu(ipi_imb, NULL, 1); +} +EXPORT_SYMBOL(smp_imb); + +static void ipi_flush_tlb_all(void *ignored) +{ + tbia(); +} + +void flush_tlb_all(void) +{ + /* Although we don't have any data to pass, we do want to + * synchronize with the other processors. + */ + on_each_cpu(ipi_flush_tlb_all, NULL, 1); +} + +#define asn_locked() (cpu_data[smp_processor_id()].asn_lock) + +static void ipi_flush_tlb_mm(void *x) +{ + struct mm_struct *mm = (struct mm_struct *) x; + + if (mm == current->mm) + flush_tlb_current(mm); + else + flush_tlb_other(mm); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + preempt_disable(); + + /* happens as a result of exit_mmap() + * Shall we clear mm->context.asid[] here? + */ + if (atomic_read(&mm->mm_users) == 0) { + preempt_enable(); + return; + } + + if (mm == current->mm) { + flush_tlb_current(mm); + if (atomic_read(&mm->mm_users) == 1) { + int cpu, this_cpu = smp_processor_id(); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_online(cpu) || cpu == this_cpu) + continue; + if (mm->context.asid[cpu]) + mm->context.asid[cpu] = 0; + } + preempt_enable(); + return; + } + } else + flush_tlb_other(mm); + + smp_call_function(ipi_flush_tlb_mm, mm, 1); + + preempt_enable(); +} +EXPORT_SYMBOL(flush_tlb_mm); + +struct flush_tlb_page_struct { + struct vm_area_struct *vma; + struct mm_struct *mm; + unsigned long addr; +}; + +static void ipi_flush_tlb_page(void *x) +{ + struct flush_tlb_page_struct *data = (struct flush_tlb_page_struct *)x; + struct mm_struct *mm = data->mm; + + if (mm == current->mm) + flush_tlb_current_page(mm, data->vma, data->addr); + else + flush_tlb_other(mm); + +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) +{ + struct flush_tlb_page_struct data; + struct mm_struct *mm = vma->vm_mm; + + preempt_disable(); + + if (mm == current->mm) { + flush_tlb_current_page(mm, vma, addr); + if (atomic_read(&mm->mm_users) == 1) { + int cpu, this_cpu = smp_processor_id(); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_online(cpu) || cpu == this_cpu) + continue; + if (mm->context.asid[cpu]) + mm->context.asid[cpu] = 0; + } + preempt_enable(); + return; + } + } else + flush_tlb_other(mm); + + data.vma = vma; + data.mm = mm; + data.addr = addr; + + smp_call_function(ipi_flush_tlb_page, &data, 1); + + preempt_enable(); +} +EXPORT_SYMBOL(flush_tlb_page); + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + /* On the SW we always flush the whole user tlb. 
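+ * Range flushes simply fall back to flush_tlb_mm() on the whole address space.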
*/ + flush_tlb_mm(vma->vm_mm); +} +EXPORT_SYMBOL(flush_tlb_range); + +static void ipi_flush_icache_page(void *x) +{ + struct mm_struct *mm = (struct mm_struct *) x; + + if (mm == current->mm) + __load_new_mm_context(mm); + else + flush_tlb_other(mm); +} + +void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + struct mm_struct *mm = vma->vm_mm; + + if ((vma->vm_flags & VM_EXEC) == 0) + return; + if (!icache_is_vivt_no_ictag()) + return; + + preempt_disable(); + + if (mm == current->mm) { + __load_new_mm_context(mm); + if (atomic_read(&mm->mm_users) == 1) { + int cpu, this_cpu = smp_processor_id(); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_online(cpu) || cpu == this_cpu) + continue; + if (mm->context.asid[cpu]) + mm->context.asid[cpu] = 0; + } + preempt_enable(); + return; + } + } else + flush_tlb_other(mm); + + smp_call_function(ipi_flush_icache_page, mm, 1); + + preempt_enable(); +} + +int native_cpu_disable(void) +{ + int cpu = smp_processor_id(); + + set_cpu_online(cpu, false); + remove_cpu_topology(cpu); + numa_remove_cpu(cpu); +#ifdef CONFIG_HOTPLUG_CPU + clear_tasks_mm_cpumask(cpu); +#endif + return 0; +} + +void native_cpu_die(unsigned int cpu) +{ + /* We don't do anything here: idle task is faking death itself. */ + unsigned int i; + + for (i = 0; i < 10; i++) { + /* They ack this in play_dead by setting CPU_DEAD */ + if (per_cpu(cpu_state, cpu) == CPU_DEAD) { + if (system_state == SYSTEM_RUNNING) + pr_info("CPU %u is now offline\n", cpu); + return; + } + msleep(100); + } + pr_err("CPU %u didn't die...\n", cpu); +} + +static void disable_timer(void) +{ + if (is_in_guest()) + hcall(HCALL_SET_CLOCKEVENT, 0, 0, 0); + else + wrtimer(0); +} + +void native_play_dead(void) +{ + idle_task_exit(); + mb(); + __this_cpu_write(cpu_state, CPU_DEAD); +#ifdef CONFIG_HOTPLUG_CPU + fixup_irqs(); +#endif + local_irq_disable(); + + disable_timer(); + + if (is_in_guest()) + hcall(HCALL_STOP, 0, 0, 0); + +#ifdef CONFIG_SUSPEND + +#ifdef CONFIG_SW64_SUSPEND_DEEPSLEEP_NONBOOT_CORE + sleepen(); + send_sleep_interrupt(smp_processor_id()); + while (1) + asm("nop"); +#else + asm volatile("halt"); + while (1) + asm("nop"); +#endif /* SW64_SUSPEND_DEEPSLEEP */ + + +#else + asm volatile("memb"); + asm volatile("halt"); +#endif +} + +struct smp_ops smp_ops = { + .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, + .smp_prepare_cpus = native_smp_prepare_cpus, + .smp_cpus_done = native_smp_cpus_done, + + .stop_other_cpus = native_stop_other_cpus, + .smp_send_reschedule = native_smp_send_reschedule, + + .cpu_up = native_cpu_up, + .cpu_die = native_cpu_die, + .cpu_disable = native_cpu_disable, + .play_dead = native_play_dead, + + .send_call_func_ipi = native_send_call_func_ipi, + .send_call_func_single_ipi = native_send_call_func_single_ipi, +}; +EXPORT_SYMBOL_GPL(smp_ops); diff --git a/arch/sw_64/kernel/stacktrace.c b/arch/sw_64/kernel/stacktrace.c new file mode 100644 index 000000000000..bb501c14565b --- /dev/null +++ b/arch/sw_64/kernel/stacktrace.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Stack trace management functions + * + * Copyright (C) 2018 snyh xiabin@deepin.com + */ +#include <linux/sched.h> +#include <linux/stacktrace.h> +#include <linux/export.h> +#include <linux/sched/task_stack.h> +#include <linux/sched/debug.h> + + +/* + * Save stack-backtrace addresses into a stack_trace buffer. 
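+ * The walk scans the task's kernel stack and records every word that looks
+ * like a kernel text address outside the scheduler functions.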
+ */ +void save_stack_trace(struct stack_trace *trace) +{ + save_stack_trace_tsk(current, trace); +} +EXPORT_SYMBOL_GPL(save_stack_trace); + + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + unsigned long *sp = (unsigned long *)task_thread_info(tsk)->pcb.ksp; + unsigned long addr; + + WARN_ON(trace->nr_entries || !trace->max_entries); + + while (!kstack_end(sp)) { + addr = *sp++; + if (__kernel_text_address(addr) && + !in_sched_functions(addr)) { + if (trace->skip > 0) + trace->skip--; + else + trace->entries[trace->nr_entries++] = addr; + if (trace->nr_entries >= trace->max_entries) + break; + } + } + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/sw_64/kernel/suspend.c b/arch/sw_64/kernel/suspend.c new file mode 100644 index 000000000000..b2b07ac3042b --- /dev/null +++ b/arch/sw_64/kernel/suspend.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/suspend.h> +#include <linux/interrupt.h> +#include <linux/pm.h> +#include <linux/irq.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <asm/ptrace.h> +#include <asm/suspend.h> +#include <asm/smp.h> +#include <asm/io.h> +#include <asm/hmcall.h> +#include <asm/delay.h> +#include <asm/sw64io.h> +#include <asm/sw64_init.h> + +struct processor_state suspend_state; + +static int native_suspend_state_valid(suspend_state_t pm_state) +{ + switch (pm_state) { + case PM_SUSPEND_ON: + case PM_SUSPEND_STANDBY: + case PM_SUSPEND_MEM: + return 1; + default: + return 0; + } +} + +void disable_local_timer(void) +{ + wrtimer(0); +} + +/* + * Boot Core will enter suspend stat here. + */ +void sw64_suspend_enter(void) +{ + /* boot processor will go to deep sleep mode from here + * After wake up boot processor, pc will go here + */ + + disable_local_timer(); +#ifdef CONFIG_PCI + if (sw64_chip->suspend) + sw64_chip->suspend(0); +#endif +#ifdef CONFIG_SW64_SUSPEND_DEEPSLEEP_BOOTCORE + sw64_suspend_deep_sleep(&suspend_state); +#else + mtinten(); + asm("halt"); +#endif +#ifdef CONFIG_PCI + if (sw64_chip->suspend) + sw64_chip->suspend(1); +#endif + disable_local_timer(); +} + +static int native_suspend_enter(suspend_state_t state) +{ + /* processor specific suspend */ + sw64_suspend_enter(); + return 0; +} + +static const struct platform_suspend_ops native_suspend_ops = { + .valid = native_suspend_state_valid, + .enter = native_suspend_enter, +}; +static int __init sw64_pm_init(void) +{ + suspend_set_ops(&native_suspend_ops); + return 0; +} +arch_initcall(sw64_pm_init); diff --git a/arch/sw_64/kernel/suspend_asm.S b/arch/sw_64/kernel/suspend_asm.S new file mode 100644 index 000000000000..73232de4cf19 --- /dev/null +++ b/arch/sw_64/kernel/suspend_asm.S @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/page.h> +#include <asm/regdef.h> + + .text + .set noat +ENTRY(sw64_suspend_deep_sleep) + /* a0 $16 will be the address of suspend_state */ + ldi $1, PSTATE_REGS($16) + stl $9, CALLEE_R9($1) + stl $10, CALLEE_R10($1) + stl $11, CALLEE_R11($1) + stl $12, CALLEE_R12($1) + stl $13, CALLEE_R13($1) + stl $14, CALLEE_R14($1) + stl $15, CALLEE_R15($1) + stl $26, CALLEE_RA($1) + /* SIMD-FP */ + ldi $1, PSTATE_FPREGS($16) + vstd $f2, CALLEE_F2($1) + vstd $f3, CALLEE_F3($1) + vstd $f4, CALLEE_F4($1) + vstd $f5, CALLEE_F5($1) + vstd $f6, CALLEE_F6($1) + vstd $f7, CALLEE_F7($1) + vstd $f8, CALLEE_F8($1) + vstd $f9, CALLEE_F9($1) + 
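+ /* Callee-saved FP registers f2-f9 were stored above at full SIMD width; now save the FPCR. */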
rfpcr $f0 + fstd $f0, PSTATE_FPCR($16) + + /* save the address of suspend_state to $18 */ + mov $16, $18 + + /* + * Now will Go to Deep Sleep + * HMcode should save pc, gp, ps, r16, r17, r18 + */ + + sys_call HMC_sleepen + sys_call HMC_whami + bis $0, $0, $16 + ldi $17, 0x2($31) + sys_call HMC_sendii + + /* wait for a while to receive interrupt */ + ldi $16, 0x1($31) + sll $16, 24, $16 +$subloop: + subl $16, 1, $16 + bis $16, $16, $16 + bis $16, $16, $16 + bne $16, $subloop + + ldi $8, 0x3fff + bic sp, $8, $8 + + ldi $1, PSTATE_REGS($18) + ldl $9, CALLEE_R9($1) + ldl $10, CALLEE_R10($1) + ldl $11, CALLEE_R11($1) + ldl $12, CALLEE_R12($1) + ldl $13, CALLEE_R13($1) + ldl $14, CALLEE_R14($1) + ldl $15, CALLEE_R15($1) + ldl $26, CALLEE_RA($1) + /* SIMD-FP */ + fldd $f0, PSTATE_FPCR($18) + wfpcr $f0 + fimovd $f0, $2 + and $2, 0x3, $2 + beq $2, $suspend_setfpec_0 + subl $2, 0x1, $2 + beq $2, $suspend_setfpec_1 + subl $2, 0x1, $2 + beq $2, $suspend_setfpec_2 + setfpec3 + br $suspend_setfpec_over +$suspend_setfpec_0: + setfpec0 + br $suspend_setfpec_over +$suspend_setfpec_1: + setfpec1 + br $suspend_setfpec_over +$suspend_setfpec_2: + setfpec2 +$suspend_setfpec_over: + ldi $1, PSTATE_FPREGS($18) + vldd $f2, CALLEE_F2($1) + vldd $f3, CALLEE_F3($1) + vldd $f4, CALLEE_F4($1) + vldd $f5, CALLEE_F5($1) + vldd $f6, CALLEE_F6($1) + vldd $f7, CALLEE_F7($1) + vldd $f8, CALLEE_F8($1) + vldd $f9, CALLEE_F9($1) + ret +END(sw64_suspend_deep_sleep) diff --git a/arch/sw_64/kernel/sys_sw64.c b/arch/sw_64/kernel/sys_sw64.c new file mode 100644 index 000000000000..470c74853d47 --- /dev/null +++ b/arch/sw_64/kernel/sys_sw64.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/syscalls.h> +#include <asm/fpu.h> + +SYSCALL_DEFINE5(getsysinfo, unsigned long, op, void __user *, buffer, + unsigned long, nbytes, int __user *, start, void __user *, arg) +{ + unsigned long w; + + switch (op) { + case GSI_IEEE_FP_CONTROL: + /* Return current software fp control & status bits. */ + /* Note that DU doesn't verify available space here. */ + + w = current_thread_info()->ieee_state & IEEE_SW_MASK; + w = swcr_update_status(w, rdfpcr()); + if (put_user(w, (unsigned long __user *) buffer)) + return -EFAULT; + return 0; + default: + break; + } + + return -EOPNOTSUPP; +} + +SYSCALL_DEFINE5(setsysinfo, unsigned long, op, void __user *, buffer, + unsigned long, nbytes, int __user *, start, void __user *, arg) +{ + switch (op) { + case SSI_IEEE_FP_CONTROL: { + unsigned long swcr, fpcr; + unsigned int *state; + + /* + * Sw_64 Architecture Handbook 4.7.7.3: + * To be fully IEEE compiant, we must track the current IEEE + * exception state in software, because spurious bits can be + * set in the trap shadow of a software-complete insn. + */ + + if (get_user(swcr, (unsigned long __user *)buffer)) + return -EFAULT; + state = ¤t_thread_info()->ieee_state; + + /* Update softare trap enable bits. */ + *state = (*state & ~IEEE_SW_MASK) | (swcr & IEEE_SW_MASK); + + /* Update the real fpcr. */ + fpcr = rdfpcr() & FPCR_DYN_MASK; + fpcr |= ieee_swcr_to_fpcr(swcr); + wrfpcr(fpcr); + + return 0; + } + + case SSI_IEEE_RAISE_EXCEPTION: { + unsigned long exc, swcr, fpcr, fex; + unsigned int *state; + + if (get_user(exc, (unsigned long __user *)buffer)) + return -EFAULT; + state = ¤t_thread_info()->ieee_state; + exc &= IEEE_STATUS_MASK; + + /* Update softare trap enable bits. */ + swcr = (*state & IEEE_SW_MASK) | exc; + *state |= exc; + + /* Update the real fpcr. 
*/ + fpcr = rdfpcr(); + fpcr |= ieee_swcr_to_fpcr(swcr); + wrfpcr(fpcr); + + /* If any exceptions set by this call, and are unmasked, + * send a signal. Old exceptions are not signaled. + */ + fex = (exc >> IEEE_STATUS_TO_EXCSUM_SHIFT) & swcr; + if (fex) { + int si_code = FPE_FLTUNK; + + if (fex & IEEE_TRAP_ENABLE_DNO) + si_code = FPE_FLTUND; + if (fex & IEEE_TRAP_ENABLE_INE) + si_code = FPE_FLTRES; + if (fex & IEEE_TRAP_ENABLE_UNF) + si_code = FPE_FLTUND; + if (fex & IEEE_TRAP_ENABLE_OVF) + si_code = FPE_FLTOVF; + if (fex & IEEE_TRAP_ENABLE_DZE) + si_code = FPE_FLTDIV; + if (fex & IEEE_TRAP_ENABLE_INV) + si_code = FPE_FLTINV; + + send_sig_fault(SIGFPE, si_code, (void __user *)NULL, 0, current); + } + return 0; + } + default: + break; + } + + return -EOPNOTSUPP; +} + +SYSCALL_DEFINE2(odd_getpriority, int, which, int, who) +{ + int prio = sys_getpriority(which, who); + + if (prio >= 0) { + /* Return value is the unbiased priority, i.e. 20 - prio. + * This does result in negative return values, so signal + * no error. + */ + force_successful_syscall_return(); + prio = 20 - prio; + } + return prio; +} + +SYSCALL_DEFINE0(getxuid) +{ + current_pt_regs()->r20 = sys_geteuid(); + return sys_getuid(); +} + +SYSCALL_DEFINE0(getxgid) +{ + current_pt_regs()->r20 = sys_getegid(); + return sys_getgid(); +} + +SYSCALL_DEFINE0(getxpid) +{ + current_pt_regs()->r20 = sys_getppid(); + return sys_getpid(); +} + +SYSCALL_DEFINE0(sw64_pipe) +{ + int fd[2]; + int res = do_pipe_flags(fd, 0); + + if (!res) { + /* The return values are in $0 and $20. */ + current_pt_regs()->r20 = fd[1]; + res = fd[0]; + } + return res; +} diff --git a/arch/sw_64/kernel/syscalls/Makefile b/arch/sw_64/kernel/syscalls/Makefile new file mode 100644 index 000000000000..f466e9400301 --- /dev/null +++ b/arch/sw_64/kernel/syscalls/Makefile @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0 +kapi := arch/$(SRCARCH)/include/generated/asm +uapi := arch/$(SRCARCH)/include/generated/uapi/asm + +_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ + $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') + +syscall := $(srctree)/$(src)/syscall.tbl +syshdr := $(srctree)/$(src)/syscallhdr.sh +systbl := $(srctree)/$(src)/syscalltbl.sh + +quiet_cmd_syshdr = SYSHDR $@ + cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \ + '$(syshdr_abis_$(basetarget))' \ + '$(syshdr_pfx_$(basetarget))' \ + '$(syshdr_offset_$(basetarget))' + +quiet_cmd_systbl = SYSTBL $@ + cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@' \ + '$(systbl_abis_$(basetarget))' \ + '$(systbl_abi_$(basetarget))' \ + '$(systbl_offset_$(basetarget))' + +$(uapi)/unistd_64.h: $(syscall) $(syshdr) + $(call if_changed,syshdr) + +$(kapi)/syscall_table.h: $(syscall) $(systbl) + $(call if_changed,systbl) + +uapisyshdr-y += unistd_64.h +kapisyshdr-y += syscall_table.h + +targets += $(uapisyshdr-y) $(kapisyshdr-y) + +PHONY += all +all: $(addprefix $(uapi)/,$(uapisyshdr-y)) +all: $(addprefix $(kapi)/,$(kapisyshdr-y)) + @: diff --git a/arch/sw_64/kernel/syscalls/syscall.tbl b/arch/sw_64/kernel/syscalls/syscall.tbl new file mode 100644 index 000000000000..37b1e3f9f9e2 --- /dev/null +++ b/arch/sw_64/kernel/syscalls/syscall.tbl @@ -0,0 +1,528 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# system call numbers and entry vectors for sw64 +# +# The format is: +# <number> <abi> <name> <entry point> +# +# The <abi> is always "common" for this file +# +#0 is unused +1 common exit sys_exit +2 common fork sys_fork +3 common read sys_read +4 common write sys_write +#5 is unused +6 common 
close sys_close +#7 is unused +#8 is unused +9 common link sys_link +10 common unlink sys_unlink +#11 is unused +12 common chdir sys_chdir +13 common fchdir sys_fchdir +14 common mknod sys_mknod +15 common chmod sys_chmod +16 common chown sys_chown +17 common brk sys_brk +#18 is unused +19 common lseek sys_lseek +20 common getxpid sys_getxpid +#21 is unused +22 common umount2 sys_umount +23 common setuid sys_setuid +24 common getxuid sys_getxuid +#25 is unused +26 common ptrace sys_ptrace +#27 is unused +#28 is unused +#29 is unused +#30 is unused +#31 is unused +#32 is unused +33 common access sys_access +#34 is unused +#35 is unused +36 common sync sys_sync +37 common kill sys_kill +#38 is unused +39 common setpgid sys_setpgid +#40 is unused +41 common dup sys_dup +42 common pipe sys_sw64_pipe +#43 is unused +#44 is unused +45 common open sys_open +#46 is unused +47 common getxgid sys_getxgid +48 common sigprocmask sys_sigprocmask +#49 is unused +#50 is unused +51 common acct sys_acct +52 common sigpending sys_sigpending +#53 is unused +54 common ioctl sys_ioctl +#55 is unused +#56 is unused +57 common symlink sys_symlink +58 common readlink sys_readlink +59 common execve sys_execve +60 common umask sys_umask +61 common chroot sys_chroot +#62 is unused +63 common getpgrp sys_getpgrp +#64 is unused +#65 is unused +66 common vfork sw64_vfork +67 common stat sys_newstat +68 common lstat sys_newlstat +#69 is unused +#70 is unused +71 common mmap sys_mmap +#72 is unused +73 common munmap sys_munmap +74 common mprotect sys_mprotect +75 common madvise sys_madvise +76 common vhangup sys_vhangup +#77 is unused +#78 is unused +79 common getgroups sys_getgroups +80 common setgroups sys_setgroups +#81 is unused +82 common setpgrp sys_setpgid +#83 is unused +#84 is unused +#85 is unused +#86 is unused +87 common gethostname sys_gethostname +88 common sethostname sys_sethostname +#89 is unused +90 common dup2 sys_dup2 +91 common fstat sys_newfstat +92 common fcntl sys_fcntl +#93 is unused +94 common poll sys_poll +95 common fsync sys_fsync +96 common setpriority sys_setpriority +97 common socket sys_socket +98 common connect sys_connect +99 common accept sys_accept +100 common odd_getpriority sys_odd_getpriority +101 common send sys_send +102 common recv sys_recv +103 common sigreturn sys_sigreturn +104 common bind sys_bind +105 common setsockopt sys_setsockopt +106 common listen sys_listen +#107 is unused +#108 is unused +#109 is unused +#110 is unused +111 common sigsuspend sys_sigsuspend +#112 is unused +113 common recvmsg sys_recvmsg +114 common sendmsg sys_sendmsg +#115 is unused +#116 is unused +#117 is unused +118 common getsockopt sys_getsockopt +119 common socketcall sys_socketcall +120 common readv sys_readv +121 common writev sys_writev +#122 is unused +123 common fchown sys_fchown +124 common fchmod sys_fchmod +125 common recvfrom sys_recvfrom +126 common setreuid sys_setreuid +127 common setregid sys_setregid +128 common rename sys_rename +129 common truncate sys_truncate +130 common ftruncate sys_ftruncate +131 common flock sys_flock +132 common setgid sys_setgid +133 common sendto sys_sendto +134 common shutdown sys_shutdown +135 common socketpair sys_socketpair +136 common mkdir sys_mkdir +137 common rmdir sys_rmdir +#138 is unused +#139 is unused +#140 is unused +141 common getpeername sys_getpeername +#142 is unused +#143 is unused +144 common getrlimit sys_getrlimit +145 common setrlimit sys_setrlimit +#146 is unused +147 common setsid sys_setsid +148 common quotactl sys_quotactl 
+#149 is unused +150 common getsockname sys_getsockname +#151 is unused +#152 is unused +#153 is unused +#154 is unused +#155 is unused +156 common sigaction sys_sigaction +#157 is unused +#158 is unused +#159 is unused +#160 is unused +#161 is unused +#162 is unused +#163 is unused +#164 is unused +#165 is unused +166 common setdomainname sys_setdomainname +#167 is unused +#168 is unused +#169 is unused +170 common bpf sys_bpf +171 common userfaultfd sys_userfaultfd +172 common membarrier sys_membarrier +173 common mlock2 sys_mlock2 +174 common getpid sys_getpid +175 common getppid sys_getppid +176 common getuid sys_getuid +177 common geteuid sys_geteuid +178 common getgid sys_getgid +179 common getegid sys_getegid +#180 is unused +#181 is unused +#182 is unused +#183 is unused +#184 is unused +#185 is unused +#186 is unused +#187 is unused +#188 is unused +#189 is unused +#190 is unused +#191 is unused +#192 is unused +#193 is unused +#194 is unused +#195 is unused +#196 is unused +#197 is unused +#198 is unused +#199 is unused +200 common msgctl sys_old_msgctl +201 common msgget sys_msgget +202 common msgrcv sys_msgrcv +203 common msgsnd sys_msgsnd +204 common semctl sys_semctl +205 common semget sys_semget +206 common semop sys_semop +#207 is unused +208 common lchown sys_lchown +209 common shmat sys_shmat +210 common shmctl sys_shmctl +211 common shmdt sys_shmdt +212 common shmget sys_shmget +#213 is unused +#214 is unused +#215 is unused +#216 is unused +217 common msync sys_msync +#218 is unused +#219 is unused +#220 is unused +#221 is unused +#222 is unused +#223 is unused +#224 is unused +#225 is unused +#226 is unused +#227 is unused +#228 is unused +229 common statfs64 sys_statfs64 +230 common fstatfs64 sys_fstatfs64 +#231 is unused +#232 is unused +233 common getpgid sys_getpgid +234 common getsid sys_getsid +235 common sigaltstack sys_sigaltstack +#236 is unused +#237 is unused +#238 is unused +#239 is unused +#240 is unused +#241 is unused +#242 is unused +#243 is unused +#244 is unused +#245 is unused +#246 is unused +#247 is unused +#248 is unused +#249 is unused +#250 is unused +#251 is unused +#252 is unused +#253 is unused +254 common sysfs sys_sysfs +#255 is unused +256 common getsysinfo sys_getsysinfo +257 common setsysinfo sys_setsysinfo +#258 is unused +#259 is unused +#260 is unused +#261 is unused +#262 is unused +#263 is unused +#264 is unused +#265 is unused +#266 is unused +#267 is unused +#268 is unused +#269 is unused +#270 is unused +271 common pidfd_send_signal sys_pidfd_send_signal +272 common io_uring_setup sys_io_uring_setup +273 common io_uring_enter sys_io_uring_enter +274 common io_uring_register sys_io_uring_register +275 common open_tree sys_open_tree +276 common move_mount sys_move_mount +277 common fsopen sys_fsopen +278 common fsconfig sys_fsconfig +279 common fsmount sys_fsmount +280 common fspick sys_fspick +281 common pidfd_open sys_pidfd_open +282 common clone3 sys_clone3 +283 common close_range sys_close_range +284 common openat2 sys_openat2 +285 common pidfd_getfd sys_pidfd_getfd +286 common faccessat2 sys_faccessat2 +287 common process_madvise sys_process_madvise +#288 is unused +#289 is unused +#290 is unused +#291 is unused +#292 is unused +#293 is unused +#294 is unused +#295 is unused +#296 is unused +#297 is unused +298 common getpriority sys_getpriority +#299 is unused +300 common bdflush sys_bdflush +#301 is unused +302 common mount sys_mount +#303 is unused +304 common swapoff sys_swapoff +305 common getdents sys_getdents +306 
common create_module sys_ni_syscall +307 common init_module sys_init_module +308 common delete_module sys_delete_module +309 common get_kernel_syms sys_ni_syscall +310 common syslog sys_syslog +311 common reboot sys_reboot +312 common clone sw64_clone +313 common uselib sys_uselib +314 common mlock sys_mlock +315 common munlock sys_munlock +316 common mlockall sys_mlockall +317 common munlockall sys_munlockall +318 common sysinfo sys_sysinfo +#319 is unused +#320 is unused +321 common oldumount sys_oldumount +322 common swapon sys_swapon +323 common times sys_times +324 common personality sys_personality +325 common setfsuid sys_setfsuid +326 common setfsgid sys_setfsgid +327 common ustat sys_ustat +328 common statfs sys_statfs +329 common fstatfs sys_fstatfs +330 common sched_setparam sys_sched_setparam +331 common sched_getparam sys_sched_getparam +332 common sched_setscheduler sys_sched_setscheduler +333 common sched_getscheduler sys_sched_getscheduler +334 common sched_yield sys_sched_yield +335 common sched_get_priority_max sys_sched_get_priority_max +336 common sched_get_priority_min sys_sched_get_priority_min +337 common sched_rr_get_interval sys_sched_rr_get_interval +338 common afs_syscall sys_ni_syscall +339 common uname sys_newuname +340 common nanosleep sys_nanosleep +341 common mremap sys_mremap +342 common nfsservctl sys_ni_syscall +343 common setresuid sys_setresuid +344 common getresuid sys_getresuid +345 common pciconfig_read sys_pciconfig_read +346 common pciconfig_write sys_pciconfig_write +347 common query_module sys_ni_syscall +348 common prctl sys_prctl +349 common pread64 sys_pread64 +350 common pwrite64 sys_pwrite64 +351 common rt_sigreturn sys_rt_sigreturn +352 common rt_sigaction sys_rt_sigaction +353 common rt_sigprocmask sys_rt_sigprocmask +354 common rt_sigpending sys_rt_sigpending +355 common rt_sigtimedwait sys_rt_sigtimedwait +356 common rt_sigqueueinfo sys_rt_sigqueueinfo +357 common rt_sigsuspend sys_rt_sigsuspend +358 common select sys_select +359 common gettimeofday sys_gettimeofday +360 common settimeofday sys_settimeofday +361 common getitimer sys_getitimer +362 common setitimer sys_setitimer +363 common utimes sys_utimes +364 common getrusage sys_getrusage +365 common wait4 sys_wait4 +366 common adjtimex sys_adjtimex +367 common getcwd sys_getcwd +368 common capget sys_capget +369 common capset sys_capset +370 common sendfile sys_sendfile +371 common setresgid sys_setresgid +372 common getresgid sys_getresgid +373 common dipc sys_ni_syscall +374 common pivot_root sys_pivot_root +375 common mincore sys_mincore +376 common pciconfig_iobase sys_pciconfig_iobase +377 common getdents64 sys_getdents64 +378 common gettid sys_gettid +379 common readahead sys_readahead +#380 is unused +381 common tkill sys_tkill +382 common setxattr sys_setxattr +383 common lsetxattr sys_lsetxattr +384 common fsetxattr sys_fsetxattr +385 common getxattr sys_getxattr +386 common lgetxattr sys_lgetxattr +387 common fgetxattr sys_fgetxattr +388 common listxattr sys_listxattr +389 common llistxattr sys_llistxattr +390 common flistxattr sys_flistxattr +391 common removexattr sys_removexattr +392 common lremovexattr sys_lremovexattr +393 common fremovexattr sys_fremovexattr +394 common futex sys_futex +395 common sched_setaffinity sys_sched_setaffinity +396 common sched_getaffinity sys_sched_getaffinity +397 common tuxcall sys_ni_syscall +398 common io_setup sys_io_setup +399 common io_destroy sys_io_destroy +400 common io_getevents sys_io_getevents +401 common io_submit 
sys_io_submit +402 common io_cancel sys_io_cancel +403 common io_pgetevents sys_io_pgetevents +404 common rseq sys_rseq +405 common exit_group sys_exit_group +406 common lookup_dcookie sys_lookup_dcookie +407 common epoll_create sys_epoll_create +408 common epoll_ctl sys_epoll_ctl +409 common epoll_wait sys_epoll_wait +410 common remap_file_pages sys_remap_file_pages +411 common set_tid_address sys_set_tid_address +412 common restart_syscall sys_restart_syscall +413 common fadvise64 sys_fadvise64 +414 common timer_create sys_timer_create +415 common timer_settime sys_timer_settime +416 common timer_gettime sys_timer_gettime +417 common timer_getoverrun sys_timer_getoverrun +418 common timer_delete sys_timer_delete +419 common clock_settime sys_clock_settime +420 common clock_gettime sys_clock_gettime +421 common clock_getres sys_clock_getres +422 common clock_nanosleep sys_clock_nanosleep +423 common semtimedop sys_semtimedop +424 common tgkill sys_tgkill +425 common stat64 sys_stat64 +426 common lstat64 sys_lstat64 +427 common fstat64 sys_fstat64 +428 common vserver sys_ni_syscall +429 common mbind sys_mbind +430 common get_mempolicy sys_get_mempolicy +431 common set_mempolicy sys_set_mempolicy +432 common mq_open sys_mq_open +433 common mq_unlink sys_mq_unlink +434 common mq_timedsend sys_mq_timedsend +435 common mq_timedreceive sys_mq_timedreceive +436 common mq_notify sys_mq_notify +437 common mq_getsetattr sys_mq_getsetattr +438 common waitid sys_waitid +439 common add_key sys_add_key +440 common request_key sys_request_key +441 common keyctl sys_keyctl +442 common ioprio_set sys_ioprio_set +443 common ioprio_get sys_ioprio_get +444 common inotify_init sys_inotify_init +445 common inotify_add_watch sys_inotify_add_watch +446 common inotify_rm_watch sys_inotify_rm_watch +447 common fdatasync sys_fdatasync +448 common kexec_load sys_kexec_load +449 common migrate_pages sys_migrate_pages +450 common openat sys_openat +451 common mkdirat sys_mkdirat +452 common mknodat sys_mknodat +453 common fchownat sys_fchownat +454 common futimesat sys_futimesat +455 common fstatat64 sys_fstatat64 +456 common unlinkat sys_unlinkat +457 common renameat sys_renameat +458 common linkat sys_linkat +459 common symlinkat sys_symlinkat +460 common readlinkat sys_readlinkat +461 common fchmodat sys_fchmodat +462 common faccessat sys_faccessat +463 common pselect6 sys_pselect6 +464 common ppoll sys_ppoll +465 common unshare sys_unshare +466 common set_robust_list sys_set_robust_list +467 common get_robust_list sys_get_robust_list +468 common splice sys_splice +469 common sync_file_range sys_sync_file_range +470 common tee sys_tee +471 common vmsplice sys_vmsplice +472 common move_pages sys_move_pages +473 common getcpu sys_getcpu +474 common epoll_pwait sys_epoll_pwait +475 common utimensat sys_utimensat +476 common signalfd sys_signalfd +477 common timerfd sys_ni_syscall +478 common eventfd sys_eventfd +479 common recvmmsg sys_recvmmsg +480 common fallocate sys_fallocate +481 common timerfd_create sys_timerfd_create +482 common timerfd_settime sys_timerfd_settime +483 common timerfd_gettime sys_timerfd_gettime +484 common signalfd4 sys_signalfd4 +485 common eventfd2 sys_eventfd2 +486 common epoll_create1 sys_epoll_create1 +487 common dup3 sys_dup3 +488 common pipe2 sys_pipe2 +489 common inotify_init1 sys_inotify_init1 +490 common preadv sys_preadv +491 common pwritev sys_pwritev +492 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +493 common perf_event_open sys_perf_event_open +494 common fanotify_init 
sys_fanotify_init +495 common fanotify_mark sys_fanotify_mark +496 common prlimit64 sys_prlimit64 +497 common name_to_handle_at sys_name_to_handle_at +498 common open_by_handle_at sys_open_by_handle_at +499 common clock_adjtime sys_clock_adjtime +500 common syncfs sys_syncfs +501 common setns sys_setns +502 common accept4 sys_accept4 +503 common sendmmsg sys_sendmmsg +504 common process_vm_readv sys_process_vm_readv +505 common process_vm_writev sys_process_vm_writev +506 common kcmp sys_kcmp +507 common finit_module sys_finit_module +508 common sched_setattr sys_sched_setattr +509 common sched_getattr sys_sched_getattr +510 common renameat2 sys_renameat2 +511 common getrandom sys_getrandom +512 common memfd_create sys_memfd_create +513 common execveat sys_execveat +514 common seccomp sys_seccomp +515 common copy_file_range sys_copy_file_range +516 common preadv2 sys_preadv2 +517 common pwritev2 sys_pwritev2 +518 common statx sys_statx diff --git a/arch/sw_64/kernel/syscalls/syscallhdr.sh b/arch/sw_64/kernel/syscalls/syscallhdr.sh new file mode 100644 index 000000000000..959f844498d6 --- /dev/null +++ b/arch/sw_64/kernel/syscalls/syscallhdr.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +in="$1" +out="$2" +my_abis=`echo "($3)" | tr ',' '|'` +prefix="$4" +offset="$5" + +fileguard=_UAPI_ASM_SW64_`basename "$out" | sed \ + -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \ + -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'` +grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | ( + printf "#ifndef %s\n" "${fileguard}" + printf "#define %s\n" "${fileguard}" + printf "\n" + + nxt=0 + while read nr abi name entry ; do + if [ -z "$offset" ]; then + printf "#define __NR_%s%s\t%s\n" \ + "${prefix}" "${name}" "${nr}" + else + printf "#define __NR_%s%s\t(%s + %s)\n" \ + "${prefix}" "${name}" "${offset}" "${nr}" + fi + nxt=$((nr+1)) + done + + printf "\n" + printf "#ifdef __KERNEL__\n" + printf "#define __NR_syscalls\t%s\n" "${nxt}" + printf "#endif\n" + printf "\n" + printf "#endif /* %s */\n" "${fileguard}" +) > "$out" diff --git a/arch/sw_64/kernel/syscalls/syscalltbl.sh b/arch/sw_64/kernel/syscalls/syscalltbl.sh new file mode 100644 index 000000000000..85d78d9309ad --- /dev/null +++ b/arch/sw_64/kernel/syscalls/syscalltbl.sh @@ -0,0 +1,32 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +in="$1" +out="$2" +my_abis=`echo "($3)" | tr ',' '|'` +my_abi="$4" +offset="$5" + +emit() { + t_nxt="$1" + t_nr="$2" + t_entry="$3" + + while [ $t_nxt -lt $t_nr ]; do + printf "__SYSCALL(%s, sys_ni_syscall, )\n" "${t_nxt}" + t_nxt=$((t_nxt+1)) + done + printf "__SYSCALL(%s, %s, )\n" "${t_nxt}" "${t_entry}" +} + +grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | ( + nxt=0 + if [ -z "$offset" ]; then + offset=0 + fi + + while read nr abi name entry ; do + emit $((nxt+offset)) $((nr+offset)) $entry + nxt=$((nr+1)) + done +) > "$out" diff --git a/arch/sw_64/kernel/systbls.S b/arch/sw_64/kernel/systbls.S new file mode 100644 index 000000000000..66e0f461dbb0 --- /dev/null +++ b/arch/sw_64/kernel/systbls.S @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/sw_64/kernel/systbls.S + * + * The system call table. 
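+ * Each entry is a .quad emitted by expanding the generated + * <asm/syscall_table.h>: every __SYSCALL(nr, entry, nargs) becomes the + * address of its handler, and syscalltbl.sh pads numbering holes with + * sys_ni_syscall so the table stays densely indexed by syscall number.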
+ */ + +#include <asm/unistd.h> + +#define __SYSCALL(nr, entry, nargs) .quad entry + .data + .align 3 + .globl sys_call_table +sys_call_table: +#include <asm/syscall_table.h> +#undef __SYSCALL diff --git a/arch/sw_64/kernel/tc.c b/arch/sw_64/kernel/tc.c new file mode 100644 index 000000000000..c047d457e55a --- /dev/null +++ b/arch/sw_64/kernel/tc.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019, serveros, linyue + */ + + +#include <linux/topology.h> +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <asm/tc.h> + +/* + * Entry/exit counters that make sure that both CPUs + * run the measurement code at once: + */ +unsigned long time_sync; + +DEFINE_PER_CPU(u64, tc_offset); + +void tc_sync_clear(void) +{ + time_sync = 0; +} + +void tc_sync_ready(void *ignored) +{ + /* make sure we can see time_sync been set to 0 */ + smp_mb(); + while (!time_sync) + cpu_relax(); + + __this_cpu_write(tc_offset, time_sync - rdtc()); +} + +void tc_sync_set(void) +{ + time_sync = rdtc() + __this_cpu_read(tc_offset); +} diff --git a/arch/sw_64/kernel/time.c b/arch/sw_64/kernel/time.c new file mode 100644 index 000000000000..0815d06b03d4 --- /dev/null +++ b/arch/sw_64/kernel/time.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/delay.h> +#include <linux/ioport.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/bcd.h> +#include <linux/profile.h> +#include <linux/irq_work.h> +#include <linux/uaccess.h> + +#include <asm/io.h> +#include <asm/sw64io.h> +#include <asm/sw64_init.h> +#include <asm/hw_init.h> +#include <asm/irq_impl.h> +#include <asm/debug.h> + +#include <linux/time.h> +#include <linux/timex.h> +#include <linux/clocksource.h> +#include <linux/clk-provider.h> +#include <linux/sched/clock.h> +#include <linux/sched_clock.h> + +#include "proto.h" + +DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL(rtc_lock); + +DECLARE_PER_CPU(u64, tc_offset); + +#define TICK_SIZE (tick_nsec / 1000) + +/* + * Shift amount by which scaled_ticks_per_cycle is scaled. Shifting + * by 48 gives us 16 bits for HZ while keeping the accuracy good even + * for large CPU clock rates. 
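+ * (With 64-bit arithmetic, a 48-bit fractional part leaves 16 integer bits, + * enough for any practical HZ, while the ticks-per-cycle fraction keeps + * 48 bits of precision.)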
+ */ +#define FIX_SHIFT 48 + +unsigned long est_cycle_freq; + +static u64 sc_start; +static u64 sc_shift; +static u64 sc_multi; + +DEFINE_STATIC_KEY_FALSE(use_tc_as_sched_clock); +static int __init sched_clock_setup(char *opt) +{ + if (!opt) + return -EINVAL; + + if (!strncmp(opt, "on", 2)) { + static_branch_enable(&use_tc_as_sched_clock); + pr_info("Using TC instead of jiffies as source of sched_clock()\n"); + } + + return 0; +} +early_param("tc_sched_clock", sched_clock_setup); + +#ifdef CONFIG_IRQ_WORK + +DEFINE_PER_CPU(u8, irq_work_pending); + +#define set_irq_work_pending_flag() __this_cpu_write(irq_work_pending, 1) +#define test_irq_work_pending() __this_cpu_read(irq_work_pending) +#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) + +void arch_irq_work_raise(void) +{ + set_irq_work_pending_flag(); +} + +#else /* CONFIG_IRQ_WORK */ + +#define test_irq_work_pending() 0 +#define clear_irq_work_pending() + +#endif /* CONFIG_IRQ_WORK */ + +#ifndef CONFIG_SMP +static u64 read_tc(struct clocksource *cs) +{ + return rdtc(); +} + +static struct clocksource clocksource_tc = { + .name = "tc", + .rating = 300, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .mask = CLOCKSOURCE_MASK(64), + .shift = 22, + .mult = 0, /* To be filled in */ + .read = read_tc, +}; + +void setup_clocksource(void) +{ + clocksource_register_hz(&clocksource_tc, get_cpu_freq()); + pr_info("Setup clocksource TC, mult = %d\n", clocksource_tc.mult); +} + +#else /* !CONFIG_SMP */ +void setup_clocksource(void) +{ + setup_chip_clocksource(); +} +#endif /* !CONFIG_SMP */ + +void __init common_init_rtc(void) +{ + setup_timer(); +} + +void __init +time_init(void) +{ + unsigned long cycle_freq; + + cycle_freq = get_cpu_freq(); + + pr_info("CPU Cycle frequency = %ld Hz\n", cycle_freq); + + /* Register clocksource */ + setup_clocksource(); + of_clk_init(NULL); + /* Startup the timer source. 
*/ + common_init_rtc(); +} + +void calibrate_delay(void) +{ + loops_per_jiffy = get_cpu_freq() / HZ; + pr_info("Clock rate yields %lu.%02lu BogoMIPS (lpj=%lu)\n", + loops_per_jiffy / (500000 / HZ), + (loops_per_jiffy / (5000 / HZ)) % 100, loops_per_jiffy); +} + +static void __init calibrate_sched_clock(void) +{ + sc_start = rdtc(); +} + +void __init setup_sched_clock(void) +{ + unsigned long step; + + sc_shift = 7; + step = 1UL << sc_shift; + sc_multi = step * NSEC_PER_SEC / get_cpu_freq(); + calibrate_sched_clock(); + + pr_info("sched_clock: sc_multi=%llu, sc_shift=%llu\n", sc_multi, sc_shift); +} + +#ifdef CONFIG_GENERIC_SCHED_CLOCK +static u64 notrace sched_clock_read(void) +{ + return (rdtc() - sc_start) >> sc_shift; +} + +void __init sw64_sched_clock_init(void) +{ + sched_clock_register(sched_clock_read, BITS_PER_LONG, get_cpu_freq() >> sc_shift); +} +#else +unsigned long long sched_clock(void) +{ + if (static_branch_likely(&use_tc_as_sched_clock)) + return ((rdtc() - sc_start + __this_cpu_read(tc_offset)) >> sc_shift) * sc_multi; + else + return (jiffies - INITIAL_JIFFIES) * (NSEC_PER_SEC / HZ); +} + +#ifdef CONFIG_DEBUG_FS +static ssize_t sched_clock_status_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + char buf[2]; + + if (static_key_enabled(&use_tc_as_sched_clock)) + buf[0] = 'Y'; + else + buf[0] = 'N'; + buf[1] = '\n'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t sched_clock_status_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int r; + bool bv; + bool val = static_key_enabled(&use_tc_as_sched_clock); + + r = kstrtobool_from_user(user_buf, count, &bv); + if (!r) { + if (val != bv) { + if (bv) { + static_branch_enable(&use_tc_as_sched_clock); + pr_info("source of sched_clock() switched from jiffies to TC\n"); + } else { + static_branch_disable(&use_tc_as_sched_clock); + pr_info("source of sched_clock() switched from TC to jiffies\n"); + } + } else { + if (val) + pr_info("source of sched_clock() unchanged (using TC)\n"); + else + pr_info("source of sched_clock() unchanged (using jiffies)\n"); + } + } + + return count; +} + +static const struct file_operations sched_clock_status_fops = { + .read = sched_clock_status_read, + .write = sched_clock_status_write, + .open = nonseekable_open, + .llseek = no_llseek, +}; + +static int __init sched_clock_debug_init(void) +{ + struct dentry *sched_clock_status; + + if (!sw64_debugfs_dir) + return -ENODEV; + + sched_clock_status = debugfs_create_file_unsafe("use_tc_as_sched_clock", + 0666, sw64_debugfs_dir, NULL, + &sched_clock_status_fops); + + if (!sched_clock_status) + return -ENOMEM; + + return 0; +} +late_initcall(sched_clock_debug_init); +#endif /* CONFIG_DEBUG_FS */ +#endif /* CONFIG_GENERIC_SCHED_CLOCK */ diff --git a/arch/sw_64/kernel/timer.c b/arch/sw_64/kernel/timer.c new file mode 100644 index 000000000000..c29e7d1b664b --- /dev/null +++ b/arch/sw_64/kernel/timer.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Filename: timer.c + * Description: percpu local timer, based on arch/x86/kernel/apic/apic.c + */ + +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/delay.h> +#include <linux/memblock.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/ioport.h> +#include <linux/cpu.h> +#include <linux/clockchips.h> +#include <linux/acpi_pmtmr.h> +#include <linux/module.h> +#include <linux/dmi.h> +#include <linux/dmar.h> +#include <asm/hcall.h> +#include 
<asm/hw_init.h> +#include <asm/hardirq.h> + +static int timer_next_event(unsigned long delta, + struct clock_event_device *evt); +static int timer_shutdown(struct clock_event_device *evt); +static int timer_set_oneshot(struct clock_event_device *evt); + +/* + * The local apic timer can be used for any function which is CPU local. + */ +static struct clock_event_device timer_clockevent = { + .name = "timer", + .features = CLOCK_EVT_FEAT_ONESHOT, + .shift = 20, + .mult = 0, + .set_state_shutdown = timer_shutdown, + .set_state_oneshot = timer_set_oneshot, + .set_next_event = timer_next_event, + .rating = 300, + .irq = -1, +}; + +static int vtimer_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + hcall(HCALL_SET_CLOCKEVENT, delta, 0, 0); + return 0; +} + +static int vtimer_shutdown(struct clock_event_device *evt) +{ + hcall(HCALL_SET_CLOCKEVENT, 0, 0, 0); + return 0; +} + +static int vtimer_set_oneshot(struct clock_event_device *evt) +{ + return 0; +} +static struct clock_event_device vtimer_clockevent = { + .name = "vtimer", + .features = CLOCK_EVT_FEAT_ONESHOT, + .shift = 20, + .mult = 0, + .set_state_shutdown = vtimer_shutdown, + .set_state_oneshot = vtimer_set_oneshot, + .set_next_event = vtimer_next_event, + .rating = 300, + .irq = -1, +}; + +static DEFINE_PER_CPU(struct clock_event_device, timer_events); + +/* + * Program the next event, relative to now + */ +static int timer_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + wrtimer(delta); + return 0; +} + +static int timer_shutdown(struct clock_event_device *evt) +{ + wrtimer(0); + return 0; +} + +static int timer_set_oneshot(struct clock_event_device *evt) +{ + /* + * SW-TIMER support CLOCK_EVT_MODE_ONESHOT only, and automatically. + * unlike PIT and HPET, which support ONESHOT or PERIODIC by setting PIT_MOD or HPET_Tn_CFG + * so, nothing to do here ... + */ + return 0; +} + +/* + * Setup the local timer for this CPU. Copy the initilized values + * of the boot CPU and register the clock event in the framework. + */ +void setup_timer(void) +{ + int cpu = smp_processor_id(); + struct clock_event_device *swevt = &per_cpu(timer_events, cpu); + + if (is_in_guest()) { + memcpy(swevt, &vtimer_clockevent, sizeof(*swevt)); + /* + * CUIWEI: This value is very important. + * If it's too small, the timer will timeout when the IER + * haven't been opened. 
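+ * In other words, 400ns is a floor chosen so a freshly programmed guest + * clockevent cannot expire before interrupt delivery is enabled; the + * bare-metal path below gets by with a 100ns floor.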
+ */ + swevt->min_delta_ns = 400; + } else { + memcpy(swevt, &timer_clockevent, sizeof(*swevt)); + swevt->min_delta_ns = 100; + } + + swevt->cpumask = cpumask_of(cpu); + swevt->mult = div_sc(get_cpu_freq(), NSEC_PER_SEC, swevt->shift); + swevt->max_delta_ns = clockevent_delta2ns(0xFFFFFFFFFFFFFFFF, swevt); + + swevt->set_state_shutdown(swevt); + + clockevents_register_device(swevt); +} + +void sw64_timer_interrupt(void) +{ + struct clock_event_device *evt = this_cpu_ptr(&timer_events); + + irq_enter(); + if (!evt->event_handler) { + pr_warn("Spurious local timer interrupt on cpu %d\n", + smp_processor_id()); + timer_shutdown(evt); + return; + } + + inc_irq_stat(timer_irqs_event); + + evt->event_handler(evt); + + irq_exit(); +} diff --git a/arch/sw_64/kernel/topology.c b/arch/sw_64/kernel/topology.c new file mode 100644 index 000000000000..e6df86270583 --- /dev/null +++ b/arch/sw_64/kernel/topology.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/acpi.h> +#include <linux/arch_topology.h> +#include <linux/cacheinfo.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/node.h> +#include <linux/nodemask.h> +#include <linux/of.h> +#include <linux/sched.h> +#include <linux/sched/topology.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/string.h> + +#include <asm/topology.h> + +static int __init parse_dt_topology(void) +{ + return 0; +} + +/* + * cpu topology table + */ +struct cpu_topology cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu)); + + /* Find the smaller of NUMA, core or LLC siblings */ + if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) { + /* not numa in package, lets use the package siblings */ + core_mask = &cpu_topology[cpu].core_sibling; + } + if (cpu_topology[cpu].llc_id != -1) { + if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask)) + core_mask = &cpu_topology[cpu].llc_sibling; + } + + return core_mask; +} + +static void update_siblings_masks(int cpuid) +{ + struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + /* update core and thread sibling masks */ + for_each_online_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->llc_id == cpu_topo->llc_id) { + cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling); + cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling); + } + + if (cpuid_topo->package_id != cpu_topo->package_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } +} + +void store_cpu_topology(int cpuid) +{ + struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; + + if (cpuid_topo->package_id != -1) + goto topology_populated; + + cpuid_topo->core_id = cpu_to_rcid(cpuid) & CORE_ID_MASK; + cpuid_topo->package_id = rcid_to_package(cpu_to_rcid(cpuid)); + cpuid_topo->llc_id = rcid_to_package(cpuid); + cpuid_topo->thread_id = (cpu_to_rcid(cpuid) >> THREAD_ID_SHIFT) & THREAD_ID_MASK; + + pr_debug("CPU%u: socket %d core %d thread %d\n", + cpuid, cpuid_topo->package_id, cpuid_topo->core_id, + cpuid_topo->thread_id); + +topology_populated: + update_siblings_masks(cpuid); +} + +static void clear_cpu_topology(int cpu) +{ + struct cpu_topology *cpu_topo = 
&cpu_topology[cpu]; + + cpumask_clear(&cpu_topo->llc_sibling); + cpumask_set_cpu(cpu, &cpu_topo->llc_sibling); + + cpumask_clear(&cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); +} + +static void __init reset_cpu_topology(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct cpu_topology *cpu_topo = &cpu_topology[cpu]; + + cpu_topo->thread_id = -1; + cpu_topo->core_id = 0; + cpu_topo->package_id = -1; + cpu_topo->llc_id = -1; + + clear_cpu_topology(cpu); + } +} + +void remove_cpu_topology(int cpu) +{ + int sibling; + + for_each_cpu(sibling, topology_core_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_core_cpumask(sibling)); + for_each_cpu(sibling, topology_sibling_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); + for_each_cpu(sibling, topology_llc_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling)); + + clear_cpu_topology(cpu); +} + +#ifdef CONFIG_ACPI +static bool __init acpi_cpu_is_threaded(int cpu) +{ + return 0; +} + +static int __init parse_acpi_topology(void) +{ + return 0; +} + +#else +static inline int __init parse_acpi_topology(void) +{ + return -EINVAL; +} +#endif + +void __init init_cpu_topology(void) +{ + reset_cpu_topology(); + + /* + * Discard anything that was parsed if we hit an error so we + * don't use partial information. + */ + if (!acpi_disabled && parse_acpi_topology()) + reset_cpu_topology(); + else if (of_have_populated_dt() && parse_dt_topology()) + reset_cpu_topology(); +} diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c new file mode 100644 index 000000000000..c736a67ef7b8 --- /dev/null +++ b/arch/sw_64/kernel/traps.c @@ -0,0 +1,1651 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/sw_64/kernel/traps.c + * + * (C) Copyright 1994 Linus Torvalds + */ + +/* + * This file initializes the trap entry points + */ + +#include <linux/jiffies.h> +#include <linux/mm.h> +#include <linux/sched/signal.h> +#include <linux/sched/debug.h> +#include <linux/tty.h> +#include <linux/delay.h> +#include <linux/extable.h> +#include <linux/kallsyms.h> +#include <linux/ratelimit.h> +#include <linux/uaccess.h> +#include <linux/perf_event.h> +#include <linux/kdebug.h> +#include <linux/kexec.h> + +#include <asm/gentrap.h> +#include <asm/unaligned.h> +#include <asm/sysinfo.h> +#include <asm/mmu_context.h> +#include <asm/special_insns.h> +#include <asm/fpu.h> +#include <asm/kprobes.h> +#include <asm/uprobes.h> +#include <asm/core.h> + +#include "proto.h" + +void +dik_show_regs(struct pt_regs *regs, unsigned long *r9_15) +{ + printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx %s\n", + regs->pc, regs->r26, regs->ps, print_tainted()); + printk("pc is at %pSR\n", (void *)regs->pc); + printk("ra is at %pSR\n", (void *)regs->r26); + printk("v0 = %016lx t0 = %016lx t1 = %016lx\n", + regs->r0, regs->r1, regs->r2); + printk("t2 = %016lx t3 = %016lx t4 = %016lx\n", + regs->r3, regs->r4, regs->r5); + printk("t5 = %016lx t6 = %016lx t7 = %016lx\n", + regs->r6, regs->r7, regs->r8); + + if (r9_15) { + printk("s0 = %016lx s1 = %016lx s2 = %016lx\n", + r9_15[9], r9_15[10], r9_15[11]); + printk("s3 = %016lx s4 = %016lx s5 = %016lx\n", + r9_15[12], r9_15[13], r9_15[14]); + printk("s6 = %016lx\n", r9_15[15]); + } + + printk("a0 = %016lx a1 = %016lx a2 = %016lx\n", + regs->r16, regs->r17, regs->r18); + printk("a3 = %016lx a4 = %016lx a5 = %016lx\n", + regs->r19, regs->r20, regs->r21); + printk("t8 = %016lx t9 = %016lx 
t10 = %016lx\n", + regs->r22, regs->r23, regs->r24); + printk("t11= %016lx pv = %016lx at = %016lx\n", + regs->r25, regs->r27, regs->r28); + printk("gp = %016lx sp = %p\n", regs->gp, regs+1); +} + +static void +dik_show_code(unsigned int *pc) +{ + long i; + unsigned int insn; + + printk("Code:"); + for (i = -6; i < 2; i++) { + if (__get_user(insn, (unsigned int __user *)pc + i)) + break; + printk("%c%08x%c", i ? ' ' : '<', insn, i ? ' ' : '>'); + } + printk("\n"); +} + +static void +dik_show_trace(unsigned long *sp, const char *loglvl) +{ + long i = 0; + unsigned long tmp; + + printk("%sTrace:\n", loglvl); + while (0x1ff8 & (unsigned long)sp) { + tmp = *sp; + sp++; + if (!__kernel_text_address(tmp)) + continue; + printk("%s[<%lx>] %pSR\n", loglvl, tmp, (void *)tmp); + if (i > 40) { + printk("%s ...", loglvl); + break; + } + } + printk("\n"); +} + +static int kstack_depth_to_print = 24; + +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) +{ + unsigned long *stack; + int i; + + /* + * debugging aid: "show_stack(NULL, NULL, KERN_EMERG);" prints the + * back trace for this cpu. + */ + if (sp == NULL) + sp = (unsigned long *)&sp; + + stack = sp; + for (i = 0; i < kstack_depth_to_print; i++) { + if (((long) stack & (THREAD_SIZE-1)) == 0) + break; + if (i && ((i % 4) == 0)) + printk("%s ", loglvl); + printk("%016lx ", *stack++); + } + printk("\n"); + dik_show_trace(sp, loglvl); +} + +void +die_if_kernel(char *str, struct pt_regs *regs, long err, unsigned long *r9_15) +{ + if (regs->ps & 8) + return; +#ifdef CONFIG_SMP + printk("CPU %d ", hard_smp_processor_id()); +#endif + printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err); + dik_show_regs(regs, r9_15); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + dik_show_trace((unsigned long *)(regs+1), KERN_DEFAULT); + dik_show_code((unsigned int *)regs->pc); + + if (test_and_set_thread_flag(TIF_DIE_IF_KERNEL)) { + printk("die_if_kernel recursion detected.\n"); + local_irq_enable(); + while (1) + asm("nop"); + } + + if (kexec_should_crash(current)) + crash_kexec(regs); + + if (panic_on_oops) + panic("Fatal exception"); + + do_exit(SIGSEGV); +} + +#ifndef CONFIG_MATHEMU +static long dummy_emul(void) +{ + return 0; +} + +long (*sw64_fp_emul_imprecise)(struct pt_regs *regs, unsigned long writemask) = (void *)dummy_emul; +EXPORT_SYMBOL_GPL(sw64_fp_emul_imprecise); + +long (*sw64_fp_emul)(unsigned long pc) = (void *)dummy_emul; +EXPORT_SYMBOL_GPL(sw64_fp_emul); +#else +long sw64_fp_emul_imprecise(struct pt_regs *regs, unsigned long writemask); +long sw64_fp_emul(unsigned long pc); +#endif + +asmlinkage void +do_entArith(unsigned long summary, unsigned long write_mask, + struct pt_regs *regs) +{ + long si_code = FPE_FLTINV; + + if (summary & 1) { + /* Software-completion summary bit is set, so try to + * emulate the instruction. If the processor supports + * precise exceptions, we don't have to search. 
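+ * sw64_fp_emul() is handed the address of the trapping instruction + * (regs->pc - 4) and returns 0 once it has fully completed the operation, + * in which case no SIGFPE is delivered.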
+ */ + si_code = sw64_fp_emul(regs->pc - 4); + if (si_code == 0) + return; + } + die_if_kernel("Arithmetic fault", regs, 0, NULL); + + send_sig_fault(SIGFPE, si_code, (void __user *) regs->pc, 0, current); +} + +asmlinkage void +do_entIF(unsigned long inst_type, struct pt_regs *regs) +{ + int signo, code; + unsigned int inst, type; + + type = inst_type & 0xffffffff; + inst = inst_type >> 32; + + if ((regs->ps & ~IPL_MAX) == 0 && type != 4) { + if (type == 1) { + const unsigned int *data + = (const unsigned int *) regs->pc; + printk("Kernel bug at %s:%d\n", + (const char *)(data[1] | (long)data[2] << 32), + data[0]); + } else if (type == 0) { + /* support kgdb */ + notify_die(0, "kgdb trap", regs, 0, 0, SIGTRAP); + return; + } + die_if_kernel((type == 1 ? "Kernel Bug" : "Instruction fault"), + regs, type, NULL); + } + + switch (type) { + case 0: /* breakpoint */ + if (ptrace_cancel_bpt(current)) + regs->pc -= 4; /* make pc point to former bpt */ + + send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0, + current); + return; + + case 1: /* bugcheck */ + send_sig_fault(SIGTRAP, TRAP_UNK, (void __user *)regs->pc, 0, + current); + return; + + case 2: /* gentrap */ + switch ((long)regs->r16) { + case GEN_INTOVF: + signo = SIGFPE; + code = FPE_INTOVF; + break; + case GEN_INTDIV: + signo = SIGFPE; + code = FPE_INTDIV; + break; + case GEN_FLTOVF: + signo = SIGFPE; + code = FPE_FLTOVF; + break; + case GEN_FLTDIV: + signo = SIGFPE; + code = FPE_FLTDIV; + break; + case GEN_FLTUND: + signo = SIGFPE; + code = FPE_FLTUND; + break; + case GEN_FLTINV: + signo = SIGFPE; + code = FPE_FLTINV; + break; + case GEN_FLTINE: + signo = SIGFPE; + code = FPE_FLTRES; + break; + case GEN_ROPRAND: + signo = SIGFPE; + code = FPE_FLTUNK; + break; + + case GEN_DECOVF: + case GEN_DECDIV: + case GEN_DECINV: + case GEN_ASSERTERR: + case GEN_NULPTRERR: + case GEN_STKOVF: + case GEN_STRLENERR: + case GEN_SUBSTRERR: + case GEN_RANGERR: + case GEN_SUBRNG: + case GEN_SUBRNG1: + case GEN_SUBRNG2: + case GEN_SUBRNG3: + case GEN_SUBRNG4: + case GEN_SUBRNG5: + case GEN_SUBRNG6: + case GEN_SUBRNG7: + default: + signo = SIGTRAP; + code = TRAP_UNK; + break; + } + + send_sig_fault(signo, code, (void __user *)regs->pc, 0, + current); + return; + + case 4: /* opDEC */ + switch (inst) { + case BREAK_KPROBE: + if (notify_die(DIE_BREAK, "kprobe", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) + return; + case BREAK_KPROBE_SS: + if (notify_die(DIE_SSTEPBP, "single_step", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) + return; + case UPROBE_BRK_UPROBE: + if (notify_die(DIE_UPROBE, "uprobe", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) + return; + case UPROBE_BRK_UPROBE_XOL: + if (notify_die(DIE_UPROBE_XOL, "uprobe_xol", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) + return; + } + if ((regs->ps & ~IPL_MAX) == 0) + die_if_kernel("Instruction fault", regs, type, NULL); + break; + + case 3: /* FEN fault */ + /* + * Irritating users can call HMC_clrfen to disable the + * FPU for the process. The kernel will then trap in + * do_switch_stack and undo_switch_stack when we try + * to save and restore the FP registers. + + * Given that GCC by default generates code that uses the + * FP registers, HMC_clrfen is not useful except for DoS + * attacks. So turn the bleeding FPU back on and be done + * with it. 
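+ * (Setting bit 0 of pcb.flags is what re-enables the FPU here, presumably + * the FEN enable bit, and __reload_thread() pushes the updated PCB back to + * HMcode so the change takes effect immediately.)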
+ */ + current_thread_info()->pcb.flags |= 1; + __reload_thread(¤t_thread_info()->pcb); + return; + + case 5: /* illoc */ + default: /* unexpected instruction-fault type */ + break; + } + + send_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0, + current); +} + +/* + * entUna has a different register layout to be reasonably simple. It + * needs access to all the integer registers (the kernel doesn't use + * fp-regs), and it needs to have them in order for simpler access. + * + * Due to the non-standard register layout (and because we don't want + * to handle floating-point regs), user-mode unaligned accesses are + * handled separately by do_entUnaUser below. + * + * Oh, btw, we don't handle the "gp" register correctly, but if we fault + * on a gp-register unaligned load/store, something is _very_ wrong + * in the kernel anyway.. + */ +struct allregs { + unsigned long regs[32]; + unsigned long ps, pc, gp, a0, a1, a2; +}; + +struct unaligned_stat { + unsigned long count, va, pc; +} unaligned[2]; + + +/* Macro for exception fixup code to access integer registers. */ +#define una_reg(r) (_regs[(r) >= 16 && (r) <= 18 ? (r) + 19 : (r)]) + + +asmlinkage void +do_entUna(void *va, unsigned long opcode, unsigned long reg, + struct allregs *regs) +{ + long error; + unsigned long tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; + unsigned long pc = regs->pc - 4; + unsigned long *_regs = regs->regs; + const struct exception_table_entry *fixup; + + unaligned[0].count++; + unaligned[0].va = (unsigned long) va; + unaligned[0].pc = pc; + + /* + * We don't want to use the generic get/put unaligned macros as + * we want to trap exceptions. Only if we actually get an + * exception will we decide whether we should have caught it. + */ + + switch (opcode) { + case 0x21: + __asm__ __volatile__( + "1: ldl_u %1, 0(%3)\n" + "2: ldl_u %2, 1(%3)\n" + " extlh %1, %3, %1\n" + " exthh %2, %3, %2\n" + "3:\n" + ".section __ex_table,"a"\n" + " .long 1b - .\n" + " ldi %1, 3b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + + if (error) + goto got_exception; + una_reg(reg) = tmp1 | tmp2; + return; + + case 0x22: + __asm__ __volatile__( + "1: ldl_u %1,0(%3)\n" + "2: ldl_u %2,3(%3)\n" + " extlw %1,%3,%1\n" + " exthw %2,%3,%2\n" + "3:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 3b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + + if (error) + goto got_exception; + una_reg(reg) = (int)(tmp1 | tmp2); + return; + + case 0x23: /* ldl */ + __asm__ __volatile__( + "1: ldl_u %1, 0(%3)\n" + "2: ldl_u %2, 7(%3)\n" + " extll %1, %3, %1\n" + " exthl %2, %3, %2\n" + "3:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 3b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + + if (error) + goto got_exception; + una_reg(reg) = tmp1 | tmp2; + return; + + case 0x29: /* sth */ + __asm__ __volatile__( + " zap %6, 2, %1\n" + " srl %6, 8, %2\n" + " stb %1, 0x0(%5)\n" + " stb %2, 0x1(%5)\n" + "3:\n" + + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %2, 3b-1b(%0)\n" + " .long 2b - .\n" + " ldi %1, 3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(una_reg(reg)), "0"(0)); + + if (error) + goto got_exception; + return; + + case 0x2a: /* stw */ + __asm__ __volatile__( + " zapnot %6, 
0x1, %1\n" + " srl %6, 8, %2\n" + " zapnot %2, 0x1,%2\n" + " srl %6, 16, %3\n" + " zapnot %3, 0x1, %3\n" + " srl %6, 24, %4\n" + " zapnot %4, 0x1, %4\n" + "1: stb %1, 0x0(%5)\n" + "2: stb %2, 0x1(%5)\n" + "3: stb %3, 0x2(%5)\n" + "4: stb %4, 0x3(%5)\n" + "5:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi $31, 5b-1b(%0)\n" + " .long 2b - .\n" + " ldi $31, 5b-2b(%0)\n" + " .long 3b - .\n" + " ldi $31, 5b-3b(%0)\n" + " .long 4b - .\n" + " ldi $31, 5b-4b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(una_reg(reg)), "0"(0)); + + if (error) + goto got_exception; + return; + + case 0x2b: /* stl */ + __asm__ __volatile__( + " zapnot %10, 0x1, %1\n" + " srl %10, 8, %2\n" + " zapnot %2, 0x1, %2\n" + " srl %10, 16, %3\n" + " zapnot %3, 0x1, %3\n" + " srl %10, 24, %4\n" + " zapnot %4, 0x1, %4\n" + " srl %10, 32, %5\n" + " zapnot %5, 0x1, %5\n" + " srl %10, 40, %6\n" + " zapnot %6, 0x1, %6\n" + " srl %10, 48, %7\n" + " zapnot %7, 0x1, %7\n" + " srl %10, 56, %8\n" + " zapnot %8, 0x1, %8\n" + "1: stb %1, 0(%9)\n" + "2: stb %2, 1(%9)\n" + "3: stb %3, 2(%9)\n" + "4: stb %4, 3(%9)\n" + "5: stb %5, 4(%9)\n" + "6: stb %6, 5(%9)\n" + "7: stb %7, 6(%9)\n" + "8: stb %8, 7(%9)\n" + "9:\n" + ".section __ex_table, "a"\n\t" + " .long 1b - .\n" + " ldi $31, 9b-1b(%0)\n" + " .long 2b - .\n" + " ldi $31, 9b-2b(%0)\n" + " .long 3b - .\n" + " ldi $31, 9b-3b(%0)\n" + " .long 4b - .\n" + " ldi $31, 9b-4b(%0)\n" + " .long 5b - .\n" + " ldi $31, 9b-5b(%0)\n" + " .long 6b - .\n" + " ldi $31, 9b-6b(%0)\n" + " .long 7b - .\n" + " ldi $31, 9b-7b(%0)\n" + " .long 8b - .\n" + " ldi $31, 9b-8b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), + "=&r"(tmp4), "=&r"(tmp5), "=&r"(tmp6), "=&r"(tmp7), "=&r"(tmp8) + : "r"(va), "r"(una_reg(reg)), "0"(0)); + + if (error) + goto got_exception; + return; + } + + printk("Bad unaligned kernel access at %016lx: %p %lx %lu\n", + pc, va, opcode, reg); + do_exit(SIGSEGV); + +got_exception: + /* Ok, we caught the exception, but we don't want it. Is there + * someone to pass it along to? + */ + fixup = search_exception_tables(pc); + if (fixup != 0) { + unsigned long newpc; + + newpc = fixup_exception(una_reg, fixup, pc); + printk("Forwarding unaligned exception at %lx (%lx)\n", + pc, newpc); + + regs->pc = newpc; + return; + } + + /* + * Yikes! No one to forward the exception to. + * Since the registers are in a weird format, dump them ourselves. 
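+ * (The allregs layout built by entUna is not a struct pt_regs, so + * dik_show_regs() cannot be reused for this dump.)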
+ */ + + printk("%s(%d): unhandled unaligned exception\n", + current->comm, task_pid_nr(current)); + + printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx\n", + pc, una_reg(26), regs->ps); + printk("r0 = %016lx r1 = %016lx r2 = %016lx\n", + una_reg(0), una_reg(1), una_reg(2)); + printk("r3 = %016lx r4 = %016lx r5 = %016lx\n", + una_reg(3), una_reg(4), una_reg(5)); + printk("r6 = %016lx r7 = %016lx r8 = %016lx\n", + una_reg(6), una_reg(7), una_reg(8)); + printk("r9 = %016lx r10= %016lx r11= %016lx\n", + una_reg(9), una_reg(10), una_reg(11)); + printk("r12= %016lx r13= %016lx r14= %016lx\n", + una_reg(12), una_reg(13), una_reg(14)); + printk("r15= %016lx\n", una_reg(15)); + printk("r16= %016lx r17= %016lx r18= %016lx\n", + una_reg(16), una_reg(17), una_reg(18)); + printk("r19= %016lx r20= %016lx r21= %016lx\n", + una_reg(19), una_reg(20), una_reg(21)); + printk("r22= %016lx r23= %016lx r24= %016lx\n", + una_reg(22), una_reg(23), una_reg(24)); + printk("r25= %016lx r27= %016lx r28= %016lx\n", + una_reg(25), una_reg(27), una_reg(28)); + printk("gp = %016lx sp = %p\n", regs->gp, regs+1); + + dik_show_code((unsigned int *)pc); + dik_show_trace((unsigned long *)(regs+1), KERN_DEFAULT); + + if (test_and_set_thread_flag(TIF_DIE_IF_KERNEL)) { + printk("die_if_kernel recursion detected.\n"); + local_irq_enable(); + while (1) + asm("nop"); + } + do_exit(SIGSEGV); + +} + +/* + * Convert an s-floating point value in memory format to the + * corresponding value in register format. The exponent + * needs to be remapped to preserve non-finite values + * (infinities, not-a-numbers, denormals). + */ +static inline unsigned long +s_mem_to_reg(unsigned long s_mem) +{ + unsigned long frac = (s_mem >> 0) & 0x7fffff; + unsigned long sign = (s_mem >> 31) & 0x1; + unsigned long exp_msb = (s_mem >> 30) & 0x1; + unsigned long exp_low = (s_mem >> 23) & 0x7f; + unsigned long exp; + + exp = (exp_msb << 10) | exp_low; /* common case */ + if (exp_msb) { + if (exp_low == 0x7f) + exp = 0x7ff; + } else { + if (exp_low == 0x00) + exp = 0x000; + else + exp |= (0x7 << 7); + } + return (sign << 63) | (exp << 52) | (frac << 29); +} + +/* + * Convert an s-floating point value in register format to the + * corresponding value in memory format. + */ +static inline unsigned long +s_reg_to_mem(unsigned long s_reg) +{ + return ((s_reg >> 62) << 30) | ((s_reg << 5) >> 34); +} + +/* + * Handle user-level unaligned fault. Handling user-level unaligned + * faults is *extremely* slow and produces nasty messages. A user + * program *should* fix unaligned faults ASAP. + * + * Notice that we have (almost) the regular kernel stack layout here, + * so finding the appropriate registers is a little more difficult + * than in the kernel case. + * + * Finally, we handle regular integer load/stores only. In + * particular, load-linked/store-conditionally and floating point + * load/stores are not supported. The former make no sense with + * unaligned faults (they are guaranteed to fail) and I don't think + * the latter will occur in any decent program. + * + * Sigh. We *do* have to handle some FP operations, because GCC will + * uses them as temporary storage for integer memory to memory copies. + * However, we need to deal with stt/ldt and sts/lds only. 
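+ * Each bit position in the masks below is a major opcode: for example + * bit 0x23 marks ldl and bit 0x2b marks stl, so "(1L << opcode) & OP_INT_MASK" + * is a cheap test for the plain integer load/store forms handled here.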
+ */ +#define OP_INT_MASK (1L << 0x22 | 1L << 0x2a | /* ldw stw */ \ + 1L << 0x23 | 1L << 0x2b | /* ldl stl */ \ + 1L << 0x21 | 1L << 0x29 | /* ldhu sth */ \ + 1L << 0x20 | 1L << 0x28) /* ldbu stb */ + +#define OP_WRITE_MASK (1L << 0x26 | 1L << 0x27 | /* fsts fstd */ \ + 1L << 0x2c | 1L << 0x2d | /* stw stl */ \ + 1L << 0x0d | 1L << 0x0e) /* sth stb */ + +#define R(x) ((size_t) &((struct pt_regs *)0)->x) + +static int unauser_reg_offsets[32] = { + R(r0), R(r1), R(r2), R(r3), R(r4), R(r5), R(r6), R(r7), R(r8), + /* r9 ... r15 are stored in front of regs. */ + -56, -48, -40, -32, -24, -16, -8, + R(r16), R(r17), R(r18), + R(r19), R(r20), R(r21), R(r22), R(r23), R(r24), R(r25), R(r26), + R(r27), R(r28), R(gp), + 0, 0 +}; + +#undef R + +asmlinkage void +do_entUnaUser(void __user *va, unsigned long opcode, + unsigned long reg, struct pt_regs *regs) +{ +#ifdef CONFIG_UNA_PRINT + static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5); +#endif + + unsigned long tmp1, tmp2, tmp3, tmp4; + unsigned long fake_reg, *reg_addr = &fake_reg; + int si_code; + long error; + unsigned long tmp, tmp5, tmp6, tmp7, tmp8, vb; + unsigned long fp[4]; + unsigned long instr, instr_op, value; + + /* Check the UAC bits to decide what the user wants us to do + * with the unaliged access. + */ + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, + 1, regs, regs->pc - 4); + +#ifdef CONFIG_UNA_PRINT + if (!(current_thread_info()->status & TS_UAC_NOPRINT)) { + if (__ratelimit(&ratelimit)) { + printk("%s(%d): unaligned trap at %016lx: %p %lx %ld\n", + current->comm, task_pid_nr(current), + regs->pc - 4, va, opcode, reg); + } + } +#endif + if ((current_thread_info()->status & TS_UAC_SIGBUS)) + goto give_sigbus; + /* Not sure why you'd want to use this, but... */ + if ((current_thread_info()->status & TS_UAC_NOFIX)) + return; + + /* Don't bother reading ds in the access check since we already + * know that this came from the user. Also rely on the fact that + * the page at TASK_SIZE is unmapped and so can't be touched anyway. 
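+ * Any address at or above TASK_SIZE is therefore rejected with SIGSEGV + * below instead of being dereferenced.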
+ */ + if ((unsigned long)va >= TASK_SIZE) + goto give_sigsegv; + + ++unaligned[1].count; + unaligned[1].va = (unsigned long)va; + unaligned[1].pc = regs->pc - 4; + + if ((1L << opcode) & OP_INT_MASK) { + /* it's an integer load/store */ + if (reg < 30) { + reg_addr = (unsigned long *) + ((char *)regs + unauser_reg_offsets[reg]); + } else if (reg == 30) { + /* usp in HMCODE regs */ + fake_reg = rdusp(); + } else { + /* zero "register" */ + fake_reg = 0; + } + } + + get_user(instr, (__u32 *)(regs->pc - 4)); + instr_op = (instr >> 26) & 0x3f; + + get_user(value, (__u64 *)va); + + switch (instr_op) { + + case 0x0c: /* vlds */ + if ((unsigned long)va<<61 == 0) { + __asm__ __volatile__( + "1: ldl %1, 0(%5)\n" + "2: ldl %2, 8(%5)\n" + "3:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 3b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "0"(0)); + + if (error) + goto give_sigsegv; + + sw64_write_simd_fp_reg_s(reg, tmp1, tmp2); + + return; + } else { + __asm__ __volatile__( + "1: ldl_u %1, 0(%6)\n" + "2: ldl_u %2, 7(%6)\n" + "3: ldl_u %3, 15(%6)\n" + " extll %1, %6, %1\n" + " extll %2, %6, %5\n" + " exthl %2, %6, %4\n" + " exthl %3, %6, %3\n" + "4:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 4b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 4b-2b(%0)\n" + " .long 3b - .\n" + " ldi %3, 4b-3b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), + "=&r"(tmp4), "=&r"(tmp5) + : "r"(va), "0"(0)); + + if (error) + goto give_sigsegv; + + tmp1 = tmp1 | tmp4; + tmp2 = tmp5 | tmp3; + + sw64_write_simd_fp_reg_s(reg, tmp1, tmp2); + + return; + } + case 0x0a: /* ldse */ + __asm__ __volatile__( + "1: ldl_u %1, 0(%3)\n" + "2: ldl_u %2, 3(%3)\n" + " extlw %1, %3, %1\n" + " exthw %2, %3, %2\n" + "3:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 3b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + + if (error) + goto give_sigsegv; + + tmp = tmp1 | tmp2; + tmp = tmp | (tmp<<32); + + sw64_write_simd_fp_reg_s(reg, tmp, tmp); + + return; + + case 0x0d: /* vldd */ + if ((unsigned long)va<<61 == 0) { + __asm__ __volatile__( + "1: ldl %1, 0(%5)\n" + "2: ldl %2, 8(%5)\n" + "3: ldl %3, 16(%5)\n" + "4: ldl %4, 24(%5)\n" + "5:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 5b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 5b-2b(%0)\n" + " .long 3b - .\n" + " ldi %3, 5b-3b(%0)\n" + " .long 4b - .\n" + " ldi %4, 5b-4b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "0"(0)); + + if (error) + goto give_sigsegv; + + sw64_write_simd_fp_reg_d(reg, tmp1, tmp2, tmp3, tmp4); + + return; + } else { + __asm__ __volatile__( + "1: ldl_u %1, 0(%6)\n" + "2: ldl_u %2, 7(%6)\n" + "3: ldl_u %3, 15(%6)\n" + " extll %1, %6, %1\n" + " extll %2, %6, %5\n" + " exthl %2, %6, %4\n" + " exthl %3, %6, %3\n" + "4:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 4b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 4b-2b(%0)\n" + " .long 3b - .\n" + " ldi %3, 4b-3b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), + "=&r"(tmp4), "=&r"(tmp5) + : "r"(va), "0"(0)); + + if (error) + goto give_sigsegv; + + tmp7 = tmp1 | tmp4; //f0 + tmp8 = tmp5 | tmp3; //f1 + + vb = ((unsigned long)(va))+16; + + __asm__ __volatile__( + "1: ldl_u %1, 0(%6)\n" + "2: ldl_u %2, 7(%6)\n" + "3: ldl_u %3, 15(%6)\n" + " 
extll %1, %6, %1\n" + " extll %2, %6, %5\n" + " exthl %2, %6, %4\n" + " exthl %3, %6, %3\n" + "4:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 4b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 4b-2b(%0)\n" + " .long 3b - .\n" + " ldi %3, 4b-3b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), + "=&r"(tmp4), "=&r"(tmp5) + : "r"(vb), "0"(0)); + + if (error) + goto give_sigsegv; + + tmp = tmp1 | tmp4; // f2 + tmp2 = tmp5 | tmp3; // f3 + + sw64_write_simd_fp_reg_d(reg, tmp7, tmp8, tmp, tmp2); + return; + } + + case 0x0b: /* ldde */ + __asm__ __volatile__( + "1: ldl_u %1, 0(%3)\n" + "2: ldl_u %2, 7(%3)\n" + " extll %1, %3, %1\n" + " exthl %2, %3, %2\n" + "3:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 3b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + + if (error) + goto give_sigsegv; + + tmp = tmp1 | tmp2; + + sw64_write_simd_fp_reg_d(reg, tmp, tmp, tmp, tmp); + return; + + case 0x09: /* ldwe */ + __asm__ __volatile__( + "1: ldl_u %1, 0(%3)\n" + "2: ldl_u %2, 3(%3)\n" + " extlw %1, %3, %1\n" + " exthw %2, %3, %2\n" + "3:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 3b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + + if (error) + goto give_sigsegv; + + sw64_write_simd_fp_reg_ldwe(reg, (int)(tmp1 | tmp2)); + + return; + + case 0x0e: /* vsts */ + sw64_read_simd_fp_m_s(reg, fp); + if ((unsigned long)va<<61 == 0) { + __asm__ __volatile__( + "1: bis %4, %4, %1\n" + "2: bis %5, %5, %2\n" + "3: stl %1, 0(%3)\n" + "4: stl %2, 8(%3)\n" + "5:\n" + ".section __ex_table, "a"\n\t" + " .long 1b - .\n" + " ldi %1, 5b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 5b-2b(%0)\n" + " .long 3b - .\n" + " ldi $31, 5b-3b(%0)\n" + " .long 4b - .\n" + " ldi $31, 5b-4b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "r"(fp[0]), "r"(fp[1]), "0"(0)); + + if (error) + goto give_sigsegv; + + return; + } else { + __asm__ __volatile__( + " zapnot %10, 0x1, %1\n" + " srl %10, 8, %2\n" + " zapnot %2, 0x1, %2\n" + " srl %10, 16, %3\n" + " zapnot %3, 0x1, %3\n" + " srl %10, 24, %4\n" + " zapnot %4, 0x1, %4\n" + " srl %10, 32, %5\n" + " zapnot %5, 0x1, %5\n" + " srl %10, 40, %6\n" + " zapnot %6, 0x1, %6\n" + " srl %10, 48, %7\n" + " zapnot %7, 0x1, %7\n" + " srl %10, 56, %8\n" + " zapnot %8, 0x1, %8\n" + "1: stb %1, 0(%9)\n" + "2: stb %2, 1(%9)\n" + "3: stb %3, 2(%9)\n" + "4: stb %4, 3(%9)\n" + "5: stb %5, 4(%9)\n" + "6: stb %6, 5(%9)\n" + "7: stb %7, 6(%9)\n" + "8: stb %8, 7(%9)\n" + "9:\n" + ".section __ex_table, "a"\n\t" + " .long 1b - .\n" + " ldi $31, 9b-1b(%0)\n" + " .long 2b - .\n" + " ldi $31, 9b-2b(%0)\n" + " .long 3b - .\n" + " ldi $31, 9b-3b(%0)\n" + " .long 4b - .\n" + " ldi $31, 9b-4b(%0)\n" + " .long 5b - .\n" + " ldi $31, 9b-5b(%0)\n" + " .long 6b - .\n" + " ldi $31, 9b-6b(%0)\n" + " .long 7b - .\n" + " ldi $31, 9b-7b(%0)\n" + " .long 8b - .\n" + " ldi $31, 9b-8b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), + "=&r"(tmp4), "=&r"(tmp5), "=&r"(tmp6), "=&r"(tmp7), "=&r"(tmp8) + : "r"(va), "r"(fp[0]), "0"(0)); + + if (error) + goto give_sigsegv; + + + vb = ((unsigned long)va) + 8; + + __asm__ __volatile__( + " zapnot %10, 0x1, %1\n" + " srl %10, 8, %2\n" + " zapnot %2, 0x1, %2\n" + " srl %10, 16, %3\n" + " zapnot %3, 0x1, %3\n" + " srl %10, 24, %4\n" + " zapnot %4, 0x1, %4\n" + " srl %10, 32, %5\n" + " zapnot 
%5, 0x1, %5\n" + " srl %10, 40, %6\n" + " zapnot %6, 0x1, %6\n" + " srl %10, 48, %7\n" + " zapnot %7, 0x1, %7\n" + " srl %10, 56, %8\n" + " zapnot %8, 0x1, %8\n" + "1: stb %1, 0(%9)\n" + "2: stb %2, 1(%9)\n" + "3: stb %3, 2(%9)\n" + "4: stb %4, 3(%9)\n" + "5: stb %5, 4(%9)\n" + "6: stb %6, 5(%9)\n" + "7: stb %7, 6(%9)\n" + "8: stb %8, 7(%9)\n" + "9:\n" + ".section __ex_table, "a"\n\t" + " .long 1b - .\n" + " ldi $31, 9b-1b(%0)\n" + " .long 2b - .\n" + " ldi $31, 9b-2b(%0)\n" + " .long 3b - .\n" + " ldi $31, 9b-3b(%0)\n" + " .long 4b - .\n" + " ldi $31, 9b-4b(%0)\n" + " .long 5b - .\n" + " ldi $31, 9b-5b(%0)\n" + " .long 6b - .\n" + " ldi $31, 9b-6b(%0)\n" + " .long 7b - .\n" + " ldi $31, 9b-7b(%0)\n" + " .long 8b - .\n" + " ldi $31, 9b-8b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), + "=&r"(tmp4), "=&r"(tmp5), "=&r"(tmp6), "=&r"(tmp7), "=&r"(tmp8) + : "r"(vb), "r"(fp[1]), "0"(0)); + + if (error) + goto give_sigsegv; + + return; + } + + case 0x0f: /* vstd */ + sw64_read_simd_fp_m_d(reg, fp); + if ((unsigned long)va<<61 == 0) { + __asm__ __volatile__( + "1: bis %4, %4, %1\n" + "2: bis %5, %5, %2\n" + "3: stl %1, 0(%3)\n" + "4: stl %2, 8(%3)\n" + "5:\n" + ".section __ex_table, "a"\n\t" + " .long 1b - .\n" + " ldi %1, 5b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 5b-2b(%0)\n" + " .long 3b - .\n" + " ldi $31, 5b-3b(%0)\n" + " .long 4b - .\n" + " ldi $31, 5b-4b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "r"(fp[0]), "r"(fp[1]), "0"(0)); + + if (error) + goto give_sigsegv; + + vb = ((unsigned long)va)+16; + + + __asm__ __volatile__( + "1: bis %4, %4, %1\n" + "2: bis %5, %5, %2\n" + "3: stl %1, 0(%3)\n" + "4: stl %2, 8(%3)\n" + "5:\n" + ".section __ex_table, "a"\n\t" + " .long 1b - .\n" + " ldi %1, 5b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 5b-2b(%0)\n" + " .long 3b - .\n" + " ldi $31, 5b-3b(%0)\n" + " .long 4b - .\n" + " ldi $31, 5b-4b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(vb), "r"(fp[2]), "r"(fp[3]), "0"(0)); + + if (error) + goto give_sigsegv; + + return; + } else { + __asm__ __volatile__( + " zapnot %10, 0x1, %1\n" + " srl %10, 8, %2\n" + " zapnot %2, 0x1, %2\n" + " srl %10, 16, %3\n" + " zapnot %3, 0x1, %3\n" + " srl %10, 24, %4\n" + " zapnot %4, 0x1, %4\n" + " srl %10, 32, %5\n" + " zapnot %5, 0x1, %5\n" + " srl %10, 40, %6\n" + " zapnot %6, 0x1, %6\n" + " srl %10, 48, %7\n" + " zapnot %7, 0x1, %7\n" + " srl %10, 56, %8\n" + " zapnot %8, 0x1, %8\n" + "1: stb %1, 0(%9)\n" + "2: stb %2, 1(%9)\n" + "3: stb %3, 2(%9)\n" + "4: stb %4, 3(%9)\n" + "5: stb %5, 4(%9)\n" + "6: stb %6, 5(%9)\n" + "7: stb %7, 6(%9)\n" + "8: stb %8, 7(%9)\n" + "9:\n" + ".section __ex_table, "a"\n\t" + " .long 1b - .\n" + " ldi $31, 9b-1b(%0)\n" + " .long 2b - .\n" + " ldi $31, 9b-2b(%0)\n" + " .long 3b - .\n" + " ldi $31, 9b-3b(%0)\n" + " .long 4b - .\n" + " ldi $31, 9b-4b(%0)\n" + " .long 5b - .\n" + " ldi $31, 9b-5b(%0)\n" + " .long 6b - .\n" + " ldi $31, 9b-6b(%0)\n" + " .long 7b - .\n" + " ldi $31, 9b-7b(%0)\n" + " .long 8b - .\n" + " ldi $31, 9b-8b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), + "=&r"(tmp4), "=&r"(tmp5), "=&r"(tmp6), "=&r"(tmp7), "=&r"(tmp8) + : "r"(va), "r"(fp[0]), "0"(0)); + + if (error) + goto give_sigsegv; + + vb = ((unsigned long)va) + 8; + + __asm__ __volatile__( + " zapnot %10, 0x1, %1\n" + " srl %10, 8, %2\n" + " zapnot %2, 0x1, %2\n" + " srl %10, 16, %3\n" + " zapnot %3, 0x1, %3\n" + " srl %10, 24, %4\n" + " zapnot %4, 0x1, %4\n" + " srl %10, 32, %5\n" + " zapnot 
%5, 0x1, %5\n" + " srl %10, 40, %6\n" + " zapnot %6, 0x1, %6\n" + " srl %10, 48, %7\n" + " zapnot %7, 0x1, %7\n" + " srl %10, 56, %8\n" + " zapnot %8, 0x1, %8\n" + "1: stb %1, 0(%9)\n" + "2: stb %2, 1(%9)\n" + "3: stb %3, 2(%9)\n" + "4: stb %4, 3(%9)\n" + "5: stb %5, 4(%9)\n" + "6: stb %6, 5(%9)\n" + "7: stb %7, 6(%9)\n" + "8: stb %8, 7(%9)\n" + "9:\n" + ".section __ex_table, "a"\n\t" + " .long 1b - .\n" + " ldi $31, 9b-1b(%0)\n" + " .long 2b - .\n" + " ldi $31, 9b-2b(%0)\n" + " .long 3b - .\n" + " ldi $31, 9b-3b(%0)\n" + " .long 4b - .\n" + " ldi $31, 9b-4b(%0)\n" + " .long 5b - .\n" + " ldi $31, 9b-5b(%0)\n" + " .long 6b - .\n" + " ldi $31, 9b-6b(%0)\n" + " .long 7b - .\n" + " ldi $31, 9b-7b(%0)\n" + " .long 8b - .\n" + " ldi $31, 9b-8b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), + "=&r"(tmp4), "=&r"(tmp5), "=&r"(tmp6), "=&r"(tmp7), "=&r"(tmp8) + : "r"(vb), "r"(fp[1]), "0"(0)); + + if (error) + goto give_sigsegv; + + vb = vb + 8; + + __asm__ __volatile__( + " zapnot %10, 0x1, %1\n" + " srl %10, 8, %2\n" + " zapnot %2, 0x1, %2\n" + " srl %10, 16, %3\n" + " zapnot %3, 0x1, %3\n" + " srl %10, 24, %4\n" + " zapnot %4, 0x1, %4\n" + " srl %10, 32, %5\n" + " zapnot %5, 0x1, %5\n" + " srl %10, 40, %6\n" + " zapnot %6, 0x1, %6\n" + " srl %10, 48, %7\n" + " zapnot %7, 0x1, %7\n" + " srl %10, 56, %8\n" + " zapnot %8, 0x1, %8\n" + "1: stb %1, 0(%9)\n" + "2: stb %2, 1(%9)\n" + "3: stb %3, 2(%9)\n" + "4: stb %4, 3(%9)\n" + "5: stb %5, 4(%9)\n" + "6: stb %6, 5(%9)\n" + "7: stb %7, 6(%9)\n" + "8: stb %8, 7(%9)\n" + "9:\n" + ".section __ex_table, "a"\n\t" + " .long 1b - .\n" + " ldi $31, 9b-1b(%0)\n" + " .long 2b - .\n" + " ldi $31, 9b-2b(%0)\n" + " .long 3b - .\n" + " ldi $31, 9b-3b(%0)\n" + " .long 4b - .\n" + " ldi $31, 9b-4b(%0)\n" + " .long 5b - .\n" + " ldi $31, 9b-5b(%0)\n" + " .long 6b - .\n" + " ldi $31, 9b-6b(%0)\n" + " .long 7b - .\n" + " ldi $31, 9b-7b(%0)\n" + " .long 8b - .\n" + " ldi $31, 9b-8b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), + "=&r"(tmp4), "=&r"(tmp5), "=&r"(tmp6), "=&r"(tmp7), "=&r"(tmp8) + : "r"(vb), "r"(fp[2]), "0"(0)); + + if (error) + goto give_sigsegv; + + vb = vb + 8; + + __asm__ __volatile__( + " zapnot %10, 0x1, %1\n" + " srl %10, 8, %2\n" + " zapnot %2, 0x1, %2\n" + " srl %10, 16, %3\n" + " zapnot %3, 0x1, %3\n" + " srl %10, 24, %4\n" + " zapnot %4, 0x1, %4\n" + " srl %10, 32, %5\n" + " zapnot %5, 0x1, %5\n" + " srl %10, 40, %6\n" + " zapnot %6, 0x1, %6\n" + " srl %10, 48, %7\n" + " zapnot %7, 0x1, %7\n" + " srl %10, 56, %8\n" + " zapnot %8, 0x1, %8\n" + "1: stb %1, 0(%9)\n" + "2: stb %2, 1(%9)\n" + "3: stb %3, 2(%9)\n" + "4: stb %4, 3(%9)\n" + "5: stb %5, 4(%9)\n" + "6: stb %6, 5(%9)\n" + "7: stb %7, 6(%9)\n" + "8: stb %8, 7(%9)\n" + "9:\n" + ".section __ex_table, "a"\n\t" + " .long 1b - .\n" + " ldi $31, 9b-1b(%0)\n" + " .long 2b - .\n" + " ldi $31, 9b-2b(%0)\n" + " .long 3b - .\n" + " ldi $31, 9b-3b(%0)\n" + " .long 4b - .\n" + " ldi $31, 9b-4b(%0)\n" + " .long 5b - .\n" + " ldi $31, 9b-5b(%0)\n" + " .long 6b - .\n" + " ldi $31, 9b-6b(%0)\n" + " .long 7b - .\n" + " ldi $31, 9b-7b(%0)\n" + " .long 8b - .\n" + " ldi $31, 9b-8b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), + "=&r"(tmp4), "=&r"(tmp5), "=&r"(tmp6), "=&r"(tmp7), "=&r"(tmp8) + : "r"(vb), "r"(fp[3]), "0"(0)); + + if (error) + goto give_sigsegv; + + return; + } + } + switch (opcode) { + case 0x21: /* ldhu */ + __asm__ __volatile__( + "1: ldl_u %1, 0(%3)\n" + "2: ldl_u %2, 1(%3)\n" + " extlh %1, %3, %1\n" + " 
exthh %2, %3, %2\n" + "3:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 3b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + *reg_addr = tmp1 | tmp2; + break; + + case 0x26: /* flds */ + __asm__ __volatile__( + "1: ldl_u %1, 0(%3)\n" + "2: ldl_u %2, 3(%3)\n" + " extlw %1, %3, %1\n" + " exthw %2, %3, %2\n" + "3:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 3b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + sw64_write_fp_reg(reg, s_mem_to_reg((int)(tmp1 | tmp2))); + return; + + case 0x27: /* fldd */ + __asm__ __volatile__( + "1: ldl_u %1, 0(%3)\n" + "2: ldl_u %2, 7(%3)\n" + " extll %1, %3, %1\n" + " exthl %2, %3, %2\n" + "3:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 3b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + sw64_write_fp_reg(reg, tmp1 | tmp2); + return; + + case 0x22: /* ldw */ + __asm__ __volatile__( + "1: ldl_u %1, 0(%3)\n" + "2: ldl_u %2, 3(%3)\n" + " extlw %1, %3, %1\n" + " exthw %2, %3, %2\n" + "3:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 3b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + *reg_addr = (int)(tmp1 | tmp2); + break; + + case 0x23: /* ldl */ + __asm__ __volatile__( + "1: ldl_u %1, 0(%3)\n" + "2: ldl_u %2, 7(%3)\n" + " extll %1, %3, %1\n" + " exthl %2, %3, %2\n" + "3:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %1, 3b-1b(%0)\n" + " .long 2b - .\n" + " ldi %2, 3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + *reg_addr = tmp1 | tmp2; + break; + + /* Note that the store sequences do not indicate that they change + * memory because it _should_ be affecting nothing in this context. + * (Otherwise we have other, much larger, problems.) 
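+ * (For that reason the store asm statements below deliberately omit a + * "memory" clobber.)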
+ */ + case 0x29: /* sth with stb */ + __asm__ __volatile__( + " zap %6, 2, %1\n" + " srl %6, 8, %2\n" + " stb %1, 0x0(%5)\n" + " stb %2, 0x1(%5)\n" + "3:\n" + + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi %2, 3b-1b(%0)\n" + " .long 2b - .\n" + " ldi %1, 3b-2b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(*reg_addr), "0"(0)); + + if (error) + goto give_sigsegv; + return; + + case 0x2e: /* fsts*/ + fake_reg = s_reg_to_mem(sw64_read_fp_reg(reg)); + /* FALLTHRU */ + + case 0x2a: /* stw with stb*/ + __asm__ __volatile__( + " zapnot %6, 0x1, %1\n" + " srl %6, 8, %2\n" + " zapnot %2, 0x1, %2\n" + " srl %6, 16, %3\n" + " zapnot %3, 0x1, %3\n" + " srl %6, 24, %4\n" + " zapnot %4, 0x1, %4\n" + "1: stb %1, 0x0(%5)\n" + "2: stb %2, 0x1(%5)\n" + "3: stb %3, 0x2(%5)\n" + "4: stb %4, 0x3(%5)\n" + "5:\n" + ".section __ex_table, "a"\n" + " .long 1b - .\n" + " ldi $31, 5b-1b(%0)\n" + " .long 2b - .\n" + " ldi $31, 5b-2b(%0)\n" + " .long 3b - .\n" + " ldi $31, 5b-3b(%0)\n" + " .long 4b - .\n" + " ldi $31, 5b-4b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(*reg_addr), "0"(0)); + + if (error) + goto give_sigsegv; + return; + + case 0x2f: /* fstd */ + fake_reg = sw64_read_fp_reg(reg); + /* FALLTHRU */ + + case 0x2b: /* stl */ + __asm__ __volatile__( + " zapnot %10, 0x1, %1\n" + " srl %10, 8, %2\n" + " zapnot %2, 0x1, %2\n" + " srl %10, 16, %3\n" + " zapnot %3, 0x1, %3\n" + " srl %10, 24, %4\n" + " zapnot %4, 0x1, %4\n" + " srl %10, 32, %5\n" + " zapnot %5, 0x1, %5\n" + " srl %10, 40, %6\n" + " zapnot %6, 0x1, %6\n" + " srl %10, 48, %7\n" + " zapnot %7, 0x1, %7\n" + " srl %10, 56, %8\n" + " zapnot %8, 0x1, %8\n" + "1: stb %1, 0(%9)\n" + "2: stb %2, 1(%9)\n" + "3: stb %3, 2(%9)\n" + "4: stb %4, 3(%9)\n" + "5: stb %5, 4(%9)\n" + "6: stb %6, 5(%9)\n" + "7: stb %7, 6(%9)\n" + "8: stb %8, 7(%9)\n" + "9:\n" + ".section __ex_table, "a"\n\t" + " .long 1b - .\n" + " ldi $31, 9b-1b(%0)\n" + " .long 2b - .\n" + " ldi $31, 9b-2b(%0)\n" + " .long 3b - .\n" + " ldi $31, 9b-3b(%0)\n" + " .long 4b - .\n" + " ldi $31, 9b-4b(%0)\n" + " .long 5b - .\n" + " ldi $31, 9b-5b(%0)\n" + " .long 6b - .\n" + " ldi $31, 9b-6b(%0)\n" + " .long 7b - .\n" + " ldi $31, 9b-7b(%0)\n" + " .long 8b - .\n" + " ldi $31, 9b-8b(%0)\n" + ".previous" + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), + "=&r"(tmp4), "=&r"(tmp5), "=&r"(tmp6), "=&r"(tmp7), "=&r"(tmp8) + : "r"(va), "r"(*reg_addr), "0"(0)); + + if (error) + goto give_sigsegv; + return; + + default: + /* What instruction were you trying to use, exactly? */ + goto give_sigbus; + } + + /* Only integer loads should get here; everyone else returns early. */ + if (reg == 30) + wrusp(fake_reg); + return; + +give_sigsegv: + regs->pc -= 4; /* make pc point to faulting insn */ + + /* We need to replicate some of the logic in mm/fault.c, + * since we don't have access to the fault code in the + * exception handling return path. + */ + if ((unsigned long)va >= TASK_SIZE) + si_code = SEGV_ACCERR; + else { + struct mm_struct *mm = current->mm; + + down_read(&mm->mmap_lock); + if (find_vma(mm, (unsigned long)va)) + si_code = SEGV_ACCERR; + else + si_code = SEGV_MAPERR; + up_read(&mm->mmap_lock); + } + send_sig_fault(SIGBUS, si_code, va, 0, current); + return; + +give_sigbus: + regs->pc -= 4; + send_sig_fault(SIGBUS, BUS_ADRALN, va, 0, current); +} + +void +trap_init(void) +{ + /* Tell HMcode what global pointer we want in the kernel. 
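The live value of $29 is captured into gptr and handed to wrkgp() so HMcode knows which gp to install for kernel entries; the wrent() calls then register the arithmetic, MM, instruction-fault, unaligned and syscall entry points.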
*/ + register unsigned long gptr __asm__("$29"); + wrkgp(gptr); + + wrent(entArith, 1); + wrent(entMM, 2); + wrent(entIF, 3); + wrent(entUna, 4); + wrent(entSys, 5); +} diff --git a/arch/sw_64/kernel/unaligned.c b/arch/sw_64/kernel/unaligned.c new file mode 100644 index 000000000000..4ec1187d6cd0 --- /dev/null +++ b/arch/sw_64/kernel/unaligned.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2020 Mao Minkai + * Author: Mao Minkai + * + * This code is taken from arch/mips/kernel/segment.c + * Copyright (C) 2013 Imagination Technologies Ltd. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include <linux/kernel.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <asm/unaligned.h> +#include <asm/debug.h> + +static int show_unaligned(struct seq_file *sf, void *v) +{ + extern struct unaligned_stat { + unsigned long count, va, pc; + } unaligned[2]; + + seq_printf(sf, "kernel unaligned acc\t: %ld (pc=%lx, va=%lx)\n", unaligned[0].count, unaligned[0].pc, unaligned[0].va); + seq_printf(sf, "user unaligned acc\t: %ld (pc=%lx, va=%lx)\n", unaligned[1].count, unaligned[1].pc, unaligned[1].va); + + return 0; +} + +static int unaligned_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_unaligned, NULL); +} + +static const struct file_operations unaligned_fops = { + .open = unaligned_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init unaligned_info(void) +{ + struct dentry *unaligned; + + if (!sw64_debugfs_dir) + return -ENODEV; + + unaligned = debugfs_create_file("unaligned", S_IRUGO, + sw64_debugfs_dir, NULL, + &unaligned_fops); + if (!unaligned) + return -ENOMEM; + return 0; +} +device_initcall(unaligned_info); diff --git a/arch/sw_64/kernel/uprobes.c b/arch/sw_64/kernel/uprobes.c new file mode 100644 index 000000000000..d10464d0dcdd --- /dev/null +++ b/arch/sw_64/kernel/uprobes.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/highmem.h> +#include <linux/kdebug.h> +#include <linux/types.h> +#include <linux/notifier.h> +#include <linux/sched.h> +#include <linux/uprobes.h> +#include <linux/ptrace.h> + +#include <asm/ptrace.h> + +#define UPROBE_TRAP_NR ULONG_MAX + +/** + * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. + * @mm: the probed address space. + * @arch_uprobe: the probepoint information. + * @addr: virtual address at which to install the probepoint + * Return 0 on success or a -ve number on error. + */ +int arch_uprobe_analyze_insn(struct arch_uprobe *aup, + struct mm_struct *mm, unsigned long addr) +{ + u32 inst; + + if (addr & 0x03) + return -EINVAL; + + inst = aup->insn; + + aup->ixol[0] = aup->insn; + aup->ixol[1] = UPROBE_BRK_UPROBE_XOL; /* NOP */ + + return 0; +} + +void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, + void *src, unsigned long len) +{ + unsigned long kaddr, kstart; + + /* Initialize the slot */ + kaddr = (unsigned long)kmap_atomic(page); + kstart = kaddr + (vaddr & ~PAGE_MASK); + memcpy((void *)kstart, src, len); + flush_icache_range(kstart, kstart + len); + kunmap_atomic((void *)kaddr); +} + +/* + * arch_uprobe_pre_xol - prepare to execute out of line. + * @auprobe: the probepoint information. + * @regs: reflects the saved user state of current task. 
+ */ +int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + /* Instruction points to execute ol */ + instruction_pointer_set(regs, utask->xol_vaddr); + + user_enable_single_step(current); + + return 0; +} + +int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + /* Instruction points to execute next to breakpoint address */ + instruction_pointer_set(regs, utask->vaddr + 4); + + user_disable_single_step(current); + + return 0; +} + +/* + * If xol insn itself traps and generates a signal(Say, + * SIGILL/SIGSEGV/etc), then detect the case where a singlestepped + * instruction jumps back to its own address. It is assumed that anything + * like do_page_fault/do_trap/etc sets thread.trap_nr != -1. + * + * arch_uprobe_pre_xol/arch_uprobe_post_xol save/restore thread.trap_nr, + * arch_uprobe_xol_was_trapped() simply checks that ->trap_nr is not equal to + * UPROBE_TRAP_NR == -1 set by arch_uprobe_pre_xol(). + */ +bool arch_uprobe_xol_was_trapped(struct task_struct *tsk) +{ + return false; +} + +int arch_uprobe_exception_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = data; + struct pt_regs *regs = args->regs; + + /* regs == NULL is a kernel bug */ + if (WARN_ON(!regs)) + return NOTIFY_DONE; + + /* We are only interested in userspace traps */ + if (!user_mode(regs)) + return NOTIFY_DONE; + + switch (val) { + case DIE_UPROBE: + if (uprobe_pre_sstep_notifier(regs)) + return NOTIFY_STOP; + break; + case DIE_UPROBE_XOL: + if (uprobe_post_sstep_notifier(regs)) + return NOTIFY_STOP; + default: + break; + } + + return 0; +} + +/* + * This function gets called when XOL instruction either gets trapped or + * the thread has a fatal signal. Reset the instruction pointer to its + * probed address for the potential restart or for post mortem analysis. + */ +void arch_uprobe_abort_xol(struct arch_uprobe *aup, + struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + instruction_pointer_set(regs, utask->vaddr); +} + +unsigned long arch_uretprobe_hijack_return_addr( + unsigned long trampoline_vaddr, struct pt_regs *regs) +{ + unsigned long ra; + + ra = regs->r26; + + /* Replace the return address with the trampoline address */ + regs->r26 = trampoline_vaddr; + + return ra; +} + +/* + * See if the instruction can be emulated. + * Returns true if instruction was emulated, false otherwise. + * + * For now we always emulate so this function just returns 0. + */ +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + return 0; +} diff --git a/arch/sw_64/kernel/vdso.c b/arch/sw_64/kernel/vdso.c new file mode 100644 index 000000000000..32ed952748f0 --- /dev/null +++ b/arch/sw_64/kernel/vdso.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. 
+ * + */ + +#include <linux/kernel.h> +#include <linux/clocksource.h> +#include <linux/elf.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/slab.h> +#include <linux/timekeeper_internal.h> +#include <linux/vmalloc.h> + +#include <asm/cacheflush.h> +#include <asm/vdso.h> + +extern char vdso_start, vdso_end; +static unsigned long vdso_pages; +static struct page **vdso_pagelist; + +/* + * The vDSO data page. + */ +static union { + struct vdso_data data; + u8 page[PAGE_SIZE]; +} vdso_data_store __page_aligned_data; +struct vdso_data *vdso_data = &vdso_data_store.data; + +static struct vm_special_mapping vdso_spec[2]; + +static int __init vdso_init(void) +{ + int i; + + if (memcmp(&vdso_start, "\177ELF", 4)) { + pr_err("vDSO is not a valid ELF object!\n"); + return -EINVAL; + } + + vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", + vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data); + + /* Allocate the vDSO pagelist, plus a page for the data. */ + vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), + GFP_KERNEL); + if (vdso_pagelist == NULL) + return -ENOMEM; + + /* Grab the vDSO data page. */ + vdso_pagelist[0] = virt_to_page(vdso_data); + + /* Grab the vDSO code pages. */ + for (i = 0; i < vdso_pages; i++) + vdso_pagelist[i + 1] = virt_to_page(&vdso_start + i * PAGE_SIZE); + + /* Populate the special mapping structures */ + vdso_spec[0] = (struct vm_special_mapping) { + .name = "[vvar]", + .pages = vdso_pagelist, + }; + + vdso_spec[1] = (struct vm_special_mapping) { + .name = "[vdso]", + .pages = &vdso_pagelist[1], + }; + + return 0; +} +arch_initcall(vdso_init); + +int arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp) +{ + struct mm_struct *mm = current->mm; + unsigned long vdso_base, vdso_text_len, vdso_mapping_len; + void *ret; + + vdso_text_len = vdso_pages << PAGE_SHIFT; + /* Be sure to map the data page */ + vdso_mapping_len = vdso_text_len + PAGE_SIZE; + + if (down_write_killable(&mm->mmap_lock)) + return -EINTR; + vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + ret = ERR_PTR(vdso_base); + goto up_fail; + } + ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, + &vdso_spec[0]); + if (IS_ERR(ret)) + goto up_fail; + + vdso_base += PAGE_SIZE; + mm->context.vdso = (void *)vdso_base; + ret = _install_special_mapping(mm, vdso_base, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + &vdso_spec[1]); + if (IS_ERR(ret)) + goto up_fail; + + up_write(&mm->mmap_lock); + return 0; + +up_fail: + mm->context.vdso = NULL; + up_write(&mm->mmap_lock); + return PTR_ERR(ret); +} + +void update_vsyscall(struct timekeeper *tk) +{ + vdso_data_write_begin(vdso_data); + + vdso_data->xtime_sec = tk->xtime_sec; + vdso_data->xtime_nsec = tk->tkr_mono.xtime_nsec; + vdso_data->wall_to_mono_sec = tk->wall_to_monotonic.tv_sec; + vdso_data->wall_to_mono_nsec = tk->wall_to_monotonic.tv_nsec; + vdso_data->cs_shift = tk->tkr_mono.shift; + + vdso_data->cs_mult = tk->tkr_mono.mult; + vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; + vdso_data->cs_mask = tk->tkr_mono.mask; + + vdso_data_write_end(vdso_data); +} + +void update_vsyscall_tz(void) +{ + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; +} diff --git a/arch/sw_64/kernel/vdso/.gitignore 
b/arch/sw_64/kernel/vdso/.gitignore new file mode 100644 index 000000000000..2b6a8b0ed7ca --- /dev/null +++ b/arch/sw_64/kernel/vdso/.gitignore @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +vdso.lds +vdso.so.dbg.tmp +vdso-syms.S diff --git a/arch/sw_64/kernel/vdso/Makefile b/arch/sw_64/kernel/vdso/Makefile new file mode 100644 index 000000000000..dfbd81c2c801 --- /dev/null +++ b/arch/sw_64/kernel/vdso/Makefile @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: GPL-2.0 +# Symbols present in the vdso +vdso-syms = rt_sigreturn gettimeofday + +# Files to link into the vdso +obj-vdso = $(patsubst %, v%.o, $(vdso-syms)) + +# Build rules +targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-syms.S +obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) + +obj-y += vdso.o vdso-syms.o +extra-y += vdso.lds +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +# vDSO code runs in userspace and -pg doesn't help with profiling anyway. +CFLAGS_REMOVE_vdso.o = -pg +CFLAGS_REMOVE_vrt_sigreturn.o = -pg +CFLAGS_REMOVE_vgettimeofday.o = -pg + +ifdef CONFIG_FEEDBACK_COLLECT +# vDSO code runs in userspace, not collecting feedback data. +CFLAGS_REMOVE_vdso.o = -ffeedback-generate +CFLAGS_REMOVE_vrt_sigreturn.o = -ffeedback-generate +CFLAGS_REMOVE_vgettimeofday.o = -ffeedback-generate +endif + +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + +# Force dependency +$(obj)/vdso.o: $(obj)/vdso.so + +# link rule for the .so file, .lds has to be first +SYSCFLAGS_vdso.so.dbg = $(c_flags) +$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) + $(call if_changed,vdsold) +SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=both) + +$(obj)/vdso-syms.S: $(obj)/vdso.so FORCE + $(call if_changed,so2s) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# actual build commands +# The DSO images are built using a special linker script +# Add -lgcc so tilepro gets static muldi3 and lshrdi3 definitions. +# Make sure only to export the intended __vdso_xxx symbol offsets. +quiet_cmd_vdsold = VDSOLD $@ + cmd_vdsold = $(CC) $(KCFLAGS) -nostdlib $(SYSCFLAGS_$(@F)) \ + -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp -lgcc && \ + $(CROSS_COMPILE)objcopy \ + $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ + rm $@.tmp + +# Extracts symbol offsets from the VDSO, converting them into an assembly file +# that contains the same symbols at the same offsets. 
+quiet_cmd_so2s = SO2S $@ + cmd_so2s = $(NM) -D $< | $(srctree)/$(src)/so2s.sh > $@ + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso.so: $(obj)/vdso.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + + +vdso_install: vdso.so diff --git a/arch/sw_64/kernel/vdso/so2s.sh b/arch/sw_64/kernel/vdso/so2s.sh new file mode 100755 index 000000000000..8f23ac544d1b --- /dev/null +++ b/arch/sw_64/kernel/vdso/so2s.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# Copyright 2020 Palmer Dabbelt palmerdabbelt@google.com + +grep -v "LINUX" | sed 's/([0-9a-f]*) T ([a-z0-9_]*)/.globl\t\2\n\2:\n.quad\t0x\1/' diff --git a/arch/sw_64/kernel/vdso/vdso.S b/arch/sw_64/kernel/vdso/vdso.S new file mode 100644 index 000000000000..ce5448d00cf7 --- /dev/null +++ b/arch/sw_64/kernel/vdso/vdso.S @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + * + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/const.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso_start, vdso_end + .balign PAGE_SIZE +vdso_start: + .incbin "arch/sw_64/kernel/vdso/vdso.so" + .balign PAGE_SIZE +vdso_end: + + .previous diff --git a/arch/sw_64/kernel/vdso/vdso.lds.S b/arch/sw_64/kernel/vdso/vdso.lds.S new file mode 100644 index 000000000000..67a635d6dfaf --- /dev/null +++ b/arch/sw_64/kernel/vdso/vdso.lds.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * GNU linker script for the VDSO library. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + * + * Heavily based on the vDSO linker scripts for other archs. + */ + +#include <linux/const.h> +#include <asm/page.h> +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf64-sw_64") +OUTPUT_ARCH(sw_64) + +SECTIONS +{ + PROVIDE(_vdso_data = . - PAGE_SIZE); + . = VDSO_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . 
= ALIGN(16); + .text : { *(.text*) } + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + + _end = .; + PROVIDE(end = .); + + /DISCARD/ : { + *(.note.GNU-stack) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + LINUX_2.6.39 { + global: + __vdso_rt_sigreturn; + __vdso_gettimeofday; + __vdso_clock_gettime; + local: *; + }; +} diff --git a/arch/sw_64/kernel/vdso/vgettimeofday.c b/arch/sw_64/kernel/vdso/vgettimeofday.c new file mode 100644 index 000000000000..6ba9ff6e33d5 --- /dev/null +++ b/arch/sw_64/kernel/vdso/vgettimeofday.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/time.h> +#include <asm/timex.h> +#include <asm/unistd.h> +#include <asm/vdso.h> +#include <asm/io.h> + +static __always_inline int do_realtime_coarse(struct timespec64 *ts, + const struct vdso_data *data) +{ + u32 start_seq; + + do { + start_seq = vdso_data_read_begin(data); + + ts->tv_sec = data->xtime_sec; + ts->tv_nsec = data->xtime_nsec >> data->cs_shift; + } while (vdso_data_read_retry(data, start_seq)); + + return 0; +} + + +static __always_inline int do_monotonic_coarse(struct timespec64 *ts, + const struct vdso_data *data) +{ + u32 start_seq; + u32 to_mono_sec; + u32 to_mono_nsec; + + do { + start_seq = vdso_data_read_begin(data); + + ts->tv_sec = data->xtime_sec; + ts->tv_nsec = data->xtime_nsec >> data->cs_shift; + + to_mono_sec = data->wall_to_mono_sec; + to_mono_nsec = data->wall_to_mono_nsec; + } while (vdso_data_read_retry(data, start_seq)); + + ts->tv_sec += to_mono_sec; + timespec64_add_ns(ts, to_mono_nsec); + + return 0; +} + +static __always_inline u64 read_longtime(void) +{ + register unsigned long __r0 __asm__("$0"); + + __asm__ __volatile__( + "sys_call 0xB1" + : "=r"(__r0) + ::"memory"); + + return __r0; +} + +static __always_inline u64 get_ns(const struct vdso_data *data) +{ + u64 cycle_now, delta, nsec; + + cycle_now = read_longtime(); + delta = (cycle_now - data->cs_cycle_last) & data->cs_mask; + + nsec = (delta * data->cs_mult) + data->xtime_nsec; + nsec >>= data->cs_shift; + + return nsec; +} + + +static __always_inline int do_realtime(struct timespec64 *ts, + const struct vdso_data *data) +{ + u32 start_seq; + u64 ns; + + do { + start_seq = vdso_data_read_begin(data); + + ts->tv_sec = data->xtime_sec; + ns = get_ns(data); + } while (vdso_data_read_retry(data, start_seq)); + + ts->tv_nsec = 0; + timespec64_add_ns(ts, ns); + + return 0; +} 
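+/*
+ * Illustration of the conversion performed by get_ns()/do_realtime() above;
+ * the numeric values below are hypothetical, only the formula mirrors the
+ * code.  get_ns() applies the usual clocksource scaling, and do_realtime()
+ * folds the result into the wall-clock seconds:
+ *
+ *	u64 cycle_now  = 1000500;	// read_longtime()
+ *	u64 cycle_last = 1000000;	// data->cs_cycle_last
+ *	u64 mask  = ~0UL;		// data->cs_mask
+ *	u64 mult  = 2330286;		// data->cs_mult (hypothetical)
+ *	u64 shift = 23;			// data->cs_shift (hypothetical)
+ *
+ *	u64 delta = (cycle_now - cycle_last) & mask;
+ *	u64 nsec  = ((delta * mult) + data->xtime_nsec) >> shift;
+ *
+ * do_realtime() then sets tv_sec = data->xtime_sec, tv_nsec = 0, and lets
+ * timespec64_add_ns() carry nsec (including any overflow past NSEC_PER_SEC)
+ * into the final timespec64.
+ */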
+ +static __always_inline int do_monotonic(struct timespec64 *ts, + const struct vdso_data *data) +{ + u32 start_seq; + u64 ns; + u32 to_mono_sec; + u32 to_mono_nsec; + + do { + start_seq = vdso_data_read_begin(data); + + ts->tv_sec = data->xtime_sec; + ns = get_ns(data); + + to_mono_sec = data->wall_to_mono_sec; + to_mono_nsec = data->wall_to_mono_nsec; + } while (vdso_data_read_retry(data, start_seq)); + + ts->tv_sec += to_mono_sec; + ts->tv_nsec = 0; + timespec64_add_ns(ts, ns + to_mono_nsec); + + return 0; +} + + +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) +{ + const struct vdso_data *data = get_vdso_data(); + struct timespec64 ts; + int ret; + + ret = do_realtime(&ts, data); + if (ret) + return ret; + + if (tv) { + tv->tv_sec = ts.tv_sec; + tv->tv_usec = ts.tv_nsec / 1000; + } + + if (tz) { + tz->tz_minuteswest = data->tz_minuteswest; + tz->tz_dsttime = data->tz_dsttime; + } + + return 0; +} + +int __vdso_clock_gettime(clockid_t clkid, struct timespec64 *ts) +{ + const struct vdso_data *data = get_vdso_data(); + int ret; + + switch (clkid) { + case CLOCK_REALTIME_COARSE: + ret = do_realtime_coarse(ts, data); + break; + case CLOCK_MONOTONIC_COARSE: + ret = do_monotonic_coarse(ts, data); + break; + case CLOCK_REALTIME: + ret = do_realtime(ts, data); + break; + case CLOCK_MONOTONIC: + ret = do_monotonic(ts, data); + break; + default: + ret = -ENOSYS; + break; + } + + /* If we return -ENOSYS libc should fall back to a syscall. */ + return ret; +} diff --git a/arch/sw_64/kernel/vdso/vrt_sigreturn.S b/arch/sw_64/kernel/vdso/vrt_sigreturn.S new file mode 100644 index 000000000000..c07eb7244d0c --- /dev/null +++ b/arch/sw_64/kernel/vdso/vrt_sigreturn.S @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Sigreturn trampoline for returning from a signal when the SA_RESTORER + * flag is not set. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + * + */ + +#include <linux/linkage.h> +#include <asm/unistd.h> + + .text + +ENTRY(__vdso_rt_sigreturn) + mov $sp, $16 + ldi $0, __NR_rt_sigreturn + sys_call 0x83 +ENDPROC(__vdso_rt_sigreturn) diff --git a/arch/sw_64/kernel/vmlinux.lds.S b/arch/sw_64/kernel/vmlinux.lds.S new file mode 100644 index 000000000000..a106be42121f --- /dev/null +++ b/arch/sw_64/kernel/vmlinux.lds.S @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define EMITS_PT_NOTE +#define RO_EXCEPTION_TABLE_ALIGN 16 + +#include <asm-generic/vmlinux.lds.h> +#include <asm/thread_info.h> +#include <asm/cache.h> +#include <asm/page.h> +#include <asm/setup.h> + +OUTPUT_FORMAT("elf64-sw_64") +OUTPUT_ARCH(sw_64) +ENTRY(__start) +PHDRS { text PT_LOAD; note PT_NOTE; } +jiffies = jiffies_64; +SECTIONS +{ + . 
= _TEXT_START; + + _text = .; /* Text and read-only data */ + .text : { + HEAD_TEXT + TEXT_TEXT + SCHED_TEXT + CPUIDLE_TEXT + LOCK_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT + KPROBES_TEXT + *(.fixup) + *(.gnu.warning) + } :text + _etext = .; /* End of text section */ + + RO_DATA(4096) + + /* Will be freed after init */ + __init_begin = ALIGN(PAGE_SIZE); + INIT_TEXT_SECTION(PAGE_SIZE) + INIT_DATA_SECTION(16) + PERCPU_SECTION(L1_CACHE_BYTES) + + /* + * Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page + * needed for the THREAD_SIZE aligned init_task gets freed after init + */ + . = ALIGN(THREAD_SIZE); + __init_end = .; + /* Freed after init ends here */ + + _sdata = .; /* Start of rw data section */ + _data = .; + RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + .got : { +#ifdef CONFIG_RELOCATABLE + _got_start = .; +#endif + *(.got) +#ifdef CONFIG_RELOCATABLE + _got_end = .; +#endif + } + .sdata : { + *(.sdata) + } + _edata = .; /* End of data section */ + +#ifdef CONFIG_RELOCATABLE + _. = ALIGN(4); + .data.reloc : { + _relocation_start = .; + /* + * Space for relocation table + * This needs to be filled so that the + * relocs tool can overwrite the content. + * An invalid value is left at the start of the + * section to abort relocation if the table + * has not been filled in. + */ + LONG(0xFFFFFFFF); + FILL(0); + . += CONFIG_RELOCATION_TABLE_SIZE - 4; + _relocation_end = .; + } +#endif + BSS_SECTION(0, 0, 0) + _end = .; + + .mdebug 0 : { + *(.mdebug) + } + .note 0 : { + *(.note) + } + + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS + + DISCARDS +} diff --git a/arch/sw_64/kvm/Kconfig b/arch/sw_64/kvm/Kconfig new file mode 100644 index 000000000000..230ac526911c --- /dev/null +++ b/arch/sw_64/kvm/Kconfig @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# KVM configuration +# +source "virt/kvm/Kconfig" + +menuconfig VIRTUALIZATION + bool "Virtualization" + help + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if VIRTUALIZATION + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" + select KVM_SW64_HOST + select PREEMPT_NOTIFIERS + select CMA + depends on NET + select HAVE_KVM_EVENTFD + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING + select HAVE_KVM_IRQFD + select HAVE_KVM_MSI + select KVM_VFIO + select TUN + select GENERIC_ALLOCATOR + help + Support for hosting Guest kernels. + We don't support KVM with 3-level page tables yet. + + If unsure, say N. + +config KVM_SW64_HOST + tristate "KVM for SW64 processors support" + depends on KVM + help + Provides host support for SW64 processors. + To compile this as a module, choose M here. 
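+# A hypothetical .config fragment for enabling the options above (assuming
+# NET is already enabled, since KVM depends on it); KVM_SW64_HOST, CMA, TUN
+# and the eventfd/irqchip/MSI helpers are pulled in by the select statements:
+#
+#   CONFIG_VIRTUALIZATION=y
+#   CONFIG_KVM=y
+#   CONFIG_KVM_SW64_HOST=y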
+ +source "drivers/vhost/Kconfig" + +endif # VIRTUALIZATION diff --git a/arch/sw_64/kvm/Makefile b/arch/sw_64/kvm/Makefile new file mode 100644 index 000000000000..48ae938faab7 --- /dev/null +++ b/arch/sw_64/kvm/Makefile @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Kernel-based Virtual Machine module +# + +KVM := ../../../virt/kvm + +ccflags-y += -Ivirt/kvm -Iarch/sw_64/kvm + +kvm-$(CONFIG_KVM_SW64_HOST) += $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o +kvm-$(CONFIG_KVM_SW64_HOST) += kvm-sw64.o entry.o emulate.o mmio.o kvm_timer.o handle_exit.o + +obj-$(CONFIG_KVM_SW64_HOST) += kvm.o diff --git a/arch/sw_64/kvm/emulate.c b/arch/sw_64/kvm/emulate.c new file mode 100644 index 000000000000..1552119e6346 --- /dev/null +++ b/arch/sw_64/kvm/emulate.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 - os kernal + * Author: fire3 fire3@example.com yangzh yangzh@gmail.com + * linhn linhn@example.com + */ +#include <asm/kvm_emulate.h> +#include <linux/errno.h> +#include <linux/err.h> + +void sw64_decode(struct kvm_vcpu *vcpu, unsigned int insn, struct kvm_run *run) +{ + int opc, ra; + + opc = (insn >> 26) & 0x3f; + ra = (insn >> 21) & 0x1f; + + switch (opc) { + case 0x20: /* LDBU */ + run->mmio.is_write = 0; + run->mmio.len = 1; + vcpu->arch.mmio_decode.rt = ra; + break; + case 0x21: /* LDHU */ + run->mmio.is_write = 0; + run->mmio.len = 2; + vcpu->arch.mmio_decode.rt = ra; + break; + case 0x22: /* LDW */ + run->mmio.is_write = 0; + run->mmio.len = 4; + vcpu->arch.mmio_decode.rt = ra; + break; + case 0x23: /* LDL */ + run->mmio.is_write = 0; + run->mmio.len = 8; + vcpu->arch.mmio_decode.rt = ra; + break; + case 0x28: /* STB */ + run->mmio.is_write = 1; + *(unsigned long *)run->mmio.data = vcpu_get_reg(vcpu, ra) & 0xffUL; + run->mmio.len = 1; + break; + case 0x29: /* STH */ + run->mmio.is_write = 1; + *(unsigned long *)run->mmio.data = vcpu_get_reg(vcpu, ra) & 0xffffUL; + run->mmio.len = 2; + break; + case 0x2a: /* STW */ + run->mmio.is_write = 1; + *(unsigned long *)run->mmio.data = vcpu_get_reg(vcpu, ra) & 0xffffffffUL; + run->mmio.len = 4; + break; + case 0x2b: /* STL */ + run->mmio.is_write = 1; + *(unsigned long *)run->mmio.data = vcpu_get_reg(vcpu, ra); + run->mmio.len = 8; + break; + default: + printk("Miss done opc %d\n", opc); + break; + } +} + +/* + * Virtual Interrupts. + */ +unsigned int interrupt_pending(struct kvm_vcpu *vcpu, bool *more) +{ + unsigned int irq; + DECLARE_BITMAP(blk, SWVM_IRQS); + + bitmap_copy(blk, vcpu->arch.irqs_pending, SWVM_IRQS); + + irq = find_last_bit(blk, SWVM_IRQS); + + return irq; +} + +void clear_vcpu_irq(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vcb.vcpu_irq = 0xffffffffffffffffUL; +} + +void inject_vcpu_irq(struct kvm_vcpu *vcpu, unsigned int irq) +{ + vcpu->arch.vcb.vcpu_irq = irq; +} + +/* + * This actually diverts the Guest to running an interrupt handler, once an + * interrupt has been identified by interrupt_pending(). + */ +void try_deliver_interrupt(struct kvm_vcpu *vcpu, unsigned int irq, bool more) +{ + BUG_ON(irq >= SWVM_IRQS); + + /* Otherwise we check if they have interrupts disabled. */ + if (vcpu->arch.vcb.vcpu_irq_disabled) { + clear_vcpu_irq(vcpu); + return; + } + + /* If they don't have a handler (yet?), we just ignore it */ + if (vcpu->arch.vcb.ent_int != 0) { + /* OK, mark it no longer pending and deliver it. 
*/ + clear_bit(irq, (vcpu->arch.irqs_pending)); + /* + * set_guest_interrupt() takes the interrupt descriptor and a + * flag to say whether this interrupt pushes an error code onto + * the stack as well: virtual interrupts never do. + */ + inject_vcpu_irq(vcpu, irq); + } +} diff --git a/arch/sw_64/kvm/entry.S b/arch/sw_64/kvm/entry.S new file mode 100644 index 000000000000..76ebdda920cb --- /dev/null +++ b/arch/sw_64/kvm/entry.S @@ -0,0 +1,285 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 - os kernal + * Author: fire3 fire3@example.com + */ + .text +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/page.h> +#include <asm/regdef.h> + + .set noat + +ENTRY(__sw64_vcpu_run) + + /* save host fpregs */ + ldl $1, TI_TASK($8) + ldi $1, TASK_THREAD($1) + rfpcr $f0 + fstd $f0, THREAD_FPCR($1) + ldi $1, THREAD_CTX_FP($1) + vstd $f2, CTX_FP_F2($1) + vstd $f3, CTX_FP_F3($1) + vstd $f4, CTX_FP_F4($1) + vstd $f5, CTX_FP_F5($1) + vstd $f6, CTX_FP_F6($1) + vstd $f7, CTX_FP_F7($1) + vstd $f8, CTX_FP_F8($1) + vstd $f9, CTX_FP_F9($1) + + ldi sp, -VCPU_RET_SIZE(sp) + /* r16 = guest kvm_vcpu_arch.vcb struct pointer */ + /* r17 = guest kvm_vcpu_arch.kvm_regs struct pointer */ + /* r18 = hcall args */ + /* save host pt_regs to current kernel stack */ + ldi sp, -PT_REGS_SIZE(sp) + + stl $8, PT_REGS_R8(sp) + stl $26, PT_REGS_R26(sp) + + /* save host switch stack to current kernel stack */ + ldi sp, -SWITCH_STACK_SIZE(sp) + stl $9, SWITCH_STACK_R9(sp) + stl $10, SWITCH_STACK_R10(sp) + stl $11, SWITCH_STACK_R11(sp) + stl $12, SWITCH_STACK_R12(sp) + stl $13, SWITCH_STACK_R13(sp) + stl $14, SWITCH_STACK_R14(sp) + stl $15, SWITCH_STACK_R15(sp) + + /* restore guest switch stack from guest kvm_regs struct */ + ldl $0, KVM_REGS_R0($17) + ldl $1, KVM_REGS_R1($17) + /* restore $2 later */ + ldl $3, KVM_REGS_R3($17) + ldl $4, KVM_REGS_R4($17) + ldl $5, KVM_REGS_R5($17) + ldl $6, KVM_REGS_R6($17) + ldl $7, KVM_REGS_R7($17) + ldl $8, KVM_REGS_R8($17) + ldl $9, KVM_REGS_R9($17) + ldl $10, KVM_REGS_R10($17) + ldl $11, KVM_REGS_R11($17) + ldl $12, KVM_REGS_R12($17) + ldl $13, KVM_REGS_R13($17) + ldl $14, KVM_REGS_R14($17) + ldl $15, KVM_REGS_R15($17) + ldl $19, KVM_REGS_R19($17) + ldl $20, KVM_REGS_R20($17) + ldl $21, KVM_REGS_R21($17) + ldl $22, KVM_REGS_R22($17) + ldl $23, KVM_REGS_R23($17) + ldl $24, KVM_REGS_R24($17) + ldl $25, KVM_REGS_R25($17) + ldl $26, KVM_REGS_R26($17) + ldl $27, KVM_REGS_R27($17) + ldl $28, KVM_REGS_R28($17) + + fldd $f0, KVM_REGS_FPCR($17) + wfpcr $f0 + fimovd $f0, $2 + and $2, 0x3, $2 + beq $2, $g_setfpec_0 + subl $2, 0x1, $2 + beq $2, $g_setfpec_1 + subl $2, 0x1, $2 + beq $2, $g_setfpec_2 + setfpec3 + br $g_setfpec_over +$g_setfpec_0: + setfpec0 + br $g_setfpec_over +$g_setfpec_1: + setfpec1 + br $g_setfpec_over +$g_setfpec_2: + setfpec2 +$g_setfpec_over: + ldl $2, KVM_REGS_R2($17) + vldd $f0, KVM_REGS_F0($17) + vldd $f1, KVM_REGS_F1($17) + vldd $f2, KVM_REGS_F2($17) + vldd $f3, KVM_REGS_F3($17) + vldd $f4, KVM_REGS_F4($17) + vldd $f5, KVM_REGS_F5($17) + vldd $f6, KVM_REGS_F6($17) + vldd $f7, KVM_REGS_F7($17) + vldd $f8, KVM_REGS_F8($17) + vldd $f9, KVM_REGS_F9($17) + vldd $f10, KVM_REGS_F10($17) + vldd $f11, KVM_REGS_F11($17) + vldd $f12, KVM_REGS_F12($17) + vldd $f13, KVM_REGS_F13($17) + vldd $f14, KVM_REGS_F14($17) + vldd $f15, KVM_REGS_F15($17) + vldd $f16, KVM_REGS_F16($17) + vldd $f17, KVM_REGS_F17($17) + vldd $f18, KVM_REGS_F18($17) + vldd $f19, KVM_REGS_F19($17) + vldd $f20, KVM_REGS_F20($17) + vldd $f21, KVM_REGS_F21($17) + vldd $f22, 
KVM_REGS_F22($17) + vldd $f23, KVM_REGS_F23($17) + vldd $f24, KVM_REGS_F24($17) + vldd $f25, KVM_REGS_F25($17) + vldd $f26, KVM_REGS_F26($17) + vldd $f27, KVM_REGS_F27($17) + vldd $f28, KVM_REGS_F28($17) + vldd $f29, KVM_REGS_F29($17) + vldd $f30, KVM_REGS_F30($17) + + ldi $17, KVM_REGS_PS($17) + + /* enter guest */ + /* r16 = guest vcpucb pointer */ + /* r17 = base of guest kvm_regs.ps, saved/restored by hmcode */ + + /* enter guest now */ + sys_call 0x31 + /* exit guest now */ + + ldi $17, -KVM_REGS_PS($17) /* r17: base of kvm_regs */ + + vstd $f0, KVM_REGS_F0($17) + vstd $f1, KVM_REGS_F1($17) + vstd $f2, KVM_REGS_F2($17) + vstd $f3, KVM_REGS_F3($17) + vstd $f4, KVM_REGS_F4($17) + vstd $f5, KVM_REGS_F5($17) + vstd $f6, KVM_REGS_F6($17) + vstd $f7, KVM_REGS_F7($17) + vstd $f8, KVM_REGS_F8($17) + vstd $f9, KVM_REGS_F9($17) + vstd $f10, KVM_REGS_F10($17) + vstd $f11, KVM_REGS_F11($17) + vstd $f12, KVM_REGS_F12($17) + vstd $f13, KVM_REGS_F13($17) + vstd $f14, KVM_REGS_F14($17) + vstd $f15, KVM_REGS_F15($17) + vstd $f16, KVM_REGS_F16($17) + vstd $f17, KVM_REGS_F17($17) + vstd $f18, KVM_REGS_F18($17) + vstd $f19, KVM_REGS_F19($17) + vstd $f20, KVM_REGS_F20($17) + vstd $f21, KVM_REGS_F21($17) + vstd $f22, KVM_REGS_F22($17) + vstd $f23, KVM_REGS_F23($17) + vstd $f24, KVM_REGS_F24($17) + vstd $f25, KVM_REGS_F25($17) + vstd $f26, KVM_REGS_F26($17) + vstd $f27, KVM_REGS_F27($17) + vstd $f28, KVM_REGS_F28($17) + vstd $f29, KVM_REGS_F29($17) + vstd $f30, KVM_REGS_F30($17) + + rfpcr $f0 + fstd $f0, KVM_REGS_FPCR($17) + + /* don't save r0 Hmcode have saved r0 for us */ + stl $1, KVM_REGS_R1($17) + stl $2, KVM_REGS_R2($17) + stl $3, KVM_REGS_R3($17) + stl $4, KVM_REGS_R4($17) + stl $5, KVM_REGS_R5($17) + stl $6, KVM_REGS_R6($17) + stl $7, KVM_REGS_R7($17) + stl $8, KVM_REGS_R8($17) + stl $9, KVM_REGS_R9($17) + stl $10, KVM_REGS_R10($17) + stl $11, KVM_REGS_R11($17) + stl $12, KVM_REGS_R12($17) + stl $13, KVM_REGS_R13($17) + stl $14, KVM_REGS_R14($17) + stl $15, KVM_REGS_R15($17) + stl $19, KVM_REGS_R19($17) + stl $20, KVM_REGS_R20($17) + stl $21, KVM_REGS_R21($17) + stl $22, KVM_REGS_R22($17) + stl $23, KVM_REGS_R23($17) + stl $24, KVM_REGS_R24($17) + stl $25, KVM_REGS_R25($17) + stl $26, KVM_REGS_R26($17) + stl $27, KVM_REGS_R27($17) + stl $28, KVM_REGS_R28($17) + + /* restore host switch stack from host sp */ + ldl $9, SWITCH_STACK_R9(sp) + ldl $10, SWITCH_STACK_R10(sp) + ldl $11, SWITCH_STACK_R11(sp) + ldl $12, SWITCH_STACK_R12(sp) + ldl $13, SWITCH_STACK_R13(sp) + ldl $14, SWITCH_STACK_R14(sp) + ldl $15, SWITCH_STACK_R15(sp) + + ldi sp, SWITCH_STACK_SIZE(sp) + + /* restore host regs from host sp */ + ldl $8, PT_REGS_R8(sp) + ldl $26, PT_REGS_R26(sp) + + ldi sp, PT_REGS_SIZE(sp) + + /* restore host fpregs */ + ldl $1, TI_TASK($8) + ldi $1, TASK_THREAD($1) + fldd $f0, THREAD_FPCR($1) + wfpcr $f0 + fimovd $f0, $2 + and $2, 0x3, $2 + beq $2, $setfpec_0 + subl $2, 0x1, $2 + beq $2, $setfpec_1 + subl $2, 0x1, $2 + beq $2, $setfpec_2 + setfpec3 + br $setfpec_over +$setfpec_0: + setfpec0 + br $setfpec_over +$setfpec_1: + setfpec1 + br $setfpec_over +$setfpec_2: + setfpec2 +$setfpec_over: + ldi $1, THREAD_CTX_FP($1) + vldd $f2, CTX_FP_F2($1) + vldd $f3, CTX_FP_F3($1) + vldd $f4, CTX_FP_F4($1) + vldd $f5, CTX_FP_F5($1) + vldd $f6, CTX_FP_F6($1) + vldd $f7, CTX_FP_F7($1) + vldd $f8, CTX_FP_F8($1) + vldd $f9, CTX_FP_F9($1) + + /* if $0 > 0, handle hcall */ + bgt $0, $ret_to + + stl $26, VCPU_RET_RA(sp) + stl $0, VCPU_RET_R0(sp) + + /* Hmcode will setup in */ + /* restore $16 $17 $18, do interrupt trick */ + ldi 
sp, -(HOST_INT_SIZE + PT_REGS_SIZE + SWITCH_STACK_SIZE)(sp) + ldl $16, HOST_INT_R16(sp) + ldl $17, HOST_INT_R17(sp) + ldl $18, HOST_INT_R18(sp) + ldi sp, (HOST_INT_SIZE + PT_REGS_SIZE + SWITCH_STACK_SIZE)(sp) + + ldi $8, 0x3fff + bic sp, $8, $8 + ldi $19, -PT_REGS_SIZE(sp) + + ldi $26, ret_from_do_entInt_noregs + call $31, do_entInt + + /* ret($0) indicate hcall number */ +ret_from_do_entInt_noregs: + ldl $26, VCPU_RET_RA(sp) + ldl $0, VCPU_RET_R0(sp) + + /* restore r16 - r19 */ +$ret_to: + ldi sp, VCPU_RET_SIZE(sp) /* pop stack */ + ret diff --git a/arch/sw_64/kvm/handle_exit.c b/arch/sw_64/kvm/handle_exit.c new file mode 100644 index 000000000000..0d6806051fc7 --- /dev/null +++ b/arch/sw_64/kvm/handle_exit.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 - os kernal + * Author: fire3 fire3@example.com yangzh yangzh@gmail.com + * linhn linhn@example.com + */ +#include <asm/hmcall.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_timer.h> +#include <linux/kvm.h> + +int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index, struct hcall_args *hargs) +{ + switch (exception_index) { + case SW64_KVM_EXIT_IO: + return io_mem_abort(vcpu, run, hargs); + case SW64_KVM_EXIT_HALT: + vcpu->arch.halted = 1; + kvm_vcpu_block(vcpu); + return 1; + case SW64_KVM_EXIT_SHUTDOWN: + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SHUTDOWN; + return 0; + case SW64_KVM_EXIT_RESTART: + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET; + return 0; + case SW64_KVM_EXIT_TIMER: + set_timer(vcpu, hargs->arg0); + return 1; + case SW64_KVM_EXIT_IPI: + vcpu_send_ipi(vcpu, hargs->arg0); + return 1; + case SW64_KVM_EXIT_FATAL_ERROR: + printk("Guest fatal error: Reason=[%lx], EXC_PC=[%lx], DVA=[%lx]", hargs->arg0, hargs->arg1, hargs->arg2); + vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; + vcpu->run->hw.hardware_exit_reason = hargs->arg0; + return 0; + } + + return 1; +} diff --git a/arch/sw_64/kvm/irq.h b/arch/sw_64/kvm/irq.h new file mode 100644 index 000000000000..ee56d9b97632 --- /dev/null +++ b/arch/sw_64/kvm/irq.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * irq.h: in kernel interrupt controller related definitions + */ + +#ifndef __IRQ_H +#define __IRQ_H +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + return 1; +} +#endif diff --git a/arch/sw_64/kvm/kvm-sw64.c b/arch/sw_64/kvm/kvm-sw64.c new file mode 100644 index 000000000000..1481c3dbb211 --- /dev/null +++ b/arch/sw_64/kvm/kvm-sw64.c @@ -0,0 +1,713 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 - os kernal + * Author: fire3 fire3@example.com yangzh yangzh@gmail.com + * linhn linhn@example.com + */ + +#include <linux/cpu.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/mman.h> +#include <linux/sched/signal.h> +#include <linux/freezer.h> +#include <linux/smp.h> +#include <linux/kvm.h> +#include <linux/uaccess.h> +#include <linux/genalloc.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_asm.h> +#include <asm/sw64io.h> + +#include <asm/kvm_timer.h> +#include <asm/kvm_host.h> +#include <asm/kvm_emulate.h> + +#include <asm/page.h> +#include "../kernel/pci_impl.h" + +#include "vmem.c" + +bool set_msi_flag; +unsigned long sw64_kvm_last_vpn[NR_CPUS]; +#define cpu_last_vpn(cpuid) sw64_kvm_last_vpn[cpuid] + 
+#ifdef CONFIG_SUBARCH_C3B +#define MAX_VPN 255 +#define WIDTH_HARDWARE_VPN 8 +#endif + +#define VPN_FIRST_VERSION (1UL << WIDTH_HARDWARE_VPN) +#define HARDWARE_VPN_MASK ((1UL << WIDTH_HARDWARE_VPN) - 1) +#define VPN_SHIFT (64 - WIDTH_HARDWARE_VPN) + +int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) +{ + set_bit(number, (vcpu->arch.irqs_pending)); + kvm_vcpu_kick(vcpu); + return 0; +} + +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + int irq = e->msi.data & 0xff; + unsigned int vcpu_idx; + struct kvm_vcpu *vcpu = NULL; + + vcpu_idx = irq % atomic_read(&kvm->online_vcpus); + vcpu = kvm_get_vcpu(kvm, vcpu_idx); + + if (!vcpu) + return -EINVAL; + + return vcpu_interrupt_line(vcpu, irq, true); +} + +extern int __sw64_vcpu_run(struct vcpucb *vcb, struct kvm_regs *regs, struct hcall_args *args); + +static unsigned long get_vpcr(unsigned long machine_mem_offset, unsigned long memory_size, unsigned long vpn) +{ + return (machine_mem_offset >> 23) | ((memory_size >> 23) << 16) | ((vpn & HARDWARE_VPN_MASK) << 44); +} + +static unsigned long __get_new_vpn_context(struct kvm_vcpu *vcpu, long cpu) +{ + unsigned long vpn = cpu_last_vpn(cpu); + unsigned long next = vpn + 1; + + if ((vpn & HARDWARE_VPN_MASK) >= MAX_VPN) { + tbia(); + next = (vpn & ~HARDWARE_VPN_MASK) + VPN_FIRST_VERSION + 1; /* bypass 0 */ + } + cpu_last_vpn(cpu) = next; + return next; +} + +static void sw64_kvm_switch_vpn(struct kvm_vcpu *vcpu) +{ + unsigned long vpn; + unsigned long vpnc; + long cpu = smp_processor_id(); + + vpn = cpu_last_vpn(cpu); + vpnc = vcpu->arch.vpnc[cpu]; + + if ((vpnc ^ vpn) & ~HARDWARE_VPN_MASK) { + /* vpnc and cpu vpn not in the same version, get new vpnc and vpn */ + vpnc = __get_new_vpn_context(vcpu, cpu); + vcpu->arch.vpnc[cpu] = vpnc; + } + + vpn = vpnc & HARDWARE_VPN_MASK; + + /* Always update vpn */ + /* Just setup vcb, hardware CSR will be changed later in HMcode */ + vcpu->arch.vcb.vpcr = ((vcpu->arch.vcb.vpcr) & (~(HARDWARE_VPN_MASK << 44))) | (vpn << 44); + vcpu->arch.vcb.dtb_pcr = ((vcpu->arch.vcb.dtb_pcr) & (~(HARDWARE_VPN_MASK << VPN_SHIFT))) | (vpn << VPN_SHIFT); + + /* + * If vcpu migrate to a new physical cpu, the new physical cpu may keep + * old tlb entries for this vcpu's vpn, upn in the old tlb entries and + * current vcpu's upn may not in the same version. + * For now, we don't know the vcpu's upn version and the current version. + * If we keep track of the vcpu's upn version, the TLB-flush could be less. + * To be safe and correct, flush all tlb entries of current vpn for now. 
+ */ + + if (vcpu->arch.pcpu_id != cpu) { + tbivpn(0, 0, vpn); + vcpu->arch.pcpu_id = cpu; + vcpu->cpu = cpu; + } +} + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { NULL } +}; + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + return ((!bitmap_empty(vcpu->arch.irqs_pending, SWVM_IRQS) || !vcpu->arch.halted) + && !vcpu->arch.power_off); +} + +int kvm_arch_check_processor_compat(void *opaque) +{ + return 0; +} + +int kvm_arch_hardware_enable(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return false; +} + +bool kvm_arch_has_vcpu_debugfs(void) +{ + return false; +} + +int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) +{ + return 0; +} + +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + const struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, + enum kvm_mr_change change) +{ +} + +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) +{ + int r = 0; + + switch (ext) { + case KVM_CAP_IRQCHIP: + case KVM_CAP_IOEVENTFD: + case KVM_CAP_SYNC_MMU: + r = 1; + break; + case KVM_CAP_NR_VCPUS: + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; + break; + default: + r = 0; + } + + return r; +} + +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +{ + return 0; +} + +int kvm_sw64_pending_timer(struct kvm_vcpu *vcpu) +{ + return test_bit(SW64_KVM_IRQ_TIMER, &vcpu->arch.irqs_pending); +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return kvm_sw64_pending_timer(vcpu); +} + +int kvm_arch_hardware_setup(void *opaque) +{ + return 0; +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + hrtimer_cancel(&vcpu->arch.hrt); +} + +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +{ + return 0; +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + int i; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_vcpu_destroy(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } + + atomic_set(&kvm->online_vcpus, 0); + +} + +long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + const struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) +{ + unsigned long addr; + struct file *vm_file; + struct vm_area_struct *vma; + struct vmem_info *info; + unsigned long ret; + size_t size; + + if (change == KVM_MR_FLAGS_ONLY) + return 0; + + if (test_bit(IO_MARK_BIT, &(mem->guest_phys_addr))) + return 0; + + if (test_bit(IO_MARK_BIT + 1, &(mem->guest_phys_addr))) + return 0; + + if (!sw64_kvm_pool) + return -ENOMEM; + + pr_info("%s: %#llx %#llx, user addr: %#llx\n", __func__, + mem->guest_phys_addr, mem->memory_size, mem->userspace_addr); + + vma = find_vma(current->mm, mem->userspace_addr); + if (!vma) + return -ENOMEM; + vm_file = vma->vm_file; + + if (!vm_file) { + info = kzalloc(sizeof(struct vmem_info), GFP_KERNEL); + + size = round_up(mem->memory_size, 8<<20); + addr = gen_pool_alloc(sw64_kvm_pool, size); + if (!addr) + return -ENOMEM; + vm_munmap(mem->userspace_addr, mem->memory_size); + ret = vm_mmap(vm_file, mem->userspace_addr, mem->memory_size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, 
0); + vma = find_vma(current->mm, mem->userspace_addr); + if (!vma) + return -ENOMEM; + + info->start = addr; + info->size = size; + vma->vm_private_data = (void *) info; + + vma->vm_ops = &vmem_vm_ops; + vma->vm_ops->open(vma); + + remap_pfn_range(vma, mem->userspace_addr, + addr >> PAGE_SHIFT, + mem->memory_size, vma->vm_page_prot); + + if ((long)ret < 0) + return ret; + } else { + info = vm_file->private_data; + addr = info->start; + } + + pr_info("guest phys addr = %#lx, size = %#lx\n", + addr, vma->vm_end - vma->vm_start); + kvm->arch.mem.membank[0].guest_phys_addr = 0; + kvm->arch.mem.membank[0].host_phys_addr = (u64)addr; + kvm->arch.mem.membank[0].size = round_up(mem->memory_size, 8<<20); + + memset((void *)(PAGE_OFFSET + addr), 0, 0x2000000); + + return 0; +} + +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) +{ + /* Set up the timer for Guest */ + pr_info("vcpu: [%d], regs addr = %#lx, vcpucb = %#lx\n", vcpu->vcpu_id, + (unsigned long)&vcpu->arch.regs, (unsigned long)&vcpu->arch.vcb); + hrtimer_init(&vcpu->arch.hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS); + vcpu->arch.hrt.function = clockdev_fn; + vcpu->arch.tsk = current; + + /* For guest kernel "sys_call HMC_whami", indicate virtual cpu id */ + vcpu->arch.vcb.whami = vcpu->vcpu_id; + vcpu->arch.vcb.vcpu_irq_disabled = 1; + vcpu->arch.vcb.pcbb = vcpu->kvm->arch.mem.membank[0].host_phys_addr; + vcpu->arch.pcpu_id = -1; /* force flush tlb for the first time */ + + return 0; +} + +int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) +{ + unsigned long addr = vcpu->kvm->arch.mem.membank[0].host_phys_addr; + + vcpu->arch.vcb.whami = vcpu->vcpu_id; + vcpu->arch.vcb.vcpu_irq_disabled = 1; + vcpu->arch.vcb.pcbb = vcpu->kvm->arch.mem.membank[0].host_phys_addr; + vcpu->arch.pcpu_id = -1; /* force flush tlb for the first time */ + vcpu->arch.power_off = 0; + memset(&vcpu->arch.irqs_pending, 0, sizeof(vcpu->arch.irqs_pending)); + + if (vcpu->vcpu_id == 0) + memset((void *)(PAGE_OFFSET + addr), 0, 0x2000000); + + return 0; +} + +int kvm_set_routing_entry(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + + switch (ue->type) { + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + e->msi.flags = ue->flags; + e->msi.devid = ue->u.msi.devid; + set_msi_flag = true; + break; + default: + goto out; + } + r = 0; +out: + return r; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + vcpu->cpu = cpu; +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ + /* + * The arch-generic KVM code expects the cpu field of a vcpu to be -1 + * if the vcpu is no longer assigned to a cpu. This is used for the + * optimized make_all_cpus_request path. 
+ */ + vcpu->cpu = -1; +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -ENOIOCTLCMD; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -ENOIOCTLCMD; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + memcpy(&(vcpu->arch.regs), regs, sizeof(struct kvm_regs)); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + memcpy(regs, &(vcpu->arch.regs), sizeof(struct kvm_regs)); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) +{ + return -ENOIOCTLCMD; +} + +void _debug_printk_vcpu(struct kvm_vcpu *vcpu) +{ + unsigned long pc = vcpu->arch.regs.pc; + unsigned long offset = vcpu->kvm->arch.mem.membank[0].host_phys_addr; + unsigned long pc_phys = PAGE_OFFSET | ((pc & 0x7fffffffUL) + offset); + unsigned int insn; + int opc, ra, disp16; + + insn = *(unsigned int *)pc_phys; + + opc = (insn >> 26) & 0x3f; + ra = (insn >> 21) & 0x1f; + disp16 = insn & 0xffff; + + if (opc == 0x06 && disp16 == 0x1000) /* RD_F */ + pr_info("vcpu exit: pc = %#lx (%#lx), insn[%x] : rd_f r%d [%#lx]\n", + pc, pc_phys, insn, ra, vcpu_get_reg(vcpu, ra)); +} + +/* + * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on + * proper exit to userspace. + */ +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) +{ + int ret; + struct kvm_run *run = vcpu->run; + struct vcpucb *vcb = &(vcpu->arch.vcb); + struct hcall_args hargs; + int irq; + bool more; + sigset_t sigsaved; + + /* Set guest vcb */ + /* vpn will update later when vcpu is running */ + if (vcpu->arch.vcb.vpcr == 0) { + vcpu->arch.vcb.vpcr + = get_vpcr(vcpu->kvm->arch.mem.membank[0].host_phys_addr, vcpu->kvm->arch.mem.membank[0].size, 0); + vcpu->arch.vcb.upcr = 0x7; + } + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + if (run->exit_reason == KVM_EXIT_MMIO) + kvm_handle_mmio_return(vcpu, run); + + run->exit_reason = KVM_EXIT_UNKNOWN; + ret = 1; + + while (ret > 0) { + /* Check conditions before entering the guest */ + cond_resched(); + + preempt_disable(); + local_irq_disable(); + + if (signal_pending(current)) { + ret = -EINTR; + run->exit_reason = KVM_EXIT_INTR; + } + + if (ret <= 0) { + local_irq_enable(); + preempt_enable(); + continue; + } + + memset(&hargs, 0, sizeof(hargs)); + + clear_vcpu_irq(vcpu); + irq = interrupt_pending(vcpu, &more); + if (irq < SWVM_IRQS) + try_deliver_interrupt(vcpu, irq, more); + + vcpu->arch.halted = 0; + + sw64_kvm_switch_vpn(vcpu); + guest_enter_irqoff(); + + /* Enter the guest */ + vcpu->mode = IN_GUEST_MODE; + + ret = __sw64_vcpu_run((struct vcpucb *)__phys_addr((unsigned long)vcb), &(vcpu->arch.regs), &hargs); + + /* Back from guest */ + vcpu->mode = OUTSIDE_GUEST_MODE; + + local_irq_enable(); + guest_exit_irqoff(); + preempt_enable(); + + /* ret = 0 indicate interrupt in guest mode, ret > 0 indicate hcall */ + ret = handle_exit(vcpu, run, ret, &hargs); + } + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + return ret; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + struct vcpucb *kvm_vcb; + + switch (ioctl) { + case KVM_SW64_VCPU_INIT: + return kvm_arch_vcpu_reset(vcpu); + case KVM_SW64_GET_VCB: + if (copy_to_user((void __user *)arg, &(vcpu->arch.vcb), sizeof(struct vcpucb))) + return -EINVAL; + 
break; + case KVM_SW64_SET_VCB: + kvm_vcb = memdup_user((void __user *)arg, sizeof(*kvm_vcb)); + memcpy(&(vcpu->arch.vcb), kvm_vcb, sizeof(struct vcpucb)); + break; + default: + return -EINVAL; + } + return 0; +} + +long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm __maybe_unused = filp->private_data; + long r; + + switch (ioctl) { + case KVM_CREATE_IRQCHIP: { + struct kvm_irq_routing_entry routing; + + r = -EINVAL; + memset(&routing, 0, sizeof(routing)); + r = kvm_set_irq_routing(kvm, &routing, 0, 0); + break; + } + default: + r = -ENOIOCTLCMD; + } + return r; +} + +int kvm_arch_init(void *opaque) +{ + return 0; +} + +void kvm_arch_exit(void) +{ +} + +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) +{ +} + +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) +{ + return 0; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -ENOIOCTLCMD; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -ENOIOCTLCMD; +} + +void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -ENOIOCTLCMD; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -ENOIOCTLCMD; +} + +vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_IOEVENTFD: + r = 1; + break; + case KVM_CAP_NR_VCPUS: + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; + break; + default: + r = 0; + } + + return r; +} + +void vcpu_send_ipi(struct kvm_vcpu *vcpu, int target_vcpuid) +{ + struct kvm_vcpu *target_vcpu = kvm_get_vcpu(vcpu->kvm, target_vcpuid); + + if (target_vcpu != NULL) + vcpu_interrupt_line(target_vcpu, 1, 1); +} + +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, + bool line_status) +{ + u32 irq = irq_level->irq; + unsigned int vcpu_idx, irq_num; + struct kvm_vcpu *vcpu = NULL; + bool level = irq_level->level; + + vcpu_idx = irq % atomic_read(&kvm->online_vcpus); + irq_num = irq; + + vcpu = kvm_get_vcpu(kvm, vcpu_idx); + if (!vcpu) + return -EINVAL; + + return vcpu_interrupt_line(vcpu, irq_num, level); +} + +static int __init kvm_sw64_init(void) +{ + int i, ret; + + ret = vmem_init(); + if (ret) + return ret; + + for (i = 0; i < NR_CPUS; i++) + sw64_kvm_last_vpn[i] = VPN_FIRST_VERSION; + + ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + if (ret) { + vmem_exit(); + return ret; + } + return 0; +} + +static void __exit kvm_sw64_exit(void) +{ + kvm_exit(); + vmem_exit(); +} + +module_init(kvm_sw64_init); +module_exit(kvm_sw64_exit); diff --git a/arch/sw_64/kvm/kvm_timer.c b/arch/sw_64/kvm/kvm_timer.c new file mode 100644 index 000000000000..fea819732af5 --- /dev/null +++ b/arch/sw_64/kvm/kvm_timer.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 - os kernal + * Author: fire3 fire3@example.com yangzh yangzh@gmail.com + */ +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <asm/kvm_timer.h> + +/* + * The Guest Clock. + * + * There are two sources of virtual interrupts. We saw one in lguest_user.c: + * the Launcher sending interrupts for virtual devices. The other is the Guest + * timer interrupt. 
+ * + * The Guest uses the LHCALL_SET_CLOCKEVENT hypercall to tell us how long to + * the next timer interrupt (in nanoseconds). We use the high-resolution timer + * infrastructure to set a callback at that time. + * + * 0 means "turn off the clock". + */ + +void set_timer(struct kvm_vcpu *vcpu, unsigned long delta) +{ + ktime_t expires; + + if (unlikely(delta == 0)) { + /* Clock event device is shutting down. */ + hrtimer_cancel(&vcpu->arch.hrt); + return; + } + /* + * We use wallclock time here, so the Guest might not be running for + * all the time between now and the timer interrupt it asked for. This + * is almost always the right thing to do. + */ + + expires = ktime_add_ns(ktime_get_real(), delta); + vcpu->arch.timer_next_event = expires; + hrtimer_start(&vcpu->arch.hrt, expires, HRTIMER_MODE_ABS); +} + +/* And this is the routine when we want to set an interrupt for the Guest. */ +void set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) +{ + /* + * Next time the Guest runs, the core code will see if it can deliver + * this interrupt. + */ + set_bit(irq, (vcpu->arch.irqs_pending)); + + /* + * Make sure it sees it; it might be asleep (eg. halted), or running + * the Guest right now, in which case kick_process() will knock it out. + */ + kvm_vcpu_kick(vcpu); +} + +enum hrtimer_restart clockdev_fn(struct hrtimer *timer) +{ + struct kvm_vcpu *vcpu; + ktime_t now, delta; + + vcpu = container_of(timer, struct kvm_vcpu, arch.hrt); + + now = ktime_get_real(); + + if (now < vcpu->arch.timer_next_event) { + delta = vcpu->arch.timer_next_event - now; + hrtimer_forward_now(timer, delta); + return HRTIMER_RESTART; + } + + set_interrupt(vcpu, SW64_KVM_IRQ_TIMER); + return HRTIMER_NORESTART; +} diff --git a/arch/sw_64/kvm/mmio.c b/arch/sw_64/kvm/mmio.c new file mode 100644 index 000000000000..340486e8e51b --- /dev/null +++ b/arch/sw_64/kvm/mmio.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 - os kernal + * Author: fire3 fire3@example.com yangzh yangzh@gmail.com + * linhn linhn@example.com + */ +#include <linux/kvm_host.h> +#include <asm/kvm_mmio.h> +#include <asm/kvm_emulate.h> + +static unsigned long mmio_read_buf(char *buf, unsigned int len) +{ + unsigned long data = 0; + union { + u16 hword; + u32 word; + u64 dword; + } tmp; + + switch (len) { + case 1: + data = buf[0]; + break; + case 2: + memcpy(&tmp.hword, buf, len); + data = tmp.hword; + break; + case 4: + memcpy(&tmp.word, buf, len); + data = tmp.word; + break; + case 8: + memcpy(&tmp.dword, buf, len); + data = tmp.dword; + break; + } + + return data; +} + +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + unsigned long data; + unsigned int len; + + if (!run->mmio.is_write) { + len = run->mmio.len; + if (len > sizeof(unsigned long)) + return -EINVAL; + + data = mmio_read_buf(run->mmio.data, len); + vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); + } + + vcpu->arch.regs.pc += 4; + + return 0; +} + +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct hcall_args *hargs) +{ + int ret; + + run->mmio.phys_addr = hargs->arg1 & 0xfffffffffffffUL; + sw64_decode(vcpu, hargs->arg2, run); + if (run->mmio.is_write) + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr, + run->mmio.len, run->mmio.data); + else + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr, + run->mmio.len, run->mmio.data); + + if (!ret) { + /* We handled the access successfully in the kernel. 
*/ + kvm_handle_mmio_return(vcpu, run); + return 1; + } + + run->exit_reason = KVM_EXIT_MMIO; + return 0; +} diff --git a/arch/sw_64/kvm/vmem.c b/arch/sw_64/kvm/vmem.c new file mode 100644 index 000000000000..b8a585ec1ad1 --- /dev/null +++ b/arch/sw_64/kvm/vmem.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/genalloc.h> +#include <linux/miscdevice.h> +#include <linux/kvm_host.h> +#include <asm/kvm_host.h> + +static bool addr_in_pool(struct gen_pool *pool, + unsigned long start, size_t size) +{ + bool found = false; + unsigned long end = start + size - 1; + struct gen_pool_chunk *chunk; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk) { + if (start >= chunk->start_addr && start <= chunk->end_addr) { + if (end <= chunk->end_addr) { + found = true; + break; + } + } + } + rcu_read_unlock(); + return found; +} + +static void vmem_vm_open(struct vm_area_struct *vma) +{ + struct vmem_info *info = vma->vm_private_data; + + atomic_inc(&info->refcnt); +} + +static void vmem_vm_close(struct vm_area_struct *vma) +{ + unsigned long addr; + size_t size; + struct vmem_info *info; + + info = vma->vm_private_data; + addr = info->start; + size = info->size; + + if (atomic_dec_and_test(&info->refcnt)) { + if (sw64_kvm_pool && addr_in_pool(sw64_kvm_pool, addr, size)) { + pr_info("gen pool free addr: %#lx, size: %#lx\n", + addr, size); + gen_pool_free(sw64_kvm_pool, addr, size); + } + kfree(info); + } +} + +const struct vm_operations_struct vmem_vm_ops = { + .open = vmem_vm_open, + .close = vmem_vm_close, +}; +EXPORT_SYMBOL_GPL(vmem_vm_ops); + +static int vmem_open(struct inode *inode, struct file *flip) +{ + flip->private_data = NULL; + return 0; +} + +static loff_t vmem_llseek(struct file *filp, loff_t offset, int whence) +{ + loff_t newpos = 256UL << 30; + return newpos; +} + +static int vmem_release(struct inode *inode, struct file *flip) +{ + return 0; +} + +static int vmem_mmap(struct file *flip, struct vm_area_struct *vma) +{ + unsigned long addr; + static struct vmem_info *info; + size_t size = vma->vm_end - vma->vm_start; + + if (!(vma->vm_flags & VM_SHARED)) { + pr_err("%s: mapping must be shared\n", __func__); + return -EINVAL; + } + + if (!sw64_kvm_pool) + return -ENOMEM; + + if (flip->private_data == NULL) { + addr = gen_pool_alloc(sw64_kvm_pool, size); + if (!addr) + return -ENOMEM; + + info = kzalloc(sizeof(struct vmem_info), GFP_KERNEL); + pr_info("guest phys addr=%#lx, size=%#lx\n", addr, size); + info->start = addr; + info->size = size; + flip->private_data = (void *)info; + } else { + info = flip->private_data; + addr = info->start; + } + + vma->vm_private_data = (void *)info; + vma->vm_ops = &vmem_vm_ops; + vma->vm_ops->open(vma); + + /*to do if size bigger than vm_mem_size*/ + pr_info("sw64_vmem: vm_start=%#lx, size= %#lx\n", vma->vm_start, size); + + /*remap_pfn_range - remap kernel memory to userspace*/ + if (remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT, size, + vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static const struct file_operations vmem_fops = { + .owner = THIS_MODULE, + .open = vmem_open, + .llseek = vmem_llseek, + .release = vmem_release, + .mmap = vmem_mmap, +}; + +static struct miscdevice vmem_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "sw64_vmem", + .fops = &vmem_fops, +}; + +static int __init vmem_init(void) +{ + int err; + + err = misc_register(&vmem_dev); + if (err != 0) { + 
pr_err("Could not register sw64_vmem device\n"); + return err; + } + return 0; +} + +static void vmem_exit(void) +{ + misc_deregister(&vmem_dev); +} diff --git a/arch/sw_64/lib/Kconfig b/arch/sw_64/lib/Kconfig new file mode 100644 index 000000000000..d1e9cdd3947a --- /dev/null +++ b/arch/sw_64/lib/Kconfig @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: GPL-2.0 +menu "Library optimization options" + +config DEEP_CLEAR_PAGE + bool "Clear Page with SIMD optimization" + default y + help + This option enables the use of SIMD version of clear page routine. + Say N if you want to use the generic version. + +config DEEP_COPY_PAGE + bool "Copy Page with SIMD optimization" + default y + help + This option enables the use of SIMD version of copy page routine. + Say N if you want to use the generic version. + +config DEEP_COPY_USER + bool "Copy User with SIMD optimization" + default y + help + This option enables the use of SIMD version of copy user routine. + Say N if you want to use the generic version. + + +config DEEP_MEMCPY + bool "Memory Copy with SIMD optimization" + default y + help + This option enables the use of SIMD version of memory copy routine. + Say N if you want to use the generic version. + +config DEEP_MEMSET + bool "Memory Set with SIMD optimization" + default y + help + This option enables the use of SIMD version of memory set routine. + Say N if you want to use the generic version. + +endmenu diff --git a/arch/sw_64/lib/Makefile b/arch/sw_64/lib/Makefile new file mode 100644 index 000000000000..bb2e9b52fedc --- /dev/null +++ b/arch/sw_64/lib/Makefile @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for sw-specific library files.. +# + +asflags-y := $(KBUILD_CFLAGS) +ccflags-y := -Werror + +lib-y = __divlu.o __remlu.o __divwu.o __remwu.o \ + udelay.o \ + memmove.o \ + checksum.o \ + csum_partial_copy.o \ + clear_user.o \ + fpreg.o \ + strcpy.o \ + strncpy.o \ + fls.o \ + csum_ipv6_magic.o + +lib-clear_page-y := clear_page.o +lib-clear_page-$(CONFIG_DEEP_CLEAR_PAGE) := deep-clear_page.o + +lib-copy_page-y := copy_page.o +lib-copy_page-$(CONFIG_DEEP_COPY_PAGE) := deep-copy_page.o + +lib-copy_user-y := copy_user.o +lib-copy_user-$(CONFIG_DEEP_COPY_USER) := deep-copy_user.o + +lib-memcpy-y := memcpy.o +lib-memcpy-$(CONFIG_DEEP_MEMCPY) := deep-memcpy.o + +lib-memset-y := memset.o +lib-memset-$(CONFIG_DEEP_MEMSET) := deep-memset.o + +lib-y += $(lib-clear_page-y) $(lib-copy_page-y) $(lib-copy_user-y) $(lib-memcpy-y) $(lib-memset-y) + +obj-y = iomap.o +obj-y += iomap_copy.o + +# The division routines are built from single source, with different defines. +AFLAGS___divlu.o = -DDIV +AFLAGS___remlu.o = -DREM +AFLAGS___divwu.o = -DDIV -DINTSIZE +AFLAGS___remwu.o = -DREM -DINTSIZE + +$(addprefix $(obj)/,__divlu.o __remlu.o __divwu.o __remwu.o): \ + $(src)/divide.S FORCE + $(call if_changed_rule,as_o_S) diff --git a/arch/sw_64/lib/checksum.c b/arch/sw_64/lib/checksum.c new file mode 100644 index 000000000000..561bbac59f8d --- /dev/null +++ b/arch/sw_64/lib/checksum.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file contains network checksum routines that are better done + * in an architecture-specific manner due to speed.. + * Comments in other versions indicate that the algorithms are from RFC1071 + */ +#include <linux/module.h> +#include <linux/string.h> +#include <asm/byteorder.h> + +static inline unsigned short from64to16(unsigned long x) +{ + /* Using extract instructions is a bit more efficient + * than the original shift/bitmask version. 
+ */ + + union { + unsigned long ul; + unsigned int ui[2]; + unsigned short us[4]; + } in_v, tmp_v, out_v; + + in_v.ul = x; + tmp_v.ul = (unsigned long) in_v.ui[0] + (unsigned long) in_v.ui[1]; + + /* Since the bits of tmp_v.sh[3] are going to always be zero, + *we don't have to bother to add that in. + */ + out_v.ul = (unsigned long) tmp_v.us[0] + (unsigned long) tmp_v.us[1] + + (unsigned long) tmp_v.us[2]; + + /* Similarly, out_v.us[2] is always zero for the final add. */ + return out_v.us[0] + out_v.us[1]; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented. + */ +__sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum) +{ + return (__force __sum16)~from64to16( + (__force u64)saddr + (__force u64)daddr + + (__force u64)sum + ((len + proto) << 8)); +} +EXPORT_SYMBOL(csum_tcpudp_magic); + +__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum) +{ + unsigned long result; + + result = (__force u64)saddr + (__force u64)daddr + + (__force u64)sum + ((len + proto) << 8); + + /* + * Fold down to 32-bits so we don't lose in the typedef-less + * network stack. + * + * 64 to 33 + */ + result = (result & 0xffffffff) + (result >> 32); + /* 33 to 32 */ + result = (result & 0xffffffff) + (result >> 32); + return (__force __wsum)result; +} +EXPORT_SYMBOL(csum_tcpudp_nofold); + +/* + * Do a 64-bit checksum on an arbitrary memory area.. + * + * This isn't a great routine, but it's not _horrible_ either. The + * inner loop could be unrolled a bit further, and there are better + * ways to do the carry, but this is reasonable. + */ +static inline unsigned long do_csum(const unsigned char *buff, int len) +{ + int odd, count; + unsigned long result = 0; + + if (len <= 0) + goto out; + odd = 1 & (unsigned long) buff; + if (odd) { + result = *buff << 8; + len--; + buff++; + } + count = len >> 1; /* nr of 16-bit words.. */ + if (count) { + if (2 & (unsigned long) buff) { + result += *(unsigned short *) buff; + count--; + len -= 2; + buff += 2; + } + count >>= 1; /* nr of 32-bit words.. */ + if (count) { + if (4 & (unsigned long) buff) { + result += *(unsigned int *) buff; + count--; + len -= 4; + buff += 4; + } + count >>= 1; /* nr of 64-bit words.. */ + if (count) { + unsigned long carry = 0; + + do { + unsigned long w = *(unsigned long *) buff; + + count--; + buff += 8; + result += carry; + result += w; + carry = (w > result); + } while (count); + result += carry; + result = (result & 0xffffffff) + (result >> 32); + } + if (len & 4) { + result += *(unsigned int *) buff; + buff += 4; + } + } + if (len & 2) { + result += *(unsigned short *) buff; + buff += 2; + } + } + if (len & 1) + result += *buff; + result = from64to16(result); + if (odd) + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); +out: + return result; +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. 
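+ * ihl is the header length in 32-bit words (5 for a standard 20-byte
+ * IPv4 header), so the byte count handed to do_csum() is ihl * 4.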
+ */ +__sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + return (__force __sum16)~do_csum(iph, ihl*4); +} +EXPORT_SYMBOL(ip_fast_csum); + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum csum_partial(const void *buff, int len, __wsum sum) +{ + unsigned long result = do_csum(buff, len); + + /* add in old sum, and carry.. */ + result += (__force u32)sum; + /* 32+c bits -> 32 bits */ + result = (result & 0xffffffff) + (result >> 32); + return (__force __wsum)result; +} +EXPORT_SYMBOL(csum_partial); + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +__sum16 ip_compute_csum(const void *buff, int len) +{ + return (__force __sum16)~from64to16(do_csum(buff, len)); +} +EXPORT_SYMBOL(ip_compute_csum); diff --git a/arch/sw_64/lib/clear_page.S b/arch/sw_64/lib/clear_page.S new file mode 100644 index 000000000000..e1cc7cddfd2f --- /dev/null +++ b/arch/sw_64/lib/clear_page.S @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Zero an entire page. + */ +#include <asm/export.h> + .text + .align 4 + .global clear_page + .ent clear_page +clear_page: + .prologue 0 + + ldi $0, 64 + +/* Optimize by GUOY from SOC 2013-06-04 */ +1: + + stl_nc $31, 0x0($16) + stl_nc $31, 0x8($16) + stl_nc $31, 0x10($16) + stl_nc $31, 0x18($16) + + stl_nc $31, 0x20($16) + stl_nc $31, 0x28($16) + stl_nc $31, 0x30($16) + stl_nc $31, 0x38($16) + + stl_nc $31, 0x40($16) + stl_nc $31, 0x48($16) + stl_nc $31, 0x50($16) + stl_nc $31, 0x58($16) + + stl_nc $31, 0x60($16) + stl_nc $31, 0x68($16) + subl $0, 1, $0 + + stl_nc $31, 0x70($16) + stl_nc $31, 0x78($16) + addl $16, 128, $16 + bne $0, 1b + + memb + ret + + .end clear_page + EXPORT_SYMBOL(clear_page) diff --git a/arch/sw_64/lib/clear_user.S b/arch/sw_64/lib/clear_user.S new file mode 100644 index 000000000000..88d332032c9d --- /dev/null +++ b/arch/sw_64/lib/clear_user.S @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Contributed by Richard Henderson rth@tamu.edu + * + * Zero user space, handling exceptions as we go. + * + * We have to make sure that $0 is always up-to-date and contains the + * right "bytes left to zero" value (and that it is updated only _after_ + * a successful copy). There is also some rather minor exception setup + * stuff. + * + */ +#include <asm/export.h> +/* Allow an exception for an insn; exit if we get one. */ +#define EX(x,y...) 
\ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + ldi $31, $exception-99b($31); \ + .previous + + .set noat + .set noreorder + .align 4 + + .globl __clear_user + .ent __clear_user + .frame $30, 0, $26 + .prologue 0 + +$loop: + and $1, 3, $4 + beq $4, 1f + +0: EX(stl $31, 0($16)) + subl $0, 8, $0 + subl $4, 1, $4 + addl $16, 8, $16 + bne $4, 0b + unop + +1: bic $1, 3, $1 + beq $1, $tail + +2: EX(stl $31, 0($16)) + subl $0, 8, $0 + EX(stl $31, 8($16)) + subl $0, 8, $0 + EX(stl $31, 16($16)) + subl $0, 8, $0 + EX(stl $31, 24($16)) + subl $0, 8, $0 + subl $1, 4, $1 + addl $16, 32, $16 + bne $1, 2b + +$tail: + bne $2, 1f + ret $31, ($26), 1 + +1: + EX(stb $31, 0($16)) + addl $16, 1, $16 + subl $2, 1, $2 + bne $2, 1b + clr $0 + ret $31, ($26), 1 + +__clear_user: + and $17, $17, $0 + and $16, 7, $4 + beq $0, $zerolength + addl $0, $4, $1 + and $1, 7, $2 + srl $1, 3, $1 + beq $4, $loop + + subl $4, 8, $4 + addl $0, $4, $0 + beq $1, $oneword + +$head: + EX(stb $31, 0($16)) + addl $16, 1, $16 + addl $4, 1, $4 + bne $4, $head + subl $1, 1, $1 + br $loop + unop + +$oneword: + EX(stb $31, 0($16)) + addl $16, 1, $16 + addl $4, 1, $4 + bne $4, $oneword + clr $0 + +$zerolength: +$exception: + ret $31, ($26), 1 + .end __clear_user + EXPORT_SYMBOL(__clear_user) diff --git a/arch/sw_64/lib/copy_page.S b/arch/sw_64/lib/copy_page.S new file mode 100644 index 000000000000..898472c36c80 --- /dev/null +++ b/arch/sw_64/lib/copy_page.S @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/sw/lib/copy_page.S + * + * Copy an entire page. + */ +#include <asm/export.h> + + .text + .align 4 + .global copy_page + .ent copy_page +copy_page: + .prologue 0 + + ldi $18, 64 + +/* Optimize by GUOY from SOC 2013-06-04 */ +1: + ldl $0, 0($17) + ldl $1, 8($17) + ldl $2, 16($17) + ldl $3, 24($17) + + stl_nc $0, 0($16) + stl_nc $1, 8($16) + stl_nc $2, 16($16) + stl_nc $3, 24($16) + + ldl $4, 32($17) + ldl $5, 40($17) + ldl $6, 48($17) + ldl $7, 56($17) + + stl_nc $4, 32($16) + stl_nc $5, 40($16) + stl_nc $6, 48($16) + stl_nc $7, 56($16) + + ldl $0, 64($17) + ldl $1, 72($17) + ldl $2, 80($17) + ldl $3, 88($17) + + stl_nc $0, 64($16) + stl_nc $1, 72($16) + stl_nc $2, 80($16) + stl_nc $3, 88($16) + + ldl $4, 96($17) + ldl $5, 104($17) + ldl $6, 112($17) + ldl $7, 120($17) + + stl_nc $4, 96($16) + stl_nc $5, 104($16) + stl_nc $6, 112($16) + stl_nc $7, 120($16) + + ldwe $f31, 3 * 0x80($17) + subl $18, 1, $18 + addl $17, 128, $17 + + addl $16, 128, $16 + bne $18, 1b + + memb + ret + + .end copy_page + EXPORT_SYMBOL(copy_page) diff --git a/arch/sw_64/lib/copy_user.S b/arch/sw_64/lib/copy_user.S new file mode 100644 index 000000000000..2c3dd0b5656c --- /dev/null +++ b/arch/sw_64/lib/copy_user.S @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copy to/from user space, handling exceptions as we go.. This + * isn't exactly pretty. + * + * This is essentially the same as "memcpy()", but with a few twists. + * Notably, we have to make sure that $0 is always up-to-date and + * contains the right "bytes left to copy" value (and that it is updated + * only _after_ a successful copy). There is also some rather minor + * exception setup stuff.. + */ +#include <asm/export.h> +/* Allow an exception for an insn; exit if we get one. */ +#define EXI(x,y...) \ + 99: x,##y; \ + .section __ex_table, "a"; \ + .long 99b - .; \ + ldi $31, $exitin-99b($31); \ + .previous + +#define EXO(x,y...) 
\ + 99: x, ##y; \ + .section __ex_table, "a"; \ + .long 99b - .; \ + ldi $31, $exitout-99b($31); \ + .previous + + .set noat + .align 4 + .globl __copy_user + .ent __copy_user +__copy_user: + .prologue 0 + and $18, $18, $0 + and $16, 7, $3 + beq $0, $35 + beq $3, $36 + subl $3, 8, $3 + .align 4 +$37: + EXI(ldbu $1, 0($17)) + EXO(stb $1, 0($16)) + addl $3, 1, $3 + subl $0, 1, $0 + addl $16, 1, $16 + addl $17, 1, $17 + beq $0, $41 + bne $3, $37 +$36: + and $17, 7, $1 + bic $0, 7, $4 + beq $1, $43 + beq $4, $48 + EXI(ldl_u $3, 0($17)) + .align 4 +$50: + EXI(ldl_u $2, 8($17)) + subl $4, 8, $4 + extll $3, $17, $3 + exthl $2, $17, $1 + bis $3, $1, $1 + EXO(stl $1,0($16)) + addl $17, 8, $17 + subl $0, 8, $0 + addl $16, 8, $16 + bis $2, $2, $3 + bne $4, $50 +$48: + beq $0, $41 + .align 4 +$57: + EXI(ldbu $1, 0($17)) + EXO(stb $1, 0($16)) + subl $0, 1, $0 + addl $16, 1, $16 + addl $17, 1, $17 + bne $0, $57 + br $31, $41 + .align 4 +$43: + beq $4, $65 + .align 4 +$66: + EXI(ldl $1, 0($17)) + subl $4, 8, $4 + EXO(stl $1,0($16)) + addl $17, 8, $17 + subl $0, 8, $0 + addl $16, 8, $16 + bne $4, $66 +$65: + beq $0, $41 + EXI(ldbu $1, 0($17)) + EXO(stb $1, 0($16)) + addl $17, 1, $17 + addl $16, 1, $16 + subl $0, 1, $0 + br $31, $65 +$41: +$35: +$exitin: +$exitout: + ret $31, ($26), 1 + + .end __copy_user + EXPORT_SYMBOL(__copy_user) diff --git a/arch/sw_64/lib/csum_ipv6_magic.S b/arch/sw_64/lib/csum_ipv6_magic.S new file mode 100644 index 000000000000..755e1c13cb25 --- /dev/null +++ b/arch/sw_64/lib/csum_ipv6_magic.S @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Contributed by Richard Henderson rth@tamu.edu + * + * unsigned short csum_ipv6_magic(struct in6_addr *saddr, + * struct in6_addr *daddr, __u32 len, + * unsigned short proto, unsigned int csum); + * + * Misalignment handling (which costs 16 instructions / 8 cycles) + * added by Ivan Kokshaysky ink@jurassic.park.msu.ru + */ +#include <asm/export.h> + .globl csum_ipv6_magic + .align 4 + .ent csum_ipv6_magic + .frame $30, 0, $26, 0 +csum_ipv6_magic: + .prologue 0 + + ldl_u $0, 0($16) + zapnot $20, 15, $20 + exthl $18, 1, $4 + ldl_u $21, 7($16) + + extlb $18, 1, $5 + ldl_u $1, 8($16) + extlb $18, 2, $6 + ldl_u $22, 15($16) + + extlb $18, 3, $18 + ldl_u $2, 0($17) + sra $4, 32, $4 + ldl_u $23, 7($17) + + extll $0, $16, $0 + ldl_u $3, 8($17) + exthl $21, $16, $21 + ldl_u $24, 15($17) + + sll $5, 16, $5 + or $0, $21, $0 + extll $1, $16, $1 + addl $20, $0, $20 + + exthl $22, $16, $22 + cmpult $20, $0, $0 + sll $6, 8, $6 + or $1, $22, $1 + + extll $2, $17, $2 + or $4, $18, $18 + exthl $23, $17, $23 + or $5, $6, $5 + + extll $3, $17, $3 + or $2, $23, $2 + exthl $24, $17, $24 + or $18, $5, $18 + + exthh $19, 7, $7 + or $3, $24, $3 + extlb $19, 1, $19 + addl $20, $1, $20 + + or $19, $7, $19 + cmpult $20, $1, $1 + sll $19, 48, $19 + + sra $19, 32, $19 + addl $20, $2, $20 + cmpult $20, $2, $2 + addl $20, $3, $20 + + cmpult $20, $3, $3 + addl $20, $18, $20 + cmpult $20, $18, $18 + addl $20, $19, $20 + + cmpult $20, $19, $19 + addl $0, $1, $0 + addl $2, $3, $2 + addl $18, $19, $18 + + addl $0, $2, $0 + addl $20, $18, $20 + addl $0, $20, $0 + unop + + extlh $0, 2, $2 + zapnot $0, 3, $3 + extlh $0, 4, $1 + addl $2, $3, $3 + + extlh $0, 6, $0 + addl $3, $1, $3 + addl $0, $3, $0 + unop + + extlh $0, 2, $1 + zapnot $0, 3, $0 + addl $0, $1, $0 + unop + + extlh $0, 2, $1 + zapnot $0, 3, $0 + addl $0, $1, $0 + not $0, $0 + + zapnot $0, 3, $0 + ret + + .end csum_ipv6_magic + EXPORT_SYMBOL(csum_ipv6_magic) diff --git a/arch/sw_64/lib/csum_partial_copy.c 
b/arch/sw_64/lib/csum_partial_copy.c new file mode 100644 index 000000000000..678d9aa78d15 --- /dev/null +++ b/arch/sw_64/lib/csum_partial_copy.c @@ -0,0 +1,373 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * csum_partial_copy - do IP checksumming and copy + * + * (C) Copyright 1996 Linus Torvalds + * + * Don't look at this too closely - you'll go mad. The things + * we do for performance.. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/uaccess.h> + + +#define ldl_u(x, y) \ + __asm__ __volatile__("ldl_u %0, %1":"=r" (x):"m" (*(const unsigned long *)(y))) + +#define stl_u(x, y) \ + __asm__ __volatile__("stl_u %1, %0":"=m" (*(unsigned long *)(y)):"r" (x)) + +static inline void stll_u(unsigned long data, unsigned long *dst) +{ + int i = 0; + unsigned long doff = (unsigned long)dst & 7; + + for (; doff < 8; i++, doff++) + *((char *)dst + i) = *((char *)&data + i); +} + +static inline void sthl_u(unsigned long data, unsigned long *dst) +{ + int i = 0; + unsigned long doff = (unsigned long)dst & 7; + + for (; i < doff; i++) + *((char *)dst + 8 - doff + i) = *((char *)&data + 8 - doff + i); +} + +#define extll(x, y, z) \ + __asm__ __volatile__("extll %1, %2, %0":"=r" (z):"r" (x), "r" (y)) + +#define exthl(x, y, z) \ + __asm__ __volatile__("exthl %1, %2, %0":"=r" (z):"r" (x), "r" (y)) + +#define maskll(x, y, z) \ + __asm__ __volatile__("maskll %1, %2, %0":"=r" (z):"r" (x), "r" (y)) + +#define maskhl(x, y, z) \ + __asm__ __volatile__("maskhl %1, %2, %0":"=r" (z):"r" (x), "r" (y)) + +#define insll(x, y, z) \ + __asm__ __volatile__("insll %1, %2, %0":"=r" (z):"r" (x), "r" (y)) + +#define inshl(x, y, z) \ + __asm__ __volatile__("inshl %1, %2, %0":"=r" (z):"r" (x), "r" (y)) + + +#define __get_word(insn, x, ptr) \ +({ \ + long __guu_err; \ + __asm__ __volatile__( \ + "1: "#insn" %0,%2\n" \ + "2:\n" \ + ".section __ex_table,"a"\n" \ + " .long 1b - .\n" \ + " ldi %0,2b-1b(%1)\n" \ + ".previous" \ + : "=r"(x), "=r"(__guu_err) \ + : "m"(__m(ptr)), "1"(0)); \ + __guu_err; \ +}) + +static inline unsigned short from64to16(unsigned long x) +{ + /* Using extract instructions is a bit more efficient + * than the original shift/bitmask version. + */ + + union { + unsigned long ul; + unsigned int ui[2]; + unsigned short us[4]; + } in_v, tmp_v, out_v; + + in_v.ul = x; + tmp_v.ul = (unsigned long) in_v.ui[0] + (unsigned long) in_v.ui[1]; + + /* Since the bits of tmp_v.sh[3] are going to always be zero, + * we don't have to bother to add that in. + */ + out_v.ul = (unsigned long) tmp_v.us[0] + (unsigned long) tmp_v.us[1] + + (unsigned long) tmp_v.us[2]; + + /* Similarly, out_v.us[2] is always zero for the final add. */ + return out_v.us[0] + out_v.us[1]; +} + +/* + * Ok. This isn't fun, but this is the EASY case. + */ +static inline unsigned long +csum_partial_cfu_aligned(const unsigned long __user *src, unsigned long *dst, + long len) +{ + unsigned long checksum = ~0U; + unsigned long carry = 0; + + while (len >= 0) { + unsigned long word; + + if (__get_word(ldl, word, src)) + return 0; + checksum += carry; + src++; + checksum += word; + len -= 8; + carry = checksum < word; + *dst = word; + dst++; + } + len += 8; + checksum += carry; + if (len) { + int i = 0; + unsigned long word; + + if (__get_word(ldl, word, src)) + return 0; + maskll(word, len, word); + checksum += word; + carry = checksum < word; + for (; i < len; i++) + *((char *)dst + i) = *((char *)&word + i); + checksum += carry; + } + return checksum; +} + +/* + * This is even less fun, but this is still reasonably + * easy. 
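+ * The destination is 8-byte aligned but the source is not, so each
+ * output word is stitched together from two neighbouring source words
+ * with extll/exthl before it is stored and summed.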
+ */ +static inline unsigned long +csum_partial_cfu_dest_aligned(const unsigned long __user *src, + unsigned long *dst, unsigned long soff, long len) +{ + unsigned long first; + unsigned long word, carry; + unsigned long lastsrc = 7+len+(unsigned long)src; + unsigned long checksum = ~0U; + + if (__get_word(ldl_u, first, src)) + return 0; + carry = 0; + while (len >= 0) { + unsigned long second; + + if (__get_word(ldl_u, second, src+1)) + return 0; + extll(first, soff, word); + len -= 8; + src++; + exthl(second, soff, first); + checksum += carry; + word |= first; + first = second; + checksum += word; + *dst = word; + dst++; + carry = checksum < word; + } + len += 8; + checksum += carry; + if (len) { + int i = 0; + unsigned long second; + + if (__get_word(ldl_u, second, lastsrc)) + return 0; + extll(first, soff, word); + exthl(second, soff, first); + word |= first; + maskll(word, len, word); + checksum += word; + carry = checksum < word; + for (; i < len; i++) + *((char *)dst + i) = *((char *)&word + i); + checksum += carry; + } + return checksum; +} + +/* + * This is slightly less fun than the above.. + */ +static inline unsigned long +csum_partial_cfu_src_aligned(const unsigned long __user *src, + unsigned long *dst, unsigned long doff, + long len, unsigned long partial_dest) +{ + unsigned long carry = 0; + unsigned long word; + unsigned long second_dest; + int i; + unsigned long checksum = ~0U; + + if (len >= 0) { + if (__get_word(ldl, word, src)) + return 0; + checksum += carry; + checksum += word; + carry = checksum < word; + stll_u(word, dst); + len -= 8; + src++; + dst++; + + inshl(word, doff, partial_dest); + while (len >= 0) { + if (__get_word(ldl, word, src)) + return 0; + len -= 8; + insll(word, doff, second_dest); + checksum += carry; + stl_u(partial_dest | second_dest, dst); + src++; + checksum += word; + inshl(word, doff, partial_dest); + carry = checksum < word; + dst++; + } + sthl_u(word, dst - 1); + } + len += 8; + + if (__get_word(ldl, word, src)) + return 0; + maskll(word, len, word); + checksum += carry; + checksum += word; + carry = checksum < word; + for (i = 0; i < len; i++) + *((char *)dst + i) = *((char *)&word + i); + + checksum += carry; + return checksum; +} + +/* + * This is so totally un-fun that it's frightening. Don't + * look at this too closely, you'll go blind. 
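+ * Both source and destination are misaligned: source words are
+ * re-aligned with extll/exthl as above, and the partial words at the
+ * ends of the destination are merged in with insll/inshl and the mask
+ * instructions.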
+ */ +static inline unsigned long +csum_partial_cfu_unaligned(const unsigned long __user *src, + unsigned long *dst, unsigned long soff, unsigned long doff, + long len, unsigned long partial_dest) +{ + unsigned long carry = 0; + unsigned long first; + unsigned long second, word; + unsigned long second_dest; + int i; + unsigned long checksum = ~0U; + + if (__get_word(ldl_u, first, src)) + return 0; + if (len >= 0) { + extll(first, soff, word); + if (__get_word(ldl_u, second, src+1)) + return 0; + exthl(second, soff, first); + word |= first; + checksum += carry; + checksum += word; + carry = checksum < word; + stll_u(word, dst); + sthl_u(word, dst); + len -= 8; + src++; + dst++; + + if (__get_word(ldl_u, first, src)) + return 0; + ldl_u(partial_dest, dst); + maskll(partial_dest, doff, partial_dest); + while (len >= 0) { + if (__get_word(ldl_u, second, src+1)) + return 0; + extll(first, soff, word); + checksum += carry; + len -= 8; + exthl(second, soff, first); + src++; + word |= first; + first = second; + insll(word, doff, second_dest); + checksum += word; + stl_u(partial_dest | second_dest, dst); + carry = checksum < word; + inshl(word, doff, partial_dest); + dst++; + } + sthl_u(word, dst - 1); + } + len += 8; + + checksum += carry; + if (__get_word(ldl_u, second, src+1)) + return 0; + extll(first, soff, word); + exthl(second, soff, first); + word |= first; + maskll(word, len, word); + checksum += word; + carry = checksum < word; + for (i = 0; i < len; i++) + *((char *)dst + i) = *((char *)&word + i); + + checksum += carry; + return checksum; +} + +static __wsum __csum_and_copy(const void __user *src, void *dst, int len) +{ + unsigned long checksum; + unsigned long soff = 7 & (unsigned long) src; + unsigned long doff = 7 & (unsigned long) dst; + + if (!doff) { + if (!soff) + checksum = csum_partial_cfu_aligned( + (const unsigned long __user *) src, + (unsigned long *) dst, len-8); + else + checksum = csum_partial_cfu_dest_aligned( + (const unsigned long __user *) src, + (unsigned long *) dst, + soff, len-8); + } else { + unsigned long partial_dest; + + ldl_u(partial_dest, dst); + if (!soff) + checksum = csum_partial_cfu_src_aligned( + (const unsigned long __user *) src, + (unsigned long *) dst, + doff, len-8, partial_dest); + else + checksum = csum_partial_cfu_unaligned( + (const unsigned long __user *) src, + (unsigned long *) dst, + soff, doff, len-8, partial_dest); + } + return (__force __wsum)from64to16(checksum); +} + +__wsum +csum_and_copy_from_user(const void __user *src, void *dst, int len) +{ + if (!access_ok(src, len)) + return 0; + return __csum_and_copy(src, dst, len); +} +EXPORT_SYMBOL(csum_and_copy_from_user); + +__wsum +csum_partial_copy_nocheck(const void *src, void *dst, int len) +{ + return __csum_and_copy((__force const void __user *)src, + dst, len); +} +EXPORT_SYMBOL(csum_partial_copy_nocheck); diff --git a/arch/sw_64/lib/deep-clear_page.S b/arch/sw_64/lib/deep-clear_page.S new file mode 100644 index 000000000000..52a3db33fc17 --- /dev/null +++ b/arch/sw_64/lib/deep-clear_page.S @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Zero an entire page. 
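+ * SIMD version: each loop iteration issues four 32-byte non-cacheable
+ * vector stores (vstd_nc), clearing 128 bytes, so the 64 iterations
+ * cover the whole 8 KB page.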
+ */ +#include <asm/export.h> + .text + .align 4 + .global clear_page + .ent clear_page +clear_page: + .prologue 0 + + ldi $0,64 + +/* Optimize by GUOY from SOC 2013-06-04 */ +1: + +/* + stl_nc $31,0x0($16) + stl_nc $31,0x8($16) + stl_nc $31,0x10($16) + stl_nc $31,0x18($16) + + stl_nc $31,0x20($16) + stl_nc $31,0x28($16) + stl_nc $31,0x30($16) + stl_nc $31,0x38($16) + + stl_nc $31,0x40($16) + stl_nc $31,0x48($16) + stl_nc $31,0x50($16) + stl_nc $31,0x58($16) + + stl_nc $31,0x60($16) + stl_nc $31,0x68($16) + stl_nc $31,0x70($16) + stl_nc $31,0x78($16) +*/ + + vstd_nc $f31, 0x0($16) + vstd_nc $f31, 0x20($16) + subl $0, 1, $0 + vstd_nc $f31, 0x40($16) + + vstd_nc $f31, 0x60($16) + addl $16, 128, $16 + bne $0, 1b + + memb + ret + + .end clear_page + EXPORT_SYMBOL(clear_page) diff --git a/arch/sw_64/lib/deep-copy_page.S b/arch/sw_64/lib/deep-copy_page.S new file mode 100644 index 000000000000..5061837e537b --- /dev/null +++ b/arch/sw_64/lib/deep-copy_page.S @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/sw/lib/copy_page.S + * + * Copy an entire page. + */ +#include <asm/export.h> + + .text + .align 4 + .global copy_page + .ent copy_page +copy_page: + .prologue 0 + + ldi $18, 64 + subl $sp, 0x60, $sp + ldi $4, 0x40($sp) + stl $4, 0($sp) + bic $4, 0x1f, $4 + vstd $f16, 0($4) + +/* Optimize by GUOY from SOC 2013-06-04 */ +1: + vldd $f16, 0($17) + vstd_nc $f16, 0($16) + + vldd $f16, 32($17) + vstd_nc $f16, 32($16) + + vldd $f16, 64($17) + vstd_nc $f16, 64($16) + + vldd $f16, 96($17) + vstd_nc $f16, 96($16) + + ldwe $f31, 5*0x80($17) + subl $18, 1, $18 + addl $17, 128, $17 + + addl $16, 128, $16 + bne $18, 1b + + memb + ldl $4, 0($sp) + ldi $4, 0x40($sp) + bic $4, 0x1f, $4 + vldd $f16, 0($4) + addl $sp, 0x60, $sp + ret + + .end copy_page + EXPORT_SYMBOL(copy_page) diff --git a/arch/sw_64/lib/deep-copy_user.S b/arch/sw_64/lib/deep-copy_user.S new file mode 100644 index 000000000000..631246c68bab --- /dev/null +++ b/arch/sw_64/lib/deep-copy_user.S @@ -0,0 +1,342 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copy to/from user space, handling exceptions as we go.. This + * isn't exactly pretty. + * + * This is essentially the same as "memcpy()", but with a few twists. + * Notably, we have to make sure that $18 is always up-to-date and + * contains the right "bytes left to copy" value (and that it is updated + * only _after_ a successful copy). There is also some rather minor + * exception setup stuff.. + * + * Inputs: + * length in $18 + * destination address in $16 + * source address in $17 + * return address in $26 + * + * Outputs: + * bytes left to copy in $0 + * + * Clobbers: + * $1,$2,$3,$4,$5,$16,$17 + * + */ + +/* Author: Copy_user simd version 1.1 (20190904) by Gao Xiuwu. +*/ +#include <asm/export.h> + +/* Allow an exception for an insn; exit if we get one. */ +#define EXI(x, y...) \ + 99: x, ##y; \ + .section __ex_table, "a"; \ + .long 99b - .; \ + ldi $31, $exitin-99b($31); \ + .previous + +#define EXO(x,y...) 
\ + 99: x, ##y; \ + .section __ex_table, "a"; \ + .long 99b - .; \ + ldi $31, $exitout-99b($31); \ + .previous + + .set noat + .align 4 + .globl __copy_user + .ent __copy_user + +__copy_user: + .prologue 0 + subl $18, 32, $1 + beq $18, $zerolength + + and $16, 7, $3 + ble $1, $onebyteloop + beq $3, $destaligned + subl $3, 8, $3 +/* + * The fetcher stall also hides the 1 cycle cross-cluster stall for $3 (L --> U) + * This loop aligns the destination a byte at a time + * We know we have at least one trip through this loop + */ +$aligndest: + EXI(ldbu $1, 0($17)) + addl $16, 1, $16 + addl $3, 1, $3 + +/* + * the -1 is to compensate for the inc($16) done in a previous quadpack + * which allows us zero dependencies within either quadpack in the loop + */ + EXO(stb $1, -1($16)) + addl $17, 1, $17 + subl $18, 1, $18 + bne $3, $aligndest + +/* + * If we fell through into here, we have a minimum of 33 - 7 bytes + * If we arrived via branch, we have a minimum of 32 bytes + */ +$destaligned: + and $17, 7, $1 + bic $18, 7, $4 + #EXI(ldl_u $3, 0($17)) + beq $1, $quadaligned + +#ifndef MISQUAD_SCALAR +$misquad: + and $16, 31, $1 + beq $1, $dest32Baligned + +$align_32B: + EXI(ldbu $1, 0($17)) + addl $17, 1, $17 + EXO(stb $1, 0($16)) + subl $18, 1, $18 + addl $16, 1, $16 + and $16, 31, $1 + beq $18, $exitout + bne $1, $align_32B + +$dest32Baligned: + ldi $2, 256($31) + andnot $17, 31, $3 + EXI(vldd $f10, 0($3)) + and $17, 31, $5 + sll $5, 3, $5 + subw $2, $5, $4 + ifmovs $5, $f15 + ifmovs $4, $f14 + + cmple $18, 63, $1 + bne $1, $misalign_tail_simd + +$misalign_body_simd: + EXI(vldd $f11, 32($3)) + fillcs 128*5($3) + + srlow $f10, $f15, $f12 + sllow $f11, $f14, $f13 + #fillde 128*5($16) + vlogfc $f12, $f13, $f31, $f12 + + EXI(vldd $f10, 64($3)) + srlow $f11, $f15, $f22 + sllow $f10, $f14, $f23 + vlogfc $f22, $f23, $f31, $f22 + + EXO(vstd $f12, 0($16)) + EXO(vstd $f22, 32($16)) + + addl $16, 64, $16 + addl $3, 64, $3 + subl $18, 64, $18 + + cmple $18, 63, $1 + beq $1, $misalign_body_simd + br $misalign_tail_simd + +$misalign_tail_simd: + cmple $18, 31, $1 + bne $1, $before_misalign_tail_quads + + EXI(vldd $f11, 32($3)) + srlow $f10, $f15, $f12 + sllow $f11, $f14, $f13 + vlogfc $f12, $f13, $f31, $f12 + + EXO(vstd $f12, 0($16)) + + subl $18, 32, $18 + addl $16, 32, $16 + addl $3, 32, $3 + vfmov $f11, $f10 + +$before_misalign_tail_quads: + srlow $f10, $f15, $f12 + s8subl $18, $4, $1 + ble $1, $tail_quads + + EXI(vldd $f11, 32($3)) + sllow $f11, $f14, $f13 + vlogfc $f12, $f13, $f31, $f12 + +$tail_quads: + subl $18, 8, $1 + blt $1, $less_than_8 + +$move_a_quad: + fimovd $f12, $1 + srlow $f12, 64, $f12 + + EXO(stl $1, 0($16)) + subl $18, 8, $18 + addl $16, 8, $16 + subl $18, 8, $1 + bge $1, $move_a_quad + +$less_than_8: + .align 4 + beq $18, $exitout + fimovd $f12, $1 + +$tail_bytes: + EXO(stb $1, 0($16)) + subl $18, 1, $18 + srl $1, 8, $1 + addl $16, 1, $16 + bgt $18, $tail_bytes + br $exitout +#else + +/* + * In the worst case, we've just executed an ldl_u here from 0($17) + * and we'll repeat it once if we take the branch + */ + +/* Misaligned quadword loop - not unrolled. Leave it that way. 
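+ * Each iteration loads the next unaligned quadword with ldl_u and
+ * stitches it to the previous one with extll/exthl before doing an
+ * aligned 8-byte store to the destination.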
*/ +$misquad: + EXI(ldl_u $2, 8($17)) + subl $4, 8, $4 + extll $3, $17, $3 + exthl $2, $17, $1 + + bis $3, $1, $1 + EXO(stl $1, 0($16)) + addl $17, 8, $17 + subl $18, 8, $18 + + addl $16, 8, $16 + bis $2, $2, $3 + bne $4, $misquad + + beq $18, $zerolength + +/* We know we have at least one trip through the byte loop */ + EXI(ldbu $2, 0($17)) + addl $16, 1, $16 + br $31, $dirtyentry +#endif +/* Do the trailing byte loop load, then hop into the store part of the loop */ + +/* + * A minimum of (33 - 7) bytes to do a quad at a time. + * Based upon the usage context, it's worth the effort to unroll this loop + * $18 - number of bytes to be moved + * $4 - number of bytes to move as quadwords + * $16 is current destination address + * $17 is current source address + */ + +$quadaligned: + and $16, 31, $1 + beq $1, $quadaligned_dest32Baligned + +$quadaligned_align_32B: + EXI(ldl $1, 0($17)) + addl $17, 8, $17 + EXO(stl $1, 0($16)) + subl $18, 8, $18 + subl $4, 8, $4 + addl $16, 8, $16 + and $16, 31, $1 + beq $4, $onebyteloop + bne $1, $quadaligned_align_32B + +$quadaligned_dest32Baligned: + and $17, 31, $2 + bne $2, $dest32Baligned + +$quad32Bailgned: + subl $4, 64, $2 + blt $2, $onequad + +/* + * There is a significant assumption here that the source and destination + * addresses differ by more than 32 bytes. In this particular case, a + * sparsity of registers further bounds this to be a minimum of 8 bytes. + * But if this isn't met, then the output result will be incorrect. + * Furthermore, due to a lack of available registers, we really can't + * unroll this to be an 8x loop (which would enable us to use the wh64 + * instruction memory hint instruction). + */ + +$simd_quadalign_unroll2: + fillcs 128 * 5($17) + EXI(vldd $f22, 0($17)) + EXI(vldd $f23, 32($17)) + EXO(vstd $f22, 0($16)) + EXO(vstd $f23, 32($16)) + #fillde 128 * 5($16) + subl $4, 64, $4 + subl $18, 64, $18 + addl $17, 64, $17 + addl $16, 64, $16 + subl $4, 64, $3 + bge $3, $simd_quadalign_unroll2 + bne $4, $onequad + br $31, $noquads + +$onequad: + EXI(ldl $1, 0($17)) + subl $4, 8, $4 + addl $17, 8, $17 + + EXO(stl $1, 0($16)) + subl $18, 8, $18 + addl $16, 8, $16 + bne $4, $onequad + +$noquads: + beq $18, $zerolength + +/* + * For small copies (or the tail of a larger copy), do a very simple byte loop. + * There's no point in doing a lot of complex alignment calculations to try to + * to quadword stuff for a small amount of data. + * $18 - remaining number of bytes left to copy + * $16 - current dest addr + * $17 - current source addr + */ + +$onebyteloop: + EXI(ldbu $2, 0($17)) + addl $16, 1, $16 + +$dirtyentry: +/* + * the -1 is to compensate for the inc($16) done in a previous quadpack + * which allows us zero dependencies within either quadpack in the loop + */ + EXO(stb $2, -1($16)) + addl $17, 1, $17 + subl $18, 1, $18 + bgt $18, $onebyteloop + +$zerolength: +$exitout: + bis $31, $18, $0 + ret $31, ($26), 1 + +$exitin: + + /* A stupid byte-by-byte zeroing of the rest of the output + * buffer. This cures security holes by never leaving + * random kernel data around to be copied elsewhere. 
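+ * At this point $18 holds the number of bytes that were not copied, so
+ * zero exactly that many bytes at the current destination pointer and
+ * return the same count in $0.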
+ */ + + mov $18, $1 + +$101: + EXO(stb $31, 0($16)) + subl $1, 1, $1 + addl $16, 1, $16 + bgt $1, $101 + + bis $31, $18, $0 + ret $31, ($26), 1 + + .end __copy_user + EXPORT_SYMBOL(__copy_user) diff --git a/arch/sw_64/lib/deep-memcpy.S b/arch/sw_64/lib/deep-memcpy.S new file mode 100644 index 000000000000..e847ec3d08df --- /dev/null +++ b/arch/sw_64/lib/deep-memcpy.S @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <asm/export.h> + .set noreorder + .set noat + + .align 4 + .globl memcpy + .ent memcpy + +memcpy: + .frame $30, 0, $26, 0 + .prologue 0 + + subl $sp, 0xa0, $sp + ldi $4, 0x40($sp) + stl $4, 0($sp) + bic $4, 0x1f, $4 + vstd $f4, 0($4) + vstd $f5, 0x20($4) + + mov $16, $0 + ble $18, $nomoredata + xor $16, $17, $1 + and $1, 7, $1 + + bne $1, $misaligned + + and $16, 7, $1 + beq $1, $both_0mod8 + +$head_align: + ldbu $1, 0($17) + subl $18, 1, $18 + addl $17, 1, $17 + stb $1, 0($16) + addl $16, 1, $16 + and $16, 7, $1 + ble $18, $nomoredata + bne $1, $head_align + +$both_0mod8: + cmple $18, 127, $1 + bne $1, $no_unroll + and $16, 63, $1 + beq $1, $do_unroll + +$single_head_quad: + ldl $1, 0($17) + subl $18, 8, $18 + addl $17, 8, $17 + + stl $1, 0($16) + addl $16, 8, $16 + and $16, 63, $1 + bne $1, $single_head_quad + +$do_unroll: + addl $16, 64, $7 + cmple $18, 127, $1 + bne $1, $tail_quads + +#JJ + and $17, 31, $1 + bne $1, $unroll_body + +$unroll_body_simd: + ldwe $f31,128*5($17) + vldd $f4, 0($17) + vldd $f5, 32($17) + vstd_nc $f4, 0($16) + vstd_nc $f5, 32($16) + addl $16, 64, $16 + subl $18, 64, $18 + addl $17, 64, $17 + cmple $18, 63, $1 + beq $1, $unroll_body_simd + memb + br $no_unroll +#endJJ + +$unroll_body: + #wh64 ($7) + #e_fillcs 0($7) + + ldl $6, 0($17) + #e_fillcs 256($17) + + ldl $4, 8($17) + ldl $5, 16($17) + addl $7, 64, $7 + + ldl $3, 24($17) + addl $16, 64, $1 + + addl $17, 32, $17 + stl_nc $6, 0($16) + + stl_nc $4, 8($16) + stl_nc $5, 16($16) + subl $18, 192, $2 + + stl_nc $3, 24($16) + addl $16, 32, $16 + + ldl $6, 0($17) + ldwe $f31, 4*128($17) + #e_fillcs 288($17) + ldl $4, 8($17) + #cmovlt $2, $1, $7 + sellt $2, $1, $7, $7 + + ldl $5, 16($17) + ldl $3, 24($17) + addl $16, 32, $16 + subl $18, 64, $18 + + addl $17, 32, $17 + stl_nc $6, -32($16) + stl_nc $4, -24($16) + cmple $18, 63, $1 + + stl_nc $5, -16($16) + stl_nc $3, -8($16) + beq $1, $unroll_body + + memb + +$tail_quads: +$no_unroll: + .align 4 + subl $18, 8, $18 + blt $18, $less_than_8 + +$move_a_quad: + ldl $1, 0($17) + subl $18, 8, $18 + addl $17, 8, $17 + + stl $1, 0($16) + addl $16, 8, $16 + bge $18, $move_a_quad + +$less_than_8: + .align 4 + addl $18, 8, $18 + ble $18, $nomoredata + + +$tail_bytes: + subl $18, 1, $18 + ldbu $1, 0($17) + addl $17, 1, $17 + + stb $1, 0($16) + addl $16, 1, $16 + bgt $18, $tail_bytes + + ldi $4, 0x40($sp) + bic $4, 0x1f, $4 + vldd $f4, 0($4) + vldd $f5, 0x20($4) + ldl $4, 0($sp) + addl $sp, 0xa0, $sp + + ret $31, ($26), 1 + +$misaligned: + mov $0, $4 + and $0, 7, $1 + beq $1, $dest_0mod8 + +$aligndest: + ble $18, $nomoredata + ldbu $1, 0($17) + subl $18, 1, $18 + addl $17, 1, $17 + + stb $1, 0($4) + addl $4, 1, $4 + and $4, 7, $1 + bne $1, $aligndest + + +$dest_0mod8: + subl $18, 8, $18 + blt $18, $misalign_tail + ldl_u $3, 0($17) + +$mis_quad: + ldl_u $16, 8($17) + #extql $3, $17, $3 + fillde 256($17) + and $17, 7, $1 + sll $1, 3, $1 + srl $3, $1, $3 + + #extqh $16, $17, $1 + subl $1, 64, $1 + negl $1, $1 + sll $16, $1, $1 + + bis $3, $1, $1 + + subl $18, 8, $18 + addl $17, 8, $17 + fillde 128($4) + stl $1, 0($4) + mov $16, $3 + + addl $4, 8, $4 + 
bge $18, $mis_quad + +$misalign_tail: + addl $18, 8, $18 + ble $18, $nomoredata + +$misalign_byte: + ldbu $1, 0($17) + subl $18, 1, $18 + addl $17, 1, $17 + + stb $1, 0($4) + addl $4, 1, $4 + bgt $18, $misalign_byte + + +$nomoredata: + ldi $4, 0x40($sp) + bic $4, 0x1f, $4 + vldd $f4, 0($4) + vldd $f5, 0x20($4) + ldl $4, 0($sp) + addl $sp, 0xa0, $sp + + ret $31, ($26), 1 + + .end memcpy + EXPORT_SYMBOL(memcpy) +__memcpy = memcpy +.globl __memcpy diff --git a/arch/sw_64/lib/deep-memset.S b/arch/sw_64/lib/deep-memset.S new file mode 100644 index 000000000000..4efba2062e11 --- /dev/null +++ b/arch/sw_64/lib/deep-memset.S @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Optimized memset() for SW64 with SIMD instructions + * + * Copyright (C) Mao Minkai + * Author: Mao Minkai + * + * Fill SIZE bytes pointed to by SRC with CHAR. + * + * Input: + * $16: SRC, clobbered + * $17: CHAR, clobbered + * $18: SIZE, clobbered + * + * Output: + * $0: SRC + * + * Temporaries: + * $1: unaligned parts of addr (0 means aligned addr), tmp data + * $2: tmp data + * $3: tmp data + * $4: tmp data + * $5: compare result + * $f10: 32 bytes data (manually saved) + * + */ + +#include <asm/export.h> + + .set noat + .set noreorder + .text + .align 4 + .globl memset + .globl __memset + .globl ___memset + .globl __memsetw + .ent ___memset +___memset: + .frame $30, 0, $26, 0 + .prologue 0 + +/* expand 1 byte data to 8 bytes */ + and $17, 0xff, $17 + sll $17, 8, $4 + bis $17, $4, $17 + sll $17, 16, $4 + bis $17, $4, $17 + sll $17, 32, $4 + bis $17, $4, $17 + +__constant_c_memset: + bis $31, $16, $0 # set return value + beq $18, $out # return if size is 0 + cmplt $18, 8, $5 # size less than 8, do 1-byte loop + bne $5, $tail_loop + +/* loop until SRC is 8 bytes aligned */ +$head_loop: + and $16, 0x7, $1 + beq $1, $mod8_aligned + stb $17, 0($16) + subl $18, 1, $18 + beq $18, $out + addl $16, 1, $16 + br $31, $head_loop + +$mod8_aligned: + +/* set 8 bytes each time */ +$mod8_loop: + and $16, 0x1f, $1 + beq $1, $mod32_aligned + subl $18, 8, $18 + blt $18, $tail + stl $17, 0($16) + addl $16, 8, $16 + br $31, $mod8_loop + +/* expand data to 32 bytes */ +$mod32_aligned: + subl $sp, 64, $sp + addl $sp, 31, $4 + bic $4, 0x1f, $4 + vstd $f10, 0($4) + ifmovd $17, $f10 + vcpyf $f10, $f10 + +/* set 64 bytes each time */ +$mod32_loop: + subl $18, 64, $18 + blt $18, $mod32_tail + vstd_nc $f10, 0($16) + vstd_nc $f10, 32($16) + addl $16, 64, $16 + br $31, $mod32_loop + +$mod32_tail: + vldd $f10, 0($4) + addl $sp, 64, $sp + addl $18, 64, $18 +$mod32_tail_loop: + subl $18, 8, $18 + blt $18, $tail + stl_nc $17, 0($16) + addl $16, 8, $16 + br $31, $mod32_tail_loop + +$tail: + addl $18, 8, $18 + +/* set one byte each time */ +$tail_loop: + beq $18, $out + stb $17, 0($16) + subl $18, 1, $18 + addl $16, 1, $16 + br $31, $tail_loop + +/* done, return */ +$out: + memb # required for _nc store instructions + ret + + .end ___memset + EXPORT_SYMBOL(___memset) + + .align 5 + .ent __memsetw +__memsetw: + .prologue 0 + + inslh $17, 0, $1 + inslh $17, 2, $2 + inslh $17, 4, $3 + bis $1, $2, $1 + inslh $17, 6, $4 + bis $1, $3, $1 + bis $1, $4, $17 + br $31, __constant_c_memset + + .end __memsetw + EXPORT_SYMBOL(__memsetw) + +memset = ___memset +EXPORT_SYMBOL(memset) +__memset = ___memset +EXPORT_SYMBOL(__memset) diff --git a/arch/sw_64/lib/divide.S b/arch/sw_64/lib/divide.S new file mode 100644 index 000000000000..ceef343a6084 --- /dev/null +++ b/arch/sw_64/lib/divide.S @@ -0,0 +1,190 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) 1995 
Linus Torvalds + * + * The sw64 chip doesn't provide hardware division, so we have to do it + * by hand. The compiler expects the functions + * + * __divlu: 64-bit unsigned long divide + * __remlu: 64-bit unsigned long remainder + * __divls/__remqs: signed 64-bit + * __divwu/__remlu: unsigned 32-bit + * __divws/__remls: signed 32-bit + * + * These are not normal C functions: instead of the normal + * calling sequence, these expect their arguments in registers + * $24 and $25, and return the result in $27. Register $28 may + * be clobbered (assembly temporary), anything else must be saved. + * + * In short: painful. + * + * This is a rather simple bit-at-a-time algorithm: it's very good + * at dividing random 64-bit numbers, but the more usual case where + * the divisor is small is handled better by the DEC algorithm + * using lookup tables. This uses much less memory, though, and is + * nicer on the cache.. Besides, I don't know the copyright status + * of the DEC code. + */ + +/* + * My temporaries: + * $0 - current bit + * $1 - shifted divisor + * $2 - modulus/quotient + * + * $23 - return address + * $24 - dividend + * $25 - divisor + * + * $27 - quotient/modulus + * $28 - compare status + */ +#include <asm/export.h> + +#define halt .long 0 + +/* + * Select function type and registers + */ +#define mask $0 +#define divisor $1 +#define compare $28 +#define tmp1 $3 +#define tmp2 $4 + +#ifdef DIV +#define DIV_ONLY(x,y...) x, ##y +#define MOD_ONLY(x,y...) +#define func(x) __div##x +#define modulus $2 +#define quotient $27 +#define GETSIGN(x) xor $24, $25, x +#define STACK 48 +#else +#define DIV_ONLY(x,y...) +#define MOD_ONLY(x,y...) x, ##y +#define func(x) __rem##x +#define modulus $27 +#define quotient $2 +#define GETSIGN(x) bis $24, $24, x +#define STACK 32 +#endif + +/* + * For 32-bit operations, we need to extend to 64-bit + */ +#ifdef INTSIZE +#define ufunction func(wu) +#define sfunction func(w) +#define LONGIFY(x) zapnot x, 15, x +#define SLONGIFY(x) addw x, 0, x +#else +#define ufunction func(lu) +#define sfunction func(l) +#define LONGIFY(x) +#define SLONGIFY(x) +#endif + +.set noat +.align 3 +.globl ufunction +.ent ufunction +ufunction: + subl $30, STACK, $30 + .frame $30, STACK, $23 + .prologue 0 + +7: stl $1, 0($30) + bis $25, $25, divisor + stl $2, 8($30) + bis $24, $24, modulus + stl $0, 16($30) + bis $31, $31, quotient + LONGIFY(divisor) + stl tmp1, 24($30) + LONGIFY(modulus) + bis $31, 1, mask + DIV_ONLY(stl tmp2, 32($30)) + beq divisor, 9f # div by zero + +#ifdef INTSIZE + /* + * shift divisor left, using 3-bit shifts for + * 32-bit divides as we can't overflow. Three-bit + * shifts will result in looping three times less + * here, but can result in two loops more later. + * Thus using a large shift isn't worth it (and + * s8add pairs better than a sll..) + */ +1: cmpult divisor, modulus, compare + s8addl divisor, $31, divisor + s8addl mask, $31, mask + bne compare, 1b +#else +1: cmpult divisor, modulus, compare + blt divisor, 2f + addl divisor, divisor, divisor + addl mask, mask, mask + bne compare, 1b +#endif + + /* ok, start to go right again.. 
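+ * Each pass: if the shifted divisor is still no larger than the
+ * remaining modulus, subtract it and (for division) merge the current
+ * mask bit into the quotient; then shift both divisor and mask right
+ * by one until the mask runs out.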
*/ +2: DIV_ONLY(addl quotient, mask, tmp2) + srl mask, 1, mask + cmpule divisor, modulus, compare + subl modulus, divisor, tmp1 + DIV_ONLY(selne compare, tmp2, quotient, quotient) + srl divisor, 1, divisor + selne compare, tmp1, modulus, modulus + bne mask, 2b + +9: ldl $1, 0($30) + ldl $2, 8($30) + ldl $0, 16($30) + ldl tmp1, 24($30) + DIV_ONLY(ldl tmp2, 32($30)) + addl $30, STACK, $30 + ret $31, ($23), 1 + .end ufunction + EXPORT_SYMBOL(ufunction) +/* + * Uhh.. Ugly signed division. I'd rather not have it at all, but + * it's needed in some circumstances. There are different ways to + * handle this, really. This does: + * -a / b = a / -b = -(a / b) + * -a % b = -(a % b) + * a % -b = a % b + * which is probably not the best solution, but at least should + * have the property that (x/y)*y + (x%y) = x. + */ +.align 3 +.globl sfunction +.ent sfunction +sfunction: + subl $30, STACK, $30 + .frame $30, STACK, $23 + .prologue 0 + bis $24, $25, $28 + SLONGIFY($28) + bge $28, 7b + stl $24, 0($30) + subl $31, $24, $28 + stl $25, 8($30) + sellt $24, $28, $24, $24 # abs($24) + stl $23, 16($30) + subl $31, $25, $28 + stl tmp1, 24($30) + sellt $25, $28, $25, $25 # abs($25) + bsr $23, ufunction + ldl $24, 0($30) + ldl $25, 8($30) + GETSIGN($28) + subl $31, $27, tmp1 + SLONGIFY($28) + ldl $23, 16($30) + sellt $28, tmp1, $27, $27 + ldl tmp1, 24($30) + addl $30, STACK, $30 + ret $31, ($23), 1 + .end sfunction + EXPORT_SYMBOL(sfunction) diff --git a/arch/sw_64/lib/fls.c b/arch/sw_64/lib/fls.c new file mode 100644 index 000000000000..e960b1c06782 --- /dev/null +++ b/arch/sw_64/lib/fls.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/module.h> +#include <linux/bitops.h> + +/* This is fls(x)-1, except zero is held to zero. This allows most + * efficient input into extbl, plus it allows easy handling of fls(0)=0. 
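+ * For example, __flsm1_tab[1] = 0 (fls(1) is 1) and __flsm1_tab[0x80] = 7
+ * (fls(0x80) is 8), while the entry for 0 is pinned to 0 rather than -1.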
+ */ + +const unsigned char __flsm1_tab[256] = { + 0, + 0, + 1, 1, + 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; +EXPORT_SYMBOL(__flsm1_tab); diff --git a/arch/sw_64/lib/fpreg.c b/arch/sw_64/lib/fpreg.c new file mode 100644 index 000000000000..b88c6be03c6d --- /dev/null +++ b/arch/sw_64/lib/fpreg.c @@ -0,0 +1,992 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) Copyright 1998 Linus Torvalds + */ + +#include <linux/gfp.h> +#include <linux/export.h> + +#define STT(reg, val) \ + asm volatile ("fimovd $f"#reg", %0" : "=r"(val)); +#define STS(reg, val) \ + asm volatile ("fimovs $f"#reg", %0" : "=r"(val)); +#define LDT(reg, val) \ + asm volatile ("ifmovd %0, $f"#reg : : "r"(val)); +#define LDS(reg, val) \ + asm volatile ("ifmovs %0, $f"#reg : : "r"(val)); +#define VLDD(reg, val) \ + asm volatile ("vldd $f"#reg", %0" : : "m"(val) : "memory"); +#define VSTD(reg, val) \ + asm volatile ("vstd $f"#reg", %0" : "=m"(val) : : "memory"); +#define VLDS(reg, val) \ + asm volatile ("vlds $f"#reg", %0" : : "m"(val) : "memory"); +#define LDWE(reg, val) \ + asm volatile ("ldwe $f"#reg", %0" : : "m"(val) : "memory"); +#define VSTS(reg, val) \ + asm volatile ("vsts $f"#reg", %0" : "=m"(val) : : "memory"); +#define STDH(reg, val) \ + asm volatile ("vstd $f"#reg", %0" : "=m"(val) : : "memory"); + +void +sw64_write_simd_fp_reg_s(unsigned long reg, unsigned long f0, unsigned long f1) +{ + + unsigned long tmpa[4] __attribute__((aligned(16))); + + tmpa[0] = f0; + tmpa[1] = f1; + + switch (reg) { + case 0: + VLDS(0, *tmpa); + break; + case 1: + VLDS(1, *tmpa); + break; + case 2: + VLDS(2, *tmpa); + break; + case 3: + VLDS(3, *tmpa); + break; + case 4: + VLDS(4, *tmpa); + break; + case 5: + VLDS(5, *tmpa); + break; + case 6: + VLDS(6, *tmpa); + break; + case 7: + VLDS(7, *tmpa); + break; + case 8: + VLDS(8, *tmpa); + break; + case 9: + VLDS(9, *tmpa); + break; + case 10: + VLDS(10, *tmpa); + break; + case 11: + VLDS(11, *tmpa); + break; + case 12: + VLDS(12, *tmpa); + break; + case 13: + VLDS(13, *tmpa); + break; + case 14: + VLDS(14, *tmpa); + break; + case 15: + VLDS(15, *tmpa); + break; + case 16: + VLDS(16, *tmpa); + break; + case 17: + VLDS(17, *tmpa); + break; + case 18: + VLDS(18, *tmpa); + break; + case 19: + VLDS(19, *tmpa); + break; + case 20: + VLDS(20, *tmpa); + break; + case 21: + VLDS(21, *tmpa); + break; + case 22: + VLDS(22, *tmpa); + break; + case 23: + VLDS(23, *tmpa); + break; + case 24: + VLDS(24, *tmpa); + break; + case 25: + VLDS(25, *tmpa); + break; + case 26: + VLDS(26, *tmpa); + break; + case 27: + VLDS(27, *tmpa); + break; + case 28: + VLDS(28, *tmpa); + break; + case 29: + VLDS(29, *tmpa); + break; + case 30: + VLDS(30, *tmpa); + break; + case 31: + break; + } + +} + + +void sw64_write_simd_fp_reg_d(unsigned long reg, unsigned long f0, + unsigned long 
f1, unsigned long f2, unsigned long f3) +{ + unsigned long tmpa[4] __attribute__((aligned(32))); + + tmpa[0] = f0; + tmpa[1] = f1; + tmpa[2] = f2; + tmpa[3] = f3; + + switch (reg) { + case 0: + VLDD(0, *tmpa); + break; + case 1: + VLDD(1, *tmpa); + break; + case 2: + VLDD(2, *tmpa); + break; + case 3: + VLDD(3, *tmpa); + break; + case 4: + VLDD(4, *tmpa); + break; + case 5: + VLDD(5, *tmpa); + break; + case 6: + VLDD(6, *tmpa); + break; + case 7: + VLDD(7, *tmpa); + break; + case 8: + VLDD(8, *tmpa); + break; + case 9: + VLDD(9, *tmpa); + break; + case 10: + VLDD(10, *tmpa); + break; + case 11: + VLDD(11, *tmpa); + break; + case 12: + VLDD(12, *tmpa); + break; + case 13: + VLDD(13, *tmpa); + break; + case 14: + VLDD(14, *tmpa); + break; + case 15: + VLDD(15, *tmpa); + break; + case 16: + VLDD(16, *tmpa); + break; + case 17: + VLDD(17, *tmpa); + break; + case 18: + VLDD(18, *tmpa); + break; + case 19: + VLDD(19, *tmpa); + break; + case 20: + VLDD(20, *tmpa); + break; + case 21: + VLDD(21, *tmpa); + break; + case 22: + VLDD(22, *tmpa); + break; + case 23: + VLDD(23, *tmpa); + break; + case 24: + VLDD(24, *tmpa); + break; + case 25: + VLDD(25, *tmpa); + break; + case 26: + VLDD(26, *tmpa); + break; + case 27: + VLDD(27, *tmpa); + break; + case 28: + VLDD(28, *tmpa); + break; + case 29: + VLDD(29, *tmpa); + break; + case 30: + VLDD(30, *tmpa); + break; + case 31: + break; + } + + +} + + +void sw64_write_simd_fp_reg_ldwe(unsigned long reg, int a) +{ + switch (reg) { + case 0: + LDWE(0, a); + break; + case 1: + LDWE(1, a); + break; + case 2: + LDWE(2, a); + break; + case 3: + LDWE(3, a); + break; + case 4: + LDWE(4, a); + break; + case 5: + LDWE(5, a); + break; + case 6: + LDWE(6, a); + break; + case 7: + LDWE(7, a); + break; + case 8: + LDWE(8, a); + break; + case 9: + LDWE(9, a); + break; + case 10: + LDWE(10, a); + break; + case 11: + LDWE(11, a); + break; + case 12: + LDWE(12, a); + break; + case 13: + LDWE(13, a); + break; + case 14: + LDWE(14, a); + break; + case 15: + LDWE(15, a); + break; + case 16: + LDWE(16, a); + break; + case 17: + LDWE(17, a); + break; + case 18: + LDWE(18, a); + break; + case 19: + LDWE(19, a); + break; + case 20: + LDWE(20, a); + break; + case 21: + LDWE(21, a); + break; + case 22: + LDWE(22, a); + break; + case 23: + LDWE(23, a); + break; + case 24: + LDWE(24, a); + break; + case 25: + LDWE(25, a); + break; + case 26: + LDWE(26, a); + break; + case 27: + LDWE(27, a); + break; + case 28: + LDWE(28, a); + break; + case 29: + LDWE(29, a); + break; + case 30: + LDWE(30, a); + break; + case 31: + break; + } +} + + +void sw64_read_simd_fp_m_s(unsigned long reg, unsigned long *fp_value) +{ + volatile unsigned long tmpa[2] __attribute__((aligned(16))); + switch (reg) { + case 0: + VSTS(0, *tmpa); + break; + case 1: + VSTS(1, *tmpa); + break; + case 2: + VSTS(2, *tmpa); + break; + case 3: + VSTS(3, *tmpa); + break; + case 4: + VSTS(4, *tmpa); + break; + case 5: + VSTS(5, *tmpa); + break; + case 6: + VSTS(6, *tmpa); + break; + case 7: + VSTS(7, *tmpa); + break; + case 8: + VSTS(8, *tmpa); + break; + case 9: + VSTS(9, *tmpa); + break; + case 10: + VSTS(10, *tmpa); + break; + case 11: + VSTS(11, *tmpa); + break; + case 12: + VSTS(12, *tmpa); + break; + case 13: + VSTS(13, *tmpa); + break; + case 14: + VSTS(14, *tmpa); + break; + case 15: + VSTS(15, *tmpa); + break; + case 16: + VSTS(16, *tmpa); + break; + case 17: + VSTS(17, *tmpa); + break; + case 18: + VSTS(18, *tmpa); + break; + case 19: + VSTS(19, *tmpa); + break; + case 20: + VSTS(20, *tmpa); + break; + case 21: + 
VSTS(21, *tmpa); + break; + case 22: + VSTS(22, *tmpa); + break; + case 23: + VSTS(23, *tmpa); + break; + case 24: + VSTS(24, *tmpa); + break; + case 25: + VSTS(25, *tmpa); + break; + case 26: + VSTS(26, *tmpa); + break; + case 27: + VSTS(27, *tmpa); + break; + case 28: + VSTS(28, *tmpa); + break; + case 29: + VSTS(29, *tmpa); + break; + case 30: + VSTS(30, *tmpa); + break; + case 31: + VSTS(31, *tmpa); + break; + } + + *fp_value = tmpa[0]; + *(fp_value+1) = tmpa[1]; +} + +void sw64_read_simd_fp_m_d(unsigned long reg, unsigned long *fp_value) +{ + volatile unsigned long tmpa[4] __attribute__((aligned(32))); + switch (reg) { + case 0: + VSTD(0, *tmpa); + break; + case 1: + VSTD(1, *tmpa); + break; + case 2: + VSTD(2, *tmpa); + break; + case 3: + VSTD(3, *tmpa); + break; + case 4: + VSTD(4, *tmpa); + break; + case 5: + VSTD(5, *tmpa); + break; + case 6: + VSTD(6, *tmpa); + break; + case 7: + VSTD(7, *tmpa); + break; + case 8: + VSTD(8, *tmpa); + break; + case 9: + VSTD(9, *tmpa); + break; + case 10: + VSTD(10, *tmpa); + break; + case 11: + VSTD(11, *tmpa); + break; + case 12: + VSTD(12, *tmpa); + break; + case 13: + VSTD(13, *tmpa); + break; + case 14: + VSTD(14, *tmpa); + break; + case 15: + VSTD(15, *tmpa); + break; + case 16: + VSTD(16, *tmpa); + break; + case 17: + VSTD(17, *tmpa); + break; + case 18: + VSTD(18, *tmpa); + break; + case 19: + VSTD(19, *tmpa); + break; + case 20: + VSTD(20, *tmpa); + break; + case 21: + VSTD(21, *tmpa); + break; + case 22: + VSTD(22, *tmpa); + break; + case 23: + VSTD(23, *tmpa); + break; + case 24: + VSTD(24, *tmpa); + break; + case 25: + VSTD(25, *tmpa); + break; + case 26: + VSTD(26, *tmpa); + break; + case 27: + VSTD(27, *tmpa); + break; + case 28: + VSTD(28, *tmpa); + break; + case 29: + VSTD(29, *tmpa); + break; + case 30: + VSTD(30, *tmpa); + break; + case 31: + VSTD(31, *tmpa); + break; + } + + *fp_value = tmpa[0]; + *(fp_value+1) = tmpa[1]; + *(fp_value+2) = tmpa[2]; + *(fp_value+3) = tmpa[3]; + + return; +} + +unsigned long sw64_read_fp_reg(unsigned long reg) +{ + unsigned long val; + + switch (reg) { + case 0: + STT(0, val); + break; + case 1: + STT(1, val); + break; + case 2: + STT(2, val); + break; + case 3: + STT(3, val); + break; + case 4: + STT(4, val); + break; + case 5: + STT(5, val); + break; + case 6: + STT(6, val); + break; + case 7: + STT(7, val); + break; + case 8: + STT(8, val); + break; + case 9: + STT(9, val); + break; + case 10: + STT(10, val); + break; + case 11: + STT(11, val); + break; + case 12: + STT(12, val); + break; + case 13: + STT(13, val); + break; + case 14: + STT(14, val); + break; + case 15: + STT(15, val); + break; + case 16: + STT(16, val); + break; + case 17: + STT(17, val); + break; + case 18: + STT(18, val); + break; + case 19: + STT(19, val); + break; + case 20: + STT(20, val); + break; + case 21: + STT(21, val); + break; + case 22: + STT(22, val); + break; + case 23: + STT(23, val); + break; + case 24: + STT(24, val); + break; + case 25: + STT(25, val); + break; + case 26: + STT(26, val); + break; + case 27: + STT(27, val); + break; + case 28: + STT(28, val); + break; + case 29: + STT(29, val); + break; + case 30: + STT(30, val); + break; + case 31: + STT(31, val); + break; + default: + return 0; + } + + return val; +} +EXPORT_SYMBOL(sw64_read_fp_reg); + +void sw64_write_fp_reg(unsigned long reg, unsigned long val) +{ + switch (reg) { + case 0: + LDT(0, val); + break; + case 1: + LDT(1, val); + break; + case 2: + LDT(2, val); + break; + case 3: + LDT(3, val); + break; + case 4: + LDT(4, val); + break; + case 
5: + LDT(5, val); + break; + case 6: + LDT(6, val); + break; + case 7: + LDT(7, val); + break; + case 8: + LDT(8, val); + break; + case 9: + LDT(9, val); + break; + case 10: + LDT(10, val); + break; + case 11: + LDT(11, val); + break; + case 12: + LDT(12, val); + break; + case 13: + LDT(13, val); + break; + case 14: + LDT(14, val); + break; + case 15: + LDT(15, val); + break; + case 16: + LDT(16, val); + break; + case 17: + LDT(17, val); + break; + case 18: + LDT(18, val); + break; + case 19: + LDT(19, val); + break; + case 20: + LDT(20, val); + break; + case 21: + LDT(21, val); + break; + case 22: + LDT(22, val); + break; + case 23: + LDT(23, val); + break; + case 24: + LDT(24, val); + break; + case 25: + LDT(25, val); + break; + case 26: + LDT(26, val); + break; + case 27: + LDT(27, val); + break; + case 28: + LDT(28, val); + break; + case 29: + LDT(29, val); + break; + case 30: + LDT(30, val); + break; + case 31: + LDT(31, val); + break; + } +} +EXPORT_SYMBOL(sw64_write_fp_reg); + +unsigned long sw64_read_fp_reg_s(unsigned long reg) +{ + unsigned long val; + + switch (reg) { + case 0: + STS(0, val); + break; + case 1: + STS(1, val); + break; + case 2: + STS(2, val); + break; + case 3: + STS(3, val); + break; + case 4: + STS(4, val); + break; + case 5: + STS(5, val); + break; + case 6: + STS(6, val); + break; + case 7: + STS(7, val); + break; + case 8: + STS(8, val); + break; + case 9: + STS(9, val); + break; + case 10: + STS(10, val); + break; + case 11: + STS(11, val); + break; + case 12: + STS(12, val); + break; + case 13: + STS(13, val); + break; + case 14: + STS(14, val); + break; + case 15: + STS(15, val); + break; + case 16: + STS(16, val); + break; + case 17: + STS(17, val); + break; + case 18: + STS(18, val); + break; + case 19: + STS(19, val); + break; + case 20: + STS(20, val); + break; + case 21: + STS(21, val); + break; + case 22: + STS(22, val); + break; + case 23: + STS(23, val); + break; + case 24: + STS(24, val); + break; + case 25: + STS(25, val); + break; + case 26: + STS(26, val); + break; + case 27: + STS(27, val); + break; + case 28: + STS(28, val); + break; + case 29: + STS(29, val); + break; + case 30: + STS(30, val); + break; + case 31: + STS(31, val); + break; + default: + return 0; + } + + return val; +} +EXPORT_SYMBOL(sw64_read_fp_reg_s); + +void sw64_write_fp_reg_s(unsigned long reg, unsigned long val) +{ + switch (reg) { + case 0: + LDS(0, val); + break; + case 1: + LDS(1, val); + break; + case 2: + LDS(2, val); + break; + case 3: + LDS(3, val); + break; + case 4: + LDS(4, val); + break; + case 5: + LDS(5, val); + break; + case 6: + LDS(6, val); + break; + case 7: + LDS(7, val); + break; + case 8: + LDS(8, val); + break; + case 9: + LDS(9, val); + break; + case 10: + LDS(10, val); + break; + case 11: + LDS(11, val); + break; + case 12: + LDS(12, val); + break; + case 13: + LDS(13, val); + break; + case 14: + LDS(14, val); + break; + case 15: + LDS(15, val); + break; + case 16: + LDS(16, val); + break; + case 17: + LDS(17, val); + break; + case 18: + LDS(18, val); + break; + case 19: + LDS(19, val); + break; + case 20: + LDS(20, val); + break; + case 21: + LDS(21, val); + break; + case 22: + LDS(22, val); + break; + case 23: + LDS(23, val); + break; + case 24: + LDS(24, val); + break; + case 25: + LDS(25, val); + break; + case 26: + LDS(26, val); + break; + case 27: + LDS(27, val); + break; + case 28: + LDS(28, val); + break; + case 29: + LDS(29, val); + break; + case 30: + LDS(30, val); + break; + case 31: + LDS(31, val); + break; + } +} 
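[Editorial aside, not part of the patch: fpreg.c expands into 32-way switch statements because the floating-point register number must appear as a literal token in the SW64 mnemonic (e.g. "fimovd $f7"), so it cannot be passed to the inline assembly as a runtime operand. The plain-C sketch below shows the same token-pasting dispatch pattern in isolation; ACCESS_REG is a hypothetical stand-in for the real STT/LDT/VLDD macros, whose bodies are SW64 inline assembly.]

    /*
     * Illustrative sketch only -- not part of this patch.  ACCESS_REG stands
     * in for the "$f"#reg inline-assembly macros used in fpreg.c: the register
     * number is pasted into the mnemonic at compile time, so a runtime index
     * has to be dispatched through one switch case per register.
     */
    #include <stdio.h>

    #define ACCESS_REG(reg, val) \
            do { printf("access $f" #reg " with %#lx\n", (unsigned long)(val)); } while (0)

    /* Runtime register index -> compile-time literal, as in sw64_write_fp_reg(). */
    static void write_reg(unsigned long reg, unsigned long val)
    {
            switch (reg) {
            case 0: ACCESS_REG(0, val); break;
            case 1: ACCESS_REG(1, val); break;
            case 2: ACCESS_REG(2, val); break;
            /* ...one case per register up to 31, exactly as in fpreg.c... */
            default: break;
            }
    }

    int main(void)
    {
            write_reg(2, 0x3ff0000000000000UL);     /* would touch $f2 */
            return 0;
    }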
+EXPORT_SYMBOL(sw64_write_fp_reg_s); diff --git a/arch/sw_64/lib/iomap.c b/arch/sw_64/lib/iomap.c new file mode 100644 index 000000000000..30d24923624d --- /dev/null +++ b/arch/sw_64/lib/iomap.c @@ -0,0 +1,508 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Sw_64 IO and memory functions. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> +#include <asm/io.h> + +/* + * Here comes the sw64 implementation of the IOMAP interfaces. + */ +unsigned int ioread8(const void __iomem *addr) +{ + return readb(addr); +} +EXPORT_SYMBOL(ioread8); + +unsigned int ioread16(const void __iomem *addr) +{ + return readw(addr); +} +EXPORT_SYMBOL(ioread16); + +unsigned int ioread32(const void __iomem *addr) +{ + return readl(addr); +} +EXPORT_SYMBOL(ioread32); + +void iowrite8(u8 b, void __iomem *addr) +{ + writeb(b, addr); +} +EXPORT_SYMBOL(iowrite8); + +void iowrite16(u16 b, void __iomem *addr) +{ + writew(b, addr); +} +EXPORT_SYMBOL(iowrite16); + +void iowrite32(u32 b, void __iomem *addr) +{ + writel(b, addr); +} +EXPORT_SYMBOL(iowrite32); + +u8 inb(unsigned long port) +{ + return ioread8(ioport_map(port, 1)); +} +EXPORT_SYMBOL(inb); + +u16 inw(unsigned long port) +{ + return ioread16(ioport_map(port, 2)); +} +EXPORT_SYMBOL(inw); + +u32 inl(unsigned long port) +{ + return ioread32(ioport_map(port, 4)); +} +EXPORT_SYMBOL(inl); + +void outb(u8 b, unsigned long port) +{ + iowrite8(b, ioport_map(port, 1)); +} +EXPORT_SYMBOL(outb); + +void outw(u16 b, unsigned long port) +{ + iowrite16(b, ioport_map(port, 2)); +} +EXPORT_SYMBOL(outw); + +void outl(u32 b, unsigned long port) +{ + iowrite32(b, ioport_map(port, 4)); +} +EXPORT_SYMBOL(outl); + + +/* + * Read COUNT 8-bit bytes from port PORT into memory starting at SRC. + */ +void ioread8_rep(const void __iomem *port, void *dst, unsigned long count) +{ + while ((unsigned long)dst & 0x3) { + if (!count) + return; + count--; + *(unsigned char *)dst = ioread8(port); + dst += 1; + } + + while (count >= 4) { + unsigned int w; + + count -= 4; + w = ioread8(port); + w |= ioread8(port) << 8; + w |= ioread8(port) << 16; + w |= ioread8(port) << 24; + *(unsigned int *)dst = w; + dst += 4; + } + + while (count) { + --count; + *(unsigned char *)dst = ioread8(port); + dst += 1; + } +} +EXPORT_SYMBOL(ioread8_rep); + +void insb(unsigned long port, void *dst, unsigned long count) +{ + ioread8_rep(ioport_map(port, 1), dst, count); +} +EXPORT_SYMBOL(insb); + +/* + * Read COUNT 16-bit words from port PORT into memory starting at + * SRC. SRC must be at least short aligned. This is used by the + * IDE driver to read disk sectors. Performance is important, but + * the interfaces seems to be slow: just using the inlined version + * of the inw() breaks things. + */ +void ioread16_rep(const void __iomem *port, void *dst, unsigned long count) +{ + if (unlikely((unsigned long)dst & 0x3)) { + if (!count) + return; + BUG_ON((unsigned long)dst & 0x1); + count--; + *(unsigned short *)dst = ioread16(port); + dst += 2; + } + + while (count >= 2) { + unsigned int w; + + count -= 2; + w = ioread16(port); + w |= ioread16(port) << 16; + *(unsigned int *)dst = w; + dst += 4; + } + + if (count) + *(unsigned short *)dst = ioread16(port); +} +EXPORT_SYMBOL(ioread16_rep); + +void insw(unsigned long port, void *dst, unsigned long count) +{ + ioread16_rep(ioport_map(port, 2), dst, count); +} +EXPORT_SYMBOL(insw); + + +/* + * Read COUNT 32-bit words from port PORT into memory starting at + * SRC. Now works with any alignment in SRC. 
Performance is important, + * but the interfaces seems to be slow: just using the inlined version + * of the inl() breaks things. + */ +void ioread32_rep(const void __iomem *port, void *dst, unsigned long count) +{ + if (unlikely((unsigned long)dst & 0x3)) { + while (count--) { + struct S { int x __attribute__((packed)); }; + ((struct S *)dst)->x = ioread32(port); + dst += 4; + } + } else { + /* Buffer 32-bit aligned. */ + while (count--) { + *(unsigned int *)dst = ioread32(port); + dst += 4; + } + } +} +EXPORT_SYMBOL(ioread32_rep); + +void insl(unsigned long port, void *dst, unsigned long count) +{ + ioread32_rep(ioport_map(port, 4), dst, count); +} +EXPORT_SYMBOL(insl); + + +/* + * Like insb but in the opposite direction. + * Don't worry as much about doing aligned memory transfers: + * doing byte reads the "slow" way isn't nearly as slow as + * doing byte writes the slow way (no r-m-w cycle). + */ +void iowrite8_rep(void __iomem *port, const void *xsrc, unsigned long count) +{ + const unsigned char *src = xsrc; + + while (count--) + iowrite8(*src++, port); +} +EXPORT_SYMBOL(iowrite8_rep); + +void outsb(unsigned long port, const void *src, unsigned long count) +{ + iowrite8_rep(ioport_map(port, 1), src, count); +} +EXPORT_SYMBOL(outsb); + + +/* + * Like insw but in the opposite direction. This is used by the IDE + * driver to write disk sectors. Performance is important, but the + * interfaces seems to be slow: just using the inlined version of the + * outw() breaks things. + */ +void iowrite16_rep(void __iomem *port, const void *src, unsigned long count) +{ + if (unlikely((unsigned long)src & 0x3)) { + if (!count) + return; + BUG_ON((unsigned long)src & 0x1); + iowrite16(*(unsigned short *)src, port); + src += 2; + --count; + } + + while (count >= 2) { + unsigned int w; + + count -= 2; + w = *(unsigned int *)src; + src += 4; + iowrite16(w >> 0, port); + iowrite16(w >> 16, port); + } + + if (count) + iowrite16(*(unsigned short *)src, port); +} +EXPORT_SYMBOL(iowrite16_rep); + +void outsw(unsigned long port, const void *src, unsigned long count) +{ + iowrite16_rep(ioport_map(port, 2), src, count); +} +EXPORT_SYMBOL(outsw); + + +/* + * Like insl but in the opposite direction. This is used by the IDE + * driver to write disk sectors. Works with any alignment in SRC. + * Performance is important, but the interfaces seems to be slow: + * just using the inlined version of the outl() breaks things. + */ +void iowrite32_rep(void __iomem *port, const void *src, unsigned long count) +{ + if (unlikely((unsigned long)src & 0x3)) { + while (count--) { + struct S { int x __attribute__((packed)); }; + iowrite32(((struct S *)src)->x, port); + src += 4; + } + } else { + /* Buffer 32-bit aligned. */ + while (count--) { + iowrite32(*(unsigned int *)src, port); + src += 4; + } + } +} +EXPORT_SYMBOL(iowrite32_rep); + +void outsl(unsigned long port, const void *src, unsigned long count) +{ + iowrite32_rep(ioport_map(port, 4), src, count); +} +EXPORT_SYMBOL(outsl); + + +/* + * Copy data from IO memory space to "real" memory space. + * This needs to be optimized. + */ +void memcpy_fromio(void *to, const volatile void __iomem *from, long count) +{ + /* + * Optimize co-aligned transfers. Everything else gets handled + * a byte at a time. 
+ */ + + if (count >= 8 && ((u64)to & 7) == ((u64)from & 7)) { + count -= 8; + do { + *(u64 *)to = __raw_readq(from); + count -= 8; + to += 8; + from += 8; + } while (count >= 0); + count += 8; + } + + if (count >= 4 && ((u64)to & 3) == ((u64)from & 3)) { + count -= 4; + do { + *(u32 *)to = __raw_readl(from); + count -= 4; + to += 4; + from += 4; + } while (count >= 0); + count += 4; + } + + if (count >= 2 && ((u64)to & 1) == ((u64)from & 1)) { + count -= 2; + do { + *(u16 *)to = __raw_readw(from); + count -= 2; + to += 2; + from += 2; + } while (count >= 0); + count += 2; + } + + while (count > 0) { + *(u8 *) to = __raw_readb(from); + count--; + to++; + from++; + } + mb(); +} +EXPORT_SYMBOL(memcpy_fromio); + + +/* + * Copy data from "real" memory space to IO memory space. + * This needs to be optimized. + */ +void memcpy_toio(volatile void __iomem *to, const void *from, long count) +{ + /* + * Optimize co-aligned transfers. Everything else gets handled + * a byte at a time. + * FIXME -- align FROM. + */ + + if (count >= 8 && ((u64)to & 7) == ((u64)from & 7)) { + count -= 8; + do { + __raw_writeq(*(const u64 *)from, to); + count -= 8; + to += 8; + from += 8; + } while (count >= 0); + count += 8; + } + + if (count >= 4 && ((u64)to & 3) == ((u64)from & 3)) { + count -= 4; + do { + __raw_writel(*(const u32 *)from, to); + count -= 4; + to += 4; + from += 4; + } while (count >= 0); + count += 4; + } + + if (count >= 2 && ((u64)to & 1) == ((u64)from & 1)) { + count -= 2; + do { + __raw_writew(*(const u16 *)from, to); + count -= 2; + to += 2; + from += 2; + } while (count >= 0); + count += 2; + } + + while (count > 0) { + __raw_writeb(*(const u8 *) from, to); + count--; + to++; + from++; + } + mb(); +} +EXPORT_SYMBOL(memcpy_toio); + + +/* + * "memset" on IO memory space. + */ +void _memset_c_io(volatile void __iomem *to, unsigned long c, long count) +{ + /* Handle any initial odd byte */ + if (count > 0 && ((u64)to & 1)) { + __raw_writeb(c, to); + to++; + count--; + } + + /* Handle any initial odd halfword */ + if (count >= 2 && ((u64)to & 2)) { + __raw_writew(c, to); + to += 2; + count -= 2; + } + + /* Handle any initial odd word */ + if (count >= 4 && ((u64)to & 4)) { + __raw_writel(c, to); + to += 4; + count -= 4; + } + + /* + * Handle all full-sized quadwords: we're aligned + * (or have a small count) + */ + count -= 8; + if (count >= 0) { + do { + __raw_writeq(c, to); + to += 8; + count -= 8; + } while (count >= 0); + } + count += 8; + + /* The tail is word-aligned if we still have count >= 4 */ + if (count >= 4) { + __raw_writel(c, to); + to += 4; + count -= 4; + } + + /* The tail is half-word aligned if we have count >= 2 */ + if (count >= 2) { + __raw_writew(c, to); + to += 2; + count -= 2; + } + + /* And finally, one last byte.. */ + if (count) + __raw_writeb(c, to); + mb(); +} +EXPORT_SYMBOL(_memset_c_io); + +/* + * A version of memcpy used by the vga console routines to move data around + * arbitrarily between screen and main memory. + */ + +void +scr_memcpyw(u16 *d, const u16 *s, unsigned int count) +{ + const u16 __iomem *ios = (const u16 __iomem *) s; + u16 __iomem *iod = (u16 __iomem *) d; + int s_isio = __is_ioaddr(s); + int d_isio = __is_ioaddr(d); + u16 tmp; + + if (s_isio) { + if (d_isio) { + /* + * FIXME: Should handle unaligned ops and + * operation widening. 
+ */ + + count /= 2; + while (count--) { + tmp = __raw_readw(ios++); + __raw_writew(tmp, iod++); + } + } else + memcpy_fromio(d, ios, count); + } else { + if (d_isio) + memcpy_toio(iod, s, count); + else + memcpy(d, s, count); + } +} +EXPORT_SYMBOL(scr_memcpyw); + +void __iomem *ioport_map(unsigned long port, unsigned int size) +{ + return ioportmap(port); +} +EXPORT_SYMBOL(ioport_map); + +void ioport_unmap(void __iomem *addr) +{ +} +EXPORT_SYMBOL(ioport_unmap); diff --git a/arch/sw_64/lib/iomap_copy.c b/arch/sw_64/lib/iomap_copy.c new file mode 100644 index 000000000000..10e756fffff5 --- /dev/null +++ b/arch/sw_64/lib/iomap_copy.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/export.h> +#include <linux/io.h> + +/** + * __iowrite32_copy - copy data to MMIO space, in 32-bit units + * @to: destination, in MMIO space (must be 32-bit aligned) + * @from: source (must be 32-bit aligned) + * @count: number of 32-bit quantities to copy + * + * Copy data from kernel space to MMIO space, in units of 32 bits at a + * time. Order of access is not guaranteed, nor is a memory barrier + * performed afterwards. + */ +void __iowrite32_copy(void __iomem *to, + const void *from, + size_t count) +{ + u32 __iomem *dst = to; + const u32 *src = from; + const u32 *end = src + count; + + while (src < end) { + __raw_writel(*src++, dst++); + mb(); + } + +} + +/** + * __iowrite64_copy - copy data to MMIO space, in 64-bit or 32-bit units + * @to: destination, in MMIO space (must be 64-bit aligned) + * @from: source (must be 64-bit aligned) + * @count: number of 64-bit quantities to copy + * + * Copy data from kernel space to MMIO space, in units of 32 or 64 bits at a + * time. Order of access is not guaranteed, nor is a memory barrier + * performed afterwards. + */ +void __iowrite64_copy(void __iomem *to, + const void *from, + size_t count) +{ +#ifdef CONFIG_64BIT + u64 __iomem *dst = to; + const u64 *src = from; + const u64 *end = src + count; + + while (src < end) + __raw_writeq(*src++, dst++); + mb(); +#else + __iowrite32_copy(to, from, count * 2); +#endif +} diff --git a/arch/sw_64/lib/memcpy.S b/arch/sw_64/lib/memcpy.S new file mode 100644 index 000000000000..31c422b393ee --- /dev/null +++ b/arch/sw_64/lib/memcpy.S @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Reasonably optimized memcpy() routine for the sw64 + * + * - memory accessed as aligned quadwords only + * - uses bcmpge to compare 8 bytes in parallel + * + * Temp usage notes: + * $1, $2, - scratch + */ +#include <asm/export.h> + .set noreorder + .set noat + + .align 4 + .globl memcpy + .ent memcpy +memcpy: + .frame $30, 0, $26, 0 + .prologue 0 + + mov $16, $0 + ble $18, $nomoredata + xor $16, $17, $1 + and $1, 7, $1 + + bne $1, $misaligned + /* source and dest are same mod 8 address */ + and $16, 7, $1 + beq $1, $both_0mod8 + + /* + * source and dest are same misalignment. move a byte at a time + * until a 0mod8 alignment for both is reached. 
+ * At least one byte more to move + */ + +$head_align: + ldbu $1, 0($17) + subl $18, 1, $18 + addl $17, 1, $17 + stb $1, 0($16) + addl $16, 1, $16 + and $16, 7, $1 + ble $18, $nomoredata + bne $1, $head_align + +$both_0mod8: + cmple $18, 127, $1 + bne $1, $no_unroll + and $16, 63, $1 + beq $1, $do_unroll + +$single_head_quad: + ldl $1, 0($17) + subl $18, 8, $18 + addl $17, 8, $17 + + stl $1, 0($16) + addl $16, 8, $16 + and $16, 63, $1 + bne $1, $single_head_quad + +$do_unroll: + addl $16, 64, $7 + cmple $18, 127, $1 + bne $1, $tail_quads + +$unroll_body: + #wh64 ($7) + fillde 0($7) + + ldl $6, 0($17) + + ldl $4, 8($17) + ldl $5, 16($17) + addl $7, 64, $7 + + ldl $3, 24($17) + addl $16, 64, $1 + + addl $17, 32, $17 + stl $6, 0($16) + + stl $4, 8($16) + stl $5, 16($16) + subl $18, 192, $2 + + stl $3, 24($16) + addl $16, 32, $16 + + ldl $6, 0($17) + ldl $4, 8($17) + #cmovlt $2, $1, $7 + sellt $2, $1, $7, $7 + + ldl $5, 16($17) + ldl $3, 24($17) + addl $16, 32, $16 + subl $18, 64, $18 + + addl $17, 32, $17 + stl $6, -32($16) + stl $4, -24($16) + cmple $18, 63, $1 + + stl $5, -16($16) + stl $3, -8($16) + beq $1, $unroll_body + +$tail_quads: +$no_unroll: + .align 4 + subl $18, 8, $18 + blt $18, $less_than_8 + +$move_a_quad: + ldl $1, 0($17) + subl $18, 8, $18 + addl $17, 8, $17 + + stl $1, 0($16) + addl $16, 8, $16 + bge $18, $move_a_quad + +$less_than_8: + .align 4 + addl $18, 8, $18 + ble $18, $nomoredata + + /* Trailing bytes */ +$tail_bytes: + subl $18, 1, $18 + ldbu $1, 0($17) + addl $17, 1, $17 + + stb $1, 0($16) + addl $16, 1, $16 + bgt $18, $tail_bytes + + /* branching to exit takes 3 extra cycles, so replicate exit here */ + ret $31, ($26), 1 + +$misaligned: + mov $0, $4 + and $0, 7, $1 + beq $1, $dest_0mod8 + +$aligndest: + ble $18, $nomoredata + ldbu $1, 0($17) + subl $18, 1, $18 + addl $17, 1, $17 + + stb $1, 0($4) + addl $4, 1, $4 + and $4, 7, $1 + bne $1, $aligndest + + /* Source has unknown alignment, but dest is known to be 0mod8 */ +$dest_0mod8: + subl $18, 8, $18 + blt $18, $misalign_tail + ldl_u $3, 0($17) + +$mis_quad: + ldl_u $16, 8($17) + extll $3, $17, $3 + exthl $16, $17, $1 + bis $3, $1, $1 + + subl $18, 8, $18 + addl $17, 8, $17 + stl $1, 0($4) + mov $16, $3 + + addl $4, 8, $4 + bge $18, $mis_quad + +$misalign_tail: + addl $18, 8, $18 + ble $18, $nomoredata + +$misalign_byte: + ldbu $1, 0($17) + subl $18, 1, $18 + addl $17, 1, $17 + + stb $1, 0($4) + addl $4, 1, $4 + bgt $18, $misalign_byte + + +$nomoredata: + ret $31, ($26), 1 + + .end memcpy + EXPORT_SYMBOL(memcpy) +/* For backwards module compatibility. */ +__memcpy = memcpy +.globl __memcpy diff --git a/arch/sw_64/lib/memmove.S b/arch/sw_64/lib/memmove.S new file mode 100644 index 000000000000..3e34fcd5b217 --- /dev/null +++ b/arch/sw_64/lib/memmove.S @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Barely optimized memmove routine for sw64. + * This is hand-massaged output from the original memcpy.c. We defer to + * memcpy whenever possible; the backwards copy loops are not unrolled. + */ +#include <asm/export.h> + .set noat + .set noreorder + .text + + .align 4 + .globl memmove + .ent memmove +memmove: + ldgp $29, 0($27) + unop + .prologue 1 + + addl $16, $18, $4 + addl $17, $18, $5 + cmpule $4, $17, $1 # dest + n <= src + cmpule $5, $16, $2 # dest >= src + n + + bis $1, $2, $1 + mov $16, $0 + xor $16, $17, $2 + bne $1, memcpy # samegp + + and $2, 7, $2 # Test for src/dest co-alignment. 
+ and $16, 7, $1 + cmpule $16, $17, $3 + bne $3, $memmove_up # dest < src + + and $4, 7, $1 + bne $2, $misaligned_dn + unop + beq $1, $skip_aligned_byte_loop_head_dn + +$aligned_byte_loop_head_dn: + ldi $4, -1($4) + ldi $5, -1($5) + unop + ble $18, $egress + + ldbu $1, 0($5) + ldi $18, -1($18) + stb $1, 0($4) + + and $4, 7, $6 + bne $6, $aligned_byte_loop_head_dn + +$skip_aligned_byte_loop_head_dn: + ldi $18, -8($18) + blt $18, $skip_aligned_word_loop_dn + +$aligned_word_loop_dn: + ldl $1, -8($5) + ldi $5, -8($5) + ldi $18, -8($18) + + stl $1, -8($4) + ldi $4, -8($4) + bge $18, $aligned_word_loop_dn + +$skip_aligned_word_loop_dn: + ldi $18, 8($18) + bgt $18, $byte_loop_tail_dn + unop + ret $31, ($26), 1 + + .align 4 +$misaligned_dn: + fnop + unop + beq $18, $egress + +$byte_loop_tail_dn: + ldbu $1, -1($5) + ldi $5, -1($5) + ldi $4, -1($4) + + ldi $18, -1($18) + stb $1, 0($4) + + bgt $18, $byte_loop_tail_dn + br $egress + +$memmove_up: + mov $16, $4 + mov $17, $5 + bne $2, $misaligned_up + beq $1, $skip_aligned_byte_loop_head_up + +$aligned_byte_loop_head_up: + unop + ble $18, $egress + ldbu $1, 0($5) + + ldi $18, -1($18) + + ldi $5, 1($5) + stb $1, 0($4) + ldi $4, 1($4) + + and $4, 7, $6 + bne $6, $aligned_byte_loop_head_up + +$skip_aligned_byte_loop_head_up: + ldi $18, -8($18) + blt $18, $skip_aligned_word_loop_up + +$aligned_word_loop_up: + ldl $1, 0($5) + ldi $5, 8($5) + ldi $18, -8($18) + + stl $1, 0($4) + ldi $4, 8($4) + bge $18, $aligned_word_loop_up + +$skip_aligned_word_loop_up: + ldi $18, 8($18) + bgt $18, $byte_loop_tail_up + unop + ret $31, ($26), 1 + + .align 4 +$misaligned_up: + fnop + unop + beq $18, $egress + +$byte_loop_tail_up: + ldbu $1, 0($5) + ldi $18, -1($18) + + stb $1, 0($4) + + ldi $5, 1($5) + ldi $4, 1($4) + bgt $18, $byte_loop_tail_up + +$egress: + ret $31, ($26), 1 + + .end memmove + EXPORT_SYMBOL(memmove) diff --git a/arch/sw_64/lib/memset.S b/arch/sw_64/lib/memset.S new file mode 100644 index 000000000000..dbc4d775c7ea --- /dev/null +++ b/arch/sw_64/lib/memset.S @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This is an efficient (and small) implementation of the C library "memset()" + * function for the sw. + * + * (C) Copyright 1996 Linus Torvalds + * + * This routine is "moral-ware": you are free to use it any way you wish, and + * the only obligation I put on you is a moral one: if you make any improvements + * to the routine, please send me your improvements for me to use similarly. + * + * The scheduling comments are according to the documentation (and done by + * hand, so they might well be incorrect, please do tell me about it..) 
+ */ + +#include <asm/export.h> + + .set noat + .set noreorder +.text + .globl memset + .globl __memset + .globl ___memset + .globl __memsetw + .globl __constant_c_memset + + .ent ___memset +.align 5 +___memset: + .frame $30, 0, $26, 0 + .prologue 0 + + and $17, 255, $1 + inslb $17, 1, $17 + bis $17, $1, $17 + sll $17, 16, $1 + + bis $17, $1, $17 + sll $17, 32, $1 + bis $17, $1, $17 + ldl_u $31, 0($30) + +.align 5 +__constant_c_memset: + addl $18, $16, $6 + bis $16, $16, $0 + xor $16, $6, $1 + ble $18, end + + bic $1, 7, $1 + beq $1, within_one_quad + and $16, 7, $3 + beq $3, aligned + + bis $16, $16, $5 + subl $3, 8, $3 + addl $18, $3, $18 + subl $16, $3, $16 + + eqv $3, $31, $3 + addl $3, 1, $3 +unaligned_start_loop: + stb $17, 0($5) + subl $3, 1, $3 + addl $5, 1, $5 + bgt $3, unaligned_start_loop + + +.align 4 +aligned: + sra $18, 3, $3 + and $18, 7, $18 + bis $16, $16, $5 + beq $3, no_quad + +/*added by JJ*/ + ldi $3, -8($3) + blt $3, nounrol + +.align 3 +wloop: + fillde 256($5) + stl $17, 0($5) + stl $17, 8($5) + stl $17, 16($5) + stl $17, 24($5) + subl $3, 8, $3 + stl $17, 32($5) + stl $17, 40($5) + stl $17, 48($5) + stl $17, 56($5) + addl $5, 0x40, $5 + bge $3, wloop + +nounrol: + addl $3, 8, $3 + beq $3, no_quad +/*end JJ*/ + +.align 3 +loop: + stl $17, 0($5) + subl $3, 1, $3 + addl $5, 8, $5 + bne $3, loop + +no_quad: + bis $31, $31, $31 + beq $18, end + and $6, 7, $6 +no_quad_loop: + stb $17, 0($5) + subl $6, 1, $6 + addl $5, 1, $5 + bgt $6, no_quad_loop + ret $31, ($26), 1 + +.align 3 +within_one_quad: + bis $18, $18, $1 + bis $16, $16, $5 +within_one_quad_loop: + stb $17, 0($5) + subl $1, 1, $1 + addl $5, 1, $5 + bgt $1, within_one_quad_loop + +end: + ret $31, ($26), 1 + .end ___memset + EXPORT_SYMBOL(___memset) + + .align 5 + .ent __memsetw +__memsetw: + .prologue 0 + + inslh $17, 0, $1 + inslh $17, 2, $2 + inslh $17, 4, $3 + or $1, $2, $1 + inslh $17, 6, $4 + or $1, $3, $1 + or $1, $4, $17 + br __constant_c_memset + + .end __memsetw + EXPORT_SYMBOL(__memsetw) + +memset = ___memset +EXPORT_SYMBOL(memset) +__memset = ___memset +EXPORT_SYMBOL(__memset) diff --git a/arch/sw_64/lib/strcpy.S b/arch/sw_64/lib/strcpy.S new file mode 100644 index 000000000000..61b6141f88e2 --- /dev/null +++ b/arch/sw_64/lib/strcpy.S @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Optimized strcpy() for SW64 + + * Copyright (C) Mao Minkai + * Author: Mao Minkai + * + * Copy a null-terminated string from SRC to DST. 
+ * + * Input: + * $16: DST, clobbered + * $17: SRC, clobbered + * + * Output: + * $0: DST + * + * Temporaries: + * $1: unaligned parts of addr (0 means aligned addr) + * $4: current data to copy (could have 1 byte or 8 bytes) + * $5: parts of current data, compare result + * $6: number of bytes left to copy + * + * Tag naming: + * co: SRC and DST are co-aligned + * mis: SRC and DST are not co-aligned + * a: SRC or DST has aligned address + * una: SRC or DST has unaligned address + * + */ + +#include <asm/export.h> + + .text + .align 4 + .globl strcpy + .ent strcpy +strcpy: + .frame $30, 0, $26 + .prologue 0 + + bis $31, $16, $0 # set return value + + xor $16, $17, $1 + and $1, 7, $1 + bne $1, $mis_aligned + +/* src and dst are co-aligned */ + and $16, 7, $1 + bne $1, $co_una_head + +/* do the copy in loop, for (co)-aligned src and dst with (a)ligned addr */ +$co_a_loop: + ldl $4, 0($17) + cmpgeb $31, $4, $5 + bne $5, $tail_loop # we find null + stl $4, 0($16) + addl $17, 8, $17 + addl $16, 8, $16 + br $31, $co_a_loop + +/* src and dst are co-aligned but have unaligned address */ +$co_una_head: + ldl_u $4, 0($17) + extll $4, $16, $4 + cmpgeb $31, $4, $5 + bne $5, $tail_loop # we find null + ldi $6, 8($31) + subl $6, $1, $6 + addl $17, $6, $17 # prepare addr of middle part + +/* copy the unaligned part in loop */ +$co_una_head_loop: + stb $4, 0($16) + addl $16, 1, $16 + subl $6, 1, $6 + beq $6, $co_a_loop + addl $4, 1, $4 + br $31, $co_una_head_loop + +/* src and dst are not co-aligned */ +$mis_aligned: + and $16, 7, $1 + beq $1, $mis_a_dst + ldi $6, 8($31) + subl $6, $1, $6 + +/* copy the first few bytes to make dst aligned */ +$mis_una_head_loop: + bis $31, $31, $6 + ldbu $4, 0($17) + stb $4, 0($16) + beq $4, $out # we have reached null, return + addl $17, 1, $17 + addl $16, 1, $16 + subl $6, 1, $6 + beq $6, $mis_a_dst + br $31, $mis_una_head_loop + +/* dst has aligned addr */ +$mis_a_dst: + and $17, 7, $1 + +$mis_a_dst_loop: + ldl_u $4, 0($17) + ldl_u $5, 7($17) + extll $4, $1, $4 + exthl $5, $1, $5 + bis $4, $5, $4 + cmpgeb $31, $4, $5 + bne $5, $tail_loop # we find null + stl $4, 0($16) + addl $17, 8, $17 + addl $16, 8, $16 + br $31, $mis_a_dst_loop + +/* we have find null in the last few bytes, copy one byte each time */ +$tail_loop: + ldbu $4, 0($17) + stb $4, 0($16) + beq $4, $out # we have reached null, return + addl $17, 1, $17 + addl $16, 1, $16 + br $31, $tail_loop + +/* copy is done, return */ +$out: + ret + + .end strcpy + EXPORT_SYMBOL(strcpy) diff --git a/arch/sw_64/lib/strncpy.S b/arch/sw_64/lib/strncpy.S new file mode 100644 index 000000000000..f50c70599bb4 --- /dev/null +++ b/arch/sw_64/lib/strncpy.S @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Optimized strncpy() for SW64 + + * Copyright (C) Mao Minkai + * Author: Mao Minkai + * + * Copy a string from SRC to DST. At most SIZE bytes are coppied. 
+ * + * Input: + * $16: DST, clobbered + * $17: SRC, clobbered + * $18: SIZE, clobbered + * + * Output: + * $0: DST + * + * Temporaries: + * $1: unaligned parts of addr (0 means aligned addr) + * $4: current data to copy (could have 1 byte or 8 bytes) + * $5: parts of current data, compare result + * $6: number of bytes left to copy in head + * + * Tag naming: + * co: SRC and DST are co-aligned + * mis: SRC and DST are not co-aligned + * a: SRC or DST has aligned address + * una: SRC or DST has unaligned address + * + */ + +#include <asm/export.h> + + .text + .align 4 + .globl strncpy + .ent strncpy +strncpy: + .frame $30, 0, $26 + .prologue 0 + + bis $31, $16, $0 # set return value + beq $18, $out # return if size is 0 + cmplt $18, 8, $5 # size less than 8, do 1-byte copy + bne $5, $tail_loop + + xor $16, $17, $1 + and $1, 7, $1 + bne $1, $mis_aligned + +/* src and dst are co-aligned */ + and $16, 7, $1 + bne $1, $co_una_head + +/* do the copy in loop, for (co)-aligned src and dst with (a)ligned addr */ +$co_a_loop: + ldl $4, 0($17) + cmpgeb $31, $4, $5 + bne $5, $tail_loop # we find null + subl $18, 8, $5 + blt $5, $tail_loop # we have fewer than 8 bytes to copy + stl $4, 0($16) + subl $18, 8, $18 + beq $18, $out + addl $17, 8, $17 + addl $16, 8, $16 + br $31, $co_a_loop + +/* src and dst are co-aligned but have unaligned address */ +$co_una_head: + ldl_u $4, 0($17) + extll $4, $16, $4 + cmpgeb $31, $4, $5 + bne $5, $tail_loop # we find null + ldi $6, 8($31) + subl $6, $1, $6 + addl $17, $6, $17 # prepare addr of middle part + subl $18, $6, $18 # sub bytes going to be copy + +/* copy the unaligned part in loop */ +$co_una_head_loop: + stb $4, 0($16) + addl $16, 1, $16 + subl $6, 1, $6 + beq $6, $co_a_loop + addl $4, 1, $4 + br $31, $co_una_head_loop + +/* src and dst are not co-aligned */ +$mis_aligned: + and $16, 7, $1 + beq $1, $mis_a_dst + +$mis_una_head: + ldi $6, 8($31) + subl $6, $1, $6 + +/* copy the first few bytes to make dst aligned */ +$mis_una_head_loop: + ldbu $4, 0($17) + stb $4, 0($16) + subl $18, 1, $18 + beq $18, $out + beq $4, $null_padding # we have reached null + addl $17, 1, $17 + addl $16, 1, $16 + subl $6, 1, $6 + beq $6, $mis_a_dst + br $31, $mis_una_head_loop + +/* dst has aligned addr */ +$mis_a_dst: + and $17, 7, $1 + +$mis_a_dst_loop: + ldl_u $4, 0($17) + ldl_u $5, 7($17) + extll $4, $1, $4 + exthl $5, $1, $5 + bis $4, $5, $4 + cmpgeb $31, $4, $5 + bne $5, $tail_loop # we find null + subl $18, 8, $5 + blt $5, $tail_loop # we have fewer than 8 bytes to copy + stl $4, 0($16) + subl $18, 8, $18 + beq $5, $out + addl $17, 8, $17 + addl $16, 8, $16 + br $31, $mis_a_dst_loop + +/* we have find null in the last few bytes, copy one byte each time */ +$tail_loop: + ldbu $4, 0($17) + stb $4, 0($16) + subl $18, 1, $18 + beq $18, $out + beq $4, $null_padding # we have reached null + addl $17, 1, $17 + addl $16, 1, $16 + br $31, $tail_loop + +$null_padding: + addl $16, 1, $16 + subl $18, 1, $18 + stb $31, 0($16) + beq $18, $out + br $31, $null_padding + +/* copy is done, return */ +$out: + ret + + .end strncpy + EXPORT_SYMBOL(strncpy) diff --git a/arch/sw_64/lib/udelay.c b/arch/sw_64/lib/udelay.c new file mode 100644 index 000000000000..595887caa7b3 --- /dev/null +++ b/arch/sw_64/lib/udelay.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1993, 2000 Linus Torvalds + * + * Delay routines, using a pre-computed "loops_per_jiffy" value. 
+ */ + +#include <linux/module.h> +#include <linux/sched.h> /* for udelay's use of smp_processor_id */ +#include <asm/param.h> +#include <asm/smp.h> +#include <asm/hw_init.h> +#include <linux/delay.h> + +/* + * Use only for very small delays (< 1 msec). + * + * The active part of our cycle counter is only 32-bits wide, and + * we're treating the difference between two marks as signed. On + * a 1GHz box, that's about 2 seconds. + */ +void __delay(unsigned long loops) +{ + unsigned long tmp; + + __asm__ __volatile__( + " rtc %0\n" + " addl %1,%0,%1\n" + "1: rtc %0\n" + " subl %1,%0,%0\n" + " bgt %0,1b" + : "=&r" (tmp), "=r" (loops) : "1"(loops)); +} +EXPORT_SYMBOL(__delay); + +#ifdef CONFIG_SMP +#define LPJ cpu_data[smp_processor_id()].loops_per_jiffy +#else +#define LPJ loops_per_jiffy +#endif + +void udelay(unsigned long usecs) +{ + unsigned long loops = usecs * get_cpu_freq() / 1000000; + unsigned long tmp; + + __asm__ __volatile__( + " rtc %0\n" + " addl %1,%0,%1\n" + "1: rtc %0\n" + " subl %1,%0,%0\n" + " bgt %0,1b" + : "=&r" (tmp), "=r" (loops) : "1"(loops)); +} +EXPORT_SYMBOL(udelay); + +void ndelay(unsigned long nsecs) +{ + unsigned long loops = nsecs * get_cpu_freq() / 1000000000; + unsigned long tmp; + + __asm__ __volatile__( + " rtc %0\n" + " addl %1,%0,%1\n" + "1: rtc %0\n" + " subl %1,%0,%0\n" + " bgt %0,1b" + : "=&r" (tmp), "=r" (loops) : "1"(loops)); +} +EXPORT_SYMBOL(ndelay); diff --git a/arch/sw_64/math-emu/Makefile b/arch/sw_64/math-emu/Makefile new file mode 100644 index 000000000000..72e750d138e6 --- /dev/null +++ b/arch/sw_64/math-emu/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the FPU instruction emulation. +# + +ccflags-y := -w + +obj-$(CONFIG_MATHEMU) += math-emu.o + +math-emu-objs := math.o qrnnd.o diff --git a/arch/sw_64/math-emu/math.c b/arch/sw_64/math-emu/math.c new file mode 100644 index 000000000000..3903b421b8f4 --- /dev/null +++ b/arch/sw_64/math-emu/math.c @@ -0,0 +1,2267 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Modify History + * + * who when what + * --- ---- ---- + * stone 2004-09-02 Add SIMD floating emulation code + * fire3 2008-12-27 Add SIMD floating emulation code for SW64 + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <asm/ptrace.h> + + +#include <linux/uaccess.h> + +#include "sfp-util.h" +#include <math-emu/soft-fp.h> +#include <math-emu/single.h> +#include <math-emu/double.h> + +#define math_debug 0 + +#define DEBUG_INFO(fmt, arg...) 
\ + do { \ + if (math_debug) \ + printk(KERN_DEBUG fmt, ## arg); \ + } while (0) + +/* + * This is for sw64 + */ + +#define IEEE_E_STATUS_MASK IEEE_STATUS_MASK +#define IEEE_E_STATUS_TO_EXCSUM_SHIFT 0 +#define SW64_FP_DENOMAL 1 /* A denormal data */ +#define SW64_FP_NORMAL 0 /* A denormal data */ +#define SW64_FP_NAN 2 + +#define SW64_FP_NAN_S(X, val) \ +do { \ + union _FP_UNION_S *_flo = \ + (union _FP_UNION_S *)(val); \ + \ + X##_f = _flo->bits.frac; \ + X##_e = _flo->bits.exp; \ + X##_s = _flo->bits.sign; \ + \ + switch (X##_e) { \ + case 255: \ + if (_FP_FRAC_ZEROP_1(X)) \ + X##_c = SW64_FP_NORMAL; \ + else \ + X##_c = SW64_FP_NAN; \ + break; \ + default: \ + X##_c = SW64_FP_NORMAL; \ + break; \ + } \ +} while (0) + + +#define SW64_FP_NAN_D(X, val) \ +do { \ + union _FP_UNION_D *_flo = \ + (union _FP_UNION_D *)(val); \ + \ + X##_f = _flo->bits.frac; \ + X##_e = _flo->bits.exp; \ + X##_s = _flo->bits.sign; \ + \ + switch (X##_e) { \ + case 2047: \ + if (_FP_FRAC_ZEROP_1(X)) \ + X##_c = SW64_FP_NORMAL; \ + else \ + X##_c = SW64_FP_NAN; \ + break; \ + default: \ + X##_c = SW64_FP_NORMAL; \ + break; \ + } \ +} while (0) + + + +#define SW64_FP_NORMAL_S(X, val) \ +do { \ + union _FP_UNION_S *_flo = \ + (union _FP_UNION_S *)(val); \ + \ + X##_f = _flo->bits.frac; \ + X##_e = _flo->bits.exp; \ + X##_s = _flo->bits.sign; \ + \ + switch (X##_e) { \ + case 0: \ + if (_FP_FRAC_ZEROP_1(X)) \ + X##_c = SW64_FP_NORMAL; \ + else \ + X##_c = SW64_FP_DENOMAL; \ + break; \ + default: \ + X##_c = SW64_FP_NORMAL; \ + break; \ + } \ +} while (0) + +#define SW64_FP_NORMAL_D(X, val) \ +do { \ + union _FP_UNION_D *_flo = \ + (union _FP_UNION_D *)(val); \ + \ + X##_f = _flo->bits.frac; \ + X##_e = _flo->bits.exp; \ + X##_s = _flo->bits.sign; \ + \ + switch (X##_e) { \ + case 0: \ + if (_FP_FRAC_ZEROP_1(X)) \ + X##_c = SW64_FP_NORMAL; \ + else \ + X##_c = SW64_FP_DENOMAL; \ + break; \ + default: \ + X##_c = SW64_FP_NORMAL; \ + break; \ + } \ +} while (0) + +/* Operation Code for SW64 */ +#define OP_SIMD_1 0x1A +#define OP_SIMD_2 0x1B +#define OP_SIMD_MUL_ADD 0x1B +#define OP_SIMD_NORMAL 0x1A +#define OP_MUL_ADD 0x19 + +#define FNC_FMAS 0x0 +#define FNC_FMAD 0x1 +#define FNC_FMSS 0x2 +#define FNC_FMSD 0x3 +#define FNC_FNMAS 0x4 +#define FNC_FNMAD 0x5 +#define FNC_FNMSS 0x6 +#define FNC_FNMSD 0x7 + +#define FNC_VADDS 0x80 +#define FNC_VADDD 0x81 +#define FNC_VSUBS 0x82 +#define FNC_VSUBD 0x83 +#define FNC_VMULS 0x84 +#define FNC_VMULD 0x85 +#define FNC_VDIVS 0x86 +#define FNC_VDIVD 0x87 +#define FNC_VSQRTS 0x88 +#define FNC_VSQRTD 0x89 + +#define FNC_VFCMPEQ 0x8c +#define FNC_VFCMPLE 0x8d +#define FNC_VFCMPLT 0x8e +#define FNC_VFCMPUN 0x8f + +#define FNC_VCPYS 0x90 +#define FNC_VCPYSE 0x91 +#define FNC_VCPYSN 0x92 + +#define FNC_VMAS 0x0 +#define FNC_VMAD 0x1 +#define FNC_VMSS 0x2 +#define FNC_VMSD 0x3 +#define FNC_VNMAS 0x4 +#define FNC_VNMAD 0x5 +#define FNC_VNMSS 0x6 +#define FNC_VNMSD 0x7 + +long simd_fp_emul_s(unsigned long pc); +long simd_fp_emul_d(unsigned long pc); +long mul_add_fp_emul(unsigned long pc); +long simd_cmp_emul_d(unsigned long pc); + +long simd_mul_add_fp_emul_d(unsigned long pc); +long simd_mul_add_fp_emul_s(unsigned long pc); + +void read_fp_reg_s(unsigned long reg, unsigned long *p0, + unsigned long *p1, unsigned long *p2, unsigned long *p3); +void read_fp_reg_d(unsigned long reg, unsigned long *val_p0, + unsigned long *p1, unsigned long *p2, unsigned long *p3); +void write_fp_reg_s(unsigned long reg, unsigned long val_p0, + unsigned long p1, unsigned long p2, unsigned long p3); +void 
write_fp_reg_d(unsigned long reg, unsigned long val_p0, + unsigned long p1, unsigned long p2, unsigned long p3); +#define LOW_64_WORKING 1 +#define HIGH_64_WORKING 2 + +/* + * End for sw64 + */ + +#define OPC_HMC 0x00 +#define OPC_INTA 0x10 +#define OPC_INTL 0x11 +#define OPC_INTS 0x12 +#define OPC_INTM 0x13 +#define OPC_FLTC 0x14 +#define OPC_FLTV 0x15 +#define OPC_FLTI 0x16 +#define OPC_FLTL 0x17 +#define OPC_MISC 0x18 +#define OPC_JSR 0x1a + +#define FOP_SRC_S 0 +#define FOP_SRC_T 2 +#define FOP_SRC_Q 3 + +#define FOP_FNC_ADDx 0 +#define FOP_FNC_CVTQL 0 +#define FOP_FNC_SUBx 1 +#define FOP_FNC_MULx 2 +#define FOP_FNC_DIVx 3 +#define FOP_FNC_CMPxUN 4 +#define FOP_FNC_CMPxEQ 5 +#define FOP_FNC_CMPxLT 6 +#define FOP_FNC_CMPxLE 7 +#define FOP_FNC_SQRTx 11 +#define FOP_FNC_CVTxS 12 +#define FOP_FNC_CVTxT 14 +#define FOP_FNC_CVTxQ 15 + +/* this is for sw64 added by fire3*/ +#define FOP_FNC_ADDS 0 +#define FOP_FNC_ADDD 1 +#define FOP_FNC_SUBS 2 +#define FOP_FNC_SUBD 3 +#define FOP_FNC_MULS 4 +#define FOP_FNC_MULD 5 +#define FOP_FNC_DIVS 6 +#define FOP_FNC_DIVD 7 +#define FOP_FNC_SQRTS 8 +#define FOP_FNC_SQRTD 9 + +#define FOP_FNC_CMPEQ 0x10 +#define FOP_FNC_CMPLE 0x11 +#define FOP_FNC_CMPLT 0x12 +#define FOP_FNC_CMPUN 0x13 + +#define FOP_FNC_CVTSD 0x20 +#define FOP_FNC_CVTDS 0x21 +#define FOP_FNC_CVTLS 0x2D +#define FOP_FNC_CVTLD 0x2F +#define FOP_FNC_CVTDL 0x27 +#define FOP_FNC_CVTDL_G 0x22 +#define FOP_FNC_CVTDL_P 0x23 +#define FOP_FNC_CVTDL_Z 0x24 +#define FOP_FNC_CVTDL_N 0x25 + +#define FOP_FNC_CVTWL 0x28 +#define FOP_FNC_CVTLW 0x29 + +/* fire3 added end */ + + +#define MISC_TRAPB 0x0000 +#define MISC_EXCB 0x0400 + +extern unsigned long sw64_read_fp_reg(unsigned long reg); +extern void sw64_write_fp_reg(unsigned long reg, unsigned long val); +extern unsigned long sw64_read_fp_reg_s(unsigned long reg); +extern void sw64_write_fp_reg_s(unsigned long reg, unsigned long val); + + +#ifdef MODULE + +MODULE_DESCRIPTION("FP Software completion module"); + +extern long (*sw64_fp_emul_imprecise)(struct pt_regs *, unsigned long); +extern long (*sw64_fp_emul)(unsigned long pc); + +static long (*save_emul_imprecise)(struct pt_regs *, unsigned long); +static long (*save_emul)(unsigned long pc); + +long do_sw_fp_emul_imprecise(struct pt_regs *, unsigned long); +long do_sw_fp_emul(unsigned long); + +int init_module(void) +{ + save_emul_imprecise = sw64_fp_emul_imprecise; + save_emul = sw64_fp_emul; + sw64_fp_emul_imprecise = do_sw_fp_emul_imprecise; + sw64_fp_emul = do_sw_fp_emul; + return 0; +} + +void cleanup_module(void) +{ + sw64_fp_emul_imprecise = save_emul_imprecise; + sw64_fp_emul = save_emul; +} + +#undef sw64_fp_emul_imprecise +#define sw64_fp_emul_imprecise do_sw_fp_emul_imprecise +#undef sw64_fp_emul +#define sw64_fp_emul do_sw_fp_emul + +#endif /* MODULE */ + + +/* + * Emulate the floating point instruction at address PC. Returns -1 if the + * instruction to be emulated is illegal (such as with the opDEC trap), else + * the SI_CODE for a SIGFPE signal, else 0 if everything's ok. + * + * Notice that the kernel does not and cannot use FP regs. This is good + * because it means that instead of saving/restoring all fp regs, we simply + * stick the result of the operation into the appropriate register. 
+ */ +long sw64_fp_emul(unsigned long pc) +{ + FP_DECL_EX; + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); + + unsigned long fa, fb, fc, func, mode, mode_bk, src; + unsigned long res, va, vb, vc, swcr, fpcr; + __u32 insn; + long si_code; + unsigned long opcode; + + get_user(insn, (__u32 *)pc); + opcode = (insn >> 26) & 0x3f; + fc = (insn >> 0) & 0x1f; /* destination register */ + fb = (insn >> 16) & 0x1f; + fa = (insn >> 21) & 0x1f; + func = (insn >> 5) & 0xff; + fpcr = rdfpcr(); + mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3; + DEBUG_INFO("======= Entering Floating mathe emulation =====\n"); + DEBUG_INFO("Floating math emulation insn = %#lx, opcode=%d, func=%d\n", insn, opcode, func); + DEBUG_INFO("SW64 hardware fpcr = %#lx\n", fpcr); + swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr); + DEBUG_INFO("SW64 software swcr = %#lx\n", swcr); + DEBUG_INFO("fa:%#lx,fb:%#lx,fc:%#lx,func:%#lx,mode:%#lx\n", fa, fb, fc, func, mode); + + if (opcode == OP_SIMD_NORMAL) { /* float simd math */ + if (func == FNC_VADDS || func == FNC_VSUBS || func == FNC_VSQRTS + || func == FNC_VMULS || func == FNC_VDIVS) + si_code = simd_fp_emul_s(pc); + if (func == FNC_VADDD || func == FNC_VSUBD || func == FNC_VSQRTD + || func == FNC_VMULD || func == FNC_VDIVD) + si_code = simd_fp_emul_d(pc); + if (func == FNC_VFCMPUN || func == FNC_VFCMPLT || func == FNC_VFCMPLE + || func == FNC_VFCMPEQ) + si_code = simd_cmp_emul_d(pc); + return si_code; + } + if (opcode == OP_SIMD_MUL_ADD) {/* simd mul and add */ + func = (insn >> 10) & 0x3f; + if (func == FNC_VMAS || func == FNC_VMSS || func == FNC_VNMAS + || func == FNC_VNMSS) { + si_code = simd_mul_add_fp_emul_s(pc); + return si_code; + } + + if (func == FNC_VMAD || func == FNC_VMSD || func == FNC_VNMAD + || func == FNC_VNMSD) { + si_code = simd_mul_add_fp_emul_d(pc); + return si_code; + } + func = (insn >> 5) & 0xff; + } + + if (opcode == OP_MUL_ADD) { + si_code = mul_add_fp_emul(pc); + return si_code; + } + switch (func) { + case FOP_FNC_SUBS: + va = sw64_read_fp_reg_s(fa); + vb = sw64_read_fp_reg_s(fb); + FP_UNPACK_SP(SA, &va); + FP_UNPACK_SP(SB, &vb); + FP_SUB_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_SUBD: + va = sw64_read_fp_reg(fa); + vb = sw64_read_fp_reg(fb); + FP_UNPACK_DP(DA, &va); + FP_UNPACK_DP(DB, &vb); + FP_SUB_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_ADDS: + va = sw64_read_fp_reg_s(fa); + vb = sw64_read_fp_reg_s(fb); + FP_UNPACK_SP(SA, &va); + FP_UNPACK_SP(SB, &vb); + FP_ADD_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_ADDD: + va = sw64_read_fp_reg(fa); + vb = sw64_read_fp_reg(fb); + FP_UNPACK_DP(DA, &va); + FP_UNPACK_DP(DB, &vb); + FP_ADD_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_MULS: + va = sw64_read_fp_reg_s(fa); + vb = sw64_read_fp_reg_s(fb); + FP_UNPACK_SP(SA, &va); + FP_UNPACK_SP(SB, &vb); + FP_MUL_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_MULD: + va = sw64_read_fp_reg(fa); + vb = sw64_read_fp_reg(fb); + FP_UNPACK_DP(DA, &va); + FP_UNPACK_DP(DB, &vb); + FP_MUL_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_DIVS: + DEBUG_INFO("FOP_FNC_DIVS\n"); + va = sw64_read_fp_reg_s(fa); + vb = sw64_read_fp_reg_s(fb); + FP_UNPACK_SP(SA, &va); + FP_UNPACK_SP(SB, &vb); + FP_DIV_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_DIVD: + DEBUG_INFO("FOP_FNC_DIVD\n"); + va = sw64_read_fp_reg(fa); + vb = sw64_read_fp_reg(fb); + FP_UNPACK_DP(DA, &va); + FP_UNPACK_DP(DB, &vb); + FP_DIV_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_SQRTS: + va = sw64_read_fp_reg_s(fa); + vb = sw64_read_fp_reg_s(fb); + 
FP_UNPACK_SP(SA, &va); + FP_UNPACK_SP(SB, &vb); + FP_SQRT_S(SR, SB); + goto pack_s; + case FOP_FNC_SQRTD: + va = sw64_read_fp_reg(fa); + vb = sw64_read_fp_reg(fb); + FP_UNPACK_DP(DA, &va); + FP_UNPACK_DP(DB, &vb); + FP_SQRT_D(DR, DB); + goto pack_d; + } + + + va = sw64_read_fp_reg(fa); + vb = sw64_read_fp_reg(fb); + if ((func & ~0xf) == FOP_FNC_CMPEQ) { + va = sw64_read_fp_reg(fa); + vb = sw64_read_fp_reg(fb); + + FP_UNPACK_RAW_DP(DA, &va); + FP_UNPACK_RAW_DP(DB, &vb); + if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) { + FP_SET_EXCEPTION(FP_EX_DENORM); + if (FP_DENORM_ZERO) + _FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1); + } + if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) { + FP_SET_EXCEPTION(FP_EX_DENORM); + if (FP_DENORM_ZERO) + _FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1); + } + FP_CMP_D(res, DA, DB, 3); + vc = 0x4000000000000000; + /* CMPTEQ, CMPTUN don't trap on QNaN, while CMPTLT and CMPTLE do */ + if (res == 3 && (((func == FOP_FNC_CMPLT) || (func == FOP_FNC_CMPLE)) + || FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB))) { + DEBUG_INFO("CMPLT CMPLE:func:%d, trap on QNaN.", func); + FP_SET_EXCEPTION(FP_EX_INVALID); + } + switch (func) { + case FOP_FNC_CMPUN: + if (res != 3) + vc = 0; + break; + case FOP_FNC_CMPEQ: + if (res) + vc = 0; + break; + case FOP_FNC_CMPLT: + if (res != -1) + vc = 0; + break; + case FOP_FNC_CMPLE: + if ((long)res > 0) + vc = 0; + break; + } + goto done_d; + } + FP_UNPACK_DP(DA, &va); + FP_UNPACK_DP(DB, &vb); + + if (func == FOP_FNC_CVTSD) { + vb = sw64_read_fp_reg_s(fb); + FP_UNPACK_SP(SB, &vb); + DR_c = DB_c; + DR_s = DB_s; + DR_e = DB_e + (1024 - 128); + DR_f = SB_f << (52 - 23); + goto pack_d; + } + + if (func == FOP_FNC_CVTDS) { + FP_CONV(S, D, 1, 1, SR, DB); + goto pack_s; + } + + if (func == FOP_FNC_CVTDL || func == FOP_FNC_CVTDL_G || func == FOP_FNC_CVTDL_P + || func == FOP_FNC_CVTDL_Z || func == FOP_FNC_CVTDL_N) { + mode_bk = mode; + if (func == FOP_FNC_CVTDL_Z) + mode = 0x0UL; + else if (func == FOP_FNC_CVTDL_N) + mode = 0x1UL; + else if (func == FOP_FNC_CVTDL_G) + mode = 0x2UL; + else if (func == FOP_FNC_CVTDL_P) + mode = 0x3UL; + + if (DB_c == FP_CLS_NAN && (_FP_FRAC_HIGH_RAW_D(DB) & _FP_QNANBIT_D)) { + /* AAHB Table B-2 says QNaN should not trigger INV */ + vc = 0; + } else + FP_TO_INT_ROUND_D(vc, DB, 64, 2); + mode = mode_bk; + goto done_d; + } + + vb = sw64_read_fp_reg(fb); + + switch (func) { + case FOP_FNC_CVTLW: + /* + * Notice: We can get here only due to an integer + * overflow. Such overflows are reported as invalid + * ops. We return the result the hw would have + * computed. 
+ */ + vc = ((vb & 0xc0000000) << 32 | /* sign and msb */ + (vb & 0x3fffffff) << 29); /* rest of the int */ + FP_SET_EXCEPTION(FP_EX_INVALID); + goto done_d; + + case FOP_FNC_CVTLS: + FP_FROM_INT_S(SR, ((long)vb), 64, long); + goto pack_s; + + case FOP_FNC_CVTLD: + FP_FROM_INT_D(DR, ((long)vb), 64, long); + goto pack_d; + } + goto bad_insn; + + +pack_s: + FP_PACK_SP(&vc, SR); + + if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) + vc = 0; + DEBUG_INFO("SW64 Emulation S-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc); + DEBUG_INFO("SW64 Emulation S-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr); + sw64_write_fp_reg_s(fc, vc); + goto done; + +pack_d: + FP_PACK_DP(&vc, DR); + if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) + vc = 0; + DEBUG_INFO("SW64 Emulation D-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc); + DEBUG_INFO("SW64 Emulation D-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr); +done_d: + sw64_write_fp_reg(fc, vc); + goto done; + + /* + * Take the appropriate action for each possible + * floating-point result: + * + * - Set the appropriate bits in the FPCR + * - If the specified exception is enabled in the FPCR, + * return. The caller (entArith) will dispatch + * the appropriate signal to the translated program. + * + * In addition, properly track the exception state in software + * as described in the SW64 Architecture Handbook section 4.7.7.3. + */ +done: + if (_fex) { + /* Record exceptions in software control word. */ + swcr |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr |= ieee_swcr_to_fpcr(swcr); + DEBUG_INFO("SW64 before write fpcr = %#lx\n", fpcr); + wrfpcr(fpcr); + + /* Do we generate a signal? */ + _fex = _fex & swcr & IEEE_TRAP_ENABLE_MASK; + si_code = 0; + if (_fex) { + if (_fex & IEEE_TRAP_ENABLE_DNO) + si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_INE) + si_code = FPE_FLTRES; + if (_fex & IEEE_TRAP_ENABLE_UNF) + si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_OVF) + si_code = FPE_FLTOVF; + if (_fex & IEEE_TRAP_ENABLE_DZE) + si_code = FPE_FLTDIV; + if (_fex & IEEE_TRAP_ENABLE_INV) + si_code = FPE_FLTINV; + } + + return si_code; + } + + /* + * We used to write the destination register here, but DEC FORTRAN + * requires that the result *always* be written... so we do the write + * immediately after the operations above. + */ + + return 0; + +bad_insn: + printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc); + return -1; +} + +long sw64_fp_emul_imprecise(struct pt_regs *regs, unsigned long write_mask) +{ + unsigned long trigger_pc = regs->pc - 4; + unsigned long insn, opcode, rc, si_code = 0; + + + /* + * Turn off the bits corresponding to registers that are the + * target of instructions that set bits in the exception + * summary register. We have some slack doing this because a + * register that is the target of a trapping instruction can + * be written at most once in the trap shadow. + * + * Branches, jumps, TRAPBs, EXCBs and calls to HMcode all + * bound the trap shadow, so we need not look any further than + * up to the first occurrence of such an instruction. + */ + while (write_mask) { + get_user(insn, (__u32 *)(trigger_pc)); + opcode = insn >> 26; + rc = insn & 0x1f; + + switch (opcode) { + case OPC_HMC: + case OPC_JSR: + case 0x30 ... 
0x3f: /* branches */ + goto egress; + + case OPC_MISC: + switch (insn & 0xffff) { + case MISC_TRAPB: + case MISC_EXCB: + goto egress; + + default: + break; + } + break; + + case OPC_INTA: + case OPC_INTL: + case OPC_INTS: + case OPC_INTM: + write_mask &= ~(1UL << rc); + break; + + case OPC_FLTC: + case OPC_FLTV: + case OPC_FLTI: + case OPC_FLTL: + write_mask &= ~(1UL << (rc + 32)); + break; + } + if (!write_mask) { + /* Re-execute insns in the trap-shadow. */ + regs->pc = trigger_pc + 4; + si_code = sw64_fp_emul(trigger_pc); + goto egress; + } + trigger_pc -= 4; + } + +egress: + return si_code; +} + +#define WORKING_PART_0 0 +#define WORKING_PART_1 1 +#define WORKING_PART_2 2 +#define WORKING_PART_3 3 + + +/* + * This is for sw64 + */ + +long simd_cmp_emul_d(unsigned long pc) +{ + FP_DECL_EX; + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); FP_DECL_D(DC); + unsigned long fa, fb, fc, func, mode, src; + unsigned long res, va, vb, vc, swcr, fpcr; + __u32 insn; + long si_code; + + unsigned long va_p0, va_p1, va_p2, va_p3; + unsigned long vb_p0, vb_p1, vb_p2, vb_p3; + unsigned long vc_p0, vc_p1, vc_p2, vc_p3; + unsigned long fex_p0, fex_p1, fex_p2, fex_p3; + + int working_part; + + get_user(insn, (__u32 *)pc); + fc = (insn >> 0) & 0x1f; /* destination register */ + fb = (insn >> 16) & 0x1f; + fa = (insn >> 21) & 0x1f; + func = (insn >> 5) & 0xff; + fpcr = rdfpcr(); + mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3; + + DEBUG_INFO("======== Entering SIMD floating-CMP math emulation =======\n"); + DEBUG_INFO("hardware fpcr = %#lx\n", fpcr); + swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr); + DEBUG_INFO("software swcr = %#lx\n", swcr); + DEBUG_INFO("fa:%#lx,fb:%#lx,fc:%#lx,func:%#lx,mode:%#lx\n", fa, fb, fc, func, mode); + read_fp_reg_d(fa, &va_p0, &va_p1, &va_p2, &va_p3); + read_fp_reg_d(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3); + read_fp_reg_d(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3); + DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3); + DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3); + DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3); + working_part = WORKING_PART_0; +simd_working: + _fex = 0; + switch (working_part) { + case WORKING_PART_0: + DEBUG_INFO("WORKING_PART_0\n"); + va = va_p0; + vb = vb_p0; + vc = vc_p0; + break; + case WORKING_PART_1: + DEBUG_INFO("WORKING_PART_1\n"); + va = va_p1; + vb = vb_p1; + vc = vc_p1; + break; + case WORKING_PART_2: + DEBUG_INFO("WORKING_PART_2\n"); + va = va_p2; + vb = vb_p2; + vc = vc_p2; + break; + case WORKING_PART_3: + DEBUG_INFO("WORKING_PART_3\n"); + va = va_p3; + vb = vb_p3; + vc = vc_p3; + break; + } + DEBUG_INFO("Before unpack va:%#lx, vb:%#lx\n", va, vb); + FP_UNPACK_RAW_DP(DA, &va); + FP_UNPACK_RAW_DP(DB, &vb); + DEBUG_INFO("DA_e:%d, _FP_FRAC_ZEROP_1(DA):%d\n", DA_e, _FP_FRAC_ZEROP_1(DA)); + DEBUG_INFO("DB_e:%d, _FP_FRAC_ZEROP_1(DB):%d\n", DA_e, _FP_FRAC_ZEROP_1(DA)); + DEBUG_INFO("DA iszero:%d, DB iszero:%d\n", ((!DA_e && _FP_FRAC_ZEROP_1(DA)) ? 
1 : 0), + ((!DB_e && _FP_FRAC_ZEROP_1(DB)))); + if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) { + FP_SET_EXCEPTION(FP_EX_DENORM); + if (FP_DENORM_ZERO) + _FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1); + } + if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) { + FP_SET_EXCEPTION(FP_EX_DENORM); + if (FP_DENORM_ZERO) + _FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1); + } + FP_CMP_D(res, DA, DB, 3); + vc = 0x4000000000000000; + /* CMPTEQ, CMPTUN don't trap on QNaN, while CMPTLT and CMPTLE do */ + if (res == 3 && (((func == FOP_FNC_CMPLT) || (func == FOP_FNC_CMPLE)) + || FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB))) { + DEBUG_INFO("CMPLT CMPLE:func:%d, trap on QNaN.", func); + FP_SET_EXCEPTION(FP_EX_INVALID); + } + DEBUG_INFO("res:%d\n", res); + switch (func) { + case FNC_VFCMPUN: + if (res != 3) + vc = 0; + break; + case FNC_VFCMPEQ: + if (res) + vc = 0; + break; + case FNC_VFCMPLT: + if (res != -1) + vc = 0; + break; + case FNC_VFCMPLE: + if ((long)res > 0) + vc = 0; + break; + } +next_working_s: + switch (working_part) { + case WORKING_PART_0: + working_part = WORKING_PART_1; + vc_p0 = vc; + fex_p0 = _fex; + goto simd_working; + case WORKING_PART_1: + working_part = WORKING_PART_2; + vc_p1 = vc; + fex_p1 = _fex; + goto simd_working; + case WORKING_PART_2: + working_part = WORKING_PART_3; + vc_p2 = vc; + fex_p2 = _fex; + goto simd_working; + case WORKING_PART_3: + vc_p3 = vc; + fex_p3 = _fex; + goto done; + } +done: + if (fex_p0 || fex_p1 || fex_p2 || fex_p3) { + unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3; + unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3; + + fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0; + swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr; + /* manage fpcr_p0 */ + if (fex_p0) { + swcr_p0 |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p0 = fpcr; + fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0); + } + + if (fex_p1) { + swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p1 = fpcr; + fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1); + } + + if (fex_p2) { + swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p2 = fpcr; + fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2); + } + + if (fex_p3) { + swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p3 = fpcr; + fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3); + } + + fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3; + DEBUG_INFO("fex_p0 = %#lx\n", fex_p0); + DEBUG_INFO("fex_p1 = %#lx\n", fex_p1); + DEBUG_INFO("fex_p2 = %#lx\n", fex_p2); + DEBUG_INFO("fex_p3 = %#lx\n", fex_p3); + DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr); + wrfpcr(fpcr); + DEBUG_INFO("Before write fp: vc_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3); + write_fp_reg_d(fc, vc_p0, vc_p1, vc_p2, vc_p3); + + /* Do we generate a signal? 
*/ + _fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK) + | (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK); + si_code = 0; + if (_fex) { + if (_fex & IEEE_TRAP_ENABLE_DNO) + si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_INE) + si_code = FPE_FLTRES; + if (_fex & IEEE_TRAP_ENABLE_UNF) + si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_OVF) + si_code = FPE_FLTOVF; + if (_fex & IEEE_TRAP_ENABLE_DZE) + si_code = FPE_FLTDIV; + if (_fex & IEEE_TRAP_ENABLE_INV) + si_code = FPE_FLTINV; + } + DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code); + return si_code; + + } + DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code); + return 0; + +bad_insn: + printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc); + return -1; +} + + +long simd_fp_emul_d(unsigned long pc) +{ + FP_DECL_EX; + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); FP_DECL_D(DC); + unsigned long fa, fb, fc, func, mode, src; + unsigned long res, va, vb, vc, swcr, fpcr; + __u32 insn; + long si_code; + + unsigned long va_p0, va_p1, va_p2, va_p3; + unsigned long vb_p0, vb_p1, vb_p2, vb_p3; + unsigned long vc_p0, vc_p1, vc_p2, vc_p3; + unsigned long fex_p0, fex_p1, fex_p2, fex_p3; + + int working_part; + + get_user(insn, (__u32 *)pc); + fc = (insn >> 0) & 0x1f; /* destination register */ + fb = (insn >> 16) & 0x1f; + fa = (insn >> 21) & 0x1f; + func = (insn >> 5) & 0xff; + fpcr = rdfpcr(); + mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3; + + DEBUG_INFO("======== Entering SIMD D-floating math emulation =======\n"); + DEBUG_INFO("hardware fpcr = %#lx\n", fpcr); + swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr); + DEBUG_INFO("software swcr = %#lx\n", swcr); + DEBUG_INFO("fa:%#lx,fb:%#lx,fc:%#lx,func:%#lx,mode:%#lx\n", fa, fb, fc, func, mode); + read_fp_reg_d(fa, &va_p0, &va_p1, &va_p2, &va_p3); + read_fp_reg_d(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3); + read_fp_reg_d(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3); + DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3); + DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3); + DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3); + working_part = WORKING_PART_0; +simd_working: + _fex = 0; + switch (working_part) { + case WORKING_PART_0: + DEBUG_INFO("WORKING_PART_0\n"); + va = va_p0; + vb = vb_p0; + vc = vc_p0; + if ((fpcr & FPCR_STATUS_MASK0) == 0) { + SW64_FP_NORMAL_D(DA, &va); + SW64_FP_NORMAL_D(DB, &vb); + if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("LOW: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c); + } else { + SW64_FP_NAN_D(DA, &va); + SW64_FP_NAN_D(DB, &vb); + if (((DA_c == SW64_FP_NAN) || (DB_c == SW64_FP_NAN))) + goto next_working_s; + } + break; + case WORKING_PART_1: + DEBUG_INFO("WORKING_PART_1\n"); + va = va_p1; + vb = vb_p1; + vc = vc_p1; + if ((fpcr & FPCR_STATUS_MASK1) == 0) { + SW64_FP_NORMAL_D(DA, &va); + SW64_FP_NORMAL_D(DB, &vb); + if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c); + } else { + SW64_FP_NAN_D(DA, &va); + SW64_FP_NAN_D(DB, &vb); + if (((DA_c == SW64_FP_NAN) || (DB_c == SW64_FP_NAN))) + goto next_working_s; + } + + break; + case WORKING_PART_2: + DEBUG_INFO("WORKING_PART_2\n"); + va = va_p2; + vb = vb_p2; + vc = vc_p2; + if ((fpcr & FPCR_STATUS_MASK2) == 0) { + SW64_FP_NORMAL_D(DA, &va); + 
SW64_FP_NORMAL_D(DB, &vb); + if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c); + } else { + SW64_FP_NAN_D(DA, &va); + SW64_FP_NAN_D(DB, &vb); + if (((DA_c == SW64_FP_NAN) || (DB_c == SW64_FP_NAN))) + goto next_working_s; + } + break; + case WORKING_PART_3: + DEBUG_INFO("WORKING_PART_3\n"); + va = va_p3; + vb = vb_p3; + vc = vc_p3; + if ((fpcr & FPCR_STATUS_MASK3) == 0) { + SW64_FP_NORMAL_D(DA, &va); + SW64_FP_NORMAL_D(DB, &vb); + if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c); + } else { + SW64_FP_NAN_D(DA, &va); + SW64_FP_NAN_D(DB, &vb); + if (((DA_c == SW64_FP_NAN) || (DB_c == SW64_FP_NAN))) + goto next_working_s; + } + break; + } + + FP_UNPACK_DP(DA, &va); + FP_UNPACK_DP(DB, &vb); + + switch (func) { + case FNC_VSUBD: + DEBUG_INFO("FNC_VSUBD\n"); + FP_SUB_D(DR, DA, DB); + goto pack_d; + case FNC_VMULD: + DEBUG_INFO("FNC_VMULD\n"); + FP_MUL_D(DR, DA, DB); + goto pack_d; + case FNC_VADDD: + DEBUG_INFO("FNC_VADDD\n"); + FP_ADD_D(DR, DA, DB); + goto pack_d; + case FNC_VDIVD: + DEBUG_INFO("FNC_VDIVD\n"); + FP_DIV_D(DR, DA, DB); + goto pack_d; + case FNC_VSQRTD: + DEBUG_INFO("FNC_VSQRTD\n"); + FP_SQRT_D(DR, DB); + goto pack_d; + } +pack_d: + FP_PACK_DP(&vc, DR); + if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) { + DEBUG_INFO("pack_d, vc=0 !!!!\n"); + vc = 0; + } + + DEBUG_INFO("SW64 SIMD Emulation D-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc); + DEBUG_INFO("SW64 SIMD Emulation D-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr); +next_working_s: + switch (working_part) { + case WORKING_PART_0: + working_part = WORKING_PART_1; + vc_p0 = vc; + fex_p0 = _fex; + goto simd_working; + case WORKING_PART_1: + working_part = WORKING_PART_2; + vc_p1 = vc; + fex_p1 = _fex; + goto simd_working; + case WORKING_PART_2: + working_part = WORKING_PART_3; + vc_p2 = vc; + fex_p2 = _fex; + goto simd_working; + case WORKING_PART_3: + vc_p3 = vc; + fex_p3 = _fex; + goto done; + } +done: + if (fex_p0 || fex_p1 || fex_p2 || fex_p3) { + unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3; + unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3; + + fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0; + swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr; + /* manage fpcr_p0 */ + if (fex_p0) { + swcr_p0 |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p0 = fpcr; + fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0); + } + + if (fex_p1) { + swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p1 = fpcr; + fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1); + } + + if (fex_p2) { + swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p2 = fpcr; + fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2); + } + + if (fex_p3) { + swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT); + + /* Update hardware control register. 
*/ + fpcr_p3 = fpcr; + fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3); + } + + fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3; + DEBUG_INFO("fex_p0 = %#lx\n", fex_p0); + DEBUG_INFO("fex_p1 = %#lx\n", fex_p1); + DEBUG_INFO("fex_p2 = %#lx\n", fex_p2); + DEBUG_INFO("fex_p3 = %#lx\n", fex_p3); + DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr); + wrfpcr(fpcr); + DEBUG_INFO("Before write fp: vp_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3); + write_fp_reg_d(fc, vc_p0, vc_p1, vc_p2, vc_p3); + + /* Do we generate a signal? */ + _fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK) + | (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK); + si_code = 0; + if (_fex) { + if (_fex & IEEE_TRAP_ENABLE_DNO) + si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_INE) + si_code = FPE_FLTRES; + if (_fex & IEEE_TRAP_ENABLE_UNF) + si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_OVF) + si_code = FPE_FLTOVF; + if (_fex & IEEE_TRAP_ENABLE_DZE) + si_code = FPE_FLTDIV; + if (_fex & IEEE_TRAP_ENABLE_INV) + si_code = FPE_FLTINV; + } + DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code); + return si_code; + } + DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code); + return 0; + +bad_insn: + printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc); + return -1; +} + +long simd_fp_emul_s(unsigned long pc) +{ + FP_DECL_EX; + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + + unsigned long fa, fb, fc, func, mode, src; + unsigned long res, va, vb, vc, swcr, fpcr; + __u32 insn; + long si_code; + + unsigned long va_p0, va_p1, va_p2, va_p3; + unsigned long vb_p0, vb_p1, vb_p2, vb_p3; + unsigned long vc_p0, vc_p1, vc_p2, vc_p3; + unsigned long fex_p0, fex_p1, fex_p2, fex_p3; + + int working_part; + + get_user(insn, (__u32 *)pc); + fc = (insn >> 0) & 0x1f; /* destination register */ + fb = (insn >> 16) & 0x1f; + fa = (insn >> 21) & 0x1f; + func = (insn >> 5) & 0xff; + fpcr = rdfpcr(); + mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3; + + DEBUG_INFO("======== Entering SIMD S-floating math emulation =======\n"); + DEBUG_INFO("hardware fpcr = %#lx\n", fpcr); + swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr); + DEBUG_INFO("software swcr = %#lx\n", swcr); + DEBUG_INFO("fa:%#lx,fb:%#lx,fc:%#lx,func:%#lx,mode:%#lx\n", fa, fb, fc, func, mode); + read_fp_reg_s(fa, &va_p0, &va_p1, &va_p2, &va_p3); + read_fp_reg_s(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3); + read_fp_reg_s(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3); + DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3); + DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3); + DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3); + working_part = WORKING_PART_0; +simd_working: + _fex = 0; + switch (working_part) { + case WORKING_PART_0: + DEBUG_INFO("WORKING_PART_0\n"); + va = va_p0; + vb = vb_p0; + vc = vc_p0; + if ((fpcr & FPCR_STATUS_MASK0) == 0) { + SW64_FP_NORMAL_S(SA, &va); + SW64_FP_NORMAL_S(SB, &vb); + if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("PART0: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c); + } else { + SW64_FP_NAN_S(SA, &va); + SW64_FP_NAN_S(SB, &vb); + if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN)) + goto next_working_s; + } + break; + case WORKING_PART_1: + DEBUG_INFO("WORKING_PART_1\n"); + va = va_p1; + vb = vb_p1; + vc = vc_p1; 
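		/*
		 * Per-lane filter, same pattern as WORKING_PART_0 above: if this
		 * lane's FPCR status bits are clear and both operands classify as
		 * normal, the hardware result already held in vc is kept and the
		 * software emulation below is skipped.  A lane that did set status
		 * bits is likewise skipped when both operands are NaNs; every other
		 * case falls through to FP_UNPACK_SP() and the pack_s path.
		 */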
+ if ((fpcr & FPCR_STATUS_MASK1) == 0) { + SW64_FP_NORMAL_S(SA, &va); + SW64_FP_NORMAL_S(SB, &vb); + if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("PART1: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c); + } else { + SW64_FP_NAN_S(SA, &va); + SW64_FP_NAN_S(SB, &vb); + if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN)) + goto next_working_s; + } + break; + case WORKING_PART_2: + DEBUG_INFO("WORKING_PART_2\n"); + va = va_p2; + vb = vb_p2; + vc = vc_p2; + if ((fpcr & FPCR_STATUS_MASK2) == 0) { + SW64_FP_NORMAL_S(SA, &va); + SW64_FP_NORMAL_S(SB, &vb); + if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("PART2: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c); + } else { + SW64_FP_NAN_S(SA, &va); + SW64_FP_NAN_S(SB, &vb); + if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN)) + goto next_working_s; + } + break; + case WORKING_PART_3: + DEBUG_INFO("WORKING_PART_3\n"); + va = va_p3; + vb = vb_p3; + vc = vc_p3; + if ((fpcr & FPCR_STATUS_MASK3) == 0) { + SW64_FP_NORMAL_S(SA, &va); + SW64_FP_NORMAL_S(SB, &vb); + if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("PART3: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c); + } else { + SW64_FP_NAN_S(SA, &va); + SW64_FP_NAN_S(SB, &vb); + if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN)) + goto next_working_s; + } + break; + + } + + FP_UNPACK_SP(SA, &va); + FP_UNPACK_SP(SB, &vb); + + switch (func) { + case FNC_VSUBS: + DEBUG_INFO("FNC_VSUBS\n"); + FP_SUB_S(SR, SA, SB); + goto pack_s; + case FNC_VMULS: + DEBUG_INFO("FNC_VMULS\n"); + FP_MUL_S(SR, SA, SB); + goto pack_s; + case FNC_VADDS: + DEBUG_INFO("FNC_VADDS\n"); + FP_ADD_S(SR, SA, SB); + goto pack_s; + case FNC_VDIVS: + DEBUG_INFO("FNC_VDIVS\n"); + FP_DIV_S(SR, SA, SB); + goto pack_s; + case FNC_VSQRTS: + DEBUG_INFO("FNC_VSQRTS\n"); + FP_SQRT_S(SR, SB); + goto pack_s; + } +pack_s: + FP_PACK_SP(&vc, SR); + if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) { + DEBUG_INFO("pack_s, vc=0 !!!!\n"); + vc = 0; + } + + DEBUG_INFO("SW64 SIMD Emulation S-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc); + DEBUG_INFO("SW64 SIMD Emulation S-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr); +next_working_s: + switch (working_part) { + case WORKING_PART_0: + working_part = WORKING_PART_1; + vc_p0 = vc; + fex_p0 = _fex; + goto simd_working; + case WORKING_PART_1: + working_part = WORKING_PART_2; + vc_p1 = vc; + fex_p1 = _fex; + goto simd_working; + case WORKING_PART_2: + working_part = WORKING_PART_3; + vc_p2 = vc; + fex_p2 = _fex; + goto simd_working; + case WORKING_PART_3: + vc_p3 = vc; + fex_p3 = _fex; + goto done; + } +done: + if (fex_p0 || fex_p1 || fex_p2 || fex_p3) { + unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3; + unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3; + + fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0; + swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr; + /* manage fpcr_p0 */ + if (fex_p0) { + swcr_p0 |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p0 = fpcr; + fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0); + DEBUG_INFO("fex_p0: fpcr_p0:%#lx\n", fpcr_p0); + } + + if (fex_p1) { + swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT); + + /* Update hardware control register. 
*/ + fpcr_p1 = fpcr; + fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1); + DEBUG_INFO("fex_p1: fpcr_p1:%#lx\n", fpcr_p1); + } + + if (fex_p2) { + swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p2 = fpcr; + fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2); + DEBUG_INFO("fex_p2: fpcr_p2:%#lx\n", fpcr_p2); + } + + if (fex_p3) { + swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p3 = fpcr; + fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3); + DEBUG_INFO("fex_p3: fpcr_p3:%#lx\n", fpcr_p3); + } + + fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3; + DEBUG_INFO("fex_p0 = %#lx\n", fex_p0); + DEBUG_INFO("fex_p1 = %#lx\n", fex_p1); + DEBUG_INFO("fex_p2 = %#lx\n", fex_p2); + DEBUG_INFO("fex_p3 = %#lx\n", fex_p3); + DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr); + wrfpcr(fpcr); + + DEBUG_INFO("Before write fp: vc_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3); + write_fp_reg_s(fc, vc_p0, vc_p1, vc_p2, vc_p3); + + /* Do we generate a signal? */ + _fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK) + | (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK); + si_code = 0; + if (_fex) { + if (_fex & IEEE_TRAP_ENABLE_DNO) + si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_INE) + si_code = FPE_FLTRES; + if (_fex & IEEE_TRAP_ENABLE_UNF) + si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_OVF) + si_code = FPE_FLTOVF; + if (_fex & IEEE_TRAP_ENABLE_DZE) + si_code = FPE_FLTDIV; + if (_fex & IEEE_TRAP_ENABLE_INV) + si_code = FPE_FLTINV; + } + DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code); + return si_code; + } + DEBUG_INFO("SIMD finished.. 
si_code:%#lx\n", si_code); + return 0; + +bad_insn: + printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc); + return -1; + +} + +static inline unsigned long negative_value(unsigned long va) +{ + return (va ^ 0x8000000000000000UL); +} + +static inline unsigned long s_negative_value(unsigned long va) +{ + return (va ^ 0x80000000UL); +} + +/* + * sw64 mul-add floating emulation + */ +long mul_add_fp_emul(unsigned long pc) +{ + FP_DECL_EX; + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SC); FP_DECL_S(S_TMP); FP_DECL_S(SR); + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DC); FP_DECL_D(D_TMP); FP_DECL_D(DR); + FP_DECL_S(S_ZERO); + FP_DECL_D(D_ZERO); + FP_DECL_S(S_TMP2); + FP_DECL_D(D_TMP2); + + unsigned long fa, fb, fc, fd, func, mode, src; + unsigned long res, va, vb, vc, vd, vtmp, vtmp2, swcr, fpcr; + __u32 insn; + long si_code; + unsigned long vzero = 0; + + get_user(insn, (__u32 *)pc); + fd = (insn >> 0) & 0x1f; /* destination register */ + fc = (insn >> 5) & 0x1f; + fb = (insn >> 16) & 0x1f; + fa = (insn >> 21) & 0x1f; + func = (insn >> 10) & 0x3f; + + fpcr = rdfpcr(); + mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3; + + DEBUG_INFO("===== Entering SW64 MUL-ADD Emulation =====\n"); + DEBUG_INFO("hardware fpcr = %#lx\n", fpcr); + swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr); + DEBUG_INFO("software swcr = %#lx\n", swcr); + + if (func == FNC_FMAS || func == FNC_FMSS || func == FNC_FNMAS || func == FNC_FNMSS) { + va = sw64_read_fp_reg_s(fa); + vb = sw64_read_fp_reg_s(fb); + vc = sw64_read_fp_reg_s(fc); + FP_UNPACK_SP(SA, &va); + FP_UNPACK_SP(SB, &vb); + FP_UNPACK_SP(SC, &vc); + FP_UNPACK_SP(S_ZERO, &vzero); + } + if (func == FNC_FMAD || func == FNC_FMSD || func == FNC_FNMAD || func == FNC_FNMSD) { + va = sw64_read_fp_reg(fa); + vb = sw64_read_fp_reg(fb); + vc = sw64_read_fp_reg(fc); + FP_UNPACK_DP(DA, &va); + FP_UNPACK_DP(DB, &vb); + FP_UNPACK_DP(DC, &vc); + FP_UNPACK_DP(D_ZERO, &vzero); + } + DEBUG_INFO("va = %#lx, vb = %#lx, vc = %#lx\n", va, vb, vc); + switch (func) { + case FNC_FMAS: + FP_MUL_S(S_TMP, SA, SB); + FP_ADD_S(SR, S_TMP, SC); + goto pack_s; + case FNC_FMSS: + FP_MUL_S(S_TMP, SA, SB); + FP_SUB_S(SR, S_TMP, SC); + goto pack_s; + case FNC_FNMAS: /* (-va*vb) + vc */ + va = s_negative_value(va); + FP_UNPACK_SP(SA, &va); + FP_MUL_S(S_TMP, SA, SB); + FP_ADD_S(SR, S_TMP, SC); + goto pack_s; + case FNC_FNMSS: /* (-va*vb) - vc */ + va = s_negative_value(va); + FP_UNPACK_SP(SA, &va); + FP_MUL_S(S_TMP, SA, SB); + FP_SUB_S(SR, S_TMP, SC); + goto pack_s; + case FNC_FMAD: + FP_MUL_D(D_TMP, DA, DB); + FP_ADD_D(DR, D_TMP, DC); + goto pack_d; + case FNC_FMSD: + FP_MUL_D(D_TMP, DA, DB); + FP_SUB_D(DR, D_TMP, DC); + goto pack_d; + case FNC_FNMAD: + va = negative_value(va); + FP_UNPACK_DP(DA, &va); + FP_MUL_D(D_TMP, DA, DB); + FP_ADD_D(DR, D_TMP, DC); + goto pack_d; + case FNC_FNMSD: + va = negative_value(va); + FP_UNPACK_DP(DA, &va); + FP_MUL_D(D_TMP, DA, DB); + FP_SUB_D(DR, D_TMP, DC); + goto pack_d; + default: + goto bad_insn; + + } +pack_s: + FP_PACK_SP(&vd, SR); + if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) + vd = 0; + sw64_write_fp_reg_s(fd, vd); + goto done; + +pack_d: + FP_PACK_DP(&vd, DR); + if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) + vd = 0; + sw64_write_fp_reg(fd, vd); + +done: + DEBUG_INFO("vd = %#lx\n", vd); + if (_fex) { + /* Record exceptions in software control word. */ + swcr |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT); + + /* Update hardware control register. 
*/ + fpcr &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr |= ieee_swcr_to_fpcr(swcr); + wrfpcr(fpcr); /** wrfpcr will destroy vector register! */ + if (func == FNC_FMAS || func == FNC_FMSS || func == FNC_FNMAS || func == FNC_FNMSS) + sw64_write_fp_reg_s(fd, vd); + if (func == FNC_FMAD || func == FNC_FMSD || func == FNC_FNMAD || func == FNC_FNMSD) + sw64_write_fp_reg(fd, vd); + + /* Do we generate a signal? */ + _fex = _fex & swcr & IEEE_TRAP_ENABLE_MASK; + si_code = 0; + if (_fex) { + if (_fex & IEEE_TRAP_ENABLE_DNO) + si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_INE) + si_code = FPE_FLTRES; + if (_fex & IEEE_TRAP_ENABLE_UNF) + si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_OVF) + si_code = FPE_FLTOVF; + if (_fex & IEEE_TRAP_ENABLE_DZE) + si_code = FPE_FLTDIV; + if (_fex & IEEE_TRAP_ENABLE_INV) + si_code = FPE_FLTINV; + } + + return si_code; + } + + /* + * We used to write the destination register here, but DEC FORTRAN + * requires that the result *always* be written... so we do the write + * immediately after the operations above. + */ + + return 0; + +bad_insn: + printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc); + return -1; +} + + +long simd_mul_add_fp_emul_s(unsigned long pc) +{ + FP_DECL_EX; + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SC); FP_DECL_S(S_TMP); FP_DECL_S(SR); + FP_DECL_S(S_ZERO); + FP_DECL_S(S_TMP2); + + unsigned long fa, fb, fc, fd, func, mode, src; + unsigned long res, va, vb, vc, vd, vtmp, vtmp2, swcr, fpcr; + __u32 insn; + long si_code; + unsigned long vzero = 0; + + get_user(insn, (__u32 *)pc); + fd = (insn >> 0) & 0x1f; /* destination register */ + fc = (insn >> 5) & 0x1f; + fb = (insn >> 16) & 0x1f; + fa = (insn >> 21) & 0x1f; + func = (insn >> 10) & 0x3f; + + fpcr = rdfpcr(); + mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3; + + unsigned long va_p0, va_p1, va_p2, va_p3; + unsigned long vb_p0, vb_p1, vb_p2, vb_p3; + unsigned long vc_p0, vc_p1, vc_p2, vc_p3; + unsigned long vd_p0, vd_p1, vd_p2, vd_p3; + unsigned long fex_p0, fex_p1, fex_p2, fex_p3; + + int working_part; + + DEBUG_INFO("======== Entering SIMD S-floating mul-add emulation =======\n"); + swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr); + DEBUG_INFO("software swcr = %#lx\n", swcr); + DEBUG_INFO("hardware fpcr = %#lx\n", fpcr); + read_fp_reg_s(fa, &va_p0, &va_p1, &va_p2, &va_p3); + read_fp_reg_s(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3); + read_fp_reg_s(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3); + read_fp_reg_s(fd, &vd_p0, &vd_p1, &vd_p2, &vd_p3); + DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3); + DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3); + DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3); + DEBUG_INFO("vd_p0:%#lx, vd_p1:%#lx, vd_p2:%#lx, vd_p3:%#lx\n", vd_p0, vd_p1, vd_p2, vd_p3); + working_part = WORKING_PART_0; +simd_working: + _fex = 0; + switch (working_part) { + case WORKING_PART_0: + DEBUG_INFO("WORKING_PART_0\n"); + va = va_p0; + vb = vb_p0; + vc = vc_p0; + DEBUG_INFO("FPCR_STATUS_MASK0 : %#lx, fpcr :%#lx\n", FPCR_STATUS_MASK0, fpcr); + if ((fpcr & FPCR_STATUS_MASK0) == 0) { + SW64_FP_NORMAL_S(SA, &va); + SW64_FP_NORMAL_S(SB, &vb); + SW64_FP_NORMAL_S(SC, &vc); + if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL) && (SC_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("LOW: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c); + } else { + SW64_FP_NAN_S(SA, &va); + SW64_FP_NAN_S(SB, &vb); + if ((SA_c == SW64_FP_NAN) && (SB_c == 
SW64_FP_NAN)) + goto next_working_s; + } + break; + case WORKING_PART_1: + DEBUG_INFO("WORKING_PART_1\n"); + va = va_p1; + vb = vb_p1; + vc = vc_p1; + DEBUG_INFO("FPCR_STATUS_MASK1 : %#lx, fpcr :%#lx\n", FPCR_STATUS_MASK0, fpcr); + if ((fpcr & FPCR_STATUS_MASK1) == 0) { + SW64_FP_NORMAL_S(SA, &va); + SW64_FP_NORMAL_S(SB, &vb); + SW64_FP_NORMAL_S(SC, &vc); + if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL) && (SC_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("HIGH: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c); + } else { + SW64_FP_NAN_S(SA, &va); + SW64_FP_NAN_S(SB, &vb); + if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN)) + goto next_working_s; + } + break; + case WORKING_PART_2: + DEBUG_INFO("WORKING_PART_2\n"); + va = va_p2; + vb = vb_p2; + vc = vc_p2; + if ((fpcr & FPCR_STATUS_MASK2) == 0) { + SW64_FP_NORMAL_S(SA, &va); + SW64_FP_NORMAL_S(SB, &vb); + SW64_FP_NORMAL_S(SC, &vc); + if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL) && (SC_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("HIGH: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c); + } else { + SW64_FP_NAN_S(SA, &va); + SW64_FP_NAN_S(SB, &vb); + if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN)) + goto next_working_s; + } + break; + case WORKING_PART_3: + DEBUG_INFO("WORKING_PART_3\n"); + va = va_p3; + vb = vb_p3; + vc = vc_p3; + if ((fpcr & FPCR_STATUS_MASK3) == 0) { + SW64_FP_NORMAL_S(SA, &va); + SW64_FP_NORMAL_S(SB, &vb); + SW64_FP_NORMAL_S(SC, &vc); + if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL) && (SC_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("HIGH: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c); + } else { + SW64_FP_NAN_S(SA, &va); + SW64_FP_NAN_S(SB, &vb); + if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN)) + goto next_working_s; + } + break; + } + + FP_UNPACK_SP(SA, &va); + FP_UNPACK_SP(SB, &vb); + FP_UNPACK_SP(SC, &vc); + FP_UNPACK_SP(S_ZERO, &vzero); + switch (func) { + case FNC_FMAS: + FP_MUL_S(S_TMP, SA, SB); + FP_ADD_S(SR, S_TMP, SC); + goto pack_s; + case FNC_FMSS: + FP_MUL_S(S_TMP, SA, SB); + FP_SUB_S(SR, S_TMP, SC); + goto pack_s; + case FNC_FNMAS: /* (-va*vb) + vc */ + va = s_negative_value(va); + FP_UNPACK_SP(SA, &va); + FP_MUL_S(S_TMP, SA, SB); + FP_ADD_S(SR, S_TMP, SC); + goto pack_s; + case FNC_FNMSS: /* (-va*vb) - vc */ + va = s_negative_value(va); + FP_UNPACK_SP(SA, &va); + FP_MUL_S(S_TMP, SA, SB); + FP_SUB_S(SR, S_TMP, SC); + goto pack_s; + default: + goto bad_insn; + } + +pack_s: + FP_PACK_SP(&vd, SR); + if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) + vd = 0; + DEBUG_INFO("SW64 SIMD Emulation S-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc); + DEBUG_INFO("SW64 SIMD Emulation S-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr); +next_working_s: + switch (working_part) { + case WORKING_PART_0: + working_part = WORKING_PART_1; + vd_p0 = vd; + fex_p0 = _fex; + goto simd_working; + case WORKING_PART_1: + working_part = WORKING_PART_2; + vd_p1 = vd; + fex_p1 = _fex; + goto simd_working; + case WORKING_PART_2: + working_part = WORKING_PART_3; + vd_p2 = vd; + fex_p2 = _fex; + goto simd_working; + case WORKING_PART_3: + vd_p3 = vd; + fex_p3 = _fex; + goto done; + } +done: + if (fex_p0 || fex_p1 || fex_p2 || fex_p3) { + unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3; + unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3; + + fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0; + swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr; + /* manage fpcr_p0 */ + if (fex_p0) { + swcr_p0 |= (fex_p0 << 
IEEE_STATUS0_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p0 = fpcr; + fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0); + } + + if (fex_p1) { + swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p1 = fpcr; + fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1); + } + + if (fex_p2) { + swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p2 = fpcr; + fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2); + } + + if (fex_p3) { + swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p3 = fpcr; + fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3); + } + + fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3; + DEBUG_INFO("fex_p0 = %#lx\n", fex_p0); + DEBUG_INFO("fex_p1 = %#lx\n", fex_p1); + DEBUG_INFO("fex_p2 = %#lx\n", fex_p2); + DEBUG_INFO("fex_p3 = %#lx\n", fex_p3); + DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr); + wrfpcr(fpcr); + DEBUG_INFO("Before write fp: vp_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3); + write_fp_reg_s(fd, vd_p0, vd_p1, vd_p2, vd_p3); /* write to fd */ + + /* Do we generate a signal? */ + _fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK) + | (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK); + si_code = 0; + if (_fex) { + if (_fex & IEEE_TRAP_ENABLE_DNO) + si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_INE) + si_code = FPE_FLTRES; + if (_fex & IEEE_TRAP_ENABLE_UNF) + si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_OVF) + si_code = FPE_FLTOVF; + if (_fex & IEEE_TRAP_ENABLE_DZE) + si_code = FPE_FLTDIV; + if (_fex & IEEE_TRAP_ENABLE_INV) + si_code = FPE_FLTINV; + } + DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code); + return si_code; + + } + DEBUG_INFO("SIMD finished.. 
si_code:%#lx\n", si_code); + return 0; + +bad_insn: + printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc); + return -1; +} + +long simd_mul_add_fp_emul_d(unsigned long pc) +{ + FP_DECL_EX; + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DC); FP_DECL_D(D_TMP); FP_DECL_D(DR); + FP_DECL_D(D_ZERO); + FP_DECL_D(D_TMP2); + + unsigned long fa, fb, fc, fd, func, mode, src; + unsigned long res, va, vb, vc, vd, vtmp, vtmp2, swcr, fpcr; + __u32 insn; + long si_code; + unsigned long vzero = 0; + + get_user(insn, (__u32 *)pc); + fd = (insn >> 0) & 0x1f; /* destination register */ + fc = (insn >> 5) & 0x1f; + fb = (insn >> 16) & 0x1f; + fa = (insn >> 21) & 0x1f; + func = (insn >> 10) & 0x3f; + + fpcr = rdfpcr(); + mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3; + + unsigned long va_p0, va_p1, va_p2, va_p3; + unsigned long vb_p0, vb_p1, vb_p2, vb_p3; + unsigned long vc_p0, vc_p1, vc_p2, vc_p3; + unsigned long vd_p0, vd_p1, vd_p2, vd_p3; + unsigned long fex_p0, fex_p1, fex_p2, fex_p3; + + int working_part; + + DEBUG_INFO("======== Entering SIMD D-floating mul-add emulation =======\n"); + DEBUG_INFO("hardware fpcr = %#lx\n", fpcr); + swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr); + DEBUG_INFO("software swcr = %#lx\n", swcr); + read_fp_reg_d(fa, &va_p0, &va_p1, &va_p2, &va_p3); + read_fp_reg_d(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3); + read_fp_reg_d(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3); + read_fp_reg_d(fd, &vd_p0, &vd_p1, &vd_p2, &vd_p3); + DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3); + DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3); + DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3); + DEBUG_INFO("vd_p0:%#lx, vd_p1:%#lx, vd_p2:%#lx, vd_p3:%#lx\n", vd_p0, vd_p1, vd_p2, vd_p3); + working_part = WORKING_PART_0; +simd_working: + _fex = 0; + switch (working_part) { + case WORKING_PART_0: + DEBUG_INFO("WORKING_PART_0\n"); + va = va_p0; + vb = vb_p0; + vc = vc_p0; + vd = vd_p0; + if ((fpcr & FPCR_STATUS_MASK0) == 0) { + SW64_FP_NORMAL_D(DA, &va); + SW64_FP_NORMAL_D(DB, &vb); + SW64_FP_NORMAL_D(DC, &vc); + if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL) && (DC_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("LOW: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c); + } else { + SW64_FP_NAN_D(DA, &va); + SW64_FP_NAN_D(DB, &vb); + SW64_FP_NAN_D(DC, &vc); + if ((DA_c == SW64_FP_NAN) && (DB_c == SW64_FP_NAN) && (DC_c == SW64_FP_NAN)) + goto next_working_s; + } + break; + case WORKING_PART_1: + DEBUG_INFO("WORKING_PART_1\n"); + va = va_p1; + vb = vb_p1; + vc = vc_p1; + vd = vd_p1; + if ((fpcr & FPCR_STATUS_MASK1) == 0) { + SW64_FP_NORMAL_D(DA, &va); + SW64_FP_NORMAL_D(DB, &vb); + SW64_FP_NORMAL_D(DC, &vc); + if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL) && (DC_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c); + } else { + SW64_FP_NAN_D(DA, &va); + SW64_FP_NAN_D(DB, &vb); + SW64_FP_NAN_D(DC, &vc); + if ((DA_c == SW64_FP_NAN) && (DB_c == SW64_FP_NAN) && (DC_c == SW64_FP_NAN)) + goto next_working_s; + } + break; + case WORKING_PART_2: + DEBUG_INFO("WORKING_PART_2\n"); + va = va_p2; + vb = vb_p2; + vc = vc_p2; + vd = vd_p2; + if ((fpcr & FPCR_STATUS_MASK2) == 0) { + SW64_FP_NORMAL_D(DA, &va); + SW64_FP_NORMAL_D(DB, &vb); + SW64_FP_NORMAL_D(DC, &vc); + if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL) && (DC_c == SW64_FP_NORMAL)) + goto next_working_s; + else + 
DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c); + } else { + SW64_FP_NAN_D(DA, &va); + SW64_FP_NAN_D(DB, &vb); + SW64_FP_NAN_D(DC, &vc); + if ((DA_c == SW64_FP_NAN) && (DB_c == SW64_FP_NAN) && (DC_c == SW64_FP_NAN)) + goto next_working_s; + } + break; + case WORKING_PART_3: + DEBUG_INFO("WORKING_PART_3\n"); + va = va_p3; + vb = vb_p3; + vc = vc_p3; + vd = vd_p3; + if ((fpcr & FPCR_STATUS_MASK3) == 0) { + SW64_FP_NORMAL_D(DA, &va); + SW64_FP_NORMAL_D(DB, &vb); + SW64_FP_NORMAL_D(DC, &vc); + if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL) && (DC_c == SW64_FP_NORMAL)) + goto next_working_s; + else + DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c); + } else { + SW64_FP_NAN_D(DA, &va); + SW64_FP_NAN_D(DB, &vb); + SW64_FP_NAN_D(DC, &vc); + if ((DA_c == SW64_FP_NAN) && (DB_c == SW64_FP_NAN) && (DC_c == SW64_FP_NAN)) + goto next_working_s; + } + break; + } + + FP_UNPACK_DP(DA, &va); + FP_UNPACK_DP(DB, &vb); + FP_UNPACK_DP(DC, &vc); + FP_UNPACK_DP(D_ZERO, &vzero); + + switch (func) { + case FNC_FMAD: + FP_MUL_D(D_TMP, DA, DB); + FP_ADD_D(DR, D_TMP, DC); + goto pack_d; + case FNC_FMSD: + FP_MUL_D(D_TMP, DA, DB); + FP_SUB_D(DR, D_TMP, DC); + goto pack_d; + case FNC_FNMAD: + va = negative_value(va); + FP_UNPACK_DP(DA, &va); + FP_MUL_D(D_TMP, DA, DB); + FP_ADD_D(DR, D_TMP, DC); + goto pack_d; + case FNC_FNMSD: + va = negative_value(va); + FP_UNPACK_DP(DA, &va); + FP_MUL_D(D_TMP, DA, DB); + FP_SUB_D(DR, D_TMP, DC); + + goto pack_d; + default: + goto bad_insn; + } + +pack_d: + FP_PACK_DP(&vd, DR); + if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) + vd = 0; + DEBUG_INFO("SW64 SIMD Emulation D-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc); + DEBUG_INFO("SW64 SIMD Emulation D-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr); +next_working_s: + switch (working_part) { + case WORKING_PART_0: + working_part = WORKING_PART_1; + vd_p0 = vd; + fex_p0 = _fex; + goto simd_working; + case WORKING_PART_1: + working_part = WORKING_PART_2; + vd_p1 = vd; + fex_p1 = _fex; + goto simd_working; + case WORKING_PART_2: + working_part = WORKING_PART_3; + vd_p2 = vd; + fex_p2 = _fex; + goto simd_working; + case WORKING_PART_3: + vd_p3 = vd; + fex_p3 = _fex; + goto done; + } +done: + if (fex_p0 || fex_p1 || fex_p2 || fex_p3) { + unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3; + unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3; + + fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0; + swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr; + /* manage fpcr_p0 */ + if (fex_p0) { + swcr_p0 |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p0 = fpcr; + fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0); + } + + if (fex_p1) { + swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p1 = fpcr; + fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1); + } + + if (fex_p2) { + swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT); + + /* Update hardware control register. 
*/ + fpcr_p2 = fpcr; + fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2); + } + + if (fex_p3) { + swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr_p3 = fpcr; + fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3); + } + + fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3; + DEBUG_INFO("fex_p0 = %#lx\n", fex_p0); + DEBUG_INFO("fex_p1 = %#lx\n", fex_p1); + DEBUG_INFO("fex_p2 = %#lx\n", fex_p2); + DEBUG_INFO("fex_p3 = %#lx\n", fex_p3); + DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr); + wrfpcr(fpcr); + + DEBUG_INFO("Before write fp: vp_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3); + write_fp_reg_d(fd, vd_p0, vd_p1, vd_p2, vd_p3); /* write to fd */ + + /* Do we generate a signal? */ + _fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK) + | (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK); + si_code = 0; + if (_fex) { + if (_fex & IEEE_TRAP_ENABLE_DNO) + si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_INE) + si_code = FPE_FLTRES; + if (_fex & IEEE_TRAP_ENABLE_UNF) + si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_OVF) + si_code = FPE_FLTOVF; + if (_fex & IEEE_TRAP_ENABLE_DZE) + si_code = FPE_FLTDIV; + if (_fex & IEEE_TRAP_ENABLE_INV) + si_code = FPE_FLTINV; + } + DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code); + return si_code; + } + DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code); + return 0; + +bad_insn: + printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc); + return -1; +} + +void read_fp_reg_s(unsigned long reg, unsigned long *val_p0, + unsigned long *val_p1, unsigned long *val_p2, unsigned long *val_p3) +{ + unsigned long fp[2]; + + sw64_read_simd_fp_m_s(reg, fp); + *val_p0 = fp[0] & 0xffffffffUL; + *val_p1 = (fp[0] >> 32) & 0xffffffffUL; + *val_p2 = fp[1] & 0xffffffffUL; + *val_p3 = (fp[1] >> 32) & 0xffffffffUL; +} + +void read_fp_reg_d(unsigned long reg, unsigned long *val_p0, + unsigned long *val_p1, unsigned long *val_p2, unsigned long *val_p3) +{ + unsigned long fp[4]; + + sw64_read_simd_fp_m_d(reg, fp); + *val_p0 = fp[0]; + *val_p1 = fp[1]; + *val_p2 = fp[2]; + *val_p3 = fp[3]; +} + +void write_fp_reg_s(unsigned long reg, unsigned long val_p0, + unsigned long val_p1, unsigned long val_p2, unsigned long val_p3) +{ + unsigned long fp[2]; + + fp[0] = ((val_p1 & 0xffffffffUL) << 32) | (val_p0 & 0xffffffffUL); + fp[1] = ((val_p3 & 0xffffffffUL) << 32) | (val_p2 & 0xffffffffUL); + sw64_write_simd_fp_reg_s(reg, fp[0], fp[1]); +} + +void write_fp_reg_d(unsigned long reg, unsigned long val_p0, + unsigned long val_p1, unsigned long val_p2, unsigned long val_p3) +{ + sw64_write_simd_fp_reg_d(reg, val_p0, val_p1, val_p2, val_p3); +} diff --git a/arch/sw_64/math-emu/qrnnd.S b/arch/sw_64/math-emu/qrnnd.S new file mode 100644 index 000000000000..1e732f2e68c0 --- /dev/null +++ b/arch/sw_64/math-emu/qrnnd.S @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + # __udiv_qrnnd + # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. + + # This file is part of GCC. 
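	#
	# Calling convention, as used by the udiv_qrnnd() wrapper in sfp-util.h:
	#   $16 = address at which to store the remainder
	#   $17 = n1, high 64 bits of the dividend (must be < d for the
	#         quotient to fit in 64 bits)
	#   $18 = n0, low 64 bits of the dividend
	#   $19 = d, the divisor
	#   $0  = returned quotient of the 128-bit value n1:n0 divided by d
	#
	# The main loop is a restoring shift-and-subtract division: 16 passes,
	# each unrolled four steps, producing the 64 quotient bits in $18 while
	# the running remainder stays in $17.  A rough C reference model of the
	# same computation (illustrative only; udiv_qrnnd_ref is just a name
	# for this sketch):
	#
	#	unsigned long udiv_qrnnd_ref(unsigned long *rem, unsigned long n1,
	#				     unsigned long n0, unsigned long d)
	#	{
	#		unsigned long q = 0;
	#		int i;
	#
	#		for (i = 0; i < 64; i++) {
	#			unsigned long carry = n1 >> 63;
	#
	#			n1 = (n1 << 1) | (n0 >> 63);	/* next dividend bit */
	#			n0 <<= 1;
	#			q <<= 1;
	#			if (carry || n1 >= d) {		/* restoring step */
	#				n1 -= d;
	#				q |= 1;
	#			}
	#		}
	#		*rem = n1;
	#		return q;
	#	}
	#
	# The $largedivisor path below handles divisors with the top bit set by
	# halving dividend and divisor first, then correcting the quotient and
	# remainder for the dropped low bits.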
+ + .set noreorder + .set noat + + .text + + .globl __udiv_qrnnd + .ent __udiv_qrnnd +__udiv_qrnnd: + .frame $30, 0, $26, 0 + .prologue 0 + + # ldiq $2,16 + ldi $2, 16($31) + blt $19, $largedivisor + +$loop1: cmplt $18, 0, $3 + addl $17, $17, $17 + bis $17, $3, $17 + addl $18, $18, $18 + cmpule $19, $17, $20 + subl $17, $19, $3 + selne $20, $3, $17, $17 + bis $18, $20, $18 + cmplt $18, 0, $3 + addl $17, $17, $17 + bis $17, $3, $17 + addl $18, $18, $18 + cmpule $19, $17, $20 + subl $17, $19, $3 + selne $20, $3, $17, $17 + bis $18, $20, $18 + cmplt $18, 0, $3 + addl $17, $17, $17 + bis $17, $3, $17 + addl $18, $18, $18 + cmpule $19, $17, $20 + subl $17, $19, $3 + selne $20, $3, $17, $17 + bis $18, $20, $18 + cmplt $18, 0, $3 + addl $17, $17, $17 + bis $17, $3, $17 + addl $18, $18, $18 + cmpule $19, $17, $20 + subl $17, $19, $3 + selne $20, $3, $17, $17 + bis $18, $20, $18 + subl $2, 1, $2 + bgt $2, $loop1 + stl $17, 0($16) + bis $31, $18, $0 + ret $31, ($26), 1 + +$largedivisor: + and $18, 1, $4 + + srl $18, 1, $18 + sll $17, 63, $3 + or $3, $18, $18 + srl $17, 1, $17 + + and $19, 1, $6 + srl $19, 1, $5 + addl $5, $6, $5 + +$loop2: cmplt $18, 0, $3 + addl $17, $17, $17 + bis $17, $3, $17 + addl $18, $18, $18 + cmpule $5, $17, $20 + subl $17, $5, $3 + selne $20, $3, $17, $17 + bis $18, $20, $18 + cmplt $18, 0, $3 + addl $17, $17, $17 + bis $17, $3, $17 + addl $18, $18, $18 + cmpule $5, $17, $20 + subl $17, $5, $3 + selne $20, $3, $17, $17 + bis $18, $20, $18 + cmplt $18, 0, $3 + addl $17, $17, $17 + bis $17, $3, $17 + addl $18, $18, $18 + cmpule $5, $17, $20 + subl $17, $5, $3 + selne $20, $3, $17, $17 + bis $18, $20, $18 + cmplt $18, 0, $3 + addl $17, $17, $17 + bis $17, $3, $17 + addl $18, $18, $18 + cmpule $5, $17, $20 + subl $17, $5, $3 + selne $20, $3, $17, $17 + bis $18, $20, $18 + subl $2, 1, $2 + bgt $2, $loop2 + + addl $17, $17, $17 + addl $4, $17, $17 + bne $6, $Odd + stl $17, 0($16) + bis $31, $18, $0 + ret $31, ($26), 1 + +$Odd: + # q' in $18. 
r' in $17 + addl $17, $18, $17 + + cmpult $17, $18, $3 # $3 := carry from addl + subl $17, $19, $at + addl $18, $3, $18 + selne $3, $at, $17, $17 + + cmpult $17, $19, $3 + addl $18, 1, $at + seleq $3, $at, $18, $18 + subl $17, $19, $at + seleq $3, $at, $17, $17 + + stl $17, 0($16) + bis $31, $18, $0 + ret $31, ($26), 1 + + .end __udiv_qrnnd diff --git a/arch/sw_64/math-emu/sfp-util.h b/arch/sw_64/math-emu/sfp-util.h new file mode 100644 index 000000000000..63f9685999f3 --- /dev/null +++ b/arch/sw_64/math-emu/sfp-util.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <asm/byteorder.h> +#include <asm/fpu.h> + +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + ((sl) = (al) + (bl), (sh) = (ah) + (bh) + ((sl) < (al))) + +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + ((sl) = (al) - (bl), (sh) = (ah) - (bh) - ((al) < (bl))) + +#define umul_ppmm(wh, wl, u, v) \ + __asm__ ("mull %2, %3, %1; umulh %2, %3, %0" \ + : "=r" ((UDItype)(wh)), \ + "=&r" ((UDItype)(wl)) \ + : "r" ((UDItype)(u)), \ + "r" ((UDItype)(v))) + +#define udiv_qrnnd(q, r, n1, n0, d) \ +do { unsigned long __r; \ + (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ + (r) = __r; \ +} while (0) +extern unsigned long __udiv_qrnnd(unsigned long *, unsigned long, + unsigned long, unsigned long); + +#define UDIV_NEEDS_NORMALIZATION 1 + +#define abort() goto bad_insn + +#ifndef __LITTLE_ENDIAN +#define __LITTLE_ENDIAN -1 +#endif +#define __BYTE_ORDER __LITTLE_ENDIAN diff --git a/arch/sw_64/mm/Makefile b/arch/sw_64/mm/Makefile new file mode 100644 index 000000000000..92be882cc82b --- /dev/null +++ b/arch/sw_64/mm/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the linux sw_64-specific parts of the memory manager. 
+# + +#ccflags-y := -Werror + +obj-y := init.o fault.o physaddr.o mmap.o + +obj-$(CONFIG_NUMA) += numa.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += thp.o diff --git a/arch/sw_64/mm/fault.c b/arch/sw_64/mm/fault.c new file mode 100644 index 000000000000..c68be4a40d23 --- /dev/null +++ b/arch/sw_64/mm/fault.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1995 Linus Torvalds + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <asm/io.h> + +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> +#include <asm/core.h> + +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/fs.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/extable.h> +#include <linux/perf_event.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> + +__read_mostly bool segv_debug_enabled; + +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs, unsigned long mmcsr) +{ + int ret = 0; + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, mmcsr)) + ret = 1; + preempt_enable(); + } + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs, unsigned long mmcsr) +{ + return 0; +} +#endif + +extern void die_if_kernel(char *, struct pt_regs *, long, unsigned long *); +extern void dik_show_regs(struct pt_regs *regs, unsigned long *r9_15); + +void show_all_vma(void) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *tmp; + + unsigned long start = 0; + unsigned long end = 0; + int i = 0; + + if (mm) { + tmp = mm->mmap; + while (tmp) { + start = tmp->vm_start; + end = tmp->vm_end; + if (tmp->vm_file) + pr_info("vma[%d]: [%#lx, %#lx], len = %#lx, flags = %#lx, file = %s, name = %s\n", + i, start, end, (end - start), tmp->vm_flags, + tmp->vm_file->f_path.dentry->d_name.name, current->comm); + else + pr_info("vma[%d]: [%#lx, %#lx], len = %#lx, flags = %#lx, name = %s\n", + i, start, end, (end - start), tmp->vm_flags, current->comm); + tmp = tmp->vm_next; + i++; + } + } +} + +/* + * Force a new ASN for a task. + */ + +#ifndef CONFIG_SMP +unsigned long last_asn = ASN_FIRST_VERSION; +#endif + +void +__load_new_mm_context(struct mm_struct *next_mm) +{ + unsigned long mmc; + struct pcb_struct *pcb; + + mmc = __get_new_mm_context(next_mm, smp_processor_id()); + next_mm->context.asid[smp_processor_id()] = mmc; + + pcb = ¤t_thread_info()->pcb; + pcb->asn = mmc & HARDWARE_ASN_MASK; + pcb->ptbr = ((unsigned long) next_mm->pgd - PAGE_OFFSET) >> PAGE_SHIFT; + + __reload_thread(pcb); +} + + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to handle_mm_fault(). + * + * mmcsr: + * 0 = translation not valid + * 1 = access violation + * 2 = fault-on-read + * 3 = fault-on-execute + * 4 = fault-on-write + * + * cause: + * -1 = instruction fetch + * 0 = load + * 1 = store + * + * Registers $9 through $15 are saved in a block just prior to `regs' and + * are saved and restored around the call to allow exception code to + * modify them. + */ + +/* Macro for exception fixup code to access integer registers. */ +#define dpf_reg(r) \ + (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 : \ + (r) <= 18 ? 
(r)+10 : (r)-10]) +unsigned long show_va_to_pa(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd = NULL; + p4d_t *p4d = NULL; + pud_t *pud = NULL; + pmd_t *pmd = NULL; + pte_t *pte = NULL; + unsigned long ret = 0UL; + + pgd = pgd_offset(mm, addr); + if (pgd_none(*pgd)) { + ret = 0; + pr_debug("addr = %#lx, pgd = %#lx\n", addr, pgd_val(*pgd)); + goto out; + } + p4d = pgd_offset(pgd, addr); + if (p4d_none(*p4d)) { + ret = 0; + pr_debug("addr = %#lx, pgd = %#lx, p4d = %#lx\n", + addr, pgd_val(*pgd), p4d_val(*p4d)); + goto out; + } + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + ret = 0; + pr_debug("addr = %#lx, pgd = %#lx, pud = %#lx\n", + addr, pgd_val(*pgd), pud_val(*pud)); + goto out; + } + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + ret = 0; + pr_debug("addr = %#lx, pgd = %#lx, pud = %#lx, pmd = %#lx\n", + addr, pgd_val(*pgd), pud_val(*pud), pmd_val(*pmd)); + goto out; + + } + pte = pte_offset_map(pmd, addr); + if (pte_present(*pte)) { + ret = ((unsigned long)__va(((pte_val(*pte) >> 32)) << PAGE_SHIFT)); + pr_debug("addr = %#lx, pgd = %#lx, pud = %#lx, pmd = %#lx, pte = %#lx, ret = %#lx\n", + addr, *(unsigned long *)pgd, *(unsigned long *)pud, + *(unsigned long *)pmd, *(unsigned long *)pte, ret); + } +out: + return ret; +} + +extern int do_match(unsigned long address, unsigned long mmcsr, long cause, struct pt_regs *regs); + +asmlinkage void notrace +do_page_fault(unsigned long address, unsigned long mmcsr, + long cause, struct pt_regs *regs) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + const struct exception_table_entry *fixup; + int si_code = SEGV_MAPERR; + vm_fault_t fault; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + if (notify_page_fault(regs, mmcsr)) + return; + + if (unlikely(mmcsr >= MMCSR__DA_MATCH)) { + if (do_match(address, mmcsr, cause, regs) == 1) + return; + } + + if (unlikely(mmcsr == MMCSR__ACV1)) { + if (!user_mode(regs)) + goto no_context; + else { + down_read(&mm->mmap_lock); + goto bad_area; + } + } + + /* + * If we're in an interrupt context, or have no user context, + * we must not take the fault. + */ + if (!mm || faulthandler_disabled()) + goto no_context; + + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + +retry: + down_read(&mm->mmap_lock); + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, address)) + goto bad_area; + + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it. + */ +good_area: + si_code = SEGV_ACCERR; + if (cause < 0) { + if (!(vma->vm_flags & VM_EXEC)) + goto bad_area; + } else if (!cause) { + /* Allow reads even for write-only mappings */ + if (!(vma->vm_flags & (VM_READ | VM_WRITE))) + goto bad_area; + } else { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + flags |= FAULT_FLAG_WRITE; + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. 
+ */ + fault = handle_mm_fault(vma, address, flags, NULL); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + } + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, address); + current->maj_flt++; + } else { + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, address); + current->min_flt++; + } + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + + /* + * No need to up_read(&mm->mmap_lock) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + + goto retry; + } + } + + up_read(&mm->mmap_lock); + + return; + + /* + * Something tried to access memory that isn't in our memory map. + * Fix it, but check if it's kernel or user first. + */ + bad_area: + up_read(&mm->mmap_lock); + + if (user_mode(regs)) + goto do_sigsegv; + + no_context: + /* Are we prepared to handle this fault as an exception? */ + fixup = search_exception_tables(regs->pc); + if (fixup != 0) { + unsigned long newpc; + + newpc = fixup_exception(dpf_reg, fixup, regs->pc); + regs->pc = newpc; + return; + } + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + pr_alert("Unable to handle kernel paging request at virtual address %016lx\n", + address); + die_if_kernel("Oops", regs, cause, (unsigned long *)regs - 16); + do_exit(SIGKILL); + + /* + * We ran out of memory, or some other thing happened to us that + * made us unable to handle the page fault gracefully. + */ + out_of_memory: + up_read(&mm->mmap_lock); + if (!user_mode(regs)) + goto no_context; + pagefault_out_of_memory(); + return; + + do_sigbus: + up_read(&mm->mmap_lock); + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) address, 0); + if (!user_mode(regs)) + goto no_context; + return; + + do_sigsegv: + force_sig_fault(SIGSEGV, si_code, (void __user *) address, 0); + + if (unlikely(segv_debug_enabled)) { + pr_info("fault: want to send_segv: pid %d, cause = %#lx, mmcsr = %#lx, address = %#lx, pc %#lx\n", + current->pid, cause, mmcsr, address, regs->pc); + dik_show_regs(regs, (unsigned long *)regs-16); + show_all_vma(); + } + + return; +} diff --git a/arch/sw_64/mm/hugetlbpage.c b/arch/sw_64/mm/hugetlbpage.c new file mode 100644 index 000000000000..3c03709d441c --- /dev/null +++ b/arch/sw_64/mm/hugetlbpage.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SW64 Huge TLB Page Support for Kernel. + */ + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/sched/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> + +/* + * pmd_huge() returns 1 if @pmd is hugetlb related entry, that is normal + * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry. + * Otherwise, returns 0. 
+ */ +int pmd_huge(pmd_t pmd) +{ + return !pmd_none(pmd) && + (pmd_val(pmd) & (_PAGE_VALID | _PAGE_PSE)) != _PAGE_VALID; +} + +int pud_huge(pud_t pud) +{ + return 0; +} + +#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +#define want_pmd_share() (1) +#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ +#define want_pmd_share() (0) +#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ + +pte_t *sw64_256m_hugepte_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr) +{ + int i; + struct page *page; + pmd_t *pmd; + pte_t *pte = NULL; + + pmd = pmd_alloc(mm, pud, addr); + if (pmd == NULL) + return NULL; + + pte = pte_alloc_map(mm, pmd, addr); + if (pte == NULL) + return NULL; + + page = virt_to_page(pte); + pmd_val(*pmd) = pmd_val(*pmd) | _PAGE_PSE | _PAGE_PHU; + for (i = 1; i < 32; i++) + pmd_val(*(pmd+i)) = pmd_val(*pmd); + return pte; +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); + if (pud) { + if (sz == PMD_SIZE) { + if (want_pmd_share() && pud_none(*pud)) + pte = huge_pmd_share(mm, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); + } else if (sz == (PMD_SIZE << 5)) { + pte = sw64_256m_hugepte_alloc(mm, pud, addr); + } else { + printk(" Unsupported page size %lx\n", sz); + return NULL; + } + } + BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); + + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, + unsigned long sz) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd = NULL; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + p4d = p4d_offset(pgd, addr); + if (p4d_present(*p4d)) { + pud = pud_offset(p4d, addr); + if (pud_present(*pud)) { + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + return NULL; + if (pmd_val(*pmd) & _PAGE_PHU) + pte = pte_offset_map(pmd, addr); + else + pte = (pte_t *) pmd; + } + } + } + return pte; +} + +static inline int sw64_huge_pmd_bad(pmd_t pmd) +{ + return !(((pmd_val(pmd) & ~_PFN_MASK) == _PAGE_TABLE) || + ((pmd_val(pmd) & _PAGE_PHU) == _PAGE_PHU)); +} + +static inline int sw64_huge_pmd_none_or_clear_bad(pmd_t *pmd) +{ + if (pmd_none(*pmd)) + return 1; + if (unlikely(sw64_huge_pmd_bad(*pmd))) { + pmd_clear_bad(pmd); + return 1; + } + return 0; +} + +static void sw64_huge_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long addr) +{ + if ((((unsigned long)pmd & 0xffUL) == 0) && + ((pmd_val(*pmd) & _PAGE_PHU) == _PAGE_PHU)) { + pgtable_t token = pmd_pgtable(*pmd); + + pmd_clear(pmd); + pte_free_tlb(tlb, token, addr); + mm_dec_nr_ptes(tlb->mm); + } else { + pmd_clear(pmd); + } +} + +static inline void sw64_huge_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + pmd_t *pmd; + unsigned long next; + unsigned long start; + + start = addr; + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (sw64_huge_pmd_none_or_clear_bad(pmd)) + continue; + sw64_huge_free_pte_range(tlb, pmd, addr); + } while (pmd++, addr = next, addr != end); + + start &= PUD_MASK; + if (start < floor) + return; + if (ceiling) { + ceiling &= PUD_MASK; + if (!ceiling) + return; + } + if (end - 1 > ceiling - 1) + return; + + pmd = pmd_offset(pud, start); + pud_clear(pud); + pmd_free_tlb(tlb, pmd, start); + mm_dec_nr_pmds(tlb->mm); +} + +static inline void sw64_huge_free_pud_range(struct 
mmu_gather *tlb, p4d_t *p4d, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + pud_t *pud; + unsigned long next; + unsigned long start; + + start = addr; + pud = pud_offset(p4d, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + sw64_huge_free_pmd_range(tlb, pud, addr, next, floor, ceiling); + } while (pud++, addr = next, addr != end); + + start &= PGDIR_MASK; + if (start < floor) + return; + if (ceiling) { + ceiling &= PGDIR_MASK; + if (!ceiling) + return; + } + if (end - 1 > ceiling - 1) + return; + + pud = pud_offset(p4d, start); + p4d_clear(p4d); + pud_free_tlb(tlb, pud, start); + mm_dec_nr_puds(tlb->mm); +} + +#ifdef CONFIG_HUGETLB_PAGE +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.low_limit = current->mm->mmap_legacy_base; + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + return vm_unmapped_area(&info); +} + +static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, + unsigned long addr0, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + unsigned long addr; + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = PAGE_SIZE; + info.high_limit = current->mm->mmap_base; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + addr = vm_unmapped_area(&info); + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. 
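+ *
+ * Note that vm_unmapped_area() reports failure by returning -ENOMEM, which
+ * as an unsigned long is not page aligned; that is what the
+ * (addr & ~PAGE_MASK) test below detects before retrying bottom-up.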
+ */ + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } + + return addr; +} + +unsigned long +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (len > TASK_SIZE) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(file, addr, len)) + return -EINVAL; + return addr; + } + + if (addr) { + addr = ALIGN(addr, huge_page_size(h)); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + if (mm->get_unmapped_area == arch_get_unmapped_area) + return hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); + else + return hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); +} + +#if (defined(CONFIG_FORCE_MAX_ZONEORDER) && (CONFIG_FORCE_MAX_ZONEORDER >= 16)) +static __init int sw64_256m_hugetlb_init(void) +{ + if (!size_to_hstate(1UL << (PMD_SHIFT + 5))) + hugetlb_add_hstate(PMD_SHIFT + 5 - PAGE_SHIFT); + return 0; +} +arch_initcall(sw64_256m_hugetlb_init); +#endif +#endif /* CONFIG_HUGETLB_PAGE */ + +static __init int setup_hugepagesz(char *opt) +{ + unsigned long ps = memparse(opt, &opt); + + if (ps == PMD_SIZE) { + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + } else if (ps == (PMD_SIZE << 5)) { + hugetlb_add_hstate(PMD_SHIFT + 5 - PAGE_SHIFT); + } else { + printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n", + ps >> 20); + return 0; + } + return 1; +} +__setup("hugepagesz=", setup_hugepagesz); diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c new file mode 100644 index 000000000000..d0e934356dd5 --- /dev/null +++ b/arch/sw_64/mm/init.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1995 Linus Torvalds + */ + +/* 2.3.x zone allocator, 1999 Andrea Arcangeli andrea@suse.de */ + +#include <linux/pagemap.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/init.h> +#include <linux/vmalloc.h> +#include <linux/gfp.h> +#include <linux/uaccess.h> +#include <linux/memblock.h> +#include <linux/dma-mapping.h> +#include <linux/swiotlb.h> +#include <linux/acpi.h> + +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/dma.h> +#include <asm/mmu_context.h> +#include <asm/console.h> +#include <asm/tlb.h> +#include <asm/setup.h> +#include <asm/sections.h> +#include <asm/memory.h> +#include <asm/hw_init.h> + +extern void die_if_kernel(char *, struct pt_regs *, long); + +struct mem_desc_t mem_desc; +#ifndef CONFIG_NUMA +struct numa_node_desc_t numa_nodes_desc[1]; +#endif /* CONFIG_NUMA */ + +/* + * empty_zero_page is a special page that is used for + * zero-initialized data and COW. 
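+ * Read faults on anonymous mappings can be serviced by mapping this page
+ * write-protected, so the first write triggers COW and allocates a private
+ * page.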
+ */ +struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); +pg_data_t *node_data[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL(node_data); + +pgd_t swapper_pg_dir[1024] __attribute__((__aligned__(PAGE_SIZE))); +static pud_t vmalloc_pud[1024] __attribute__((__aligned__(PAGE_SIZE))); + +static phys_addr_t mem_start; +static phys_addr_t mem_size_limit; + +static int __init setup_mem_size(char *p) +{ + char *oldp; + unsigned long start, size; + + start = 0; + oldp = p; + size = memparse(p, &p); + if (p == oldp) + return -EINVAL; + + if (*p == '@') + start = memparse(p + 1, &p); + + mem_start = start; + mem_size_limit = size; + return 0; +} +early_param("mem", setup_mem_size); + +pgd_t * +pgd_alloc(struct mm_struct *mm) +{ + pgd_t *ret, *init; + + ret = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + init = pgd_offset(&init_mm, 0UL); + if (ret) + pgd_val(ret[PTRS_PER_PGD-2]) = pgd_val(init[PTRS_PER_PGD-2]); + + return ret; +} + +static inline unsigned long +load_PCB(struct pcb_struct *pcb) +{ + register unsigned long sp __asm__("$30"); + pcb->ksp = sp; + return __reload_thread(pcb); +} + +/* Set up initial PCB, VPTB, and other such nicities. */ + +static inline void +switch_to_system_map(void) +{ + unsigned long newptbr; + unsigned long original_pcb_ptr; + + /* + * Initialize the kernel's page tables. Linux puts the vptb in + * the last slot of the L1 page table. + */ + memset(swapper_pg_dir, 0, PAGE_SIZE); + newptbr = __pa(swapper_pg_dir) >> PAGE_SHIFT; + + /* Also set up the real kernel PCB while we're at it. */ + init_thread_info.pcb.ptbr = newptbr; + init_thread_info.pcb.flags = 1; /* set FEN, clear everything else */ + original_pcb_ptr = load_PCB(&init_thread_info.pcb); + tbia(); +} + +void __init callback_init(void) +{ + pgd_t *pgd; + p4d_t *p4d; + + switch_to_system_map(); + + /* Allocate one PGD and one PUD. */ + pgd = pgd_offset_k(VMALLOC_START); + p4d = p4d_offset(pgd, VMALLOC_START); + p4d_set(p4d, (pud_t *)vmalloc_pud); +} + +void __init zone_sizes_init(void) +{ + unsigned long max_zone_pfns[MAX_NR_ZONES]; + unsigned long dma_pfn; + + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + + dma_pfn = PFN_DOWN(virt_to_phys((void *)MAX_DMA_ADDRESS)); + +#ifdef CONFIG_ZONE_DMA32 + max_zone_pfns[ZONE_DMA32] = min(dma_pfn, max_low_pfn); +#endif + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + + free_area_init(max_zone_pfns); +} + +/* + * paging_init() sets up the memory map. + */ +void __init paging_init(void) +{ + void *zero_page; + + zero_page = __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); + pr_info("zero page start: %p\n", zero_page); + memset(zero_page, 0, PAGE_SIZE); + empty_zero_page = virt_to_page(zero_page); +} + +void __init mem_detect(void) +{ + int i; + + mem_desc.phys_base = 0; + for (i = 0; i < MAX_NUMSOCKETS; i++) { + if (socket_desc[i].is_online) + mem_desc.phys_size += socket_desc[i].socket_mem; + } + + if (mem_start >= NODE0_START) { + mem_desc.base = mem_start; + } else { + mem_desc.base = NODE0_START; + mem_size_limit -= NODE0_START - mem_start; + } + + if (mem_size_limit && mem_size_limit < mem_desc.phys_size - NODE0_START) + mem_desc.size = mem_size_limit; + else + mem_desc.size = mem_desc.phys_size - NODE0_START; +} + +void __init sw64_memblock_init(void) +{ + memblock_add(mem_desc.base, mem_desc.size); + + memblock_remove(1ULL << MAX_PHYSMEM_BITS, PHYS_ADDR_MAX); + + /* Make sure kernel text is in memory range. 
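+ * Re-adding the kernel image is harmless when it is already covered, but
+ * guarantees it stays in the memory map if a "mem=" limit clipped the
+ * range; the reserve keeps the allocator from handing out those pages.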
*/ + memblock_add(__pa_symbol(_text), (unsigned long)(_end - _text)); + memblock_reserve(__pa_symbol(_text), _end - _text); + + max_pfn = max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); + + memblock_allow_resize(); + memblock_initialized = true; + process_memmap(); +} + +#ifndef CONFIG_NUMA +void __init sw64_numa_init(void) +{ + const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); + u64 nd_pa; + void *nd; + int tnid; + + memblock_set_node(mem_desc.base, mem_desc.size, &memblock.memory, 0); + nd_pa = memblock_phys_alloc(nd_size, SMP_CACHE_BYTES); + nd = __va(nd_pa); + + /* report and initialize */ + pr_info("NODE_DATA [mem %#018llx-%#018llx]\n", + nd_pa, nd_pa + nd_size - 1); + tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); + if (tnid != 0) + pr_info("NODE_DATA(%d) on node %d\n", 0, tnid); + + node_data[0] = nd; + memset(NODE_DATA(0), 0, sizeof(pg_data_t)); + NODE_DATA(0)->node_id = 0; + NODE_DATA(0)->node_start_pfn = mem_desc.base >> PAGE_SHIFT; + NODE_DATA(0)->node_spanned_pages = mem_desc.size >> PAGE_SHIFT; + node_set_online(0); +} +#endif /* CONFIG_NUMA */ + +void __init +mem_init(void) +{ + set_max_mapnr(max_low_pfn); + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); +#ifdef CONFIG_SWIOTLB + swiotlb_init(1); +#endif + memblock_free_all(); + mem_init_print_info(NULL); +} + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) +{ + return vmemmap_populate_basepages(start, end, node, altmap); +} + +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) +{ +} +#endif + +#ifdef CONFIG_DISCONTIGMEM +int pfn_valid(unsigned long pfn) +{ + phys_addr_t addr = pfn << PAGE_SHIFT; + + if ((addr >> PAGE_SHIFT) != pfn) + return 0; + return memblock_is_map_memory(addr); +} +EXPORT_SYMBOL(pfn_valid); +#endif + +#ifdef CONFIG_HAVE_MEMBLOCK +#ifndef MIN_MEMBLOCK_ADDR +#define MIN_MEMBLOCK_ADDR __pa(PAGE_OFFSET) +#endif +#ifndef MAX_MEMBLOCK_ADDR +#define MAX_MEMBLOCK_ADDR ((phys_addr_t)~0) +#endif +void __init early_init_dt_add_memory_arch(u64 base, u64 size) +{ + const u64 phys_offset = MIN_MEMBLOCK_ADDR; + + if (acpi_disabled) { + if (!PAGE_ALIGNED(base)) { + if (size < PAGE_SIZE - (base & ~PAGE_MASK)) { + pr_warn("Ignoring memory block 0x%llx - 0x%llx\n", + base, base + size); + return; + } + size -= PAGE_SIZE - (base & ~PAGE_MASK); + base = PAGE_ALIGN(base); + } + size &= PAGE_MASK; + + if (base > MAX_MEMBLOCK_ADDR) { + pr_warn("Ignoring memory block 0x%llx - 0x%llx\n", + base, base + size); + return; + } + + if (base + size - 1 > MAX_MEMBLOCK_ADDR) { + pr_warn("Ignoring memory range 0x%llx - 0x%llx\n", + ((u64)MAX_MEMBLOCK_ADDR) + 1, base + size); + size = MAX_MEMBLOCK_ADDR - base + 1; + } + + if (base + size < phys_offset) { + pr_warn("Ignoring memory block 0x%llx - 0x%llx\n", + base, base + size); + return; + } + + if (base < phys_offset) { + pr_warn("Ignoring memory range 0x%llx - 0x%llx\n", + base, phys_offset); + size -= phys_offset - base; + base = phys_offset; + } + memblock_add(base, size); + } else + return; +} +#endif + +#ifdef CONFIG_MEMORY_HOTPLUG +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + int ret; + + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + if (ret) + printk("%s: Problem encountered in __add_pages() as ret=%d\n", + __func__, ret); + + return ret; +} + +void 
arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + + __remove_pages(start_pfn, nr_pages, altmap); +} +#endif diff --git a/arch/sw_64/mm/mmap.c b/arch/sw_64/mm/mmap.c new file mode 100644 index 000000000000..a7a189fc36d6 --- /dev/null +++ b/arch/sw_64/mm/mmap.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/errno.h> +#include <linux/file.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/random.h> +#include <linux/syscalls.h> + +#include <asm/current.h> + +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct vm_unmapped_area_info info; + unsigned long limit; + + /* Support 32 bit heap. */ + if (current->personality & ADDR_LIMIT_32BIT) + limit = 0x80000000; + else + limit = TASK_SIZE; + + if (len > limit) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (addr + len > TASK_SIZE) + return -EINVAL; + + return addr; + } + + if (addr) { + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = limit; + info.align_mask = 0; + info.align_offset = pgoff << PAGE_SHIFT; + + return vm_unmapped_area(&info); +} + +unsigned long arch_mmap_rnd(void) +{ + unsigned long rnd; + + /* 8MB for 32bit, 256MB for 64bit */ + if (current->personality & ADDR_LIMIT_32BIT) + rnd = get_random_long() & 0x7ffffful; + else + rnd = get_random_long() & 0xffffffful; + + return rnd << PAGE_SHIFT; +} + +/* + * This function, called very early during the creation of a new process VM + * image, sets up which VM layout function to use: + */ +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) +{ + unsigned long random_factor = 0UL; + + if (current->flags & PF_RANDOMIZE) + random_factor = arch_mmap_rnd(); + + /* + * Fall back to the standard layout if the personality bit is set, or + * if the expected stack growth is unlimited: + */ + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + mm->get_unmapped_area = arch_get_unmapped_area; +} + +SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, unsigned long, fd, + unsigned long, off) +{ + unsigned long ret = -EINVAL; + + if ((off + PAGE_ALIGN(len)) < off) + goto out; + if (off & ~PAGE_MASK) + goto out; + ret = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + out: + return ret; +} diff --git a/arch/sw_64/mm/numa.c b/arch/sw_64/mm/numa.c new file mode 100644 index 000000000000..97288d91d7bb --- /dev/null +++ b/arch/sw_64/mm/numa.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DISCONTIGMEM NUMA sw64 support. 
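+ *
+ * Memblocks are assigned to nodes from the per-socket memory layout, a
+ * node distance table is maintained, and cpu_to_node_map and
+ * node_to_cpumask_map are kept up to date as CPUs are added and removed.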
+ */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/memblock.h> +#include <linux/swap.h> +#include <linux/initrd.h> +#include <linux/pfn.h> +#include <linux/module.h> +#include <linux/cpuset.h> +#include <linux/init.h> +#ifdef CONFIG_PCI +#include <linux/pci.h> +#endif +#include <linux/acpi.h> +#include <linux/of.h> + +#include <asm/pgalloc.h> +#include <asm/sections.h> +#include <asm/sw64_init.h> +#include <asm/hw_init.h> +#include <asm/memory.h> +#include <asm/core.h> + +int cpu_to_node_map[NR_CPUS]; +cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; +EXPORT_SYMBOL(node_to_cpumask_map); + +struct numa_node_desc_t numa_nodes_desc[MAX_NUMNODES]; +nodemask_t numa_nodes_parsed __initdata; + +static int numa_distance_cnt; +static u8 *numa_distance; +static bool numa_off; + +static __init int numa_setup(char *opt) +{ + if (!opt) + return -EINVAL; + if (!strncmp(opt, "off", 3)) + numa_off = 1; + return 0; +} +early_param("numa", numa_setup); + +/* + * Allocate node_to_cpumask_map based on number of available nodes + * Requires node_possible_map to be valid. + * + * Note: cpumask_of_node() is not valid until after this is done. + * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) + */ +static void __init setup_node_to_cpumask_map(void) +{ + int node; + + /* setup nr_node_ids if not done yet */ + if (nr_node_ids == MAX_NUMNODES) + setup_nr_node_ids(); + + /* allocate and clear the mapping */ + for (node = 0; node < nr_node_ids; node++) { + alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); + cpumask_clear(node_to_cpumask_map[node]); + } + + /* cpumask_of_node() will now work */ + pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); +} + +/** + * numa_add_memblk - Set node id to memblk + * @nid: NUMA node ID of the new memblk + * @start: Start address of the new memblk + * @end: End address of the new memblk + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int __init numa_add_memblk(int nid, u64 start, u64 end) +{ + int ret; + + ret = memblock_set_node(start, (end - start), &memblock.memory, nid); + if (ret < 0) { + pr_err("memblock [0x%llx - 0x%llx] failed to add on node %d\n", + start, (end - 1), nid); + return ret; + } + + node_set(nid, numa_nodes_parsed); + return ret; +} + +/** + * Initialize NODE_DATA for a node on the local memory + */ +static void __init setup_node_data(int nid, unsigned long start_pfn, unsigned long end_pfn) +{ + const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); + u64 nd_pa; + void *nd; + int tnid; + + if (start_pfn >= end_pfn) + pr_info("Initmem setup node %d [<memory-less node>]\n", nid); + + nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); + nd = __va(nd_pa); + + /* report and initialize */ + pr_info("NODE_DATA [mem %#018llx-%#018llx]\n", + nd_pa, nd_pa + nd_size - 1); + tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); + if (tnid != nid) + pr_info("NODE_DATA(%d) on node %d\n", nid, tnid); + + node_data[nid] = nd; + memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); + NODE_DATA(nid)->node_id = nid; + NODE_DATA(nid)->node_start_pfn = start_pfn; + NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; +} + +/** + * numa_free_distance + * + * Free current distance table. 
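+ *
+ * The table is the flat array built by numa_alloc_distance(), indexed as
+ * numa_distance[from * numa_distance_cnt + to]. For a two-node system its
+ * default contents are
+ *
+ *   { 10, 20,     distances from node 0
+ *     20, 10 }    distances from node 1
+ *
+ * i.e. LOCAL_DISTANCE on the diagonal and REMOTE_DISTANCE elsewhere.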
+ */ +void __init numa_free_distance(void) +{ + size_t size; + + if (!numa_distance) + return; + + size = numa_distance_cnt * numa_distance_cnt * + sizeof(numa_distance[0]); + + memblock_free(__pa(numa_distance), size); + numa_distance_cnt = 0; + numa_distance = NULL; +} + +/** + * + * Create a new NUMA distance table. + * + */ +static int __init numa_alloc_distance(void) +{ + size_t size; + u64 phys; + int i, j; + + size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]); + phys = memblock_find_in_range(0, PFN_PHYS(max_pfn), + size, PAGE_SIZE); + if (WARN_ON(!phys)) + return -ENOMEM; + + memblock_reserve(phys, size); + + numa_distance = __va(phys); + numa_distance_cnt = nr_node_ids; + + /* fill with the default distances */ + for (i = 0; i < numa_distance_cnt; i++) + for (j = 0; j < numa_distance_cnt; j++) { + numa_distance[i * numa_distance_cnt + j] = i == j ? + LOCAL_DISTANCE : REMOTE_DISTANCE; + } + + pr_info("Initialized distance table, cnt=%d\n", numa_distance_cnt); + + return 0; +} + +/** + * numa_set_distance - Set inter node NUMA distance from node to node. + * @from: the 'from' node to set distance + * @to: the 'to' node to set distance + * @distance: NUMA distance + * + * Set the distance from node @from to @to to @distance. + * If distance table doesn't exist, a warning is printed. + * + * If @from or @to is higher than the highest known node or lower than zero + * or @distance doesn't make sense, the call is ignored. + * + */ +void __init numa_set_distance(int from, int to, int distance) +{ + if (!numa_distance) { + pr_warn_once("Warning: distance table not allocated yet\n"); + return; + } + + if (from >= numa_distance_cnt || to >= numa_distance_cnt || + from < 0 || to < 0) { + pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n", + from, to, distance); + return; + } + + if ((u8)distance != distance || + (from == to && distance != LOCAL_DISTANCE)) { + pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n", + from, to, distance); + return; + } + + numa_distance[from * numa_distance_cnt + to] = distance; +} + +/** + * Return NUMA distance @from to @to + */ +int __node_distance(int from, int to) +{ + if (from >= numa_distance_cnt || to >= numa_distance_cnt) + return from == to ? 
LOCAL_DISTANCE : REMOTE_DISTANCE; + return numa_distance[from * numa_distance_cnt + to]; +} +EXPORT_SYMBOL(__node_distance); + +static int __init numa_register_nodes(void) +{ + int nid; + struct memblock_region *mblk; + + /* Check that valid nid is set to memblks */ + for_each_mem_region(mblk) { + pr_info("memblk node %d [mem %#018llx-%#018llx]\n", + mblk->nid, mblk->base, + mblk->base + mblk->size - 1); + if (mblk->nid == NUMA_NO_NODE || mblk->nid >= MAX_NUMNODES) { + pr_warn("Warning: invalid memblk node %d [mem %#018llx-%#018llx]\n", + mblk->nid, mblk->base, + mblk->base + mblk->size - 1); + return -EINVAL; + } + } + + /* Finally register nodes */ + for_each_node_mask(nid, numa_nodes_parsed) { + unsigned long start_pfn, end_pfn; + + get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + setup_node_data(nid, start_pfn, end_pfn); + node_set_online(nid); + } + + /* Setup online nodes to actual nodes */ + node_possible_map = numa_nodes_parsed; + + return 0; +} + +static int __init numa_init(int (*init_func)(void)) +{ + int ret; + + nodes_clear(numa_nodes_parsed); + nodes_clear(node_possible_map); + nodes_clear(node_online_map); + numa_free_distance(); + + ret = numa_alloc_distance(); + if (ret < 0) + return ret; + + ret = init_func(); + if (ret < 0) + return ret; + + if (nodes_empty(numa_nodes_parsed)) { + pr_info("No NUMA configuration found\n"); + return -EINVAL; + } + + ret = numa_register_nodes(); + if (ret < 0) + return ret; + + setup_node_to_cpumask_map(); + + return 0; +} + +static void __init get_numa_info_socket(void) +{ + int i; + + phys_addr_t base = 0; + + for (i = 0; i < MAX_NUMSOCKETS; i++) { + if (socket_desc[i].is_online) { + numa_nodes_desc[i].base = base; + numa_nodes_desc[i].size = socket_desc[i].socket_mem; + base += numa_nodes_desc[i].size; + } + } +} + +static int __init manual_numa_init(void) +{ + int ret, nid; + struct memblock_region *mblk; + phys_addr_t node_base, node_size, node_end; + + if (numa_off) { + pr_info("NUMA disabled\n"); /* Forced off on command line. */ + pr_info("Faking one node at [mem %#018llx-%#018llx]\n", + memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1); + for_each_mem_region(mblk) { + ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); + if (!ret) + continue; + + pr_err("NUMA init failed\n"); + return ret; + } + } else { + get_numa_info_socket(); + + for (nid = 0; nid < MAX_NUMNODES; nid++) { + node_base = numa_nodes_desc[nid].base; + node_size = numa_nodes_desc[nid].size; + node_end = node_base + node_size; + ret = 0; + + if (!node_end) + continue; + + for_each_mem_region(mblk) { + if (mblk->base >= node_base && mblk->base < node_end) { + if (mblk->base + mblk->size < node_end) + ret = numa_add_memblk(nid, mblk->base, mblk->base + mblk->size); + else + ret = numa_add_memblk(nid, mblk->base, node_end); + } + } + + if (!node_size) { + memblock_add_node(node_base, node_size, nid); + node_set(nid, numa_nodes_parsed); + pr_info("Setup empty node %d from %#llx\n", nid, node_base); + } + + if (!ret) + continue; + + pr_err("NUMA init failed for node %d, [mem %#018llx-%#018llx]", + nid, node_base, node_end - 1); + } + } + + return 0; +} + +/* We do not have acpi support. 
*/ +int acpi_numa_init(void) +{ + return -1; +} + +void __init sw64_numa_init(void) +{ + if (!numa_off) { + if (!acpi_disabled && !numa_init(acpi_numa_init)) + return; + if (acpi_disabled && !numa_init(of_numa_init)) + return; + } + + numa_init(manual_numa_init); +} + +void cpu_set_node(void) +{ + int i; + + if (numa_off) { + for (i = 0; i < nr_cpu_ids; i++) + cpu_to_node_map[i] = 0; + } else { + int rr, default_node, cid; + + rr = first_node(node_online_map); + for (i = 0; i < nr_cpu_ids; i++) { + cid = cpu_to_rcid(i); + default_node = cid >> CORES_PER_NODE_SHIFT; + if (node_online(default_node)) { + cpu_to_node_map[i] = default_node; + } else { + cpu_to_node_map[i] = rr; + rr = next_node(rr, node_online_map); + if (rr == MAX_NUMNODES) + rr = first_node(node_online_map); + } + } + } + /* + * Setup numa_node for cpu 0 before per_cpu area for booting. + * Actual setup of numa_node will be done in native_smp_prepare_cpus(). + */ + set_cpu_numa_node(0, cpu_to_node_map[0]); +} + +void numa_store_cpu_info(unsigned int cpu) +{ + set_cpu_numa_node(cpu, cpu_to_node_map[cpu]); +} + +/* + * Returns a pointer to the bitmask of CPUs on Node 'node'. + */ +const struct cpumask *cpumask_of_node(int node) +{ + + if (node == NUMA_NO_NODE) + return cpu_all_mask; + + if (WARN_ON(node < 0 || node >= nr_node_ids)) + return cpu_none_mask; + + if (WARN_ON(node_to_cpumask_map[node] == NULL)) + return cpu_online_mask; + + return node_to_cpumask_map[node]; +} +EXPORT_SYMBOL(cpumask_of_node); + +static void numa_update_cpu(unsigned int cpu, bool remove) +{ + int nid = cpu_to_node(cpu); + + if (nid == NUMA_NO_NODE) + return; + + if (remove) + cpumask_clear_cpu(cpu, node_to_cpumask_map[nid]); + else + cpumask_set_cpu(cpu, node_to_cpumask_map[nid]); +} + +void numa_add_cpu(unsigned int cpu) +{ + numa_update_cpu(cpu, false); +} + +void numa_remove_cpu(unsigned int cpu) +{ + numa_update_cpu(cpu, true); +} diff --git a/arch/sw_64/mm/physaddr.c b/arch/sw_64/mm/physaddr.c new file mode 100644 index 000000000000..d5cf83e671ae --- /dev/null +++ b/arch/sw_64/mm/physaddr.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/mmdebug.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <asm/page.h> + +unsigned long __phys_addr(unsigned long x) +{ + unsigned long y = x; + + if (y >= __START_KERNEL_map) { + y -= __START_KERNEL_map; + VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); + } else { + VIRTUAL_BUG_ON(y < PAGE_OFFSET); + y -= PAGE_OFFSET; + VIRTUAL_BUG_ON(!phys_addr_valid(y)); + } + return y; +} +EXPORT_SYMBOL(__phys_addr); + +bool __virt_addr_valid(unsigned long x) +{ + unsigned long y = x; + + if (y >= __START_KERNEL_map) { + y -= __START_KERNEL_map; + if (y >= KERNEL_IMAGE_SIZE) + return false; + } else { + if (y < PAGE_OFFSET) + return false; + y -= PAGE_OFFSET; + } + + return pfn_valid(y >> PAGE_SHIFT); +} +EXPORT_SYMBOL(__virt_addr_valid); diff --git a/arch/sw_64/mm/thp.c b/arch/sw_64/mm/thp.c new file mode 100644 index 000000000000..68260dd0e837 --- /dev/null +++ b/arch/sw_64/mm/thp.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> + +int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty) +{ + int changed = !pmd_same(*pmdp, entry); + + VM_BUG_ON(address & 
~HPAGE_PMD_MASK); + + if (changed && dirty) { + *pmdp = entry; + flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + } + + return changed; +} +int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + int ret = 0; + + if (pmd_young(*pmdp)) + ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, + (unsigned long *)pmdp); + return ret; +} + +int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + int young; + + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + + young = pmdp_test_and_clear_young(vma, address, pmdp); + if (young) + flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + + return young; +} +void pmdp_splitting_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + int set; + + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + set = !test_and_set_bit(_PAGE_BIT_SPLITTING, (unsigned long *)pmdp); + if (set) { + /* need tlb flush only to serialize against gup-fast */ + flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + } +} diff --git a/arch/sw_64/net/Makefile b/arch/sw_64/net/Makefile new file mode 100644 index 000000000000..d4663b4bf509 --- /dev/null +++ b/arch/sw_64/net/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Arch-specific network modules +# +obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o diff --git a/arch/sw_64/net/bpf_jit.h b/arch/sw_64/net/bpf_jit.h new file mode 100644 index 000000000000..2bf3ca6f3abd --- /dev/null +++ b/arch/sw_64/net/bpf_jit.h @@ -0,0 +1,343 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * BPF JIT compiler for SW64 + * + * Copyright (C) Mao Minkai + * Author: Mao Minkai + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. 
+ */ + +#ifndef _SW64_BPF_JIT_H +#define _SW64_BPF_JIT_H + +#define SW64_BPF_OPCODE_OFFSET 26 +#define SW64_BPF_RA_OFFSET 21 +#define SW64_BPF_RB_OFFSET 16 +#define SW64_BPF_SIMPLE_ALU_IMM_OFFSET 13 +#define SW64_BPF_SIMPLE_ALU_FUNC_OFFSET 5 +#define SW64_BPF_SIMPLE_ALU_RC_OFFSET 0 + +#define SW64_BPF_OPCODE_BR_CALL 0x01 +#define SW64_BPF_OPCODE_BR_RET 0x02 +#define SW64_BPF_OPCODE_BR_JMP 0x03 +#define SW64_BPF_OPCODE_BR_BR 0x04 +#define SW64_BPF_OPCODE_BR_BSR 0x05 +#define SW64_BPF_OPCODE_BR_BEQ 0x30 +#define SW64_BPF_OPCODE_BR_BNE 0x31 +#define SW64_BPF_OPCODE_BR_BLT 0x32 +#define SW64_BPF_OPCODE_BR_BLE 0x33 +#define SW64_BPF_OPCODE_BR_BGT 0x34 +#define SW64_BPF_OPCODE_BR_BGE 0x35 +#define SW64_BPF_OPCODE_BR_BLBC 0x36 +#define SW64_BPF_OPCODE_BR_BLBS 0x37 + +#define SW64_BPF_OPCODE_LS_LDBU 0x20 +#define SW64_BPF_OPCODE_LS_LDHU 0x21 +#define SW64_BPF_OPCODE_LS_LDW 0x22 +#define SW64_BPF_OPCODE_LS_LDL 0x23 +#define SW64_BPF_OPCODE_LS_STB 0x28 +#define SW64_BPF_OPCODE_LS_STH 0x29 +#define SW64_BPF_OPCODE_LS_STW 0x2A +#define SW64_BPF_OPCODE_LS_STL 0x2B +#define SW64_BPF_OPCODE_LS_LDI 0x3E +#define SW64_BPF_OPCODE_LS_LDIH 0x3F + +#define SW64_BPF_OPCODE_ALU_REG 0x10 +#define SW64_BPF_OPCODE_ALU_IMM 0x12 + +#define SW64_BPF_FUNC_ALU_ADDW 0x00 +#define SW64_BPF_FUNC_ALU_SUBW 0x01 +#define SW64_BPF_FUNC_ALU_ADDL 0x08 +#define SW64_BPF_FUNC_ALU_SUBL 0x09 +#define SW64_BPF_FUNC_ALU_MULW 0x10 +#define SW64_BPF_FUNC_ALU_MULL 0x18 +#define SW64_BPF_FUNC_ALU_ZAP 0x68 +#define SW64_BPF_FUNC_ALU_ZAPNOT 0x69 +#define SW64_BPF_FUNC_ALU_SEXTB 0x6A +#define SW64_BPF_FUNC_ALU_SEXTH 0x6B + +#define SW64_BPF_OPCODE_BS_REG 0x10 +#define SW64_BPF_OPCODE_BS_IMM 0x12 + +#define SW64_BPF_FUNC_BS_SLL 0x48 +#define SW64_BPF_FUNC_BS_SRL 0x49 +#define SW64_BPF_FUNC_BS_SRA 0x4A + +#define SW64_BPF_OPCODE_LOGIC_REG 0x10 +#define SW64_BPF_OPCODE_LOGIC_IMM 0x12 + +#define SW64_BPF_FUNC_LOGIC_AND 0x38 +#define SW64_BPF_FUNC_LOGIC_BIC 0x39 +#define SW64_BPF_FUNC_LOGIC_BIS 0x3A +#define SW64_BPF_FUNC_LOGIC_ORNOT 0x3B +#define SW64_BPF_FUNC_LOGIC_XOR 0x3C +#define SW64_BPF_FUNC_LOGIC_EQV 0x3D + +#define SW64_BPF_OPCODE_CMP_REG 0x10 +#define SW64_BPF_OPCODE_CMP_IMM 0x12 + +#define SW64_BPF_FUNC_CMP_EQ 0x28 +#define SW64_BPF_FUNC_CMP_LT 0x29 +#define SW64_BPF_FUNC_CMP_LE 0x2A +#define SW64_BPF_FUNC_CMP_ULT 0x2B +#define SW64_BPF_FUNC_CMP_ULE 0x2C + +/* special instuction used in jit_fill_hole() */ +#define SW64_BPF_ILLEGAL_INSN ((1 << 25) | 0x80) + +enum sw64_bpf_registers { + SW64_BPF_REG_V0 = 0, /* keep return value */ + SW64_BPF_REG_T0 = 1, + SW64_BPF_REG_T1 = 2, + SW64_BPF_REG_T2 = 3, + SW64_BPF_REG_T3 = 4, + SW64_BPF_REG_T4 = 5, + SW64_BPF_REG_T5 = 6, + SW64_BPF_REG_T6 = 7, + SW64_BPF_REG_T7 = 8, + SW64_BPF_REG_S0 = 9, /* callee saved */ + SW64_BPF_REG_S1 = 10, /* callee saved */ + SW64_BPF_REG_S2 = 11, /* callee saved */ + SW64_BPF_REG_S3 = 12, /* callee saved */ + SW64_BPF_REG_S4 = 13, /* callee saved */ + SW64_BPF_REG_S5 = 14, /* callee saved */ + SW64_BPF_REG_S6 = 15, /* callee saved */ + SW64_BPF_REG_FP = 15, /* frame pointer if necessary */ + SW64_BPF_REG_A0 = 16, /* argument 0 */ + SW64_BPF_REG_A1 = 17, /* argument 1 */ + SW64_BPF_REG_A2 = 18, /* argument 2 */ + SW64_BPF_REG_A3 = 19, /* argument 3 */ + SW64_BPF_REG_A4 = 20, /* argument 4 */ + SW64_BPF_REG_A5 = 21, /* argument 5 */ + SW64_BPF_REG_T8 = 22, + SW64_BPF_REG_T9 = 23, + SW64_BPF_REG_T10 = 24, + SW64_BPF_REG_T11 = 25, + SW64_BPF_REG_RA = 26, /* callee saved, keep retuen address */ + SW64_BPF_REG_T12 = 27, + SW64_BPF_REG_PV = 27, + SW64_BPF_REG_AT = 28, /* 
reserved by assembler */ + SW64_BPF_REG_GP = 29, /* global pointer */ + SW64_BPF_REG_SP = 30, /* callee saved, stack pointer */ + SW64_BPF_REG_ZR = 31 /* read 0 */ +}; + +/* SW64 load and store instructions */ +#define SW64_BPF_LDBU(dst, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDBU, dst, rb, offset16) +#define SW64_BPF_LDHU(dst, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDHU, dst, rb, offset16) +#define SW64_BPF_LDW(dst, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDW, dst, rb, offset16) +#define SW64_BPF_LDL(dst, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDL, dst, rb, offset16) +#define SW64_BPF_STB(src, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STB, src, rb, offset16) +#define SW64_BPF_STH(src, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STH, src, rb, offset16) +#define SW64_BPF_STW(src, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STW, src, rb, offset16) +#define SW64_BPF_STL(src, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STL, src, rb, offset16) +#define SW64_BPF_LDI(dst, rb, imm16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDI, dst, rb, imm16) +#define SW64_BPF_LDIH(dst, rb, imm16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDIH, dst, rb, imm16) + +/* SW64 ALU instructions REG format */ +#define SW64_BPF_ADDW_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_ADDW) +#define SW64_BPF_ADDL_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_ADDL) +#define SW64_BPF_SUBW_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_SUBW) +#define SW64_BPF_SUBL_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_SUBL) +#define SW64_BPF_MULW_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_MULW) +#define SW64_BPF_MULL_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_MULL) +#define SW64_BPF_ZAP_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_ZAP) +#define SW64_BPF_ZAPNOT_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_ZAPNOT) +#define SW64_BPF_SEXTB_REG(rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + 0, rb, dst, SW64_BPF_FUNC_ALU_SEXTB) +#define SW64_BPF_SEXTH_REG(rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + 0, rb, dst, SW64_BPF_FUNC_ALU_SEXTH) + +/* SW64 ALU instructions IMM format */ +#define SW64_BPF_ADDW_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_ADDW) +#define SW64_BPF_ADDL_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_ADDL) +#define SW64_BPF_SUBW_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_SUBW) +#define SW64_BPF_SUBL_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_SUBL) +#define SW64_BPF_MULW_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, 
imm8, dst, SW64_BPF_FUNC_ALU_MULW) +#define SW64_BPF_MULL_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_MULL) +#define SW64_BPF_ZAP_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_ZAP) +#define SW64_BPF_ZAPNOT_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_ZAPNOT) +#define SW64_BPF_SEXTB_IMM(imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + 0, imm8, dst, SW64_BPF_FUNC_ALU_SEXTB) + +/* SW64 bit shift instructions REG format */ +#define SW64_BPF_SLL_REG(src, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_BS_REG, \ + src, rb, dst, SW64_BPF_FUNC_BS_SLL) +#define SW64_BPF_SRL_REG(src, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_BS_REG, \ + src, rb, dst, SW64_BPF_FUNC_BS_SRL) +#define SW64_BPF_SRA_REG(src, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_BS_REG, \ + src, rb, dst, SW64_BPF_FUNC_BS_SRA) + +/* SW64 bit shift instructions IMM format */ +#define SW64_BPF_SLL_IMM(src, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_BS_IMM, \ + src, imm8, dst, SW64_BPF_FUNC_BS_SLL) +#define SW64_BPF_SRL_IMM(src, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_BS_IMM, \ + src, imm8, dst, SW64_BPF_FUNC_BS_SRL) +#define SW64_BPF_SRA_IMM(src, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_BS_IMM, \ + src, imm8, dst, SW64_BPF_FUNC_BS_SRA) + +/* SW64 control instructions */ +#define SW64_BPF_CALL(ra, rb) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_BR_CALL, ra, rb, 0) +#define SW64_BPF_RET(rb) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_BR_RET, SW64_BPF_REG_ZR, rb, 0) +#define SW64_BPF_JMP(ra, rb) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_BR_JMP, ra, rb, 0) +#define SW64_BPF_BR(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BR, ra, offset) +#define SW64_BPF_BSR(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BSR, ra, offset) +#define SW64_BPF_BEQ(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BEQ, ra, offset) +#define SW64_BPF_BNE(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BNE, ra, offset) +#define SW64_BPF_BLT(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLT, ra, offset) +#define SW64_BPF_BLE(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLE, ra, offset) +#define SW64_BPF_BGT(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BGT, ra, offset) +#define SW64_BPF_BGE(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BGE, ra, offset) +#define SW64_BPF_BLBC(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLBC, ra, offset) +#define SW64_BPF_BLBS(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLBS, ra, offset) + +/* SW64 bit logic instructions REG format */ +#define SW64_BPF_AND_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ + ra, rb, dst, SW64_BPF_FUNC_LOGIC_AND) +#define SW64_BPF_ANDNOT_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ + ra, rb, dst, SW64_BPF_FUNC_LOGIC_BIC) +#define SW64_BPF_OR_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ + ra, rb, dst, SW64_BPF_FUNC_LOGIC_BIS) +#define SW64_BPF_ORNOT_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ + ra, rb, dst, SW64_BPF_FUNC_LOGIC_ORNOT) +#define 
SW64_BPF_XOR_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ + ra, rb, dst, SW64_BPF_FUNC_LOGIC_XOR) +#define SW64_BPF_EQV_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ + ra, rb, dst, SW64_BPF_FUNC_LOGIC_EQV) + +/* SW64 bit logic instructions IMM format */ +#define SW64_BPF_AND_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_LOGIC_AND) +#define SW64_BPF_ANDNOT_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_LOGIC_BIC) +#define SW64_BPF_OR_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_LOGIC_BIS) +#define SW64_BPF_ORNOT_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_LOGIC_ORNOT) +#define SW64_BPF_XOR_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_LOGIC_XOR) +#define SW64_BPF_EQV_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_LOGIC_EQV) + +/* SW64 compare instructions REG format */ +#define SW64_BPF_CMPEQ_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ + ra, rb, dst, SW64_BPF_FUNC_CMP_EQ) +#define SW64_BPF_CMPLT_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ + ra, rb, dst, SW64_BPF_FUNC_CMP_LT) +#define SW64_BPF_CMPLE_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ + ra, rb, dst, SW64_BPF_FUNC_CMP_LE) +#define SW64_BPF_CMPULT_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ + ra, rb, dst, SW64_BPF_FUNC_CMP_ULT) +#define SW64_BPF_CMPULE_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ + ra, rb, dst, SW64_BPF_FUNC_CMP_ULE) + +/* SW64 compare instructions imm format */ +#define SW64_BPF_CMPEQ_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_CMP_EQ) +#define SW64_BPF_CMPLT_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_CMP_LT) +#define SW64_BPF_CMPLE_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_CMP_LE) +#define SW64_BPF_CMPULT_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_CMP_ULT) +#define SW64_BPF_CMPULE_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_CMP_ULE) + +#endif /* _SW64_BPF_JIT_H */ diff --git a/arch/sw_64/net/bpf_jit_comp.c b/arch/sw_64/net/bpf_jit_comp.c new file mode 100644 index 000000000000..102de82d69e1 --- /dev/null +++ b/arch/sw_64/net/bpf_jit_comp.c @@ -0,0 +1,973 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * BPF JIT compiler for SW64 + * + * Copyright (C) Mao Minkai + * Author: Mao Minkai + * + * This file is taken from arch/arm64/net/bpf_jit_comp.c + * Copyright (C) 2014-2016 Zi Shen Lim zlim.lnx@gmail.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ + +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/printk.h> + +#include <asm/cacheflush.h> + +#include "bpf_jit.h" + +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) +#define TCALL_CNT (MAX_BPF_JIT_REG + 2) + +/* + * TO-DO List: + * DIV + * MOD + */ + +static const int bpf2sw64[] = { + /* return value from in-kernel function, and exit value from eBPF */ + [BPF_REG_0] = SW64_BPF_REG_V0, + /* arguments from eBPF program to in-kernel function */ + [BPF_REG_1] = SW64_BPF_REG_A1, + [BPF_REG_2] = SW64_BPF_REG_A2, + [BPF_REG_3] = SW64_BPF_REG_A3, + [BPF_REG_4] = SW64_BPF_REG_A4, + [BPF_REG_5] = SW64_BPF_REG_A5, + /* callee saved registers that in-kernel function will preserve */ + [BPF_REG_6] = SW64_BPF_REG_S1, + [BPF_REG_7] = SW64_BPF_REG_S2, + [BPF_REG_8] = SW64_BPF_REG_S3, + [BPF_REG_9] = SW64_BPF_REG_S4, + /* read-only frame pointer to access stack */ + [BPF_REG_FP] = SW64_BPF_REG_S0, + /* temporary registers for internal BPF JIT */ + [TMP_REG_1] = SW64_BPF_REG_T1, + [TMP_REG_2] = SW64_BPF_REG_T2, + /* tail_call_cnt */ + [TCALL_CNT] = SW64_BPF_REG_S5, + /* temporary register for blinding constants */ + [BPF_REG_AX] = SW64_BPF_REG_T12, +}; + +struct jit_ctx { + const struct bpf_prog *prog; + int idx; // JITed instruction index + int epilogue_offset; + int *insn_offset; // [bpf_insn_idx] = jited_insn_idx + u32 *image; // JITed instruction + u32 stack_size; +}; + +struct sw64_jit_data { + struct bpf_binary_header *header; + u8 *image; // bpf instruction + struct jit_ctx ctx; +}; + +static inline u32 sw64_bpf_gen_format_br(int opcode, enum sw64_bpf_registers ra, u32 disp) +{ + opcode = opcode << SW64_BPF_OPCODE_OFFSET; + ra = ra << SW64_BPF_RA_OFFSET; + return opcode | ra | disp; +} + +static inline u32 sw64_bpf_gen_format_ls(int opcode, enum sw64_bpf_registers ra, + enum sw64_bpf_registers rb, u16 disp) +{ + opcode = opcode << SW64_BPF_OPCODE_OFFSET; + ra = ra << SW64_BPF_RA_OFFSET; + rb = rb << SW64_BPF_RB_OFFSET; + return opcode | ra | rb | disp; +} + +static inline u32 sw64_bpf_gen_format_simple_alu_reg(int opcode, enum sw64_bpf_registers ra, + enum sw64_bpf_registers rb, enum sw64_bpf_registers rc, int function) +{ + opcode = opcode << SW64_BPF_OPCODE_OFFSET; + ra = ra << SW64_BPF_RA_OFFSET; + rb = rb << SW64_BPF_RB_OFFSET; + rc = rc << SW64_BPF_SIMPLE_ALU_RC_OFFSET; + function = function << SW64_BPF_SIMPLE_ALU_FUNC_OFFSET; + return opcode | ra | rb | function | rc; +} + +static inline u32 sw64_bpf_gen_format_simple_alu_imm(int opcode, enum sw64_bpf_registers ra, + enum sw64_bpf_registers rc, u8 imm, int function) +{ + opcode = opcode << SW64_BPF_OPCODE_OFFSET; + ra = ra << SW64_BPF_RA_OFFSET; + rc = rc << SW64_BPF_SIMPLE_ALU_RC_OFFSET; + imm = imm << SW64_BPF_SIMPLE_ALU_IMM_OFFSET; + function = function << SW64_BPF_SIMPLE_ALU_FUNC_OFFSET; + return opcode | ra | imm | function | rc; +} + +static inline void emit(const u32 insn, struct jit_ctx *ctx) +{ + if (ctx->image != NULL) + ctx->image[ctx->idx] = insn; + + ctx->idx++; +} + +static inline void emit_sw64_ldu64(const int dst, const u64 imm64, struct jit_ctx *ctx) +{ + u16 imm_tmp; + int 
reg_tmp = SW64_BPF_REG_T8; + + imm_tmp = (imm64 >> 60) & 0xf; + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(dst, 60, dst), ctx); + + imm_tmp = (imm64 >> 45) & 0x7fff; + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 45, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + + imm_tmp = (imm64 >> 30) & 0x7fff; + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 30, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + + imm_tmp = (imm64 >> 15) & 0x7fff; + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + + imm_tmp = imm64 & 0x7fff; + emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx); +} + +static inline void emit_sw64_ldu32(const int dst, const u32 imm32, struct jit_ctx *ctx) +{ + u16 imm_tmp; + int reg_tmp = SW64_BPF_REG_T8; + + imm_tmp = (imm32 >> 30) & 3; + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(dst, 30, dst), ctx); + + imm_tmp = (imm32 >> 15) & 0x7fff; + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + + imm_tmp = imm32 & 0x7fff; + emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx); +} + +static inline void emit_sw64_lds32(const int dst, const s32 imm32, struct jit_ctx *ctx) +{ + s16 hi = imm32 >> 16; + s16 lo = imm32 & 0xffff; + int reg_tmp = SW64_BPF_REG_T8; + + emit(SW64_BPF_LDIH(dst, SW64_BPF_REG_ZR, hi), ctx); + if (lo & 0x8000) { // sign bit is 1 + lo = lo & 0x7fff; + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, 1), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + emit(SW64_BPF_LDI(dst, dst, lo), ctx); + } else { // sign bit is 0 + emit(SW64_BPF_LDI(dst, dst, lo), ctx); + } +} + +/* dst = ra / rb */ +static void emit_sw64_div(const int ra, const int rb, const int dst, struct jit_ctx *ctx) +{ + pr_err("DIV is not supported for now.\n"); +} + +/* dst = ra % rb */ +static void emit_sw64_mod(const int ra, const int rb, const int dst, struct jit_ctx *ctx) +{ + pr_err("MOD is not supported for now.\n"); +} + +static void emit_sw64_htobe16(const int dst, struct jit_ctx *ctx) +{ + int tmp = SW64_BPF_REG_T8; + + emit(SW64_BPF_LDI(tmp, dst, 0), ctx); + emit(SW64_BPF_ZAPNOT_IMM(tmp, 0x2, tmp), ctx); + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x1, dst), ctx); + emit(SW64_BPF_SRL_REG(tmp, 8, tmp), ctx); + emit(SW64_BPF_SLL_REG(dst, 8, dst), ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp, dst), ctx); +} + +static void emit_sw64_htobe32(const int dst, struct jit_ctx *ctx) +{ + int tmp1 = SW64_BPF_REG_T8; + int tmp2 = SW64_BPF_REG_T9; + + emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); + emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); + emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x1, tmp1), ctx); + emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x8, tmp1), ctx); + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x6, dst), ctx); + emit(SW64_BPF_SLL_IMM(tmp1, 24, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp2, 24, tmp2), ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); + + emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); + emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); + emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x2, tmp1), ctx); + emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x4, tmp1), ctx); + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x9, dst), ctx); + emit(SW64_BPF_SLL_IMM(tmp1, 8, tmp1), ctx); + 
emit(SW64_BPF_SRL_IMM(tmp2, 8, tmp2), ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); +} + +static void emit_sw64_htobe64(const int dst, struct jit_ctx *ctx) +{ + int tmp1 = SW64_BPF_REG_T8; + int tmp2 = SW64_BPF_REG_T9; + + emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); + emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); + emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x1, tmp1), ctx); + emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x80, tmp1), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0x81, dst), ctx); + emit(SW64_BPF_SLL_IMM(tmp1, 56, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp2, 56, tmp2), ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); + + emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); + emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); + emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x2, tmp1), ctx); + emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x40, tmp1), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0x42, dst), ctx); + emit(SW64_BPF_SLL_IMM(tmp1, 40, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp2, 40, tmp2), ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); + + emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); + emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); + emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x4, tmp1), ctx); + emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x20, tmp1), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0x24, dst), ctx); + emit(SW64_BPF_SLL_IMM(tmp1, 24, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp2, 24, tmp2), ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); + + emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); + emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); + emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x8, tmp1), ctx); + emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x10, tmp1), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0x18, dst), ctx); + emit(SW64_BPF_SLL_IMM(tmp1, 8, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp2, 8, tmp2), ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); +} + +static void jit_fill_hole(void *area, unsigned int size) +{ + memset(area, SW64_BPF_ILLEGAL_INSN, size); +} + +static int offset_to_epilogue(const struct jit_ctx *ctx) +{ + return ctx->epilogue_offset - ctx->idx; +} + +/* For tail call to jump into */ +#define PROLOGUE_OFFSET 8 + +static void build_prologue(struct jit_ctx *ctx, bool was_classic) +{ + const int r6 = bpf2sw64[BPF_REG_6]; + const int r7 = bpf2sw64[BPF_REG_7]; + const int r8 = bpf2sw64[BPF_REG_8]; + const int r9 = bpf2sw64[BPF_REG_9]; + const int fp = bpf2sw64[BPF_REG_FP]; + const int tcc = bpf2sw64[TCALL_CNT]; + const int tmp1 = bpf2sw64[TMP_REG_1]; + + /* Save callee-saved registers */ + emit(SW64_BPF_SUBL_REG(SW64_BPF_REG_SP, 56, SW64_BPF_REG_SP), ctx); + emit(SW64_BPF_STL(r6, SW64_BPF_REG_SP, 0), ctx); + emit(SW64_BPF_STL(r7, SW64_BPF_REG_SP, 8), ctx); + emit(SW64_BPF_STL(r8, SW64_BPF_REG_SP, 16), ctx); + emit(SW64_BPF_STL(r9, SW64_BPF_REG_SP, 24), ctx); + emit(SW64_BPF_STL(fp, SW64_BPF_REG_SP, 32), ctx); + emit(SW64_BPF_STL(tcc, SW64_BPF_REG_SP, 40), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 48), ctx); + + /* Set up BPF prog stack base register */ + emit(SW64_BPF_LDI(fp, SW64_BPF_REG_SP, 0), ctx); + if (!was_classic) + /* Initialize tail_call_cnt */ + emit(SW64_BPF_LDI(tcc, SW64_BPF_REG_ZR, 0), ctx); + + /* Set up function call stack */ + ctx->stack_size = ctx->prog->aux->stack_depth; + emit_sw64_ldu32(tmp1, ctx->stack_size, ctx); + emit(SW64_BPF_SUBL_REG(SW64_BPF_REG_SP, tmp1, SW64_BPF_REG_SP), ctx); +} + +static void build_epilogue(struct jit_ctx *ctx) +{ + const 
int r6 = bpf2sw64[BPF_REG_6]; + const int r7 = bpf2sw64[BPF_REG_7]; + const int r8 = bpf2sw64[BPF_REG_8]; + const int r9 = bpf2sw64[BPF_REG_9]; + const int fp = bpf2sw64[BPF_REG_FP]; + const int tcc = bpf2sw64[TCALL_CNT]; + const int tmp1 = bpf2sw64[TMP_REG_1]; + + /* Destroy function call stack */ + emit_sw64_ldu32(tmp1, ctx->stack_size, ctx); + emit(SW64_BPF_ADDL_REG(SW64_BPF_REG_SP, tmp1, SW64_BPF_REG_SP), ctx); + + /* Restore callee-saved registers */ + emit(SW64_BPF_LDL(r6, SW64_BPF_REG_SP, 0), ctx); + emit(SW64_BPF_LDL(r7, SW64_BPF_REG_SP, 8), ctx); + emit(SW64_BPF_LDL(r8, SW64_BPF_REG_SP, 16), ctx); + emit(SW64_BPF_LDL(r9, SW64_BPF_REG_SP, 24), ctx); + emit(SW64_BPF_LDL(fp, SW64_BPF_REG_SP, 32), ctx); + emit(SW64_BPF_LDL(tcc, SW64_BPF_REG_SP, 40), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 48), ctx); + emit(SW64_BPF_ADDL_REG(SW64_BPF_REG_SP, 56, SW64_BPF_REG_SP), ctx); + + /* Return */ + emit(SW64_BPF_RET(SW64_BPF_REG_RA), ctx); +} + +static int out_offset = -1; /* initialized on the first pass of build_body() */ +static int emit_bpf_tail_call(struct jit_ctx *ctx) +{ + /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ + const u8 r2 = bpf2sw64[BPF_REG_2]; /* struct bpf_array *array */ + const u8 r3 = bpf2sw64[BPF_REG_3]; /* u64 index */ + + const u8 tmp = bpf2sw64[TMP_REG_1]; + const u8 prg = bpf2sw64[TMP_REG_2]; + const u8 tcc = bpf2sw64[TCALL_CNT]; + const int idx0 = ctx->idx; +#define cur_offset (ctx->idx - idx0) +#define jmp_offset (out_offset - (cur_offset)) + u64 offset; + + /* if (index >= array->map.max_entries) + * goto out; + */ + offset = offsetof(struct bpf_array, map.max_entries); + emit_sw64_ldu64(tmp, offset, ctx); /* tmp = offset */ + emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &map.max_entries */ + emit(SW64_BPF_LDW(tmp, tmp, 0), ctx); /* tmp = *tmp = map.max_entries */ + emit(SW64_BPF_ZAPNOT_IMM(tmp, 0xf, tmp), ctx); /* map.max_entries is u32 */ + emit(SW64_BPF_SUBL_REG(r3, tmp, tmp), ctx); /* tmp = r3 - tmp = index - map.max_entries */ + emit(SW64_BPF_BGE(tmp, jmp_offset), ctx); + + /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; + * tail_call_cnt++; + */ + emit(SW64_BPF_LDI(tmp, SW64_BPF_REG_ZR, MAX_TAIL_CALL_CNT), ctx); + emit(SW64_BPF_SUBL_REG(tcc, tmp, tmp), ctx); + emit(SW64_BPF_BGT(tmp, jmp_offset), ctx); + emit(SW64_BPF_ADDL_IMM(tcc, 1, tcc), ctx); + + /* prog = array->ptrs[index]; + * if (prog == NULL) + * goto out; + */ + offset = offsetof(struct bpf_array, ptrs); + emit_sw64_ldu64(tmp, offset, ctx); /* tmp = offset of ptrs */ + emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &ptrs */ + emit(SW64_BPF_SLL_IMM(r3, 3, prg), ctx); /* prg = r3 * 8, ptrs is 8 bit aligned */ + emit(SW64_BPF_ADDL_REG(tmp, prg, prg), ctx); /* prg = tmp + prg = &prog */ + emit(SW64_BPF_LDL(prg, prg, 0), ctx); /* prg = *prg = prog */ + emit(SW64_BPF_BEQ(prg, jmp_offset), ctx); + + /* goto *(prog->bpf_func + prologue_offset); */ + offset = offsetof(struct bpf_prog, bpf_func); + emit_sw64_ldu64(tmp, offset, ctx); /* tmp = offset */ + emit(SW64_BPF_ADDL_REG(prg, tmp, tmp), ctx); /* tmp = prg + tmp = &bpf_func */ + emit(SW64_BPF_LDW(tmp, tmp, 0), ctx); /* tmp = *tmp = bpf_func */ + emit(SW64_BPF_ZAPNOT_IMM(tmp, 0xf, tmp), ctx); /* bpf_func is unsigned int */ + emit(SW64_BPF_ADDL_REG(tmp, sizeof(u32) * PROLOGUE_OFFSET, tmp), ctx); + emit(SW64_BPF_ADDL_REG(SW64_BPF_REG_SP, ctx->stack_size, SW64_BPF_REG_SP), ctx); + emit(SW64_BPF_BR(tmp, 0), ctx); + + /* out */ + if (out_offset == -1) + out_offset = 
cur_offset; + if (cur_offset != out_offset) { + pr_err("tail_call out_offset = %d, expected %d!\n", + cur_offset, out_offset); + return -1; + } + return 0; +#undef cur_offset +#undef jmp_offset +} + +/* JITs an eBPF instruction. + * Returns: + * 0 - successfully JITed an 8-byte eBPF instruction. + * >0 - successfully JITed a 16-byte eBPF instruction. + * <0 - failed to JIT. + */ +static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) +{ + const u8 code = insn->code; + const u8 dst = bpf2sw64[insn->dst_reg]; + const u8 src = bpf2sw64[insn->src_reg]; + const u8 tmp1 = bpf2sw64[TMP_REG_1]; + const u8 tmp2 = bpf2sw64[TMP_REG_2]; + const s16 off = insn->off; + const s32 imm = insn->imm; + int jmp_offset; + u64 func; + struct bpf_insn insn1; + u64 imm64; + + switch (code) { + case BPF_ALU | BPF_MOV | BPF_X: + case BPF_ALU64 | BPF_MOV | BPF_X: + emit(SW64_BPF_LDI(dst, src, 0), ctx); + break; + case BPF_ALU | BPF_ADD | BPF_X: + emit(SW64_BPF_ADDW_REG(dst, src, dst), ctx); + break; + case BPF_ALU64 | BPF_ADD | BPF_X: + emit(SW64_BPF_ADDL_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_SUB | BPF_X: + emit(SW64_BPF_SUBW_REG(dst, src, dst), ctx); + break; + case BPF_ALU64 | BPF_SUB | BPF_X: + emit(SW64_BPF_SUBL_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_MUL | BPF_X: + emit(SW64_BPF_MULW_REG(dst, src, dst), ctx); + break; + case BPF_ALU64 | BPF_MUL | BPF_X: + emit(SW64_BPF_MULL_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_X: + emit_sw64_div(dst, src, dst, ctx); + return -EINVAL; + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: + emit_sw64_mod(dst, src, dst, ctx); + return -EINVAL; + case BPF_ALU | BPF_LSH | BPF_X: + case BPF_ALU64 | BPF_LSH | BPF_X: + emit(SW64_BPF_SLL_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_RSH | BPF_X: + emit(SW64_BPF_ZAPNOT_IMM(dst, 0xf, dst), ctx); + case BPF_ALU64 | BPF_RSH | BPF_X: + emit(SW64_BPF_SRL_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_ARSH | BPF_X: + case BPF_ALU64 | BPF_ARSH | BPF_X: + emit(SW64_BPF_SRA_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_AND | BPF_X: + emit(SW64_BPF_AND_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + emit(SW64_BPF_OR_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + emit(SW64_BPF_XOR_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_NEG: + case BPF_ALU64 | BPF_NEG: + emit(SW64_BPF_SEXTB_IMM(0xff, tmp1), ctx); + emit(SW64_BPF_XOR_IMM(dst, tmp1, dst), ctx); + break; + case BPF_ALU | BPF_END | BPF_TO_LE: + switch (imm) { + case 16: + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x3, dst), ctx); + break; + case 32: + emit(SW64_BPF_ZAPNOT_IMM(dst, 0xf, dst), ctx); + break; + case 64: + break; + } + case BPF_ALU | BPF_END | BPF_TO_BE: + switch (imm) { + case 16: + emit_sw64_htobe16(dst, ctx); + break; + case 32: + emit_sw64_htobe32(dst, ctx); + break; + case 64: + emit_sw64_htobe64(dst, ctx); + break; + } + + case BPF_ALU | BPF_MOV | BPF_K: + case BPF_ALU64 | BPF_MOV | BPF_K: + emit_sw64_lds32(dst, imm, ctx); + break; + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); + break; + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SUBL_REG(dst, tmp1, dst), ctx); + break; + case BPF_ALU 
| BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_MULL_REG(dst, tmp1, dst), ctx); + break; + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + emit_sw64_div(dst, src, tmp1, ctx); + return -EINVAL; + case BPF_ALU | BPF_MOD | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + emit_sw64_mod(dst, src, tmp1, ctx); + return -EINVAL; + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU64 | BPF_LSH | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SLL_REG(dst, tmp1, dst), ctx); + break; + case BPF_ALU | BPF_RSH | BPF_K: + emit(SW64_BPF_ZAPNOT_IMM(dst, 0xf, dst), ctx); + case BPF_ALU64 | BPF_RSH | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SRL_REG(dst, tmp1, dst), ctx); + break; + case BPF_ALU | BPF_ARSH | BPF_K: + case BPF_ALU64 | BPF_ARSH | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SRA_REG(dst, tmp1, dst), ctx); + break; + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx); + break; + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_OR_REG(dst, tmp1, dst), ctx); + break; + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx); + break; + + case BPF_JMP | BPF_JA: + emit(SW64_BPF_BR(SW64_BPF_REG_RA, off), ctx); + break; + + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP | BPF_JSET | BPF_X: + switch (BPF_OP(code)) { + case BPF_JEQ: + emit(SW64_BPF_CMPEQ_REG(dst, src, tmp1), ctx); + break; + case BPF_JGT: + emit(SW64_BPF_CMPULT_REG(src, dst, tmp1), ctx); + break; + case BPF_JLT: + emit(SW64_BPF_CMPULT_REG(dst, src, tmp1), ctx); + break; + case BPF_JGE: + emit(SW64_BPF_CMPULE_REG(src, dst, tmp1), ctx); + break; + case BPF_JLE: + emit(SW64_BPF_CMPULE_REG(dst, src, tmp1), ctx); + break; + case BPF_JNE: + emit(SW64_BPF_CMPEQ_REG(dst, src, tmp1), ctx); + emit(SW64_BPF_XOR_IMM(tmp1, 1, tmp1), ctx); + break; + case BPF_JSGT: + emit(SW64_BPF_CMPLT_REG(src, dst, tmp1), ctx); + break; + case BPF_JSLT: + emit(SW64_BPF_CMPLT_REG(dst, src, tmp1), ctx); + break; + case BPF_JSGE: + emit(SW64_BPF_CMPLE_REG(src, dst, tmp1), ctx); + break; + case BPF_JSLE: + emit(SW64_BPF_CMPLE_REG(dst, src, tmp1), ctx); + break; + case BPF_JSET: + emit(SW64_BPF_AND_REG(dst, src, tmp1), ctx); + break; + } + emit(SW64_BPF_BLBS(tmp1, off), ctx); + break; + + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP | BPF_JSET | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + switch (BPF_OP(code)) { + case BPF_JEQ: + emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp1), ctx); + break; + case BPF_JGT: + emit(SW64_BPF_CMPULT_REG(tmp1, dst, tmp1), ctx); + break; + case BPF_JLT: + emit(SW64_BPF_CMPULT_REG(dst, tmp1, tmp1), ctx); + 
break; + case BPF_JGE: + emit(SW64_BPF_CMPULE_REG(tmp1, dst, tmp1), ctx); + break; + case BPF_JLE: + emit(SW64_BPF_CMPULE_REG(dst, tmp1, tmp1), ctx); + break; + case BPF_JNE: + emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp1), ctx); + emit(SW64_BPF_XOR_IMM(tmp1, 1, tmp1), ctx); + break; + case BPF_JSGT: + emit(SW64_BPF_CMPLT_REG(tmp1, dst, tmp1), ctx); + break; + case BPF_JSLT: + emit(SW64_BPF_CMPLT_REG(dst, tmp1, tmp1), ctx); + break; + case BPF_JSGE: + emit(SW64_BPF_CMPLE_REG(tmp1, dst, tmp1), ctx); + break; + case BPF_JSLE: + emit(SW64_BPF_CMPLE_REG(dst, tmp1, tmp1), ctx); + break; + case BPF_JSET: + emit(SW64_BPF_AND_REG(dst, tmp1, tmp1), ctx); + break; + } + emit(SW64_BPF_BLBS(tmp1, off), ctx); + break; + + case BPF_JMP | BPF_CALL: + func = (u64)__bpf_call_base + imm; + emit_sw64_ldu64(tmp1, func, ctx); + emit(SW64_BPF_CALL(SW64_BPF_REG_RA, tmp1), ctx); + break; + + case BPF_JMP | BPF_TAIL_CALL: + if (emit_bpf_tail_call(ctx)) + return -EFAULT; + break; + + case BPF_JMP | BPF_EXIT: + if (insn - ctx->prog->insnsi + 1 == ctx->prog->len) + break; + jmp_offset = (offset_to_epilogue(ctx) - 1) * 4; + // emit(SW64_BPF_BR(SW64_BPF_REG_ZR, jmp_offset), ctx); + // break; + emit_sw64_lds32(tmp1, jmp_offset, ctx); + emit(SW64_BPF_BR(tmp2, 0), ctx); + emit(SW64_BPF_ADDL_REG(tmp1, tmp2, tmp1), ctx); + emit(SW64_BPF_JMP(SW64_BPF_REG_ZR, tmp1), ctx); + break; + + case BPF_LD | BPF_IMM | BPF_DW: + insn1 = insn[1]; + imm64 = (u64)insn1.imm << 32 | (u32)imm; + emit_sw64_ldu64(dst, imm64, ctx); + + return 1; + + /* LDX: dst = *(size *)(src + off) */ + case BPF_LDX | BPF_MEM | BPF_W: + emit(SW64_BPF_LDW(dst, src, off), ctx); + break; + case BPF_LDX | BPF_MEM | BPF_H: + emit(SW64_BPF_LDHU(dst, src, off), ctx); + emit(SW64_BPF_SEXTH_REG(dst, dst), ctx); + break; + case BPF_LDX | BPF_MEM | BPF_B: + emit(SW64_BPF_LDBU(dst, src, off), ctx); + emit(SW64_BPF_SEXTB_REG(dst, dst), ctx); + break; + case BPF_LDX | BPF_MEM | BPF_DW: + emit(SW64_BPF_LDL(dst, src, off), ctx); + break; + + /* ST: *(size *)(dst + off) = imm */ + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_DW: + /* Load imm to a register then store it */ + emit_sw64_lds32(tmp1, imm, ctx); + switch (BPF_SIZE(code)) { + case BPF_W: + emit(SW64_BPF_STW(tmp1, dst, off), ctx); + break; + case BPF_H: + emit(SW64_BPF_STH(tmp1, dst, off), ctx); + break; + case BPF_B: + emit(SW64_BPF_STB(tmp1, dst, off), ctx); + break; + case BPF_DW: + emit(SW64_BPF_STL(tmp1, dst, off), ctx); + break; + } + break; + + /* STX: *(size *)(dst + off) = src */ + case BPF_STX | BPF_MEM | BPF_W: + emit(SW64_BPF_STW(src, dst, off), ctx); + break; + case BPF_STX | BPF_MEM | BPF_H: + emit(SW64_BPF_STH(src, dst, off), ctx); + break; + case BPF_STX | BPF_MEM | BPF_B: + emit(SW64_BPF_STB(src, dst, off), ctx); + break; + case BPF_STX | BPF_MEM | BPF_DW: + emit(SW64_BPF_STL(src, dst, off), ctx); + break; + + /* STX XADD: lock *(u32 *)(dst + off) += src */ + case BPF_STX | BPF_XADD | BPF_W: + emit(SW64_BPF_LDW(tmp1, dst, off), ctx); + emit(SW64_BPF_ADDW_REG(tmp1, src, tmp1), ctx); + emit(SW64_BPF_STW(tmp1, dst, off), ctx); + break; + /* STX XADD: lock *(u64 *)(dst + off) += src */ + case BPF_STX | BPF_XADD | BPF_DW: + emit(SW64_BPF_LDL(tmp1, dst, off), ctx); + emit(SW64_BPF_ADDL_REG(tmp1, src, tmp1), ctx); + emit(SW64_BPF_STL(tmp1, dst, off), ctx); + break; + + default: + pr_err("unknown opcode %02x\n", code); + return -EINVAL; + } + + return 0; +} + +static int build_body(struct jit_ctx *ctx) +{ + const struct bpf_prog
*prog = ctx->prog; + int i; + + for (i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + int ret; + + ret = build_insn(insn, ctx); + if (ret > 0) { + i++; + if (ctx->image == NULL) + ctx->insn_offset[i] = ctx->idx; + continue; + } + if (ctx->image == NULL) + ctx->insn_offset[i] = ctx->idx; + if (ret) + return ret; + } + + return 0; +} + +static int validate_code(struct jit_ctx *ctx) +{ + int i; + + for (i = 0; i < ctx->idx; i++) { + if (ctx->image[i] == SW64_BPF_ILLEGAL_INSN) + return -1; + } + + return 0; +} + +static inline void bpf_flush_icache(void *start, void *end) +{ + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + struct bpf_prog *tmp, *orig_prog = prog; + struct bpf_binary_header *header; + struct sw64_jit_data *jit_data; + bool was_classic = bpf_prog_was_classic(prog); + bool tmp_blinded = false; + bool extra_pass = false; + struct jit_ctx ctx; + int image_size; + u8 *image_ptr; + + if (!prog->jit_requested) + return orig_prog; + + tmp = bpf_jit_blind_constants(prog); + /* If blinding was requested and we failed during blinding, + * we must fall back to the interpreter. + */ + if (IS_ERR(tmp)) + return orig_prog; + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } + + jit_data = prog->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + prog = orig_prog; + goto out; + } + prog->aux->jit_data = jit_data; + } + if (jit_data->ctx.insn_offset) { + ctx = jit_data->ctx; + image_ptr = jit_data->image; + header = jit_data->header; + extra_pass = true; + image_size = sizeof(u32) * ctx.idx; + goto skip_init_ctx; + } + memset(&ctx, 0, sizeof(ctx)); + ctx.prog = prog; + + ctx.insn_offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); + if (ctx.insn_offset == NULL) { + prog = orig_prog; + goto out_off; + } + + /* 1. Initial fake pass to compute ctx->idx. */ + + /* Fake pass to fill in ctx->offset. */ + build_prologue(&ctx, was_classic); + + if (build_body(&ctx)) { + prog = orig_prog; + goto out_off; + } + + ctx.epilogue_offset = ctx.idx; + build_epilogue(&ctx); + + /* Now we know the actual image size. */ + image_size = sizeof(u32) * ctx.idx; + header = bpf_jit_binary_alloc(image_size, &image_ptr, + sizeof(u32), jit_fill_hole); + if (header == NULL) { + prog = orig_prog; + goto out_off; + } + + /* 2. Now, the actual pass. */ + + ctx.image = (u32 *)image_ptr; +skip_init_ctx: + ctx.idx = 0; + + build_prologue(&ctx, was_classic); + + if (build_body(&ctx)) { + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_off; + } + + build_epilogue(&ctx); + + /* 3. Extra pass to validate JITed code. */ + if (validate_code(&ctx)) { + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_off; + } + + /* And we're done. */ + if (bpf_jit_enable > 1) + bpf_jit_dump(prog->len, image_size, 2, ctx.image); + + bpf_flush_icache(header, ctx.image + ctx.idx); + + if (!prog->is_func || extra_pass) { + bpf_jit_binary_lock_ro(header); + } else { + jit_data->ctx = ctx; + jit_data->image = image_ptr; + jit_data->header = header; + } + prog->bpf_func = (void *)ctx.image; + prog->jited = 1; + prog->jited_len = image_size; + + if (!prog->is_func || extra_pass) { +out_off: + kfree(ctx.insn_offset); + kfree(jit_data); + prog->aux->jit_data = NULL; + } +out: + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? 
+ tmp : orig_prog); + return prog; +} diff --git a/arch/sw_64/oprofile/Makefile b/arch/sw_64/oprofile/Makefile new file mode 100644 index 000000000000..4b304a91c3b2 --- /dev/null +++ b/arch/sw_64/oprofile/Makefile @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 +ccflags-y := -Werror -Wno-sign-compare + +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +oprofile-y := $(DRIVER_OBJS) common.o +oprofile-$(CONFIG_SW64) += op_model_sw2f.o diff --git a/arch/sw_64/oprofile/common.c b/arch/sw_64/oprofile/common.c new file mode 100644 index 000000000000..58ef9fcfdc92 --- /dev/null +++ b/arch/sw_64/oprofile/common.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 +/** + * @file arch/sw_64/oprofile/common.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson rth@twiddle.net + */ + +#include <linux/oprofile.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <asm/ptrace.h> +#include <asm/special_insns.h> + +#include "op_impl.h" + +extern struct op_axp_model op_model_sw2 __attribute__((weak)); +extern struct op_axp_model op_model_sw2 __attribute__((weak)); +extern struct op_axp_model op_model_pca56 __attribute__((weak)); +extern struct op_axp_model op_model_sw2 __attribute__((weak)); +extern struct op_axp_model op_model_sw2f __attribute__((weak)); + +static struct op_axp_model *model; + +extern void (*perf_irq)(unsigned long vector, struct pt_regs *regs); +static void (*save_perf_irq)(unsigned long vector, struct pt_regs *regs); + +static struct op_counter_config ctr[20]; +static struct op_system_config sys; +static struct op_register_config reg; + +/* Called from do_entInt to handle the performance monitor interrupt. */ + +static void +op_handle_interrupt(unsigned long which, struct pt_regs *regs) +{ + model->handle_interrupt(which, regs, ctr); + + /* + * If the user has selected an interrupt frequency that is + * not exactly the width of the counter, write a new value + * into the counter such that it'll overflow after N more + * events. + */ + if ((reg.need_reset >> which) & 1) + model->reset_ctr(®, which); +} + +static int +op_axp_setup(void) +{ + unsigned long i, e; + + /* Install our interrupt handler into the existing hook. */ + save_perf_irq = perf_irq; + perf_irq = op_handle_interrupt; + + /* Compute the mask of enabled counters. */ + for (i = e = 0; i < model->num_counters; ++i) + if (ctr[i].enabled) + e |= 1 << i; + reg.enable = e; + + /* Pre-compute the values to stuff in the hardware registers. */ + model->reg_setup(®, ctr, &sys); + + /* Configure the registers on all cpus. */ + (void)smp_call_function(model->cpu_setup, ®, 1); + model->cpu_setup(®); + return 0; +} + +static void +op_axp_shutdown(void) +{ + /* Remove our interrupt handler. We may be removing this module. */ + perf_irq = save_perf_irq; +} + +static void +op_axp_cpu_start(void *dummy) +{ + wrperfmon(1, reg.enable); +} + +static int +op_axp_start(void) +{ + (void)smp_call_function(op_axp_cpu_start, NULL, 1); + op_axp_cpu_start(NULL); + return 0; +} + +static inline void +op_axp_cpu_stop(void *dummy) +{ + /* Disable performance monitoring for all counters. 
*/ + wrperfmon(0, -1); +} + +static void +op_axp_stop(void) +{ + (void)smp_call_function(op_axp_cpu_stop, NULL, 1); + op_axp_cpu_stop(NULL); +} + +static int +op_axp_create_files(struct dentry *root) +{ + int i; + + for (i = 0; i < model->num_counters; ++i) { + struct dentry *dir; + char buf[4]; + + snprintf(buf, sizeof(buf), "%d", i); + dir = oprofilefs_mkdir(root, buf); + + oprofilefs_create_ulong(dir, "enabled", &ctr[i].enabled); + oprofilefs_create_ulong(dir, "event", &ctr[i].event); + oprofilefs_create_ulong(dir, "count", &ctr[i].count); + /* Dummies. */ + oprofilefs_create_ulong(dir, "kernel", &ctr[i].kernel); + oprofilefs_create_ulong(dir, "user", &ctr[i].user); + oprofilefs_create_ulong(dir, "unit_mask", &ctr[i].unit_mask); + } + + if (model->can_set_proc_mode) { + oprofilefs_create_ulong(root, "enable_pal", + &sys.enable_pal); + oprofilefs_create_ulong(root, "enable_kernel", + &sys.enable_kernel); + oprofilefs_create_ulong(root, "enable_user", + &sys.enable_user); + } + + return 0; +} + +int __init +oprofile_arch_init(struct oprofile_operations *ops) +{ + struct op_axp_model *lmodel = NULL; + + lmodel = &op_model_sw2f; + + if (!lmodel) + return -ENODEV; + model = lmodel; + + ops->create_files = op_axp_create_files; + ops->setup = op_axp_setup; + ops->shutdown = op_axp_shutdown; + ops->start = op_axp_start; + ops->stop = op_axp_stop; + ops->cpu_type = lmodel->cpu_type; + + pr_info("oprofile: using %s performance monitoring.\n", + lmodel->cpu_type); + + return 0; +} + + +void +oprofile_arch_exit(void) +{ +} diff --git a/arch/sw_64/oprofile/op_impl.h b/arch/sw_64/oprofile/op_impl.h new file mode 100644 index 000000000000..10bdd455c3dd --- /dev/null +++ b/arch/sw_64/oprofile/op_impl.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/** + * @file arch/sw_64/oprofile/op_impl.h + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson rth@twiddle.net + */ + +#ifndef _SW64_OPROFILE_OP_IMPL_H +#define _SW64_OPROFILE_OP_IMPL_H + +/* Per-counter configuration as set via oprofilefs. */ +struct op_counter_config { + unsigned long enabled; + unsigned long event; + unsigned long count; + /* Dummies because I am too lazy to hack the userspace tools. */ + unsigned long kernel; + unsigned long user; + unsigned long unit_mask; +}; + +/* System-wide configuration as set via oprofilefs. */ +struct op_system_config { + unsigned long enable_pal; + unsigned long enable_kernel; + unsigned long enable_user; +}; + +/* Cached values for the various performance monitoring registers. */ +struct op_register_config { + unsigned long enable; + unsigned long mux_select; + unsigned long proc_mode; + unsigned long freq; + unsigned long reset_values; + unsigned long need_reset; +}; + +/* Per-architecture configuration and hooks. 
*/ +struct op_axp_model { + void (*reg_setup)(struct op_register_config *reg, + struct op_counter_config *ctr, + struct op_system_config *sys); + void (*cpu_setup)(void *x); + void (*reset_ctr)(struct op_register_config *reg, unsigned long ctr); + void (*handle_interrupt)(unsigned long which, struct pt_regs *regs, + struct op_counter_config *ctr); + char *cpu_type; + unsigned char num_counters; + unsigned char can_set_proc_mode; +}; + +#endif diff --git a/arch/sw_64/oprofile/op_model_sw2f.c b/arch/sw_64/oprofile/op_model_sw2f.c new file mode 100644 index 000000000000..27140dc9004e --- /dev/null +++ b/arch/sw_64/oprofile/op_model_sw2f.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * @file arch/sw_64/oprofile/op_model_sw2f.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson rth@twiddle.net + * @author Falk Hueffner falk@debian.org + */ + +#include <linux/oprofile.h> +#include <linux/smp.h> +#include <asm/ptrace.h> + +#include "op_impl.h" + + +/* Compute all of the registers in preparation for enabling profiling. */ + +static void +sw2f_reg_setup(struct op_register_config *reg, + struct op_counter_config *ctr, + struct op_system_config *sys) +{ + unsigned long ctl, reset, need_reset, i; + + /* Select desired events. */ + ctl = 1UL << 4; /* Enable ProfileMe mode. */ + + /* + * The event numbers are chosen so we can use them directly if + * PCTR1 is enabled. + */ + if (ctr[1].enabled) { + ctl |= (ctr[1].event & 3) << 2; + } else { + if (ctr[0].event == 0) /* cycles */ + ctl |= 1UL << 2; + } + reg->mux_select = ctl; + + /* Select logging options. */ + /* + * ??? Need to come up with some mechanism to trace only + * selected processes. SW2f does not have a mechanism to + * select kernel or user mode only. For now, enable always. + */ + reg->proc_mode = 0; + + /* + * SW2f cannot change the width of the counters as with the + * other implementations. But fortunately, we can write to + * the counters and set the value such that it will overflow + * at the right time. + */ + reset = need_reset = 0; + for (i = 0; i < 2; ++i) { + unsigned long count = ctr[i].count; + + if (!ctr[i].enabled) + continue; + + if (count > 0x100000) + count = 0x100000; + ctr[i].count = count; + reset |= (0x100000 - count) << (i ? 6 : 28); + if (count != 0x100000) + need_reset |= 1 << i; + } + reg->reset_values = reset; + reg->need_reset = need_reset; +} + +/* Program all of the registers in preparation for enabling profiling. */ + +static void +sw2f_cpu_setup(void *x) +{ + struct op_register_config *reg = x; + + wrperfmon(2, reg->mux_select); + wrperfmon(3, reg->proc_mode); + wrperfmon(6, reg->reset_values | 3); +} + +/* + * CTR is a counter for which the user has requested an interrupt count + * in between one of the widths selectable in hardware. Reset the count + * for CTR to the value stored in REG->RESET_VALUES. + */ + +static void +sw2f_reset_ctr(struct op_register_config *reg, unsigned long ctr) +{ + wrperfmon(6, reg->reset_values | (1 << ctr)); +} + +/* + * ProfileMe conditions which will show up as counters. We can also + * detect the following, but it seems unlikely that anybody is + * interested in counting them: + * - Reset + * - MT_FPCR (write to floating point control register) + * - Arithmetic trap + * - Dstream Fault + * - Machine Check (ECC fault, etc.) 
+ * - OPCDEC (illegal opcode) + * - Floating point disabled + * - Differentiate between DTB single/double misses and 3 or 4 level + * page tables + * - Istream access violation + * - Interrupt + * - Icache Parity Error. + * - Instruction killed (nop, trapb) + + * Unfortunately, there seems to be no way to detect Dcache and Bcache + * misses; the latter could be approximated by making the counter + * count Bcache misses, but that is not precise. + + * We model this as 20 counters: + * - PCTR0 + * - PCTR1 + * - 9 ProfileMe events, induced by PCTR0 + * - 9 ProfileMe events, induced by PCTR1 + */ + +enum profileme_counters { + PM_STALLED, /* Stalled for at least one cycle */ + /* between the fetch and map stages */ + PM_TAKEN, /* Conditional branch taken */ + PM_MISPREDICT, /* Branch caused mispredict trap */ + PM_ITB_MISS, /* ITB miss */ + PM_DTB_MISS, /* DTB miss */ + PM_REPLAY, /* Replay trap */ + PM_LOAD_STORE, /* Load-store order trap */ + PM_ICACHE_MISS, /* Icache miss */ + PM_UNALIGNED, /* Unaligned Load/Store */ + PM_NUM_COUNTERS +}; + +static inline void +op_add_pm(unsigned long pc, int kern, unsigned long counter, + struct op_counter_config *ctr, unsigned long event) +{ + unsigned long fake_counter = 2 + event; + + if (counter == 1) + fake_counter += PM_NUM_COUNTERS; + if (ctr[fake_counter].enabled) + oprofile_add_pc(pc, kern, fake_counter); +} + +static void +sw2f_handle_interrupt(unsigned long which, struct pt_regs *regs, + struct op_counter_config *ctr) +{ + unsigned long pmpc, pctr_ctl; + int kern = !user_mode(regs); + int mispredict = 0; + union { + unsigned long v; + struct { + unsigned reserved: 30; /* 0-29 */ + unsigned overcount: 3; /* 30-32 */ + unsigned icache_miss: 1; /* 33 */ + unsigned trap_type: 4; /* 34-37 */ + unsigned load_store: 1; /* 38 */ + unsigned trap: 1; /* 39 */ + unsigned mispredict: 1; /* 40 */ + } fields; + } i_stat; + + enum trap_types { + TRAP_REPLAY, + TRAP_INVALID0, + TRAP_DTB_DOUBLE_MISS_3, + TRAP_DTB_DOUBLE_MISS_4, + TRAP_FP_DISABLED, + TRAP_UNALIGNED, + TRAP_DTB_SINGLE_MISS, + TRAP_DSTREAM_FAULT, + TRAP_OPCDEC, + TRAP_INVALID1, + TRAP_MACHINE_CHECK, + TRAP_INVALID2, + TRAP_ARITHMETIC, + TRAP_INVALID3, + TRAP_MT_FPCR, + TRAP_RESET + }; + + pmpc = wrperfmon(9, 0); + /* ??? Don't know how to handle physical-mode HMcode address. */ + if (pmpc & 1) + return; + pmpc &= ~2; /* clear reserved bit */ + + i_stat.v = wrperfmon(8, 0); + if (i_stat.fields.trap) { + switch (i_stat.fields.trap_type) { + case TRAP_INVALID1: + case TRAP_INVALID2: + case TRAP_INVALID3: + /* + * Pipeline redirection occurred. PMPC points + * to HMcode. Recognize ITB miss by HMcode + * offset address, and get actual PC from EXC_ADDR. + */ + oprofile_add_pc(regs->pc, kern, which); + if ((pmpc & ((1 << 15) - 1)) == 581) + op_add_pm(regs->pc, kern, which, + ctr, PM_ITB_MISS); + /* + * Most other bit and counter values will be + * those for the first instruction in the + * fault handler, so we're done. + */ + return; + case TRAP_REPLAY: + op_add_pm(pmpc, kern, which, ctr, + (i_stat.fields.load_store + ? PM_LOAD_STORE : PM_REPLAY)); + break; + case TRAP_DTB_DOUBLE_MISS_3: + case TRAP_DTB_DOUBLE_MISS_4: + case TRAP_DTB_SINGLE_MISS: + op_add_pm(pmpc, kern, which, ctr, PM_DTB_MISS); + break; + case TRAP_UNALIGNED: + op_add_pm(pmpc, kern, which, ctr, PM_UNALIGNED); + break; + case TRAP_INVALID0: + case TRAP_FP_DISABLED: + case TRAP_DSTREAM_FAULT: + case TRAP_OPCDEC: + case TRAP_MACHINE_CHECK: + case TRAP_ARITHMETIC: + case TRAP_MT_FPCR: + case TRAP_RESET: + break; + } + + /* + * ??? 
JSR/JMP/RET/COR or HW_JSR/HW_JMP/HW_RET/HW_COR + * mispredicts do not set this bit but can be + * recognized by the presence of one of these + * instructions at the PMPC location with bit 39 + * set. + */ + if (i_stat.fields.mispredict) { + mispredict = 1; + op_add_pm(pmpc, kern, which, ctr, PM_MISPREDICT); + } + } + + oprofile_add_pc(pmpc, kern, which); + + pctr_ctl = wrperfmon(5, 0); + if (pctr_ctl & (1UL << 27)) + op_add_pm(pmpc, kern, which, ctr, PM_STALLED); + + /* + * Unfortunately, TAK is undefined on mispredicted branches. + * ??? It is also undefined for non-cbranch insns, should + * check that. + */ + if (!mispredict && pctr_ctl & (1UL << 0)) + op_add_pm(pmpc, kern, which, ctr, PM_TAKEN); +} + +struct op_axp_model op_model_sw2f = { + .reg_setup = sw2f_reg_setup, + .cpu_setup = sw2f_cpu_setup, + .reset_ctr = sw2f_reset_ctr, + .handle_interrupt = sw2f_handle_interrupt, + .cpu_type = "sw2f", + .num_counters = 20, + .can_set_proc_mode = 0, +}; diff --git a/arch/sw_64/platform/Makefile b/arch/sw_64/platform/Makefile new file mode 100644 index 000000000000..a972b931dea2 --- /dev/null +++ b/arch/sw_64/platform/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_PLATFORM_XUELANG) += platform_xuelang.o diff --git a/arch/sw_64/platform/platform_xuelang.c b/arch/sw_64/platform/platform_xuelang.c new file mode 100644 index 000000000000..f0e33c664b0e --- /dev/null +++ b/arch/sw_64/platform/platform_xuelang.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <asm/platform.h> +#include <asm/sw64_init.h> +#include <linux/reboot.h> + +static void vt_mode_kill_arch(int mode) +{ + hcall(HCALL_SET_CLOCKEVENT, 0, 0, 0); + + switch (mode) { + case LINUX_REBOOT_CMD_RESTART: + hcall(HCALL_RESTART, 0, 0, 0); + mb(); + break; + case LINUX_REBOOT_CMD_HALT: + case LINUX_REBOOT_CMD_POWER_OFF: + hcall(HCALL_SHUTDOWN, 0, 0, 0); + mb(); + break; + default: + break; + } +} + +extern void cpld_write(uint8_t slave_addr, uint8_t reg, uint8_t data); + +static void xuelang_kill_arch(int mode) +{ + if (is_in_host()) { + switch (mode) { + case LINUX_REBOOT_CMD_RESTART: + cpld_write(0x64, 0x00, 0xc3); + mb(); + break; + case LINUX_REBOOT_CMD_HALT: + case LINUX_REBOOT_CMD_POWER_OFF: + cpld_write(0x64, 0x00, 0xf0); + mb(); + break; + default: + break; + } + } else { + vt_mode_kill_arch(mode); + } +} + +static inline void __iomem *xuelang_ioportmap(unsigned long addr) +{ + unsigned long io_offset; + + if (addr < 0x100000) { + io_offset = is_in_host() ? 
LPC_LEGACY_IO : PCI_VT_LEGACY_IO; + addr = addr | io_offset; + } + + return (void __iomem *)(addr | PAGE_OFFSET); +} + +struct sw64_platform_ops xuelang_ops = { + .kill_arch = xuelang_kill_arch, + .ioportmap = xuelang_ioportmap, + .ops_fixup = sw64_init_noop, +}; diff --git a/arch/sw_64/tools/.gitignore b/arch/sw_64/tools/.gitignore new file mode 100644 index 000000000000..f73e86272b76 --- /dev/null +++ b/arch/sw_64/tools/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +relocs diff --git a/arch/sw_64/tools/Makefile b/arch/sw_64/tools/Makefile new file mode 100644 index 000000000000..66f55b035e22 --- /dev/null +++ b/arch/sw_64/tools/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 + +hostprogs += relocs +relocs-objs += relocs.o +relocs-objs += relocs_main.o +PHONY += relocs +relocs: $(obj)/relocs + @: diff --git a/arch/sw_64/tools/relocs.c b/arch/sw_64/tools/relocs.c new file mode 100644 index 000000000000..a8a9e08a0a65 --- /dev/null +++ b/arch/sw_64/tools/relocs.c @@ -0,0 +1,634 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "relocs.h" + +#define ELF_BITS 64 + +#define ELF_MACHINE EM_SW64 +#define ELF_MACHINE_NAME "SW64" +#define SHT_REL_TYPE SHT_RELA +#define Elf_Rel Elf64_Rela + +#define ELF_CLASS ELFCLASS64 +#define ELF_R_SYM(val) ELF64_R_SYM(val) +#define ELF_R_TYPE(val) ELF64_R_TYPE(val) +#define ELF_ST_TYPE(o) ELF64_ST_TYPE(o) +#define ELF_ST_BIND(o) ELF64_ST_BIND(o) +#define ELF_ST_VISIBILITY(o) ELF64_ST_VISIBILITY(o) + +#define ElfW(type) _ElfW(ELF_BITS, type) +#define _ElfW(bits, type) __ElfW(bits, type) +#define __ElfW(bits, type) Elf##bits##_##type + +#define Elf_Addr ElfW(Addr) +#define Elf_Ehdr ElfW(Ehdr) +#define Elf_Phdr ElfW(Phdr) +#define Elf_Shdr ElfW(Shdr) +#define Elf_Sym ElfW(Sym) + +static Elf_Ehdr ehdr; + +struct relocs { + uint32_t *offset; + unsigned long count; + unsigned long size; +}; + +static struct relocs relocs; + +struct section { + Elf_Shdr shdr; + struct section *link; + Elf_Sym *symtab; + Elf_Rel *reltab; + char *strtab; + long shdr_offset; +}; +static struct section *secs; + +static const char * const regex_sym_kernel = { +/* Symbols matching these regex's should never be relocated */ + "^(__crc_)", +}; + +static regex_t sym_regex_c; + +static int regex_skip_reloc(const char *sym_name) +{ + return !regexec(&sym_regex_c, sym_name, 0, NULL, 0); +} + +static void regex_init(void) +{ + char errbuf[128]; + int err; + + err = regcomp(&sym_regex_c, regex_sym_kernel, + REG_EXTENDED|REG_NOSUB); + + if (err) { + regerror(err, &sym_regex_c, errbuf, sizeof(errbuf)); + die("%s", errbuf); + } +} + +static const char *rel_type(unsigned int type) +{ + static const char * const type_name[] = { +#define REL_TYPE(X)[X] = #X + REL_TYPE(R_SW64_NONE), + REL_TYPE(R_SW64_REFQUAD), + REL_TYPE(R_SW64_LITERAL), + REL_TYPE(R_SW64_LITUSE), + REL_TYPE(R_SW64_GPDISP), + REL_TYPE(R_SW64_BRADDR), + REL_TYPE(R_SW64_HINT), + REL_TYPE(R_SW64_SREL32), + REL_TYPE(R_SW64_GPRELHIGH), + REL_TYPE(R_SW64_GPRELLOW), +#undef REL_TYPE + }; + const char *name = "unknown type rel type name"; + + if (type < ARRAY_SIZE(type_name) && type_name[type]) + name = type_name[type]; + return name; +} + +static const char *sec_name(unsigned int shndx) +{ + const char *sec_strtab; + const char *name; + + sec_strtab = secs[ehdr.e_shstrndx].strtab; + if (shndx < ehdr.e_shnum) + name = sec_strtab + secs[shndx].shdr.sh_name; + else if (shndx == SHN_ABS) + name = "ABSOLUTE"; + else if (shndx == SHN_COMMON) + name = "COMMON"; + else + name = "<noname>"; + return name; +} + +static struct 
section *sec_lookup(const char *secname) +{ + int i; + + for (i = 0; i < ehdr.e_shnum; i++) + if (strcmp(secname, sec_name(i)) == 0) + return &secs[i]; + + return NULL; +} + +static const char *sym_name(const char *sym_strtab, Elf_Sym *sym) +{ + const char *name; + + if (sym->st_name) + name = sym_strtab + sym->st_name; + else + name = sec_name(sym->st_shndx); + return name; +} + +#define le16_to_cpu(val) (val) +#define le32_to_cpu(val) (val) +#define le64_to_cpu(val) (val) + +#define cpu_to_le16(val) (val) +#define cpu_to_le32(val) (val) +#define cpu_to_le64(val) (val) + +static uint16_t elf16_to_cpu(uint16_t val) +{ + return le16_to_cpu(val); +} + +static uint32_t elf32_to_cpu(uint32_t val) +{ + return le32_to_cpu(val); +} + +static uint32_t cpu_to_elf32(uint32_t val) +{ + return cpu_to_le32(val); +} + +#define elf_half_to_cpu(x) elf16_to_cpu(x) +#define elf_word_to_cpu(x) elf32_to_cpu(x) + +#if ELF_BITS == 64 +static uint64_t elf64_to_cpu(uint64_t val) +{ + return le64_to_cpu(val); +} +#define elf_addr_to_cpu(x) elf64_to_cpu(x) +#define elf_off_to_cpu(x) elf64_to_cpu(x) +#define elf_xword_to_cpu(x) elf64_to_cpu(x) +#else +#define elf_addr_to_cpu(x) elf32_to_cpu(x) +#define elf_off_to_cpu(x) elf32_to_cpu(x) +#define elf_xword_to_cpu(x) elf32_to_cpu(x) +#endif + +static void read_ehdr(FILE *fp) +{ + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) + die("Cannot read ELF header: %s\n", strerror(errno)); + + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) + die("No ELF magic\n"); + + if (ehdr.e_ident[EI_CLASS] != ELF_CLASS) + die("Not a %d bit executable\n", ELF_BITS); + + if ((ehdr.e_ident[EI_DATA] != ELFDATA2LSB) && + (ehdr.e_ident[EI_DATA] != ELFDATA2MSB)) + die("Unknown ELF Endianness\n"); + + if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) + die("Unknown ELF version\n"); + + /* Convert the fields to native endian */ + ehdr.e_type = elf_half_to_cpu(ehdr.e_type); + ehdr.e_machine = elf_half_to_cpu(ehdr.e_machine); + ehdr.e_version = elf_word_to_cpu(ehdr.e_version); + ehdr.e_entry = elf_addr_to_cpu(ehdr.e_entry); + ehdr.e_phoff = elf_off_to_cpu(ehdr.e_phoff); + ehdr.e_shoff = elf_off_to_cpu(ehdr.e_shoff); + ehdr.e_flags = elf_word_to_cpu(ehdr.e_flags); + ehdr.e_ehsize = elf_half_to_cpu(ehdr.e_ehsize); + ehdr.e_phentsize = elf_half_to_cpu(ehdr.e_phentsize); + ehdr.e_phnum = elf_half_to_cpu(ehdr.e_phnum); + ehdr.e_shentsize = elf_half_to_cpu(ehdr.e_shentsize); + ehdr.e_shnum = elf_half_to_cpu(ehdr.e_shnum); + ehdr.e_shstrndx = elf_half_to_cpu(ehdr.e_shstrndx); + + if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) + die("Unsupported ELF header type\n"); + + if (ehdr.e_machine != ELF_MACHINE) + die("Not for %s\n", ELF_MACHINE_NAME); + + if (ehdr.e_version != EV_CURRENT) + die("Unknown ELF version\n"); + + if (ehdr.e_ehsize != sizeof(Elf_Ehdr)) + die("Bad Elf header size\n"); + + if (ehdr.e_phentsize != sizeof(Elf_Phdr)) + die("Bad program header entry\n"); + + if (ehdr.e_shentsize != sizeof(Elf_Shdr)) + die("Bad section header entry\n"); + + if (ehdr.e_shstrndx >= ehdr.e_shnum) + die("String table index out of bounds\n"); +} + +static void read_shdrs(FILE *fp) +{ + int i; + Elf_Shdr shdr; + + secs = calloc(ehdr.e_shnum, sizeof(struct section)); + if (!secs) + die("Unable to allocate %d section headers\n", ehdr.e_shnum); + + if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", ehdr.e_shoff, strerror(errno)); + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + sec->shdr_offset = ftell(fp); + if (fread(&shdr, sizeof(shdr), 1, fp) != 1) + 
die("Cannot read ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + sec->shdr.sh_name = elf_word_to_cpu(shdr.sh_name); + sec->shdr.sh_type = elf_word_to_cpu(shdr.sh_type); + sec->shdr.sh_flags = elf_xword_to_cpu(shdr.sh_flags); + sec->shdr.sh_addr = elf_addr_to_cpu(shdr.sh_addr); + sec->shdr.sh_offset = elf_off_to_cpu(shdr.sh_offset); + sec->shdr.sh_size = elf_xword_to_cpu(shdr.sh_size); + sec->shdr.sh_link = elf_word_to_cpu(shdr.sh_link); + sec->shdr.sh_info = elf_word_to_cpu(shdr.sh_info); + sec->shdr.sh_addralign = elf_xword_to_cpu(shdr.sh_addralign); + sec->shdr.sh_entsize = elf_xword_to_cpu(shdr.sh_entsize); + if (sec->shdr.sh_link < ehdr.e_shnum) + sec->link = &secs[sec->shdr.sh_link]; + } +} + +static void read_strtabs(FILE *fp) +{ + int i; + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_STRTAB) + continue; + + sec->strtab = malloc(sec->shdr.sh_size); + if (!sec->strtab) + die("malloc of %d bytes for strtab failed\n", + sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->strtab, 1, sec->shdr.sh_size, fp) != + sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); + } +} + +static void read_symtabs(FILE *fp) +{ + int i, j; + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_SYMTAB) + continue; + + sec->symtab = malloc(sec->shdr.sh_size); + if (!sec->symtab) + die("malloc of %d bytes for symtab failed\n", + sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) != + sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); + + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Sym); j++) { + Elf_Sym *sym = &sec->symtab[j]; + + sym->st_name = elf_word_to_cpu(sym->st_name); + sym->st_value = elf_addr_to_cpu(sym->st_value); + sym->st_size = elf_xword_to_cpu(sym->st_size); + sym->st_shndx = elf_half_to_cpu(sym->st_shndx); + } + } +} + +static void read_relocs(FILE *fp) +{ + static unsigned long base; + int i, j; + + if (!base) { + struct section *sec = sec_lookup(".text"); + + if (!sec) + die("Could not find .text section\n"); + + base = sec->shdr.sh_addr; + } + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL_TYPE) + continue; + + sec->reltab = malloc(sec->shdr.sh_size); + if (!sec->reltab) + die("malloc of %d bytes for relocs failed\n", + sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) != + sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); + + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { + Elf_Rel *rel = &sec->reltab[j]; + + rel->r_offset = elf_addr_to_cpu(rel->r_offset); + /* Set offset into kernel image */ + rel->r_offset -= base; + /* Convert SW64 RELA format - only the symbol + * index needs converting to native endianness + */ + rel->r_info = elf_xword_to_cpu(rel->r_info); +#if (SHT_REL_TYPE == SHT_RELA) + rel->r_addend = elf_xword_to_cpu(rel->r_addend); +#endif + } + } +} + +static void remove_relocs(FILE *fp) +{ + int i; + Elf_Shdr shdr; + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = 
&secs[i]; + + if (sec->shdr.sh_type != SHT_REL_TYPE) + continue; + + if (fseek(fp, sec->shdr_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr_offset, strerror(errno)); + + if (fread(&shdr, sizeof(shdr), 1, fp) != 1) + die("Cannot read ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + + /* Set relocation section size to 0, effectively removing it. + * This is necessary due to lack of support for relocations + * in objcopy when creating 32bit elf from 64bit elf. + */ + shdr.sh_size = 0; + + if (fseek(fp, sec->shdr_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr_offset, strerror(errno)); + + if (fwrite(&shdr, sizeof(shdr), 1, fp) != 1) + die("Cannot write ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + } +} + +static void add_reloc(struct relocs *r, uint32_t offset, unsigned int type) +{ + /* Relocation representation in binary table: + * |76543210|76543210|76543210|76543210| + * | Type | offset from _text >> 2 | + */ + offset >>= 2; + if (offset > 0x00FFFFFF) + die("Kernel image exceeds maximum size for relocation!\n"); + + offset = (offset & 0x00FFFFFF) | ((type & 0xFF) << 24); + + if (r->count == r->size) { + unsigned long newsize = r->size + 50000; + void *mem = realloc(r->offset, newsize * sizeof(r->offset[0])); + + if (!mem) + die("realloc failed\n"); + + r->offset = mem; + r->size = newsize; + } + r->offset[r->count++] = offset; +} + +static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, + Elf_Sym *sym, const char *symname)) +{ + int i; + + /* Walk through the relocations */ + for (i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf_Sym *sh_symtab; + struct section *sec_applies, *sec_symtab; + int j; + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL_TYPE) + continue; + sec_symtab = sec->link; + sec_applies = &secs[sec->shdr.sh_info]; + if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) + continue; + + sh_symtab = sec_symtab->symtab; + sym_strtab = sec_symtab->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { + Elf_Rel *rel = &sec->reltab[j]; + Elf_Sym *sym = &sh_symtab[ELF_R_SYM(rel->r_info)]; + const char *symname = sym_name(sym_strtab, sym); + + process(sec, rel, sym, symname); + } + } +} + +static int do_reloc(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, + const char *symname) +{ + unsigned int r_type = ELF_R_TYPE(rel->r_info); + unsigned int bind = ELF_ST_BIND(sym->st_info); + + if ((bind == STB_WEAK) && (sym->st_value == 0)) { + /* Don't relocate weak symbols without a target */ + return 0; + } + + if (regex_skip_reloc(symname)) + return 0; + + switch (r_type) { + case R_SW64_NONE: + case R_SW64_LITERAL: /* relocated by GOT */ + case R_SW64_LITUSE: + case R_SW64_GPDISP: + case R_SW64_BRADDR: + case R_SW64_HINT: + case R_SW64_SREL32: + case R_SW64_GPRELHIGH: + case R_SW64_GPRELLOW: + /* + * NONE can be ignored and PC relative relocations don't + * need to be adjusted. 
+ */ + break; + + case R_SW64_REFQUAD: + add_reloc(&relocs, rel->r_offset, r_type); + break; + + default: + die("Unsupported relocation type: %s (%d)\n", + rel_type(r_type), r_type); + break; + } + + return 0; +} + +static int write_reloc_as_bin(uint32_t v, FILE *f) +{ + unsigned char buf[4]; + + v = cpu_to_elf32(v); + + memcpy(buf, &v, sizeof(uint32_t)); + return fwrite(buf, 1, 4, f); +} + +static int write_reloc_as_text(uint32_t v, FILE *f) +{ + int res; + + res = fprintf(f, "\t.long 0x%08"PRIx32"\n", v); + if (res < 0) + return res; + else + return sizeof(uint32_t); +} + +static void emit_relocs(int as_text, int as_bin, FILE *outf) +{ + int i; + int (*write_reloc)(uint32_t, FILE *) = write_reloc_as_bin; + int size = 0; + int size_reserved; + struct section *sec_reloc; + + sec_reloc = sec_lookup(".data.reloc"); + if (!sec_reloc) + die("Could not find relocation section\n"); + + size_reserved = sec_reloc->shdr.sh_size; + /* Collect up the relocations */ + walk_relocs(do_reloc); + + /* Print the relocations */ + if (as_text) { + /* Print the relocations in a form that gas will like. */ + printf(".section \".data.reloc\",\"a\"\n"); + printf(".balign 8\n"); + /* Output text to stdout */ + write_reloc = write_reloc_as_text; + outf = stdout; + } else if (as_bin) { + /* Output raw binary to stdout */ + outf = stdout; + } else { + /* + * Seek to offset of the relocation section. + * Each relocation is then written into the + * vmlinux kernel image. + */ + if (fseek(outf, sec_reloc->shdr.sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + sec_reloc->shdr.sh_offset, strerror(errno)); + } + } + + for (i = 0; i < relocs.count; i++) + size += write_reloc(relocs.offset[i], outf); + + /* Print a stop, but only if we've actually written some relocs */ + if (size) + size += write_reloc(0, outf); + + if (size > size_reserved) + /* + * Die, but suggest a value for CONFIG_RELOCATION_TABLE_SIZE + * which will fix this problem and allow a bit of headroom + * if more kernel features are enabled + */ + die("Relocations overflow available space!\n" \ + "Please adjust CONFIG_RELOCATION_TABLE_SIZE " \ + "to at least 0x%08x\n", (size + 0x1000) & ~0xFFF); +} + +/* + * As an aid to debugging problems with different linkers + * print summary information about the relocs. + * Since different linkers tend to emit the sections in + * different orders we use the section names in the output.
+ */ +static int do_reloc_info(struct section *sec, Elf_Rel *rel, ElfW(Sym) * sym, + const char *symname) +{ + printf("%16s 0x%x %16s %40s %16s\n", + sec_name(sec->shdr.sh_info), + (unsigned int)rel->r_offset, + rel_type(ELF_R_TYPE(rel->r_info)), + symname, + sec_name(sym->st_shndx)); + return 0; +} + +static void print_reloc_info(void) +{ + printf("%16s %10s %16s %40s %16s\n", + "reloc section", + "offset", + "reloc type", + "symbol", + "symbol section"); + walk_relocs(do_reloc_info); +} + +void process(FILE *fp, int as_text, int as_bin, + int show_reloc_info, int keep_relocs) +{ + regex_init(); + read_ehdr(fp); + read_shdrs(fp); + read_strtabs(fp); + read_symtabs(fp); + read_relocs(fp); + if (show_reloc_info) { + print_reloc_info(); + return; + } + emit_relocs(as_text, as_bin, fp); + if (!keep_relocs) + remove_relocs(fp); +} diff --git a/arch/sw_64/tools/relocs.h b/arch/sw_64/tools/relocs.h new file mode 100644 index 000000000000..7273ccaed11f --- /dev/null +++ b/arch/sw_64/tools/relocs.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef RELOCS_H +#define RELOCS_H + +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <elf.h> +#include <byteswap.h> +#define USE_BSD +#include <endian.h> +#include <regex.h> + +#define EM_SW64 0x9916 +/* + * SW64 ELF relocation types + */ +#define R_SW64_NONE 0 /* No reloc */ +#define R_SW64_REFLONG 1 /* Direct 32 bit */ +#define R_SW64_REFQUAD 2 /* Direct 64 bit */ +#define R_SW64_GPREL32 3 /* GP relative 32 bit */ +#define R_SW64_LITERAL 4 /* GP relative 16 bit w/optimization */ +#define R_SW64_LITUSE 5 /* Optimization hint for LITERAL */ +#define R_SW64_GPDISP 6 /* Add displacement to GP */ +#define R_SW64_BRADDR 7 /* PC+4 relative 23 bit shifted */ +#define R_SW64_HINT 8 /* PC+4 relative 16 bit shifted */ +#define R_SW64_SREL16 9 /* PC relative 16 bit */ +#define R_SW64_SREL32 10 /* PC relative 32 bit */ +#define R_SW64_SREL64 11 /* PC relative 64 bit */ +#define R_SW64_GPRELHIGH 17 /* GP relative 32 bit, high 16 bits */ +#define R_SW64_GPRELLOW 18 /* GP relative 32 bit, low 16 bits */ +#define R_SW64_GPREL16 19 /* GP relative 16 bit */ +#define R_SW64_COPY 24 /* Copy symbol at runtime */ +#define R_SW64_GLOB_DAT 25 /* Create GOT entry */ +#define R_SW64_JMP_SLOT 26 /* Create PLT entry */ +#define R_SW64_RELATIVE 27 /* Adjust by program base */ +#define R_SW64_BRSGP 28 +#define R_SW64_TLSGD 29 +#define R_SW64_TLS_LDM 30 +#define R_SW64_DTPMOD64 31 +#define R_SW64_GOTDTPREL 32 +#define R_SW64_DTPREL64 33 +#define R_SW64_DTPRELHI 34 +#define R_SW64_DTPRELLO 35 +#define R_SW64_DTPREL16 36 +#define R_SW64_GOTTPREL 37 +#define R_SW64_TPREL64 38 +#define R_SW64_TPRELHI 39 +#define R_SW64_TPRELLO 40 +#define R_SW64_TPREL16 41 + +void die(char *fmt, ...); + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +enum symtype { + S_ABS, + S_REL, + S_SEG, + S_LIN, + S_NSYMTYPES +}; + +void process(FILE *fp, int as_text, int as_bin, + int show_reloc_info, int keep_relocs); +#endif /* RELOCS_H */ diff --git a/arch/sw_64/tools/relocs_main.c b/arch/sw_64/tools/relocs_main.c new file mode 100644 index 000000000000..30a830a070db --- /dev/null +++ b/arch/sw_64/tools/relocs_main.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdint.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <endian.h> +#include <elf.h> + +#include "relocs.h" + +void 
die(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +static void usage(void) +{ + die("relocs [--reloc-info|--text|--bin|--keep] vmlinux\n"); +} + +int main(int argc, char **argv) +{ + int show_reloc_info, as_text, as_bin, keep_relocs; + const char *fname; + FILE *fp; + int i; + unsigned char e_ident[EI_NIDENT]; + + show_reloc_info = 0; + as_text = 0; + as_bin = 0; + keep_relocs = 0; + fname = NULL; + for (i = 1; i < argc; i++) { + char *arg = argv[i]; + + if (*arg == '-') { + if (strcmp(arg, "--reloc-info") == 0) { + show_reloc_info = 1; + continue; + } + if (strcmp(arg, "--text") == 0) { + as_text = 1; + continue; + } + if (strcmp(arg, "--bin") == 0) { + as_bin = 1; + continue; + } + if (strcmp(arg, "--keep") == 0) { + keep_relocs = 1; + continue; + } + } else if (!fname) { + fname = arg; + continue; + } + usage(); + } + if (!fname) + usage(); + + fp = fopen(fname, "r+"); + if (!fp) + die("Cannot open %s: %s\n", fname, strerror(errno)); + + if (fread(&e_ident, 1, EI_NIDENT, fp) != EI_NIDENT) + die("Cannot read %s: %s", fname, strerror(errno)); + + rewind(fp); + if (e_ident[EI_CLASS] == ELFCLASS64) + process(fp, as_text, as_bin, show_reloc_info, keep_relocs); + else + die("Unsupported ELF class on SW64: %s", fname); + //process_32(fp, as_text, as_bin, show_reloc_info, keep_relocs); + fclose(fp); + return 0; +}
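
Note on the relocation table format: add_reloc() above packs each entry into a single 32-bit word, with the relocation type in the top 8 bits and the offset from _text, divided by 4, in the low 24 bits. The following standalone sketch mirrors that encoding and shows the matching decode step; it is illustrative only (the helpers pack_reloc/unpack_reloc and the example values are not part of this patch), but it may be handy when inspecting .data.reloc contents by hand.

#include <stdint.h>
#include <stdio.h>

/* Mirror of the packing done in add_reloc(): type in bits 31..24,
 * (offset from _text) >> 2 in bits 23..0. The decode side is an
 * assumption about how a consumer would unpack such an entry. */
static uint32_t pack_reloc(uint32_t offset_from_text, uint8_t type)
{
	uint32_t offset = offset_from_text >> 2;	/* offsets are 4-byte granular */

	return (offset & 0x00FFFFFF) | ((uint32_t)type << 24);
}

static void unpack_reloc(uint32_t entry, uint32_t *offset_from_text, uint8_t *type)
{
	*type = entry >> 24;
	*offset_from_text = (entry & 0x00FFFFFF) << 2;
}

int main(void)
{
	/* Example only: R_SW64_REFQUAD (type 2) at offset 0x123450 from _text */
	uint32_t entry = pack_reloc(0x123450, 2);
	uint32_t off;
	uint8_t type;

	unpack_reloc(entry, &off, &type);
	printf("entry=0x%08x type=%u offset=0x%x\n", entry, type, off);
	return 0;
}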