Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions platform/pc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ SRCS-kernel.elf= \
$(SRCDIR)/drivers/gve.c \
$(SRCDIR)/drivers/nvme.c \
$(SRCDIR)/drivers/netconsole.c \
$(SRCDIR)/drivers/ramdisk.c \
$(SRCDIR)/drivers/vga.c \
$(SRCDIR)/gdb/gdbstub.c \
$(SRCDIR)/gdb/gdbtcp.c \
Expand Down
159 changes: 134 additions & 25 deletions platform/pc/service.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,15 @@
#include <pci.h>
#include <xen_platform.h>
#include <virtio/virtio.h>
#include <drivers/ramdisk.h>
#include <vmware/vmware.h>
#include "serial.h"

/*
 * Byte offsets into the boot parameter block whose address init_service()
 * receives in rsi.
 * NOTE(review): the offsets and the magic values checked against them look
 * like the Linux x86 boot protocol "zero page" layout — confirm.
 */
#define BOOT_PARAM_OFFSET_E820_ENTRIES 0x01E8 /* presumably the E820 entry count; not read in the code visible here */
#define BOOT_PARAM_OFFSET_BOOT_FLAG 0x01FE /* u16; init_service() requires 0xAA55 here */
#define BOOT_PARAM_OFFSET_HEADER 0x0202 /* u32; init_service() requires 0x53726448 here */
#define BOOT_PARAM_OFFSET_RAMDISK_IMAGE 0x0218 /* u32 address of the ramdisk image */
#define BOOT_PARAM_OFFSET_RAMDISK_SIZE 0x021C /* u32 size of the ramdisk image */
#define BOOT_PARAM_OFFSET_CMD_LINE_PTR 0x0228 /* address of the kernel command line */
#define BOOT_PARAM_OFFSET_CMDLINE_SIZE 0x0238 /* u32 size of the kernel command line */
#define BOOT_PARAM_OFFSET_E820_TABLE 0x02D0 /* presumably the start of the E820 table; not read in the code visible here */
Expand Down Expand Up @@ -64,8 +67,17 @@ typedef struct hvm_memmap_entry {
u32 reserved;
} *hvm_memmap_entry;

/*
 * One entry of the module list that hvm_start_info points to through
 * modlist_paddr. init_service() reads the first entry and registers the
 * module it describes as the ramdisk.
 * The struct is overlaid on memory provided by the boot environment, so the
 * field order and widths must not be changed.
 */
typedef struct hvm_modlist_entry {
u64 paddr; /* address of the module; used as the ramdisk start */
u64 size; /* size of the module; used as the ramdisk length */
u64 cmdline_paddr; /* presumably the address of a per-module command line; unused in the visible code */
u64 reserved; /* not referenced in the visible code */
} *hvm_modlist_entry;

extern u8 START, END;

/*
 * When true, early boot code prints the physical memory ranges and the
 * region adjustments it performs via early_debug(). init_service() sets it
 * when the kernel command line contains the token "verbose", and
 * unconditionally when INIT_DEBUG is defined.
 */
boolean cmdline_verbose_logging = false;

range kern_get_elf(void)
{
for_regions(e) {
Expand All @@ -75,6 +87,15 @@ range kern_get_elf(void)
return irange(INVALID_PHYSICAL, INVALID_PHYSICAL);
}

range kern_get_ramdisk(void)
{
for_regions(e) {
if (e->type == REGION_RAMDISK)
return irangel(e->base, e->length);
}
return irange(INVALID_PHYSICAL, INVALID_PHYSICAL);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of having this function defined in all architectures, it's better to pass the ramdisk range directly to init_ramdisk() as a function argument, and call init_ramdisk() only when there is an actual ramdisk.


BSS_RO_AFTER_INIT static boolean have_rdseed;
BSS_RO_AFTER_INIT static boolean have_rdrand;

Expand Down Expand Up @@ -292,20 +313,21 @@ void init_physical_heap(void)
}

boolean found = false;
early_init_debug("physical memory:");
if (cmdline_verbose_logging)
early_debug("Physical memory: \n");
for_regions(e) {
if (e->type == REGION_PHYSICAL) {
u64 base = e->base;
u64 length = e->length;
if (length == 0)
continue;
#ifdef INIT_DEBUG
early_debug("INIT: [");
early_debug_u64(base);
early_debug(", ");
early_debug_u64(base + length);
early_debug(")\n");
#endif
if (cmdline_verbose_logging) {
early_debug("\t[");
early_debug_u64(base);
early_debug(", ");
early_debug_u64(base + length);
early_debug(")\n");
}
if (!pageheap_add_range(base, length))
halt(" - id_heap_add_range failed\n");
found = true;
Expand All @@ -319,19 +341,59 @@ void init_physical_heap(void)
static void setup_initmap(void)
{
u64 kernel_size = u64_from_pointer(&END) - KERNEL_BASE_PHYS;
create_region(KERNEL_BASE_PHYS, kernel_size, REGION_KERN_LOAD);

region page_region = 0;
for_regions(r) {
if ((r->type == REGION_PHYSICAL) && (r->base <= KERNEL_BASE_PHYS) &&
(r->base + r->length > KERNEL_BASE_PHYS)) {
/* This is the memory region where the kernel has been loaded: adjust the region
* boundaries so that the memory occupied by the kernel code does not appear as free
* memory, and possibly make a new memory region. */
if (r->base < KERNEL_BASE_PHYS)
create_region(r->base, KERNEL_BASE_PHYS - r->base, r->type);
region_resize(r, r->base - pad(KERNEL_BASE_PHYS + kernel_size, PAGESIZE));

page_region = r;
break;
if (r->type == REGION_PHYSICAL) {
for_regions(s) {
if (s->type != REGION_PHYSICAL &&
s->length != 0 &&
r->base <= s->base &&
r->base + r->length > s->base) {
u64 inner_start = s->base;
u64 inner_end = s->base + s->length;

if (inner_end > r->base + r->length) {
inner_end = r->base + r->length;
}
if (cmdline_verbose_logging) {
early_debug("Reserving region: [");
early_debug_u64(r->base);
early_debug(" - ");
early_debug_u64(r->base + r->length);
early_debug("] (physmem), [");
early_debug_u64(inner_start);
early_debug(" - ");
early_debug_u64(inner_end);
early_debug("] (");
early_debug_u64(s->type);
early_debug(")\n");
}
if (r->base < inner_start) {
create_region(r->base, inner_start - r->base, r->type);
if (cmdline_verbose_logging) {
early_debug("\tcreate: ");
early_debug_u64(r->base);
early_debug(" - ");
early_debug_u64(inner_start);
early_debug("\n");
}
}
region_resize(r, r->base - pad(inner_end, PAGESIZE));
if (cmdline_verbose_logging) {
early_debug("\tshrink: ");
early_debug_u64(r->base);
early_debug(" - ");
early_debug_u64(r->base + r->length);
early_debug("\n");
}
if (s->type == REGION_KERN_LOAD) {
assert(!page_region);
page_region = r;
}
}
}
}
}
assert(page_region);
Expand Down Expand Up @@ -366,11 +428,14 @@ static void setup_initmap(void)
}

// init linker set
void init_service(u64 rdi, u64 rsi)
void init_service(u64 rdi, u64 rsi, hvm_start_info start_info)
{
u8 *params = pointer_from_u64(rsi);
boolean should_setup_initmap = false;
const char *cmdline = 0;
u32 cmdline_size;
void *ramdisk = 0;
u32 ramdisk_size;

if (params && (*(u16 *)(params + BOOT_PARAM_OFFSET_BOOT_FLAG) == 0xAA55) &&
(*(u32 *)(params + BOOT_PARAM_OFFSET_HEADER) == 0x53726448)) {
Expand All @@ -389,9 +454,55 @@ void init_service(u64 rdi, u64 rsi)
BOOT_PARAM_OFFSET_CMD_LINE_PTR)));
cmdline_size = *((u32 *)(params + BOOT_PARAM_OFFSET_CMDLINE_SIZE));

setup_initmap();
ramdisk = pointer_from_u64((u64)*((u32 *)(params +
BOOT_PARAM_OFFSET_RAMDISK_IMAGE)));
ramdisk_size = *((u32 *)(params + BOOT_PARAM_OFFSET_RAMDISK_SIZE));

should_setup_initmap = true;
} else if (start_info)
{
cmdline = pointer_from_u64(start_info->cmdline_paddr);
cmdline_size = 0;
const char *cmdline_p = cmdline;
while (*cmdline_p)
{
cmdline_size++;
cmdline_p++;
}

if (start_info->nr_modules)
{
hvm_modlist_entry ramdisk_entry = pointer_from_u64(start_info->modlist_paddr);
ramdisk = pointer_from_u64(ramdisk_entry->paddr);
ramdisk_size = ramdisk_entry->size;
}
should_setup_initmap = true;
}

if (cmdline) {
create_region(u64_from_pointer(cmdline), cmdline_size, REGION_CMDLINE);
sstring input = sstring_from_cstring(cmdline, cmdline_size);
sstring token, rest;
sstring delim = ss(" ");
token = runtime_strtok_r(&input, delim, &rest);
while (!sstring_is_null(token)) {
if(runtime_strcmp(token, ss("verbose")) == 0) {
cmdline_verbose_logging = true;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would remove the cmdline_verbose_logging variable, and keep logging confined to when INIT_DEBUG is #defined; after all, these logging messages are useful mostly when debugging, and I would rather avoid carrying this debug stuff in production kernels.

}
token = runtime_strtok_r(0, delim, &rest);
}
}

if (ramdisk)
create_region(u64_from_pointer(ramdisk), ramdisk_size, REGION_RAMDISK);

#ifdef INIT_DEBUG
cmdline_verbose_logging = true;
#endif

if (should_setup_initmap)
setup_initmap();

serial_init();
early_init_debug("init_service");

Expand All @@ -402,8 +513,6 @@ void init_service(u64 rdi, u64 rsi)
init_hwrand();
kaslr();
init_kernel_heaps();
if (cmdline)
create_region(u64_from_pointer(cmdline), cmdline_size, REGION_CMDLINE);
u64 stack_size = 32*PAGESIZE;
u64 stack_location = allocate_u64((heap)heap_page_backed(get_kernel_heaps()), stack_size);
stack_location += stack_size - STACK_ALIGNMENT;
Expand All @@ -421,8 +530,7 @@ void pvh_start(hvm_start_info start_info)
if (mem_table[i].type == HVM_MEMMAP_TYPE_RAM)
create_region(mem_table[i].addr, mem_table[i].size, REGION_PHYSICAL);
}
setup_initmap();
init_service(0, 0);
init_service(0, 0, start_info);
}

RO_AFTER_INIT static struct console_driver serial_console_driver = {
Expand Down Expand Up @@ -499,6 +607,7 @@ void detect_devices(kernel_heaps kh, storage_attach sa)
init_pvscsi(kh, sa);
init_nvme(kh, sa);
init_ata_pci(kh, sa);
init_ramdisk(kh, sa);

init_virtio_9p(kh);
init_virtio_socket(kh);
Expand Down
5 changes: 5 additions & 0 deletions platform/riscv-virt/service.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ range kern_get_elf(void)
return irange(INVALID_PHYSICAL, INVALID_PHYSICAL);
}

/*
 * Ramdisk lookup stub for this platform: no region is searched, so the
 * result is always the "not found" range
 * irange(INVALID_PHYSICAL, INVALID_PHYSICAL).
 */
range kern_get_ramdisk(void)
{
    range absent = irange(INVALID_PHYSICAL, INVALID_PHYSICAL);

    return absent;
}

void reclaim_regions(void)
{
/* mmu init complete; unmap temporary identity map */
Expand Down
5 changes: 5 additions & 0 deletions platform/virt/service.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@ range kern_get_elf(void)
return irange(INVALID_PHYSICAL, INVALID_PHYSICAL);
}

/*
 * Ramdisk lookup stub for this platform: no region is searched, so the
 * result is always the "not found" range
 * irange(INVALID_PHYSICAL, INVALID_PHYSICAL).
 */
range kern_get_ramdisk(void)
{
    range absent = irange(INVALID_PHYSICAL, INVALID_PHYSICAL);

    return absent;
}

void reclaim_regions(void)
{
}
Expand Down
68 changes: 68 additions & 0 deletions src/drivers/ramdisk.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#include <kernel.h>
#include <storage.h>

#include "ramdisk.h"

typedef struct storage *storage;

declare_closure_struct(2, 3, void, ramdisk_io,
storage, st, boolean, write,
void *buf, range blocks, status_handler sh);

typedef struct storage
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better to avoid such a generic name, I would add a ramdisk-related prefix, e.g. something like struct ramdisk_storage

{
closure_struct(storage_simple_req_handler, req_handler);
closure_struct(ramdisk_io, read);
closure_struct(ramdisk_io, write);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not necessary to define the ramdisk_io closure struct, you can use closure_struct(block_io) instead, for both read and write. Then, you can have 2 separate functions (defined with closure_func_basic) for read and write: for reading, you would retrieve the ramdisk_storage struct pointer via struct_from_closure(); for writing, you don't need any pointer and just apply the status handler with the error status.

void *ramdisk;
u64 ramdisk_size;
} *storage;

define_closure_function(2, 3, void, ramdisk_io,
storage, st, boolean, write,
void *buf, range blocks, status_handler sh)
{
storage st = bound(st);
boolean write = bound(write);

if (write)
{
apply(sh, timm("result", "read-only device"));
return;
}

u64 start_byte_offset = blocks.start * SECTOR_SIZE;
u64 end_byte_offset = blocks.end * SECTOR_SIZE;
if (start_byte_offset > end_byte_offset || end_byte_offset > st->ramdisk_size)
{
apply(sh, timm("result", "read out of bounds"));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can use range_valid(blocks); also, instead of applying an error status to the status handler, I would just use assert(), since an out-of-bounds range would indicate a kernel bug bad enough to warrant taking down the VM.

return;
}

runtime_memcpy(buf, st->ramdisk + start_byte_offset, end_byte_offset - start_byte_offset);
apply(sh, STATUS_OK);
}

void init_ramdisk(kernel_heaps kh, storage_attach a)
{
range ramdisk_phys = kern_get_ramdisk();
if (ramdisk_phys.start == INVALID_PHYSICAL)
{
msg_print("RAMDISK: not detected");
return;
}
u64 ramdisk_size = range_span(ramdisk_phys);
u64 v = allocate_u64((heap)heap_virtual_huge(kh), ramdisk_size);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assert(v != INVALID_PHYSICAL);

map(v, ramdisk_phys.start, ramdisk_size, pageflags_memory());

heap h = heap_locked(kh);
storage st = allocate(h, sizeof(struct storage));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assert(st != INVALID_ADDRESS);

st->ramdisk = pointer_from_u64(v);
st->ramdisk_size = ramdisk_size;
apply(a,
storage_init_req_handler(&st->req_handler,
init_closure(&st->read, ramdisk_io, st, false),
init_closure(&st->write, ramdisk_io, st, true)),
ramdisk_size, -1);
msg_print("RAMDISK: %u bytes at %p", st->ramdisk_size, st->ramdisk);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better to use msg_info(). For most things the kernel is quiet during boot, unless explicitly told to be more verbose. (I know, right now LOG_INFO messages cannot be enabled this early during boot, but hopefully we will add this capability in the future.)

}
1 change: 1 addition & 0 deletions src/drivers/ramdisk.h
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/*
 * Probe for a boot ramdisk and, if one is present, expose it as a storage
 * device.
 *
 * kh: kernel heaps used to reserve virtual address space for the mapping and
 *     to allocate the driver state.
 * a:  storage attach handler that is applied with the ramdisk's request
 *     handler and its size.
 *
 * The ramdisk range is obtained from kern_get_ramdisk(); when that reports
 * INVALID_PHYSICAL the function only prints a message and returns. Write
 * requests to the attached device complete with a "read-only device" status.
 */
void init_ramdisk(kernel_heaps kh, storage_attach a);
1 change: 1 addition & 0 deletions src/kernel/kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,7 @@ void init_platform_devices(kernel_heaps kh);
void init_cpuinfo_machine(cpuinfo ci, heap backed);
void kernel_runtime_init(kernel_heaps kh);
range kern_get_elf(void);
/* Returns the range of the boot ramdisk, or
 * irange(INVALID_PHYSICAL, INVALID_PHYSICAL) when the platform has none. */
range kern_get_ramdisk(void);
void reclaim_regions(void);

extern u64 kernel_phys_offset;
Expand Down
3 changes: 2 additions & 1 deletion src/kernel/region.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ typedef struct region *region;
#define REGION_RECLAIM 14 /* areas to be unmapped and reclaimed in stage3 (only stage2 stack presently) */
#define REGION_SMBIOS 15 /* SMBIOS entry point */
#define REGION_RSDP 16 /* location of the ACPI RSDP */

#define REGION_RAMDISK 17 /* kernel ramdisk */
#define REGION_KERN_LOAD 18 /* kernel load region */

static inline region create_region(u64 base, u64 length, int type)
{
Expand Down
1 change: 1 addition & 0 deletions src/x86_64/crt0.s
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,7 @@ write_xmsr:
.end:

_start:
xor edx, edx
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be xor rdx, rdx, so that all 64 bits are zeroed.

call init_service
hlt
.end:
Expand Down