Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions build_debian.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ TRUSTED_GPG_DIR=$BUILD_TOOL_PATH/trusted.gpg.d
exit 1
}

if [ "$IMAGE_TYPE" = "aboot" ]; then
TARGET_BOOTLOADER="aboot"
fi

## Check if not a last stage of RFS build
if [[ $RFS_SPLIT_LAST_STAGE != y ]]; then

Expand All @@ -68,9 +72,14 @@ if [[ -d $FILESYSTEM_ROOT ]]; then
fi
mkdir -p $FILESYSTEM_ROOT
mkdir -p $FILESYSTEM_ROOT/$PLATFORM_DIR
mkdir -p $FILESYSTEM_ROOT/$PLATFORM_DIR/grub
touch $FILESYSTEM_ROOT/$PLATFORM_DIR/firsttime

bootloader_packages=""
if [ "$TARGET_BOOTLOADER" != "aboot" ]; then
mkdir -p $FILESYSTEM_ROOT/$PLATFORM_DIR/grub
bootloader_packages="grub2-common"
fi

## ensure proc is mounted
sudo mount proc /proc -t proc || true

Expand Down Expand Up @@ -365,7 +374,7 @@ sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT apt-get -y in
gdisk \
sysfsutils \
squashfs-tools \
grub2-common \
$bootloader_packages \
screen \
hping3 \
tcptraceroute \
Expand Down Expand Up @@ -825,6 +834,17 @@ sudo mkdir -p $FILESYSTEM_ROOT/var/lib/docker
sudo rm -f $FILESYSTEM_ROOT/etc/resolvconf/resolv.conf.d/original
sudo cp files/image_config/resolv-config/resolv.conf.head $FILESYSTEM_ROOT/etc/resolvconf/resolv.conf.d/head

## Optimize filesystem size
if [ "$BUILD_REDUCE_IMAGE_SIZE" = "y" ]; then
sudo scripts/build-optimize-fs-size.py "$FILESYSTEM_ROOT" \
--image-type "$IMAGE_TYPE" \
--hardlinks var/lib/docker \
--hardlinks usr/share/sonic/device \
--remove-docs \
--remove-mans \
--remove-licenses
fi

sudo mksquashfs $FILESYSTEM_ROOT $FILESYSTEM_SQUASHFS -comp zstd -b 1M -e boot -e var/lib/docker -e $PLATFORM_DIR

## Reduce /boot permission
Expand Down
2 changes: 1 addition & 1 deletion files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ sudo dpkg --root=$FILESYSTEM_ROOT -i $debs_path/libnss-radius_*.deb || \
sudo sed -i -e '/^passwd/s/ radius//' $FILESYSTEM_ROOT/etc/nsswitch.conf

# Install a custom version of kdump-tools (and its dependencies via 'apt-get -y install -f')
if [[ $TARGET_BOOTLOADER == grub ]]; then
if [ "$TARGET_BOOTLOADER" != uboot ]; then
sudo DEBIAN_FRONTEND=noninteractive dpkg --root=$FILESYSTEM_ROOT -i $debs_path/kdump-tools_*.deb || \
sudo LANG=C DEBIAN_FRONTEND=noninteractive DEBCONF_NONINTERACTIVE_SEEN=true chroot $FILESYSTEM_ROOT apt-get -q --no-install-suggests --no-install-recommends install
cat $IMAGE_CONFIGS/kdump/kdump-tools | sudo tee -a $FILESYSTEM_ROOT/etc/default/kdump-tools > /dev/null
Expand Down
2 changes: 2 additions & 0 deletions rules/config
Original file line number Diff line number Diff line change
Expand Up @@ -315,3 +315,5 @@ SONIC_OS_VERSION ?= 11
# PIP timeout for http connection
PIP_HTTP_TIMEOUT ?= 60

# BUILD_REDUCE_IMAGE_SIZE - reduce the image size as much as possbible
BUILD_REDUCE_IMAGE_SIZE = n
253 changes: 253 additions & 0 deletions scripts/build-optimize-fs-size.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
#!/usr/bin/env python3

import argparse
import hashlib
import os
import shutil
import subprocess
import sys

from collections import defaultdict
from functools import cached_property

DRY_RUN = False
def enable_dry_run(enabled):
global DRY_RUN # pylint: disable=global-statement
DRY_RUN = enabled

class File:
def __init__(self, path):
self.path = path

def __str__(self):
return self.path

def rmtree(self):
if DRY_RUN:
print(f'rmtree {self.path}')
return
shutil.rmtree(self.path)

def hardlink(self, src):
if DRY_RUN:
print(f'hardlink {self.path} {src}')
return
st = self.stats
os.remove(self.path)
os.link(src.path, self.path)
os.chmod(self.path, st.st_mode)
os.chown(self.path, st.st_uid, st.st_gid)
os.utime(self.path, times=(st.st_atime, st.st_mtime))

@property
def name(self):
return os.path.basename(self.path)

@cached_property
def stats(self):
return os.stat(self.path)

@cached_property
def size(self):
return self.stats.st_size

@cached_property
def checksum(self):
with open(self.path, 'rb') as f:
return hashlib.md5(f.read()).hexdigest()

class FileManager:
def __init__(self, path):
self.path = path
self.files = []
self.folders = []
self.nindex = defaultdict(list)
self.cindex = defaultdict(list)

def add_file(self, path):
if not os.path.isfile(path) or os.path.islink(path):
return
f = File(path)
self.files.append(f)

def load_tree(self):
self.files = []
self.folders = []
for root, _, files in os.walk(self.path):
self.folders.append(File(root))
for f in files:
self.add_file(os.path.join(root, f))
print(f'loaded {len(self.files)} files and {len(self.folders)} folders')

def generate_index(self):
print('Computing file hashes')
for f in self.files:
self.nindex[f.name].append(f)
self.cindex[(f.name, f.checksum)].append(f)

def create_hardlinks(self):
print('Creating hard links')
for files in self.cindex.values():
if len(files) <= 1:
continue
orig = files[0]
for f in files[1:]:
f.hardlink(orig)

class FsRoot:
def __init__(self, path):
self.path = path

def iter_fsroots(self):
yield self.path
dimgpath = os.path.join(self.path, 'var/lib/docker/overlay2')
for layer in os.listdir(dimgpath):
yield os.path.join(dimgpath, layer, 'diff')

def collect_fsroot_size(self):
cmd = ['du', '-sb', self.path]
p = subprocess.run(cmd, text=True, check=False,
stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
return int(p.stdout.split()[0])

def _remove_root_paths(self, relpaths):
for root in self.iter_fsroots():
for relpath in relpaths:
path = os.path.join(root, relpath)
if os.path.isdir(path):
if DRY_RUN:
print(f'rmtree {path}')
else:
shutil.rmtree(path)

def remove_docs(self):
self._remove_root_paths([
'usr/share/doc',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe some actions can be enabled by default.
E.g. we clean up /usr/share/doc here:

sudo LANG=C chroot $FILESYSTEM_ROOT bash -c 'rm -rf /usr/share/doc/* /usr/share/locale/* /var/lib/apt/lists/* /tmp/*'

I think it's possible to remove other directories with docs/mans/licensses without any conditons.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some of these cleanup, especially the removal ones could definitely be added as part of the general build.

However this would have to be more than just here, since it would also need to apply to all docker images.
On top of that the removal of licenses could have some legal implication for public/redistributable builds such as the community ones.
This should however not be a problem for internal/private images.

Most of the savings of this change actually come from the hardlinking steps.
And for Aboot images the removal of other platforms and unneeded kernel modules and firmwares.

This mitigation, even if it merges in master, has 202305 as the main target for 4GB systems.
It therefore felt simpler to centralize the cleanup under a postprocessing script hidden behind a toggle instead of touching tens of files.
This script also has the advantage of cleaning up behind newly introduced code that forgot the cleanup steps.
Until there is some checks and tooling in place to prevent new code from contributing to bloat, things will slip and become a maintenance burden.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To add to your comment, the proper thing to do here would be to add an /etc/dpkg/dpkg.conf.d/01_sonic conf instead of removing files like it's currently done in build_debian.sh
In this configuration you can specify things like path-exclude=/usr/share/doc however that will not retroactively apply the previously installed packages.

In the case of docker, that means that the debian base image will retain these files no matter what.
If you decide to remove them as part of the build of a child Dockerfile, this is actually going to create whiteout files which consumes space instead of the final goal being to release space.

Copy link
Contributor

@k-v1 k-v1 Sep 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the case of docker, that means that the debian base image will retain these files no matter what.

This is already done for docker containers:

RUN find /usr/share/doc -depth \( -type f -o -type l \) ! -name copyright | xargs rm || true

COPY ["dpkg_01_drop", "/etc/dpkg/dpkg.cfg.d/01_drop"]

So my suggestion was only about host system.

On top of that the removal of licenses could have some legal implication for public/redistributable builds such as the community ones.

We keep copyright files in docker containers but remove them from host system.
Probably we shouldn't remove these files from /usr/share/doc on host system.
There was my old PR #14417 to fix it. I can reopen it, but I'm still not sure should we keep these files or not.

'usr/share/doc-base',
'usr/local/share/doc',
'usr/local/share/doc-base',
])

def remove_mans(self):
self._remove_root_paths([
'usr/share/man',
'usr/local/share/man',
])

def remove_licenses(self):
self._remove_root_paths([
'usr/share/common-licenses',
])

def hardlink_under(self, path):
fm = FileManager(os.path.join(self.path, path))
fm.load_tree()
fm.generate_index()
fm.create_hardlinks()

def remove_platforms(self, filter_func):
devpath = os.path.join(self.path, 'usr/share/sonic/device')
for platform in os.listdir(devpath):
if not filter_func(platform):
path = os.path.join(devpath, platform)
if DRY_RUN:
print(f'rmtree platform {path}')
else:
shutil.rmtree(path)

def remove_modules(self, modules):
modpath = os.path.join(self.path, 'lib/modules')
kversion = os.listdir(modpath)[0]
kmodpath = os.path.join(modpath, kversion)
for module in modules:
path = os.path.join(kmodpath, module)
if os.path.isdir(path):
if DRY_RUN:
print(f'rmtree module {path}')
else:
shutil.rmtree(path)

def remove_firmwares(self, firmwares):
fwpath = os.path.join(self.path, 'lib/firmware')
for fw in firmwares:
path = os.path.join(fwpath, fw)
if os.path.isdir(path):
if DRY_RUN:
print(f'rmtree firmware {path}')
else:
shutil.rmtree(path)


def specialize_aboot_image(self):
fp = lambda p: '-' not in p or 'arista' in p or 'common' in p
self.remove_platforms(fp)
self.remove_modules([
'kernel/drivers/gpu',
'kernel/drivers/infiniband',
])
self.remove_firmwares([
'amdgpu',
'i915',
'mediatek',
'nvidia',
'radeon',
])

def specialize_image(self, image_type):
if image_type == 'aboot':
self.specialize_aboot_image()

def parse_args(args):
parser = argparse.ArgumentParser()
parser.add_argument('fsroot',
help="path to the fsroot build folder")
parser.add_argument('-s', '--stats', action='store_true',
help="show space statistics")
parser.add_argument('--hardlinks', action='append',
help="path where similar files need to be hardlinked")
parser.add_argument('--remove-docs', action='store_true',
help="remove documentation")
parser.add_argument('--remove-licenses', action='store_true',
help="remove license files")
parser.add_argument('--remove-mans', action='store_true',
help="remove manpages")
parser.add_argument('--image-type', default=None,
help="type of image being built")
parser.add_argument('--dry-run', action='store_true',
help="only display what would happen")
return parser.parse_args(args)

def main(args):
args = parse_args(args)

enable_dry_run(args.dry_run)

fs = FsRoot(args.fsroot)
if args.stats:
begin = fs.collect_fsroot_size()
print(f'fsroot size is {begin} bytes')

if args.remove_docs:
fs.remove_docs()

if args.remove_mans:
fs.remove_mans()

if args.remove_licenses:
fs.remove_licenses()

if args.image_type:
fs.specialize_image(args.image_type)

for path in args.hardlinks:
fs.hardlink_under(path)

if args.stats:
end = fs.collect_fsroot_size()
pct = 100 - end / begin * 100
print(f'fsroot reduced to {end} from {begin} {pct:.2f}')

return 0

if __name__ == '__main__':
sys.exit(main(sys.argv[1:]))
1 change: 1 addition & 0 deletions slave.mk
Original file line number Diff line number Diff line change
Expand Up @@ -1536,6 +1536,7 @@ $(addprefix $(TARGET_PATH)/, $(SONIC_INSTALLERS)) : $(TARGET_PATH)/% : \
SONIC_VERSION_CACHE=$(SONIC_VERSION_CACHE) \
MULTIARCH_QEMU_ENVIRON=$(MULTIARCH_QEMU_ENVIRON) \
CROSS_BUILD_ENVIRON=$(CROSS_BUILD_ENVIRON) \
BUILD_REDUCE_IMAGE_SIZE=$(BUILD_REDUCE_IMAGE_SIZE) \
MASTER_KUBERNETES_VERSION=$(MASTER_KUBERNETES_VERSION) \
MASTER_KUBERNETES_CONTAINER_IMAGE_VERSION=$(MASTER_KUBERNETES_CONTAINER_IMAGE_VERSION) \
MASTER_PAUSE_VERSION=$(MASTER_PAUSE_VERSION) \
Expand Down