How Kubernetes Pod Networking Is Initialized
Jul 11, 2024 00:00 · 3965 words · 8 minute read
This article explores how a Pod's network stack is initialized (the container runtime in this Kubernetes environment is containerd, with no Docker). It is fairly long, so please bear with it to the end.
You should be able to state, without a second thought, that the call chain Kubernetes follows when creating a Pod is kubelet -> containerd -> runc.
Sandbox (pause)
Every Pod has a pause container that initializes the Pod's entire network stack; the pause container and the application containers live in the same network namespace.
kubelet calls containerd through the CRI RunPodSandbox API to start the pause container:
func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandboxRequest) (_ *runtime.RunPodSandboxResponse, retErr error) {
// a lot of code here
if !hostNetwork(config) && !userNsEnabled {
var netnsMountDir = "/var/run/netns"
sandbox.NetNS, err = netns.NewNetNS(netnsMountDir)
if err != nil {
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
}
// Update network namespace in the container's spec
c.updateNetNamespacePath(spec, sandbox.NetNSPath)
}
}
Under /var/run/netns you can see many files with the cni- prefix:
$ ll /var/run/netns
total 0
-r--r--r-- 1 root root 0 Jan 21 17:09 cni-0e658861-8c2e-aad0-9ed3-7d6603530739
-r--r--r-- 1 root root 0 Jan 21 17:10 cni-1e58c52f-3d19-161d-2a67-74bf4ec3d25d
-r--r--r-- 1 root root 0 Jan 21 17:09 cni-1f172a43-4dfa-3a55-5117-42219807b2f7
-r--r--r-- 1 root root 0 Jan 21 19:23 cni-232a665d-25eb-5541-86c6-687d9be31ffc
-r--r--r-- 1 root root 0 Mar 21 17:39 cni-23cf1be4-65af-1e5f-c1a6-beb1bca2e541
-r--r--r-- 1 root root 0 Jan 21 19:23 cni-24df7e4d-1b92-84a1-f20b-4d3e5f2aed37
-r--r--r-- 1 root root 0 Apr 29 21:02 cni-256356df-0a90-6dbf-bd49-c3fb7084ef33
-r--r--r-- 1 root root 0 Mar 8 10:28 cni-2b72ac82-443b-cff6-04d0-3f55f97964d4
-r--r--r-- 1 root root 0 Jan 21 17:09 cni-2b79ba54-29f8-d0d0-3a58-3cfb779c16e9
-r--r--r-- 1 root root 0 May 1 18:06 cni-2e61699f-834d-4bb3-07fc-284d758235ea
-r--r--r-- 1 root root 0 May 9 11:22 cni-2e874a72-cd36-8bfc-e326-3cee15b23c0c
-r--r--r-- 1 root root 0 Jan 21 17:09 cni-32b2060d-2df9-b643-93b8-652518af4f6b
Next, let's look at the NewNetNS function:
// NewNetNS creates a network namespace.
func NewNetNS(baseDir string) (*NetNS, error) {
return NewNetNSFromPID(baseDir, 0) // pid is specified as 0
}
// NewNetNSFromPID returns the netns from pid or a new netns if pid is 0.
func NewNetNSFromPID(baseDir string, pid uint32) (*NetNS, error) {
path, err := newNS(baseDir, pid)
if err != nil {
return nil, fmt.Errorf("failed to setup netns: %w", err)
}
return &NetNS{path: path}, nil
}
Then the newNS function:
func newNS(baseDir string, pid uint32) (nsPath string, err error) {
b := make([]byte, 16)
_, err = rand.Read(b)
if err != nil {
return "", fmt.Errorf("failed to generate random netns name: %w", err)
}
// Create the directory for mounting network namespaces
// This needs to be a shared mountpoint in case it is mounted in to
// other namespaces (containers)
if err := os.MkdirAll(baseDir, 0755); err != nil {
return "", err
}
// create an empty file at the mount point and fail if it already exists
nsName := fmt.Sprintf("cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
nsPath = path.Join(baseDir, nsName)
mountPointFd, err := os.OpenFile(nsPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
if err != nil {
return "", err
}
mountPointFd.Close()
var wg sync.WaitGroup
wg.Add(1)
// do namespace work in a dedicated goroutine, so that we can safely
// Lock/Unlock OSThread without upsetting the lock/unlock state of
// the caller of this function
go (func() {
defer wg.Done()
runtime.LockOSThread()
// Don't unlock. By not unlocking, golang will kill the OS thread when the
// goroutine is done (for go1.10+)
var origNS cnins.NetNS
origNS, err = cnins.GetNS(getCurrentThreadNetNSPath())
if err != nil {
return
}
defer origNS.Close()
// create a new netns on the current thread
err = unix.Unshare(unix.CLONE_NEWNET)
if err != nil {
return
}
// Put this thread back to the orig ns, since it might get reused (pre go1.10)
defer origNS.Set()
// bind mount the netns from the current thread (from /proc) onto the
// mount point. This causes the namespace to persist, even when there
// are no threads in the ns.
err = unix.Mount(getCurrentThreadNetNSPath(), nsPath, "none", unix.MS_BIND, "")
if err != nil {
err = fmt.Errorf("failed to bind mount ns at %s: %w", nsPath, err)
}
})()
wg.Wait()
return nsPath, err
}
// getCurrentThreadNetNSPath copied from pkg/ns
func getCurrentThreadNetNSPath() string {
// /proc/self/ns/net returns the namespace of the main thread, not
// of whatever thread this goroutine is running on. Make sure we
// use the thread's net namespace since the thread is switching around
return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
}
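A quick way to see why the thread-level path matters is to compare the two readlinks on a locked OS thread, before and after an unshare. This is only a sketch, not containerd code, and the unshare call needs root (CAP_SYS_ADMIN):

package main

import (
    "fmt"
    "os"
    "runtime"

    "golang.org/x/sys/unix"
)

func threadNetNS() string {
    // Same formula as getCurrentThreadNetNSPath above: the netns of the
    // current OS thread, not of the whole process.
    link, _ := os.Readlink(fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid()))
    return link
}

func main() {
    runtime.LockOSThread() // keep this goroutine pinned to one OS thread
    defer runtime.UnlockOSThread()

    procNS, _ := os.Readlink("/proc/self/ns/net")
    fmt.Println("process netns:", procNS)
    fmt.Println("thread netns: ", threadNetNS()) // identical so far

    // Create a new netns for this thread only (requires root). On a non-main
    // thread, /proc/self/ns/net would keep showing the original namespace,
    // which is exactly why newNS uses the task-level path.
    if err := unix.Unshare(unix.CLONE_NEWNET); err != nil {
        panic(err)
    }
    fmt.Println("thread netns after unshare:", threadNetNS())
}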
- The Unshare(unix.CLONE_NEWNET) system call creates a new network namespace for containerd's current thread
- The Mount system call bind-mounts the current thread's network namespace onto the mount point just created under /var/run/netns

So everything under /var/run/netns is a shared network namespace mount point:
$ mount | grep cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9
nsfs on /run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9 type nsfs (rw)
nsfs on /run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9 type nsfs (rw)
containerd then passes the network namespace mount point it just created as a parameter (in the OCI container config) to runc, which starts the pause process. Pick any Pod's pause container to verify this:
$ kubectl get po pod-nginx -o yaml | grep sandbox
io.kubernetes.pod.sandbox.uid: f760e07f0771b5b1b0404f3758dec1a31b534494ddd7bc9287d084e03240b20e
$ ctr -n k8s.io container ls | grep f760e07f0771b5b1b0404f3758dec1a31b534494ddd7bc9287d084e03240b20e
f760e07f0771b5b1b0404f3758dec1a31b534494ddd7bc9287d084e03240b20e registry-1.ict-mec.net:18443/kubesphere/pause:3.8 io.containerd.runc.v2
$ ctr -n k8s.io container info f760e07f0771b5b1b0404f3758dec1a31b534494ddd7bc9287d084e03240b20e | jq ".Spec.linux.namespaces"
[
{
"type": "pid"
},
{
"type": "ipc"
},
{
"type": "uts"
},
{
"type": "mount"
},
{
"type": "network",
"path": "/var/run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9"
}
]
In the OCI container config, the network namespace path is set to /var/run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9, i.e. the netns containerd just created.
runc picks up from the definitions in the OCI container config file; search its source for the json:"namespaces keyword:
type Linux struct {
// Namespaces contains the namespaces that are created and/or joined by the container
Namespaces []LinuxNamespace `json:"namespaces,omitempty"`
}
Then search for the "network" keyword:
const (
// PIDNamespace for isolating process IDs
PIDNamespace LinuxNamespaceType = "pid"
// NetworkNamespace for isolating network devices, stacks, ports, etc
NetworkNamespace LinuxNamespaceType = "network"
)
Note the file path vendor/github.com/opencontainers/runtime-spec/specs-go/config.go: this is the OCI spec, and containerd passes the container description to runc as a config file (config.json) that follows it.
runc in turn relies on libcontainer to create containers, so internally it has to convert the OCI container config into a libcontainer config.
You can tell from this conversion layer alone that the container world has quite a bit of history behind it.
// CreateLibcontainerConfig creates a new libcontainer configuration from a
// given specification and a cgroup name
func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
// a lot of code here
if spec.Linux != nil {
for _, ns := range spec.Linux.Namespaces {
t, exists := namespaceMapping[ns.Type]
if !exists {
return nil, fmt.Errorf("namespace %q does not exist", ns)
}
if config.Namespaces.Contains(t) {
return nil, fmt.Errorf("malformed spec file: duplicated ns %q", ns)
}
config.Namespaces.Add(t, ns.Path)
}
// a lot of code here
}
}
Here t is configs.NEWNET, i.e. the string "NEWNET", and ns.Path is "/var/run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9" (to continue the example).
runc starts the container process in two steps:
- runc run
- runc init
Parent process: runc run
Follow the trail from libcontainer's Start method, which launches the container:
Start -> start -> newParentProcess -> newInitProcess -> bootstrapData
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
nsMaps := make(map[configs.NamespaceType]string)
for _, ns := range c.config.Namespaces {
if ns.Path != "" {
nsMaps[ns.Type] = ns.Path
}
}
_, sharePidns := nsMaps[configs.NEWPID]
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, initStandard)
if err != nil {
return nil, err
}
// a lot of code here
}
The various namespaces (among them the network namespace) are pulled out of the libcontainer Config.
// bootstrapData encodes the necessary data in netlink binary format
// as a io.Reader.
// Consumer can write the data to a bootstrap program
// such as one that uses nsenter package to bootstrap the container's
// init process correctly, i.e. with correct namespaces, uid/gid
// mapping etc.
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, it initType) (_ io.Reader, Err error) {
// create the netlink message
r := nl.NewNetlinkRequest(int(InitMsg), 0)
// a lot of code here
// write cloneFlags
r.AddData(&Int32msg{
Type: CloneFlagsAttr,
Value: uint32(cloneFlags),
})
// write custom namespace paths
if len(nsMaps) > 0 {
nsPaths, err := c.orderNamespacePaths(nsMaps)
if err != nil {
return nil, err
}
r.AddData(&Bytemsg{
Type: NsPathsAttr,
Value: []byte(strings.Join(nsPaths, ",")),
})
}
// a lot of code here
return bytes.NewReader(r.Serialize()), nil
}
All the namespace paths (including "/var/run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9") are taken out of nsMaps and joined with ",", and finally "serialized" into the bootstrap data. The namespace-related part is packed by the Bytemsg type.
// Bytemsg has the following representation
// | nlattr len | nlattr type |
// | value | pad |
type Bytemsg struct {
Type uint16
Value []byte
}
func (msg *Bytemsg) Serialize() []byte {
l := msg.Len()
if l > math.MaxUint16 {
// We cannot return nil nor an error here, so we panic with
// a specific type instead, which is handled via recover in
// bootstrapData.
panic(netlinkError{fmt.Errorf("netlink: cannot serialize bytemsg of length %d (larger than UINT16_MAX)", l)})
}
buf := make([]byte, (l+unix.NLA_ALIGNTO-1) & ^(unix.NLA_ALIGNTO-1))
native := nl.NativeEndian()
native.PutUint16(buf[0:2], uint16(l))
native.PutUint16(buf[2:4], msg.Type)
copy(buf[4:], msg.Value)
return buf
}
In its Serialize method, Bytemsg is packed into bytes following the netlink attribute format (see the comment above).
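To make the layout concrete, here is a small, self-contained sketch that packs a "net:<path>" string the same way. It mirrors my reading of Bytemsg.Serialize, so treat the attribute id (runc's NsPathsAttr) and the "net:path" format as illustrative rather than authoritative; it also assumes a little-endian host:

package main

import (
    "encoding/binary"
    "fmt"
)

// nlattrAlign rounds l up to the 4-byte netlink attribute alignment,
// the same rounding unix.NLA_ALIGNTO performs in runc.
func nlattrAlign(l int) int {
    const alignTo = 4
    return (l + alignTo - 1) &^ (alignTo - 1)
}

// serializeBytemsg mimics Bytemsg.Serialize: 2-byte length, 2-byte attribute
// type, the value, a trailing NUL, then zero padding up to 4-byte alignment.
func serializeBytemsg(attrType uint16, value []byte) []byte {
    l := 4 + len(value) + 1 // nlattr header + value + NUL, as Bytemsg.Len() computes
    buf := make([]byte, nlattrAlign(l))
    binary.LittleEndian.PutUint16(buf[0:2], uint16(l)) // assumes a little-endian host
    binary.LittleEndian.PutUint16(buf[2:4], attrType)
    copy(buf[4:], value)
    return buf
}

func main() {
    // Illustrative attribute id (mirrors runc's NsPathsAttr) and namespace path.
    const nsPathsAttr uint16 = 27282
    paths := "net:/var/run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9"
    fmt.Printf("% x\n", serializeBytemsg(nsPathsAttr, []byte(paths)))
}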
Back in libcontainer: Start -> start -> parent.start() -> start
func (p *initProcess) start() (retErr error) {
// a lot of code here
if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
return fmt.Errorf("can't copy bootstrap data to pipe: %w", err)
}
// a lot of code here
}
The bootstrap data is written into one end of a Unix socketpair, p.messageSockPair, which is created in the newParentProcess method:
func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
parentInitPipe, childInitPipe, err := utils.NewSockPair("init")
if err != nil {
return nil, fmt.Errorf("unable to create init pipe: %w", err)
}
messageSockPair := filePair{parentInitPipe, childInitPipe}
parentLogPipe, childLogPipe, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("unable to create log pipe: %w", err)
}
logFilePair := filePair{parentLogPipe, childLogPipe}
cmd := c.commandTemplate(p, childInitPipe, childLogPipe)
if !p.Init {
return c.newSetnsProcess(p, cmd, messageSockPair, logFilePair)
}
// a lot of code here
}
A socketpair provides bidirectional communication between the parent and child processes; in runc it is called the init pipe. The "parent process" here is runc run itself, which will later spawn a second runc process whose first argument is init:
// New returns a linux based container factory based in the root directory and
// configures the factory with the provided option funcs.
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
l := &LinuxFactory{
Root: root,
InitPath: "/proc/self/exe",
InitArgs: []string{os.Args[0], "init"},
Validator: validate.New(),
CriuPath: "criu",
}
// a lot of code here
return l, nil
}
The other end of p.messageSockPair is appended to the ExtraFiles list of the container process cmd:
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
// a lot of code here
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, initStandard)
if err != nil {
return nil, err
}
if c.shouldSendMountSources() {
for i, m := range c.config.Mounts {
if !m.IsBind() {
// Non bind-mounts do not use an fd.
mountFds[i] = -1
continue
}
// The fd passed here will not be used: nsexec.c will overwrite it with dup3(). We just need
// to allocate a fd so that we know the number to pass in the environment variable. The fd
// must not be closed before cmd.Start(), so we reuse messageSockPair.child because the
// lifecycle of that fd is already taken care of.
cmd.ExtraFiles = append(cmd.ExtraFiles, messageSockPair.child)
mountFds[i] = stdioFdCount + len(cmd.ExtraFiles) - 1
}
}
}
Then the _LIBCONTAINER_INITPIPE environment variable is set on the runc child process to pass along the init pipe's file descriptor (fd) (newParentProcess -> commandTemplate -> newInitProcess):
func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) *exec.Cmd {
// a lot of code here
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_INITPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
"_LIBCONTAINER_STATEDIR="+c.root,
)
// a lot of code here
}
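Putting the pieces together, here is a minimal, self-contained sketch (not runc's code) of this handoff: one end of a socketpair goes into ExtraFiles of a re-exec'ed copy of the process, and its fd number travels in an environment variable. The variable name _DEMO_INITPIPE is made up for this example; runc uses _LIBCONTAINER_INITPIPE:

package main

import (
    "fmt"
    "io"
    "os"
    "os/exec"
    "strconv"

    "golang.org/x/sys/unix"
)

const stdioFdCount = 3 // stdin, stdout and stderr come before ExtraFiles

func parent() error {
    // A connected, bidirectional socket pair (runc wraps this in utils.NewSockPair).
    fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
    if err != nil {
        return err
    }
    parentEnd := os.NewFile(uintptr(fds[0]), "init-parent")
    childEnd := os.NewFile(uintptr(fds[1]), "init-child")
    defer parentEnd.Close()

    // Re-exec ourselves, the same trick as runc's "/proc/self/exe init".
    cmd := exec.Command("/proc/self/exe", "child")
    cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr
    cmd.ExtraFiles = append(cmd.ExtraFiles, childEnd)
    // ExtraFiles[0] becomes fd 3 in the child, ExtraFiles[1] fd 4, and so on.
    cmd.Env = append(os.Environ(),
        "_DEMO_INITPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
    if err := cmd.Start(); err != nil {
        return err
    }
    childEnd.Close() // the child now holds its own copy

    // Stand-in for bootstrapData: in runc this is the netlink-encoded payload.
    if _, err := io.WriteString(parentEnd, "bootstrap-data"); err != nil {
        return err
    }
    parentEnd.Close() // signal EOF to the child
    return cmd.Wait()
}

func child() error {
    fd, err := strconv.Atoi(os.Getenv("_DEMO_INITPIPE"))
    if err != nil {
        return err
    }
    pipe := os.NewFile(uintptr(fd), "init-pipe")
    data, err := io.ReadAll(pipe)
    if err != nil {
        return err
    }
    fmt.Printf("child received %q on fd %d\n", data, fd)
    return nil
}

func main() {
    var err error
    if len(os.Args) > 1 && os.Args[1] == "child" {
        err = child()
    } else {
        err = parent()
    }
    if err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1)
    }
}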
Child process: runc init
Now let's look at the child process, i.e. the runc init that runc run re-executes:
init -> StartInitialization -> newContainerInit -> Init
import (
_ "github.com/opencontainers/runc/libcontainer/nsenter"
)
func init() {
if len(os.Args) > 1 && os.Args[1] == "init" {
// This is the golang entry point for runc init, executed
// before main() but after libcontainer/nsenter's nsexec().
runtime.GOMAXPROCS(1)
runtime.LockOSThread()
level, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_LOGLEVEL"))
if err != nil {
panic(err)
}
logPipeFd, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_LOGPIPE"))
if err != nil {
panic(err)
}
logrus.SetLevel(logrus.Level(level))
logrus.SetOutput(os.NewFile(uintptr(logPipeFd), "logpipe"))
logrus.SetFormatter(new(logrus.JSONFormatter))
logrus.Debug("child process in init()")
factory, _ := libcontainer.New("")
if err := factory.StartInitialization(); err != nil {
// as the error is sent back to the parent there is no need to log
// or write it to stderr because the parent process will handle this
os.Exit(1)
}
panic("libcontainer: container init failed to exec")
}
}
Note the comment above: before this init function runs, the nsexec function from libcontainer/nsenter has already run.
The libcontainer/nsenter package registers a special init constructor that is invoked before the Go runtime starts. This allows runc to call setns on existing namespaces while sidestepping the problems the Go runtime has with multithreading.
The libcontainer/nsenter package uses cgo: whenever the package is imported, the C function nsexec is invoked. Since libcontainer/nsenter is only imported in init.go, the C code runs only when runc is executed with the init command.
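The trick can be reproduced in a few lines of cgo. This sketch is not the real nsexec; it only demonstrates that a C constructor runs before the Go runtime and main:

package main

/*
#include <stdio.h>

// GCC/Clang run constructor-attributed functions at load time, before the
// Go runtime starts; this is the hook libcontainer/nsenter uses for nsexec().
__attribute__((constructor)) static void early(void)
{
    fprintf(stderr, "C constructor: runs before the Go runtime\n");
}
*/
import "C"

import "fmt"

func main() {
    fmt.Println("Go main: runs afterwards")
}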
void nsexec(void)
{
// a lot of code here
/*
* Get the init pipe fd from the environment. The init pipe is used to
* read the bootstrap data and tell the parent what the new pids are
* after the setup is done.
*/
pipenum = getenv_int("_LIBCONTAINER_INITPIPE");
if (pipenum < 0) {
/* We are not a runc init. Just return to go runtime. */
return;
}
/* Parse all of the netlink configuration. */
nl_parse(pipenum, &config);
/*
* We need to setns first. We cannot do this earlier (in stage 0)
* because of the fact that we forked to get here (the PID of
* [stage 2: STAGE_INIT]) would be meaningless). We could send it
* using cmsg(3) but that's just annoying.
*/
if (config.namespaces)
join_namespaces(config.namespaces);
}
- Read the init pipe fd number from the _LIBCONTAINER_INITPIPE environment variable
- Read the netlink data, i.e. the serialized byte string, from the pipe into the config struct
- If existing namespaces are specified, call join_namespaces to join them
void join_namespaces(char *nslist)
{
// a lot of code here
/*
* We have to open the file descriptors first, since after
* we join the mnt namespace we might no longer be able to
* access the paths.
*/
do {
int fd;
char *path;
struct namespace_t *ns;
/* Resize the namespace array. */
namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t));
if (!namespaces)
bail("failed to reallocate namespace array");
ns = &namespaces[num - 1];
/* Split 'ns:path'. */
path = strstr(namespace, ":");
if (!path)
bail("failed to parse %s", namespace);
*path++ = '\0';
fd = open(path, O_RDONLY);
if (fd < 0)
bail("failed to open %s", path);
ns->fd = fd;
strncpy(ns->type, namespace, PATH_MAX - 1);
strncpy(ns->path, path, PATH_MAX - 1);
ns->path[PATH_MAX - 1] = '\0';
} while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL);
/*
* The ordering in which we join namespaces is important. We should
* always join the user namespace *first*. This is all guaranteed
* from the container_linux.go side of this, so we're just going to
* follow the order given to us.
*/
for (i = 0; i < num; i++) {
struct namespace_t *ns = &namespaces[i];
int flag = nsflag(ns->type);
write_log(DEBUG, "setns(%#x) into %s namespace (with path %s)", flag, ns->type, ns->path);
if (setns(ns->fd, flag) < 0)
bail("failed to setns into %s namespace", ns->type);
close(ns->fd);
}
free(namespaces);
}
- First, open the namespace files, e.g. /var/run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9 for the network namespace
- Then the setns system call joins the current process to that network namespace
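For completeness, the same join can be done from Go with golang.org/x/sys/unix. This is only a sketch (run it as root; the netns path is the example one from above), and note that real runc has to do this in C before the Go runtime spawns threads, whereas here we can only pin one OS thread:

package main

import (
    "fmt"
    "net"
    "runtime"

    "golang.org/x/sys/unix"
)

func main() {
    runtime.LockOSThread() // setns affects only the calling thread
    defer runtime.UnlockOSThread()

    fd, err := unix.Open("/var/run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9",
        unix.O_RDONLY, 0)
    if err != nil {
        panic(err)
    }
    defer unix.Close(fd)

    // Join the network namespace referred to by fd.
    if err := unix.Setns(fd, unix.CLONE_NEWNET); err != nil {
        panic(err)
    }

    // Interfaces listed now are the Pod's (e.g. lo and eth0), not the host's.
    ifaces, err := net.Interfaces()
    if err != nil {
        panic(err)
    }
    for _, ifi := range ifaces {
        fmt.Println(ifi.Name)
    }
}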
At this point everything is clear: containerd first creates the network namespace and passes it to the runc run process via the OCI spec; internally runc converts the config, creates the init pipe, and sends the namespace data to the runc init it is about to re-exec, which joins that network namespace via setns before the Go runtime even starts.
CNI Plugins
Once containerd has created the network namespace, it calls the CNI plugin to "configure the network stack":
func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandboxRequest) (_ *runtime.RunPodSandboxResponse, retErr error) {
// a lot of code here
if !hostNetwork(config) && !userNsEnabled {
var netnsMountDir = "/var/run/netns"
sandbox.NetNS, err = netns.NewNetNS(netnsMountDir)
if err != nil {
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
}
// Update network namespace in the container's spec
c.updateNetNamespacePath(spec, sandbox.NetNSPath)
// Setup network for sandbox.
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
// rely on the assumption that CRI shim will not be querying the network namespace to check the
// network states such as IP.
// In future runtime implementation should avoid relying on CRI shim implementation details.
// In this case however caching the IP will add a subtle performance enhancement by avoiding
// calls to network namespace of the pod to query the IP of the veth interface on every
// SandboxStatus request.
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err)
}
}
}
RunPodSandbox -> setupPodNetwork calls the CNI plugin to create virtual interfaces (veth pairs), routing entries, and so on in both the host and the container network namespaces, so that the container is connected to the host network. The concrete networking solutions vary widely, but they are not the focus of this article.
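To make this concrete, here is a minimal sketch of a CNI ADD against the existing netns using the upstream libcni library (containerd actually goes through its go-cni wrapper); the plugin directory, the conflist file name, and the container ID are assumptions for illustration:

package main

import (
    "context"
    "fmt"

    "github.com/containernetworking/cni/libcni"
)

func main() {
    // Where CNI plugin binaries and the network conflist typically live on a node.
    cniNet := libcni.NewCNIConfig([]string{"/opt/cni/bin"}, nil)
    confList, err := libcni.ConfListFromFile("/etc/cni/net.d/10-mynet.conflist")
    if err != nil {
        panic(err)
    }

    rt := &libcni.RuntimeConf{
        ContainerID: "f760e07f0771", // sandbox (pause container) ID, truncated for the example
        NetNS:       "/var/run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9",
        IfName:      "eth0", // interface name to create inside the Pod netns
    }

    // ADD asks the plugin chain to create the veth pair, assign the IP,
    // and install routes for this namespace.
    result, err := cniNet.AddNetworkList(context.Background(), confList, rt)
    if err != nil {
        panic(err)
    }
    fmt.Println(result) // IPs and interfaces reported by the plugins
}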
Application containers
kubelet calls containerd through the CRI CreateContainer API to start, inside the given sandbox, the application containers defined in the Pod:
// CreateContainer creates a new container in the given PodSandbox.
func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateContainerRequest) (_ *runtime.CreateContainerResponse, retErr error) {
config := r.GetConfig()
log.G(ctx).Debugf("Container config %+v", config)
sandboxConfig := r.GetSandboxConfig()
sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId())
if err != nil {
return nil, fmt.Errorf("failed to find sandbox id %q: %w", r.GetPodSandboxId(), err)
}
// a lot of code here
spec, err := c.containerSpec(id, sandboxID, sandboxPid, sandbox.NetNSPath, containerName, containerdImage.Name(), config, sandboxConfig,
&image.ImageSpec.Config, append(mounts, volumeMounts...), ociRuntime)
if err != nil {
return nil, fmt.Errorf("failed to generate container %q spec: %w", id, err)
}
// a lot of code here
}
Because the network stack was fully prepared during the sandbox stage, creating the application container only requires handing it the existing network namespace (via its OCI config):
# sandbox
$ kubectl get po pod-nginx -o yaml | grep sand
io.kubernetes.pod.sandbox.uid: f760e07f0771b5b1b0404f3758dec1a31b534494ddd7bc9287d084e03240b20e
$ ctr -n k8s.io task list | grep f760e07f0771b5b1b0404f3758dec1a31b534494ddd7bc9287d084e03240b20e
f760e07f0771b5b1b0404f3758dec1a31b534494ddd7bc9287d084e03240b20e 204039 RUNNING
# container
$ kubectl get po pod-nginx -o yaml | grep containerd
- containerID: containerd://f2e17ff60a7c0e48cd44621fd4a236a8c1b5a2663cdb810aff5376c8dc6d3b9b
$ ctr -n k8s.io container info f2e17ff60a7c0e48cd44621fd4a236a8c1b5a2663cdb810aff5376c8dc6d3b9b | jq ".Spec.linux.namespaces"
[
{
"type": "pid"
},
{
"type": "ipc",
"path": "/proc/204039/ns/ipc"
},
{
"type": "uts",
"path": "/proc/204039/ns/uts"
},
{
"type": "mount"
},
{
"type": "network",
"path": "/proc/204039/ns/net"
}
]
$ ll /proc/204039/ns/net
lrwxrwxrwx 1 65535 65535 0 Jul 10 23:40 /proc/204039/ns/net -> 'net:[4026535107]'
$ lsns -o NSFS 4026535107
NSFS
/run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9
/run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9
/run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9
/run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9
- The pause container's PID is 204039
- The application container's network namespace is set to /proc/204039/ns/net
- The NSFS of /proc/204039/ns/net is exactly /run/netns/cni-9ca95d5b-837c-84c5-f2ee-b60a96ef36c9
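As a closing illustration, the namespace section that containerd generates for the application container can be approximated with the runtime-spec Go types. This is only a sketch of the idea (containerd's containerSpec does far more), with the pause PID taken from the example above:

package main

import (
    "encoding/json"
    "fmt"
    "os"

    specs "github.com/opencontainers/runtime-spec/specs-go"
)

func main() {
    sandboxPid := uint32(204039) // PID of the pause container, as in the output above

    spec := specs.Spec{
        Linux: &specs.Linux{
            Namespaces: []specs.LinuxNamespace{
                {Type: specs.PIDNamespace}, // its own pid namespace (no path)
                {Type: specs.IPCNamespace, Path: fmt.Sprintf("/proc/%d/ns/ipc", sandboxPid)},
                {Type: specs.UTSNamespace, Path: fmt.Sprintf("/proc/%d/ns/uts", sandboxPid)},
                {Type: specs.MountNamespace}, // its own mount namespace
                {Type: specs.NetworkNamespace, Path: fmt.Sprintf("/proc/%d/ns/net", sandboxPid)},
            },
        },
    }

    // Prints a JSON list shaped like the jq output above.
    enc := json.NewEncoder(os.Stdout)
    enc.SetIndent("", "  ")
    if err := enc.Encode(spec.Linux.Namespaces); err != nil {
        panic(err)
    }
}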