扫二维码与项目经理沟通
我们在微信上24小时期待你的声音
解答本文疑问/技术咨询/运营咨询/技术建议/互联网交流
如何用runC源码分析namespace,相信很多没有经验的人对此束手无策,为此本文总结了问题出现的原因和解决方法,通过这篇文章希望你能解决这个问题。
网站建设哪家好,找成都创新互联公司!专注于网页设计、网站建设、微信开发、微信平台小程序开发、集团企业网站建设等服务项目。为回馈新老客户创新互联还提供了深泽免费建站欢迎大家使用!
runc/libcontainer/configs/config.go中定义了container对应的Namespaces。另外对于User Namespaces,还定义了UidMappings和GidMappings for user map。
// Config defines configuration options for executing a process inside a contained environment. type Config struct { ... // Namespaces specifies the container's namespaces that it should setup when cloning the init process // If a namespace is not provided that namespace is shared from the container's parent process Namespaces Namespaces `json:"namespaces"` // UidMappings is an array of User ID mappings for User Namespaces UidMappings []IDMap `json:"uid_mappings"` // GidMappings is an array of Group ID mappings for User Namespaces GidMappings []IDMap `json:"gid_mappings"` ... }
runC中namespace的源码主要在: runc/libcontainer/configs/namespaces_unix.go runC支持的namespce type包括($nsName) "net"、"mnt"、"pid"、"ipc"、"user"、"uts":
const ( NEWNET NamespaceType = "NEWNET" NEWPID NamespaceType = "NEWPID" NEWNS NamespaceType = "NEWNS" NEWUTS NamespaceType = "NEWUTS" NEWIPC NamespaceType = "NEWIPC" NEWUSER NamespaceType = "NEWUSER" )
除了验证 Namespce Type是否在以上常量中,还要去验证 /proc/self/ns/$nsName是否存在并且可以read,都通过时,才认为该Namespace是在当前系统中是被支持的。
// IsNamespaceSupported returns whether a namespace is available or // not func IsNamespaceSupported(ns NamespaceType) bool { ... supported, ok := supportedNamespaces[ns] if ok { return supported } ... // 除了验证 Namespce Type是都在指定列表中,还要去验证 /proc/self/ns/$nsName是否存在并且可以read _, err := os.Stat(fmt.Sprintf("/proc/self/ns/%s", nsFile)) supported = err == nil ... return supported }
如下是NameSpace的完整定义,很简单,只包括NamespaceType 和对应的Path。
// Namespace defines configuration for each namespace. It specifies an // alternate path that is able to be joined via setns. type Namespace struct { Type NamespaceType `json:"type"` Path string `json:"path"` }
从Namespace的GetPath方法中可见,一个pid对应的namespace path为 /proc/$pid/ns/$nsName。
func (n *Namespace) GetPath(pid int) string { if n.Path != "" { return n.Path } return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type)) }
除此之外,还定义了以下常用方法:
func (n *Namespaces) Remove(t NamespaceType) bool {...} func (n *Namespaces) Add(t NamespaceType, path string) {...} func (n *Namespaces) index(t NamespaceType) int {...} func (n *Namespaces) Contains(t NamespaceType) bool {...} func (n *Namespaces) PathOf(t NamespaceType) string {...}
在runc/libcontainer/configs/namespaces_syscall.go中,定义了linux clone时这些namespace对应的clone flags。
var namespaceInfo = map[NamespaceType]int{ NEWNET: syscall.CLONE_NEWNET, NEWNS: syscall.CLONE_NEWNS, NEWUSER: syscall.CLONE_NEWUSER, NEWIPC: syscall.CLONE_NEWIPC, NEWUTS: syscall.CLONE_NEWUTS, NEWPID: syscall.CLONE_NEWPID, } // CloneFlags parses the container's Namespaces options to set the correct // flags on clone, unshare. This function returns flags only for new namespaces. func (n *Namespaces) CloneFlags() uintptr { var flag int for _, v := range *n { if v.Path != "" { continue } flag |= namespaceInfo[v.Type] } return uintptr(flag) }
上面的CloneFlags()方法是用来解析linuxContainer的config中的namespace相关的参数,生成clone flags,提供给linuxContainer.bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) 来封装。
// bootstrapData encodes the necessary data in netlink binary format // as a io.Reader. // Consumer can write the data to a bootstrap program // such as one that uses nsenter package to bootstrap the container's // init process correctly, i.e. with correct namespaces, uid/gid // mapping etc. func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (io.Reader, error) { // create the netlink message r := nl.NewNetlinkRequest(int(InitMsg), 0) // write cloneFlags r.AddData(&Int32msg{ Type: CloneFlagsAttr, Value: uint32(cloneFlags), }) // write custom namespace paths if len(nsMaps) > 0 { nsPaths, err := c.orderNamespacePaths(nsMaps) if err != nil { return nil, err } r.AddData(&Bytemsg{ Type: NsPathsAttr, Value: []byte(strings.Join(nsPaths, ",")), }) } // write namespace paths only when we are not joining an existing user ns _, joinExistingUser := nsMaps[configs.NEWUSER] if !joinExistingUser { // write uid mappings if len(c.config.UidMappings) > 0 { b, err := encodeIDMapping(c.config.UidMappings) if err != nil { return nil, err } r.AddData(&Bytemsg{ Type: UidmapAttr, Value: b, }) } // write gid mappings if len(c.config.GidMappings) > 0 { b, err := encodeIDMapping(c.config.GidMappings) if err != nil { return nil, err } r.AddData(&Bytemsg{ Type: GidmapAttr, Value: b, }) // check if we have CAP_SETGID to setgroup properly pid, err := capability.NewPid(os.Getpid()) if err != nil { return nil, err } if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) { r.AddData(&Boolmsg{ Type: SetgroupAttr, Value: true, }) } } } return bytes.NewReader(r.Serialize()), nil }
linuxContainer.newInitProcess(...)最终会使用linuxContainer.bootstrapData封装的clone flags数据,完成initProcess的构建。
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*initProcess, error) { cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard)) nsMaps := make(map[configs.NamespaceType]string) for _, ns := range c.config.Namespaces { if ns.Path != "" { nsMaps[ns.Type] = ns.Path } } _, sharePiDNS := nsMaps[configs.NEWPID] data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps) if err != nil { return nil, err } p.consoleChan = make(chan *os.File, 1) return &initProcess{ cmd: cmd, childPipe: childPipe, parentPipe: parentPipe, manager: c.cgroupManager, config: c.newInitConfig(p), container: c, process: p, bootstrapData: data, sharePidns: sharePidns, rootDir: rootDir, }, nil }
newInitProcess(...)在整个container create的流程中的位置,请参考:runC源码分析之Create/Run Container —— 王涛 如此,namespace在整个container create/run中的源码分析就完整了。
补充:runC中container的Spec是从bundle/config.json中解析得到的,见runC的create.go中的setupSpec(context)的调用。
Action: func(context *cli.Context) error { if context.NArg() != 1 { fmt.Printf("Incorrect Usage.\n\n") cli.ShowCommandHelp(context, "create") return fmt.Errorf("runc: \"create\" requires exactly one argument") } if err := revisePidFile(context); err != nil { return err } spec, err := setupSpec(context) if err != nil { return err } status, err := startContainer(context, spec, true) if err != nil { return err }
setupSepc(context)会去loadSpec("config.json"):
// setupSpec performs initial setup based on the cli.Context for the container func setupSpec(context *cli.Context) (*specs.Spec, error) { bundle := context.String("bundle") if bundle != "" { if err := os.Chdir(bundle); err != nil { return nil, err } } spec, err := loadSpec(specConfig) if err != nil { return nil, err } notifySocket := os.Getenv("NOTIFY_SOCKET") if notifySocket != "" { setupSdNotify(spec, notifySocket) } if os.Geteuid() != 0 { return nil, fmt.Errorf("runc should be run as root") } return spec, nil }
config.json样例如下,namespace部分见 “.linux.namespaces”。
{ "ociVersion": "0.4.0", "platform": { "os": "linux", "arch": "amd64" }, "process": { "terminal": true, "user": {}, "args": [ "redis-server", "--bind", "0.0.0.0" ], "env": [ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "TERM=xterm" ], "cwd": "/", "capabilities": [ "CAP_AUDIT_WRITE", "CAP_KILL", "CAP_NET_BIND_SERVICE" ], "rlimits": [ { "type": "RLIMIT_NOFILE", "hard": 1024, "soft": 1024 } ], "noNewPrivileges": true }, "root": { "path": "rootfs", "readonly": true }, "hostname": "runc", "mounts": [ { "destination": "/proc", "type": "proc", "source": "proc" }, { "destination": "/dev", "type": "tmpfs", "source": "tmpfs", "options": [ "nosuid", "strictatime", "mode=755", "size=65536k" ] }, { "destination": "/dev/pts", "type": "devpts", "source": "devpts", "options": [ "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5" ] }, { "destination": "/dev/shm", "type": "tmpfs", "source": "shm", "options": [ "nosuid", "noexec", "nodev", "mode=1777", "size=65536k" ] }, { "destination": "/dev/mqueue", "type": "mqueue", "source": "mqueue", "options": [ "nosuid", "noexec", "nodev" ] }, { "destination": "/sys", "type": "sysfs", "source": "sysfs", "options": [ "nosuid", "noexec", "nodev", "ro" ] }, { "destination": "/sys/fs/cgroup", "type": "cgroup", "source": "cgroup", "options": [ "nosuid", "noexec", "nodev", "relatime", "ro" ] } ], "hooks": {}, "linux": { "resources": { "devices": [ { "allow": false, "access": "rwm" } ] }, "namespaces": [ { "type": "pid" }, { "type": "ipc" }, { "type": "uts" }, { "type": "mount" } ], "devices": null } }
看完上述内容,你们掌握如何用runC源码分析namespace的方法了吗?如果还想学到更多技能或想了解更多相关内容,欢迎关注创新互联行业资讯频道,感谢各位的阅读!
我们在微信上24小时期待你的声音
解答本文疑问/技术咨询/运营咨询/技术建议/互联网交流