summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/um/drivers/mconsole_kern.c28
-rw-r--r--fs/proc/root.c36
-rw-r--r--include/linux/pid_namespace.h2
-rw-r--r--include/linux/proc_ns.h5
-rw-r--r--kernel/pid.c8
-rw-r--r--kernel/pid_namespace.c38
6 files changed, 46 insertions, 71 deletions
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index b80a1d616e4e..30575bd92975 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -36,6 +36,8 @@
#include "mconsole_kern.h"
#include <os.h>
+static struct vfsmount *proc_mnt = NULL;
+
static int do_unlink_socket(struct notifier_block *notifier,
unsigned long what, void *data)
{
@@ -123,7 +125,7 @@ void mconsole_log(struct mc_request *req)
void mconsole_proc(struct mc_request *req)
{
- struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt;
+ struct vfsmount *mnt = proc_mnt;
char *buf;
int len;
struct file *file;
@@ -134,6 +136,10 @@ void mconsole_proc(struct mc_request *req)
ptr += strlen("proc");
ptr = skip_spaces(ptr);
+ if (!mnt) {
+ mconsole_reply(req, "Proc not available", 1, 0);
+ goto out;
+ }
file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY, 0);
if (IS_ERR(file)) {
mconsole_reply(req, "Failed to open file", 1, 0);
@@ -683,6 +689,24 @@ void mconsole_stack(struct mc_request *req)
with_console(req, stack_proc, to);
}
+static int __init mount_proc(void)
+{
+ struct file_system_type *proc_fs_type;
+ struct vfsmount *mnt;
+
+ proc_fs_type = get_fs_type("proc");
+ if (!proc_fs_type)
+ return -ENODEV;
+
+ mnt = kern_mount(proc_fs_type);
+ put_filesystem(proc_fs_type);
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+
+ proc_mnt = mnt;
+ return 0;
+}
+
/*
* Changed by mconsole_setup, which is __setup, and called before SMP is
* active.
@@ -696,6 +720,8 @@ static int __init mconsole_init(void)
int err;
char file[UNIX_PATH_MAX];
+ mount_proc();
+
if (umid_file_name("mconsole", file, sizeof(file)))
return -1;
snprintf(mconsole_socket_name, sizeof(file), "%s", file);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 608233dfd29c..2633f10446c3 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -292,39 +292,3 @@ struct proc_dir_entry proc_root = {
.subdir = RB_ROOT,
.name = "/proc",
};
-
-int pid_ns_prepare_proc(struct pid_namespace *ns)
-{
- struct proc_fs_context *ctx;
- struct fs_context *fc;
- struct vfsmount *mnt;
-
- fc = fs_context_for_mount(&proc_fs_type, SB_KERNMOUNT);
- if (IS_ERR(fc))
- return PTR_ERR(fc);
-
- if (fc->user_ns != ns->user_ns) {
- put_user_ns(fc->user_ns);
- fc->user_ns = get_user_ns(ns->user_ns);
- }
-
- ctx = fc->fs_private;
- if (ctx->pid_ns != ns) {
- put_pid_ns(ctx->pid_ns);
- get_pid_ns(ns);
- ctx->pid_ns = ns;
- }
-
- mnt = fc_mount(fc);
- put_fs_context(fc);
- if (IS_ERR(mnt))
- return PTR_ERR(mnt);
-
- ns->proc_mnt = mnt;
- return 0;
-}
-
-void pid_ns_release_proc(struct pid_namespace *ns)
-{
- kern_unmount(ns->proc_mnt);
-}
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 2ed6af88794b..4956e362e55e 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -33,7 +33,6 @@ struct pid_namespace {
unsigned int level;
struct pid_namespace *parent;
#ifdef CONFIG_PROC_FS
- struct vfsmount *proc_mnt;
struct dentry *proc_self;
struct dentry *proc_thread_self;
#endif
@@ -42,7 +41,6 @@ struct pid_namespace {
#endif
struct user_namespace *user_ns;
struct ucounts *ucounts;
- struct work_struct proc_work;
kgid_t pid_gid;
int hide_pid;
int reboot; /* group exit code if this pidns was rebooted */
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 4626b1ac3b6c..e1106a077c1a 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -50,16 +50,11 @@ enum {
#ifdef CONFIG_PROC_FS
-extern int pid_ns_prepare_proc(struct pid_namespace *ns);
-extern void pid_ns_release_proc(struct pid_namespace *ns);
extern int proc_alloc_inum(unsigned int *pino);
extern void proc_free_inum(unsigned int inum);
#else /* CONFIG_PROC_FS */
-static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; }
-static inline void pid_ns_release_proc(struct pid_namespace *ns) {}
-
static inline int proc_alloc_inum(unsigned int *inum)
{
*inum = 1;
diff --git a/kernel/pid.c b/kernel/pid.c
index ca08d6a3aa77..60820e72634c 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -144,9 +144,6 @@ void free_pid(struct pid *pid)
/* Handle a fork failure of the first process */
WARN_ON(ns->child_reaper);
ns->pid_allocated = 0;
- /* fall through */
- case 0:
- schedule_work(&ns->proc_work);
break;
}
@@ -247,11 +244,6 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
tmp = tmp->parent;
}
- if (unlikely(is_child_reaper(pid))) {
- if (pid_ns_prepare_proc(ns))
- goto out_free;
- }
-
get_pid_ns(ns);
refcount_set(&pid->count, 1);
for (type = 0; type < PIDTYPE_MAX; ++type)
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index d40017e79ebe..01f8ba32cc0c 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -57,12 +57,6 @@ static struct kmem_cache *create_pid_cachep(unsigned int level)
return READ_ONCE(*pkc);
}
-static void proc_cleanup_work(struct work_struct *work)
-{
- struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work);
- pid_ns_release_proc(ns);
-}
-
static struct ucounts *inc_pid_namespaces(struct user_namespace *ns)
{
return inc_ucount(ns, current_euid(), UCOUNT_PID_NAMESPACES);
@@ -114,7 +108,6 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
ns->user_ns = get_user_ns(user_ns);
ns->ucounts = ucounts;
ns->pid_allocated = PIDNS_ADDING;
- INIT_WORK(&ns->proc_work, proc_cleanup_work);
return ns;
@@ -231,20 +224,27 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
} while (rc != -ECHILD);
/*
- * kernel_wait4() above can't reap the EXIT_DEAD children but we do not
- * really care, we could reparent them to the global init. We could
- * exit and reap ->child_reaper even if it is not the last thread in
- * this pid_ns, free_pid(pid_allocated == 0) calls proc_cleanup_work(),
- * pid_ns can not go away until proc_kill_sb() drops the reference.
+ * kernel_wait4() misses EXIT_DEAD children, and EXIT_ZOMBIE
+ * process whose parents processes are outside of the pid
+ * namespace. Such processes are created with setns()+fork().
+ *
+ * If those EXIT_ZOMBIE processes are not reaped by their
+ * parents before their parents exit, they will be reparented
+ * to pid_ns->child_reaper. Thus pidns->child_reaper needs to
+ * stay valid until they all go away.
+ *
+ * The code relies on the the pid_ns->child_reaper ignoring
+ * SIGCHILD to cause those EXIT_ZOMBIE processes to be
+ * autoreaped if reparented.
*
- * But this ns can also have other tasks injected by setns()+fork().
- * Again, ignoring the user visible semantics we do not really need
- * to wait until they are all reaped, but they can be reparented to
- * us and thus we need to ensure that pid->child_reaper stays valid
- * until they all go away. See free_pid()->wake_up_process().
+ * Semantically it is also desirable to wait for EXIT_ZOMBIE
+ * processes before allowing the child_reaper to be reaped, as
+ * that gives the invariant that when the init process of a
+ * pid namespace is reaped all of the processes in the pid
+ * namespace are gone.
*
- * We rely on ignored SIGCHLD, an injected zombie must be autoreaped
- * if reparented.
+ * Once all of the other tasks are gone from the pid_namespace
+ * free_pid() will awaken this task.
*/
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);