From 9548906b2bb7ff09e12c013a55d669bef2c8e121 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Thu, 25 Jul 2013 05:44:02 +0900
Subject: xattr: Constify ->name member of "struct xattr".

Since everybody sets kstrdup()ed constant string to "struct xattr"->name but
nobody modifies "struct xattr"->name , we can omit kstrdup() and its failure
checking by constifying ->name member of "struct xattr".

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reviewed-by: Joel Becker <jlbec@evilplan.org> [ocfs2]
Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Reviewed-by: Paul Moore <paul@paul-moore.com>
Tested-by: Paul Moore <paul@paul-moore.com>
Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 security/capability.c             |  2 +-
 security/integrity/evm/evm_main.c |  2 +-
 security/security.c               |  8 +++-----
 security/selinux/hooks.c          | 17 ++++++-----------
 security/smack/smack_lsm.c        |  9 +++------
 5 files changed, 14 insertions(+), 24 deletions(-)

(limited to 'security')

diff --git a/security/capability.c b/security/capability.c
index 32b515766df1..dbeb9bc27b24 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -129,7 +129,7 @@ static void cap_inode_free_security(struct inode *inode)
 }
 
 static int cap_inode_init_security(struct inode *inode, struct inode *dir,
-				   const struct qstr *qstr, char **name,
+				   const struct qstr *qstr, const char **name,
 				   void **value, size_t *len)
 {
 	return -EOPNOTSUPP;
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index df0fa451a871..af9b6852f4e1 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -418,7 +418,7 @@ int evm_inode_init_security(struct inode *inode,
 
 	evm_xattr->value = xattr_data;
 	evm_xattr->value_len = sizeof(*xattr_data);
-	evm_xattr->name = kstrdup(XATTR_EVM_SUFFIX, GFP_NOFS);
+	evm_xattr->name = XATTR_EVM_SUFFIX;
 	return 0;
 out:
 	kfree(xattr_data);
diff --git a/security/security.c b/security/security.c
index 94b35aef6871..4dc31f4f2700 100644
--- a/security/security.c
+++ b/security/security.c
@@ -348,10 +348,10 @@ int security_inode_init_security(struct inode *inode, struct inode *dir,
 	if (unlikely(IS_PRIVATE(inode)))
 		return 0;
 
-	memset(new_xattrs, 0, sizeof new_xattrs);
 	if (!initxattrs)
 		return security_ops->inode_init_security(inode, dir, qstr,
 							 NULL, NULL, NULL);
+	memset(new_xattrs, 0, sizeof(new_xattrs));
 	lsm_xattr = new_xattrs;
 	ret = security_ops->inode_init_security(inode, dir, qstr,
 						&lsm_xattr->name,
@@ -366,16 +366,14 @@ int security_inode_init_security(struct inode *inode, struct inode *dir,
 		goto out;
 	ret = initxattrs(inode, new_xattrs, fs_data);
 out:
-	for (xattr = new_xattrs; xattr->name != NULL; xattr++) {
-		kfree(xattr->name);
+	for (xattr = new_xattrs; xattr->value != NULL; xattr++)
 		kfree(xattr->value);
-	}
 	return (ret == -EOPNOTSUPP) ? 0 : ret;
 }
 EXPORT_SYMBOL(security_inode_init_security);
 
 int security_old_inode_init_security(struct inode *inode, struct inode *dir,
-				     const struct qstr *qstr, char **name,
+				     const struct qstr *qstr, const char **name,
 				     void **value, size_t *len)
 {
 	if (unlikely(IS_PRIVATE(inode)))
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index c956390a9136..a5091ec06aa6 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2587,7 +2587,8 @@ static int selinux_dentry_init_security(struct dentry *dentry, int mode,
 }
 
 static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
-				       const struct qstr *qstr, char **name,
+				       const struct qstr *qstr,
+				       const char **name,
 				       void **value, size_t *len)
 {
 	const struct task_security_struct *tsec = current_security();
@@ -2595,7 +2596,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
 	struct superblock_security_struct *sbsec;
 	u32 sid, newsid, clen;
 	int rc;
-	char *namep = NULL, *context;
+	char *context;
 
 	dsec = dir->i_security;
 	sbsec = dir->i_sb->s_security;
@@ -2631,19 +2632,13 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
 	if (!ss_initialized || !(sbsec->flags & SE_SBLABELSUPP))
 		return -EOPNOTSUPP;
 
-	if (name) {
-		namep = kstrdup(XATTR_SELINUX_SUFFIX, GFP_NOFS);
-		if (!namep)
-			return -ENOMEM;
-		*name = namep;
-	}
+	if (name)
+		*name = XATTR_SELINUX_SUFFIX;
 
 	if (value && len) {
 		rc = security_sid_to_context_force(newsid, &context, &clen);
-		if (rc) {
-			kfree(namep);
+		if (rc)
 			return rc;
-		}
 		*value = context;
 		*len = clen;
 	}
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 3f7682a387b7..a113a779f00c 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -582,7 +582,7 @@ static void smack_inode_free_security(struct inode *inode)
  * Returns 0 if it all works out, -ENOMEM if there's no memory
  */
 static int smack_inode_init_security(struct inode *inode, struct inode *dir,
-				     const struct qstr *qstr, char **name,
+				     const struct qstr *qstr, const char **name,
 				     void **value, size_t *len)
 {
 	struct inode_smack *issp = inode->i_security;
@@ -591,11 +591,8 @@ static int smack_inode_init_security(struct inode *inode, struct inode *dir,
 	char *dsp = smk_of_inode(dir);
 	int may;
 
-	if (name) {
-		*name = kstrdup(XATTR_SMACK_SUFFIX, GFP_NOFS);
-		if (*name == NULL)
-			return -ENOMEM;
-	}
+	if (name)
+		*name = XATTR_SMACK_SUFFIX;
 
 	if (value) {
 		rcu_read_lock();
-- 
cgit v1.2.3


From ca4c3fc24e293719fe7410c4e63da9b6bc633b83 Mon Sep 17 00:00:00 2001
From: "fan.du" <fan.du@windriver.com>
Date: Tue, 30 Jul 2013 08:33:53 +0800
Subject: net: split rt_genid for ipv4 and ipv6

Current net name space has only one genid for both IPv4 and IPv6, it has below
drawbacks:

- Add/delete an IPv4 address will invalidate all IPv6 routing table entries.
- Insert/remove XFRM policy will also invalidate both IPv4/IPv6 routing table
  entries even when the policy is only applied for one address family.

Thus, this patch attempt to split one genid for two to cater for IPv4 and IPv6
separately in a fine granularity.

Signed-off-by: Fan Du <fan.du@windriver.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h     | 37 ++++++++++++++++++++++++++++++++-----
 include/net/netns/ipv4.h        |  1 +
 include/net/netns/ipv6.h        |  1 +
 net/ipv4/route.c                | 16 ++++++++--------
 net/ipv6/af_inet6.c             |  1 +
 net/ipv6/route.c                |  4 ++--
 net/xfrm/xfrm_policy.c          |  8 +++++++-
 security/selinux/include/xfrm.h |  7 ++++++-
 8 files changed, 58 insertions(+), 17 deletions(-)

(limited to 'security')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 84e37b1ca9e1..1313456a0994 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -119,7 +119,6 @@ struct net {
 	struct netns_ipvs	*ipvs;
 #endif
 	struct sock		*diag_nlsk;
-	atomic_t		rt_genid;
 	atomic_t		fnhe_genid;
 };
 
@@ -333,14 +332,42 @@ static inline void unregister_net_sysctl_table(struct ctl_table_header *header)
 }
 #endif
 
-static inline int rt_genid(struct net *net)
+static inline int rt_genid_ipv4(struct net *net)
 {
-	return atomic_read(&net->rt_genid);
+	return atomic_read(&net->ipv4.rt_genid);
 }
 
-static inline void rt_genid_bump(struct net *net)
+static inline void rt_genid_bump_ipv4(struct net *net)
 {
-	atomic_inc(&net->rt_genid);
+	atomic_inc(&net->ipv4.rt_genid);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int rt_genid_ipv6(struct net *net)
+{
+	return atomic_read(&net->ipv6.rt_genid);
+}
+
+static inline void rt_genid_bump_ipv6(struct net *net)
+{
+	atomic_inc(&net->ipv6.rt_genid);
+}
+#else
+static inline int rt_genid_ipv6(struct net *net)
+{
+	return 0;
+}
+
+static inline void rt_genid_bump_ipv6(struct net *net)
+{
+}
+#endif
+
+/* For callers who don't really care about whether it's IPv4 or IPv6 */
+static inline void rt_genid_bump_all(struct net *net)
+{
+	rt_genid_bump_ipv4(net);
+	rt_genid_bump_ipv6(net);
 }
 
 static inline int fnhe_genid(struct net *net)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 2ba9de89e8ec..bf2ec2202c56 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -77,5 +77,6 @@ struct netns_ipv4 {
 	struct fib_rules_ops	*mr_rules_ops;
 #endif
 #endif
+	atomic_t	rt_genid;
 };
 #endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 005e2c2e39a9..0fb2401197c5 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -72,6 +72,7 @@ struct netns_ipv6 {
 #endif
 #endif
 	atomic_t		dev_addr_genid;
+	atomic_t		rt_genid;
 };
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a9a54a236832..e805481eff72 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -435,12 +435,12 @@ static inline int ip_rt_proc_init(void)
 
 static inline bool rt_is_expired(const struct rtable *rth)
 {
-	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
+	return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
 }
 
 void rt_cache_flush(struct net *net)
 {
-	rt_genid_bump(net);
+	rt_genid_bump_ipv4(net);
 }
 
 static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
@@ -1458,7 +1458,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 #endif
 	rth->dst.output = ip_rt_bug;
 
-	rth->rt_genid	= rt_genid(dev_net(dev));
+	rth->rt_genid	= rt_genid_ipv4(dev_net(dev));
 	rth->rt_flags	= RTCF_MULTICAST;
 	rth->rt_type	= RTN_MULTICAST;
 	rth->rt_is_input= 1;
@@ -1589,7 +1589,7 @@ static int __mkroute_input(struct sk_buff *skb,
 		goto cleanup;
 	}
 
-	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
+	rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev));
 	rth->rt_flags = flags;
 	rth->rt_type = res->type;
 	rth->rt_is_input = 1;
@@ -1760,7 +1760,7 @@ local_input:
 	rth->dst.tclassid = itag;
 #endif
 
-	rth->rt_genid = rt_genid(net);
+	rth->rt_genid = rt_genid_ipv4(net);
 	rth->rt_flags 	= flags|RTCF_LOCAL;
 	rth->rt_type	= res.type;
 	rth->rt_is_input = 1;
@@ -1945,7 +1945,7 @@ add:
 
 	rth->dst.output = ip_output;
 
-	rth->rt_genid = rt_genid(dev_net(dev_out));
+	rth->rt_genid = rt_genid_ipv4(dev_net(dev_out));
 	rth->rt_flags	= flags;
 	rth->rt_type	= type;
 	rth->rt_is_input = 0;
@@ -2227,7 +2227,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 		rt->rt_iif = ort->rt_iif;
 		rt->rt_pmtu = ort->rt_pmtu;
 
-		rt->rt_genid = rt_genid(net);
+		rt->rt_genid = rt_genid_ipv4(net);
 		rt->rt_flags = ort->rt_flags;
 		rt->rt_type = ort->rt_type;
 		rt->rt_gateway = ort->rt_gateway;
@@ -2665,7 +2665,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
 
 static __net_init int rt_genid_init(struct net *net)
 {
-	atomic_set(&net->rt_genid, 0);
+	atomic_set(&net->ipv4.rt_genid, 0);
 	atomic_set(&net->fnhe_genid, 0);
 	get_random_bytes(&net->ipv4.dev_addr_genid,
 			 sizeof(net->ipv4.dev_addr_genid));
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a5ac969aeefe..0d1a9b153fbb 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -766,6 +766,7 @@ static int __net_init inet6_net_init(struct net *net)
 
 	net->ipv6.sysctl.bindv6only = 0;
 	net->ipv6.sysctl.icmpv6_time = 1*HZ;
+	atomic_set(&net->ipv6.rt_genid, 0);
 
 	err = ipv6_init_mibs(net);
 	if (err)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 74ab1f74abcd..ce9616304521 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -283,7 +283,7 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 
 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
-		rt->rt6i_genid = rt_genid(net);
+		rt->rt6i_genid = rt_genid_ipv6(net);
 		INIT_LIST_HEAD(&rt->rt6i_siblings);
 	}
 	return rt;
@@ -1061,7 +1061,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
 	 * into this function always.
 	 */
-	if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
+	if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
 		return NULL;
 
 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e52cab3591dd..d8da6b8c6ba8 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -660,7 +660,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	xfrm_pol_hold(policy);
 	net->xfrm.policy_count[dir]++;
 	atomic_inc(&flow_cache_genid);
-	rt_genid_bump(net);
+
+	/* After previous checking, family can either be AF_INET or AF_INET6 */
+	if (policy->family == AF_INET)
+		rt_genid_bump_ipv4(net);
+	else
+		rt_genid_bump_ipv6(net);
+
 	if (delpol) {
 		xfrm_policy_requeue(delpol, policy);
 		__xfrm_policy_unlink(delpol, dir);
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 65f67cb0aefb..6713f04e30ba 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -50,8 +50,13 @@ int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall);
 
 static inline void selinux_xfrm_notify_policyload(void)
 {
+	struct net *net;
+
 	atomic_inc(&flow_cache_genid);
-	rt_genid_bump(&init_net);
+	rtnl_lock();
+	for_each_net(net)
+		rt_genid_bump_all(net);
+	rtnl_unlock();
 }
 #else
 static inline int selinux_xfrm_enabled(void)
-- 
cgit v1.2.3


From 470043ba995a79a274a5db306856975002a06f19 Mon Sep 17 00:00:00 2001
From: Tomasz Stanislawski <t.stanislaws@samsung.com>
Date: Thu, 6 Jun 2013 09:30:50 +0200
Subject: security: smack: fix memleak in smk_write_rules_list()

The smack_parsed_rule structure is allocated.  If a rule is successfully
installed then the last reference to the object is lost.  This patch fixes this
leak. Moreover smack_parsed_rule is allocated on stack because it no longer
needed ofter smk_write_rules_list() is finished.

Signed-off-by: Tomasz Stanislawski <t.stanislaws@samsung.com>
---
 security/smack/smackfs.c | 33 +++++++++++----------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

(limited to 'security')

diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index ab167037b2dd..269b270c6473 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -447,7 +447,7 @@ static ssize_t smk_write_rules_list(struct file *file, const char __user *buf,
 					struct list_head *rule_list,
 					struct mutex *rule_lock, int format)
 {
-	struct smack_parsed_rule *rule;
+	struct smack_parsed_rule rule;
 	char *data;
 	int datalen;
 	int rc = -EINVAL;
@@ -479,47 +479,36 @@ static ssize_t smk_write_rules_list(struct file *file, const char __user *buf,
 		goto out;
 	}
 
-	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
-	if (rule == NULL) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
 	if (format == SMK_LONG_FMT) {
 		/*
 		 * Be sure the data string is terminated.
 		 */
 		data[count] = '\0';
-		if (smk_parse_long_rule(data, rule, 1, 0))
-			goto out_free_rule;
+		if (smk_parse_long_rule(data, &rule, 1, 0))
+			goto out;
 	} else if (format == SMK_CHANGE_FMT) {
 		data[count] = '\0';
-		if (smk_parse_long_rule(data, rule, 1, 1))
-			goto out_free_rule;
+		if (smk_parse_long_rule(data, &rule, 1, 1))
+			goto out;
 	} else {
 		/*
 		 * More on the minor hack for backward compatibility
 		 */
 		if (count == (SMK_OLOADLEN))
 			data[SMK_OLOADLEN] = '-';
-		if (smk_parse_rule(data, rule, 1))
-			goto out_free_rule;
+		if (smk_parse_rule(data, &rule, 1))
+			goto out;
 	}
 
 	if (rule_list == NULL) {
 		load = 1;
-		rule_list = &rule->smk_subject->smk_rules;
-		rule_lock = &rule->smk_subject->smk_rules_lock;
+		rule_list = &rule.smk_subject->smk_rules;
+		rule_lock = &rule.smk_subject->smk_rules_lock;
 	}
 
-	rc = smk_set_access(rule, rule_list, rule_lock, load);
-	if (rc == 0) {
+	rc = smk_set_access(&rule, rule_list, rule_lock, load);
+	if (rc == 0)
 		rc = count;
-		goto out;
-	}
-
-out_free_rule:
-	kfree(rule);
 out:
 	kfree(data);
 	return rc;
-- 
cgit v1.2.3


From 4d7cf4a1f49f76f4069114ee08be75cd68c37c5a Mon Sep 17 00:00:00 2001
From: Tomasz Stanislawski <t.stanislaws@samsung.com>
Date: Tue, 11 Jun 2013 14:55:13 +0200
Subject: security: smack: add a hash table to quicken smk_find_entry()

Accepted for the smack-next tree after changing the number of
slots from 128 to 16.

This patch adds a hash table to quicken searching of a smack label by its name.

Basically, the patch improves performance of SMACK initialization.  Parsing of
rules involves translation from a string to a smack_known (aka label) entity
which is done in smk_find_entry().

The current implementation of the function iterates over a global list of
smack_known resulting in O(N) complexity for smk_find_entry().  The total
complexity of SMACK initialization becomes O(rules * labels).  Therefore it
scales quadratically with a complexity of a system.

Applying the patch reduced the complexity of smk_find_entry() to O(1) as long
as number of label is in hundreds. If the number of labels is increased please
update SMACK_HASH_SLOTS constant defined in security/smack/smack.h. Introducing
the configuration of this constant with Kconfig or cmdline might be a good
idea.

The size of the hash table was adjusted experimentally.  The rule set used by
TIZEN contains circa 17K rules for 500 labels.  The table above contains
results of SMACK initialization using 'time smackctl apply' bash command.
The 'Ref' is a kernel without this patch applied. The consecutive values
refers to value of SMACK_HASH_SLOTS.  Every measurement was repeated three
times to reduce noise.

     |  Ref  |   1   |   2   |   4   |   8   |   16  |   32  |   64  |  128  |  256  |  512
--------------------------------------------------------------------------------------------
Run1 | 1.156 | 1.096 | 0.883 | 0.764 | 0.692 | 0.667 | 0.649 | 0.633 | 0.634 | 0.629 | 0.620
Run2 | 1.156 | 1.111 | 0.885 | 0.764 | 0.694 | 0.661 | 0.649 | 0.651 | 0.634 | 0.638 | 0.623
Run3 | 1.160 | 1.107 | 0.886 | 0.764 | 0.694 | 0.671 | 0.661 | 0.638 | 0.631 | 0.624 | 0.638
AVG  | 1.157 | 1.105 | 0.885 | 0.764 | 0.693 | 0.666 | 0.653 | 0.641 | 0.633 | 0.630 | 0.627

Surprisingly, a single hlist is slightly faster than a double-linked list.
The speed-up saturates near 64 slots.  Therefore I chose value 128 to provide
some margin if more labels were used.
It looks that IO becomes a new bottleneck.

Signed-off-by: Tomasz Stanislawski <t.stanislaws@samsung.com>
---
 security/smack/smack.h        |  5 +++++
 security/smack/smack_access.c | 29 ++++++++++++++++++++++++++---
 security/smack/smack_lsm.c    | 12 ++++++------
 3 files changed, 37 insertions(+), 9 deletions(-)

(limited to 'security')

diff --git a/security/smack/smack.h b/security/smack/smack.h
index 339614c76e63..e80597a3048a 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -53,6 +53,7 @@
  */
 struct smack_known {
 	struct list_head		list;
+	struct hlist_node		smk_hashed;
 	char				*smk_known;
 	u32				smk_secid;
 	struct netlbl_lsm_secattr	smk_netlabel;	/* on wire labels */
@@ -222,6 +223,7 @@ char *smk_parse_smack(const char *string, int len);
 int smk_netlbl_mls(int, char *, struct netlbl_lsm_secattr *, int);
 char *smk_import(const char *, int);
 struct smack_known *smk_import_entry(const char *, int);
+void smk_insert_entry(struct smack_known *skp);
 struct smack_known *smk_find_entry(const char *);
 u32 smack_to_secid(const char *);
 
@@ -247,6 +249,9 @@ extern struct list_head smk_netlbladdr_list;
 
 extern struct security_operations smack_ops;
 
+#define SMACK_HASH_SLOTS 16
+extern struct hlist_head smack_known_hash[SMACK_HASH_SLOTS];
+
 /*
  * Is the directory transmuting?
  */
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index 6a0377f38620..b3b59b1e93d6 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -325,6 +325,25 @@ void smack_log(char *subject_label, char *object_label, int request,
 
 DEFINE_MUTEX(smack_known_lock);
 
+struct hlist_head smack_known_hash[SMACK_HASH_SLOTS];
+
+/**
+ * smk_insert_entry - insert a smack label into a hash map,
+ *
+ * this function must be called under smack_known_lock
+ */
+void smk_insert_entry(struct smack_known *skp)
+{
+	unsigned int hash;
+	struct hlist_head *head;
+
+	hash = full_name_hash(skp->smk_known, strlen(skp->smk_known));
+	head = &smack_known_hash[hash & (SMACK_HASH_SLOTS - 1)];
+
+	hlist_add_head_rcu(&skp->smk_hashed, head);
+	list_add_rcu(&skp->list, &smack_known_list);
+}
+
 /**
  * smk_find_entry - find a label on the list, return the list entry
  * @string: a text string that might be a Smack label
@@ -334,12 +353,16 @@ DEFINE_MUTEX(smack_known_lock);
  */
 struct smack_known *smk_find_entry(const char *string)
 {
+	unsigned int hash;
+	struct hlist_head *head;
 	struct smack_known *skp;
 
-	list_for_each_entry_rcu(skp, &smack_known_list, list) {
+	hash = full_name_hash(string, strlen(string));
+	head = &smack_known_hash[hash & (SMACK_HASH_SLOTS - 1)];
+
+	hlist_for_each_entry_rcu(skp, head, smk_hashed)
 		if (strcmp(skp->smk_known, string) == 0)
 			return skp;
-	}
 
 	return NULL;
 }
@@ -475,7 +498,7 @@ struct smack_known *smk_import_entry(const char *string, int len)
 		 * Make sure that the entry is actually
 		 * filled before putting it on the list.
 		 */
-		list_add_rcu(&skp->list, &smack_known_list);
+		smk_insert_entry(skp);
 		goto unlockout;
 	}
 	/*
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 3f7682a387b7..ce000a81caf7 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -3879,12 +3879,12 @@ static __init void init_smack_known_list(void)
 	/*
 	 * Create the known labels list
 	 */
-	list_add(&smack_known_huh.list, &smack_known_list);
-	list_add(&smack_known_hat.list, &smack_known_list);
-	list_add(&smack_known_star.list, &smack_known_list);
-	list_add(&smack_known_floor.list, &smack_known_list);
-	list_add(&smack_known_invalid.list, &smack_known_list);
-	list_add(&smack_known_web.list, &smack_known_list);
+	smk_insert_entry(&smack_known_huh);
+	smk_insert_entry(&smack_known_hat);
+	smk_insert_entry(&smack_known_star);
+	smk_insert_entry(&smack_known_floor);
+	smk_insert_entry(&smack_known_invalid);
+	smk_insert_entry(&smack_known_web);
 }
 
 /**
-- 
cgit v1.2.3


From 677264e8fb73ea35a508700e19ce76c527576d1c Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Fri, 28 Jun 2013 13:47:07 -0700
Subject: Smack: network label match fix

The Smack code that matches incoming CIPSO tags with Smack labels
reaches through the NetLabel interfaces and compares the network
data with the CIPSO header associated with a Smack label. This was
done in a ill advised attempt to optimize performance. It works
so long as the categories fit in a single capset, but this isn't
always the case.

This patch changes the Smack code to use the appropriate NetLabel
interfaces to compare the incoming CIPSO header with the CIPSO
header associated with a label. It will always match the CIPSO
headers correctly.

Targeted for git://git.gitorious.org/smack-next/kernel.git

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
---
 security/smack/smack.h     |  8 ++++++--
 security/smack/smack_lsm.c | 30 ++++++++++++++++++++++++------
 security/smack/smackfs.c   |  2 +-
 3 files changed, 31 insertions(+), 9 deletions(-)

(limited to 'security')

diff --git a/security/smack/smack.h b/security/smack/smack.h
index e80597a3048a..076b8e8a51ab 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -168,9 +168,13 @@ struct smk_port_label {
 #define SMACK_CIPSO_DOI_INVALID		-1	/* Not a DOI */
 #define SMACK_CIPSO_DIRECT_DEFAULT	250	/* Arbitrary */
 #define SMACK_CIPSO_MAPPED_DEFAULT	251	/* Also arbitrary */
-#define SMACK_CIPSO_MAXCATVAL		63	/* Bigger gets harder */
 #define SMACK_CIPSO_MAXLEVEL            255     /* CIPSO 2.2 standard */
-#define SMACK_CIPSO_MAXCATNUM           239     /* CIPSO 2.2 standard */
+/*
+ * CIPSO 2.2 standard is 239, but Smack wants to use the
+ * categories in a structured way that limits the value to
+ * the bits in 23 bytes, hence the unusual number.
+ */
+#define SMACK_CIPSO_MAXCATNUM           184     /* 23 * 8 */
 
 /*
  * Flag for transmute access
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index ce000a81caf7..19204e11c02c 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -3066,6 +3066,8 @@ static struct smack_known *smack_from_secattr(struct netlbl_lsm_secattr *sap,
 {
 	struct smack_known *skp;
 	int found = 0;
+	int acat;
+	int kcat;
 
 	if ((sap->flags & NETLBL_SECATTR_MLS_LVL) != 0) {
 		/*
@@ -3082,12 +3084,28 @@ static struct smack_known *smack_from_secattr(struct netlbl_lsm_secattr *sap,
 		list_for_each_entry(skp, &smack_known_list, list) {
 			if (sap->attr.mls.lvl != skp->smk_netlabel.attr.mls.lvl)
 				continue;
-			if (memcmp(sap->attr.mls.cat,
-				skp->smk_netlabel.attr.mls.cat,
-				SMK_CIPSOLEN) != 0)
-				continue;
-			found = 1;
-			break;
+			/*
+			 * Compare the catsets. Use the netlbl APIs.
+			 */
+			if ((sap->flags & NETLBL_SECATTR_MLS_CAT) == 0) {
+				if ((skp->smk_netlabel.flags &
+				     NETLBL_SECATTR_MLS_CAT) == 0)
+					found = 1;
+				break;
+			}
+			for (acat = -1, kcat = -1; acat == kcat; ) {
+				acat = netlbl_secattr_catmap_walk(
+					sap->attr.mls.cat, acat + 1);
+				kcat = netlbl_secattr_catmap_walk(
+					skp->smk_netlabel.attr.mls.cat,
+					kcat + 1);
+				if (acat < 0 || kcat < 0)
+					break;
+			}
+			if (acat == kcat) {
+				found = 1;
+				break;
+			}
 		}
 		rcu_read_unlock();
 
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 269b270c6473..a07e93f00a0f 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -890,7 +890,7 @@ static ssize_t smk_set_cipso(struct file *file, const char __user *buf,
 	for (i = 0; i < catlen; i++) {
 		rule += SMK_DIGITLEN;
 		ret = sscanf(rule, "%u", &cat);
-		if (ret != 1 || cat > SMACK_CIPSO_MAXCATVAL)
+		if (ret != 1 || cat > SMACK_CIPSO_MAXCATNUM)
 			goto out;
 
 		smack_catset_bit(cat, mapcatset);
-- 
cgit v1.2.3


From 8af01f56a03e9cbd91a55d688fce1315021efba8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Aug 2013 20:11:22 -0400
Subject: cgroup: s/cgroup_subsys_state/cgroup_css/
 s/task_subsys_state/task_css/

The names of the two struct cgroup_subsys_state accessors -
cgroup_subsys_state() and task_subsys_state() - are somewhat awkward.
The former clashes with the type name and the latter doesn't even
indicate it's somehow related to cgroup.

We're about to revamp large portion of cgroup API, so, let's rename
them so that they're less awkward.  Most per-controller usages of the
accessors are localized in accessor wrappers and given the amount of
scheduled changes, this isn't gonna add any noticeable headache.

Rename cgroup_subsys_state() to cgroup_css() and task_subsys_state()
to task_css().  This patch is pure rename.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 block/blk-cgroup.h           |  5 ++---
 fs/bio.c                     |  2 +-
 include/linux/cgroup.h       | 31 +++++++++++++++++++------------
 include/net/cls_cgroup.h     |  4 ++--
 include/net/netprio_cgroup.h |  4 ++--
 kernel/cgroup.c              |  2 +-
 kernel/cgroup_freezer.c      |  4 ++--
 kernel/cpuset.c              |  6 +++---
 kernel/events/core.c         |  6 +++---
 kernel/sched/core.c          |  4 ++--
 kernel/sched/cpuacct.c       |  4 ++--
 kernel/sched/sched.h         |  6 +++---
 mm/hugetlb_cgroup.c          |  6 ++----
 mm/memcontrol.c              |  5 ++---
 mm/vmpressure.c              |  2 +-
 net/core/netprio_cgroup.c    |  2 +-
 net/sched/cls_cgroup.c       |  4 ++--
 security/device_cgroup.c     |  4 ++--
 18 files changed, 52 insertions(+), 49 deletions(-)

(limited to 'security')

diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 8056c03a3382..628e50f6f8a8 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -181,14 +181,13 @@ void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
 static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
 {
-	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
+	return container_of(cgroup_css(cgroup, blkio_subsys_id),
 			    struct blkcg, css);
 }
 
 static inline struct blkcg *task_blkcg(struct task_struct *tsk)
 {
-	return container_of(task_subsys_state(tsk, blkio_subsys_id),
-			    struct blkcg, css);
+	return container_of(task_css(tsk, blkio_subsys_id), struct blkcg, css);
 }
 
 static inline struct blkcg *bio_blkcg(struct bio *bio)
diff --git a/fs/bio.c b/fs/bio.c
index 94bbc04dba77..8e0348f6e5bd 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1946,7 +1946,7 @@ int bio_associate_current(struct bio *bio)
 
 	/* associate blkcg if exists */
 	rcu_read_lock();
-	css = task_subsys_state(current, blkio_subsys_id);
+	css = task_css(current, blkio_subsys_id);
 	if (css && css_tryget(css))
 		bio->bi_css = css;
 	rcu_read_unlock();
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 44dd422d7e9b..552c5feef733 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -647,8 +647,15 @@ struct cgroup_subsys {
 #undef IS_SUBSYS_ENABLED
 #undef SUBSYS
 
-static inline struct cgroup_subsys_state *cgroup_subsys_state(
-	struct cgroup *cgrp, int subsys_id)
+/**
+ * cgroup_css - obtain a cgroup's css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @subsys_id: the subsystem of interest
+ *
+ * Return @cgrp's css (cgroup_subsys_state) associated with @subsys_id.
+ */
+static inline struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
+						     int subsys_id)
 {
 	return cgrp->subsys[subsys_id];
 }
@@ -678,7 +685,7 @@ extern struct mutex cgroup_mutex;
 #endif
 
 /**
- * task_subsys_state_check - obtain css for (task, subsys) w/ extra access conds
+ * task_css_check - obtain css for (task, subsys) w/ extra access conds
  * @task: the target task
  * @subsys_id: the target subsystem ID
  * @__c: extra condition expression to be passed to rcu_dereference_check()
@@ -686,7 +693,7 @@ extern struct mutex cgroup_mutex;
  * Return the cgroup_subsys_state for the (@task, @subsys_id) pair.  The
  * synchronization rules are the same as task_css_set_check().
  */
-#define task_subsys_state_check(task, subsys_id, __c)			\
+#define task_css_check(task, subsys_id, __c)				\
 	task_css_set_check((task), (__c))->subsys[(subsys_id)]
 
 /**
@@ -701,22 +708,22 @@ static inline struct css_set *task_css_set(struct task_struct *task)
 }
 
 /**
- * task_subsys_state - obtain css for (task, subsys)
+ * task_css - obtain css for (task, subsys)
  * @task: the target task
  * @subsys_id: the target subsystem ID
  *
- * See task_subsys_state_check().
+ * See task_css_check().
  */
-static inline struct cgroup_subsys_state *
-task_subsys_state(struct task_struct *task, int subsys_id)
+static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
+						   int subsys_id)
 {
-	return task_subsys_state_check(task, subsys_id, false);
+	return task_css_check(task, subsys_id, false);
 }
 
-static inline struct cgroup* task_cgroup(struct task_struct *task,
-					       int subsys_id)
+static inline struct cgroup *task_cgroup(struct task_struct *task,
+					 int subsys_id)
 {
-	return task_subsys_state(task, subsys_id)->cgroup;
+	return task_css(task, subsys_id)->cgroup;
 }
 
 /**
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 0fee0617fb7d..52adaa75dac9 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -35,7 +35,7 @@ static inline u32 task_cls_classid(struct task_struct *p)
 		return 0;
 
 	rcu_read_lock();
-	classid = container_of(task_subsys_state(p, net_cls_subsys_id),
+	classid = container_of(task_css(p, net_cls_subsys_id),
 			       struct cgroup_cls_state, css)->classid;
 	rcu_read_unlock();
 
@@ -51,7 +51,7 @@ static inline u32 task_cls_classid(struct task_struct *p)
 		return 0;
 
 	rcu_read_lock();
-	css = task_subsys_state(p, net_cls_subsys_id);
+	css = task_css(p, net_cls_subsys_id);
 	if (css)
 		classid = container_of(css,
 				       struct cgroup_cls_state, css)->classid;
diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h
index 50ab8c26ab59..8110fa7ae60a 100644
--- a/include/net/netprio_cgroup.h
+++ b/include/net/netprio_cgroup.h
@@ -39,7 +39,7 @@ static inline u32 task_netprioidx(struct task_struct *p)
 	u32 idx;
 
 	rcu_read_lock();
-	css = task_subsys_state(p, net_prio_subsys_id);
+	css = task_css(p, net_prio_subsys_id);
 	idx = css->cgroup->id;
 	rcu_read_unlock();
 	return idx;
@@ -53,7 +53,7 @@ static inline u32 task_netprioidx(struct task_struct *p)
 	u32 idx = 0;
 
 	rcu_read_lock();
-	css = task_subsys_state(p, net_prio_subsys_id);
+	css = task_css(p, net_prio_subsys_id);
 	if (css)
 		idx = css->cgroup->id;
 	rcu_read_unlock();
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ae4c46834633..0b3caa3220cb 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -81,7 +81,7 @@
  */
 #ifdef CONFIG_PROVE_RCU
 DEFINE_MUTEX(cgroup_mutex);
-EXPORT_SYMBOL_GPL(cgroup_mutex);	/* only for task_subsys_state_check() */
+EXPORT_SYMBOL_GPL(cgroup_mutex);	/* only for lockdep */
 #else
 static DEFINE_MUTEX(cgroup_mutex);
 #endif
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 75dda1ea5026..9d3f61566fec 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -47,13 +47,13 @@ struct freezer {
 
 static inline struct freezer *cgroup_freezer(struct cgroup *cgroup)
 {
-	return container_of(cgroup_subsys_state(cgroup, freezer_subsys_id),
+	return container_of(cgroup_css(cgroup, freezer_subsys_id),
 			    struct freezer, css);
 }
 
 static inline struct freezer *task_freezer(struct task_struct *task)
 {
-	return container_of(task_subsys_state(task, freezer_subsys_id),
+	return container_of(task_css(task, freezer_subsys_id),
 			    struct freezer, css);
 }
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1b9c31549797..be4512ba2c0c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -117,14 +117,14 @@ struct cpuset {
 /* Retrieve the cpuset for a cgroup */
 static inline struct cpuset *cgroup_cs(struct cgroup *cgrp)
 {
-	return container_of(cgroup_subsys_state(cgrp, cpuset_subsys_id),
+	return container_of(cgroup_css(cgrp, cpuset_subsys_id),
 			    struct cpuset, css);
 }
 
 /* Retrieve the cpuset for a task */
 static inline struct cpuset *task_cs(struct task_struct *task)
 {
-	return container_of(task_subsys_state(task, cpuset_subsys_id),
+	return container_of(task_css(task, cpuset_subsys_id),
 			    struct cpuset, css);
 }
 
@@ -2724,7 +2724,7 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v)
 		goto out_free;
 
 	rcu_read_lock();
-	css = task_subsys_state(tsk, cpuset_subsys_id);
+	css = task_css(tsk, cpuset_subsys_id);
 	retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
 	rcu_read_unlock();
 	if (retval < 0)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1833bc5a84a7..414c61f4d776 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -340,8 +340,8 @@ struct perf_cgroup {
 static inline struct perf_cgroup *
 perf_cgroup_from_task(struct task_struct *task)
 {
-	return container_of(task_subsys_state(task, perf_subsys_id),
-			struct perf_cgroup, css);
+	return container_of(task_css(task, perf_subsys_id),
+			    struct perf_cgroup, css);
 }
 
 static inline bool
@@ -7798,7 +7798,7 @@ static struct cgroup_subsys_state *perf_cgroup_css_alloc(struct cgroup *cont)
 static void perf_cgroup_css_free(struct cgroup *cont)
 {
 	struct perf_cgroup *jc;
-	jc = container_of(cgroup_subsys_state(cont, perf_subsys_id),
+	jc = container_of(cgroup_css(cont, perf_subsys_id),
 			  struct perf_cgroup, css);
 	free_percpu(jc->info);
 	kfree(jc);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9b1f2e533b95..323d907eac1a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6761,7 +6761,7 @@ void sched_move_task(struct task_struct *tsk)
 	if (unlikely(running))
 		tsk->sched_class->put_prev_task(rq, tsk);
 
-	tg = container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id,
+	tg = container_of(task_css_check(tsk, cpu_cgroup_subsys_id,
 				lockdep_is_held(&tsk->sighand->siglock)),
 			  struct task_group, css);
 	tg = autogroup_task_group(tsk, tg);
@@ -7086,7 +7086,7 @@ int sched_rt_handler(struct ctl_table *table, int write,
 /* return corresponding task_group object of a cgroup */
 static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
 {
-	return container_of(cgroup_subsys_state(cgrp, cpu_cgroup_subsys_id),
+	return container_of(cgroup_css(cgrp, cpu_cgroup_subsys_id),
 			    struct task_group, css);
 }
 
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index dbb7e2cd95eb..4a210faaab77 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -36,14 +36,14 @@ struct cpuacct {
 /* return cpu accounting group corresponding to this container */
 static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
 {
-	return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id),
+	return container_of(cgroup_css(cgrp, cpuacct_subsys_id),
 			    struct cpuacct, css);
 }
 
 /* return cpu accounting group to which this task belongs */
 static inline struct cpuacct *task_ca(struct task_struct *tsk)
 {
-	return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
+	return container_of(task_css(tsk, cpuacct_subsys_id),
 			    struct cpuacct, css);
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ef0a7b2439dd..471a56db05ea 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -665,9 +665,9 @@ extern int group_balance_cpu(struct sched_group *sg);
 /*
  * Return the group to which this tasks belongs.
  *
- * We cannot use task_subsys_state() and friends because the cgroup
- * subsystem changes that value before the cgroup_subsys::attach() method
- * is called, therefore we cannot pin it and might observe the wrong value.
+ * We cannot use task_css() and friends because the cgroup subsystem
+ * changes that value before the cgroup_subsys::attach() method is called,
+ * therefore we cannot pin it and might observe the wrong value.
  *
  * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
  * core changes this before calling sched_move_task().
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 9cea7de22ffb..50f213fc52c7 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -42,15 +42,13 @@ struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
 static inline
 struct hugetlb_cgroup *hugetlb_cgroup_from_cgroup(struct cgroup *cgroup)
 {
-	return hugetlb_cgroup_from_css(cgroup_subsys_state(cgroup,
-							   hugetlb_subsys_id));
+	return hugetlb_cgroup_from_css(cgroup_css(cgroup, hugetlb_subsys_id));
 }
 
 static inline
 struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
 {
-	return hugetlb_cgroup_from_css(task_subsys_state(task,
-							 hugetlb_subsys_id));
+	return hugetlb_cgroup_from_css(task_css(task, hugetlb_subsys_id));
 }
 
 static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d12ca6f3c293..b47bd3ad3c2b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1037,8 +1037,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 
 struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 {
-	return mem_cgroup_from_css(
-		cgroup_subsys_state(cont, mem_cgroup_subsys_id));
+	return mem_cgroup_from_css(cgroup_css(cont, mem_cgroup_subsys_id));
 }
 
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
@@ -1051,7 +1050,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 	if (unlikely(!p))
 		return NULL;
 
-	return mem_cgroup_from_css(task_subsys_state(p, mem_cgroup_subsys_id));
+	return mem_cgroup_from_css(task_css(p, mem_cgroup_subsys_id));
 }
 
 struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 736a6011c2c8..7f1654d3cec7 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -76,7 +76,7 @@ static struct vmpressure *work_to_vmpressure(struct work_struct *work)
 
 static struct vmpressure *cg_to_vmpressure(struct cgroup *cg)
 {
-	return css_to_vmpressure(cgroup_subsys_state(cg, mem_cgroup_subsys_id));
+	return css_to_vmpressure(cgroup_css(cg, mem_cgroup_subsys_id));
 }
 
 static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index e533259dce3c..ccf852311987 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -31,7 +31,7 @@
 
 static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
 {
-	return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
+	return container_of(cgroup_css(cgrp, net_prio_subsys_id),
 			    struct cgroup_netprio_state, css);
 }
 
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 3a294eb98d61..5ee72a001df0 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -25,13 +25,13 @@
 
 static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp)
 {
-	return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id),
+	return container_of(cgroup_css(cgrp, net_cls_subsys_id),
 			    struct cgroup_cls_state, css);
 }
 
 static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
 {
-	return container_of(task_subsys_state(p, net_cls_subsys_id),
+	return container_of(task_css(p, net_cls_subsys_id),
 			    struct cgroup_cls_state, css);
 }
 
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index e8aad69f0d69..87a0a037fbd6 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -58,12 +58,12 @@ static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
 
 static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
 {
-	return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
+	return css_to_devcgroup(cgroup_css(cgroup, devices_subsys_id));
 }
 
 static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
 {
-	return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
+	return css_to_devcgroup(task_css(task, devices_subsys_id));
 }
 
 struct cgroup_subsys devices_subsys;
-- 
cgit v1.2.3


From a7c6d554aa01236ac2a9f851ab0f75704f76dfa2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Aug 2013 20:11:23 -0400
Subject: cgroup: add/update accessors which obtain subsys specific data from
 css

css (cgroup_subsys_state) is usually embedded in a subsys specific
data structure.  Subsystems either use container_of() directly to cast
from css to such data structure or has an accessor function wrapping
such cast.  As cgroup as whole is moving towards using css as the main
interface handle, add and update such accessors to ease dealing with
css's.

All accessors explicitly handle NULL input and return NULL in those
cases.  While this looks like an extra branch in the code, as all
controllers specific data structures have css as the first field, the
casting doesn't involve any offsetting and the compiler can trivially
optimize out the branch.

* blkio, freezer, cpuset, cpu, cpuacct and net_cls didn't have such
  accessor.  Added.

* memory, hugetlb and devices already had one but didn't explicitly
  handle NULL input.  Updated.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 block/blk-cgroup.h       | 12 ++++++++----
 kernel/cgroup_freezer.c  | 11 +++++++----
 kernel/cpuset.c          | 11 +++++++----
 kernel/sched/core.c      |  8 ++++++--
 kernel/sched/cpuacct.c   | 11 +++++++----
 mm/hugetlb_cgroup.c      |  2 +-
 mm/memcontrol.c          |  2 +-
 net/sched/cls_cgroup.c   | 11 +++++++----
 security/device_cgroup.c |  2 +-
 9 files changed, 45 insertions(+), 25 deletions(-)

(limited to 'security')

diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 628e50f6f8a8..8e5863e900bf 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -179,21 +179,25 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
 
+static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct blkcg, css) : NULL;
+}
+
 static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
 {
-	return container_of(cgroup_css(cgroup, blkio_subsys_id),
-			    struct blkcg, css);
+	return css_to_blkcg(cgroup_css(cgroup, blkio_subsys_id));
 }
 
 static inline struct blkcg *task_blkcg(struct task_struct *tsk)
 {
-	return container_of(task_css(tsk, blkio_subsys_id), struct blkcg, css);
+	return css_to_blkcg(task_css(tsk, blkio_subsys_id));
 }
 
 static inline struct blkcg *bio_blkcg(struct bio *bio)
 {
 	if (bio && bio->bi_css)
-		return container_of(bio->bi_css, struct blkcg, css);
+		return css_to_blkcg(bio->bi_css);
 	return task_blkcg(current);
 }
 
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 9d3f61566fec..1db686e47a22 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -45,16 +45,19 @@ struct freezer {
 	spinlock_t			lock;
 };
 
+static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct freezer, css) : NULL;
+}
+
 static inline struct freezer *cgroup_freezer(struct cgroup *cgroup)
 {
-	return container_of(cgroup_css(cgroup, freezer_subsys_id),
-			    struct freezer, css);
+	return css_freezer(cgroup_css(cgroup, freezer_subsys_id));
 }
 
 static inline struct freezer *task_freezer(struct task_struct *task)
 {
-	return container_of(task_css(task, freezer_subsys_id),
-			    struct freezer, css);
+	return css_freezer(task_css(task, freezer_subsys_id));
 }
 
 static struct freezer *parent_freezer(struct freezer *freezer)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f7371341d42a..6e9cbdde25bd 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -114,18 +114,21 @@ struct cpuset {
 	int relax_domain_level;
 };
 
+static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct cpuset, css) : NULL;
+}
+
 /* Retrieve the cpuset for a cgroup */
 static inline struct cpuset *cgroup_cs(struct cgroup *cgrp)
 {
-	return container_of(cgroup_css(cgrp, cpuset_subsys_id),
-			    struct cpuset, css);
+	return css_cs(cgroup_css(cgrp, cpuset_subsys_id));
 }
 
 /* Retrieve the cpuset for a task */
 static inline struct cpuset *task_cs(struct task_struct *task)
 {
-	return container_of(task_css(task, cpuset_subsys_id),
-			    struct cpuset, css);
+	return css_cs(task_css(task, cpuset_subsys_id));
 }
 
 static inline struct cpuset *parent_cs(struct cpuset *cs)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 323d907eac1a..5bccb0277129 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7083,11 +7083,15 @@ int sched_rt_handler(struct ctl_table *table, int write,
 
 #ifdef CONFIG_CGROUP_SCHED
 
+static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct task_group, css) : NULL;
+}
+
 /* return corresponding task_group object of a cgroup */
 static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
 {
-	return container_of(cgroup_css(cgrp, cpu_cgroup_subsys_id),
-			    struct task_group, css);
+	return css_tg(cgroup_css(cgrp, cpu_cgroup_subsys_id));
 }
 
 static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 4a210faaab77..8ccfa10cc89f 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -33,18 +33,21 @@ struct cpuacct {
 	struct kernel_cpustat __percpu *cpustat;
 };
 
+static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct cpuacct, css) : NULL;
+}
+
 /* return cpu accounting group corresponding to this container */
 static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
 {
-	return container_of(cgroup_css(cgrp, cpuacct_subsys_id),
-			    struct cpuacct, css);
+	return css_ca(cgroup_css(cgrp, cpuacct_subsys_id));
 }
 
 /* return cpu accounting group to which this task belongs */
 static inline struct cpuacct *task_ca(struct task_struct *tsk)
 {
-	return container_of(task_css(tsk, cpuacct_subsys_id),
-			    struct cpuacct, css);
+	return css_ca(task_css(tsk, cpuacct_subsys_id));
 }
 
 static inline struct cpuacct *__parent_ca(struct cpuacct *ca)
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index d2f9fc0b186e..95585a0b9c8d 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -36,7 +36,7 @@ static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
 static inline
 struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
 {
-	return container_of(s, struct hugetlb_cgroup, css);
+	return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
 }
 
 static inline
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b47bd3ad3c2b..11d659e3b08e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -486,7 +486,7 @@ static DEFINE_MUTEX(memcg_create_mutex);
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s)
 {
-	return container_of(s, struct mem_cgroup, css);
+	return s ? container_of(s, struct mem_cgroup, css) : NULL;
 }
 
 /* Some nice accessors for the vmpressure. */
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 5ee72a001df0..af412ab2b477 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -23,16 +23,19 @@
 #include <net/sock.h>
 #include <net/cls_cgroup.h>
 
+static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
+}
+
 static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp)
 {
-	return container_of(cgroup_css(cgrp, net_cls_subsys_id),
-			    struct cgroup_cls_state, css);
+	return css_cls_state(cgroup_css(cgrp, net_cls_subsys_id));
 }
 
 static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
 {
-	return container_of(task_css(p, net_cls_subsys_id),
-			    struct cgroup_cls_state, css);
+	return css_cls_state(task_css(p, net_cls_subsys_id));
 }
 
 static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 87a0a037fbd6..90953648c643 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -53,7 +53,7 @@ struct dev_cgroup {
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
 {
-	return container_of(s, struct dev_cgroup, css);
+	return s ? container_of(s, struct dev_cgroup, css) : NULL;
 }
 
 static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
-- 
cgit v1.2.3


From 6387698699afd72d6304566fb6ccf84bffe07c56 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Aug 2013 20:11:23 -0400
Subject: cgroup: add css_parent()

Currently, controllers have to explicitly follow the cgroup hierarchy
to find the parent of a given css.  cgroup is moving towards using
cgroup_subsys_state as the main controller interface construct, so
let's provide a way to climb the hierarchy using just csses.

This patch implements css_parent() which, given a css, returns its
parent.  The function is guarnateed to valid non-NULL parent css as
long as the target css is not at the top of the hierarchy.

freezer, cpuset, cpu, cpuacct, hugetlb, memory, net_cls and devices
are converted to use css_parent() instead of accessing cgroup->parent
directly.

* __parent_ca() is dropped from cpuacct and its usage is replaced with
  parent_ca().  The only difference between the two was NULL test on
  cgroup->parent which is now embedded in css_parent() making the
  distinction moot.  Note that eventually a css->parent field will be
  added to css and the NULL check in css_parent() will go away.

This patch shouldn't cause any behavior differences.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 block/blk-cgroup.h       |  4 +---
 include/linux/cgroup.h   | 15 +++++++++++++++
 kernel/cgroup_freezer.c  |  8 ++------
 kernel/cpuset.c          |  6 +-----
 kernel/sched/core.c      |  9 +++------
 kernel/sched/cpuacct.c   | 11 ++---------
 mm/hugetlb_cgroup.c      |  6 +-----
 mm/memcontrol.c          | 39 +++++++++++----------------------------
 net/sched/cls_cgroup.c   |  8 +++++---
 security/device_cgroup.c | 18 +++++-------------
 10 files changed, 46 insertions(+), 78 deletions(-)

(limited to 'security')

diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 8e5863e900bf..b6802c46d68f 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -209,9 +209,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
  */
 static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
 {
-	struct cgroup *pcg = blkcg->css.cgroup->parent;
-
-	return pcg ? cgroup_to_blkcg(pcg) : NULL;
+	return css_to_blkcg(css_parent(&blkcg->css));
 }
 
 /**
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 821678aae4db..18112a3bb12b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -646,6 +646,21 @@ struct cgroup_subsys {
 #undef IS_SUBSYS_ENABLED
 #undef SUBSYS
 
+/**
+ * css_parent - find the parent css
+ * @css: the target cgroup_subsys_state
+ *
+ * Return the parent css of @css.  This function is guaranteed to return
+ * non-NULL parent as long as @css isn't the root.
+ */
+static inline
+struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css)
+{
+	struct cgroup *parent_cgrp = css->cgroup->parent;
+
+	return parent_cgrp ? parent_cgrp->subsys[css->ss->subsys_id] : NULL;
+}
+
 /**
  * cgroup_css - obtain a cgroup's css for the specified subsystem
  * @cgrp: the cgroup of interest
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 1db686e47a22..657a73cd44c4 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -62,11 +62,7 @@ static inline struct freezer *task_freezer(struct task_struct *task)
 
 static struct freezer *parent_freezer(struct freezer *freezer)
 {
-	struct cgroup *pcg = freezer->css.cgroup->parent;
-
-	if (pcg)
-		return cgroup_freezer(pcg);
-	return NULL;
+	return css_freezer(css_parent(&freezer->css));
 }
 
 bool cgroup_freezing(struct task_struct *task)
@@ -234,7 +230,7 @@ static void freezer_fork(struct task_struct *task)
 	 * The root cgroup is non-freezable, so we can skip the
 	 * following check.
 	 */
-	if (!freezer->css.cgroup->parent)
+	if (!parent_freezer(freezer))
 		goto out;
 
 	spin_lock_irq(&freezer->lock);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 6e9cbdde25bd..259a4af37e69 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -133,11 +133,7 @@ static inline struct cpuset *task_cs(struct task_struct *task)
 
 static inline struct cpuset *parent_cs(struct cpuset *cs)
 {
-	struct cgroup *pcgrp = cs->css.cgroup->parent;
-
-	if (pcgrp)
-		return cgroup_cs(pcgrp);
-	return NULL;
+	return css_cs(css_parent(&cs->css));
 }
 
 #ifdef CONFIG_NUMA
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5bccb0277129..7a10742b389a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7114,13 +7114,10 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
 static int cpu_cgroup_css_online(struct cgroup *cgrp)
 {
 	struct task_group *tg = cgroup_tg(cgrp);
-	struct task_group *parent;
+	struct task_group *parent = css_tg(css_parent(&tg->css));
 
-	if (!cgrp->parent)
-		return 0;
-
-	parent = cgroup_tg(cgrp->parent);
-	sched_online_group(tg, parent);
+	if (parent)
+		sched_online_group(tg, parent);
 	return 0;
 }
 
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 8ccfa10cc89f..f6926a149a71 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -50,16 +50,9 @@ static inline struct cpuacct *task_ca(struct task_struct *tsk)
 	return css_ca(task_css(tsk, cpuacct_subsys_id));
 }
 
-static inline struct cpuacct *__parent_ca(struct cpuacct *ca)
-{
-	return cgroup_ca(ca->css.cgroup->parent);
-}
-
 static inline struct cpuacct *parent_ca(struct cpuacct *ca)
 {
-	if (!ca->css.cgroup->parent)
-		return NULL;
-	return cgroup_ca(ca->css.cgroup->parent);
+	return css_ca(css_parent(&ca->css));
 }
 
 static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
@@ -284,7 +277,7 @@ void cpuacct_account_field(struct task_struct *p, int index, u64 val)
 	while (ca != &root_cpuacct) {
 		kcpustat = this_cpu_ptr(ca->cpustat);
 		kcpustat->cpustat[index] += val;
-		ca = __parent_ca(ca);
+		ca = parent_ca(ca);
 	}
 	rcu_read_unlock();
 }
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 95585a0b9c8d..57ecb5d2513f 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -59,11 +59,7 @@ static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
 static inline struct hugetlb_cgroup *
 parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
 {
-	struct cgroup *parent = h_cg->css.cgroup->parent;
-
-	if (!parent)
-		return NULL;
-	return hugetlb_cgroup_from_cgroup(parent);
+	return hugetlb_cgroup_from_css(css_parent(&h_cg->css));
 }
 
 static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 11d659e3b08e..69b3e520f921 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1524,10 +1524,8 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
 
 int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 {
-	struct cgroup *cgrp = memcg->css.cgroup;
-
 	/* root ? */
-	if (cgrp->parent == NULL)
+	if (!css_parent(&memcg->css))
 		return vm_swappiness;
 
 	return memcg->swappiness;
@@ -5026,11 +5024,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
 {
 	int retval = 0;
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
-	struct cgroup *parent = cont->parent;
-	struct mem_cgroup *parent_memcg = NULL;
-
-	if (parent)
-		parent_memcg = mem_cgroup_from_cont(parent);
+	struct mem_cgroup *parent_memcg = mem_cgroup_from_css(css_parent(&memcg->css));
 
 	mutex_lock(&memcg_create_mutex);
 
@@ -5282,18 +5276,15 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
 static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
 		unsigned long long *mem_limit, unsigned long long *memsw_limit)
 {
-	struct cgroup *cgroup;
 	unsigned long long min_limit, min_memsw_limit, tmp;
 
 	min_limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
 	min_memsw_limit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
-	cgroup = memcg->css.cgroup;
 	if (!memcg->use_hierarchy)
 		goto out;
 
-	while (cgroup->parent) {
-		cgroup = cgroup->parent;
-		memcg = mem_cgroup_from_cont(cgroup);
+	while (css_parent(&memcg->css)) {
+		memcg = mem_cgroup_from_css(css_parent(&memcg->css));
 		if (!memcg->use_hierarchy)
 			break;
 		tmp = res_counter_read_u64(&memcg->res, RES_LIMIT);
@@ -5523,16 +5514,11 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
 				       u64 val)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
-	struct mem_cgroup *parent;
-
-	if (val > 100)
-		return -EINVAL;
+	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));
 
-	if (cgrp->parent == NULL)
+	if (val > 100 || !parent)
 		return -EINVAL;
 
-	parent = mem_cgroup_from_cont(cgrp->parent);
-
 	mutex_lock(&memcg_create_mutex);
 
 	/* If under hierarchy, only empty-root can set this value */
@@ -5861,14 +5847,12 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
 	struct cftype *cft, u64 val)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
-	struct mem_cgroup *parent;
+	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));
 
 	/* cannot set to root cgroup and only 0 and 1 are allowed */
-	if (!cgrp->parent || !((val == 0) || (val == 1)))
+	if (!parent || !((val == 0) || (val == 1)))
 		return -EINVAL;
 
-	parent = mem_cgroup_from_cont(cgrp->parent);
-
 	mutex_lock(&memcg_create_mutex);
 	/* oom-kill-disable is a flag for subhierarchy. */
 	if ((parent->use_hierarchy) || memcg_has_children(memcg)) {
@@ -6266,15 +6250,14 @@ free_out:
 static int
 mem_cgroup_css_online(struct cgroup *cont)
 {
-	struct mem_cgroup *memcg, *parent;
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));
 	int error = 0;
 
-	if (!cont->parent)
+	if (!parent)
 		return 0;
 
 	mutex_lock(&memcg_create_mutex);
-	memcg = mem_cgroup_from_cont(cont);
-	parent = mem_cgroup_from_cont(cont->parent);
 
 	memcg->use_hierarchy = parent->use_hierarchy;
 	memcg->oom_kill_disable = parent->oom_kill_disable;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index af412ab2b477..9e6b75e5efce 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -50,9 +50,11 @@ static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
 
 static int cgrp_css_online(struct cgroup *cgrp)
 {
-	if (cgrp->parent)
-		cgrp_cls_state(cgrp)->classid =
-			cgrp_cls_state(cgrp->parent)->classid;
+	struct cgroup_cls_state *cs = cgrp_cls_state(cgrp);
+	struct cgroup_cls_state *parent = css_cls_state(css_parent(&cs->css));
+
+	if (parent)
+		cs->classid = parent->classid;
 	return 0;
 }
 
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 90953648c643..635a49db005d 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -198,13 +198,11 @@ static inline bool is_devcg_online(const struct dev_cgroup *devcg)
  */
 static int devcgroup_online(struct cgroup *cgroup)
 {
-	struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL;
+	struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup);
+	struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css_parent(&dev_cgroup->css));
 	int ret = 0;
 
 	mutex_lock(&devcgroup_mutex);
-	dev_cgroup = cgroup_to_devcgroup(cgroup);
-	if (cgroup->parent)
-		parent_dev_cgroup = cgroup_to_devcgroup(cgroup->parent);
 
 	if (parent_dev_cgroup == NULL)
 		dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
@@ -394,12 +392,10 @@ static bool may_access(struct dev_cgroup *dev_cgroup,
 static int parent_has_perm(struct dev_cgroup *childcg,
 				  struct dev_exception_item *ex)
 {
-	struct cgroup *pcg = childcg->css.cgroup->parent;
-	struct dev_cgroup *parent;
+	struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css));
 
-	if (!pcg)
+	if (!parent)
 		return 1;
-	parent = cgroup_to_devcgroup(pcg);
 	return may_access(parent, ex, childcg->behavior);
 }
 
@@ -524,15 +520,11 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 	char temp[12];		/* 11 + 1 characters needed for a u32 */
 	int count, rc = 0;
 	struct dev_exception_item ex;
-	struct cgroup *p = devcgroup->css.cgroup;
-	struct dev_cgroup *parent = NULL;
+	struct dev_cgroup *parent = css_to_devcgroup(css_parent(&devcgroup->css));
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (p->parent)
-		parent = cgroup_to_devcgroup(p->parent);
-
 	memset(&ex, 0, sizeof(ex));
 	b = buffer;
 
-- 
cgit v1.2.3


From eb95419b023abacb415e2a18fea899023ce7624d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Aug 2013 20:11:23 -0400
Subject: cgroup: pass around cgroup_subsys_state instead of cgroup in
 subsystem methods

cgroup is currently in the process of transitioning to using struct
cgroup_subsys_state * as the primary handle instead of struct cgroup *
in subsystem implementations for the following reasons.

* With unified hierarchy, subsystems will be dynamically bound and
  unbound from cgroups and thus css's (cgroup_subsys_state) may be
  created and destroyed dynamically over the lifetime of a cgroup,
  which is different from the current state where all css's are
  allocated and destroyed together with the associated cgroup.  This
  in turn means that cgroup_css() should be synchronized and may
  return NULL, making it more cumbersome to use.

* Differing levels of per-subsystem granularity in the unified
  hierarchy means that the task and descendant iterators should behave
  differently depending on the specific subsystem the iteration is
  being performed for.

* In majority of the cases, subsystems only care about its part in the
  cgroup hierarchy - ie. the hierarchy of css's.  Subsystem methods
  often obtain the matching css pointer from the cgroup and don't
  bother with the cgroup pointer itself.  Passing around css fits
  much better.

This patch converts all cgroup_subsys methods to take @css instead of
@cgroup.  The conversions are mostly straight-forward.  A few
noteworthy changes are

* ->css_alloc() now takes css of the parent cgroup rather than the
  pointer to the new cgroup as the css for the new cgroup doesn't
  exist yet.  Knowing the parent css is enough for all the existing
  subsystems.

* In kernel/cgroup.c::offline_css(), unnecessary open coded css
  dereference is replaced with local variable access.

This patch shouldn't cause any behavior differences.

v2: Unnecessary explicit cgrp->subsys[] deref in css_online() replaced
    with local variable @css as suggested by Li Zefan.

    Rebased on top of new for-3.12 which includes for-3.11-fixes so
    that ->css_free() invocation added by da0a12caff ("cgroup: fix a
    leak when percpu_ref_init() fails") is converted too.  Suggested
    by Li Zefan.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Acked-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 block/blk-cgroup.c        | 25 +++++++++++----------
 include/linux/cgroup.h    | 24 +++++++++++---------
 kernel/cgroup.c           | 57 ++++++++++++++++++++++++++++-------------------
 kernel/cgroup_freezer.c   | 40 +++++++++++++++++----------------
 kernel/cpuset.c           | 39 +++++++++++++++++---------------
 kernel/events/core.c      | 18 ++++++++-------
 kernel/sched/core.c       | 39 ++++++++++++++++----------------
 kernel/sched/cpuacct.c    |  9 ++++----
 mm/hugetlb_cgroup.c       | 19 ++++++++--------
 mm/memcontrol.c           | 38 +++++++++++++++----------------
 net/core/netprio_cgroup.c | 20 ++++++++---------
 net/sched/cls_cgroup.c    | 18 ++++++++-------
 security/device_cgroup.c  | 22 +++++++++---------
 13 files changed, 197 insertions(+), 171 deletions(-)

(limited to 'security')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 290792a13e3c..79fd9f4fadb7 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -765,18 +765,18 @@ struct cftype blkcg_files[] = {
 
 /**
  * blkcg_css_offline - cgroup css_offline callback
- * @cgroup: cgroup of interest
+ * @css: css of interest
  *
- * This function is called when @cgroup is about to go away and responsible
- * for shooting down all blkgs associated with @cgroup.  blkgs should be
+ * This function is called when @css is about to go away and responsible
+ * for shooting down all blkgs associated with @css.  blkgs should be
  * removed while holding both q and blkcg locks.  As blkcg lock is nested
  * inside q lock, this function performs reverse double lock dancing.
  *
  * This is the blkcg counterpart of ioc_release_fn().
  */
-static void blkcg_css_offline(struct cgroup *cgroup)
+static void blkcg_css_offline(struct cgroup_subsys_state *css)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	spin_lock_irq(&blkcg->lock);
 
@@ -798,21 +798,21 @@ static void blkcg_css_offline(struct cgroup *cgroup)
 	spin_unlock_irq(&blkcg->lock);
 }
 
-static void blkcg_css_free(struct cgroup *cgroup)
+static void blkcg_css_free(struct cgroup_subsys_state *css)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	if (blkcg != &blkcg_root)
 		kfree(blkcg);
 }
 
-static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup)
+static struct cgroup_subsys_state *
+blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
 {
 	static atomic64_t id_seq = ATOMIC64_INIT(0);
 	struct blkcg *blkcg;
-	struct cgroup *parent = cgroup->parent;
 
-	if (!parent) {
+	if (!parent_css) {
 		blkcg = &blkcg_root;
 		goto done;
 	}
@@ -883,14 +883,15 @@ void blkcg_exit_queue(struct request_queue *q)
  * of the main cic data structures.  For now we allow a task to change
  * its cgroup only if it's the only owner of its ioc.
  */
-static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static int blkcg_can_attach(struct cgroup_subsys_state *css,
+			    struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
 	struct io_context *ioc;
 	int ret = 0;
 
 	/* task_lock() is needed to avoid races with exit_io_context() */
-	cgroup_taskset_for_each(task, cgrp, tset) {
+	cgroup_taskset_for_each(task, css->cgroup, tset) {
 		task_lock(task);
 		ioc = task->io_context;
 		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 18112a3bb12b..9c2b9dd9121d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -579,18 +579,22 @@ int cgroup_taskset_size(struct cgroup_taskset *tset);
  */
 
 struct cgroup_subsys {
-	struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp);
-	int (*css_online)(struct cgroup *cgrp);
-	void (*css_offline)(struct cgroup *cgrp);
-	void (*css_free)(struct cgroup *cgrp);
-
-	int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
-	void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
-	void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
+	struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
+	int (*css_online)(struct cgroup_subsys_state *css);
+	void (*css_offline)(struct cgroup_subsys_state *css);
+	void (*css_free)(struct cgroup_subsys_state *css);
+
+	int (*can_attach)(struct cgroup_subsys_state *css,
+			  struct cgroup_taskset *tset);
+	void (*cancel_attach)(struct cgroup_subsys_state *css,
+			      struct cgroup_taskset *tset);
+	void (*attach)(struct cgroup_subsys_state *css,
+		       struct cgroup_taskset *tset);
 	void (*fork)(struct task_struct *task);
-	void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp,
+	void (*exit)(struct cgroup_subsys_state *css,
+		     struct cgroup_subsys_state *old_css,
 		     struct task_struct *task);
-	void (*bind)(struct cgroup *root);
+	void (*bind)(struct cgroup_subsys_state *root_css);
 
 	int subsys_id;
 	int disabled;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4234428f1014..271d9a5cde5f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -853,8 +853,11 @@ static void cgroup_free_fn(struct work_struct *work)
 	/*
 	 * Release the subsystem state objects.
 	 */
-	for_each_root_subsys(cgrp->root, ss)
-		ss->css_free(cgrp);
+	for_each_root_subsys(cgrp->root, ss) {
+		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+
+		ss->css_free(css);
+	}
 
 	cgrp->root->number_of_cgroups--;
 	mutex_unlock(&cgroup_mutex);
@@ -1056,7 +1059,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			list_move(&ss->sibling, &root->subsys_list);
 			ss->root = root;
 			if (ss->bind)
-				ss->bind(cgrp);
+				ss->bind(cgrp->subsys[i]);
 
 			/* refcount was already taken, and we're keeping it */
 			root->subsys_mask |= bit;
@@ -1066,7 +1069,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
 
 			if (ss->bind)
-				ss->bind(cgroup_dummy_top);
+				ss->bind(cgroup_dummy_top->subsys[i]);
 			cgroup_dummy_top->subsys[i]->cgroup = cgroup_dummy_top;
 			cgrp->subsys[i] = NULL;
 			cgroup_subsys[i]->root = &cgroup_dummy_root;
@@ -2049,8 +2052,10 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
 	 * step 1: check that we can legitimately attach to the cgroup.
 	 */
 	for_each_root_subsys(root, ss) {
+		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+
 		if (ss->can_attach) {
-			retval = ss->can_attach(cgrp, &tset);
+			retval = ss->can_attach(css, &tset);
 			if (retval) {
 				failed_ss = ss;
 				goto out_cancel_attach;
@@ -2089,8 +2094,10 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
 	 * step 4: do subsystem attach callbacks.
 	 */
 	for_each_root_subsys(root, ss) {
+		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+
 		if (ss->attach)
-			ss->attach(cgrp, &tset);
+			ss->attach(css, &tset);
 	}
 
 	/*
@@ -2109,10 +2116,12 @@ out_put_css_set_refs:
 out_cancel_attach:
 	if (retval) {
 		for_each_root_subsys(root, ss) {
+			struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+
 			if (ss == failed_ss)
 				break;
 			if (ss->cancel_attach)
-				ss->cancel_attach(cgrp, &tset);
+				ss->cancel_attach(css, &tset);
 		}
 	}
 out_free_group_list:
@@ -4206,14 +4215,15 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
 /* invoke ->css_online() on a new CSS and mark it online if successful */
 static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
+	struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
 	int ret = 0;
 
 	lockdep_assert_held(&cgroup_mutex);
 
 	if (ss->css_online)
-		ret = ss->css_online(cgrp);
+		ret = ss->css_online(css);
 	if (!ret)
-		cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;
+		css->flags |= CSS_ONLINE;
 	return ret;
 }
 
@@ -4228,9 +4238,9 @@ static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
 		return;
 
 	if (ss->css_offline)
-		ss->css_offline(cgrp);
+		ss->css_offline(css);
 
-	cgrp->subsys[ss->subsys_id]->flags &= ~CSS_ONLINE;
+	css->flags &= ~CSS_ONLINE;
 }
 
 /*
@@ -4305,7 +4315,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	for_each_root_subsys(root, ss) {
 		struct cgroup_subsys_state *css;
 
-		css = ss->css_alloc(cgrp);
+		css = ss->css_alloc(parent->subsys[ss->subsys_id]);
 		if (IS_ERR(css)) {
 			err = PTR_ERR(css);
 			goto err_free_all;
@@ -4313,7 +4323,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 
 		err = percpu_ref_init(&css->refcnt, css_release);
 		if (err) {
-			ss->css_free(cgrp);
+			ss->css_free(css);
 			goto err_free_all;
 		}
 
@@ -4386,7 +4396,7 @@ err_free_all:
 
 		if (css) {
 			percpu_ref_cancel_init(&css->refcnt);
-			ss->css_free(cgrp);
+			ss->css_free(css);
 		}
 	}
 	mutex_unlock(&cgroup_mutex);
@@ -4641,7 +4651,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	/* Create the top cgroup state for this subsystem */
 	list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
 	ss->root = &cgroup_dummy_root;
-	css = ss->css_alloc(cgroup_dummy_top);
+	css = ss->css_alloc(cgroup_dummy_top->subsys[ss->subsys_id]);
 	/* We don't handle early failures gracefully */
 	BUG_ON(IS_ERR(css));
 	init_cgroup_css(css, ss, cgroup_dummy_top);
@@ -4720,7 +4730,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	 * struct, so this can happen first (i.e. before the dummy root
 	 * attachment).
 	 */
-	css = ss->css_alloc(cgroup_dummy_top);
+	css = ss->css_alloc(cgroup_dummy_top->subsys[ss->subsys_id]);
 	if (IS_ERR(css)) {
 		/* failure case - need to deassign the cgroup_subsys[] slot. */
 		cgroup_subsys[ss->subsys_id] = NULL;
@@ -4836,7 +4846,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
 	 * the cgrp->subsys pointer to find their state. note that this
 	 * also takes care of freeing the css_id.
 	 */
-	ss->css_free(cgroup_dummy_top);
+	ss->css_free(cgroup_dummy_top->subsys[ss->subsys_id]);
 	cgroup_dummy_top->subsys[ss->subsys_id] = NULL;
 
 	mutex_unlock(&cgroup_mutex);
@@ -5192,10 +5202,10 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 		 */
 		for_each_builtin_subsys(ss, i) {
 			if (ss->exit) {
-				struct cgroup *old_cgrp = cset->subsys[i]->cgroup;
-				struct cgroup *cgrp = task_cgroup(tsk, i);
+				struct cgroup_subsys_state *old_css = cset->subsys[i];
+				struct cgroup_subsys_state *css = task_css(tsk, i);
 
-				ss->exit(cgrp, old_cgrp, tsk);
+				ss->exit(css, old_css, tsk);
 			}
 		}
 	}
@@ -5529,7 +5539,8 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
 }
 
 #ifdef CONFIG_CGROUP_DEBUG
-static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+debug_css_alloc(struct cgroup_subsys_state *parent_css)
 {
 	struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
 
@@ -5539,9 +5550,9 @@ static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cgrp)
 	return css;
 }
 
-static void debug_css_free(struct cgroup *cgrp)
+static void debug_css_free(struct cgroup_subsys_state *css)
 {
-	kfree(cgrp->subsys[debug_subsys_id]);
+	kfree(css);
 }
 
 static u64 debug_taskcount_read(struct cgroup *cgrp, struct cftype *cft)
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 657a73cd44c4..f03a85719c3c 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -91,7 +91,8 @@ static const char *freezer_state_strs(unsigned int state)
 
 struct cgroup_subsys freezer_subsys;
 
-static struct cgroup_subsys_state *freezer_css_alloc(struct cgroup *cgroup)
+static struct cgroup_subsys_state *
+freezer_css_alloc(struct cgroup_subsys_state *parent_css)
 {
 	struct freezer *freezer;
 
@@ -104,16 +105,16 @@ static struct cgroup_subsys_state *freezer_css_alloc(struct cgroup *cgroup)
 }
 
 /**
- * freezer_css_online - commit creation of a freezer cgroup
- * @cgroup: cgroup being created
+ * freezer_css_online - commit creation of a freezer css
+ * @css: css being created
  *
- * We're committing to creation of @cgroup.  Mark it online and inherit
+ * We're committing to creation of @css.  Mark it online and inherit
  * parent's freezing state while holding both parent's and our
  * freezer->lock.
  */
-static int freezer_css_online(struct cgroup *cgroup)
+static int freezer_css_online(struct cgroup_subsys_state *css)
 {
-	struct freezer *freezer = cgroup_freezer(cgroup);
+	struct freezer *freezer = css_freezer(css);
 	struct freezer *parent = parent_freezer(freezer);
 
 	/*
@@ -140,15 +141,15 @@ static int freezer_css_online(struct cgroup *cgroup)
 }
 
 /**
- * freezer_css_offline - initiate destruction of @cgroup
- * @cgroup: cgroup being destroyed
+ * freezer_css_offline - initiate destruction of a freezer css
+ * @css: css being destroyed
  *
- * @cgroup is going away.  Mark it dead and decrement system_freezing_count
- * if it was holding one.
+ * @css is going away.  Mark it dead and decrement system_freezing_count if
+ * it was holding one.
  */
-static void freezer_css_offline(struct cgroup *cgroup)
+static void freezer_css_offline(struct cgroup_subsys_state *css)
 {
-	struct freezer *freezer = cgroup_freezer(cgroup);
+	struct freezer *freezer = css_freezer(css);
 
 	spin_lock_irq(&freezer->lock);
 
@@ -160,9 +161,9 @@ static void freezer_css_offline(struct cgroup *cgroup)
 	spin_unlock_irq(&freezer->lock);
 }
 
-static void freezer_css_free(struct cgroup *cgroup)
+static void freezer_css_free(struct cgroup_subsys_state *css)
 {
-	kfree(cgroup_freezer(cgroup));
+	kfree(css_freezer(css));
 }
 
 /*
@@ -174,25 +175,26 @@ static void freezer_css_free(struct cgroup *cgroup)
  * @freezer->lock.  freezer_attach() makes the new tasks conform to the
  * current state and all following state changes can see the new tasks.
  */
-static void freezer_attach(struct cgroup *new_cgrp, struct cgroup_taskset *tset)
+static void freezer_attach(struct cgroup_subsys_state *new_css,
+			   struct cgroup_taskset *tset)
 {
-	struct freezer *freezer = cgroup_freezer(new_cgrp);
+	struct freezer *freezer = css_freezer(new_css);
 	struct task_struct *task;
 	bool clear_frozen = false;
 
 	spin_lock_irq(&freezer->lock);
 
 	/*
-	 * Make the new tasks conform to the current state of @new_cgrp.
+	 * Make the new tasks conform to the current state of @new_css.
 	 * For simplicity, when migrating any task to a FROZEN cgroup, we
 	 * revert it to FREEZING and let update_if_frozen() determine the
 	 * correct state later.
 	 *
-	 * Tasks in @tset are on @new_cgrp but may not conform to its
+	 * Tasks in @tset are on @new_css but may not conform to its
 	 * current state before executing the following - !frozen tasks may
 	 * be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
 	 */
-	cgroup_taskset_for_each(task, new_cgrp, tset) {
+	cgroup_taskset_for_each(task, new_css->cgroup, tset) {
 		if (!(freezer->state & CGROUP_FREEZING)) {
 			__thaw_task(task);
 		} else {
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 259a4af37e69..8ce3fdc3dfcc 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1455,9 +1455,10 @@ static int fmeter_getrate(struct fmeter *fmp)
 }
 
 /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
-static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static int cpuset_can_attach(struct cgroup_subsys_state *css,
+			     struct cgroup_taskset *tset)
 {
-	struct cpuset *cs = cgroup_cs(cgrp);
+	struct cpuset *cs = css_cs(css);
 	struct task_struct *task;
 	int ret;
 
@@ -1468,11 +1469,11 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 	 * flag is set.
 	 */
 	ret = -ENOSPC;
-	if (!cgroup_sane_behavior(cgrp) &&
+	if (!cgroup_sane_behavior(css->cgroup) &&
 	    (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
 		goto out_unlock;
 
-	cgroup_taskset_for_each(task, cgrp, tset) {
+	cgroup_taskset_for_each(task, css->cgroup, tset) {
 		/*
 		 * Kthreads which disallow setaffinity shouldn't be moved
 		 * to a new cpuset; we don't want to change their cpu
@@ -1501,11 +1502,11 @@ out_unlock:
 	return ret;
 }
 
-static void cpuset_cancel_attach(struct cgroup *cgrp,
+static void cpuset_cancel_attach(struct cgroup_subsys_state *css,
 				 struct cgroup_taskset *tset)
 {
 	mutex_lock(&cpuset_mutex);
-	cgroup_cs(cgrp)->attach_in_progress--;
+	css_cs(css)->attach_in_progress--;
 	mutex_unlock(&cpuset_mutex);
 }
 
@@ -1516,7 +1517,8 @@ static void cpuset_cancel_attach(struct cgroup *cgrp,
  */
 static cpumask_var_t cpus_attach;
 
-static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void cpuset_attach(struct cgroup_subsys_state *css,
+			  struct cgroup_taskset *tset)
 {
 	/* static buf protected by cpuset_mutex */
 	static nodemask_t cpuset_attach_nodemask_to;
@@ -1524,7 +1526,7 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 	struct task_struct *task;
 	struct task_struct *leader = cgroup_taskset_first(tset);
 	struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset);
-	struct cpuset *cs = cgroup_cs(cgrp);
+	struct cpuset *cs = css_cs(css);
 	struct cpuset *oldcs = cgroup_cs(oldcgrp);
 	struct cpuset *cpus_cs = effective_cpumask_cpuset(cs);
 	struct cpuset *mems_cs = effective_nodemask_cpuset(cs);
@@ -1539,7 +1541,7 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 
 	guarantee_online_mems(mems_cs, &cpuset_attach_nodemask_to);
 
-	cgroup_taskset_for_each(task, cgrp, tset) {
+	cgroup_taskset_for_each(task, css->cgroup, tset) {
 		/*
 		 * can_attach beforehand should guarantee that this doesn't
 		 * fail.  TODO: have a better way to handle failure here
@@ -1940,11 +1942,12 @@ static struct cftype files[] = {
  *	cgrp:	control group that the new cpuset will be part of
  */
 
-static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
 {
 	struct cpuset *cs;
 
-	if (!cgrp->parent)
+	if (!parent_css)
 		return &top_cpuset.css;
 
 	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
@@ -1964,9 +1967,9 @@ static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cgrp)
 	return &cs->css;
 }
 
-static int cpuset_css_online(struct cgroup *cgrp)
+static int cpuset_css_online(struct cgroup_subsys_state *css)
 {
-	struct cpuset *cs = cgroup_cs(cgrp);
+	struct cpuset *cs = css_cs(css);
 	struct cpuset *parent = parent_cs(cs);
 	struct cpuset *tmp_cs;
 	struct cgroup *pos_cgrp;
@@ -1984,7 +1987,7 @@ static int cpuset_css_online(struct cgroup *cgrp)
 
 	number_of_cpusets++;
 
-	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags))
+	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
 		goto out_unlock;
 
 	/*
@@ -2024,9 +2027,9 @@ out_unlock:
  * will call rebuild_sched_domains_locked().
  */
 
-static void cpuset_css_offline(struct cgroup *cgrp)
+static void cpuset_css_offline(struct cgroup_subsys_state *css)
 {
-	struct cpuset *cs = cgroup_cs(cgrp);
+	struct cpuset *cs = css_cs(css);
 
 	mutex_lock(&cpuset_mutex);
 
@@ -2039,9 +2042,9 @@ static void cpuset_css_offline(struct cgroup *cgrp)
 	mutex_unlock(&cpuset_mutex);
 }
 
-static void cpuset_css_free(struct cgroup *cgrp)
+static void cpuset_css_free(struct cgroup_subsys_state *css)
 {
-	struct cpuset *cs = cgroup_cs(cgrp);
+	struct cpuset *cs = css_cs(css);
 
 	free_cpumask_var(cs->cpus_allowed);
 	kfree(cs);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 414c61f4d776..9705a0ed1dce 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7778,7 +7778,8 @@ unlock:
 device_initcall(perf_event_sysfs_init);
 
 #ifdef CONFIG_CGROUP_PERF
-static struct cgroup_subsys_state *perf_cgroup_css_alloc(struct cgroup *cont)
+static struct cgroup_subsys_state *
+perf_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
 	struct perf_cgroup *jc;
 
@@ -7795,11 +7796,10 @@ static struct cgroup_subsys_state *perf_cgroup_css_alloc(struct cgroup *cont)
 	return &jc->css;
 }
 
-static void perf_cgroup_css_free(struct cgroup *cont)
+static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
 {
-	struct perf_cgroup *jc;
-	jc = container_of(cgroup_css(cont, perf_subsys_id),
-			  struct perf_cgroup, css);
+	struct perf_cgroup *jc = container_of(css, struct perf_cgroup, css);
+
 	free_percpu(jc->info);
 	kfree(jc);
 }
@@ -7811,15 +7811,17 @@ static int __perf_cgroup_move(void *info)
 	return 0;
 }
 
-static void perf_cgroup_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void perf_cgroup_attach(struct cgroup_subsys_state *css,
+			       struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
 
-	cgroup_taskset_for_each(task, cgrp, tset)
+	cgroup_taskset_for_each(task, css->cgroup, tset)
 		task_function_call(task, __perf_cgroup_move, task);
 }
 
-static void perf_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
+static void perf_cgroup_exit(struct cgroup_subsys_state *css,
+			     struct cgroup_subsys_state *old_css,
 			     struct task_struct *task)
 {
 	/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7a10742b389a..622b7efc5ade 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7094,16 +7094,17 @@ static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
 	return css_tg(cgroup_css(cgrp, cpu_cgroup_subsys_id));
 }
 
-static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
-	struct task_group *tg, *parent;
+	struct task_group *parent = css_tg(parent_css);
+	struct task_group *tg;
 
-	if (!cgrp->parent) {
+	if (!parent) {
 		/* This is early initialization for the top cgroup */
 		return &root_task_group.css;
 	}
 
-	parent = cgroup_tg(cgrp->parent);
 	tg = sched_create_group(parent);
 	if (IS_ERR(tg))
 		return ERR_PTR(-ENOMEM);
@@ -7111,38 +7112,38 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
 	return &tg->css;
 }
 
-static int cpu_cgroup_css_online(struct cgroup *cgrp)
+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
 {
-	struct task_group *tg = cgroup_tg(cgrp);
-	struct task_group *parent = css_tg(css_parent(&tg->css));
+	struct task_group *tg = css_tg(css);
+	struct task_group *parent = css_tg(css_parent(css));
 
 	if (parent)
 		sched_online_group(tg, parent);
 	return 0;
 }
 
-static void cpu_cgroup_css_free(struct cgroup *cgrp)
+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
 {
-	struct task_group *tg = cgroup_tg(cgrp);
+	struct task_group *tg = css_tg(css);
 
 	sched_destroy_group(tg);
 }
 
-static void cpu_cgroup_css_offline(struct cgroup *cgrp)
+static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
 {
-	struct task_group *tg = cgroup_tg(cgrp);
+	struct task_group *tg = css_tg(css);
 
 	sched_offline_group(tg);
 }
 
-static int cpu_cgroup_can_attach(struct cgroup *cgrp,
+static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
 				 struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
 
-	cgroup_taskset_for_each(task, cgrp, tset) {
+	cgroup_taskset_for_each(task, css->cgroup, tset) {
 #ifdef CONFIG_RT_GROUP_SCHED
-		if (!sched_rt_can_attach(cgroup_tg(cgrp), task))
+		if (!sched_rt_can_attach(css_tg(css), task))
 			return -EINVAL;
 #else
 		/* We don't support RT-tasks being in separate groups */
@@ -7153,18 +7154,18 @@ static int cpu_cgroup_can_attach(struct cgroup *cgrp,
 	return 0;
 }
 
-static void cpu_cgroup_attach(struct cgroup *cgrp,
+static void cpu_cgroup_attach(struct cgroup_subsys_state *css,
 			      struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
 
-	cgroup_taskset_for_each(task, cgrp, tset)
+	cgroup_taskset_for_each(task, css->cgroup, tset)
 		sched_move_task(task);
 }
 
-static void
-cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
-		struct task_struct *task)
+static void cpu_cgroup_exit(struct cgroup_subsys_state *css,
+			    struct cgroup_subsys_state *old_css,
+			    struct task_struct *task)
 {
 	/*
 	 * cgroup_exit() is called in the copy_process() failure path.
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index f6926a149a71..1b784d9b3630 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -62,11 +62,12 @@ static struct cpuacct root_cpuacct = {
 };
 
 /* create a new cpu accounting group */
-static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
 {
 	struct cpuacct *ca;
 
-	if (!cgrp->parent)
+	if (!parent_css)
 		return &root_cpuacct.css;
 
 	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
@@ -92,9 +93,9 @@ out:
 }
 
 /* destroy an existing cpu accounting group */
-static void cpuacct_css_free(struct cgroup *cgrp)
+static void cpuacct_css_free(struct cgroup_subsys_state *css)
 {
-	struct cpuacct *ca = cgroup_ca(cgrp);
+	struct cpuacct *ca = css_ca(css);
 
 	free_percpu(ca->cpustat);
 	free_percpu(ca->cpuusage);
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 57ecb5d2513f..e2132435060f 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -73,19 +73,18 @@ static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
 	return false;
 }
 
-static struct cgroup_subsys_state *hugetlb_cgroup_css_alloc(struct cgroup *cgroup)
+static struct cgroup_subsys_state *
+hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
+	struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
+	struct hugetlb_cgroup *h_cgroup;
 	int idx;
-	struct cgroup *parent_cgroup;
-	struct hugetlb_cgroup *h_cgroup, *parent_h_cgroup;
 
 	h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
 	if (!h_cgroup)
 		return ERR_PTR(-ENOMEM);
 
-	parent_cgroup = cgroup->parent;
-	if (parent_cgroup) {
-		parent_h_cgroup = hugetlb_cgroup_from_cgroup(parent_cgroup);
+	if (parent_h_cgroup) {
 		for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
 			res_counter_init(&h_cgroup->hugepage[idx],
 					 &parent_h_cgroup->hugepage[idx]);
@@ -97,11 +96,11 @@ static struct cgroup_subsys_state *hugetlb_cgroup_css_alloc(struct cgroup *cgrou
 	return &h_cgroup->css;
 }
 
-static void hugetlb_cgroup_css_free(struct cgroup *cgroup)
+static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
 {
 	struct hugetlb_cgroup *h_cgroup;
 
-	h_cgroup = hugetlb_cgroup_from_cgroup(cgroup);
+	h_cgroup = hugetlb_cgroup_from_css(css);
 	kfree(h_cgroup);
 }
 
@@ -150,9 +149,9 @@ out:
  * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
  * the parent cgroup.
  */
-static void hugetlb_cgroup_css_offline(struct cgroup *cgroup)
+static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
 {
-	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
 	struct hstate *h;
 	struct page *page;
 	int idx = 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 69b3e520f921..32cca0f0af0d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6211,7 +6211,7 @@ static void __init mem_cgroup_soft_limit_tree_init(void)
 }
 
 static struct cgroup_subsys_state * __ref
-mem_cgroup_css_alloc(struct cgroup *cont)
+mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
 	struct mem_cgroup *memcg;
 	long error = -ENOMEM;
@@ -6226,7 +6226,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
 			goto free_out;
 
 	/* root ? */
-	if (cont->parent == NULL) {
+	if (parent_css == NULL) {
 		root_mem_cgroup = memcg;
 		res_counter_init(&memcg->res, NULL);
 		res_counter_init(&memcg->memsw, NULL);
@@ -6248,10 +6248,10 @@ free_out:
 }
 
 static int
-mem_cgroup_css_online(struct cgroup *cont)
+mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
-	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
 	int error = 0;
 
 	if (!parent)
@@ -6308,9 +6308,9 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
 		mem_cgroup_iter_invalidate(root_mem_cgroup);
 }
 
-static void mem_cgroup_css_offline(struct cgroup *cont)
+static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
 	kmem_cgroup_css_offline(memcg);
 
@@ -6319,9 +6319,9 @@ static void mem_cgroup_css_offline(struct cgroup *cont)
 	mem_cgroup_destroy_all_caches(memcg);
 }
 
-static void mem_cgroup_css_free(struct cgroup *cont)
+static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
 	memcg_destroy_kmem(memcg);
 	__mem_cgroup_free(memcg);
@@ -6691,12 +6691,12 @@ static void mem_cgroup_clear_mc(void)
 	mem_cgroup_end_move(from);
 }
 
-static int mem_cgroup_can_attach(struct cgroup *cgroup,
+static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
 				 struct cgroup_taskset *tset)
 {
 	struct task_struct *p = cgroup_taskset_first(tset);
 	int ret = 0;
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	unsigned long move_charge_at_immigrate;
 
 	/*
@@ -6738,7 +6738,7 @@ static int mem_cgroup_can_attach(struct cgroup *cgroup,
 	return ret;
 }
 
-static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
+static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
 				     struct cgroup_taskset *tset)
 {
 	mem_cgroup_clear_mc();
@@ -6886,7 +6886,7 @@ retry:
 	up_read(&mm->mmap_sem);
 }
 
-static void mem_cgroup_move_task(struct cgroup *cont,
+static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
 				 struct cgroup_taskset *tset)
 {
 	struct task_struct *p = cgroup_taskset_first(tset);
@@ -6901,16 +6901,16 @@ static void mem_cgroup_move_task(struct cgroup *cont,
 		mem_cgroup_clear_mc();
 }
 #else	/* !CONFIG_MMU */
-static int mem_cgroup_can_attach(struct cgroup *cgroup,
+static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
 				 struct cgroup_taskset *tset)
 {
 	return 0;
 }
-static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
+static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
 				     struct cgroup_taskset *tset)
 {
 }
-static void mem_cgroup_move_task(struct cgroup *cont,
+static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
 				 struct cgroup_taskset *tset)
 {
 }
@@ -6920,15 +6920,15 @@ static void mem_cgroup_move_task(struct cgroup *cont,
  * Cgroup retains root cgroups across [un]mount cycles making it necessary
  * to verify sane_behavior flag on each mount attempt.
  */
-static void mem_cgroup_bind(struct cgroup *root)
+static void mem_cgroup_bind(struct cgroup_subsys_state *root_css)
 {
 	/*
 	 * use_hierarchy is forced with sane_behavior.  cgroup core
 	 * guarantees that @root doesn't have any children, so turning it
 	 * on for the root memcg is enough.
 	 */
-	if (cgroup_sane_behavior(root))
-		mem_cgroup_from_cont(root)->use_hierarchy = true;
+	if (cgroup_sane_behavior(root_css->cgroup))
+		mem_cgroup_from_css(root_css)->use_hierarchy = true;
 }
 
 struct cgroup_subsys mem_cgroup_subsys = {
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 5dfac8886e12..8d095b4c2f6f 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -126,7 +126,8 @@ static int netprio_set_prio(struct cgroup_subsys_state *css,
 	return 0;
 }
 
-static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
 {
 	struct cgroup_subsys_state *css;
 
@@ -137,16 +138,14 @@ static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
 	return css;
 }
 
-static int cgrp_css_online(struct cgroup *cgrp)
+static int cgrp_css_online(struct cgroup_subsys_state *css)
 {
-	struct cgroup_subsys_state *css = cgroup_css(cgrp, net_prio_subsys_id);
-	struct cgroup_subsys_state *parent_css;
+	struct cgroup_subsys_state *parent_css = css_parent(css);
 	struct net_device *dev;
 	int ret = 0;
 
-	if (!cgrp->parent)
+	if (!parent_css)
 		return 0;
-	parent_css = cgroup_css(cgrp->parent, net_prio_subsys_id);
 
 	rtnl_lock();
 	/*
@@ -164,9 +163,9 @@ static int cgrp_css_online(struct cgroup *cgrp)
 	return ret;
 }
 
-static void cgrp_css_free(struct cgroup *cgrp)
+static void cgrp_css_free(struct cgroup_subsys_state *css)
 {
-	kfree(cgroup_css(cgrp, net_prio_subsys_id));
+	kfree(css);
 }
 
 static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
@@ -221,12 +220,13 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
 	return 0;
 }
 
-static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void net_prio_attach(struct cgroup_subsys_state *css,
+			    struct cgroup_taskset *tset)
 {
 	struct task_struct *p;
 	void *v;
 
-	cgroup_taskset_for_each(p, cgrp, tset) {
+	cgroup_taskset_for_each(p, css->cgroup, tset) {
 		task_lock(p);
 		v = (void *)(unsigned long)task_netprioidx(p);
 		iterate_fd(p->files, 0, update_netprio, v);
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 9e6b75e5efce..dc3983835893 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -38,7 +38,8 @@ static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
 	return css_cls_state(task_css(p, net_cls_subsys_id));
 }
 
-static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
 {
 	struct cgroup_cls_state *cs;
 
@@ -48,19 +49,19 @@ static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
 	return &cs->css;
 }
 
-static int cgrp_css_online(struct cgroup *cgrp)
+static int cgrp_css_online(struct cgroup_subsys_state *css)
 {
-	struct cgroup_cls_state *cs = cgrp_cls_state(cgrp);
-	struct cgroup_cls_state *parent = css_cls_state(css_parent(&cs->css));
+	struct cgroup_cls_state *cs = css_cls_state(css);
+	struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
 
 	if (parent)
 		cs->classid = parent->classid;
 	return 0;
 }
 
-static void cgrp_css_free(struct cgroup *cgrp)
+static void cgrp_css_free(struct cgroup_subsys_state *css)
 {
-	kfree(cgrp_cls_state(cgrp));
+	kfree(css_cls_state(css));
 }
 
 static int update_classid(const void *v, struct file *file, unsigned n)
@@ -72,12 +73,13 @@ static int update_classid(const void *v, struct file *file, unsigned n)
 	return 0;
 }
 
-static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void cgrp_attach(struct cgroup_subsys_state *css,
+			struct cgroup_taskset *tset)
 {
 	struct task_struct *p;
 	void *v;
 
-	cgroup_taskset_for_each(p, cgrp, tset) {
+	cgroup_taskset_for_each(p, css->cgroup, tset) {
 		task_lock(p);
 		v = (void *)(unsigned long)task_cls_classid(p);
 		iterate_fd(p->files, 0, update_classid, v);
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 635a49db005d..7293ac49ba7b 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -68,7 +68,7 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
 
 struct cgroup_subsys devices_subsys;
 
-static int devcgroup_can_attach(struct cgroup *new_cgrp,
+static int devcgroup_can_attach(struct cgroup_subsys_state *new_css,
 				struct cgroup_taskset *set)
 {
 	struct task_struct *task = cgroup_taskset_first(set);
@@ -193,13 +193,13 @@ static inline bool is_devcg_online(const struct dev_cgroup *devcg)
 /**
  * devcgroup_online - initializes devcgroup's behavior and exceptions based on
  * 		      parent's
- * @cgroup: cgroup getting online
+ * @css: css getting online
  * returns 0 in case of success, error code otherwise
  */
-static int devcgroup_online(struct cgroup *cgroup)
+static int devcgroup_online(struct cgroup_subsys_state *css)
 {
-	struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup);
-	struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css_parent(&dev_cgroup->css));
+	struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
+	struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css_parent(css));
 	int ret = 0;
 
 	mutex_lock(&devcgroup_mutex);
@@ -217,9 +217,9 @@ static int devcgroup_online(struct cgroup *cgroup)
 	return ret;
 }
 
-static void devcgroup_offline(struct cgroup *cgroup)
+static void devcgroup_offline(struct cgroup_subsys_state *css)
 {
-	struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup);
+	struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
 
 	mutex_lock(&devcgroup_mutex);
 	dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
@@ -229,7 +229,8 @@ static void devcgroup_offline(struct cgroup *cgroup)
 /*
  * called from kernel/cgroup.c with cgroup_lock() held.
  */
-static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
+static struct cgroup_subsys_state *
+devcgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
 	struct dev_cgroup *dev_cgroup;
 
@@ -242,11 +243,10 @@ static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
 	return &dev_cgroup->css;
 }
 
-static void devcgroup_css_free(struct cgroup *cgroup)
+static void devcgroup_css_free(struct cgroup_subsys_state *css)
 {
-	struct dev_cgroup *dev_cgroup;
+	struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
 
-	dev_cgroup = cgroup_to_devcgroup(cgroup);
 	__dev_exception_clean(dev_cgroup);
 	kfree(dev_cgroup);
 }
-- 
cgit v1.2.3


From 182446d087906de40e514573a92a97b203695f71 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Aug 2013 20:11:24 -0400
Subject: cgroup: pass around cgroup_subsys_state instead of cgroup in file
 methods

cgroup is currently in the process of transitioning to using struct
cgroup_subsys_state * as the primary handle instead of struct cgroup.
Please see the previous commit which converts the subsystem methods
for rationale.

This patch converts all cftype file operations to take @css instead of
@cgroup.  cftypes for the cgroup core files don't have their subsytem
pointer set.  These will automatically use the dummy_css added by the
previous patch and can be converted the same way.

Most subsystem conversions are straight forwards but there are some
interesting ones.

* freezer: update_if_frozen() is also converted to take @css instead
  of @cgroup for consistency.  This will make the code look simpler
  too once iterators are converted to use css.

* memory/vmpressure: mem_cgroup_from_css() needs to be exported to
  vmpressure while mem_cgroup_from_cont() can be made static.
  Updated accordingly.

* cpu: cgroup_tg() doesn't have any user left.  Removed.

* cpuacct: cgroup_ca() doesn't have any user left.  Removed.

* hugetlb: hugetlb_cgroup_form_cgroup() doesn't have any user left.
  Removed.

* net_cls: cgrp_cls_state() doesn't have any user left.  Removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Acked-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 block/blk-cgroup.c         |   6 +-
 block/blk-throttle.c       |  32 ++++-----
 block/cfq-iosched.c        |  90 ++++++++++++-------------
 include/linux/cgroup.h     |  24 ++++---
 include/linux/memcontrol.h |   2 +-
 kernel/cgroup.c            | 162 +++++++++++++++++++++++----------------------
 kernel/cgroup_freezer.c    |  40 +++++------
 kernel/cpuset.c            |  35 +++++-----
 kernel/sched/core.c        |  65 +++++++++---------
 kernel/sched/cpuacct.c     |  28 +++-----
 mm/hugetlb_cgroup.c        |  26 +++-----
 mm/memcontrol.c            |  88 ++++++++++++------------
 mm/vmpressure.c            |   4 +-
 net/core/netprio_cgroup.c  |  10 ++-
 net/ipv4/tcp_memcontrol.c  |  12 ++--
 net/sched/cls_cgroup.c     |  14 ++--
 security/device_cgroup.c   |  12 ++--
 17 files changed, 322 insertions(+), 328 deletions(-)

(limited to 'security')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 34063739745b..f46f3c69179c 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -437,10 +437,10 @@ struct request_list *__blk_queue_next_rl(struct request_list *rl,
 	return &blkg->rl;
 }
 
-static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
-			     u64 val)
+static int blkcg_reset_stats(struct cgroup_subsys_state *css,
+			     struct cftype *cftype, u64 val)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
+	struct blkcg *blkcg = css_to_blkcg(css);
 	struct blkcg_gq *blkg;
 	int i;
 
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 08a32dfd3844..88bcfb651b0b 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1293,10 +1293,10 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf,
 	return __blkg_prfill_rwstat(sf, pd, &rwstat);
 }
 
-static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft,
-			       struct seq_file *sf)
+static int tg_print_cpu_rwstat(struct cgroup_subsys_state *css,
+			       struct cftype *cft, struct seq_file *sf)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkcg_policy_throtl,
 			  cft->private, true);
@@ -1325,26 +1325,26 @@ static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd,
 	return __blkg_prfill_u64(sf, pd, v);
 }
 
-static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft,
-			     struct seq_file *sf)
+static int tg_print_conf_u64(struct cgroup_subsys_state *css,
+			     struct cftype *cft, struct seq_file *sf)
 {
-	blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), tg_prfill_conf_u64,
+	blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_u64,
 			  &blkcg_policy_throtl, cft->private, false);
 	return 0;
 }
 
-static int tg_print_conf_uint(struct cgroup *cgrp, struct cftype *cft,
-			      struct seq_file *sf)
+static int tg_print_conf_uint(struct cgroup_subsys_state *css,
+			      struct cftype *cft, struct seq_file *sf)
 {
-	blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), tg_prfill_conf_uint,
+	blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_uint,
 			  &blkcg_policy_throtl, cft->private, false);
 	return 0;
 }
 
-static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf,
-		       bool is_u64)
+static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
+		       const char *buf, bool is_u64)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 	struct blkg_conf_ctx ctx;
 	struct throtl_grp *tg;
 	struct throtl_service_queue *sq;
@@ -1403,16 +1403,16 @@ static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf,
 	return 0;
 }
 
-static int tg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft,
+static int tg_set_conf_u64(struct cgroup_subsys_state *css, struct cftype *cft,
 			   const char *buf)
 {
-	return tg_set_conf(cgrp, cft, buf, true);
+	return tg_set_conf(css, cft, buf, true);
 }
 
-static int tg_set_conf_uint(struct cgroup *cgrp, struct cftype *cft,
+static int tg_set_conf_uint(struct cgroup_subsys_state *css, struct cftype *cft,
 			    const char *buf)
 {
-	return tg_set_conf(cgrp, cft, buf, false);
+	return tg_set_conf(css, cft, buf, false);
 }
 
 static struct cftype throtl_files[] = {
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d5bbdcfd0dab..dabb9d02cf9a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1607,12 +1607,11 @@ static u64 cfqg_prfill_weight_device(struct seq_file *sf,
 	return __blkg_prfill_u64(sf, pd, cfqg->dev_weight);
 }
 
-static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft,
-				    struct seq_file *sf)
+static int cfqg_print_weight_device(struct cgroup_subsys_state *css,
+				    struct cftype *cft, struct seq_file *sf)
 {
-	blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp),
-			  cfqg_prfill_weight_device, &blkcg_policy_cfq, 0,
-			  false);
+	blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_weight_device,
+			  &blkcg_policy_cfq, 0, false);
 	return 0;
 }
 
@@ -1626,35 +1625,34 @@ static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf,
 	return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight);
 }
 
-static int cfqg_print_leaf_weight_device(struct cgroup *cgrp,
+static int cfqg_print_leaf_weight_device(struct cgroup_subsys_state *css,
 					 struct cftype *cft,
 					 struct seq_file *sf)
 {
-	blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp),
-			  cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq, 0,
-			  false);
+	blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_leaf_weight_device,
+			  &blkcg_policy_cfq, 0, false);
 	return 0;
 }
 
-static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft,
+static int cfq_print_weight(struct cgroup_subsys_state *css, struct cftype *cft,
 			    struct seq_file *sf)
 {
-	seq_printf(sf, "%u\n", cgroup_to_blkcg(cgrp)->cfq_weight);
+	seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_weight);
 	return 0;
 }
 
-static int cfq_print_leaf_weight(struct cgroup *cgrp, struct cftype *cft,
-				 struct seq_file *sf)
+static int cfq_print_leaf_weight(struct cgroup_subsys_state *css,
+				 struct cftype *cft, struct seq_file *sf)
 {
-	seq_printf(sf, "%u\n",
-		   cgroup_to_blkcg(cgrp)->cfq_leaf_weight);
+	seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_leaf_weight);
 	return 0;
 }
 
-static int __cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
-				    const char *buf, bool is_leaf_weight)
+static int __cfqg_set_weight_device(struct cgroup_subsys_state *css,
+				    struct cftype *cft, const char *buf,
+				    bool is_leaf_weight)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 	struct blkg_conf_ctx ctx;
 	struct cfq_group *cfqg;
 	int ret;
@@ -1680,22 +1678,22 @@ static int __cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
 	return ret;
 }
 
-static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
-				  const char *buf)
+static int cfqg_set_weight_device(struct cgroup_subsys_state *css,
+				  struct cftype *cft, const char *buf)
 {
-	return __cfqg_set_weight_device(cgrp, cft, buf, false);
+	return __cfqg_set_weight_device(css, cft, buf, false);
 }
 
-static int cfqg_set_leaf_weight_device(struct cgroup *cgrp, struct cftype *cft,
-				       const char *buf)
+static int cfqg_set_leaf_weight_device(struct cgroup_subsys_state *css,
+				       struct cftype *cft, const char *buf)
 {
-	return __cfqg_set_weight_device(cgrp, cft, buf, true);
+	return __cfqg_set_weight_device(css, cft, buf, true);
 }
 
-static int __cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val,
-			    bool is_leaf_weight)
+static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
+			    u64 val, bool is_leaf_weight)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 	struct blkcg_gq *blkg;
 
 	if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX)
@@ -1727,30 +1725,32 @@ static int __cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val,
 	return 0;
 }
 
-static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
+static int cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
+			  u64 val)
 {
-	return __cfq_set_weight(cgrp, cft, val, false);
+	return __cfq_set_weight(css, cft, val, false);
 }
 
-static int cfq_set_leaf_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
+static int cfq_set_leaf_weight(struct cgroup_subsys_state *css,
+			       struct cftype *cft, u64 val)
 {
-	return __cfq_set_weight(cgrp, cft, val, true);
+	return __cfq_set_weight(css, cft, val, true);
 }
 
-static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft,
+static int cfqg_print_stat(struct cgroup_subsys_state *css, struct cftype *cft,
 			   struct seq_file *sf)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkcg_policy_cfq,
 			  cft->private, false);
 	return 0;
 }
 
-static int cfqg_print_rwstat(struct cgroup *cgrp, struct cftype *cft,
-			     struct seq_file *sf)
+static int cfqg_print_rwstat(struct cgroup_subsys_state *css,
+			     struct cftype *cft, struct seq_file *sf)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkcg_policy_cfq,
 			  cft->private, true);
@@ -1773,20 +1773,20 @@ static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf,
 	return __blkg_prfill_rwstat(sf, pd, &sum);
 }
 
-static int cfqg_print_stat_recursive(struct cgroup *cgrp, struct cftype *cft,
-				     struct seq_file *sf)
+static int cfqg_print_stat_recursive(struct cgroup_subsys_state *css,
+				     struct cftype *cft, struct seq_file *sf)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	blkcg_print_blkgs(sf, blkcg, cfqg_prfill_stat_recursive,
 			  &blkcg_policy_cfq, cft->private, false);
 	return 0;
 }
 
-static int cfqg_print_rwstat_recursive(struct cgroup *cgrp, struct cftype *cft,
-				       struct seq_file *sf)
+static int cfqg_print_rwstat_recursive(struct cgroup_subsys_state *css,
+				       struct cftype *cft, struct seq_file *sf)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	blkcg_print_blkgs(sf, blkcg, cfqg_prfill_rwstat_recursive,
 			  &blkcg_policy_cfq, cft->private, true);
@@ -1810,10 +1810,10 @@ static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf,
 }
 
 /* print avg_queue_size */
-static int cfqg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft,
-				     struct seq_file *sf)
+static int cfqg_print_avg_queue_size(struct cgroup_subsys_state *css,
+				     struct cftype *cft, struct seq_file *sf)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size,
 			  &blkcg_policy_cfq, 0, false);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b0d5f53ae5e1..0b91436c68ef 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -439,34 +439,34 @@ struct cftype {
 	struct cgroup_subsys *ss;
 
 	int (*open)(struct inode *inode, struct file *file);
-	ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
+	ssize_t (*read)(struct cgroup_subsys_state *css, struct cftype *cft,
 			struct file *file,
 			char __user *buf, size_t nbytes, loff_t *ppos);
 	/*
 	 * read_u64() is a shortcut for the common case of returning a
 	 * single integer. Use it in place of read()
 	 */
-	u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
+	u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
 	/*
 	 * read_s64() is a signed version of read_u64()
 	 */
-	s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
+	s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);
 	/*
 	 * read_map() is used for defining a map of key/value
 	 * pairs. It should call cb->fill(cb, key, value) for each
 	 * entry. The key/value pairs (and their ordering) should not
 	 * change between reboots.
 	 */
-	int (*read_map)(struct cgroup *cgrp, struct cftype *cft,
+	int (*read_map)(struct cgroup_subsys_state *css, struct cftype *cft,
 			struct cgroup_map_cb *cb);
 	/*
 	 * read_seq_string() is used for outputting a simple sequence
 	 * using seqfile.
 	 */
-	int (*read_seq_string)(struct cgroup *cgrp, struct cftype *cft,
-			       struct seq_file *m);
+	int (*read_seq_string)(struct cgroup_subsys_state *css,
+			       struct cftype *cft, struct seq_file *m);
 
-	ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
+	ssize_t (*write)(struct cgroup_subsys_state *css, struct cftype *cft,
 			 struct file *file,
 			 const char __user *buf, size_t nbytes, loff_t *ppos);
 
@@ -475,18 +475,20 @@ struct cftype {
 	 * a single integer (as parsed by simple_strtoull) from
 	 * userspace. Use in place of write(); return 0 or error.
 	 */
-	int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
+	int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
+			 u64 val);
 	/*
 	 * write_s64() is a signed version of write_u64()
 	 */
-	int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
+	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
+			 s64 val);
 
 	/*
 	 * write_string() is passed a nul-terminated kernelspace
 	 * buffer of maximum length determined by max_write_len.
 	 * Returns 0 or -ve error code.
 	 */
-	int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
+	int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft,
 			    const char *buffer);
 	/*
 	 * trigger() callback can be used to get some kick from the
@@ -494,7 +496,7 @@ struct cftype {
 	 * at all. The private field can be used to determine the
 	 * kick type for multiplexing.
 	 */
-	int (*trigger)(struct cgroup *cgrp, unsigned int event);
+	int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
 
 	int (*release)(struct inode *inode, struct file *file);
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7b4d9d79570b..6c416092e324 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -85,7 +85,7 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
 
 extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
-extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont);
+extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css);
 
 static inline
 bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c049992f1ffa..6ee469837fda 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2235,34 +2235,38 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
 
-static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
+static int cgroup_tasks_write(struct cgroup_subsys_state *css,
+			      struct cftype *cft, u64 pid)
 {
-	return attach_task_by_pid(cgrp, pid, false);
+	return attach_task_by_pid(css->cgroup, pid, false);
 }
 
-static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
+static int cgroup_procs_write(struct cgroup_subsys_state *css,
+			      struct cftype *cft, u64 tgid)
 {
-	return attach_task_by_pid(cgrp, tgid, true);
+	return attach_task_by_pid(css->cgroup, tgid, true);
 }
 
-static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
-				      const char *buffer)
+static int cgroup_release_agent_write(struct cgroup_subsys_state *css,
+				      struct cftype *cft, const char *buffer)
 {
-	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+	BUILD_BUG_ON(sizeof(css->cgroup->root->release_agent_path) < PATH_MAX);
 	if (strlen(buffer) >= PATH_MAX)
 		return -EINVAL;
-	if (!cgroup_lock_live_group(cgrp))
+	if (!cgroup_lock_live_group(css->cgroup))
 		return -ENODEV;
 	mutex_lock(&cgroup_root_mutex);
-	strcpy(cgrp->root->release_agent_path, buffer);
+	strcpy(css->cgroup->root->release_agent_path, buffer);
 	mutex_unlock(&cgroup_root_mutex);
 	mutex_unlock(&cgroup_mutex);
 	return 0;
 }
 
-static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
-				     struct seq_file *seq)
+static int cgroup_release_agent_show(struct cgroup_subsys_state *css,
+				     struct cftype *cft, struct seq_file *seq)
 {
+	struct cgroup *cgrp = css->cgroup;
+
 	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
 	seq_puts(seq, cgrp->root->release_agent_path);
@@ -2271,10 +2275,10 @@ static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
 	return 0;
 }
 
-static int cgroup_sane_behavior_show(struct cgroup *cgrp, struct cftype *cft,
-				     struct seq_file *seq)
+static int cgroup_sane_behavior_show(struct cgroup_subsys_state *css,
+				     struct cftype *cft, struct seq_file *seq)
 {
-	seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
+	seq_printf(seq, "%d\n", cgroup_sane_behavior(css->cgroup));
 	return 0;
 }
 
@@ -2292,10 +2296,10 @@ static struct cgroup_subsys_state *cgroup_file_css(struct cfent *cfe)
 /* A buffer size big enough for numbers or short strings */
 #define CGROUP_LOCAL_BUFFER_SIZE 64
 
-static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
-				struct file *file,
-				const char __user *userbuf,
-				size_t nbytes, loff_t *unused_ppos)
+static ssize_t cgroup_write_X64(struct cgroup_subsys_state *css,
+				struct cftype *cft, struct file *file,
+				const char __user *userbuf, size_t nbytes,
+				loff_t *unused_ppos)
 {
 	char buffer[CGROUP_LOCAL_BUFFER_SIZE];
 	int retval = 0;
@@ -2313,22 +2317,22 @@ static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
 		u64 val = simple_strtoull(strstrip(buffer), &end, 0);
 		if (*end)
 			return -EINVAL;
-		retval = cft->write_u64(cgrp, cft, val);
+		retval = cft->write_u64(css, cft, val);
 	} else {
 		s64 val = simple_strtoll(strstrip(buffer), &end, 0);
 		if (*end)
 			return -EINVAL;
-		retval = cft->write_s64(cgrp, cft, val);
+		retval = cft->write_s64(css, cft, val);
 	}
 	if (!retval)
 		retval = nbytes;
 	return retval;
 }
 
-static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
-				   struct file *file,
-				   const char __user *userbuf,
-				   size_t nbytes, loff_t *unused_ppos)
+static ssize_t cgroup_write_string(struct cgroup_subsys_state *css,
+				   struct cftype *cft, struct file *file,
+				   const char __user *userbuf, size_t nbytes,
+				   loff_t *unused_ppos)
 {
 	char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
 	int retval = 0;
@@ -2351,7 +2355,7 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
 	}
 
 	buffer[nbytes] = 0;     /* nul-terminate */
-	retval = cft->write_string(cgrp, cft, strstrip(buffer));
+	retval = cft->write_string(css, cft, strstrip(buffer));
 	if (!retval)
 		retval = nbytes;
 out:
@@ -2361,60 +2365,60 @@ out:
 }
 
 static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
-						size_t nbytes, loff_t *ppos)
+				 size_t nbytes, loff_t *ppos)
 {
+	struct cfent *cfe = __d_cfe(file->f_dentry);
 	struct cftype *cft = __d_cft(file->f_dentry);
-	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+	struct cgroup_subsys_state *css = cgroup_file_css(cfe);
 
 	if (cft->write)
-		return cft->write(cgrp, cft, file, buf, nbytes, ppos);
+		return cft->write(css, cft, file, buf, nbytes, ppos);
 	if (cft->write_u64 || cft->write_s64)
-		return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
+		return cgroup_write_X64(css, cft, file, buf, nbytes, ppos);
 	if (cft->write_string)
-		return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
+		return cgroup_write_string(css, cft, file, buf, nbytes, ppos);
 	if (cft->trigger) {
-		int ret = cft->trigger(cgrp, (unsigned int)cft->private);
+		int ret = cft->trigger(css, (unsigned int)cft->private);
 		return ret ? ret : nbytes;
 	}
 	return -EINVAL;
 }
 
-static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
-			       struct file *file,
-			       char __user *buf, size_t nbytes,
-			       loff_t *ppos)
+static ssize_t cgroup_read_u64(struct cgroup_subsys_state *css,
+			       struct cftype *cft, struct file *file,
+			       char __user *buf, size_t nbytes, loff_t *ppos)
 {
 	char tmp[CGROUP_LOCAL_BUFFER_SIZE];
-	u64 val = cft->read_u64(cgrp, cft);
+	u64 val = cft->read_u64(css, cft);
 	int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
 
 	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
 }
 
-static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
-			       struct file *file,
-			       char __user *buf, size_t nbytes,
-			       loff_t *ppos)
+static ssize_t cgroup_read_s64(struct cgroup_subsys_state *css,
+			       struct cftype *cft, struct file *file,
+			       char __user *buf, size_t nbytes, loff_t *ppos)
 {
 	char tmp[CGROUP_LOCAL_BUFFER_SIZE];
-	s64 val = cft->read_s64(cgrp, cft);
+	s64 val = cft->read_s64(css, cft);
 	int len = sprintf(tmp, "%lld\n", (long long) val);
 
 	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
 }
 
 static ssize_t cgroup_file_read(struct file *file, char __user *buf,
-				   size_t nbytes, loff_t *ppos)
+				size_t nbytes, loff_t *ppos)
 {
+	struct cfent *cfe = __d_cfe(file->f_dentry);
 	struct cftype *cft = __d_cft(file->f_dentry);
-	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+	struct cgroup_subsys_state *css = cgroup_file_css(cfe);
 
 	if (cft->read)
-		return cft->read(cgrp, cft, file, buf, nbytes, ppos);
+		return cft->read(css, cft, file, buf, nbytes, ppos);
 	if (cft->read_u64)
-		return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
+		return cgroup_read_u64(css, cft, file, buf, nbytes, ppos);
 	if (cft->read_s64)
-		return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
+		return cgroup_read_s64(css, cft, file, buf, nbytes, ppos);
 	return -EINVAL;
 }
 
@@ -2433,16 +2437,16 @@ static int cgroup_seqfile_show(struct seq_file *m, void *arg)
 {
 	struct cfent *cfe = m->private;
 	struct cftype *cft = cfe->type;
-	struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent);
+	struct cgroup_subsys_state *css = cgroup_file_css(cfe);
 
 	if (cft->read_map) {
 		struct cgroup_map_cb cb = {
 			.fill = cgroup_map_add,
 			.state = m,
 		};
-		return cft->read_map(cgrp, cft, &cb);
+		return cft->read_map(css, cft, &cb);
 	}
-	return cft->read_seq_string(cgrp, cft, m);
+	return cft->read_seq_string(css, cft, m);
 }
 
 static const struct file_operations cgroup_seqfile_operations = {
@@ -3860,21 +3864,20 @@ static int cgroup_procs_open(struct inode *unused, struct file *file)
 	return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
 }
 
-static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
-					    struct cftype *cft)
+static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
+					 struct cftype *cft)
 {
-	return notify_on_release(cgrp);
+	return notify_on_release(css->cgroup);
 }
 
-static int cgroup_write_notify_on_release(struct cgroup *cgrp,
-					  struct cftype *cft,
-					  u64 val)
+static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
+					  struct cftype *cft, u64 val)
 {
-	clear_bit(CGRP_RELEASABLE, &cgrp->flags);
+	clear_bit(CGRP_RELEASABLE, &css->cgroup->flags);
 	if (val)
-		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+		set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
 	else
-		clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+		clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
 	return 0;
 }
 
@@ -3972,9 +3975,10 @@ static void cgroup_event_ptable_queue_proc(struct file *file,
  * Input must be in format '<event_fd> <control_fd> <args>'.
  * Interpretation of args is defined by control file implementation.
  */
-static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
-				      const char *buffer)
+static int cgroup_write_event_control(struct cgroup_subsys_state *css,
+				      struct cftype *cft, const char *buffer)
 {
+	struct cgroup *cgrp = css->cgroup;
 	struct cgroup_event *event;
 	struct cgroup *cgrp_cfile;
 	unsigned int efd, cfd;
@@ -4082,20 +4086,19 @@ out_kfree:
 	return ret;
 }
 
-static u64 cgroup_clone_children_read(struct cgroup *cgrp,
-				    struct cftype *cft)
+static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
+				      struct cftype *cft)
 {
-	return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
+	return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
 }
 
-static int cgroup_clone_children_write(struct cgroup *cgrp,
-				     struct cftype *cft,
-				     u64 val)
+static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
+				       struct cftype *cft, u64 val)
 {
 	if (val)
-		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
+		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
 	else
-		clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
+		clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
 	return 0;
 }
 
@@ -5585,17 +5588,19 @@ static void debug_css_free(struct cgroup_subsys_state *css)
 	kfree(css);
 }
 
-static u64 debug_taskcount_read(struct cgroup *cgrp, struct cftype *cft)
+static u64 debug_taskcount_read(struct cgroup_subsys_state *css,
+				struct cftype *cft)
 {
-	return cgroup_task_count(cgrp);
+	return cgroup_task_count(css->cgroup);
 }
 
-static u64 current_css_set_read(struct cgroup *cgrp, struct cftype *cft)
+static u64 current_css_set_read(struct cgroup_subsys_state *css,
+				struct cftype *cft)
 {
 	return (u64)(unsigned long)current->cgroups;
 }
 
-static u64 current_css_set_refcount_read(struct cgroup *cgrp,
+static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
 					 struct cftype *cft)
 {
 	u64 count;
@@ -5606,7 +5611,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cgrp,
 	return count;
 }
 
-static int current_css_set_cg_links_read(struct cgroup *cgrp,
+static int current_css_set_cg_links_read(struct cgroup_subsys_state *css,
 					 struct cftype *cft,
 					 struct seq_file *seq)
 {
@@ -5633,14 +5638,13 @@ static int current_css_set_cg_links_read(struct cgroup *cgrp,
 }
 
 #define MAX_TASKS_SHOWN_PER_CSS 25
-static int cgroup_css_links_read(struct cgroup *cgrp,
-				 struct cftype *cft,
-				 struct seq_file *seq)
+static int cgroup_css_links_read(struct cgroup_subsys_state *css,
+				 struct cftype *cft, struct seq_file *seq)
 {
 	struct cgrp_cset_link *link;
 
 	read_lock(&css_set_lock);
-	list_for_each_entry(link, &cgrp->cset_links, cset_link) {
+	list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
 		struct css_set *cset = link->cset;
 		struct task_struct *task;
 		int count = 0;
@@ -5659,9 +5663,9 @@ static int cgroup_css_links_read(struct cgroup *cgrp,
 	return 0;
 }
 
-static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
+static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
-	return test_bit(CGRP_RELEASABLE, &cgrp->flags);
+	return test_bit(CGRP_RELEASABLE, &css->cgroup->flags);
 }
 
 static struct cftype debug_files[] =  {
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index f03a85719c3c..19613ba51444 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -245,7 +245,7 @@ out:
 
 /**
  * update_if_frozen - update whether a cgroup finished freezing
- * @cgroup: cgroup of interest
+ * @css: css of interest
  *
  * Once FREEZING is initiated, transition to FROZEN is lazily updated by
  * calling this function.  If the current state is FREEZING but not FROZEN,
@@ -256,12 +256,12 @@ out:
  * update_if_frozen() on all descendants prior to invoking this function.
  *
  * Task states and freezer state might disagree while tasks are being
- * migrated into or out of @cgroup, so we can't verify task states against
+ * migrated into or out of @css, so we can't verify task states against
  * @freezer state here.  See freezer_attach() for details.
  */
-static void update_if_frozen(struct cgroup *cgroup)
+static void update_if_frozen(struct cgroup_subsys_state *css)
 {
-	struct freezer *freezer = cgroup_freezer(cgroup);
+	struct freezer *freezer = css_freezer(css);
 	struct cgroup *pos;
 	struct cgroup_iter it;
 	struct task_struct *task;
@@ -275,7 +275,7 @@ static void update_if_frozen(struct cgroup *cgroup)
 		goto out_unlock;
 
 	/* are all (live) children frozen? */
-	cgroup_for_each_child(pos, cgroup) {
+	cgroup_for_each_child(pos, css->cgroup) {
 		struct freezer *child = cgroup_freezer(pos);
 
 		if ((child->state & CGROUP_FREEZER_ONLINE) &&
@@ -284,9 +284,9 @@ static void update_if_frozen(struct cgroup *cgroup)
 	}
 
 	/* are all tasks frozen? */
-	cgroup_iter_start(cgroup, &it);
+	cgroup_iter_start(css->cgroup, &it);
 
-	while ((task = cgroup_iter_next(cgroup, &it))) {
+	while ((task = cgroup_iter_next(css->cgroup, &it))) {
 		if (freezing(task)) {
 			/*
 			 * freezer_should_skip() indicates that the task
@@ -301,12 +301,12 @@ static void update_if_frozen(struct cgroup *cgroup)
 
 	freezer->state |= CGROUP_FROZEN;
 out_iter_end:
-	cgroup_iter_end(cgroup, &it);
+	cgroup_iter_end(css->cgroup, &it);
 out_unlock:
 	spin_unlock_irq(&freezer->lock);
 }
 
-static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
+static int freezer_read(struct cgroup_subsys_state *css, struct cftype *cft,
 			struct seq_file *m)
 {
 	struct cgroup *pos;
@@ -314,13 +314,13 @@ static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
 	rcu_read_lock();
 
 	/* update states bottom-up */
-	cgroup_for_each_descendant_post(pos, cgroup)
-		update_if_frozen(pos);
-	update_if_frozen(cgroup);
+	cgroup_for_each_descendant_post(pos, css->cgroup)
+		update_if_frozen(cgroup_css(pos, freezer_subsys_id));
+	update_if_frozen(css);
 
 	rcu_read_unlock();
 
-	seq_puts(m, freezer_state_strs(cgroup_freezer(cgroup)->state));
+	seq_puts(m, freezer_state_strs(css_freezer(css)->state));
 	seq_putc(m, '\n');
 	return 0;
 }
@@ -426,7 +426,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
 	rcu_read_unlock();
 }
 
-static int freezer_write(struct cgroup *cgroup, struct cftype *cft,
+static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft,
 			 const char *buffer)
 {
 	bool freeze;
@@ -438,20 +438,22 @@ static int freezer_write(struct cgroup *cgroup, struct cftype *cft,
 	else
 		return -EINVAL;
 
-	freezer_change_state(cgroup_freezer(cgroup), freeze);
+	freezer_change_state(css_freezer(css), freeze);
 	return 0;
 }
 
-static u64 freezer_self_freezing_read(struct cgroup *cgroup, struct cftype *cft)
+static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
+				      struct cftype *cft)
 {
-	struct freezer *freezer = cgroup_freezer(cgroup);
+	struct freezer *freezer = css_freezer(css);
 
 	return (bool)(freezer->state & CGROUP_FREEZING_SELF);
 }
 
-static u64 freezer_parent_freezing_read(struct cgroup *cgroup, struct cftype *cft)
+static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
+					struct cftype *cft)
 {
-	struct freezer *freezer = cgroup_freezer(cgroup);
+	struct freezer *freezer = css_freezer(css);
 
 	return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
 }
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8ce3fdc3dfcc..89b76e1d3aa1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1603,9 +1603,10 @@ typedef enum {
 	FILE_SPREAD_SLAB,
 } cpuset_filetype_t;
 
-static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
+static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
+			    u64 val)
 {
-	struct cpuset *cs = cgroup_cs(cgrp);
+	struct cpuset *cs = css_cs(css);
 	cpuset_filetype_t type = cft->private;
 	int retval = -ENODEV;
 
@@ -1650,9 +1651,10 @@ out_unlock:
 	return retval;
 }
 
-static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
+static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
+			    s64 val)
 {
-	struct cpuset *cs = cgroup_cs(cgrp);
+	struct cpuset *cs = css_cs(css);
 	cpuset_filetype_t type = cft->private;
 	int retval = -ENODEV;
 
@@ -1676,10 +1678,10 @@ out_unlock:
 /*
  * Common handling for a write to a "cpus" or "mems" file.
  */
-static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
-				const char *buf)
+static int cpuset_write_resmask(struct cgroup_subsys_state *css,
+				struct cftype *cft, const char *buf)
 {
-	struct cpuset *cs = cgroup_cs(cgrp);
+	struct cpuset *cs = css_cs(css);
 	struct cpuset *trialcs;
 	int retval = -ENODEV;
 
@@ -1758,13 +1760,12 @@ static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
 	return count;
 }
 
-static ssize_t cpuset_common_file_read(struct cgroup *cgrp,
-				       struct cftype *cft,
-				       struct file *file,
-				       char __user *buf,
-				       size_t nbytes, loff_t *ppos)
+static ssize_t cpuset_common_file_read(struct cgroup_subsys_state *css,
+				       struct cftype *cft, struct file *file,
+				       char __user *buf, size_t nbytes,
+				       loff_t *ppos)
 {
-	struct cpuset *cs = cgroup_cs(cgrp);
+	struct cpuset *cs = css_cs(css);
 	cpuset_filetype_t type = cft->private;
 	char *page;
 	ssize_t retval = 0;
@@ -1794,9 +1795,9 @@ out:
 	return retval;
 }
 
-static u64 cpuset_read_u64(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
 {
-	struct cpuset *cs = cgroup_cs(cgrp);
+	struct cpuset *cs = css_cs(css);
 	cpuset_filetype_t type = cft->private;
 	switch (type) {
 	case FILE_CPU_EXCLUSIVE:
@@ -1825,9 +1826,9 @@ static u64 cpuset_read_u64(struct cgroup *cgrp, struct cftype *cft)
 	return 0;
 }
 
-static s64 cpuset_read_s64(struct cgroup *cgrp, struct cftype *cft)
+static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft)
 {
-	struct cpuset *cs = cgroup_cs(cgrp);
+	struct cpuset *cs = css_cs(css);
 	cpuset_filetype_t type = cft->private;
 	switch (type) {
 	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 622b7efc5ade..cc9a49266382 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7088,12 +7088,6 @@ static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
 	return css ? container_of(css, struct task_group, css) : NULL;
 }
 
-/* return corresponding task_group object of a cgroup */
-static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
-{
-	return css_tg(cgroup_css(cgrp, cpu_cgroup_subsys_id));
-}
-
 static struct cgroup_subsys_state *
 cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
@@ -7179,15 +7173,16 @@ static void cpu_cgroup_exit(struct cgroup_subsys_state *css,
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
-				u64 shareval)
+static int cpu_shares_write_u64(struct cgroup_subsys_state *css,
+				struct cftype *cftype, u64 shareval)
 {
-	return sched_group_set_shares(cgroup_tg(cgrp), scale_load(shareval));
+	return sched_group_set_shares(css_tg(css), scale_load(shareval));
 }
 
-static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css,
+			       struct cftype *cft)
 {
-	struct task_group *tg = cgroup_tg(cgrp);
+	struct task_group *tg = css_tg(css);
 
 	return (u64) scale_load_down(tg->shares);
 }
@@ -7309,26 +7304,28 @@ long tg_get_cfs_period(struct task_group *tg)
 	return cfs_period_us;
 }
 
-static s64 cpu_cfs_quota_read_s64(struct cgroup *cgrp, struct cftype *cft)
+static s64 cpu_cfs_quota_read_s64(struct cgroup_subsys_state *css,
+				  struct cftype *cft)
 {
-	return tg_get_cfs_quota(cgroup_tg(cgrp));
+	return tg_get_cfs_quota(css_tg(css));
 }
 
-static int cpu_cfs_quota_write_s64(struct cgroup *cgrp, struct cftype *cftype,
-				s64 cfs_quota_us)
+static int cpu_cfs_quota_write_s64(struct cgroup_subsys_state *css,
+				   struct cftype *cftype, s64 cfs_quota_us)
 {
-	return tg_set_cfs_quota(cgroup_tg(cgrp), cfs_quota_us);
+	return tg_set_cfs_quota(css_tg(css), cfs_quota_us);
 }
 
-static u64 cpu_cfs_period_read_u64(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpu_cfs_period_read_u64(struct cgroup_subsys_state *css,
+				   struct cftype *cft)
 {
-	return tg_get_cfs_period(cgroup_tg(cgrp));
+	return tg_get_cfs_period(css_tg(css));
 }
 
-static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype,
-				u64 cfs_period_us)
+static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css,
+				    struct cftype *cftype, u64 cfs_period_us)
 {
-	return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us);
+	return tg_set_cfs_period(css_tg(css), cfs_period_us);
 }
 
 struct cfs_schedulable_data {
@@ -7409,10 +7406,10 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
 	return ret;
 }
 
-static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft,
+static int cpu_stats_show(struct cgroup_subsys_state *css, struct cftype *cft,
 		struct cgroup_map_cb *cb)
 {
-	struct task_group *tg = cgroup_tg(cgrp);
+	struct task_group *tg = css_tg(css);
 	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
 
 	cb->fill(cb, "nr_periods", cfs_b->nr_periods);
@@ -7425,26 +7422,28 @@ static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft,
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 #ifdef CONFIG_RT_GROUP_SCHED
-static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
-				s64 val)
+static int cpu_rt_runtime_write(struct cgroup_subsys_state *css,
+				struct cftype *cft, s64 val)
 {
-	return sched_group_set_rt_runtime(cgroup_tg(cgrp), val);
+	return sched_group_set_rt_runtime(css_tg(css), val);
 }
 
-static s64 cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft)
+static s64 cpu_rt_runtime_read(struct cgroup_subsys_state *css,
+			       struct cftype *cft)
 {
-	return sched_group_rt_runtime(cgroup_tg(cgrp));
+	return sched_group_rt_runtime(css_tg(css));
 }
 
-static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype,
-		u64 rt_period_us)
+static int cpu_rt_period_write_uint(struct cgroup_subsys_state *css,
+				    struct cftype *cftype, u64 rt_period_us)
 {
-	return sched_group_set_rt_period(cgroup_tg(cgrp), rt_period_us);
+	return sched_group_set_rt_period(css_tg(css), rt_period_us);
 }
 
-static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
+				   struct cftype *cft)
 {
-	return sched_group_rt_period(cgroup_tg(cgrp));
+	return sched_group_rt_period(css_tg(css));
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 1b784d9b3630..f64722ff0299 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -38,12 +38,6 @@ static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
 	return css ? container_of(css, struct cpuacct, css) : NULL;
 }
 
-/* return cpu accounting group corresponding to this container */
-static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
-{
-	return css_ca(cgroup_css(cgrp, cpuacct_subsys_id));
-}
-
 /* return cpu accounting group to which this task belongs */
 static inline struct cpuacct *task_ca(struct task_struct *tsk)
 {
@@ -138,9 +132,9 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
 }
 
 /* return total cpu usage (in nanoseconds) of a group */
-static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
-	struct cpuacct *ca = cgroup_ca(cgrp);
+	struct cpuacct *ca = css_ca(css);
 	u64 totalcpuusage = 0;
 	int i;
 
@@ -150,10 +144,10 @@ static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
 	return totalcpuusage;
 }
 
-static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
-								u64 reset)
+static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
+			  u64 reset)
 {
-	struct cpuacct *ca = cgroup_ca(cgrp);
+	struct cpuacct *ca = css_ca(css);
 	int err = 0;
 	int i;
 
@@ -169,10 +163,10 @@ out:
 	return err;
 }
 
-static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
-				   struct seq_file *m)
+static int cpuacct_percpu_seq_read(struct cgroup_subsys_state *css,
+				   struct cftype *cft, struct seq_file *m)
 {
-	struct cpuacct *ca = cgroup_ca(cgroup);
+	struct cpuacct *ca = css_ca(css);
 	u64 percpu;
 	int i;
 
@@ -189,10 +183,10 @@ static const char * const cpuacct_stat_desc[] = {
 	[CPUACCT_STAT_SYSTEM] = "system",
 };
 
-static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
-			      struct cgroup_map_cb *cb)
+static int cpuacct_stats_show(struct cgroup_subsys_state *css,
+			      struct cftype *cft, struct cgroup_map_cb *cb)
 {
-	struct cpuacct *ca = cgroup_ca(cgrp);
+	struct cpuacct *ca = css_ca(css);
 	int cpu;
 	s64 val = 0;
 
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index e2132435060f..bda8e44f6fde 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -39,12 +39,6 @@ struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
 	return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
 }
 
-static inline
-struct hugetlb_cgroup *hugetlb_cgroup_from_cgroup(struct cgroup *cgroup)
-{
-	return hugetlb_cgroup_from_css(cgroup_css(cgroup, hugetlb_subsys_id));
-}
-
 static inline
 struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
 {
@@ -248,14 +242,15 @@ void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 	return;
 }
 
-static ssize_t hugetlb_cgroup_read(struct cgroup *cgroup, struct cftype *cft,
-				   struct file *file, char __user *buf,
-				   size_t nbytes, loff_t *ppos)
+static ssize_t hugetlb_cgroup_read(struct cgroup_subsys_state *css,
+				   struct cftype *cft, struct file *file,
+				   char __user *buf, size_t nbytes,
+				   loff_t *ppos)
 {
 	u64 val;
 	char str[64];
 	int idx, name, len;
-	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
 
 	idx = MEMFILE_IDX(cft->private);
 	name = MEMFILE_ATTR(cft->private);
@@ -265,12 +260,12 @@ static ssize_t hugetlb_cgroup_read(struct cgroup *cgroup, struct cftype *cft,
 	return simple_read_from_buffer(buf, nbytes, ppos, str, len);
 }
 
-static int hugetlb_cgroup_write(struct cgroup *cgroup, struct cftype *cft,
-				const char *buffer)
+static int hugetlb_cgroup_write(struct cgroup_subsys_state *css,
+				struct cftype *cft, const char *buffer)
 {
 	int idx, name, ret;
 	unsigned long long val;
-	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
 
 	idx = MEMFILE_IDX(cft->private);
 	name = MEMFILE_ATTR(cft->private);
@@ -295,10 +290,11 @@ static int hugetlb_cgroup_write(struct cgroup *cgroup, struct cftype *cft,
 	return ret;
 }
 
-static int hugetlb_cgroup_reset(struct cgroup *cgroup, unsigned int event)
+static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css,
+				unsigned int event)
 {
 	int idx, name, ret = 0;
-	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
 
 	idx = MEMFILE_IDX(event);
 	name = MEMFILE_ATTR(event);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 32cca0f0af0d..ab64dfc84f8c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -483,7 +483,6 @@ enum res_type {
  */
 static DEFINE_MUTEX(memcg_create_mutex);
 
-static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s)
 {
 	return s ? container_of(s, struct mem_cgroup, css) : NULL;
@@ -1035,7 +1034,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 		preempt_enable();
 }
 
-struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
+static inline struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 {
 	return mem_cgroup_from_css(cgroup_css(cont, mem_cgroup_subsys_id));
 }
@@ -2951,10 +2950,10 @@ static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
 }
 
 #ifdef CONFIG_SLABINFO
-static int mem_cgroup_slabinfo_read(struct cgroup *cont, struct cftype *cft,
-					struct seq_file *m)
+static int mem_cgroup_slabinfo_read(struct cgroup_subsys_state *css,
+				    struct cftype *cft, struct seq_file *m)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct memcg_cache_params *params;
 
 	if (!memcg_can_account_kmem(memcg))
@@ -4999,9 +4998,10 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
 	return 0;
 }
 
-static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
+static int mem_cgroup_force_empty_write(struct cgroup_subsys_state *css,
+					unsigned int event)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	int ret;
 
 	if (mem_cgroup_is_root(memcg))
@@ -5014,16 +5014,17 @@ static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
 }
 
 
-static u64 mem_cgroup_hierarchy_read(struct cgroup *cont, struct cftype *cft)
+static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css,
+				     struct cftype *cft)
 {
-	return mem_cgroup_from_cont(cont)->use_hierarchy;
+	return mem_cgroup_from_css(css)->use_hierarchy;
 }
 
-static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
-					u64 val)
+static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
+				      struct cftype *cft, u64 val)
 {
 	int retval = 0;
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent_memcg = mem_cgroup_from_css(css_parent(&memcg->css));
 
 	mutex_lock(&memcg_create_mutex);
@@ -5094,11 +5095,11 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
 	return val << PAGE_SHIFT;
 }
 
-static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
-			       struct file *file, char __user *buf,
-			       size_t nbytes, loff_t *ppos)
+static ssize_t mem_cgroup_read(struct cgroup_subsys_state *css,
+			       struct cftype *cft, struct file *file,
+			       char __user *buf, size_t nbytes, loff_t *ppos)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	char str[64];
 	u64 val;
 	int name, len;
@@ -5131,11 +5132,11 @@ static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
 	return simple_read_from_buffer(buf, nbytes, ppos, str, len);
 }
 
-static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
+static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val)
 {
 	int ret = -EINVAL;
 #ifdef CONFIG_MEMCG_KMEM
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	/*
 	 * For simplicity, we won't allow this to be disabled.  It also can't
 	 * be changed if the cgroup has children already, or if tasks had
@@ -5151,7 +5152,7 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
 	mutex_lock(&memcg_create_mutex);
 	mutex_lock(&set_limit_mutex);
 	if (!memcg->kmem_account_flags && val != RESOURCE_MAX) {
-		if (cgroup_task_count(cont) || memcg_has_children(memcg)) {
+		if (cgroup_task_count(css->cgroup) || memcg_has_children(memcg)) {
 			ret = -EBUSY;
 			goto out;
 		}
@@ -5221,10 +5222,10 @@ out:
  * The user of this function is...
  * RES_LIMIT.
  */
-static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
+static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 			    const char *buffer)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	enum res_type type;
 	int name;
 	unsigned long long val;
@@ -5248,7 +5249,7 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
 		else if (type == _MEMSWAP)
 			ret = mem_cgroup_resize_memsw_limit(memcg, val);
 		else if (type == _KMEM)
-			ret = memcg_update_kmem_limit(cont, val);
+			ret = memcg_update_kmem_limit(css, val);
 		else
 			return -EINVAL;
 		break;
@@ -5297,9 +5298,9 @@ out:
 	*memsw_limit = min_memsw_limit;
 }
 
-static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
+static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	int name;
 	enum res_type type;
 
@@ -5332,17 +5333,17 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
 	return 0;
 }
 
-static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp,
+static u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css,
 					struct cftype *cft)
 {
-	return mem_cgroup_from_cont(cgrp)->move_charge_at_immigrate;
+	return mem_cgroup_from_css(css)->move_charge_at_immigrate;
 }
 
 #ifdef CONFIG_MMU
-static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
+static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
 					struct cftype *cft, u64 val)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
 	if (val >= (1 << NR_MOVE_TYPE))
 		return -EINVAL;
@@ -5357,7 +5358,7 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
 	return 0;
 }
 #else
-static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
+static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
 					struct cftype *cft, u64 val)
 {
 	return -ENOSYS;
@@ -5365,13 +5366,13 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
 #endif
 
 #ifdef CONFIG_NUMA
-static int memcg_numa_stat_show(struct cgroup *cont, struct cftype *cft,
-				      struct seq_file *m)
+static int memcg_numa_stat_show(struct cgroup_subsys_state *css,
+				struct cftype *cft, struct seq_file *m)
 {
 	int nid;
 	unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
 	unsigned long node_nr;
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
 	total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL);
 	seq_printf(m, "total=%lu", total_nr);
@@ -5416,10 +5417,10 @@ static inline void mem_cgroup_lru_names_not_uptodate(void)
 	BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
 }
 
-static int memcg_stat_show(struct cgroup *cont, struct cftype *cft,
+static int memcg_stat_show(struct cgroup_subsys_state *css, struct cftype *cft,
 				 struct seq_file *m)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *mi;
 	unsigned int i;
 
@@ -5503,17 +5504,18 @@ static int memcg_stat_show(struct cgroup *cont, struct cftype *cft,
 	return 0;
 }
 
-static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft)
+static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css,
+				      struct cftype *cft)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
 	return mem_cgroup_swappiness(memcg);
 }
 
-static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
-				       u64 val)
+static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
+				       struct cftype *cft, u64 val)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));
 
 	if (val > 100 || !parent)
@@ -5829,10 +5831,10 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
 	spin_unlock(&memcg_oom_lock);
 }
 
-static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
+static int mem_cgroup_oom_control_read(struct cgroup_subsys_state *css,
 	struct cftype *cft,  struct cgroup_map_cb *cb)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
 	cb->fill(cb, "oom_kill_disable", memcg->oom_kill_disable);
 
@@ -5843,10 +5845,10 @@ static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
 	return 0;
 }
 
-static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
+static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
 	struct cftype *cft, u64 val)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));
 
 	/* cannot set to root cgroup and only 0 and 1 are allowed */
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 7f1654d3cec7..2a8a736e95cc 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -81,8 +81,8 @@ static struct vmpressure *cg_to_vmpressure(struct cgroup *cg)
 
 static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
 {
-	struct cgroup *cg = vmpressure_to_css(vmpr)->cgroup;
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cg);
+	struct cgroup_subsys_state *css = vmpressure_to_css(vmpr);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
 	memcg = parent_mem_cgroup(memcg);
 	if (!memcg)
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 8d095b4c2f6f..e00f60e5baea 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -168,15 +168,14 @@ static void cgrp_css_free(struct cgroup_subsys_state *css)
 	kfree(css);
 }
 
-static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
+static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft)
 {
-	return cgrp->id;
+	return css->cgroup->id;
 }
 
-static int read_priomap(struct cgroup *cont, struct cftype *cft,
+static int read_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
 			struct cgroup_map_cb *cb)
 {
-	struct cgroup_subsys_state *css = cgroup_css(cont, net_prio_subsys_id);
 	struct net_device *dev;
 
 	rcu_read_lock();
@@ -186,10 +185,9 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft,
 	return 0;
 }
 
-static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
+static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
 			 const char *buffer)
 {
-	struct cgroup_subsys_state *css = cgroup_css(cgrp, net_prio_subsys_id);
 	char devname[IFNAMSIZ + 1];
 	struct net_device *dev;
 	u32 prio;
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index da14436c1735..8a57d79b0b16 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -132,10 +132,10 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
 	return 0;
 }
 
-static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
+static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 			    const char *buffer)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	unsigned long long val;
 	int ret = 0;
 
@@ -180,9 +180,9 @@ static u64 tcp_read_usage(struct mem_cgroup *memcg)
 	return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
 }
 
-static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
+static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	u64 val;
 
 	switch (cft->private) {
@@ -202,13 +202,13 @@ static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
 	return val;
 }
 
-static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event)
+static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
 {
 	struct mem_cgroup *memcg;
 	struct tcp_memcontrol *tcp;
 	struct cg_proto *cg_proto;
 
-	memcg = mem_cgroup_from_cont(cont);
+	memcg = mem_cgroup_from_css(css);
 	cg_proto = tcp_prot.proto_cgroup(memcg);
 	if (!cg_proto)
 		return 0;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index dc3983835893..8ea1184cec92 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -28,11 +28,6 @@ static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state
 	return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
 }
 
-static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp)
-{
-	return css_cls_state(cgroup_css(cgrp, net_cls_subsys_id));
-}
-
 static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
 {
 	return css_cls_state(task_css(p, net_cls_subsys_id));
@@ -87,14 +82,15 @@ static void cgrp_attach(struct cgroup_subsys_state *css,
 	}
 }
 
-static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
+static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
 {
-	return cgrp_cls_state(cgrp)->classid;
+	return css_cls_state(css)->classid;
 }
 
-static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value)
+static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
+			 u64 value)
 {
-	cgrp_cls_state(cgrp)->classid = (u32) value;
+	css_cls_state(css)->classid = (u32) value;
 	return 0;
 }
 
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 7293ac49ba7b..e0ca464fa854 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -289,10 +289,10 @@ static void set_majmin(char *str, unsigned m)
 		sprintf(str, "%u", m);
 }
 
-static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
-				struct seq_file *m)
+static int devcgroup_seq_read(struct cgroup_subsys_state *css,
+			      struct cftype *cft, struct seq_file *m)
 {
-	struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup);
+	struct dev_cgroup *devcgroup = css_to_devcgroup(css);
 	struct dev_exception_item *ex;
 	char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
 
@@ -669,13 +669,13 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 	return rc;
 }
 
-static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
-				  const char *buffer)
+static int devcgroup_access_write(struct cgroup_subsys_state *css,
+				  struct cftype *cft, const char *buffer)
 {
 	int retval;
 
 	mutex_lock(&devcgroup_mutex);
-	retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
+	retval = devcgroup_update_access(css_to_devcgroup(css),
 					 cft->private, buffer);
 	mutex_unlock(&devcgroup_mutex);
 	return retval;
-- 
cgit v1.2.3


From 492eb21b98f88e411a8bb43d6edcd7d7022add10 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Aug 2013 20:11:25 -0400
Subject: cgroup: make hierarchy iterators deal with cgroup_subsys_state
 instead of cgroup

cgroup is currently in the process of transitioning to using css
(cgroup_subsys_state) as the primary handle instead of cgroup in
subsystem API.  For hierarchy iterators, this is beneficial because

* In most cases, css is the only thing subsystems care about anyway.

* On the planned unified hierarchy, iterations for different
  subsystems will need to skip over different subtrees of the
  hierarchy depending on which subsystems are enabled on each cgroup.
  Passing around css makes it unnecessary to explicitly specify the
  subsystem in question as css is intersection between cgroup and
  subsystem

* For the planned unified hierarchy, css's would need to be created
  and destroyed dynamically independent from cgroup hierarchy.  Having
  cgroup core manage css iteration makes enforcing deref rules a lot
  easier.

Most subsystem conversions are straight-forward.  Noteworthy changes
are

* blkio: cgroup_to_blkcg() is no longer used.  Removed.

* freezer: cgroup_freezer() is no longer used.  Removed.

* devices: cgroup_to_devcgroup() is no longer used.  Removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c       |   8 +--
 block/blk-cgroup.h       |  25 ++++-----
 block/blk-throttle.c     |   8 +--
 include/linux/cgroup.h   |  88 ++++++++++++++++---------------
 kernel/cgroup.c          | 131 ++++++++++++++++++++++++++---------------------
 kernel/cgroup_freezer.c  |  25 ++++-----
 kernel/cpuset.c          |  58 ++++++++++-----------
 mm/memcontrol.c          |  20 ++++----
 security/device_cgroup.c |  11 ++--
 9 files changed, 187 insertions(+), 187 deletions(-)

(limited to 'security')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index f46f3c69179c..4b40640240a4 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -614,7 +614,7 @@ u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
 {
 	struct blkcg_policy *pol = blkcg_policy[pd->plid];
 	struct blkcg_gq *pos_blkg;
-	struct cgroup *pos_cgrp;
+	struct cgroup_subsys_state *pos_css;
 	u64 sum;
 
 	lockdep_assert_held(pd->blkg->q->queue_lock);
@@ -622,7 +622,7 @@ u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
 	sum = blkg_stat_read((void *)pd + off);
 
 	rcu_read_lock();
-	blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
+	blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
 		struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
 		struct blkg_stat *stat = (void *)pos_pd + off;
 
@@ -649,7 +649,7 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
 {
 	struct blkcg_policy *pol = blkcg_policy[pd->plid];
 	struct blkcg_gq *pos_blkg;
-	struct cgroup *pos_cgrp;
+	struct cgroup_subsys_state *pos_css;
 	struct blkg_rwstat sum;
 	int i;
 
@@ -658,7 +658,7 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
 	sum = blkg_rwstat_read((void *)pd + off);
 
 	rcu_read_lock();
-	blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
+	blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
 		struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
 		struct blkg_rwstat *rwstat = (void *)pos_pd + off;
 		struct blkg_rwstat tmp;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index b6802c46d68f..855538630300 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -184,11 +184,6 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
 	return css ? container_of(css, struct blkcg, css) : NULL;
 }
 
-static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
-{
-	return css_to_blkcg(cgroup_css(cgroup, blkio_subsys_id));
-}
-
 static inline struct blkcg *task_blkcg(struct task_struct *tsk)
 {
 	return css_to_blkcg(task_css(tsk, blkio_subsys_id));
@@ -289,32 +284,31 @@ struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
 /**
  * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
  * @d_blkg: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
+ * @pos_css: used for iteration
  * @p_blkg: target blkg to walk descendants of
  *
  * Walk @c_blkg through the descendants of @p_blkg.  Must be used with RCU
  * read locked.  If called under either blkcg or queue lock, the iteration
  * is guaranteed to include all and only online blkgs.  The caller may
- * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
- * subtree.
+ * update @pos_css by calling css_rightmost_descendant() to skip subtree.
  */
-#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg)		\
-	cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
-		if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
+#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
+	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
+		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
 					      (p_blkg)->q, false)))
 
 /**
  * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
  * @d_blkg: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
+ * @pos_css: used for iteration
  * @p_blkg: target blkg to walk descendants of
  *
  * Similar to blkg_for_each_descendant_pre() but performs post-order
  * traversal instead.  Synchronization rules are the same.
  */
-#define blkg_for_each_descendant_post(d_blkg, pos_cgrp, p_blkg)		\
-	cgroup_for_each_descendant_post((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
-		if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
+#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
+	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
+		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
 					      (p_blkg)->q, false)))
 
 /**
@@ -577,7 +571,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
 static inline void blkcg_deactivate_policy(struct request_queue *q,
 					   const struct blkcg_policy *pol) { }
 
-static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
 static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
 
 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 88bcfb651b0b..8cefa7f8590e 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1349,7 +1349,7 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
 	struct throtl_grp *tg;
 	struct throtl_service_queue *sq;
 	struct blkcg_gq *blkg;
-	struct cgroup *pos_cgrp;
+	struct cgroup_subsys_state *pos_css;
 	int ret;
 
 	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
@@ -1380,7 +1380,7 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
 	 * blk-throttle.
 	 */
 	tg_update_has_rules(tg);
-	blkg_for_each_descendant_pre(blkg, pos_cgrp, ctx.blkg)
+	blkg_for_each_descendant_pre(blkg, pos_css, ctx.blkg)
 		tg_update_has_rules(blkg_to_tg(blkg));
 
 	/*
@@ -1623,7 +1623,7 @@ void blk_throtl_drain(struct request_queue *q)
 {
 	struct throtl_data *td = q->td;
 	struct blkcg_gq *blkg;
-	struct cgroup *pos_cgrp;
+	struct cgroup_subsys_state *pos_css;
 	struct bio *bio;
 	int rw;
 
@@ -1636,7 +1636,7 @@ void blk_throtl_drain(struct request_queue *q)
 	 * better to walk service_queue tree directly but blkg walk is
 	 * easier.
 	 */
-	blkg_for_each_descendant_post(blkg, pos_cgrp, td->queue->root_blkg)
+	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg)
 		tg_drain_bios(&blkg_to_tg(blkg)->service_queue);
 
 	tg_drain_bios(&td_root_tg(td)->service_queue);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c288bce428f8..4bc22f4a1abb 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -779,68 +779,72 @@ static inline struct cgroup *cgroup_from_id(struct cgroup_subsys *ss, int id)
 	return idr_find(&ss->root->cgroup_idr, id);
 }
 
-struct cgroup *cgroup_next_child(struct cgroup *pos, struct cgroup *cgrp);
+struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
+					   struct cgroup_subsys_state *parent);
 
 /**
- * cgroup_for_each_child - iterate through children of a cgroup
- * @pos: the cgroup * to use as the loop cursor
- * @cgrp: cgroup whose children to walk
+ * css_for_each_child - iterate through children of a css
+ * @pos: the css * to use as the loop cursor
+ * @parent: css whose children to walk
  *
- * Walk @cgrp's children.  Must be called under rcu_read_lock().  A child
- * cgroup which hasn't finished ->css_online() or already has finished
+ * Walk @parent's children.  Must be called under rcu_read_lock().  A child
+ * css which hasn't finished ->css_online() or already has finished
  * ->css_offline() may show up during traversal and it's each subsystem's
  * responsibility to verify that each @pos is alive.
  *
  * If a subsystem synchronizes against the parent in its ->css_online() and
- * before starting iterating, a cgroup which finished ->css_online() is
+ * before starting iterating, a css which finished ->css_online() is
  * guaranteed to be visible in the future iterations.
  *
  * It is allowed to temporarily drop RCU read lock during iteration.  The
  * caller is responsible for ensuring that @pos remains accessible until
  * the start of the next iteration by, for example, bumping the css refcnt.
  */
-#define cgroup_for_each_child(pos, cgrp)				\
-	for ((pos) = cgroup_next_child(NULL, (cgrp)); (pos);		\
-	     (pos) = cgroup_next_child((pos), (cgrp)))
+#define css_for_each_child(pos, parent)					\
+	for ((pos) = css_next_child(NULL, (parent)); (pos);		\
+	     (pos) = css_next_child((pos), (parent)))
 
-struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
-					  struct cgroup *cgroup);
-struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
+struct cgroup_subsys_state *
+css_next_descendant_pre(struct cgroup_subsys_state *pos,
+			struct cgroup_subsys_state *css);
+
+struct cgroup_subsys_state *
+css_rightmost_descendant(struct cgroup_subsys_state *pos);
 
 /**
- * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
- * @pos: the cgroup * to use as the loop cursor
- * @cgroup: cgroup whose descendants to walk
+ * css_for_each_descendant_pre - pre-order walk of a css's descendants
+ * @pos: the css * to use as the loop cursor
+ * @root: css whose descendants to walk
  *
- * Walk @cgroup's descendants.  Must be called under rcu_read_lock().  A
- * descendant cgroup which hasn't finished ->css_online() or already has
+ * Walk @root's descendants.  Must be called under rcu_read_lock().  A
+ * descendant css which hasn't finished ->css_online() or already has
  * finished ->css_offline() may show up during traversal and it's each
  * subsystem's responsibility to verify that each @pos is alive.
  *
  * If a subsystem synchronizes against the parent in its ->css_online() and
  * before starting iterating, and synchronizes against @pos on each
- * iteration, any descendant cgroup which finished ->css_online() is
+ * iteration, any descendant css which finished ->css_online() is
  * guaranteed to be visible in the future iterations.
  *
  * In other words, the following guarantees that a descendant can't escape
  * state updates of its ancestors.
  *
- * my_online(@cgrp)
+ * my_online(@css)
  * {
- *	Lock @cgrp->parent and @cgrp;
- *	Inherit state from @cgrp->parent;
+ *	Lock @css's parent and @css;
+ *	Inherit state from the parent;
  *	Unlock both.
  * }
  *
- * my_update_state(@cgrp)
+ * my_update_state(@css)
  * {
- *	Lock @cgrp;
- *	Update @cgrp's state;
- *	Unlock @cgrp;
+ *	Lock @css;
+ *	Update @css's state;
+ *	Unlock @css;
  *
- *	cgroup_for_each_descendant_pre(@pos, @cgrp) {
+ *	css_for_each_descendant_pre(@pos, @css) {
  *		Lock @pos;
- *		Verify @pos is alive and inherit state from @pos->parent;
+ *		Verify @pos is alive and inherit state from @pos's parent;
  *		Unlock @pos;
  *	}
  * }
@@ -851,8 +855,7 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
  * visible by walking order and, as long as inheriting operations to the
  * same @pos are atomic to each other, multiple updates racing each other
  * still result in the correct state.  It's guaranateed that at least one
- * inheritance happens for any cgroup after the latest update to its
- * parent.
+ * inheritance happens for any css after the latest update to its parent.
  *
  * If checking parent's state requires locking the parent, each inheriting
  * iteration should lock and unlock both @pos->parent and @pos.
@@ -865,25 +868,26 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
  * caller is responsible for ensuring that @pos remains accessible until
  * the start of the next iteration by, for example, bumping the css refcnt.
  */
-#define cgroup_for_each_descendant_pre(pos, cgroup)			\
-	for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos);	\
-	     pos = cgroup_next_descendant_pre((pos), (cgroup)))
+#define css_for_each_descendant_pre(pos, css)				\
+	for ((pos) = css_next_descendant_pre(NULL, (css)); (pos);	\
+	     (pos) = css_next_descendant_pre((pos), (css)))
 
-struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
-					   struct cgroup *cgroup);
+struct cgroup_subsys_state *
+css_next_descendant_post(struct cgroup_subsys_state *pos,
+			 struct cgroup_subsys_state *css);
 
 /**
- * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants
- * @pos: the cgroup * to use as the loop cursor
- * @cgroup: cgroup whose descendants to walk
+ * css_for_each_descendant_post - post-order walk of a css's descendants
+ * @pos: the css * to use as the loop cursor
+ * @css: css whose descendants to walk
  *
- * Similar to cgroup_for_each_descendant_pre() but performs post-order
+ * Similar to css_for_each_descendant_pre() but performs post-order
  * traversal instead.  Note that the walk visibility guarantee described in
  * pre-order walk doesn't apply the same to post-order walks.
  */
-#define cgroup_for_each_descendant_post(pos, cgroup)			\
-	for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos);	\
-	     pos = cgroup_next_descendant_post((pos), (cgroup)))
+#define css_for_each_descendant_post(pos, css)				\
+	for ((pos) = css_next_descendant_post(NULL, (css)); (pos);	\
+	     (pos) = css_next_descendant_post((pos), (css)))
 
 /* A cgroup_iter should be treated as an opaque object */
 struct cgroup_iter {
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2b7354faaca7..91eac33fac86 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2814,8 +2814,8 @@ static void cgroup_cfts_prepare(void)
 	/*
 	 * Thanks to the entanglement with vfs inode locking, we can't walk
 	 * the existing cgroups under cgroup_mutex and create files.
-	 * Instead, we use cgroup_for_each_descendant_pre() and drop RCU
-	 * read lock before calling cgroup_addrm_files().
+	 * Instead, we use css_for_each_descendant_pre() and drop RCU read
+	 * lock before calling cgroup_addrm_files().
 	 */
 	mutex_lock(&cgroup_mutex);
 }
@@ -2825,10 +2825,11 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
 {
 	LIST_HEAD(pending);
 	struct cgroup_subsys *ss = cfts[0].ss;
-	struct cgroup *cgrp, *root = &ss->root->top_cgroup;
+	struct cgroup *root = &ss->root->top_cgroup;
 	struct super_block *sb = ss->root->sb;
 	struct dentry *prev = NULL;
 	struct inode *inode;
+	struct cgroup_subsys_state *css;
 	u64 update_before;
 	int ret = 0;
 
@@ -2861,7 +2862,9 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
 
 	/* add/rm files for all cgroups created before */
 	rcu_read_lock();
-	cgroup_for_each_descendant_pre(cgrp, root) {
+	css_for_each_descendant_pre(css, cgroup_css(root, ss->subsys_id)) {
+		struct cgroup *cgrp = css->cgroup;
+
 		if (cgroup_is_dead(cgrp))
 			continue;
 
@@ -3037,17 +3040,21 @@ static void cgroup_enable_task_cg_lists(void)
 }
 
 /**
- * cgroup_next_child - find the next child of a given cgroup
- * @pos: the current position (%NULL to initiate traversal)
- * @cgrp: cgroup whose descendants to walk
+ * css_next_child - find the next child of a given css
+ * @pos_css: the current position (%NULL to initiate traversal)
+ * @parent_css: css whose children to walk
  *
- * This function returns the next child of @cgrp and should be called under
- * RCU read lock.  The only requirement is that @cgrp and @pos are
- * accessible.  The next sibling is guaranteed to be returned regardless of
- * their states.
+ * This function returns the next child of @parent_css and should be called
+ * under RCU read lock.  The only requirement is that @parent_css and
+ * @pos_css are accessible.  The next sibling is guaranteed to be returned
+ * regardless of their states.
  */
-struct cgroup *cgroup_next_child(struct cgroup *pos, struct cgroup *cgrp)
+struct cgroup_subsys_state *
+css_next_child(struct cgroup_subsys_state *pos_css,
+	       struct cgroup_subsys_state *parent_css)
 {
+	struct cgroup *pos = pos_css ? pos_css->cgroup : NULL;
+	struct cgroup *cgrp = parent_css->cgroup;
 	struct cgroup *next;
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
@@ -3081,59 +3088,64 @@ struct cgroup *cgroup_next_child(struct cgroup *pos, struct cgroup *cgrp)
 				break;
 	}
 
-	if (&next->sibling != &cgrp->children)
-		return next;
-	return NULL;
+	if (&next->sibling == &cgrp->children)
+		return NULL;
+
+	if (parent_css->ss)
+		return cgroup_css(next, parent_css->ss->subsys_id);
+	else
+		return &next->dummy_css;
 }
-EXPORT_SYMBOL_GPL(cgroup_next_child);
+EXPORT_SYMBOL_GPL(css_next_child);
 
 /**
- * cgroup_next_descendant_pre - find the next descendant for pre-order walk
+ * css_next_descendant_pre - find the next descendant for pre-order walk
  * @pos: the current position (%NULL to initiate traversal)
- * @cgroup: cgroup whose descendants to walk
+ * @root: css whose descendants to walk
  *
- * To be used by cgroup_for_each_descendant_pre().  Find the next
- * descendant to visit for pre-order traversal of @cgroup's descendants.
+ * To be used by css_for_each_descendant_pre().  Find the next descendant
+ * to visit for pre-order traversal of @root's descendants.
  *
  * While this function requires RCU read locking, it doesn't require the
  * whole traversal to be contained in a single RCU critical section.  This
  * function will return the correct next descendant as long as both @pos
- * and @cgroup are accessible and @pos is a descendant of @cgroup.
+ * and @root are accessible and @pos is a descendant of @root.
  */
-struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
-					  struct cgroup *cgroup)
+struct cgroup_subsys_state *
+css_next_descendant_pre(struct cgroup_subsys_state *pos,
+			struct cgroup_subsys_state *root)
 {
-	struct cgroup *next;
+	struct cgroup_subsys_state *next;
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
-	/* if first iteration, pretend we just visited @cgroup */
+	/* if first iteration, pretend we just visited @root */
 	if (!pos)
-		pos = cgroup;
+		pos = root;
 
 	/* visit the first child if exists */
-	next = cgroup_next_child(NULL, pos);
+	next = css_next_child(NULL, pos);
 	if (next)
 		return next;
 
 	/* no child, visit my or the closest ancestor's next sibling */
-	while (pos != cgroup) {
-		next = cgroup_next_child(pos, pos->parent);
+	while (pos != root) {
+		next = css_next_child(pos, css_parent(pos));
 		if (next)
 			return next;
-		pos = pos->parent;
+		pos = css_parent(pos);
 	}
 
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
+EXPORT_SYMBOL_GPL(css_next_descendant_pre);
 
 /**
- * cgroup_rightmost_descendant - return the rightmost descendant of a cgroup
- * @pos: cgroup of interest
+ * css_rightmost_descendant - return the rightmost descendant of a css
+ * @pos: css of interest
  *
- * Return the rightmost descendant of @pos.  If there's no descendant,
- * @pos is returned.  This can be used during pre-order traversal to skip
+ * Return the rightmost descendant of @pos.  If there's no descendant, @pos
+ * is returned.  This can be used during pre-order traversal to skip
  * subtree of @pos.
  *
  * While this function requires RCU read locking, it doesn't require the
@@ -3141,9 +3153,10 @@ EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
  * function will return the correct rightmost descendant as long as @pos is
  * accessible.
  */
-struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos)
+struct cgroup_subsys_state *
+css_rightmost_descendant(struct cgroup_subsys_state *pos)
 {
-	struct cgroup *last, *tmp;
+	struct cgroup_subsys_state *last, *tmp;
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
@@ -3151,62 +3164,64 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos)
 		last = pos;
 		/* ->prev isn't RCU safe, walk ->next till the end */
 		pos = NULL;
-		cgroup_for_each_child(tmp, last)
+		css_for_each_child(tmp, last)
 			pos = tmp;
 	} while (pos);
 
 	return last;
 }
-EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant);
+EXPORT_SYMBOL_GPL(css_rightmost_descendant);
 
-static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
+static struct cgroup_subsys_state *
+css_leftmost_descendant(struct cgroup_subsys_state *pos)
 {
-	struct cgroup *last;
+	struct cgroup_subsys_state *last;
 
 	do {
 		last = pos;
-		pos = cgroup_next_child(NULL, pos);
+		pos = css_next_child(NULL, pos);
 	} while (pos);
 
 	return last;
 }
 
 /**
- * cgroup_next_descendant_post - find the next descendant for post-order walk
+ * css_next_descendant_post - find the next descendant for post-order walk
  * @pos: the current position (%NULL to initiate traversal)
- * @cgroup: cgroup whose descendants to walk
+ * @root: css whose descendants to walk
  *
- * To be used by cgroup_for_each_descendant_post().  Find the next
- * descendant to visit for post-order traversal of @cgroup's descendants.
+ * To be used by css_for_each_descendant_post().  Find the next descendant
+ * to visit for post-order traversal of @root's descendants.
  *
  * While this function requires RCU read locking, it doesn't require the
  * whole traversal to be contained in a single RCU critical section.  This
  * function will return the correct next descendant as long as both @pos
  * and @cgroup are accessible and @pos is a descendant of @cgroup.
  */
-struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
-					   struct cgroup *cgroup)
+struct cgroup_subsys_state *
+css_next_descendant_post(struct cgroup_subsys_state *pos,
+			 struct cgroup_subsys_state *root)
 {
-	struct cgroup *next;
+	struct cgroup_subsys_state *next;
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
 	/* if first iteration, visit the leftmost descendant */
 	if (!pos) {
-		next = cgroup_leftmost_descendant(cgroup);
-		return next != cgroup ? next : NULL;
+		next = css_leftmost_descendant(root);
+		return next != root ? next : NULL;
 	}
 
 	/* if there's an unvisited sibling, visit its leftmost descendant */
-	next = cgroup_next_child(pos, pos->parent);
+	next = css_next_child(pos, css_parent(pos));
 	if (next)
-		return cgroup_leftmost_descendant(next);
+		return css_leftmost_descendant(next);
 
 	/* no sibling left, visit parent */
-	next = pos->parent;
-	return next != cgroup ? next : NULL;
+	next = css_parent(pos);
+	return next != root ? next : NULL;
 }
-EXPORT_SYMBOL_GPL(cgroup_next_descendant_post);
+EXPORT_SYMBOL_GPL(css_next_descendant_post);
 
 void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
 	__acquires(css_set_lock)
@@ -4549,9 +4564,9 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	/*
 	 * Mark @cgrp dead.  This prevents further task migration and child
 	 * creation by disabling cgroup_lock_live_group().  Note that
-	 * CGRP_DEAD assertion is depended upon by cgroup_next_child() to
+	 * CGRP_DEAD assertion is depended upon by css_next_child() to
 	 * resume iteration after dropping RCU read lock.  See
-	 * cgroup_next_child() for details.
+	 * css_next_child() for details.
 	 */
 	set_bit(CGRP_DEAD, &cgrp->flags);
 
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 19613ba51444..98ca48d9ceb4 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -50,11 +50,6 @@ static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
 	return css ? container_of(css, struct freezer, css) : NULL;
 }
 
-static inline struct freezer *cgroup_freezer(struct cgroup *cgroup)
-{
-	return css_freezer(cgroup_css(cgroup, freezer_subsys_id));
-}
-
 static inline struct freezer *task_freezer(struct task_struct *task)
 {
 	return css_freezer(task_css(task, freezer_subsys_id));
@@ -120,7 +115,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css)
 	/*
 	 * The following double locking and freezing state inheritance
 	 * guarantee that @cgroup can never escape ancestors' freezing
-	 * states.  See cgroup_for_each_descendant_pre() for details.
+	 * states.  See css_for_each_descendant_pre() for details.
 	 */
 	if (parent)
 		spin_lock_irq(&parent->lock);
@@ -262,7 +257,7 @@ out:
 static void update_if_frozen(struct cgroup_subsys_state *css)
 {
 	struct freezer *freezer = css_freezer(css);
-	struct cgroup *pos;
+	struct cgroup_subsys_state *pos;
 	struct cgroup_iter it;
 	struct task_struct *task;
 
@@ -275,8 +270,8 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
 		goto out_unlock;
 
 	/* are all (live) children frozen? */
-	cgroup_for_each_child(pos, css->cgroup) {
-		struct freezer *child = cgroup_freezer(pos);
+	css_for_each_child(pos, css) {
+		struct freezer *child = css_freezer(pos);
 
 		if ((child->state & CGROUP_FREEZER_ONLINE) &&
 		    !(child->state & CGROUP_FROZEN))
@@ -309,13 +304,13 @@ out_unlock:
 static int freezer_read(struct cgroup_subsys_state *css, struct cftype *cft,
 			struct seq_file *m)
 {
-	struct cgroup *pos;
+	struct cgroup_subsys_state *pos;
 
 	rcu_read_lock();
 
 	/* update states bottom-up */
-	cgroup_for_each_descendant_post(pos, css->cgroup)
-		update_if_frozen(cgroup_css(pos, freezer_subsys_id));
+	css_for_each_descendant_post(pos, css)
+		update_if_frozen(pos);
 	update_if_frozen(css);
 
 	rcu_read_unlock();
@@ -396,7 +391,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
  */
 static void freezer_change_state(struct freezer *freezer, bool freeze)
 {
-	struct cgroup *pos;
+	struct cgroup_subsys_state *pos;
 
 	/* update @freezer */
 	spin_lock_irq(&freezer->lock);
@@ -409,8 +404,8 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
 	 * CGROUP_FREEZING_PARENT.
 	 */
 	rcu_read_lock();
-	cgroup_for_each_descendant_pre(pos, freezer->css.cgroup) {
-		struct freezer *pos_f = cgroup_freezer(pos);
+	css_for_each_descendant_pre(pos, &freezer->css) {
+		struct freezer *pos_f = css_freezer(pos);
 		struct freezer *parent = parent_freezer(pos_f);
 
 		/*
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 89b76e1d3aa1..be4f5036ea5e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -210,29 +210,29 @@ static struct cpuset top_cpuset = {
 /**
  * cpuset_for_each_child - traverse online children of a cpuset
  * @child_cs: loop cursor pointing to the current child
- * @pos_cgrp: used for iteration
+ * @pos_css: used for iteration
  * @parent_cs: target cpuset to walk children of
  *
  * Walk @child_cs through the online children of @parent_cs.  Must be used
  * with RCU read locked.
  */
-#define cpuset_for_each_child(child_cs, pos_cgrp, parent_cs)		\
-	cgroup_for_each_child((pos_cgrp), (parent_cs)->css.cgroup)	\
-		if (is_cpuset_online(((child_cs) = cgroup_cs((pos_cgrp)))))
+#define cpuset_for_each_child(child_cs, pos_css, parent_cs)		\
+	css_for_each_child((pos_css), &(parent_cs)->css)		\
+		if (is_cpuset_online(((child_cs) = css_cs((pos_css)))))
 
 /**
  * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
  * @des_cs: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
+ * @pos_css: used for iteration
  * @root_cs: target cpuset to walk ancestor of
  *
  * Walk @des_cs through the online descendants of @root_cs.  Must be used
- * with RCU read locked.  The caller may modify @pos_cgrp by calling
- * cgroup_rightmost_descendant() to skip subtree.
+ * with RCU read locked.  The caller may modify @pos_css by calling
+ * css_rightmost_descendant() to skip subtree.
  */
-#define cpuset_for_each_descendant_pre(des_cs, pos_cgrp, root_cs)	\
-	cgroup_for_each_descendant_pre((pos_cgrp), (root_cs)->css.cgroup) \
-		if (is_cpuset_online(((des_cs) = cgroup_cs((pos_cgrp)))))
+#define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs)	\
+	css_for_each_descendant_pre((pos_css), &(root_cs)->css)		\
+		if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
 
 /*
  * There are two global mutexes guarding cpuset structures - cpuset_mutex
@@ -430,7 +430,7 @@ static void free_trial_cpuset(struct cpuset *trial)
 
 static int validate_change(struct cpuset *cur, struct cpuset *trial)
 {
-	struct cgroup *cgrp;
+	struct cgroup_subsys_state *css;
 	struct cpuset *c, *par;
 	int ret;
 
@@ -438,7 +438,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
 
 	/* Each of our child cpusets must be a subset of us */
 	ret = -EBUSY;
-	cpuset_for_each_child(c, cgrp, cur)
+	cpuset_for_each_child(c, css, cur)
 		if (!is_cpuset_subset(c, trial))
 			goto out;
 
@@ -459,7 +459,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
 	 * overlap
 	 */
 	ret = -EINVAL;
-	cpuset_for_each_child(c, cgrp, par) {
+	cpuset_for_each_child(c, css, par) {
 		if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
 		    c != cur &&
 		    cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
@@ -508,13 +508,13 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
 				    struct cpuset *root_cs)
 {
 	struct cpuset *cp;
-	struct cgroup *pos_cgrp;
+	struct cgroup_subsys_state *pos_css;
 
 	rcu_read_lock();
-	cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
+	cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
 		/* skip the whole subtree if @cp doesn't have any CPU */
 		if (cpumask_empty(cp->cpus_allowed)) {
-			pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
+			pos_css = css_rightmost_descendant(pos_css);
 			continue;
 		}
 
@@ -589,7 +589,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
 	struct sched_domain_attr *dattr;  /* attributes for custom domains */
 	int ndoms = 0;		/* number of sched domains in result */
 	int nslot;		/* next empty doms[] struct cpumask slot */
-	struct cgroup *pos_cgrp;
+	struct cgroup_subsys_state *pos_css;
 
 	doms = NULL;
 	dattr = NULL;
@@ -618,7 +618,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
 	csn = 0;
 
 	rcu_read_lock();
-	cpuset_for_each_descendant_pre(cp, pos_cgrp, &top_cpuset) {
+	cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
 		/*
 		 * Continue traversing beyond @cp iff @cp has some CPUs and
 		 * isn't load balancing.  The former is obvious.  The
@@ -635,7 +635,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
 			csa[csn++] = cp;
 
 		/* skip @cp's subtree */
-		pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
+		pos_css = css_rightmost_descendant(pos_css);
 	}
 	rcu_read_unlock();
 
@@ -886,16 +886,16 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs,
 				      bool update_root, struct ptr_heap *heap)
 {
 	struct cpuset *cp;
-	struct cgroup *pos_cgrp;
+	struct cgroup_subsys_state *pos_css;
 
 	if (update_root)
 		update_tasks_cpumask(root_cs, heap);
 
 	rcu_read_lock();
-	cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
+	cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
 		/* skip the whole subtree if @cp have some CPU */
 		if (!cpumask_empty(cp->cpus_allowed)) {
-			pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
+			pos_css = css_rightmost_descendant(pos_css);
 			continue;
 		}
 		if (!css_tryget(&cp->css))
@@ -1143,16 +1143,16 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs,
 				       bool update_root, struct ptr_heap *heap)
 {
 	struct cpuset *cp;
-	struct cgroup *pos_cgrp;
+	struct cgroup_subsys_state *pos_css;
 
 	if (update_root)
 		update_tasks_nodemask(root_cs, heap);
 
 	rcu_read_lock();
-	cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
+	cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
 		/* skip the whole subtree if @cp have some CPU */
 		if (!nodes_empty(cp->mems_allowed)) {
-			pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
+			pos_css = css_rightmost_descendant(pos_css);
 			continue;
 		}
 		if (!css_tryget(&cp->css))
@@ -1973,7 +1973,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 	struct cpuset *cs = css_cs(css);
 	struct cpuset *parent = parent_cs(cs);
 	struct cpuset *tmp_cs;
-	struct cgroup *pos_cgrp;
+	struct cgroup_subsys_state *pos_css;
 
 	if (!parent)
 		return 0;
@@ -2005,7 +2005,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 	 * (and likewise for mems) to the new cgroup.
 	 */
 	rcu_read_lock();
-	cpuset_for_each_child(tmp_cs, pos_cgrp, parent) {
+	cpuset_for_each_child(tmp_cs, pos_css, parent) {
 		if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) {
 			rcu_read_unlock();
 			goto out_unlock;
@@ -2252,10 +2252,10 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 	/* if cpus or mems changed, we need to propagate to descendants */
 	if (cpus_updated || mems_updated) {
 		struct cpuset *cs;
-		struct cgroup *pos_cgrp;
+		struct cgroup_subsys_state *pos_css;
 
 		rcu_read_lock();
-		cpuset_for_each_descendant_pre(cs, pos_cgrp, &top_cpuset) {
+		cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
 			if (!css_tryget(&cs->css))
 				continue;
 			rcu_read_unlock();
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ab64dfc84f8c..2285319e23a9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1082,7 +1082,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
 		struct mem_cgroup *last_visited)
 {
-	struct cgroup *prev_cgroup, *next_cgroup;
+	struct cgroup_subsys_state *prev_css, *next_css;
 
 	/*
 	 * Root is not visited by cgroup iterators so it needs an
@@ -1091,11 +1091,9 @@ static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
 	if (!last_visited)
 		return root;
 
-	prev_cgroup = (last_visited == root) ? NULL
-		: last_visited->css.cgroup;
+	prev_css = (last_visited == root) ? NULL : &last_visited->css;
 skip_node:
-	next_cgroup = cgroup_next_descendant_pre(
-			prev_cgroup, root->css.cgroup);
+	next_css = css_next_descendant_pre(prev_css, &root->css);
 
 	/*
 	 * Even if we found a group we have to make sure it is
@@ -1104,13 +1102,13 @@ skip_node:
 	 * last_visited css is safe to use because it is
 	 * protected by css_get and the tree walk is rcu safe.
 	 */
-	if (next_cgroup) {
-		struct mem_cgroup *mem = mem_cgroup_from_cont(
-				next_cgroup);
+	if (next_css) {
+		struct mem_cgroup *mem = mem_cgroup_from_css(next_css);
+
 		if (css_tryget(&mem->css))
 			return mem;
 		else {
-			prev_cgroup = next_cgroup;
+			prev_css = next_css;
 			goto skip_node;
 		}
 	}
@@ -4939,10 +4937,10 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
  */
 static inline bool __memcg_has_children(struct mem_cgroup *memcg)
 {
-	struct cgroup *pos;
+	struct cgroup_subsys_state *pos;
 
 	/* bounce at first found */
-	cgroup_for_each_child(pos, memcg->css.cgroup)
+	css_for_each_child(pos, &memcg->css)
 		return true;
 	return false;
 }
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index e0ca464fa854..9bf230aa28b0 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -56,11 +56,6 @@ static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
 	return s ? container_of(s, struct dev_cgroup, css) : NULL;
 }
 
-static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
-{
-	return css_to_devcgroup(cgroup_css(cgroup, devices_subsys_id));
-}
-
 static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
 {
 	return css_to_devcgroup(task_css(task, devices_subsys_id));
@@ -447,13 +442,13 @@ static void revalidate_active_exceptions(struct dev_cgroup *devcg)
 static int propagate_exception(struct dev_cgroup *devcg_root,
 			       struct dev_exception_item *ex)
 {
-	struct cgroup *root = devcg_root->css.cgroup, *pos;
+	struct cgroup_subsys_state *pos;
 	int rc = 0;
 
 	rcu_read_lock();
 
-	cgroup_for_each_descendant_pre(pos, root) {
-		struct dev_cgroup *devcg = cgroup_to_devcgroup(pos);
+	css_for_each_descendant_pre(pos, &devcg_root->css) {
+		struct dev_cgroup *devcg = css_to_devcgroup(pos);
 
 		/*
 		 * Because devcgroup_mutex is held, no devcg will become
-- 
cgit v1.2.3


From bd8815a6d802fc16a7a106e170593aa05dc17e72 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Aug 2013 20:11:27 -0400
Subject: cgroup: make css_for_each_descendant() and friends include the origin
 css in the iteration

Previously, all css descendant iterators didn't include the origin
(root of subtree) css in the iteration.  The reasons were maintaining
consistency with css_for_each_child() and that at the time of
introduction more use cases needed skipping the origin anyway;
however, given that css_is_descendant() considers self to be a
descendant, omitting the origin css has become more confusing and
looking at the accumulated use cases rather clearly indicates that
including origin would result in simpler code overall.

While this is a change which can easily lead to subtle bugs, cgroup
API including the iterators has recently gone through major
restructuring and no out-of-tree changes will be applicable without
adjustments making this a relatively acceptable opportunity for this
type of change.

The conversions are mostly straight-forward.  If the iteration block
had explicit origin handling before or after, it's moved inside the
iteration.  If not, if (pos == origin) continue; is added.  Some
conversions add extra reference get/put around origin handling by
consolidating origin handling and the rest.  While the extra ref
operations aren't strictly necessary, this shouldn't cause any
noticeable difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
---
 block/blk-cgroup.c       |  8 ++------
 block/blk-cgroup.h       |  4 +++-
 block/blk-throttle.c     |  3 ---
 include/linux/cgroup.h   | 17 +++++++++--------
 kernel/cgroup.c          | 29 +++++++++++------------------
 kernel/cgroup_freezer.c  | 29 ++++++++++++++++-------------
 kernel/cpuset.c          | 42 ++++++++++++++++++++++++++----------------
 mm/memcontrol.c          |  9 +--------
 security/device_cgroup.c |  2 +-
 9 files changed, 69 insertions(+), 74 deletions(-)

(limited to 'security')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 54ad00292edf..e90c7c164c83 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -615,12 +615,10 @@ u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
 	struct blkcg_policy *pol = blkcg_policy[pd->plid];
 	struct blkcg_gq *pos_blkg;
 	struct cgroup_subsys_state *pos_css;
-	u64 sum;
+	u64 sum = 0;
 
 	lockdep_assert_held(pd->blkg->q->queue_lock);
 
-	sum = blkg_stat_read((void *)pd + off);
-
 	rcu_read_lock();
 	blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
 		struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
@@ -650,13 +648,11 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
 	struct blkcg_policy *pol = blkcg_policy[pd->plid];
 	struct blkcg_gq *pos_blkg;
 	struct cgroup_subsys_state *pos_css;
-	struct blkg_rwstat sum;
+	struct blkg_rwstat sum = { };
 	int i;
 
 	lockdep_assert_held(pd->blkg->q->queue_lock);
 
-	sum = blkg_rwstat_read((void *)pd + off);
-
 	rcu_read_lock();
 	blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
 		struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 855538630300..ae6969a7ffd4 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -291,6 +291,7 @@ struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
  * read locked.  If called under either blkcg or queue lock, the iteration
  * is guaranteed to include all and only online blkgs.  The caller may
  * update @pos_css by calling css_rightmost_descendant() to skip subtree.
+ * @p_blkg is included in the iteration and the first node to be visited.
  */
 #define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
 	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
@@ -304,7 +305,8 @@ struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
  * @p_blkg: target blkg to walk descendants of
  *
  * Similar to blkg_for_each_descendant_pre() but performs post-order
- * traversal instead.  Synchronization rules are the same.
+ * traversal instead.  Synchronization rules are the same.  @p_blkg is
+ * included in the iteration and the last node to be visited.
  */
 #define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
 	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 8cefa7f8590e..8331aba9426f 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1379,7 +1379,6 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
 	 * restrictions in the whole hierarchy and allows them to bypass
 	 * blk-throttle.
 	 */
-	tg_update_has_rules(tg);
 	blkg_for_each_descendant_pre(blkg, pos_css, ctx.blkg)
 		tg_update_has_rules(blkg_to_tg(blkg));
 
@@ -1639,8 +1638,6 @@ void blk_throtl_drain(struct request_queue *q)
 	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg)
 		tg_drain_bios(&blkg_to_tg(blkg)->service_queue);
 
-	tg_drain_bios(&td_root_tg(td)->service_queue);
-
 	/* finally, transfer bios from top-level tg's into the td */
 	tg_drain_bios(&td->service_queue);
 
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c40e508d54e9..8ec5b0f38292 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -798,7 +798,8 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos);
  * @pos: the css * to use as the loop cursor
  * @root: css whose descendants to walk
  *
- * Walk @root's descendants.  Must be called under rcu_read_lock().  A
+ * Walk @root's descendants.  @root is included in the iteration and the
+ * first node to be visited.  Must be called under rcu_read_lock().  A
  * descendant css which hasn't finished ->css_online() or already has
  * finished ->css_offline() may show up during traversal and it's each
  * subsystem's responsibility to verify that each @pos is alive.
@@ -820,13 +821,12 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos);
  *
  * my_update_state(@css)
  * {
- *	Lock @css;
- *	Update @css's state;
- *	Unlock @css;
- *
  *	css_for_each_descendant_pre(@pos, @css) {
  *		Lock @pos;
- *		Verify @pos is alive and inherit state from @pos's parent;
+ *		if (@pos == @css)
+ *			Update @css's state;
+ *		else
+ *			Verify @pos is alive and inherit state from its parent;
  *		Unlock @pos;
  *	}
  * }
@@ -864,8 +864,9 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
  * @css: css whose descendants to walk
  *
  * Similar to css_for_each_descendant_pre() but performs post-order
- * traversal instead.  Note that the walk visibility guarantee described in
- * pre-order walk doesn't apply the same to post-order walks.
+ * traversal instead.  @root is included in the iteration and the last
+ * node to be visited.  Note that the walk visibility guarantee described
+ * in pre-order walk doesn't apply the same to post-order walks.
  */
 #define css_for_each_descendant_post(pos, css)				\
 	for ((pos) = css_next_descendant_post(NULL, (css)); (pos);	\
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c02a288a4e3d..52f0498db946 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2868,17 +2868,6 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
 
 	mutex_unlock(&cgroup_mutex);
 
-	/* @root always needs to be updated */
-	inode = root->dentry->d_inode;
-	mutex_lock(&inode->i_mutex);
-	mutex_lock(&cgroup_mutex);
-	ret = cgroup_addrm_files(root, cfts, is_add);
-	mutex_unlock(&cgroup_mutex);
-	mutex_unlock(&inode->i_mutex);
-
-	if (ret)
-		goto out_deact;
-
 	/* add/rm files for all cgroups created before */
 	rcu_read_lock();
 	css_for_each_descendant_pre(css, cgroup_css(root, ss->subsys_id)) {
@@ -2907,7 +2896,6 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
 	}
 	rcu_read_unlock();
 	dput(prev);
-out_deact:
 	deactivate_super(sb);
 	return ret;
 }
@@ -3099,7 +3087,8 @@ EXPORT_SYMBOL_GPL(css_next_child);
  * @root: css whose descendants to walk
  *
  * To be used by css_for_each_descendant_pre().  Find the next descendant
- * to visit for pre-order traversal of @root's descendants.
+ * to visit for pre-order traversal of @root's descendants.  @root is
+ * included in the iteration and the first node to be visited.
  *
  * While this function requires RCU read locking, it doesn't require the
  * whole traversal to be contained in a single RCU critical section.  This
@@ -3114,9 +3103,9 @@ css_next_descendant_pre(struct cgroup_subsys_state *pos,
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
-	/* if first iteration, pretend we just visited @root */
+	/* if first iteration, visit @root */
 	if (!pos)
-		pos = root;
+		return root;
 
 	/* visit the first child if exists */
 	next = css_next_child(NULL, pos);
@@ -3186,7 +3175,8 @@ css_leftmost_descendant(struct cgroup_subsys_state *pos)
  * @root: css whose descendants to walk
  *
  * To be used by css_for_each_descendant_post().  Find the next descendant
- * to visit for post-order traversal of @root's descendants.
+ * to visit for post-order traversal of @root's descendants.  @root is
+ * included in the iteration and the last node to be visited.
  *
  * While this function requires RCU read locking, it doesn't require the
  * whole traversal to be contained in a single RCU critical section.  This
@@ -3207,14 +3197,17 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
 		return next != root ? next : NULL;
 	}
 
+	/* if we visited @root, we're done */
+	if (pos == root)
+		return NULL;
+
 	/* if there's an unvisited sibling, visit its leftmost descendant */
 	next = css_next_child(pos, css_parent(pos));
 	if (next)
 		return css_leftmost_descendant(next);
 
 	/* no sibling left, visit parent */
-	next = css_parent(pos);
-	return next != root ? next : NULL;
+	return css_parent(pos);
 }
 EXPORT_SYMBOL_GPL(css_next_descendant_post);
 
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 224da9aa27f5..f0ff64d0ebaa 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -311,7 +311,6 @@ static int freezer_read(struct cgroup_subsys_state *css, struct cftype *cft,
 	/* update states bottom-up */
 	css_for_each_descendant_post(pos, css)
 		update_if_frozen(pos);
-	update_if_frozen(css);
 
 	rcu_read_unlock();
 
@@ -391,11 +390,6 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
 {
 	struct cgroup_subsys_state *pos;
 
-	/* update @freezer */
-	spin_lock_irq(&freezer->lock);
-	freezer_apply_state(freezer, freeze, CGROUP_FREEZING_SELF);
-	spin_unlock_irq(&freezer->lock);
-
 	/*
 	 * Update all its descendants in pre-order traversal.  Each
 	 * descendant will try to inherit its parent's FREEZING state as
@@ -406,14 +400,23 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
 		struct freezer *pos_f = css_freezer(pos);
 		struct freezer *parent = parent_freezer(pos_f);
 
-		/*
-		 * Our update to @parent->state is already visible which is
-		 * all we need.  No need to lock @parent.  For more info on
-		 * synchronization, see freezer_post_create().
-		 */
 		spin_lock_irq(&pos_f->lock);
-		freezer_apply_state(pos_f, parent->state & CGROUP_FREEZING,
-				    CGROUP_FREEZING_PARENT);
+
+		if (pos_f == freezer) {
+			freezer_apply_state(pos_f, freeze,
+					    CGROUP_FREEZING_SELF);
+		} else {
+			/*
+			 * Our update to @parent->state is already visible
+			 * which is all we need.  No need to lock @parent.
+			 * For more info on synchronization, see
+			 * freezer_post_create().
+			 */
+			freezer_apply_state(pos_f,
+					    parent->state & CGROUP_FREEZING,
+					    CGROUP_FREEZING_PARENT);
+		}
+
 		spin_unlock_irq(&pos_f->lock);
 	}
 	rcu_read_unlock();
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index bf69717325b4..72a0383f382f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -222,7 +222,8 @@ static struct cpuset top_cpuset = {
  *
  * Walk @des_cs through the online descendants of @root_cs.  Must be used
  * with RCU read locked.  The caller may modify @pos_css by calling
- * css_rightmost_descendant() to skip subtree.
+ * css_rightmost_descendant() to skip subtree.  @root_cs is included in the
+ * iteration and the first node to be visited.
  */
 #define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs)	\
 	css_for_each_descendant_pre((pos_css), &(root_cs)->css)		\
@@ -506,6 +507,9 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
 
 	rcu_read_lock();
 	cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
+		if (cp == root_cs)
+			continue;
+
 		/* skip the whole subtree if @cp doesn't have any CPU */
 		if (cpumask_empty(cp->cpus_allowed)) {
 			pos_css = css_rightmost_descendant(pos_css);
@@ -613,6 +617,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
 
 	rcu_read_lock();
 	cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
+		if (cp == &top_cpuset)
+			continue;
 		/*
 		 * Continue traversing beyond @cp iff @cp has some CPUs and
 		 * isn't load balancing.  The former is obvious.  The
@@ -875,15 +881,17 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs,
 	struct cpuset *cp;
 	struct cgroup_subsys_state *pos_css;
 
-	if (update_root)
-		update_tasks_cpumask(root_cs, heap);
-
 	rcu_read_lock();
 	cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
-		/* skip the whole subtree if @cp have some CPU */
-		if (!cpumask_empty(cp->cpus_allowed)) {
-			pos_css = css_rightmost_descendant(pos_css);
-			continue;
+		if (cp == root_cs) {
+			if (!update_root)
+				continue;
+		} else {
+			/* skip the whole subtree if @cp have some CPU */
+			if (!cpumask_empty(cp->cpus_allowed)) {
+				pos_css = css_rightmost_descendant(pos_css);
+				continue;
+			}
 		}
 		if (!css_tryget(&cp->css))
 			continue;
@@ -1130,15 +1138,17 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs,
 	struct cpuset *cp;
 	struct cgroup_subsys_state *pos_css;
 
-	if (update_root)
-		update_tasks_nodemask(root_cs, heap);
-
 	rcu_read_lock();
 	cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
-		/* skip the whole subtree if @cp have some CPU */
-		if (!nodes_empty(cp->mems_allowed)) {
-			pos_css = css_rightmost_descendant(pos_css);
-			continue;
+		if (cp == root_cs) {
+			if (!update_root)
+				continue;
+		} else {
+			/* skip the whole subtree if @cp have some CPU */
+			if (!nodes_empty(cp->mems_allowed)) {
+				pos_css = css_rightmost_descendant(pos_css);
+				continue;
+			}
 		}
 		if (!css_tryget(&cp->css))
 			continue;
@@ -2237,7 +2247,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 
 		rcu_read_lock();
 		cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
-			if (!css_tryget(&cs->css))
+			if (cs == &top_cpuset || !css_tryget(&cs->css))
 				continue;
 			rcu_read_unlock();
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2885e3e85047..b89d4cbc0c08 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1079,14 +1079,7 @@ static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
 {
 	struct cgroup_subsys_state *prev_css, *next_css;
 
-	/*
-	 * Root is not visited by cgroup iterators so it needs an
-	 * explicit visit.
-	 */
-	if (!last_visited)
-		return root;
-
-	prev_css = (last_visited == root) ? NULL : &last_visited->css;
+	prev_css = last_visited ? &last_visited->css : NULL;
 skip_node:
 	next_css = css_next_descendant_pre(prev_css, &root->css);
 
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 9bf230aa28b0..c123628d3f84 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -456,7 +456,7 @@ static int propagate_exception(struct dev_cgroup *devcg_root,
 		 * methods), and online ones are safe to access outside RCU
 		 * read lock without bumping refcnt.
 		 */
-		if (!is_devcg_online(devcg))
+		if (pos == &devcg_root->css || !is_devcg_online(devcg))
 			continue;
 
 		rcu_read_unlock();
-- 
cgit v1.2.3


From 10289b0f738e8b301969f2288c4942455f1b1e59 Mon Sep 17 00:00:00 2001
From: Rafal Krypa <r.krypa@samsung.com>
Date: Fri, 9 Aug 2013 11:47:07 +0200
Subject: Smack: parse multiple rules per write to load2, up to PAGE_SIZE-1
 bytes

Smack interface for loading rules has always parsed only single rule from
data written to it. This requires user program to call one write() per
each rule it wants to load.
This change makes it possible to write multiple rules, separated by new
line character. Smack will load at most PAGE_SIZE-1 characters and properly
return number of processed bytes. In case when user buffer is larger, it
will be additionally truncated. All characters after last \n will not get
parsed to avoid partial rule near input buffer boundary.

Signed-off-by: Rafal Krypa <r.krypa@samsung.com>
---
 security/smack/smackfs.c | 167 +++++++++++++++++++++++------------------------
 1 file changed, 82 insertions(+), 85 deletions(-)

(limited to 'security')

diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index a07e93f00a0f..80f4b4a45725 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -368,56 +368,43 @@ static int smk_parse_rule(const char *data, struct smack_parsed_rule *rule,
  * @data: string to be parsed, null terminated
  * @rule: Will be filled with Smack parsed rule
  * @import: if non-zero, import labels
- * @change: if non-zero, data is from /smack/change-rule
+ * @tokens: numer of substrings expected in data
  *
- * Returns 0 on success, -1 on failure
+ * Returns number of processed bytes on success, -1 on failure.
  */
-static int smk_parse_long_rule(const char *data, struct smack_parsed_rule *rule,
-				int import, int change)
+static ssize_t smk_parse_long_rule(char *data, struct smack_parsed_rule *rule,
+				int import, int tokens)
 {
-	char *subject;
-	char *object;
-	char *access1;
-	char *access2;
-	int datalen;
-	int rc = -1;
+	ssize_t cnt = 0;
+	char *tok[4];
+	int i;
 
-	/* This is inefficient */
-	datalen = strlen(data);
+	/*
+	 * Parsing the rule in-place, filling all white-spaces with '\0'
+	 */
+	for (i = 0; i < tokens; ++i) {
+		while (isspace(data[cnt]))
+			data[cnt++] = '\0';
 
-	/* Our first element can be 64 + \0 with no spaces */
-	subject = kzalloc(datalen + 1, GFP_KERNEL);
-	if (subject == NULL)
-		return -1;
-	object = kzalloc(datalen, GFP_KERNEL);
-	if (object == NULL)
-		goto free_out_s;
-	access1 = kzalloc(datalen, GFP_KERNEL);
-	if (access1 == NULL)
-		goto free_out_o;
-	access2 = kzalloc(datalen, GFP_KERNEL);
-	if (access2 == NULL)
-		goto free_out_a;
-
-	if (change) {
-		if (sscanf(data, "%s %s %s %s",
-			subject, object, access1, access2) == 4)
-			rc = smk_fill_rule(subject, object, access1, access2,
-				rule, import, 0);
-	} else {
-		if (sscanf(data, "%s %s %s", subject, object, access1) == 3)
-			rc = smk_fill_rule(subject, object, access1, NULL,
-				rule, import, 0);
+		if (data[cnt] == '\0')
+			/* Unexpected end of data */
+			return -1;
+
+		tok[i] = data + cnt;
+
+		while (data[cnt] && !isspace(data[cnt]))
+			++cnt;
 	}
+	while (isspace(data[cnt]))
+		data[cnt++] = '\0';
 
-	kfree(access2);
-free_out_a:
-	kfree(access1);
-free_out_o:
-	kfree(object);
-free_out_s:
-	kfree(subject);
-	return rc;
+	while (i < 4)
+		tok[i++] = NULL;
+
+	if (smk_fill_rule(tok[0], tok[1], tok[2], tok[3], rule, import, 0))
+		return -1;
+
+	return cnt;
 }
 
 #define SMK_FIXED24_FMT	0	/* Fixed 24byte label format */
@@ -449,9 +436,10 @@ static ssize_t smk_write_rules_list(struct file *file, const char __user *buf,
 {
 	struct smack_parsed_rule rule;
 	char *data;
-	int datalen;
-	int rc = -EINVAL;
-	int load = 0;
+	int rc;
+	int trunc = 0;
+	int tokens;
+	ssize_t cnt = 0;
 
 	/*
 	 * No partial writes.
@@ -466,11 +454,14 @@ static ssize_t smk_write_rules_list(struct file *file, const char __user *buf,
 		 */
 		if (count != SMK_OLOADLEN && count != SMK_LOADLEN)
 			return -EINVAL;
-		datalen = SMK_LOADLEN;
-	} else
-		datalen = count + 1;
+	} else {
+		if (count >= PAGE_SIZE) {
+			count = PAGE_SIZE - 1;
+			trunc = 1;
+		}
+	}
 
-	data = kzalloc(datalen, GFP_KERNEL);
+	data = kmalloc(count + 1, GFP_KERNEL);
 	if (data == NULL)
 		return -ENOMEM;
 
@@ -479,36 +470,49 @@ static ssize_t smk_write_rules_list(struct file *file, const char __user *buf,
 		goto out;
 	}
 
-	if (format == SMK_LONG_FMT) {
-		/*
-		 * Be sure the data string is terminated.
-		 */
-		data[count] = '\0';
-		if (smk_parse_long_rule(data, &rule, 1, 0))
-			goto out;
-	} else if (format == SMK_CHANGE_FMT) {
-		data[count] = '\0';
-		if (smk_parse_long_rule(data, &rule, 1, 1))
-			goto out;
-	} else {
-		/*
-		 * More on the minor hack for backward compatibility
-		 */
-		if (count == (SMK_OLOADLEN))
-			data[SMK_OLOADLEN] = '-';
-		if (smk_parse_rule(data, &rule, 1))
+	/*
+	 * In case of parsing only part of user buf,
+	 * avoid having partial rule at the data buffer
+	 */
+	if (trunc) {
+		while (count > 0 && (data[count - 1] != '\n'))
+			--count;
+		if (count == 0) {
+			rc = -EINVAL;
 			goto out;
+		}
 	}
 
-	if (rule_list == NULL) {
-		load = 1;
-		rule_list = &rule.smk_subject->smk_rules;
-		rule_lock = &rule.smk_subject->smk_rules_lock;
+	data[count] = '\0';
+	tokens = (format == SMK_CHANGE_FMT ? 4 : 3);
+	while (cnt < count) {
+		if (format == SMK_FIXED24_FMT) {
+			rc = smk_parse_rule(data, &rule, 1);
+			if (rc != 0) {
+				rc = -EINVAL;
+				goto out;
+			}
+			cnt = count;
+		} else {
+			rc = smk_parse_long_rule(data + cnt, &rule, 1, tokens);
+			if (rc <= 0) {
+				rc = -EINVAL;
+				goto out;
+			}
+			cnt += rc;
+		}
+
+		if (rule_list == NULL)
+			rc = smk_set_access(&rule, &rule.smk_subject->smk_rules,
+				&rule.smk_subject->smk_rules_lock, 1);
+		else
+			rc = smk_set_access(&rule, rule_list, rule_lock, 0);
+
+		if (rc)
+			goto out;
 	}
 
-	rc = smk_set_access(&rule, rule_list, rule_lock, load);
-	if (rc == 0)
-		rc = count;
+	rc = cnt;
 out:
 	kfree(data);
 	return rc;
@@ -1829,7 +1833,6 @@ static ssize_t smk_user_access(struct file *file, const char __user *buf,
 {
 	struct smack_parsed_rule rule;
 	char *data;
-	char *cod;
 	int res;
 
 	data = simple_transaction_get(file, buf, count);
@@ -1842,18 +1845,12 @@ static ssize_t smk_user_access(struct file *file, const char __user *buf,
 		res = smk_parse_rule(data, &rule, 0);
 	} else {
 		/*
-		 * Copy the data to make sure the string is terminated.
+		 * simple_transaction_get() returns null-terminated data
 		 */
-		cod = kzalloc(count + 1, GFP_KERNEL);
-		if (cod == NULL)
-			return -ENOMEM;
-		memcpy(cod, data, count);
-		cod[count] = '\0';
-		res = smk_parse_long_rule(cod, &rule, 0, 0);
-		kfree(cod);
+		res = smk_parse_long_rule(data, &rule, 0, 3);
 	}
 
-	if (res)
+	if (res < 0)
 		return -EINVAL;
 
 	res = smk_access(rule.smk_subject, rule.smk_object,
-- 
cgit v1.2.3


From dfe4ac28be73833556756dca6771d4274a7f1157 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Mon, 17 Jun 2013 21:25:08 +0900
Subject: apparmor: remove minimum size check for vmalloc()

This is a follow-up to commit b5b3ee6c "apparmor: no need to delay vfree()".

Since vmalloc() will do "size = PAGE_ALIGN(size);",
we don't need to check for "size >= sizeof(struct work_struct)".

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/lib.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'security')

diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index fcfe0233574c..69689922c491 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -97,11 +97,6 @@ void *__aa_kvmalloc(size_t size, gfp_t flags)
 	if (size <= (16*PAGE_SIZE))
 		buffer = kmalloc(size, flags | GFP_NOIO | __GFP_NOWARN);
 	if (!buffer) {
-		/* see kvfree for why size must be at least work_struct size
-		 * when allocated via vmalloc
-		 */
-		if (size < sizeof(struct work_struct))
-			size = sizeof(struct work_struct);
 		if (flags & __GFP_ZERO)
 			buffer = vzalloc(size);
 		else
-- 
cgit v1.2.3


From c611616cd3cb27f9605ee4954532b3fe144d951b Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 10 Jul 2013 21:03:43 -0700
Subject: apparmor: enable users to query whether apparmor is enabled

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/lsm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'security')

diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 2e2a0dd4a73f..96506dfe51ec 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -742,7 +742,7 @@ module_param_named(paranoid_load, aa_g_paranoid_load, aabool,
 
 /* Boot time disable flag */
 static bool apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE;
-module_param_named(enabled, apparmor_enabled, aabool, S_IRUSR);
+module_param_named(enabled, apparmor_enabled, bool, S_IRUGO);
 
 static int __init apparmor_enabled_setup(char *str)
 {
-- 
cgit v1.2.3


From 9d910a3bc01008d432b3bb79a69e7e3cdb4821b2 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 10 Jul 2013 21:04:43 -0700
Subject: apparmor: add a features/policy dir to interface

Add a policy directory to features to contain features that can affect
policy compilation but do not affect mediation. Eg of such features would
be types of dfa compression supported, etc.

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Kees Cook <kees@ubuntu.com>
---
 security/apparmor/apparmorfs.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'security')

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 16c15ec6f670..ad6c74892b5f 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -198,7 +198,12 @@ static struct aa_fs_entry aa_fs_entry_domain[] = {
 	{ }
 };
 
+static struct aa_fs_entry aa_fs_entry_policy[] = {
+	{}
+};
+
 static struct aa_fs_entry aa_fs_entry_features[] = {
+	AA_FS_DIR("policy",			aa_fs_entry_policy),
 	AA_FS_DIR("domain",			aa_fs_entry_domain),
 	AA_FS_DIR("file",			aa_fs_entry_file),
 	AA_FS_FILE_U64("capability",		VFS_CAP_FLAGS_MASK),
-- 
cgit v1.2.3


From dd51c84857630e77c139afe4d9bba65fc051dc3f Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 10 Jul 2013 21:05:43 -0700
Subject: apparmor: provide base for multiple profiles to be replaced at once

previously profiles had to be loaded one at a time, which could result
in cases where a replacement of a set would partially succeed, and then fail
resulting in inconsistent policy.

Allow multiple profiles to replaced "atomically" so that the replacement
either succeeds or fails for the entire set of profiles.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c            |   1 +
 security/apparmor/include/policy_unpack.h |  14 +-
 security/apparmor/policy.c                | 300 ++++++++++++++++++------------
 security/apparmor/policy_unpack.c         | 114 +++++++++---
 4 files changed, 283 insertions(+), 146 deletions(-)

(limited to 'security')

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index ad6c74892b5f..3ed56e21a9fd 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -199,6 +199,7 @@ static struct aa_fs_entry aa_fs_entry_domain[] = {
 };
 
 static struct aa_fs_entry aa_fs_entry_policy[] = {
+	AA_FS_FILE_BOOLEAN("set_load",          1),
 	{}
 };
 
diff --git a/security/apparmor/include/policy_unpack.h b/security/apparmor/include/policy_unpack.h
index a2dcccac45aa..0d7ad722b8ff 100644
--- a/security/apparmor/include/policy_unpack.h
+++ b/security/apparmor/include/policy_unpack.h
@@ -15,6 +15,18 @@
 #ifndef __POLICY_INTERFACE_H
 #define __POLICY_INTERFACE_H
 
-struct aa_profile *aa_unpack(void *udata, size_t size, const char **ns);
+#include <linux/list.h>
+
+struct aa_load_ent {
+	struct list_head list;
+	struct aa_profile *new;
+	struct aa_profile *old;
+	struct aa_profile *rename;
+};
+
+void aa_load_ent_free(struct aa_load_ent *ent);
+struct aa_load_ent *aa_load_ent_alloc(void);
+
+int aa_unpack(void *udata, size_t size, struct list_head *lh, const char **ns);
 
 #endif /* __POLICY_INTERFACE_H */
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 0f345c4dee5f..407b442c0a2c 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -472,45 +472,6 @@ static void __list_remove_profile(struct aa_profile *profile)
 		aa_put_profile(profile);
 }
 
-/**
- * __replace_profile - replace @old with @new on a list
- * @old: profile to be replaced  (NOT NULL)
- * @new: profile to replace @old with  (NOT NULL)
- *
- * Will duplicate and refcount elements that @new inherits from @old
- * and will inherit @old children.
- *
- * refcount @new for list, put @old list refcount
- *
- * Requires: namespace list lock be held, or list not be shared
- */
-static void __replace_profile(struct aa_profile *old, struct aa_profile *new)
-{
-	struct aa_policy *policy;
-	struct aa_profile *child, *tmp;
-
-	if (old->parent)
-		policy = &old->parent->base;
-	else
-		policy = &old->ns->base;
-
-	/* released when @new is freed */
-	new->parent = aa_get_profile(old->parent);
-	new->ns = aa_get_namespace(old->ns);
-	__list_add_profile(&policy->profiles, new);
-	/* inherit children */
-	list_for_each_entry_safe(child, tmp, &old->base.profiles, base.list) {
-		aa_put_profile(child->parent);
-		child->parent = aa_get_profile(new);
-		/* list refcount transferred to @new*/
-		list_move(&child->base.list, &new->base.profiles);
-	}
-
-	/* released by free_profile */
-	old->replacedby = aa_get_profile(new);
-	__list_remove_profile(old);
-}
-
 static void __profile_list_release(struct list_head *head);
 
 /**
@@ -952,25 +913,6 @@ static int replacement_allowed(struct aa_profile *profile, int noreplace,
 	return 0;
 }
 
-/**
- * __add_new_profile - simple wrapper around __list_add_profile
- * @ns: namespace that profile is being added to  (NOT NULL)
- * @policy: the policy container to add the profile to  (NOT NULL)
- * @profile: profile to add  (NOT NULL)
- *
- * add a profile to a list and do other required basic allocations
- */
-static void __add_new_profile(struct aa_namespace *ns, struct aa_policy *policy,
-			      struct aa_profile *profile)
-{
-	if (policy != &ns->base)
-		/* released on profile replacement or free_profile */
-		profile->parent = aa_get_profile((struct aa_profile *) policy);
-	__list_add_profile(&policy->profiles, profile);
-	/* released on free_profile */
-	profile->ns = aa_get_namespace(ns);
-}
-
 /**
  * aa_audit_policy - Do auditing of policy changes
  * @op: policy operation being performed
@@ -1019,6 +961,109 @@ bool aa_may_manage_policy(int op)
 	return 1;
 }
 
+static struct aa_profile *__list_lookup_parent(struct list_head *lh,
+					       struct aa_profile *profile)
+{
+	const char *base = hname_tail(profile->base.hname);
+	long len = base - profile->base.hname;
+	struct aa_load_ent *ent;
+
+	/* parent won't have trailing // so remove from len */
+	if (len <= 2)
+		return NULL;
+	len -= 2;
+
+	list_for_each_entry(ent, lh, list) {
+		if (ent->new == profile)
+			continue;
+		if (strncmp(ent->new->base.hname, profile->base.hname, len) ==
+		    0 && ent->new->base.hname[len] == 0)
+			return ent->new;
+	}
+
+	return NULL;
+}
+
+/**
+ * __replace_profile - replace @old with @new on a list
+ * @old: profile to be replaced  (NOT NULL)
+ * @new: profile to replace @old with  (NOT NULL)
+ *
+ * Will duplicate and refcount elements that @new inherits from @old
+ * and will inherit @old children.
+ *
+ * refcount @new for list, put @old list refcount
+ *
+ * Requires: namespace list lock be held, or list not be shared
+ */
+static void __replace_profile(struct aa_profile *old, struct aa_profile *new)
+{
+	struct aa_profile *child, *tmp;
+
+	if (!list_empty(&old->base.profiles)) {
+		LIST_HEAD(lh);
+		list_splice_init(&old->base.profiles, &lh);
+
+		list_for_each_entry_safe(child, tmp, &lh, base.list) {
+			struct aa_profile *p;
+
+			list_del_init(&child->base.list);
+			p = __find_child(&new->base.profiles, child->base.name);
+			if (p) {
+				/* @p replaces @child  */
+				__replace_profile(child, p);
+				continue;
+			}
+
+			/* inherit @child and its children */
+			/* TODO: update hname of inherited children */
+			/* list refcount transferred to @new */
+			list_add(&child->base.list, &new->base.profiles);
+			aa_put_profile(child->parent);
+			child->parent = aa_get_profile(new);
+		}
+	}
+
+	if (!new->parent)
+		new->parent = aa_get_profile(old->parent);
+	/* released by free_profile */
+	old->replacedby = aa_get_profile(new);
+
+	if (list_empty(&new->base.list)) {
+		/* new is not on a list already */
+		list_replace_init(&old->base.list, &new->base.list);
+		aa_get_profile(new);
+		aa_put_profile(old);
+	} else
+		__list_remove_profile(old);
+}
+
+/**
+ * __lookup_replace - lookup replacement information for a profile
+ * @ns - namespace the lookup occurs in
+ * @hname - name of profile to lookup
+ * @noreplace - true if not replacing an existing profile
+ * @p - Returns: profile to be replaced
+ * @info - Returns: info string on why lookup failed
+ *
+ * Returns: profile to replace (no ref) on success else ptr error
+ */
+static int __lookup_replace(struct aa_namespace *ns, const char *hname,
+			    bool noreplace, struct aa_profile **p,
+			    const char **info)
+{
+	*p = aa_get_profile(__lookup_profile(&ns->base, hname));
+	if (*p) {
+		int error = replacement_allowed(*p, noreplace, info);
+		if (error) {
+			*info = "profile can not be replaced";
+			return error;
+		}
+	}
+
+	return 0;
+}
+
 /**
  * aa_replace_profiles - replace profile(s) on the profile list
  * @udata: serialized data stream  (NOT NULL)
@@ -1033,21 +1078,17 @@ bool aa_may_manage_policy(int op)
  */
 ssize_t aa_replace_profiles(void *udata, size_t size, bool noreplace)
 {
-	struct aa_policy *policy;
-	struct aa_profile *old_profile = NULL, *new_profile = NULL;
-	struct aa_profile *rename_profile = NULL;
-	struct aa_namespace *ns = NULL;
 	const char *ns_name, *name = NULL, *info = NULL;
+	struct aa_namespace *ns = NULL;
+	struct aa_load_ent *ent, *tmp;
 	int op = OP_PROF_REPL;
 	ssize_t error;
+	LIST_HEAD(lh);
 
 	/* released below */
-	new_profile = aa_unpack(udata, size, &ns_name);
-	if (IS_ERR(new_profile)) {
-		error = PTR_ERR(new_profile);
-		new_profile = NULL;
-		goto fail;
-	}
+	error = aa_unpack(udata, size, &lh, &ns_name);
+	if (error)
+		goto out;
 
 	/* released below */
 	ns = aa_prepare_namespace(ns_name);
@@ -1058,71 +1099,96 @@ ssize_t aa_replace_profiles(void *udata, size_t size, bool noreplace)
 		goto fail;
 	}
 
-	name = new_profile->base.hname;
-
 	write_lock(&ns->lock);
-	/* no ref on policy only use inside lock */
-	policy = __lookup_parent(ns, new_profile->base.hname);
-
-	if (!policy) {
-		info = "parent does not exist";
-		error = -ENOENT;
-		goto audit;
-	}
-
-	old_profile = __find_child(&policy->profiles, new_profile->base.name);
-	/* released below */
-	aa_get_profile(old_profile);
-
-	if (new_profile->rename) {
-		rename_profile = __lookup_profile(&ns->base,
-						  new_profile->rename);
-		/* released below */
-		aa_get_profile(rename_profile);
-
-		if (!rename_profile) {
-			info = "profile to rename does not exist";
-			name = new_profile->rename;
-			error = -ENOENT;
-			goto audit;
+	/* setup parent and ns info */
+	list_for_each_entry(ent, &lh, list) {
+		struct aa_policy *policy;
+
+		name = ent->new->base.hname;
+		error = __lookup_replace(ns, ent->new->base.hname, noreplace,
+					 &ent->old, &info);
+		if (error)
+			goto fail_lock;
+
+		if (ent->new->rename) {
+			error = __lookup_replace(ns, ent->new->rename,
+						 noreplace, &ent->rename,
+						 &info);
+			if (error)
+				goto fail_lock;
 		}
-	}
-
-	error = replacement_allowed(old_profile, noreplace, &info);
-	if (error)
-		goto audit;
 
-	error = replacement_allowed(rename_profile, noreplace, &info);
-	if (error)
-		goto audit;
-
-audit:
-	if (!old_profile && !rename_profile)
-		op = OP_PROF_LOAD;
+		/* released when @new is freed */
+		ent->new->ns = aa_get_namespace(ns);
+
+		if (ent->old || ent->rename)
+			continue;
+
+		/* no ref on policy only use inside lock */
+		policy = __lookup_parent(ns, ent->new->base.hname);
+		if (!policy) {
+			struct aa_profile *p;
+			p = __list_lookup_parent(&lh, ent->new);
+			if (!p) {
+				error = -ENOENT;
+				info = "parent does not exist";
+				name = ent->new->base.hname;
+				goto fail_lock;
+			}
+			ent->new->parent = aa_get_profile(p);
+		} else if (policy != &ns->base)
+			/* released on profile replacement or free_profile */
+			ent->new->parent = aa_get_profile((struct aa_profile *)
+							  policy);
+	}
 
-	error = audit_policy(op, GFP_ATOMIC, name, info, error);
+	/* do actual replacement */
+	list_for_each_entry_safe(ent, tmp, &lh, list) {
+		list_del_init(&ent->list);
+		op = (!ent->old && !ent->rename) ? OP_PROF_LOAD : OP_PROF_REPL;
+
+		audit_policy(op, GFP_ATOMIC, ent->new->base.name, NULL, error);
+
+		if (ent->old) {
+			__replace_profile(ent->old, ent->new);
+			if (ent->rename)
+				__replace_profile(ent->rename, ent->new);
+		} else if (ent->rename) {
+			__replace_profile(ent->rename, ent->new);
+		} else if (ent->new->parent) {
+			struct aa_profile *parent;
+			parent = aa_newest_version(ent->new->parent);
+			/* parent replaced in this atomic set? */
+			if (parent != ent->new->parent) {
+				aa_get_profile(parent);
+				aa_put_profile(ent->new->parent);
+				ent->new->parent = parent;
+			}
+			__list_add_profile(&parent->base.profiles, ent->new);
+		} else
+			__list_add_profile(&ns->base.profiles, ent->new);
 
-	if (!error) {
-		if (rename_profile)
-			__replace_profile(rename_profile, new_profile);
-		if (old_profile)
-			__replace_profile(old_profile, new_profile);
-		if (!(old_profile || rename_profile))
-			__add_new_profile(ns, policy, new_profile);
+		aa_load_ent_free(ent);
 	}
 	write_unlock(&ns->lock);
 
 out:
 	aa_put_namespace(ns);
-	aa_put_profile(rename_profile);
-	aa_put_profile(old_profile);
-	aa_put_profile(new_profile);
+
 	if (error)
 		return error;
 	return size;
 
+fail_lock:
+	write_unlock(&ns->lock);
 fail:
 	error = audit_policy(op, GFP_KERNEL, name, info, error);
+
+	list_for_each_entry_safe(ent, tmp, &lh, list) {
+		list_del_init(&ent->list);
+		aa_load_ent_free(ent);
+	}
+
 	goto out;
 }
 
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 6dac7d77cb4d..080a26b11f01 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -333,8 +333,10 @@ static struct aa_dfa *unpack_dfa(struct aa_ext *e)
 		/*
 		 * The dfa is aligned with in the blob to 8 bytes
 		 * from the beginning of the stream.
+		 * alignment adjust needed by dfa unpack
 		 */
-		size_t sz = blob - (char *)e->start;
+		size_t sz = blob - (char *) e->start -
+			((e->pos - e->start) & 7);
 		size_t pad = ALIGN(sz, 8) - sz;
 		int flags = TO_ACCEPT1_FLAG(YYTD_DATA32) |
 			TO_ACCEPT2_FLAG(YYTD_DATA32);
@@ -622,29 +624,41 @@ fail:
 /**
  * verify_head - unpack serialized stream header
  * @e: serialized data read head (NOT NULL)
+ * @required: whether the header is required or optional
  * @ns: Returns - namespace if one is specified else NULL (NOT NULL)
  *
  * Returns: error or 0 if header is good
  */
-static int verify_header(struct aa_ext *e, const char **ns)
+static int verify_header(struct aa_ext *e, int required, const char **ns)
 {
 	int error = -EPROTONOSUPPORT;
+	const char *name = NULL;
+	*ns = NULL;
+
 	/* get the interface version */
 	if (!unpack_u32(e, &e->version, "version")) {
-		audit_iface(NULL, NULL, "invalid profile format", e, error);
-		return error;
-	}
+		if (required) {
+			audit_iface(NULL, NULL, "invalid profile format", e,
+				    error);
+			return error;
+		}
 
-	/* check that the interface version is currently supported */
-	if (e->version != 5) {
-		audit_iface(NULL, NULL, "unsupported interface version", e,
-			    error);
-		return error;
+		/* check that the interface version is currently supported */
+		if (e->version != 5) {
+			audit_iface(NULL, NULL, "unsupported interface version",
+				    e, error);
+			return error;
+		}
 	}
 
+
 	/* read the namespace if present */
-	if (!unpack_str(e, ns, "namespace"))
-		*ns = NULL;
+	if (unpack_str(e, &name, "namespace")) {
+		if (*ns && strcmp(*ns, name))
+			audit_iface(NULL, NULL, "invalid ns change", e, error);
+		else if (!*ns)
+			*ns = name;
+	}
 
 	return 0;
 }
@@ -693,18 +707,40 @@ static int verify_profile(struct aa_profile *profile)
 	return 0;
 }
 
+void aa_load_ent_free(struct aa_load_ent *ent)
+{
+	if (ent) {
+		aa_put_profile(ent->rename);
+		aa_put_profile(ent->old);
+		aa_put_profile(ent->new);
+		kzfree(ent);
+	}
+}
+
+struct aa_load_ent *aa_load_ent_alloc(void)
+{
+	struct aa_load_ent *ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+	if (ent)
+		INIT_LIST_HEAD(&ent->list);
+	return ent;
+}
+
 /**
- * aa_unpack - unpack packed binary profile data loaded from user space
+ * aa_unpack - unpack packed binary profile(s) data loaded from user space
  * @udata: user data copied to kmem  (NOT NULL)
  * @size: the size of the user data
+ * @lh: list to place unpacked profiles in a aa_repl_ws
  * @ns: Returns namespace profile is in if specified else NULL (NOT NULL)
  *
- * Unpack user data and return refcounted allocated profile or ERR_PTR
+ * Unpack user data and return refcounted allocated profile(s) stored in
+ * @lh in order of discovery, with the list chain stored in base.list
+ * or error
  *
- * Returns: profile else error pointer if fails to unpack
+ * Returns: profile(s) on @lh else error pointer if fails to unpack
  */
-struct aa_profile *aa_unpack(void *udata, size_t size, const char **ns)
+int aa_unpack(void *udata, size_t size, struct list_head *lh, const char **ns)
 {
+	struct aa_load_ent *tmp, *ent;
 	struct aa_profile *profile = NULL;
 	int error;
 	struct aa_ext e = {
@@ -713,20 +749,42 @@ struct aa_profile *aa_unpack(void *udata, size_t size, const char **ns)
 		.pos = udata,
 	};
 
-	error = verify_header(&e, ns);
-	if (error)
-		return ERR_PTR(error);
+	*ns = NULL;
+	while (e.pos < e.end) {
+		error = verify_header(&e, e.pos == e.start, ns);
+		if (error)
+			goto fail;
 
-	profile = unpack_profile(&e);
-	if (IS_ERR(profile))
-		return profile;
+		profile = unpack_profile(&e);
+		if (IS_ERR(profile)) {
+			error = PTR_ERR(profile);
+			goto fail;
+		}
+
+		error = verify_profile(profile);
+		if (error) {
+			aa_put_profile(profile);
+			goto fail;
+		}
+
+		ent = aa_load_ent_alloc();
+		if (!ent) {
+			error = -ENOMEM;
+			aa_put_profile(profile);
+			goto fail;
+		}
 
-	error = verify_profile(profile);
-	if (error) {
-		aa_put_profile(profile);
-		profile = ERR_PTR(error);
+		ent->new = profile;
+		list_add_tail(&ent->list, lh);
 	}
 
-	/* return refcount */
-	return profile;
+	return 0;
+
+fail:
+	list_for_each_entry_safe(ent, tmp, lh, list) {
+		list_del_init(&ent->list);
+		aa_load_ent_free(ent);
+	}
+
+	return error;
 }
-- 
cgit v1.2.3


From 01e2b670aa898a39259bc85c78e3d74820f4d3b6 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 10 Jul 2013 21:06:43 -0700
Subject: apparmor: convert profile lists to RCU based locking

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/domain.c           |  14 ++-
 security/apparmor/include/apparmor.h |   6 +
 security/apparmor/include/policy.h   |  45 +++++++-
 security/apparmor/policy.c           | 213 ++++++++++++++++++-----------------
 4 files changed, 167 insertions(+), 111 deletions(-)

(limited to 'security')

diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 01b7bd669a88..454bcd7f3452 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -144,7 +144,7 @@ static struct aa_profile *__attach_match(const char *name,
 	int len = 0;
 	struct aa_profile *profile, *candidate = NULL;
 
-	list_for_each_entry(profile, head, base.list) {
+	list_for_each_entry_rcu(profile, head, base.list) {
 		if (profile->flags & PFLAG_NULL)
 			continue;
 		if (profile->xmatch && profile->xmatch_len > len) {
@@ -177,9 +177,9 @@ static struct aa_profile *find_attach(struct aa_namespace *ns,
 {
 	struct aa_profile *profile;
 
-	read_lock(&ns->lock);
+	rcu_read_lock();
 	profile = aa_get_profile(__attach_match(name, list));
-	read_unlock(&ns->lock);
+	rcu_read_unlock();
 
 	return profile;
 }
@@ -641,7 +641,10 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
 	if (count) {
 		/* attempting to change into a new hat or switch to a sibling */
 		struct aa_profile *root;
-		root = PROFILE_IS_HAT(profile) ? profile->parent : profile;
+		if (PROFILE_IS_HAT(profile))
+			root = aa_get_profile_rcu(&profile->parent);
+		else
+			root = aa_get_profile(profile);
 
 		/* find first matching hat */
 		for (i = 0; i < count && !hat; i++)
@@ -653,6 +656,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
 					error = -ECHILD;
 				else
 					error = -ENOENT;
+				aa_put_profile(root);
 				goto out;
 			}
 
@@ -667,6 +671,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
 
 			/* freed below */
 			name = new_compound_name(root->base.hname, hats[0]);
+			aa_put_profile(root);
 			target = name;
 			/* released below */
 			hat = aa_new_null_profile(profile, 1);
@@ -676,6 +681,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
 				goto audit;
 			}
 		} else {
+			aa_put_profile(root);
 			target = hat->base.hname;
 			if (!PROFILE_IS_HAT(hat)) {
 				info = "target not hat";
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index 1ba2ca56a6ef..8fb1488a3cd4 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -78,6 +78,12 @@ static inline void *kvzalloc(size_t size)
 	return __aa_kvmalloc(size, __GFP_ZERO);
 }
 
+/* returns 0 if kref not incremented */
+static inline int kref_get_not0(struct kref *kref)
+{
+	return atomic_inc_not_zero(&kref->refcount);
+}
+
 /**
  * aa_strneq - compare null terminated @str to a non null terminated substring
  * @str: a null terminated string
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index b25491a3046a..82487a853353 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -42,6 +42,8 @@ extern const char *const profile_mode_names[];
 
 #define PROFILE_IS_HAT(_profile) ((_profile)->flags & PFLAG_HAT)
 
+#define on_list_rcu(X) (!list_empty(X) && (X)->prev != LIST_POISON2)
+
 /*
  * FIXME: currently need a clean way to replace and remove profiles as a
  * set.  It should be done at the namespace level.
@@ -75,6 +77,7 @@ struct aa_profile;
  * @hname - The hierarchical name
  * @count: reference count of the obj
  * @list: list policy object is on
+ * @rcu: rcu head used when removing from @list
  * @profiles: head of the profiles list contained in the object
  */
 struct aa_policy {
@@ -83,6 +86,7 @@ struct aa_policy {
 	struct kref count;
 	struct list_head list;
 	struct list_head profiles;
+	struct rcu_head rcu;
 };
 
 /* struct aa_ns_acct - accounting of profiles in namespace
@@ -124,7 +128,7 @@ struct aa_ns_acct {
 struct aa_namespace {
 	struct aa_policy base;
 	struct aa_namespace *parent;
-	rwlock_t lock;
+	struct mutex lock;
 	struct aa_ns_acct acct;
 	struct aa_profile *unconfined;
 	struct list_head sub_ns;
@@ -166,7 +170,7 @@ struct aa_policydb {
  * attachments are determined by profile X transition rules.
  *
  * The @replacedby field is write protected by the profile lock.  Reads
- * are assumed to be atomic, and are done without locking.
+ * are assumed to be atomic.
  *
  * Profiles have a hierarchy where hats and children profiles keep
  * a reference to their parent.
@@ -177,7 +181,7 @@ struct aa_policydb {
  */
 struct aa_profile {
 	struct aa_policy base;
-	struct aa_profile *parent;
+	struct aa_profile __rcu *parent;
 
 	struct aa_namespace *ns;
 	struct aa_profile *replacedby;
@@ -295,6 +299,41 @@ static inline struct aa_profile *aa_get_profile(struct aa_profile *p)
 	return p;
 }
 
+/**
+ * aa_get_profile_not0 - increment refcount on profile @p found via lookup
+ * @p: profile  (MAYBE NULL)
+ *
+ * Returns: pointer to @p if @p is NULL will return NULL
+ * Requires: @p must be held with valid refcount when called
+ */
+static inline struct aa_profile *aa_get_profile_not0(struct aa_profile *p)
+{
+	if (p && kref_get_not0(&p->base.count))
+		return p;
+
+	return NULL;
+}
+
+/**
+ * aa_get_profile_rcu - increment a refcount profile that can be replaced
+ * @p: pointer to profile that can be replaced (NOT NULL)
+ *
+ * Returns: pointer to a refcounted profile.
+ *     else NULL if no profile
+ */
+static inline struct aa_profile *aa_get_profile_rcu(struct aa_profile __rcu **p)
+{
+	struct aa_profile *c;
+
+	rcu_read_lock();
+	do {
+		c = rcu_dereference(*p);
+	} while (c && !kref_get_not0(&c->base.count));
+	rcu_read_unlock();
+
+	return c;
+}
+
 /**
  * aa_put_profile - decrement refcount on profile @p
  * @p: profile  (MAYBE NULL)
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 407b442c0a2c..25bbbb482bb6 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -153,13 +153,13 @@ static bool policy_init(struct aa_policy *policy, const char *prefix,
 static void policy_destroy(struct aa_policy *policy)
 {
 	/* still contains profiles -- invalid */
-	if (!list_empty(&policy->profiles)) {
+	if (on_list_rcu(&policy->profiles)) {
 		AA_ERROR("%s: internal error, "
 			 "policy '%s' still contains profiles\n",
 			 __func__, policy->name);
 		BUG();
 	}
-	if (!list_empty(&policy->list)) {
+	if (on_list_rcu(&policy->list)) {
 		AA_ERROR("%s: internal error, policy '%s' still on list\n",
 			 __func__, policy->name);
 		BUG();
@@ -174,7 +174,7 @@ static void policy_destroy(struct aa_policy *policy)
  * @head: list to search  (NOT NULL)
  * @name: name to search for  (NOT NULL)
  *
- * Requires: correct locks for the @head list be held
+ * Requires: rcu_read_lock be held
  *
  * Returns: unrefcounted policy that match @name or NULL if not found
  */
@@ -182,7 +182,7 @@ static struct aa_policy *__policy_find(struct list_head *head, const char *name)
 {
 	struct aa_policy *policy;
 
-	list_for_each_entry(policy, head, list) {
+	list_for_each_entry_rcu(policy, head, list) {
 		if (!strcmp(policy->name, name))
 			return policy;
 	}
@@ -195,7 +195,7 @@ static struct aa_policy *__policy_find(struct list_head *head, const char *name)
  * @str: string to search for  (NOT NULL)
  * @len: length of match required
  *
- * Requires: correct locks for the @head list be held
+ * Requires: rcu_read_lock be held
  *
  * Returns: unrefcounted policy that match @str or NULL if not found
  *
@@ -207,7 +207,7 @@ static struct aa_policy *__policy_strn_find(struct list_head *head,
 {
 	struct aa_policy *policy;
 
-	list_for_each_entry(policy, head, list) {
+	list_for_each_entry_rcu(policy, head, list) {
 		if (aa_strneq(policy->name, str, len))
 			return policy;
 	}
@@ -284,7 +284,7 @@ static struct aa_namespace *alloc_namespace(const char *prefix,
 		goto fail_ns;
 
 	INIT_LIST_HEAD(&ns->sub_ns);
-	rwlock_init(&ns->lock);
+	mutex_init(&ns->lock);
 
 	/* released by free_namespace */
 	ns->unconfined = aa_alloc_profile("unconfined");
@@ -350,7 +350,7 @@ void aa_free_namespace_kref(struct kref *kref)
  *
  * Returns: unrefcounted namespace
  *
- * Requires: ns lock be held
+ * Requires: rcu_read_lock be held
  */
 static struct aa_namespace *__aa_find_namespace(struct list_head *head,
 						const char *name)
@@ -373,9 +373,9 @@ struct aa_namespace *aa_find_namespace(struct aa_namespace *root,
 {
 	struct aa_namespace *ns = NULL;
 
-	read_lock(&root->lock);
+	rcu_read_lock();
 	ns = aa_get_namespace(__aa_find_namespace(&root->sub_ns, name));
-	read_unlock(&root->lock);
+	rcu_read_unlock();
 
 	return ns;
 }
@@ -392,7 +392,7 @@ static struct aa_namespace *aa_prepare_namespace(const char *name)
 
 	root = aa_current_profile()->ns;
 
-	write_lock(&root->lock);
+	mutex_lock(&root->lock);
 
 	/* if name isn't specified the profile is loaded to the current ns */
 	if (!name) {
@@ -405,31 +405,17 @@ static struct aa_namespace *aa_prepare_namespace(const char *name)
 	/* released by caller */
 	ns = aa_get_namespace(__aa_find_namespace(&root->sub_ns, name));
 	if (!ns) {
-		/* namespace not found */
-		struct aa_namespace *new_ns;
-		write_unlock(&root->lock);
-		new_ns = alloc_namespace(root->base.hname, name);
-		if (!new_ns)
-			return NULL;
-		write_lock(&root->lock);
-		/* test for race when new_ns was allocated */
-		ns = __aa_find_namespace(&root->sub_ns, name);
-		if (!ns) {
-			/* add parent ref */
-			new_ns->parent = aa_get_namespace(root);
-
-			list_add(&new_ns->base.list, &root->sub_ns);
-			/* add list ref */
-			ns = aa_get_namespace(new_ns);
-		} else {
-			/* raced so free the new one */
-			free_namespace(new_ns);
-			/* get reference on namespace */
-			aa_get_namespace(ns);
-		}
+		ns = alloc_namespace(root->base.hname, name);
+		if (!ns)
+			goto out;
+		/* add parent ref */
+		ns->parent = aa_get_namespace(root);
+		list_add_rcu(&ns->base.list, &root->sub_ns);
+		/* add list ref */
+		aa_get_namespace(ns);
 	}
 out:
-	write_unlock(&root->lock);
+	mutex_unlock(&root->lock);
 
 	/* return ref */
 	return ns;
@@ -447,7 +433,7 @@ out:
 static void __list_add_profile(struct list_head *list,
 			       struct aa_profile *profile)
 {
-	list_add(&profile->base.list, list);
+	list_add_rcu(&profile->base.list, list);
 	/* get list reference */
 	aa_get_profile(profile);
 }
@@ -466,10 +452,8 @@ static void __list_add_profile(struct list_head *list,
  */
 static void __list_remove_profile(struct aa_profile *profile)
 {
-	list_del_init(&profile->base.list);
-	if (!(profile->flags & PFLAG_NO_LIST_REF))
-		/* release list reference */
-		aa_put_profile(profile);
+	list_del_rcu(&profile->base.list);
+	aa_put_profile(profile);
 }
 
 static void __profile_list_release(struct list_head *head);
@@ -510,17 +494,40 @@ static void __ns_list_release(struct list_head *head);
  */
 static void destroy_namespace(struct aa_namespace *ns)
 {
+	struct aa_profile *unconfined;
+
 	if (!ns)
 		return;
 
-	write_lock(&ns->lock);
+	mutex_lock(&ns->lock);
 	/* release all profiles in this namespace */
 	__profile_list_release(&ns->base.profiles);
 
 	/* release all sub namespaces */
 	__ns_list_release(&ns->sub_ns);
 
-	write_unlock(&ns->lock);
+	unconfined = ns->unconfined;
+	/*
+	 * break the ns, unconfined profile cyclic reference and forward
+	 * all new unconfined profiles requests to the parent namespace
+	 * This will result in all confined tasks that have a profile
+	 * being removed, inheriting the parent->unconfined profile.
+	 */
+	if (ns->parent)
+		ns->unconfined = aa_get_profile(ns->parent->unconfined);
+
+	/* release original ns->unconfined ref */
+	aa_put_profile(unconfined);
+
+	mutex_unlock(&ns->lock);
+}
+
+static void aa_put_ns_rcu(struct rcu_head *head)
+{
+	struct aa_namespace *ns = container_of(head, struct aa_namespace,
+					       base.rcu);
+	/* release ns->base.list ref */
+	aa_put_namespace(ns);
 }
 
 /**
@@ -531,26 +538,12 @@ static void destroy_namespace(struct aa_namespace *ns)
  */
 static void __remove_namespace(struct aa_namespace *ns)
 {
-	struct aa_profile *unconfined = ns->unconfined;
-
 	/* remove ns from namespace list */
-	list_del_init(&ns->base.list);
-
-	/*
-	 * break the ns, unconfined profile cyclic reference and forward
-	 * all new unconfined profiles requests to the parent namespace
-	 * This will result in all confined tasks that have a profile
-	 * being removed, inheriting the parent->unconfined profile.
-	 */
-	if (ns->parent)
-		ns->unconfined = aa_get_profile(ns->parent->unconfined);
+	list_del_rcu(&ns->base.list);
 
 	destroy_namespace(ns);
 
-	/* release original ns->unconfined ref */
-	aa_put_profile(unconfined);
-	/* release ns->base.list ref, from removal above */
-	aa_put_namespace(ns);
+	call_rcu(&ns->base.rcu, aa_put_ns_rcu);
 }
 
 /**
@@ -614,16 +607,9 @@ static void free_profile(struct aa_profile *profile)
 	if (!profile)
 		return;
 
-	if (!list_empty(&profile->base.list)) {
-		AA_ERROR("%s: internal error, "
-			 "profile '%s' still on ns list\n",
-			 __func__, profile->base.name);
-		BUG();
-	}
-
 	/* free children profiles */
 	policy_destroy(&profile->base);
-	aa_put_profile(profile->parent);
+	aa_put_profile(rcu_access_pointer(profile->parent));
 
 	aa_put_namespace(profile->ns);
 	kzfree(profile->rename);
@@ -660,6 +646,16 @@ static void free_profile(struct aa_profile *profile)
 	kzfree(profile);
 }
 
+/**
+ * aa_free_profile_rcu - free aa_profile by rcu (called by aa_free_profile_kref)
+ * @head: rcu_head callback for freeing of a profile  (NOT NULL)
+ */
+static void aa_free_profile_rcu(struct rcu_head *head)
+{
+	struct aa_profile *p = container_of(head, struct aa_profile, base.rcu);
+	free_profile(p);
+}
+
 /**
  * aa_free_profile_kref - free aa_profile by kref (called by aa_put_profile)
  * @kr: kref callback for freeing of a profile  (NOT NULL)
@@ -668,8 +664,7 @@ void aa_free_profile_kref(struct kref *kref)
 {
 	struct aa_profile *p = container_of(kref, struct aa_profile,
 					    base.count);
-
-	free_profile(p);
+	call_rcu(&p->base.rcu, aa_free_profile_rcu);
 }
 
 /**
@@ -733,12 +728,12 @@ struct aa_profile *aa_new_null_profile(struct aa_profile *parent, int hat)
 		profile->flags |= PFLAG_HAT;
 
 	/* released on free_profile */
-	profile->parent = aa_get_profile(parent);
+	rcu_assign_pointer(profile->parent, aa_get_profile(parent));
 	profile->ns = aa_get_namespace(parent->ns);
 
-	write_lock(&profile->ns->lock);
+	mutex_lock(&profile->ns->lock);
 	__list_add_profile(&parent->base.profiles, profile);
-	write_unlock(&profile->ns->lock);
+	mutex_unlock(&profile->ns->lock);
 
 	/* refcount released by caller */
 	return profile;
@@ -754,7 +749,7 @@ fail:
  * @head: list to search  (NOT NULL)
  * @name: name of profile (NOT NULL)
  *
- * Requires: ns lock protecting list be held
+ * Requires: rcu_read_lock be held
  *
  * Returns: unrefcounted profile ptr, or NULL if not found
  */
@@ -769,7 +764,7 @@ static struct aa_profile *__find_child(struct list_head *head, const char *name)
  * @name: name of profile (NOT NULL)
  * @len: length of @name substring to match
  *
- * Requires: ns lock protecting list be held
+ * Requires: rcu_read_lock be held
  *
  * Returns: unrefcounted profile ptr, or NULL if not found
  */
@@ -790,9 +785,9 @@ struct aa_profile *aa_find_child(struct aa_profile *parent, const char *name)
 {
 	struct aa_profile *profile;
 
-	read_lock(&parent->ns->lock);
+	rcu_read_lock();
 	profile = aa_get_profile(__find_child(&parent->base.profiles, name));
-	read_unlock(&parent->ns->lock);
+	rcu_read_unlock();
 
 	/* refcount released by caller */
 	return profile;
@@ -807,7 +802,7 @@ struct aa_profile *aa_find_child(struct aa_profile *parent, const char *name)
  * that matches hname does not need to exist, in general this
  * is used to load a new profile.
  *
- * Requires: ns->lock be held
+ * Requires: rcu_read_lock be held
  *
  * Returns: unrefcounted policy or NULL if not found
  */
@@ -839,7 +834,7 @@ static struct aa_policy *__lookup_parent(struct aa_namespace *ns,
  * @base: base list to start looking up profile name from  (NOT NULL)
  * @hname: hierarchical profile name  (NOT NULL)
  *
- * Requires: ns->lock be held
+ * Requires: rcu_read_lock be held
  *
  * Returns: unrefcounted profile pointer or NULL if not found
  *
@@ -878,9 +873,11 @@ struct aa_profile *aa_lookup_profile(struct aa_namespace *ns, const char *hname)
 {
 	struct aa_profile *profile;
 
-	read_lock(&ns->lock);
-	profile = aa_get_profile(__lookup_profile(&ns->base, hname));
-	read_unlock(&ns->lock);
+	rcu_read_lock();
+	do {
+		profile = __lookup_profile(&ns->base, hname);
+	} while (profile && !aa_get_profile_not0(profile));
+	rcu_read_unlock();
 
 	/* the unconfined profile is not in the regular profile list */
 	if (!profile && strcmp(hname, "unconfined") == 0)
@@ -1002,7 +999,7 @@ static void __replace_profile(struct aa_profile *old, struct aa_profile *new)
 
 	if (!list_empty(&old->base.profiles)) {
 		LIST_HEAD(lh);
-		list_splice_init(&old->base.profiles, &lh);
+		list_splice_init_rcu(&old->base.profiles, &lh, synchronize_rcu);
 
 		list_for_each_entry_safe(child, tmp, &lh, base.list) {
 			struct aa_profile *p;
@@ -1018,20 +1015,24 @@ static void __replace_profile(struct aa_profile *old, struct aa_profile *new)
 			/* inherit @child and its children */
 			/* TODO: update hname of inherited children */
 			/* list refcount transferred to @new */
-			list_add(&child->base.list, &new->base.profiles);
-			aa_put_profile(child->parent);
-			child->parent = aa_get_profile(new);
+			p = rcu_dereference_protected(child->parent,
+					     mutex_is_locked(&child->ns->lock));
+			rcu_assign_pointer(child->parent, aa_get_profile(new));
+			list_add_rcu(&child->base.list, &new->base.profiles);
+			aa_put_profile(p);
 		}
 	}
 
-	if (!new->parent)
-		new->parent = aa_get_profile(old->parent);
+	if (!rcu_access_pointer(new->parent)) {
+		struct aa_profile *parent = rcu_dereference(old->parent);
+		rcu_assign_pointer(new->parent, aa_get_profile(parent));
+	}
 	/* released by free_profile */
 	old->replacedby = aa_get_profile(new);
 
 	if (list_empty(&new->base.list)) {
 		/* new is not on a list already */
-		list_replace_init(&old->base.list, &new->base.list);
+		list_replace_rcu(&old->base.list, &new->base.list);
 		aa_get_profile(new);
 		aa_put_profile(old);
 	} else
@@ -1099,7 +1100,7 @@ ssize_t aa_replace_profiles(void *udata, size_t size, bool noreplace)
 		goto fail;
 	}
 
-	write_lock(&ns->lock);
+	mutex_lock(&ns->lock);
 	/* setup parent and ns info */
 	list_for_each_entry(ent, &lh, list) {
 		struct aa_policy *policy;
@@ -1135,11 +1136,12 @@ ssize_t aa_replace_profiles(void *udata, size_t size, bool noreplace)
 				name = ent->new->base.hname;
 				goto fail_lock;
 			}
-			ent->new->parent = aa_get_profile(p);
-		} else if (policy != &ns->base)
+			rcu_assign_pointer(ent->new->parent, aa_get_profile(p));
+		} else if (policy != &ns->base) {
 			/* released on profile replacement or free_profile */
-			ent->new->parent = aa_get_profile((struct aa_profile *)
-							  policy);
+			struct aa_profile *p = (struct aa_profile *) policy;
+			rcu_assign_pointer(ent->new->parent, aa_get_profile(p));
+		}
 	}
 
 	/* do actual replacement */
@@ -1156,13 +1158,16 @@ ssize_t aa_replace_profiles(void *udata, size_t size, bool noreplace)
 		} else if (ent->rename) {
 			__replace_profile(ent->rename, ent->new);
 		} else if (ent->new->parent) {
-			struct aa_profile *parent;
-			parent = aa_newest_version(ent->new->parent);
+			struct aa_profile *parent, *newest;
+			parent = rcu_dereference_protected(ent->new->parent,
+						    mutex_is_locked(&ns->lock));
+			newest = aa_newest_version(parent);
+
 			/* parent replaced in this atomic set? */
-			if (parent != ent->new->parent) {
-				aa_get_profile(parent);
-				aa_put_profile(ent->new->parent);
-				ent->new->parent = parent;
+			if (newest != parent) {
+				aa_get_profile(newest);
+				aa_put_profile(parent);
+				rcu_assign_pointer(ent->new->parent, newest);
 			}
 			__list_add_profile(&parent->base.profiles, ent->new);
 		} else
@@ -1170,7 +1175,7 @@ ssize_t aa_replace_profiles(void *udata, size_t size, bool noreplace)
 
 		aa_load_ent_free(ent);
 	}
-	write_unlock(&ns->lock);
+	mutex_unlock(&ns->lock);
 
 out:
 	aa_put_namespace(ns);
@@ -1180,7 +1185,7 @@ out:
 	return size;
 
 fail_lock:
-	write_unlock(&ns->lock);
+	mutex_unlock(&ns->lock);
 fail:
 	error = audit_policy(op, GFP_KERNEL, name, info, error);
 
@@ -1235,12 +1240,12 @@ ssize_t aa_remove_profiles(char *fqname, size_t size)
 
 	if (!name) {
 		/* remove namespace - can only happen if fqname[0] == ':' */
-		write_lock(&ns->parent->lock);
+		mutex_lock(&ns->parent->lock);
 		__remove_namespace(ns);
-		write_unlock(&ns->parent->lock);
+		mutex_unlock(&ns->parent->lock);
 	} else {
 		/* remove profile */
-		write_lock(&ns->lock);
+		mutex_lock(&ns->lock);
 		profile = aa_get_profile(__lookup_profile(&ns->base, name));
 		if (!profile) {
 			error = -ENOENT;
@@ -1249,7 +1254,7 @@ ssize_t aa_remove_profiles(char *fqname, size_t size)
 		}
 		name = profile->base.hname;
 		__remove_profile(profile);
-		write_unlock(&ns->lock);
+		mutex_unlock(&ns->lock);
 	}
 
 	/* don't fail removal if audit fails */
@@ -1259,7 +1264,7 @@ ssize_t aa_remove_profiles(char *fqname, size_t size)
 	return size;
 
 fail_ns_lock:
-	write_unlock(&ns->lock);
+	mutex_unlock(&ns->lock);
 	aa_put_namespace(ns);
 
 fail:
-- 
cgit v1.2.3


From 77b071b34045a0c65d0e1f85f3d47fd2b8b7a8a1 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 10 Jul 2013 21:07:43 -0700
Subject: apparmor: change how profile replacement update is done

remove the use of replaced by chaining and move to profile invalidation
and lookup to handle task replacement.

Replacement chaining can result in large chains of profiles being pinned
in memory when one profile in the chain is use. With implicit labeling
this will be even more of a problem, so move to a direct lookup method.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/context.c         | 16 +++----
 security/apparmor/domain.c          |  4 +-
 security/apparmor/include/context.h | 15 +++----
 security/apparmor/include/policy.h  | 78 ++++++++++++++++++++++++----------
 security/apparmor/lsm.c             | 14 +++---
 security/apparmor/policy.c          | 85 ++++++++++++++++++++-----------------
 6 files changed, 125 insertions(+), 87 deletions(-)

(limited to 'security')

diff --git a/security/apparmor/context.c b/security/apparmor/context.c
index d5af1d15f26d..3064c6ced87c 100644
--- a/security/apparmor/context.c
+++ b/security/apparmor/context.c
@@ -112,9 +112,9 @@ int aa_replace_current_profile(struct aa_profile *profile)
 		aa_clear_task_cxt_trans(cxt);
 
 	/* be careful switching cxt->profile, when racing replacement it
-	 * is possible that cxt->profile->replacedby is the reference keeping
-	 * @profile valid, so make sure to get its reference before dropping
-	 * the reference on cxt->profile */
+	 * is possible that cxt->profile->replacedby->profile is the reference
+	 * keeping @profile valid, so make sure to get its reference before
+	 * dropping the reference on cxt->profile */
 	aa_get_profile(profile);
 	aa_put_profile(cxt->profile);
 	cxt->profile = profile;
@@ -175,7 +175,7 @@ int aa_set_current_hat(struct aa_profile *profile, u64 token)
 		abort_creds(new);
 		return -EACCES;
 	}
-	cxt->profile = aa_get_profile(aa_newest_version(profile));
+	cxt->profile = aa_get_newest_profile(profile);
 	/* clear exec on switching context */
 	aa_put_profile(cxt->onexec);
 	cxt->onexec = NULL;
@@ -212,14 +212,8 @@ int aa_restore_previous_profile(u64 token)
 	}
 
 	aa_put_profile(cxt->profile);
-	cxt->profile = aa_newest_version(cxt->previous);
+	cxt->profile = aa_get_newest_profile(cxt->previous);
 	BUG_ON(!cxt->profile);
-	if (unlikely(cxt->profile != cxt->previous)) {
-		aa_get_profile(cxt->profile);
-		aa_put_profile(cxt->previous);
-	}
-	/* ref has been transfered so avoid putting ref in clear_task_cxt */
-	cxt->previous = NULL;
 	/* clear exec && prev information when restoring to previous context */
 	aa_clear_task_cxt_trans(cxt);
 
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 454bcd7f3452..5488d095af6f 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -359,7 +359,7 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm)
 	cxt = cred_cxt(bprm->cred);
 	BUG_ON(!cxt);
 
-	profile = aa_get_profile(aa_newest_version(cxt->profile));
+	profile = aa_get_newest_profile(cxt->profile);
 	/*
 	 * get the namespace from the replacement profile as replacement
 	 * can change the namespace
@@ -417,7 +417,7 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm)
 
 		if (!(cp.allow & AA_MAY_ONEXEC))
 			goto audit;
-		new_profile = aa_get_profile(aa_newest_version(cxt->onexec));
+		new_profile = aa_get_newest_profile(cxt->onexec);
 		goto apply;
 	}
 
diff --git a/security/apparmor/include/context.h b/security/apparmor/include/context.h
index d44ba5802e3d..6bf65798e5d1 100644
--- a/security/apparmor/include/context.h
+++ b/security/apparmor/include/context.h
@@ -98,7 +98,7 @@ static inline struct aa_profile *aa_cred_profile(const struct cred *cred)
 {
 	struct aa_task_cxt *cxt = cred_cxt(cred);
 	BUG_ON(!cxt || !cxt->profile);
-	return aa_newest_version(cxt->profile);
+	return cxt->profile;
 }
 
 /**
@@ -152,15 +152,14 @@ static inline struct aa_profile *aa_current_profile(void)
 	struct aa_profile *profile;
 	BUG_ON(!cxt || !cxt->profile);
 
-	profile = aa_newest_version(cxt->profile);
-	/*
-	 * Whether or not replacement succeeds, use newest profile so
-	 * there is no need to update it after replacement.
-	 */
-	if (unlikely((cxt->profile != profile)))
+	if (PROFILE_INVALID(cxt->profile)) {
+		profile = aa_get_newest_profile(cxt->profile);
 		aa_replace_current_profile(profile);
+		aa_put_profile(profile);
+		cxt = current_cxt();
+	}
 
-	return profile;
+	return cxt->profile;
 }
 
 /**
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 82487a853353..e9f2baf4467e 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -42,6 +42,8 @@ extern const char *const profile_mode_names[];
 
 #define PROFILE_IS_HAT(_profile) ((_profile)->flags & PFLAG_HAT)
 
+#define PROFILE_INVALID(_profile) ((_profile)->flags & PFLAG_INVALID)
+
 #define on_list_rcu(X) (!list_empty(X) && (X)->prev != LIST_POISON2)
 
 /*
@@ -65,6 +67,7 @@ enum profile_flags {
 	PFLAG_USER_DEFINED = 0x20,	/* user based profile - lower privs */
 	PFLAG_NO_LIST_REF = 0x40,	/* list doesn't keep profile ref */
 	PFLAG_OLD_NULL_TRANS = 0x100,	/* use // as the null transition */
+	PFLAG_INVALID = 0x200,		/* profile replaced/removed */
 
 	/* These flags must correspond with PATH_flags */
 	PFLAG_MEDIATE_DELETED = 0x10000, /* mediate instead delegate deleted */
@@ -146,6 +149,12 @@ struct aa_policydb {
 
 };
 
+struct aa_replacedby {
+	struct kref count;
+	struct aa_profile __rcu *profile;
+};
+
+
 /* struct aa_profile - basic confinement data
  * @base - base components of the profile (name, refcount, lists, lock ...)
  * @parent: parent of profile
@@ -169,8 +178,7 @@ struct aa_policydb {
  * used to determine profile attachment against unconfined tasks.  All other
  * attachments are determined by profile X transition rules.
  *
- * The @replacedby field is write protected by the profile lock.  Reads
- * are assumed to be atomic.
+ * The @replacedby struct is write protected by the profile lock.
  *
  * Profiles have a hierarchy where hats and children profiles keep
  * a reference to their parent.
@@ -184,14 +192,14 @@ struct aa_profile {
 	struct aa_profile __rcu *parent;
 
 	struct aa_namespace *ns;
-	struct aa_profile *replacedby;
+	struct aa_replacedby *replacedby;
 	const char *rename;
 
 	struct aa_dfa *xmatch;
 	int xmatch_len;
 	enum audit_mode audit;
 	enum profile_mode mode;
-	u32 flags;
+	long flags;
 	u32 path_flags;
 	int size;
 
@@ -250,6 +258,7 @@ static inline void aa_put_namespace(struct aa_namespace *ns)
 		kref_put(&ns->base.count, aa_free_namespace_kref);
 }
 
+void aa_free_replacedby_kref(struct kref *kref);
 struct aa_profile *aa_alloc_profile(const char *name);
 struct aa_profile *aa_new_null_profile(struct aa_profile *parent, int hat);
 void aa_free_profile_kref(struct kref *kref);
@@ -265,24 +274,6 @@ ssize_t aa_remove_profiles(char *name, size_t size);
 
 #define unconfined(X) ((X)->flags & PFLAG_UNCONFINED)
 
-/**
- * aa_newest_version - find the newest version of @profile
- * @profile: the profile to check for newer versions of (NOT NULL)
- *
- * Returns: newest version of @profile, if @profile is the newest version
- *          return @profile.
- *
- * NOTE: the profile returned is not refcounted, The refcount on @profile
- * must be held until the caller decides what to do with the returned newest
- * version.
- */
-static inline struct aa_profile *aa_newest_version(struct aa_profile *profile)
-{
-	while (profile->replacedby)
-		profile = profile->replacedby;
-
-	return profile;
-}
 
 /**
  * aa_get_profile - increment refcount on profile @p
@@ -334,6 +325,25 @@ static inline struct aa_profile *aa_get_profile_rcu(struct aa_profile __rcu **p)
 	return c;
 }
 
+/**
+ * aa_get_newest_profile - find the newest version of @profile
+ * @profile: the profile to check for newer versions of
+ *
+ * Returns: refcounted newest version of @profile taking into account
+ *          replacement, renames and removals
+ *          return @profile.
+ */
+static inline struct aa_profile *aa_get_newest_profile(struct aa_profile *p)
+{
+	if (!p)
+		return NULL;
+
+	if (PROFILE_INVALID(p))
+		return aa_get_profile_rcu(&p->replacedby->profile);
+
+	return aa_get_profile(p);
+}
+
 /**
  * aa_put_profile - decrement refcount on profile @p
  * @p: profile  (MAYBE NULL)
@@ -344,6 +354,30 @@ static inline void aa_put_profile(struct aa_profile *p)
 		kref_put(&p->base.count, aa_free_profile_kref);
 }
 
+static inline struct aa_replacedby *aa_get_replacedby(struct aa_replacedby *p)
+{
+	if (p)
+		kref_get(&(p->count));
+
+	return p;
+}
+
+static inline void aa_put_replacedby(struct aa_replacedby *p)
+{
+	if (p)
+		kref_put(&p->count, aa_free_replacedby_kref);
+}
+
+/* requires profile list write lock held */
+static inline void __aa_update_replacedby(struct aa_profile *orig,
+					  struct aa_profile *new)
+{
+	struct aa_profile *tmp = rcu_dereference(orig->replacedby->profile);
+	rcu_assign_pointer(orig->replacedby->profile, aa_get_profile(new));
+	orig->flags |= PFLAG_INVALID;
+	aa_put_profile(tmp);
+}
+
 static inline int AUDIT_MODE(struct aa_profile *profile)
 {
 	if (aa_g_audit != AUDIT_NORMAL)
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 96506dfe51ec..c8c148a738f7 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -508,19 +508,21 @@ static int apparmor_getprocattr(struct task_struct *task, char *name,
 	/* released below */
 	const struct cred *cred = get_task_cred(task);
 	struct aa_task_cxt *cxt = cred_cxt(cred);
+	struct aa_profile *profile = NULL;
 
 	if (strcmp(name, "current") == 0)
-		error = aa_getprocattr(aa_newest_version(cxt->profile),
-				       value);
+		profile = aa_get_newest_profile(cxt->profile);
 	else if (strcmp(name, "prev") == 0  && cxt->previous)
-		error = aa_getprocattr(aa_newest_version(cxt->previous),
-				       value);
+		profile = aa_get_newest_profile(cxt->previous);
 	else if (strcmp(name, "exec") == 0 && cxt->onexec)
-		error = aa_getprocattr(aa_newest_version(cxt->onexec),
-				       value);
+		profile = aa_get_newest_profile(cxt->onexec);
 	else
 		error = -EINVAL;
 
+	if (profile)
+		error = aa_getprocattr(profile, value);
+
+	aa_put_profile(profile);
 	put_cred(cred);
 
 	return error;
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 25bbbb482bb6..41b8f275c626 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -469,7 +469,7 @@ static void __remove_profile(struct aa_profile *profile)
 	/* release any children lists first */
 	__profile_list_release(&profile->base.profiles);
 	/* released by free_profile */
-	profile->replacedby = aa_get_profile(profile->ns->unconfined);
+	__aa_update_replacedby(profile, profile->ns->unconfined);
 	__list_remove_profile(profile);
 }
 
@@ -588,6 +588,23 @@ void __init aa_free_root_ns(void)
 	 aa_put_namespace(ns);
 }
 
+
+static void free_replacedby(struct aa_replacedby *r)
+{
+	if (r) {
+		aa_put_profile(rcu_dereference(r->profile));
+		kzfree(r);
+	}
+}
+
+
+void aa_free_replacedby_kref(struct kref *kref)
+{
+	struct aa_replacedby *r = container_of(kref, struct aa_replacedby,
+					       count);
+	free_replacedby(r);
+}
+
 /**
  * free_profile - free a profile
  * @profile: the profile to free  (MAYBE NULL)
@@ -600,8 +617,6 @@ void __init aa_free_root_ns(void)
  */
 static void free_profile(struct aa_profile *profile)
 {
-	struct aa_profile *p;
-
 	AA_DEBUG("%s(%p)\n", __func__, profile);
 
 	if (!profile)
@@ -620,28 +635,7 @@ static void free_profile(struct aa_profile *profile)
 
 	aa_put_dfa(profile->xmatch);
 	aa_put_dfa(profile->policy.dfa);
-
-	/* put the profile reference for replacedby, but not via
-	 * put_profile(kref_put).
-	 * replacedby can form a long chain that can result in cascading
-	 * frees that blows the stack because kref_put makes a nested fn
-	 * call (it looks like recursion, with free_profile calling
-	 * free_profile) for each profile in the chain lp#1056078.
-	 */
-	for (p = profile->replacedby; p; ) {
-		if (atomic_dec_and_test(&p->base.count.refcount)) {
-			/* no more refs on p, grab its replacedby */
-			struct aa_profile *next = p->replacedby;
-			/* break the chain */
-			p->replacedby = NULL;
-			/* now free p, chain is broken */
-			free_profile(p);
-
-			/* follow up with next profile in the chain */
-			p = next;
-		} else
-			break;
-	}
+	aa_put_replacedby(profile->replacedby);
 
 	kzfree(profile);
 }
@@ -682,13 +676,22 @@ struct aa_profile *aa_alloc_profile(const char *hname)
 	if (!profile)
 		return NULL;
 
-	if (!policy_init(&profile->base, NULL, hname)) {
-		kzfree(profile);
-		return NULL;
-	}
+	profile->replacedby = kzalloc(sizeof(struct aa_replacedby), GFP_KERNEL);
+	if (!profile->replacedby)
+		goto fail;
+	kref_init(&profile->replacedby->count);
+
+	if (!policy_init(&profile->base, NULL, hname))
+		goto fail;
 
 	/* refcount released by caller */
 	return profile;
+
+fail:
+	kzfree(profile->replacedby);
+	kzfree(profile);
+
+	return NULL;
 }
 
 /**
@@ -985,6 +988,7 @@ static struct aa_profile *__list_lookup_parent(struct list_head *lh,
  * __replace_profile - replace @old with @new on a list
  * @old: profile to be replaced  (NOT NULL)
  * @new: profile to replace @old with  (NOT NULL)
+ * @share_replacedby: transfer @old->replacedby to @new
  *
  * Will duplicate and refcount elements that @new inherits from @old
  * and will inherit @old children.
@@ -993,7 +997,8 @@ static struct aa_profile *__list_lookup_parent(struct list_head *lh,
  *
  * Requires: namespace list lock be held, or list not be shared
  */
-static void __replace_profile(struct aa_profile *old, struct aa_profile *new)
+static void __replace_profile(struct aa_profile *old, struct aa_profile *new,
+			      bool share_replacedby)
 {
 	struct aa_profile *child, *tmp;
 
@@ -1008,7 +1013,7 @@ static void __replace_profile(struct aa_profile *old, struct aa_profile *new)
 			p = __find_child(&new->base.profiles, child->base.name);
 			if (p) {
 				/* @p replaces @child  */
-				__replace_profile(child, p);
+				__replace_profile(child, p, share_replacedby);
 				continue;
 			}
 
@@ -1027,8 +1032,11 @@ static void __replace_profile(struct aa_profile *old, struct aa_profile *new)
 		struct aa_profile *parent = rcu_dereference(old->parent);
 		rcu_assign_pointer(new->parent, aa_get_profile(parent));
 	}
-	/* released by free_profile */
-	old->replacedby = aa_get_profile(new);
+	__aa_update_replacedby(old, new);
+	if (share_replacedby) {
+		aa_put_replacedby(new->replacedby);
+		new->replacedby = aa_get_replacedby(old->replacedby);
+	}
 
 	if (list_empty(&new->base.list)) {
 		/* new is not on a list already */
@@ -1152,23 +1160,24 @@ ssize_t aa_replace_profiles(void *udata, size_t size, bool noreplace)
 		audit_policy(op, GFP_ATOMIC, ent->new->base.name, NULL, error);
 
 		if (ent->old) {
-			__replace_profile(ent->old, ent->new);
+			__replace_profile(ent->old, ent->new, 1);
 			if (ent->rename)
-				__replace_profile(ent->rename, ent->new);
+				__replace_profile(ent->rename, ent->new, 0);
 		} else if (ent->rename) {
-			__replace_profile(ent->rename, ent->new);
+			__replace_profile(ent->rename, ent->new, 0);
 		} else if (ent->new->parent) {
 			struct aa_profile *parent, *newest;
 			parent = rcu_dereference_protected(ent->new->parent,
 						    mutex_is_locked(&ns->lock));
-			newest = aa_newest_version(parent);
+			newest = aa_get_newest_profile(parent);
 
 			/* parent replaced in this atomic set? */
 			if (newest != parent) {
 				aa_get_profile(newest);
 				aa_put_profile(parent);
 				rcu_assign_pointer(ent->new->parent, newest);
-			}
+			} else
+				aa_put_profile(newest);
 			__list_add_profile(&parent->base.profiles, ent->new);
 		} else
 			__list_add_profile(&ns->base.profiles, ent->new);
-- 
cgit v1.2.3


From fa2ac468db510c653499a47c1ec3deb045bf4763 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 10 Jul 2013 21:08:43 -0700
Subject: apparmor: update how unconfined is handled

ns->unconfined is being used read side without locking, nor rcu but is
being updated when a namespace is removed. This works for the root ns
which is never removed but has a race window and can cause failures when
children namespaces are removed.

Also ns and ns->unconfined have a circular refcounting dependency that
is problematic and must be broken. Currently this is done incorrectly
when the namespace is destroyed.

Fix this by forward referencing unconfined via the replacedby infrastructure
instead of directly updating the ns->unconfined pointer.

Remove the circular refcount dependency by making the ns and its unconfined
profile share the same refcount.

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/domain.c         |  2 +-
 security/apparmor/include/policy.h | 80 +++++++++++++++++++-------------------
 security/apparmor/policy.c         | 68 +++++++++++++-------------------
 3 files changed, 67 insertions(+), 83 deletions(-)

(limited to 'security')

diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 5488d095af6f..bc28f2670ee4 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -434,7 +434,7 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm)
 				new_profile = aa_get_profile(profile);
 				goto x_clear;
 			} else if (perms.xindex & AA_X_UNCONFINED) {
-				new_profile = aa_get_profile(ns->unconfined);
+				new_profile = aa_get_newest_profile(ns->unconfined);
 				info = "ux fallback";
 			} else {
 				error = -ENOENT;
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index e9f2baf4467e..1ddd5e5728b8 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -68,6 +68,7 @@ enum profile_flags {
 	PFLAG_NO_LIST_REF = 0x40,	/* list doesn't keep profile ref */
 	PFLAG_OLD_NULL_TRANS = 0x100,	/* use // as the null transition */
 	PFLAG_INVALID = 0x200,		/* profile replaced/removed */
+	PFLAG_NS_COUNT = 0x400,		/* carries NS ref count */
 
 	/* These flags must correspond with PATH_flags */
 	PFLAG_MEDIATE_DELETED = 0x10000, /* mediate instead delegate deleted */
@@ -78,7 +79,6 @@ struct aa_profile;
 /* struct aa_policy - common part of both namespaces and profiles
  * @name: name of the object
  * @hname - The hierarchical name
- * @count: reference count of the obj
  * @list: list policy object is on
  * @rcu: rcu head used when removing from @list
  * @profiles: head of the profiles list contained in the object
@@ -86,7 +86,6 @@ struct aa_profile;
 struct aa_policy {
 	char *name;
 	char *hname;
-	struct kref count;
 	struct list_head list;
 	struct list_head profiles;
 	struct rcu_head rcu;
@@ -157,6 +156,7 @@ struct aa_replacedby {
 
 /* struct aa_profile - basic confinement data
  * @base - base components of the profile (name, refcount, lists, lock ...)
+ * @count: reference count of the obj
  * @parent: parent of profile
  * @ns: namespace the profile is in
  * @replacedby: is set to the profile that replaced this profile
@@ -189,6 +189,7 @@ struct aa_replacedby {
  */
 struct aa_profile {
 	struct aa_policy base;
+	struct kref count;
 	struct aa_profile __rcu *parent;
 
 	struct aa_namespace *ns;
@@ -223,40 +224,6 @@ void aa_free_namespace_kref(struct kref *kref);
 struct aa_namespace *aa_find_namespace(struct aa_namespace *root,
 				       const char *name);
 
-static inline struct aa_policy *aa_get_common(struct aa_policy *c)
-{
-	if (c)
-		kref_get(&c->count);
-
-	return c;
-}
-
-/**
- * aa_get_namespace - increment references count on @ns
- * @ns: namespace to increment reference count of (MAYBE NULL)
- *
- * Returns: pointer to @ns, if @ns is NULL returns NULL
- * Requires: @ns must be held with valid refcount when called
- */
-static inline struct aa_namespace *aa_get_namespace(struct aa_namespace *ns)
-{
-	if (ns)
-		kref_get(&(ns->base.count));
-
-	return ns;
-}
-
-/**
- * aa_put_namespace - decrement refcount on @ns
- * @ns: namespace to put reference of
- *
- * Decrement reference count of @ns and if no longer in use free it
- */
-static inline void aa_put_namespace(struct aa_namespace *ns)
-{
-	if (ns)
-		kref_put(&ns->base.count, aa_free_namespace_kref);
-}
 
 void aa_free_replacedby_kref(struct kref *kref);
 struct aa_profile *aa_alloc_profile(const char *name);
@@ -285,7 +252,7 @@ ssize_t aa_remove_profiles(char *name, size_t size);
 static inline struct aa_profile *aa_get_profile(struct aa_profile *p)
 {
 	if (p)
-		kref_get(&(p->base.count));
+		kref_get(&(p->count));
 
 	return p;
 }
@@ -299,7 +266,7 @@ static inline struct aa_profile *aa_get_profile(struct aa_profile *p)
  */
 static inline struct aa_profile *aa_get_profile_not0(struct aa_profile *p)
 {
-	if (p && kref_get_not0(&p->base.count))
+	if (p && kref_get_not0(&p->count))
 		return p;
 
 	return NULL;
@@ -319,7 +286,7 @@ static inline struct aa_profile *aa_get_profile_rcu(struct aa_profile __rcu **p)
 	rcu_read_lock();
 	do {
 		c = rcu_dereference(*p);
-	} while (c && !kref_get_not0(&c->base.count));
+	} while (c && !kref_get_not0(&c->count));
 	rcu_read_unlock();
 
 	return c;
@@ -350,8 +317,12 @@ static inline struct aa_profile *aa_get_newest_profile(struct aa_profile *p)
  */
 static inline void aa_put_profile(struct aa_profile *p)
 {
-	if (p)
-		kref_put(&p->base.count, aa_free_profile_kref);
+	if (p) {
+		if (p->flags & PFLAG_NS_COUNT)
+			kref_put(&p->count, aa_free_namespace_kref);
+		else
+			kref_put(&p->count, aa_free_profile_kref);
+	}
 }
 
 static inline struct aa_replacedby *aa_get_replacedby(struct aa_replacedby *p)
@@ -378,6 +349,33 @@ static inline void __aa_update_replacedby(struct aa_profile *orig,
 	aa_put_profile(tmp);
 }
 
+/**
+ * aa_get_namespace - increment references count on @ns
+ * @ns: namespace to increment reference count of (MAYBE NULL)
+ *
+ * Returns: pointer to @ns, if @ns is NULL returns NULL
+ * Requires: @ns must be held with valid refcount when called
+ */
+static inline struct aa_namespace *aa_get_namespace(struct aa_namespace *ns)
+{
+	if (ns)
+		aa_get_profile(ns->unconfined);
+
+	return ns;
+}
+
+/**
+ * aa_put_namespace - decrement refcount on @ns
+ * @ns: namespace to put reference of
+ *
+ * Decrement reference count of @ns and if no longer in use free it
+ */
+static inline void aa_put_namespace(struct aa_namespace *ns)
+{
+	if (ns)
+		aa_put_profile(ns->unconfined);
+}
+
 static inline int AUDIT_MODE(struct aa_profile *profile)
 {
 	if (aa_g_audit != AUDIT_NORMAL)
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 41b8f275c626..0ceee967434c 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -141,7 +141,6 @@ static bool policy_init(struct aa_policy *policy, const char *prefix,
 	policy->name = (char *)hname_tail(policy->hname);
 	INIT_LIST_HEAD(&policy->list);
 	INIT_LIST_HEAD(&policy->profiles);
-	kref_init(&policy->count);
 
 	return 1;
 }
@@ -292,14 +291,10 @@ static struct aa_namespace *alloc_namespace(const char *prefix,
 		goto fail_unconfined;
 
 	ns->unconfined->flags = PFLAG_UNCONFINED | PFLAG_IX_ON_NAME_ERROR |
-	    PFLAG_IMMUTABLE;
+	    PFLAG_IMMUTABLE | PFLAG_NS_COUNT;
 
-	/*
-	 * released by free_namespace, however __remove_namespace breaks
-	 * the cyclic references (ns->unconfined, and unconfined->ns) and
-	 * replaces with refs to parent namespace unconfined
-	 */
-	ns->unconfined->ns = aa_get_namespace(ns);
+	/* ns and ns->unconfined share ns->unconfined refcount */
+	ns->unconfined->ns = ns;
 
 	atomic_set(&ns->uniq_null, 0);
 
@@ -312,6 +307,7 @@ fail_ns:
 	return NULL;
 }
 
+static void free_profile(struct aa_profile *profile);
 /**
  * free_namespace - free a profile namespace
  * @ns: the namespace to free  (MAYBE NULL)
@@ -327,20 +323,33 @@ static void free_namespace(struct aa_namespace *ns)
 	policy_destroy(&ns->base);
 	aa_put_namespace(ns->parent);
 
-	if (ns->unconfined && ns->unconfined->ns == ns)
-		ns->unconfined->ns = NULL;
-
-	aa_put_profile(ns->unconfined);
+	ns->unconfined->ns = NULL;
+	free_profile(ns->unconfined);
 	kzfree(ns);
 }
 
+/**
+ * aa_free_namespace_rcu - free aa_namespace by rcu
+ * @head: rcu_head callback for freeing of a profile  (NOT NULL)
+ *
+ * rcu_head is to the unconfined profile associated with the namespace
+ */
+static void aa_free_namespace_rcu(struct rcu_head *head)
+{
+	struct aa_profile *p = container_of(head, struct aa_profile, base.rcu);
+	free_namespace(p->ns);
+}
+
 /**
  * aa_free_namespace_kref - free aa_namespace by kref (see aa_put_namespace)
  * @kr: kref callback for freeing of a namespace  (NOT NULL)
+ *
+ * kref is to the unconfined profile associated with the namespace
  */
 void aa_free_namespace_kref(struct kref *kref)
 {
-	free_namespace(container_of(kref, struct aa_namespace, base.count));
+	struct aa_profile *p = container_of(kref, struct aa_profile, count);
+	call_rcu(&p->base.rcu, aa_free_namespace_rcu);
 }
 
 /**
@@ -494,8 +503,6 @@ static void __ns_list_release(struct list_head *head);
  */
 static void destroy_namespace(struct aa_namespace *ns)
 {
-	struct aa_profile *unconfined;
-
 	if (!ns)
 		return;
 
@@ -506,30 +513,11 @@ static void destroy_namespace(struct aa_namespace *ns)
 	/* release all sub namespaces */
 	__ns_list_release(&ns->sub_ns);
 
-	unconfined = ns->unconfined;
-	/*
-	 * break the ns, unconfined profile cyclic reference and forward
-	 * all new unconfined profiles requests to the parent namespace
-	 * This will result in all confined tasks that have a profile
-	 * being removed, inheriting the parent->unconfined profile.
-	 */
 	if (ns->parent)
-		ns->unconfined = aa_get_profile(ns->parent->unconfined);
-
-	/* release original ns->unconfined ref */
-	aa_put_profile(unconfined);
-
+		__aa_update_replacedby(ns->unconfined, ns->parent->unconfined);
 	mutex_unlock(&ns->lock);
 }
 
-static void aa_put_ns_rcu(struct rcu_head *head)
-{
-	struct aa_namespace *ns = container_of(head, struct aa_namespace,
-					       base.rcu);
-	/* release ns->base.list ref */
-	aa_put_namespace(ns);
-}
-
 /**
  * __remove_namespace - remove a namespace and all its children
  * @ns: namespace to be removed  (NOT NULL)
@@ -540,10 +528,8 @@ static void __remove_namespace(struct aa_namespace *ns)
 {
 	/* remove ns from namespace list */
 	list_del_rcu(&ns->base.list);
-
 	destroy_namespace(ns);
-
-	call_rcu(&ns->base.rcu, aa_put_ns_rcu);
+	aa_put_namespace(ns);
 }
 
 /**
@@ -656,8 +642,7 @@ static void aa_free_profile_rcu(struct rcu_head *head)
  */
 void aa_free_profile_kref(struct kref *kref)
 {
-	struct aa_profile *p = container_of(kref, struct aa_profile,
-					    base.count);
+	struct aa_profile *p = container_of(kref, struct aa_profile, count);
 	call_rcu(&p->base.rcu, aa_free_profile_rcu);
 }
 
@@ -683,6 +668,7 @@ struct aa_profile *aa_alloc_profile(const char *hname)
 
 	if (!policy_init(&profile->base, NULL, hname))
 		goto fail;
+	kref_init(&profile->count);
 
 	/* refcount released by caller */
 	return profile;
@@ -884,7 +870,7 @@ struct aa_profile *aa_lookup_profile(struct aa_namespace *ns, const char *hname)
 
 	/* the unconfined profile is not in the regular profile list */
 	if (!profile && strcmp(hname, "unconfined") == 0)
-		profile = aa_get_profile(ns->unconfined);
+		profile = aa_get_newest_profile(ns->unconfined);
 
 	/* refcount released by caller */
 	return profile;
-- 
cgit v1.2.3


From 742058b0f3a2ed32e2a7349aff97989dc4e32452 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 10 Jul 2013 21:10:43 -0700
Subject: apparmor: rework namespace free path

namespaces now completely use the unconfined profile to track the
refcount and rcu freeing cycle. So rework the code to simplify (track
everything through the profile path right up to the end), and move the
rcu_head from policy base to profile as the namespace no longer needs
it.

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/include/policy.h | 12 ++++--------
 security/apparmor/policy.c         | 33 ++++++---------------------------
 2 files changed, 10 insertions(+), 35 deletions(-)

(limited to 'security')

diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 1ddd5e5728b8..4eafdd88f44e 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -80,7 +80,6 @@ struct aa_profile;
  * @name: name of the object
  * @hname - The hierarchical name
  * @list: list policy object is on
- * @rcu: rcu head used when removing from @list
  * @profiles: head of the profiles list contained in the object
  */
 struct aa_policy {
@@ -88,7 +87,6 @@ struct aa_policy {
 	char *hname;
 	struct list_head list;
 	struct list_head profiles;
-	struct rcu_head rcu;
 };
 
 /* struct aa_ns_acct - accounting of profiles in namespace
@@ -157,6 +155,7 @@ struct aa_replacedby {
 /* struct aa_profile - basic confinement data
  * @base - base components of the profile (name, refcount, lists, lock ...)
  * @count: reference count of the obj
+ * @rcu: rcu head used when removing from @list
  * @parent: parent of profile
  * @ns: namespace the profile is in
  * @replacedby: is set to the profile that replaced this profile
@@ -190,6 +189,7 @@ struct aa_replacedby {
 struct aa_profile {
 	struct aa_policy base;
 	struct kref count;
+	struct rcu_head rcu;
 	struct aa_profile __rcu *parent;
 
 	struct aa_namespace *ns;
@@ -317,12 +317,8 @@ static inline struct aa_profile *aa_get_newest_profile(struct aa_profile *p)
  */
 static inline void aa_put_profile(struct aa_profile *p)
 {
-	if (p) {
-		if (p->flags & PFLAG_NS_COUNT)
-			kref_put(&p->count, aa_free_namespace_kref);
-		else
-			kref_put(&p->count, aa_free_profile_kref);
-	}
+	if (p)
+		kref_put(&p->count, aa_free_profile_kref);
 }
 
 static inline struct aa_replacedby *aa_get_replacedby(struct aa_replacedby *p)
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 0ceee967434c..aee2e71827cd 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -328,30 +328,6 @@ static void free_namespace(struct aa_namespace *ns)
 	kzfree(ns);
 }
 
-/**
- * aa_free_namespace_rcu - free aa_namespace by rcu
- * @head: rcu_head callback for freeing of a profile  (NOT NULL)
- *
- * rcu_head is to the unconfined profile associated with the namespace
- */
-static void aa_free_namespace_rcu(struct rcu_head *head)
-{
-	struct aa_profile *p = container_of(head, struct aa_profile, base.rcu);
-	free_namespace(p->ns);
-}
-
-/**
- * aa_free_namespace_kref - free aa_namespace by kref (see aa_put_namespace)
- * @kr: kref callback for freeing of a namespace  (NOT NULL)
- *
- * kref is to the unconfined profile associated with the namespace
- */
-void aa_free_namespace_kref(struct kref *kref)
-{
-	struct aa_profile *p = container_of(kref, struct aa_profile, count);
-	call_rcu(&p->base.rcu, aa_free_namespace_rcu);
-}
-
 /**
  * __aa_find_namespace - find a namespace on a list by @name
  * @head: list to search for namespace on  (NOT NULL)
@@ -632,8 +608,11 @@ static void free_profile(struct aa_profile *profile)
  */
 static void aa_free_profile_rcu(struct rcu_head *head)
 {
-	struct aa_profile *p = container_of(head, struct aa_profile, base.rcu);
-	free_profile(p);
+	struct aa_profile *p = container_of(head, struct aa_profile, rcu);
+	if (p->flags & PFLAG_NS_COUNT)
+		free_namespace(p->ns);
+	else
+		free_profile(p);
 }
 
 /**
@@ -643,7 +622,7 @@ static void aa_free_profile_rcu(struct rcu_head *head)
 void aa_free_profile_kref(struct kref *kref)
 {
 	struct aa_profile *p = container_of(kref, struct aa_profile, count);
-	call_rcu(&p->base.rcu, aa_free_profile_rcu);
+	call_rcu(&p->rcu, aa_free_profile_rcu);
 }
 
 /**
-- 
cgit v1.2.3


From 8651e1d6572bc2c061073f05fabcd7175789259d Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 10 Jul 2013 21:11:43 -0700
Subject: apparmor: make free_profile available outside of policy.c

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/policy.h | 1 +
 security/apparmor/policy.c         | 9 ++++-----
 security/apparmor/policy_unpack.c  | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'security')

diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 4eafdd88f44e..8a68226ff7f7 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -228,6 +228,7 @@ struct aa_namespace *aa_find_namespace(struct aa_namespace *root,
 void aa_free_replacedby_kref(struct kref *kref);
 struct aa_profile *aa_alloc_profile(const char *name);
 struct aa_profile *aa_new_null_profile(struct aa_profile *parent, int hat);
+void aa_free_profile(struct aa_profile *profile);
 void aa_free_profile_kref(struct kref *kref);
 struct aa_profile *aa_find_child(struct aa_profile *parent, const char *name);
 struct aa_profile *aa_lookup_profile(struct aa_namespace *ns, const char *name);
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index aee2e71827cd..7a80b0c7e0ce 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -307,7 +307,6 @@ fail_ns:
 	return NULL;
 }
 
-static void free_profile(struct aa_profile *profile);
 /**
  * free_namespace - free a profile namespace
  * @ns: the namespace to free  (MAYBE NULL)
@@ -324,7 +323,7 @@ static void free_namespace(struct aa_namespace *ns)
 	aa_put_namespace(ns->parent);
 
 	ns->unconfined->ns = NULL;
-	free_profile(ns->unconfined);
+	aa_free_profile(ns->unconfined);
 	kzfree(ns);
 }
 
@@ -568,7 +567,7 @@ void aa_free_replacedby_kref(struct kref *kref)
 }
 
 /**
- * free_profile - free a profile
+ * aa_free_profile - free a profile
  * @profile: the profile to free  (MAYBE NULL)
  *
  * Free a profile, its hats and null_profile. All references to the profile,
@@ -577,7 +576,7 @@ void aa_free_replacedby_kref(struct kref *kref)
  * If the profile was referenced from a task context, free_profile() will
  * be called from an rcu callback routine, so we must not sleep here.
  */
-static void free_profile(struct aa_profile *profile)
+void aa_free_profile(struct aa_profile *profile)
 {
 	AA_DEBUG("%s(%p)\n", __func__, profile);
 
@@ -612,7 +611,7 @@ static void aa_free_profile_rcu(struct rcu_head *head)
 	if (p->flags & PFLAG_NS_COUNT)
 		free_namespace(p->ns);
 	else
-		free_profile(p);
+		aa_free_profile(p);
 }
 
 /**
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 080a26b11f01..ce15313896ee 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -616,7 +616,7 @@ fail:
 	else if (!name)
 		name = "unknown";
 	audit_iface(profile, name, "failed to unpack profile", e, error);
-	aa_put_profile(profile);
+	aa_free_profile(profile);
 
 	return ERR_PTR(error);
 }
@@ -763,7 +763,7 @@ int aa_unpack(void *udata, size_t size, struct list_head *lh, const char **ns)
 
 		error = verify_profile(profile);
 		if (error) {
-			aa_put_profile(profile);
+			aa_free_profile(profile);
 			goto fail;
 		}
 
-- 
cgit v1.2.3


From 038165070aa55375d4bdd2f84b34a486feca63d6 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 10 Jul 2013 21:12:43 -0700
Subject: apparmor: allow setting any profile into the unconfined state

Allow emulating the default profile behavior from boot, by allowing
loading of a profile in the unconfined state into a new NS.

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/domain.c                | 4 ++--
 security/apparmor/include/policy.h        | 6 +++---
 security/apparmor/include/policy_unpack.h | 7 +++++++
 security/apparmor/policy.c                | 6 ++++--
 security/apparmor/policy_unpack.c         | 8 ++++++--
 5 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'security')

diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index bc28f2670ee4..26c607c971f5 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -371,8 +371,8 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm)
 	error = aa_path_name(&bprm->file->f_path, profile->path_flags, &buffer,
 			     &name, &info);
 	if (error) {
-		if (profile->flags &
-		    (PFLAG_IX_ON_NAME_ERROR | PFLAG_UNCONFINED))
+		if (unconfined(profile) ||
+		    (profile->flags & PFLAG_IX_ON_NAME_ERROR))
 			error = 0;
 		name = bprm->filename;
 		goto audit;
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 8a68226ff7f7..65662e3c75cf 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -56,11 +56,11 @@ enum profile_mode {
 	APPARMOR_ENFORCE,	/* enforce access rules */
 	APPARMOR_COMPLAIN,	/* allow and log access violations */
 	APPARMOR_KILL,		/* kill task on access violation */
+	APPARMOR_UNCONFINED,	/* profile set to unconfined */
 };
 
 enum profile_flags {
 	PFLAG_HAT = 1,			/* profile is a hat */
-	PFLAG_UNCONFINED = 2,		/* profile is an unconfined profile */
 	PFLAG_NULL = 4,			/* profile is null learning profile */
 	PFLAG_IX_ON_NAME_ERROR = 8,	/* fallback to ix on name lookup fail */
 	PFLAG_IMMUTABLE = 0x10,		/* don't allow changes/replacement */
@@ -199,7 +199,7 @@ struct aa_profile {
 	struct aa_dfa *xmatch;
 	int xmatch_len;
 	enum audit_mode audit;
-	enum profile_mode mode;
+	long mode;
 	long flags;
 	u32 path_flags;
 	int size;
@@ -240,7 +240,7 @@ ssize_t aa_remove_profiles(char *name, size_t size);
 #define PROF_ADD 1
 #define PROF_REPLACE 0
 
-#define unconfined(X) ((X)->flags & PFLAG_UNCONFINED)
+#define unconfined(X) ((X)->mode == APPARMOR_UNCONFINED)
 
 
 /**
diff --git a/security/apparmor/include/policy_unpack.h b/security/apparmor/include/policy_unpack.h
index 0d7ad722b8ff..c214fb88b1bc 100644
--- a/security/apparmor/include/policy_unpack.h
+++ b/security/apparmor/include/policy_unpack.h
@@ -27,6 +27,13 @@ struct aa_load_ent {
 void aa_load_ent_free(struct aa_load_ent *ent);
 struct aa_load_ent *aa_load_ent_alloc(void);
 
+#define PACKED_FLAG_HAT		1
+
+#define PACKED_MODE_ENFORCE	0
+#define PACKED_MODE_COMPLAIN	1
+#define PACKED_MODE_KILL	2
+#define PACKED_MODE_UNCONFINED	3
+
 int aa_unpack(void *udata, size_t size, struct list_head *lh, const char **ns);
 
 #endif /* __POLICY_INTERFACE_H */
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 7a80b0c7e0ce..2e4e2ecb25bc 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -96,6 +96,7 @@ const char *const profile_mode_names[] = {
 	"enforce",
 	"complain",
 	"kill",
+	"unconfined",
 };
 
 /**
@@ -290,8 +291,9 @@ static struct aa_namespace *alloc_namespace(const char *prefix,
 	if (!ns->unconfined)
 		goto fail_unconfined;
 
-	ns->unconfined->flags = PFLAG_UNCONFINED | PFLAG_IX_ON_NAME_ERROR |
-	    PFLAG_IMMUTABLE | PFLAG_NS_COUNT;
+	ns->unconfined->flags = PFLAG_IX_ON_NAME_ERROR |
+		PFLAG_IMMUTABLE | PFLAG_NS_COUNT;
+	ns->unconfined->mode = APPARMOR_UNCONFINED;
 
 	/* ns and ns->unconfined share ns->unconfined refcount */
 	ns->unconfined->ns = ns;
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index ce15313896ee..cac0aa075787 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -511,12 +511,16 @@ static struct aa_profile *unpack_profile(struct aa_ext *e)
 		goto fail;
 	if (!unpack_u32(e, &tmp, NULL))
 		goto fail;
-	if (tmp)
+	if (tmp & PACKED_FLAG_HAT)
 		profile->flags |= PFLAG_HAT;
 	if (!unpack_u32(e, &tmp, NULL))
 		goto fail;
-	if (tmp)
+	if (tmp == PACKED_MODE_COMPLAIN)
 		profile->mode = APPARMOR_COMPLAIN;
+	else if (tmp == PACKED_MODE_KILL)
+		profile->mode = APPARMOR_KILL;
+	else if (tmp == PACKED_MODE_UNCONFINED)
+		profile->mode = APPARMOR_UNCONFINED;
 	if (!unpack_u32(e, &tmp, NULL))
 		goto fail;
 	if (tmp)
-- 
cgit v1.2.3


From 0d259f043f5f60f74c4fd020aac190cb6450e918 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 10 Jul 2013 21:13:43 -0700
Subject: apparmor: add interface files for profiles and namespaces

Add basic interface files to access namespace and profile information.
The interface files are created when a profile is loaded and removed
when the profile or namespace is removed.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c         | 322 ++++++++++++++++++++++++++++++++-
 security/apparmor/include/apparmorfs.h |  38 ++++
 security/apparmor/include/audit.h      |   1 -
 security/apparmor/include/policy.h     |  21 ++-
 security/apparmor/lsm.c                |   6 +-
 security/apparmor/policy.c             |  75 ++++++--
 security/apparmor/procattr.c           |   2 +-
 7 files changed, 436 insertions(+), 29 deletions(-)

(limited to 'security')

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 3ed56e21a9fd..0fdd08c6ea59 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -12,6 +12,7 @@
  * License.
  */
 
+#include <linux/ctype.h>
 #include <linux/security.h>
 #include <linux/vmalloc.h>
 #include <linux/module.h>
@@ -27,6 +28,45 @@
 #include "include/policy.h"
 #include "include/resource.h"
 
+/**
+ * aa_mangle_name - mangle a profile name to std profile layout form
+ * @name: profile name to mangle  (NOT NULL)
+ * @target: buffer to store mangled name, same length as @name (MAYBE NULL)
+ *
+ * Returns: length of mangled name
+ */
+static int mangle_name(char *name, char *target)
+{
+	char *t = target;
+
+	while (*name == '/' || *name == '.')
+		name++;
+
+	if (target) {
+		for (; *name; name++) {
+			if (*name == '/')
+				*(t)++ = '.';
+			else if (isspace(*name))
+				*(t)++ = '_';
+			else if (isalnum(*name) || strchr("._-", *name))
+				*(t)++ = *name;
+		}
+
+		*t = 0;
+	} else {
+		int len = 0;
+		for (; *name; name++) {
+			if (isalnum(*name) || isspace(*name) ||
+			    strchr("/._-", *name))
+				len++;
+		}
+
+		return len;
+	}
+
+	return t - target;
+}
+
 /**
  * aa_simple_write_to_buffer - common routine for getting policy from user
  * @op: operation doing the user buffer copy
@@ -182,8 +222,263 @@ const struct file_operations aa_fs_seq_file_ops = {
 	.release	= single_release,
 };
 
-/** Base file system setup **/
+static int aa_fs_seq_profile_open(struct inode *inode, struct file *file,
+				  int (*show)(struct seq_file *, void *))
+{
+	struct aa_replacedby *r = aa_get_replacedby(inode->i_private);
+	int error = single_open(file, show, r);
+
+	if (error) {
+		file->private_data = NULL;
+		aa_put_replacedby(r);
+	}
+
+	return error;
+}
+
+static int aa_fs_seq_profile_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = (struct seq_file *) file->private_data;
+	if (seq)
+		aa_put_replacedby(seq->private);
+	return single_release(inode, file);
+}
+
+static int aa_fs_seq_profname_show(struct seq_file *seq, void *v)
+{
+	struct aa_replacedby *r = seq->private;
+	struct aa_profile *profile = aa_get_profile_rcu(&r->profile);
+	seq_printf(seq, "%s\n", profile->base.name);
+	aa_put_profile(profile);
+
+	return 0;
+}
+
+static int aa_fs_seq_profname_open(struct inode *inode, struct file *file)
+{
+	return aa_fs_seq_profile_open(inode, file, aa_fs_seq_profname_show);
+}
+
+static const struct file_operations aa_fs_profname_fops = {
+	.owner		= THIS_MODULE,
+	.open		= aa_fs_seq_profname_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= aa_fs_seq_profile_release,
+};
+
+static int aa_fs_seq_profmode_show(struct seq_file *seq, void *v)
+{
+	struct aa_replacedby *r = seq->private;
+	struct aa_profile *profile = aa_get_profile_rcu(&r->profile);
+	seq_printf(seq, "%s\n", aa_profile_mode_names[profile->mode]);
+	aa_put_profile(profile);
+
+	return 0;
+}
+
+static int aa_fs_seq_profmode_open(struct inode *inode, struct file *file)
+{
+	return aa_fs_seq_profile_open(inode, file, aa_fs_seq_profmode_show);
+}
+
+static const struct file_operations aa_fs_profmode_fops = {
+	.owner		= THIS_MODULE,
+	.open		= aa_fs_seq_profmode_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= aa_fs_seq_profile_release,
+};
+
+/** fns to setup dynamic per profile/namespace files **/
+void __aa_fs_profile_rmdir(struct aa_profile *profile)
+{
+	struct aa_profile *child;
+	int i;
+
+	if (!profile)
+		return;
+
+	list_for_each_entry(child, &profile->base.profiles, base.list)
+		__aa_fs_profile_rmdir(child);
+
+	for (i = AAFS_PROF_SIZEOF - 1; i >= 0; --i) {
+		struct aa_replacedby *r;
+		if (!profile->dents[i])
+			continue;
+
+		r = profile->dents[i]->d_inode->i_private;
+		securityfs_remove(profile->dents[i]);
+		aa_put_replacedby(r);
+		profile->dents[i] = NULL;
+	}
+}
+
+void __aa_fs_profile_migrate_dents(struct aa_profile *old,
+				   struct aa_profile *new)
+{
+	int i;
+
+	for (i = 0; i < AAFS_PROF_SIZEOF; i++) {
+		new->dents[i] = old->dents[i];
+		old->dents[i] = NULL;
+	}
+}
+
+static struct dentry *create_profile_file(struct dentry *dir, const char *name,
+					  struct aa_profile *profile,
+					  const struct file_operations *fops)
+{
+	struct aa_replacedby *r = aa_get_replacedby(profile->replacedby);
+	struct dentry *dent;
+
+	dent = securityfs_create_file(name, S_IFREG | 0444, dir, r, fops);
+	if (IS_ERR(dent))
+		aa_put_replacedby(r);
+
+	return dent;
+}
+
+/* requires lock be held */
+int __aa_fs_profile_mkdir(struct aa_profile *profile, struct dentry *parent)
+{
+	struct aa_profile *child;
+	struct dentry *dent = NULL, *dir;
+	int error;
+
+	if (!parent) {
+		struct aa_profile *p;
+		p = aa_deref_parent(profile);
+		dent = prof_dir(p);
+		/* adding to parent that previously didn't have children */
+		dent = securityfs_create_dir("profiles", dent);
+		if (IS_ERR(dent))
+			goto fail;
+		prof_child_dir(p) = parent = dent;
+	}
+
+	if (!profile->dirname) {
+		int len, id_len;
+		len = mangle_name(profile->base.name, NULL);
+		id_len = snprintf(NULL, 0, ".%ld", profile->ns->uniq_id);
+
+		profile->dirname = kmalloc(len + id_len + 1, GFP_KERNEL);
+		if (!profile->dirname)
+			goto fail;
+
+		mangle_name(profile->base.name, profile->dirname);
+		sprintf(profile->dirname + len, ".%ld", profile->ns->uniq_id++);
+	}
+
+	dent = securityfs_create_dir(profile->dirname, parent);
+	if (IS_ERR(dent))
+		goto fail;
+	prof_dir(profile) = dir = dent;
+
+	dent = create_profile_file(dir, "name", profile, &aa_fs_profname_fops);
+	if (IS_ERR(dent))
+		goto fail;
+	profile->dents[AAFS_PROF_NAME] = dent;
+
+	dent = create_profile_file(dir, "mode", profile, &aa_fs_profmode_fops);
+	if (IS_ERR(dent))
+		goto fail;
+	profile->dents[AAFS_PROF_MODE] = dent;
+
+	list_for_each_entry(child, &profile->base.profiles, base.list) {
+		error = __aa_fs_profile_mkdir(child, prof_child_dir(profile));
+		if (error)
+			goto fail2;
+	}
+
+	return 0;
+
+fail:
+	error = PTR_ERR(dent);
+
+fail2:
+	__aa_fs_profile_rmdir(profile);
+
+	return error;
+}
+
+void __aa_fs_namespace_rmdir(struct aa_namespace *ns)
+{
+	struct aa_namespace *sub;
+	struct aa_profile *child;
+	int i;
+
+	if (!ns)
+		return;
+
+	list_for_each_entry(child, &ns->base.profiles, base.list)
+		__aa_fs_profile_rmdir(child);
+
+	list_for_each_entry(sub, &ns->sub_ns, base.list) {
+		mutex_lock(&sub->lock);
+		__aa_fs_namespace_rmdir(sub);
+		mutex_unlock(&sub->lock);
+	}
+
+	for (i = AAFS_NS_SIZEOF - 1; i >= 0; --i) {
+		securityfs_remove(ns->dents[i]);
+		ns->dents[i] = NULL;
+	}
+}
+
+int __aa_fs_namespace_mkdir(struct aa_namespace *ns, struct dentry *parent,
+			    const char *name)
+{
+	struct aa_namespace *sub;
+	struct aa_profile *child;
+	struct dentry *dent, *dir;
+	int error;
+
+	if (!name)
+		name = ns->base.name;
+
+	dent = securityfs_create_dir(name, parent);
+	if (IS_ERR(dent))
+		goto fail;
+	ns_dir(ns) = dir = dent;
+
+	dent = securityfs_create_dir("profiles", dir);
+	if (IS_ERR(dent))
+		goto fail;
+	ns_subprofs_dir(ns) = dent;
 
+	dent = securityfs_create_dir("namespaces", dir);
+	if (IS_ERR(dent))
+		goto fail;
+	ns_subns_dir(ns) = dent;
+
+	list_for_each_entry(child, &ns->base.profiles, base.list) {
+		error = __aa_fs_profile_mkdir(child, ns_subprofs_dir(ns));
+		if (error)
+			goto fail2;
+	}
+
+	list_for_each_entry(sub, &ns->sub_ns, base.list) {
+		mutex_lock(&sub->lock);
+		error = __aa_fs_namespace_mkdir(sub, ns_subns_dir(ns), NULL);
+		mutex_unlock(&sub->lock);
+		if (error)
+			goto fail2;
+	}
+
+	return 0;
+
+fail:
+	error = PTR_ERR(dent);
+
+fail2:
+	__aa_fs_namespace_rmdir(ns);
+
+	return error;
+}
+
+
+/** Base file system setup **/
 static struct aa_fs_entry aa_fs_entry_file[] = {
 	AA_FS_FILE_STRING("mask", "create read write exec append mmap_exec " \
 				  "link lock"),
@@ -246,6 +541,7 @@ static int __init aafs_create_file(struct aa_fs_entry *fs_file,
 	return error;
 }
 
+static void __init aafs_remove_dir(struct aa_fs_entry *fs_dir);
 /**
  * aafs_create_dir - recursively create a directory entry in the securityfs
  * @fs_dir: aa_fs_entry (and all child entries) to build (NOT NULL)
@@ -256,17 +552,16 @@ static int __init aafs_create_file(struct aa_fs_entry *fs_file,
 static int __init aafs_create_dir(struct aa_fs_entry *fs_dir,
 				  struct dentry *parent)
 {
-	int error;
 	struct aa_fs_entry *fs_file;
+	struct dentry *dir;
+	int error;
 
-	fs_dir->dentry = securityfs_create_dir(fs_dir->name, parent);
-	if (IS_ERR(fs_dir->dentry)) {
-		error = PTR_ERR(fs_dir->dentry);
-		fs_dir->dentry = NULL;
-		goto failed;
-	}
+	dir = securityfs_create_dir(fs_dir->name, parent);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+	fs_dir->dentry = dir;
 
-	for (fs_file = fs_dir->v.files; fs_file->name; ++fs_file) {
+	for (fs_file = fs_dir->v.files; fs_file && fs_file->name; ++fs_file) {
 		if (fs_file->v_type == AA_FS_TYPE_DIR)
 			error = aafs_create_dir(fs_file, fs_dir->dentry);
 		else
@@ -278,6 +573,8 @@ static int __init aafs_create_dir(struct aa_fs_entry *fs_dir,
 	return 0;
 
 failed:
+	aafs_remove_dir(fs_dir);
+
 	return error;
 }
 
@@ -302,7 +599,7 @@ static void __init aafs_remove_dir(struct aa_fs_entry *fs_dir)
 {
 	struct aa_fs_entry *fs_file;
 
-	for (fs_file = fs_dir->v.files; fs_file->name; ++fs_file) {
+	for (fs_file = fs_dir->v.files; fs_file && fs_file->name; ++fs_file) {
 		if (fs_file->v_type == AA_FS_TYPE_DIR)
 			aafs_remove_dir(fs_file);
 		else
@@ -346,6 +643,11 @@ static int __init aa_create_aafs(void)
 	if (error)
 		goto error;
 
+	error = __aa_fs_namespace_mkdir(root_ns, aa_fs_entry.dentry,
+					"policy");
+	if (error)
+		goto error;
+
 	/* TODO: add support for apparmorfs_null and apparmorfs_mnt */
 
 	/* Report that AppArmor fs is enabled */
diff --git a/security/apparmor/include/apparmorfs.h b/security/apparmor/include/apparmorfs.h
index 7ea4769fab3f..2494e112f2bf 100644
--- a/security/apparmor/include/apparmorfs.h
+++ b/security/apparmor/include/apparmorfs.h
@@ -61,4 +61,42 @@ extern const struct file_operations aa_fs_seq_file_ops;
 
 extern void __init aa_destroy_aafs(void);
 
+struct aa_profile;
+struct aa_namespace;
+
+enum aafs_ns_type {
+	AAFS_NS_DIR,
+	AAFS_NS_PROFS,
+	AAFS_NS_NS,
+	AAFS_NS_COUNT,
+	AAFS_NS_MAX_COUNT,
+	AAFS_NS_SIZE,
+	AAFS_NS_MAX_SIZE,
+	AAFS_NS_OWNER,
+	AAFS_NS_SIZEOF,
+};
+
+enum aafs_prof_type {
+	AAFS_PROF_DIR,
+	AAFS_PROF_PROFS,
+	AAFS_PROF_NAME,
+	AAFS_PROF_MODE,
+	AAFS_PROF_SIZEOF,
+};
+
+#define ns_dir(X) ((X)->dents[AAFS_NS_DIR])
+#define ns_subns_dir(X) ((X)->dents[AAFS_NS_NS])
+#define ns_subprofs_dir(X) ((X)->dents[AAFS_NS_PROFS])
+
+#define prof_dir(X) ((X)->dents[AAFS_PROF_DIR])
+#define prof_child_dir(X) ((X)->dents[AAFS_PROF_PROFS])
+
+void __aa_fs_profile_rmdir(struct aa_profile *profile);
+void __aa_fs_profile_migrate_dents(struct aa_profile *old,
+				   struct aa_profile *new);
+int __aa_fs_profile_mkdir(struct aa_profile *profile, struct dentry *parent);
+void __aa_fs_namespace_rmdir(struct aa_namespace *ns);
+int __aa_fs_namespace_mkdir(struct aa_namespace *ns, struct dentry *parent,
+			    const char *name);
+
 #endif /* __AA_APPARMORFS_H */
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index 69d8cae634e7..30e8d7687259 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -27,7 +27,6 @@ struct aa_profile;
 
 extern const char *const audit_mode_names[];
 #define AUDIT_MAX_INDEX 5
-
 enum audit_mode {
 	AUDIT_NORMAL,		/* follow normal auditing of accesses */
 	AUDIT_QUIET_DENIED,	/* quiet all denied access messages */
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 65662e3c75cf..5c72231d1c42 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -29,8 +29,8 @@
 #include "file.h"
 #include "resource.h"
 
-extern const char *const profile_mode_names[];
-#define APPARMOR_NAMES_MAX_INDEX 3
+extern const char *const aa_profile_mode_names[];
+#define APPARMOR_MODE_NAMES_MAX_INDEX 4
 
 #define PROFILE_MODE(_profile, _mode)		\
 	((aa_g_profile_mode == (_mode)) ||	\
@@ -110,6 +110,8 @@ struct aa_ns_acct {
  * @unconfined: special unconfined profile for the namespace
  * @sub_ns: list of namespaces under the current namespace.
  * @uniq_null: uniq value used for null learning profiles
+ * @uniq_id: a unique id count for the profiles in the namespace
+ * @dents: dentries for the namespaces file entries in apparmorfs
  *
  * An aa_namespace defines the set profiles that are searched to determine
  * which profile to attach to a task.  Profiles can not be shared between
@@ -133,6 +135,9 @@ struct aa_namespace {
 	struct aa_profile *unconfined;
 	struct list_head sub_ns;
 	atomic_t uniq_null;
+	long uniq_id;
+
+	struct dentry *dents[AAFS_NS_SIZEOF];
 };
 
 /* struct aa_policydb - match engine for a policy
@@ -172,6 +177,9 @@ struct aa_replacedby {
  * @caps: capabilities for the profile
  * @rlimits: rlimits for the profile
  *
+ * @dents: dentries for the profiles file entries in apparmorfs
+ * @dirname: name of the profile dir in apparmorfs
+ *
  * The AppArmor profile contains the basic confinement data.  Each profile
  * has a name, and exists in a namespace.  The @name and @exec_match are
  * used to determine profile attachment against unconfined tasks.  All other
@@ -208,6 +216,9 @@ struct aa_profile {
 	struct aa_file_rules file;
 	struct aa_caps caps;
 	struct aa_rlimit rlimits;
+
+	char *dirname;
+	struct dentry *dents[AAFS_PROF_SIZEOF];
 };
 
 extern struct aa_namespace *root_ns;
@@ -243,6 +254,12 @@ ssize_t aa_remove_profiles(char *name, size_t size);
 #define unconfined(X) ((X)->mode == APPARMOR_UNCONFINED)
 
 
+static inline struct aa_profile *aa_deref_parent(struct aa_profile *p)
+{
+	return rcu_dereference_protected(p->parent,
+					 mutex_is_locked(&p->ns->lock));
+}
+
 /**
  * aa_get_profile - increment refcount on profile @p
  * @p: profile  (MAYBE NULL)
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index c8c148a738f7..edb3ce15e92d 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -843,7 +843,7 @@ static int param_get_mode(char *buffer, struct kernel_param *kp)
 	if (!apparmor_enabled)
 		return -EINVAL;
 
-	return sprintf(buffer, "%s", profile_mode_names[aa_g_profile_mode]);
+	return sprintf(buffer, "%s", aa_profile_mode_names[aa_g_profile_mode]);
 }
 
 static int param_set_mode(const char *val, struct kernel_param *kp)
@@ -858,8 +858,8 @@ static int param_set_mode(const char *val, struct kernel_param *kp)
 	if (!val)
 		return -EINVAL;
 
-	for (i = 0; i < APPARMOR_NAMES_MAX_INDEX; i++) {
-		if (strcmp(val, profile_mode_names[i]) == 0) {
+	for (i = 0; i < APPARMOR_MODE_NAMES_MAX_INDEX; i++) {
+		if (strcmp(val, aa_profile_mode_names[i]) == 0) {
 			aa_g_profile_mode = i;
 			return 0;
 		}
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 2e4e2ecb25bc..6172509fa2b7 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -92,7 +92,7 @@
 /* root profile namespace */
 struct aa_namespace *root_ns;
 
-const char *const profile_mode_names[] = {
+const char *const aa_profile_mode_names[] = {
 	"enforce",
 	"complain",
 	"kill",
@@ -394,7 +394,13 @@ static struct aa_namespace *aa_prepare_namespace(const char *name)
 		ns = alloc_namespace(root->base.hname, name);
 		if (!ns)
 			goto out;
-		/* add parent ref */
+		if (__aa_fs_namespace_mkdir(ns, ns_subns_dir(root), name)) {
+			AA_ERROR("Failed to create interface for ns %s\n",
+				 ns->base.name);
+			free_namespace(ns);
+			ns = NULL;
+			goto out;
+		}
 		ns->parent = aa_get_namespace(root);
 		list_add_rcu(&ns->base.list, &root->sub_ns);
 		/* add list ref */
@@ -456,6 +462,7 @@ static void __remove_profile(struct aa_profile *profile)
 	__profile_list_release(&profile->base.profiles);
 	/* released by free_profile */
 	__aa_update_replacedby(profile, profile->ns->unconfined);
+	__aa_fs_profile_rmdir(profile);
 	__list_remove_profile(profile);
 }
 
@@ -492,6 +499,7 @@ static void destroy_namespace(struct aa_namespace *ns)
 
 	if (ns->parent)
 		__aa_update_replacedby(ns->unconfined, ns->parent->unconfined);
+	__aa_fs_namespace_rmdir(ns);
 	mutex_unlock(&ns->lock);
 }
 
@@ -596,6 +604,7 @@ void aa_free_profile(struct aa_profile *profile)
 	aa_free_cap_rules(&profile->caps);
 	aa_free_rlimit_rules(&profile->rlimits);
 
+	kzfree(profile->dirname);
 	aa_put_dfa(profile->xmatch);
 	aa_put_dfa(profile->policy.dfa);
 	aa_put_replacedby(profile->replacedby);
@@ -986,8 +995,7 @@ static void __replace_profile(struct aa_profile *old, struct aa_profile *new,
 			/* inherit @child and its children */
 			/* TODO: update hname of inherited children */
 			/* list refcount transferred to @new */
-			p = rcu_dereference_protected(child->parent,
-					     mutex_is_locked(&child->ns->lock));
+			p = aa_deref_parent(child);
 			rcu_assign_pointer(child->parent, aa_get_profile(new));
 			list_add_rcu(&child->base.list, &new->base.profiles);
 			aa_put_profile(p);
@@ -995,14 +1003,18 @@ static void __replace_profile(struct aa_profile *old, struct aa_profile *new,
 	}
 
 	if (!rcu_access_pointer(new->parent)) {
-		struct aa_profile *parent = rcu_dereference(old->parent);
+		struct aa_profile *parent = aa_deref_parent(old);
 		rcu_assign_pointer(new->parent, aa_get_profile(parent));
 	}
 	__aa_update_replacedby(old, new);
 	if (share_replacedby) {
 		aa_put_replacedby(new->replacedby);
 		new->replacedby = aa_get_replacedby(old->replacedby);
-	}
+	} else if (!rcu_access_pointer(new->replacedby->profile))
+		/* aafs interface uses replacedby */
+		rcu_assign_pointer(new->replacedby->profile,
+				   aa_get_profile(new));
+	__aa_fs_profile_migrate_dents(old, new);
 
 	if (list_empty(&new->base.list)) {
 		/* new is not on a list already */
@@ -1118,7 +1130,33 @@ ssize_t aa_replace_profiles(void *udata, size_t size, bool noreplace)
 		}
 	}
 
-	/* do actual replacement */
+	/* create new fs entries for introspection if needed */
+	list_for_each_entry(ent, &lh, list) {
+		if (ent->old) {
+			/* inherit old interface files */
+
+			/* if (ent->rename)
+				TODO: support rename */
+		/* } else if (ent->rename) {
+			TODO: support rename */
+		} else {
+			struct dentry *parent;
+			if (rcu_access_pointer(ent->new->parent)) {
+				struct aa_profile *p;
+				p = aa_deref_parent(ent->new);
+				parent = prof_child_dir(p);
+			} else
+				parent = ns_subprofs_dir(ent->new->ns);
+			error = __aa_fs_profile_mkdir(ent->new, parent);
+		}
+
+		if (error) {
+			info = "failed to create ";
+			goto fail_lock;
+		}
+	}
+
+	/* Done with checks that may fail - do actual replacement */
 	list_for_each_entry_safe(ent, tmp, &lh, list) {
 		list_del_init(&ent->list);
 		op = (!ent->old && !ent->rename) ? OP_PROF_LOAD : OP_PROF_REPL;
@@ -1127,14 +1165,21 @@ ssize_t aa_replace_profiles(void *udata, size_t size, bool noreplace)
 
 		if (ent->old) {
 			__replace_profile(ent->old, ent->new, 1);
-			if (ent->rename)
+			if (ent->rename) {
+				/* aafs interface uses replacedby */
+				struct aa_replacedby *r = ent->new->replacedby;
+				rcu_assign_pointer(r->profile,
+						   aa_get_profile(ent->new));
 				__replace_profile(ent->rename, ent->new, 0);
+			}
 		} else if (ent->rename) {
+			/* aafs interface uses replacedby */
+			rcu_assign_pointer(ent->new->replacedby->profile,
+					   aa_get_profile(ent->new));
 			__replace_profile(ent->rename, ent->new, 0);
 		} else if (ent->new->parent) {
 			struct aa_profile *parent, *newest;
-			parent = rcu_dereference_protected(ent->new->parent,
-						    mutex_is_locked(&ns->lock));
+			parent = aa_deref_parent(ent->new);
 			newest = aa_get_newest_profile(parent);
 
 			/* parent replaced in this atomic set? */
@@ -1144,10 +1189,16 @@ ssize_t aa_replace_profiles(void *udata, size_t size, bool noreplace)
 				rcu_assign_pointer(ent->new->parent, newest);
 			} else
 				aa_put_profile(newest);
+			/* aafs interface uses replacedby */
+			rcu_assign_pointer(ent->new->replacedby->profile,
+					   aa_get_profile(ent->new));
 			__list_add_profile(&parent->base.profiles, ent->new);
-		} else
+		} else {
+			/* aafs interface uses replacedby */
+			rcu_assign_pointer(ent->new->replacedby->profile,
+					   aa_get_profile(ent->new));
 			__list_add_profile(&ns->base.profiles, ent->new);
-
+		}
 		aa_load_ent_free(ent);
 	}
 	mutex_unlock(&ns->lock);
diff --git a/security/apparmor/procattr.c b/security/apparmor/procattr.c
index 6c9390179b89..b125acc9aa26 100644
--- a/security/apparmor/procattr.c
+++ b/security/apparmor/procattr.c
@@ -37,7 +37,7 @@ int aa_getprocattr(struct aa_profile *profile, char **string)
 {
 	char *str;
 	int len = 0, mode_len = 0, ns_len = 0, name_len;
-	const char *mode_str = profile_mode_names[profile->mode];
+	const char *mode_str = aa_profile_mode_names[profile->mode];
 	const char *ns_name = NULL;
 	struct aa_namespace *ns = profile->ns;
 	struct aa_namespace *current_ns = __aa_current_profile()->ns;
-- 
cgit v1.2.3


From 556d0be74b19cb6288e5eb2f3216eac247d87968 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 10 Jul 2013 21:17:43 -0700
Subject: apparmor: add an optional profile attachment string for profiles

Add the ability to take in and report a human readable profile attachment
string for profiles so that attachment specifications can be easily
inspected.

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/apparmorfs.c         | 34 ++++++++++++++++++++++++++++++++++
 security/apparmor/include/apparmorfs.h |  1 +
 security/apparmor/include/policy.h     |  2 ++
 security/apparmor/policy_unpack.c      |  3 +++
 4 files changed, 40 insertions(+)

(limited to 'security')

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 0fdd08c6ea59..d6329aa7aa98 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -290,6 +290,34 @@ static const struct file_operations aa_fs_profmode_fops = {
 	.release	= aa_fs_seq_profile_release,
 };
 
+static int aa_fs_seq_profattach_show(struct seq_file *seq, void *v)
+{
+	struct aa_replacedby *r = seq->private;
+	struct aa_profile *profile = aa_get_profile_rcu(&r->profile);
+	if (profile->attach)
+		seq_printf(seq, "%s\n", profile->attach);
+	else if (profile->xmatch)
+		seq_puts(seq, "<unknown>\n");
+	else
+		seq_printf(seq, "%s\n", profile->base.name);
+	aa_put_profile(profile);
+
+	return 0;
+}
+
+static int aa_fs_seq_profattach_open(struct inode *inode, struct file *file)
+{
+	return aa_fs_seq_profile_open(inode, file, aa_fs_seq_profattach_show);
+}
+
+static const struct file_operations aa_fs_profattach_fops = {
+	.owner		= THIS_MODULE,
+	.open		= aa_fs_seq_profattach_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= aa_fs_seq_profile_release,
+};
+
 /** fns to setup dynamic per profile/namespace files **/
 void __aa_fs_profile_rmdir(struct aa_profile *profile)
 {
@@ -385,6 +413,12 @@ int __aa_fs_profile_mkdir(struct aa_profile *profile, struct dentry *parent)
 		goto fail;
 	profile->dents[AAFS_PROF_MODE] = dent;
 
+	dent = create_profile_file(dir, "attach", profile,
+				   &aa_fs_profattach_fops);
+	if (IS_ERR(dent))
+		goto fail;
+	profile->dents[AAFS_PROF_ATTACH] = dent;
+
 	list_for_each_entry(child, &profile->base.profiles, base.list) {
 		error = __aa_fs_profile_mkdir(child, prof_child_dir(profile));
 		if (error)
diff --git a/security/apparmor/include/apparmorfs.h b/security/apparmor/include/apparmorfs.h
index 2494e112f2bf..f91712cf1b30 100644
--- a/security/apparmor/include/apparmorfs.h
+++ b/security/apparmor/include/apparmorfs.h
@@ -81,6 +81,7 @@ enum aafs_prof_type {
 	AAFS_PROF_PROFS,
 	AAFS_PROF_NAME,
 	AAFS_PROF_MODE,
+	AAFS_PROF_ATTACH,
 	AAFS_PROF_SIZEOF,
 };
 
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 5c72231d1c42..59b36372ae40 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -165,6 +165,7 @@ struct aa_replacedby {
  * @ns: namespace the profile is in
  * @replacedby: is set to the profile that replaced this profile
  * @rename: optional profile name that this profile renamed
+ * @attach: human readable attachment string
  * @xmatch: optional extended matching for unconfined executables names
  * @xmatch_len: xmatch prefix len, used to determine xmatch priority
  * @audit: the auditing mode of the profile
@@ -204,6 +205,7 @@ struct aa_profile {
 	struct aa_replacedby *replacedby;
 	const char *rename;
 
+	const char *attach;
 	struct aa_dfa *xmatch;
 	int xmatch_len;
 	enum audit_mode audit;
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index cac0aa075787..bdaef2e1b2a0 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -492,6 +492,9 @@ static struct aa_profile *unpack_profile(struct aa_ext *e)
 	/* profile renaming is optional */
 	(void) unpack_str(e, &profile->rename, "rename");
 
+	/* attachment string is optional */
+	(void) unpack_str(e, &profile->attach, "attach");
+
 	/* xmatch is optional and may be NULL */
 	profile->xmatch = unpack_dfa(e);
 	if (IS_ERR(profile->xmatch)) {
-- 
cgit v1.2.3


From 29b3822f1e132aa0f115f69730d6e4182df153d4 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 10 Jul 2013 21:18:43 -0700
Subject: apparmor: add the profile introspection file to interface

Add the dynamic namespace relative profiles file to the interace, to allow
introspection of loaded profiles and their modes.

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Kees Cook <kees@ubuntu.com>
---
 security/apparmor/apparmorfs.c | 236 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 236 insertions(+)

(limited to 'security')

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index d6329aa7aa98..7a26608a5666 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -20,6 +20,7 @@
 #include <linux/uaccess.h>
 #include <linux/namei.h>
 #include <linux/capability.h>
+#include <linux/rcupdate.h>
 
 #include "include/apparmor.h"
 #include "include/apparmorfs.h"
@@ -512,6 +513,240 @@ fail2:
 }
 
 
+#define list_entry_next(pos, member) \
+	list_entry(pos->member.next, typeof(*pos), member)
+#define list_entry_is_head(pos, head, member) (&pos->member == (head))
+
+/**
+ * __next_namespace - find the next namespace to list
+ * @root: root namespace to stop search at (NOT NULL)
+ * @ns: current ns position (NOT NULL)
+ *
+ * Find the next namespace from @ns under @root and handle all locking needed
+ * while switching current namespace.
+ *
+ * Returns: next namespace or NULL if at last namespace under @root
+ * Requires: ns->parent->lock to be held
+ * NOTE: will not unlock root->lock
+ */
+static struct aa_namespace *__next_namespace(struct aa_namespace *root,
+					     struct aa_namespace *ns)
+{
+	struct aa_namespace *parent, *next;
+
+	/* is next namespace a child */
+	if (!list_empty(&ns->sub_ns)) {
+		next = list_first_entry(&ns->sub_ns, typeof(*ns), base.list);
+		mutex_lock(&next->lock);
+		return next;
+	}
+
+	/* check if the next ns is a sibling, parent, gp, .. */
+	parent = ns->parent;
+	while (parent) {
+		mutex_unlock(&ns->lock);
+		next = list_entry_next(ns, base.list);
+		if (!list_entry_is_head(next, &parent->sub_ns, base.list)) {
+			mutex_lock(&next->lock);
+			return next;
+		}
+		if (parent == root)
+			return NULL;
+		ns = parent;
+		parent = parent->parent;
+	}
+
+	return NULL;
+}
+
+/**
+ * __first_profile - find the first profile in a namespace
+ * @root: namespace that is root of profiles being displayed (NOT NULL)
+ * @ns: namespace to start in   (NOT NULL)
+ *
+ * Returns: unrefcounted profile or NULL if no profile
+ * Requires: profile->ns.lock to be held
+ */
+static struct aa_profile *__first_profile(struct aa_namespace *root,
+					  struct aa_namespace *ns)
+{
+	for (; ns; ns = __next_namespace(root, ns)) {
+		if (!list_empty(&ns->base.profiles))
+			return list_first_entry(&ns->base.profiles,
+						struct aa_profile, base.list);
+	}
+	return NULL;
+}
+
+/**
+ * __next_profile - step to the next profile in a profile tree
+ * @profile: current profile in tree (NOT NULL)
+ *
+ * Perform a depth first traversal on the profile tree in a namespace
+ *
+ * Returns: next profile or NULL if done
+ * Requires: profile->ns.lock to be held
+ */
+static struct aa_profile *__next_profile(struct aa_profile *p)
+{
+	struct aa_profile *parent;
+	struct aa_namespace *ns = p->ns;
+
+	/* is next profile a child */
+	if (!list_empty(&p->base.profiles))
+		return list_first_entry(&p->base.profiles, typeof(*p),
+					base.list);
+
+	/* is next profile a sibling, parent sibling, gp, sibling, .. */
+	parent = rcu_dereference_protected(p->parent,
+					   mutex_is_locked(&p->ns->lock));
+	while (parent) {
+		p = list_entry_next(p, base.list);
+		if (!list_entry_is_head(p, &parent->base.profiles, base.list))
+			return p;
+		p = parent;
+		parent = rcu_dereference_protected(parent->parent,
+					    mutex_is_locked(&parent->ns->lock));
+	}
+
+	/* is next another profile in the namespace */
+	p = list_entry_next(p, base.list);
+	if (!list_entry_is_head(p, &ns->base.profiles, base.list))
+		return p;
+
+	return NULL;
+}
+
+/**
+ * next_profile - step to the next profile in where ever it may be
+ * @root: root namespace  (NOT NULL)
+ * @profile: current profile  (NOT NULL)
+ *
+ * Returns: next profile or NULL if there isn't one
+ */
+static struct aa_profile *next_profile(struct aa_namespace *root,
+				       struct aa_profile *profile)
+{
+	struct aa_profile *next = __next_profile(profile);
+	if (next)
+		return next;
+
+	/* finished all profiles in namespace move to next namespace */
+	return __first_profile(root, __next_namespace(root, profile->ns));
+}
+
+/**
+ * p_start - start a depth first traversal of profile tree
+ * @f: seq_file to fill
+ * @pos: current position
+ *
+ * Returns: first profile under current namespace or NULL if none found
+ *
+ * acquires first ns->lock
+ */
+static void *p_start(struct seq_file *f, loff_t *pos)
+{
+	struct aa_profile *profile = NULL;
+	struct aa_namespace *root = aa_current_profile()->ns;
+	loff_t l = *pos;
+	f->private = aa_get_namespace(root);
+
+
+	/* find the first profile */
+	mutex_lock(&root->lock);
+	profile = __first_profile(root, root);
+
+	/* skip to position */
+	for (; profile && l > 0; l--)
+		profile = next_profile(root, profile);
+
+	return profile;
+}
+
+/**
+ * p_next - read the next profile entry
+ * @f: seq_file to fill
+ * @p: profile previously returned
+ * @pos: current position
+ *
+ * Returns: next profile after @p or NULL if none
+ *
+ * may acquire/release locks in namespace tree as necessary
+ */
+static void *p_next(struct seq_file *f, void *p, loff_t *pos)
+{
+	struct aa_profile *profile = p;
+	struct aa_namespace *ns = f->private;
+	(*pos)++;
+
+	return next_profile(ns, profile);
+}
+
+/**
+ * p_stop - stop depth first traversal
+ * @f: seq_file we are filling
+ * @p: the last profile writen
+ *
+ * Release all locking done by p_start/p_next on namespace tree
+ */
+static void p_stop(struct seq_file *f, void *p)
+{
+	struct aa_profile *profile = p;
+	struct aa_namespace *root = f->private, *ns;
+
+	if (profile) {
+		for (ns = profile->ns; ns && ns != root; ns = ns->parent)
+			mutex_unlock(&ns->lock);
+	}
+	mutex_unlock(&root->lock);
+	aa_put_namespace(root);
+}
+
+/**
+ * seq_show_profile - show a profile entry
+ * @f: seq_file to file
+ * @p: current position (profile)    (NOT NULL)
+ *
+ * Returns: error on failure
+ */
+static int seq_show_profile(struct seq_file *f, void *p)
+{
+	struct aa_profile *profile = (struct aa_profile *)p;
+	struct aa_namespace *root = f->private;
+
+	if (profile->ns != root)
+		seq_printf(f, ":%s://", aa_ns_name(root, profile->ns));
+	seq_printf(f, "%s (%s)\n", profile->base.hname,
+		   aa_profile_mode_names[profile->mode]);
+
+	return 0;
+}
+
+static const struct seq_operations aa_fs_profiles_op = {
+	.start = p_start,
+	.next = p_next,
+	.stop = p_stop,
+	.show = seq_show_profile,
+};
+
+static int profiles_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &aa_fs_profiles_op);
+}
+
+static int profiles_release(struct inode *inode, struct file *file)
+{
+	return seq_release(inode, file);
+}
+
+static const struct file_operations aa_fs_profiles_fops = {
+	.open = profiles_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = profiles_release,
+};
+
+
 /** Base file system setup **/
 static struct aa_fs_entry aa_fs_entry_file[] = {
 	AA_FS_FILE_STRING("mask", "create read write exec append mmap_exec " \
@@ -545,6 +780,7 @@ static struct aa_fs_entry aa_fs_entry_apparmor[] = {
 	AA_FS_FILE_FOPS(".load", 0640, &aa_fs_profile_load),
 	AA_FS_FILE_FOPS(".replace", 0640, &aa_fs_profile_replace),
 	AA_FS_FILE_FOPS(".remove", 0640, &aa_fs_profile_remove),
+	AA_FS_FILE_FOPS("profiles", 0640, &aa_fs_profiles_fops),
 	AA_FS_DIR("features", aa_fs_entry_features),
 	{ }
 };
-- 
cgit v1.2.3


From 84f1f787421cd83bb7dfb34d584586f6a5fe7baa Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 14 Aug 2013 11:27:32 -0700
Subject: apparmor: export set of capabilities supported by the apparmor module

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/Makefile             | 6 +++++-
 security/apparmor/apparmorfs.c         | 1 +
 security/apparmor/capability.c         | 5 +++++
 security/apparmor/include/capability.h | 4 ++++
 4 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'security')

diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile
index 5706b74c857f..0831e049072d 100644
--- a/security/apparmor/Makefile
+++ b/security/apparmor/Makefile
@@ -18,7 +18,11 @@ quiet_cmd_make-caps = GEN     $@
 cmd_make-caps = echo "static const char *const capability_names[] = {" > $@ ;\
 	sed $< >>$@ -r -n -e '/CAP_FS_MASK/d' \
 	-e 's/^\#define[ \t]+CAP_([A-Z0-9_]+)[ \t]+([0-9]+)/[\2] = "\L\1",/p';\
-	echo "};" >> $@
+	echo "};" >> $@ ;\
+	echo -n '\#define AA_FS_CAPS_MASK "' >> $@ ;\
+	sed $< -r -n -e '/CAP_FS_MASK/d' \
+	    -e 's/^\#define[ \t]+CAP_([A-Z0-9_]+)[ \t]+([0-9]+)/\L\1/p' | \
+	     tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@
 
 
 # Build a lower case string table of rlimit names.
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 7a26608a5666..d708a55d072f 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -773,6 +773,7 @@ static struct aa_fs_entry aa_fs_entry_features[] = {
 	AA_FS_DIR("file",			aa_fs_entry_file),
 	AA_FS_FILE_U64("capability",		VFS_CAP_FLAGS_MASK),
 	AA_FS_DIR("rlimit",			aa_fs_entry_rlimit),
+	AA_FS_DIR("caps",			aa_fs_entry_caps),
 	{ }
 };
 
diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c
index 887a5e948945..84d1f5f53877 100644
--- a/security/apparmor/capability.c
+++ b/security/apparmor/capability.c
@@ -27,6 +27,11 @@
  */
 #include "capability_names.h"
 
+struct aa_fs_entry aa_fs_entry_caps[] = {
+	AA_FS_FILE_STRING("mask", AA_FS_CAPS_MASK),
+	{ }
+};
+
 struct audit_cache {
 	struct aa_profile *profile;
 	kernel_cap_t caps;
diff --git a/security/apparmor/include/capability.h b/security/apparmor/include/capability.h
index c24d2959ea02..2e7c9d6a2f3b 100644
--- a/security/apparmor/include/capability.h
+++ b/security/apparmor/include/capability.h
@@ -17,6 +17,8 @@
 
 #include <linux/sched.h>
 
+#include "apparmorfs.h"
+
 struct aa_profile;
 
 /* aa_caps - confinement data for capabilities
@@ -34,6 +36,8 @@ struct aa_caps {
 	kernel_cap_t extended;
 };
 
+extern struct aa_fs_entry aa_fs_entry_caps[];
+
 int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap,
 	       int audit);
 
-- 
cgit v1.2.3


From f8eb8a1324e81927b2c64823b2fc38386efd3fef Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 14 Aug 2013 11:27:36 -0700
Subject: apparmor: add the ability to report a sha1 hash of loaded policy

Provide userspace the ability to introspect a sha1 hash value for each
profile currently loaded.

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/Kconfig              | 12 +++++
 security/apparmor/Makefile             |  1 +
 security/apparmor/apparmorfs.c         | 37 +++++++++++++
 security/apparmor/crypto.c             | 97 ++++++++++++++++++++++++++++++++++
 security/apparmor/include/apparmorfs.h |  1 +
 security/apparmor/include/crypto.h     | 36 +++++++++++++
 security/apparmor/include/policy.h     |  1 +
 security/apparmor/policy_unpack.c      | 20 ++++---
 8 files changed, 199 insertions(+), 6 deletions(-)
 create mode 100644 security/apparmor/crypto.c
 create mode 100644 security/apparmor/include/crypto.h

(limited to 'security')

diff --git a/security/apparmor/Kconfig b/security/apparmor/Kconfig
index 9b9013b2e321..d49c53960b60 100644
--- a/security/apparmor/Kconfig
+++ b/security/apparmor/Kconfig
@@ -29,3 +29,15 @@ config SECURITY_APPARMOR_BOOTPARAM_VALUE
 	  boot.
 
 	  If you are unsure how to answer this question, answer 1.
+
+config SECURITY_APPARMOR_HASH
+	bool "SHA1 hash of loaded profiles"
+	depends on SECURITY_APPARMOR
+	depends on CRYPTO
+	select CRYPTO_SHA1
+	default y
+
+	help
+	  This option selects whether sha1 hashing is done against loaded
+          profiles and exported for inspection to user space via the apparmor
+          filesystem.
diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile
index 0831e049072d..d693df874818 100644
--- a/security/apparmor/Makefile
+++ b/security/apparmor/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o
 apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \
               path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \
               resource.o sid.o file.o
+apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o
 
 clean-files := capability_names.h rlim_names.h
 
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index d708a55d072f..95c2b2689a03 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -26,6 +26,7 @@
 #include "include/apparmorfs.h"
 #include "include/audit.h"
 #include "include/context.h"
+#include "include/crypto.h"
 #include "include/policy.h"
 #include "include/resource.h"
 
@@ -319,6 +320,34 @@ static const struct file_operations aa_fs_profattach_fops = {
 	.release	= aa_fs_seq_profile_release,
 };
 
+static int aa_fs_seq_hash_show(struct seq_file *seq, void *v)
+{
+	struct aa_replacedby *r = seq->private;
+	struct aa_profile *profile = aa_get_profile_rcu(&r->profile);
+	unsigned int i, size = aa_hash_size();
+
+	if (profile->hash) {
+		for (i = 0; i < size; i++)
+			seq_printf(seq, "%.2x", profile->hash[i]);
+		seq_puts(seq, "\n");
+	}
+
+	return 0;
+}
+
+static int aa_fs_seq_hash_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, aa_fs_seq_hash_show, inode->i_private);
+}
+
+static const struct file_operations aa_fs_seq_hash_fops = {
+	.owner		= THIS_MODULE,
+	.open		= aa_fs_seq_hash_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 /** fns to setup dynamic per profile/namespace files **/
 void __aa_fs_profile_rmdir(struct aa_profile *profile)
 {
@@ -420,6 +449,14 @@ int __aa_fs_profile_mkdir(struct aa_profile *profile, struct dentry *parent)
 		goto fail;
 	profile->dents[AAFS_PROF_ATTACH] = dent;
 
+	if (profile->hash) {
+		dent = create_profile_file(dir, "sha1", profile,
+					   &aa_fs_seq_hash_fops);
+		if (IS_ERR(dent))
+			goto fail;
+		profile->dents[AAFS_PROF_HASH] = dent;
+	}
+
 	list_for_each_entry(child, &profile->base.profiles, base.list) {
 		error = __aa_fs_profile_mkdir(child, prof_child_dir(profile));
 		if (error)
diff --git a/security/apparmor/crypto.c b/security/apparmor/crypto.c
new file mode 100644
index 000000000000..d6222ba4e919
--- /dev/null
+++ b/security/apparmor/crypto.c
@@ -0,0 +1,97 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor policy loading interface function definitions.
+ *
+ * Copyright 2013 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * Fns to provide a checksum of policy that has been loaded this can be
+ * compared to userspace policy compiles to check loaded policy is what
+ * it should be.
+ */
+
+#include <linux/crypto.h>
+
+#include "include/apparmor.h"
+#include "include/crypto.h"
+
+static unsigned int apparmor_hash_size;
+
+static struct crypto_hash *apparmor_tfm;
+
+unsigned int aa_hash_size(void)
+{
+	return apparmor_hash_size;
+}
+
+int aa_calc_profile_hash(struct aa_profile *profile, u32 version, void *start,
+			 size_t len)
+{
+	struct scatterlist sg[2];
+	struct hash_desc desc = {
+		.tfm = apparmor_tfm,
+		.flags = 0
+	};
+	int error = -ENOMEM;
+	u32 le32_version = cpu_to_le32(version);
+
+	if (!apparmor_tfm)
+		return 0;
+
+	sg_init_table(sg, 2);
+	sg_set_buf(&sg[0], &le32_version, 4);
+	sg_set_buf(&sg[1], (u8 *) start, len);
+
+	profile->hash = kzalloc(apparmor_hash_size, GFP_KERNEL);
+	if (!profile->hash)
+		goto fail;
+
+	error = crypto_hash_init(&desc);
+	if (error)
+		goto fail;
+	error = crypto_hash_update(&desc, &sg[0], 4);
+	if (error)
+		goto fail;
+	error = crypto_hash_update(&desc, &sg[1], len);
+	if (error)
+		goto fail;
+	error = crypto_hash_final(&desc, profile->hash);
+	if (error)
+		goto fail;
+
+	return 0;
+
+fail:
+	kfree(profile->hash);
+	profile->hash = NULL;
+
+	return error;
+}
+
+static int __init init_profile_hash(void)
+{
+	struct crypto_hash *tfm;
+
+	if (!apparmor_initialized)
+		return 0;
+
+	tfm = crypto_alloc_hash("sha1", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm)) {
+		int error = PTR_ERR(tfm);
+		AA_ERROR("failed to setup profile sha1 hashing: %d\n", error);
+		return error;
+	}
+	apparmor_tfm = tfm;
+	apparmor_hash_size = crypto_hash_digestsize(apparmor_tfm);
+
+	aa_info_message("AppArmor sha1 policy hashing enabled");
+
+	return 0;
+}
+
+late_initcall(init_profile_hash);
diff --git a/security/apparmor/include/apparmorfs.h b/security/apparmor/include/apparmorfs.h
index f91712cf1b30..414e56878dd0 100644
--- a/security/apparmor/include/apparmorfs.h
+++ b/security/apparmor/include/apparmorfs.h
@@ -82,6 +82,7 @@ enum aafs_prof_type {
 	AAFS_PROF_NAME,
 	AAFS_PROF_MODE,
 	AAFS_PROF_ATTACH,
+	AAFS_PROF_HASH,
 	AAFS_PROF_SIZEOF,
 };
 
diff --git a/security/apparmor/include/crypto.h b/security/apparmor/include/crypto.h
new file mode 100644
index 000000000000..dc418e5024d9
--- /dev/null
+++ b/security/apparmor/include/crypto.h
@@ -0,0 +1,36 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor policy loading interface function definitions.
+ *
+ * Copyright 2013 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#ifndef __APPARMOR_CRYPTO_H
+#define __APPARMOR_CRYPTO_H
+
+#include "policy.h"
+
+#ifdef CONFIG_SECURITY_APPARMOR_HASH
+unsigned int aa_hash_size(void);
+int aa_calc_profile_hash(struct aa_profile *profile, u32 version, void *start,
+			 size_t len);
+#else
+static inline int aa_calc_profile_hash(struct aa_profile *profile, u32 version,
+				       void *start, size_t len)
+{
+	return 0;
+}
+
+static inline unsigned int aa_hash_size(void)
+{
+	return 0;
+}
+#endif
+
+#endif /* __APPARMOR_CRYPTO_H */
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 59b36372ae40..f2d4b6348cbc 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -219,6 +219,7 @@ struct aa_profile {
 	struct aa_caps caps;
 	struct aa_rlimit rlimits;
 
+	unsigned char *hash;
 	char *dirname;
 	struct dentry *dents[AAFS_PROF_SIZEOF];
 };
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index bdaef2e1b2a0..a689f10930b5 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -24,6 +24,7 @@
 #include "include/apparmor.h"
 #include "include/audit.h"
 #include "include/context.h"
+#include "include/crypto.h"
 #include "include/match.h"
 #include "include/policy.h"
 #include "include/policy_unpack.h"
@@ -758,10 +759,12 @@ int aa_unpack(void *udata, size_t size, struct list_head *lh, const char **ns)
 
 	*ns = NULL;
 	while (e.pos < e.end) {
+		void *start;
 		error = verify_header(&e, e.pos == e.start, ns);
 		if (error)
 			goto fail;
 
+		start = e.pos;
 		profile = unpack_profile(&e);
 		if (IS_ERR(profile)) {
 			error = PTR_ERR(profile);
@@ -769,16 +772,18 @@ int aa_unpack(void *udata, size_t size, struct list_head *lh, const char **ns)
 		}
 
 		error = verify_profile(profile);
-		if (error) {
-			aa_free_profile(profile);
-			goto fail;
-		}
+		if (error)
+			goto fail_profile;
+
+		error = aa_calc_profile_hash(profile, e.version, start,
+					     e.pos - start);
+		if (error)
+			goto fail_profile;
 
 		ent = aa_load_ent_alloc();
 		if (!ent) {
 			error = -ENOMEM;
-			aa_put_profile(profile);
-			goto fail;
+			goto fail_profile;
 		}
 
 		ent->new = profile;
@@ -787,6 +792,9 @@ int aa_unpack(void *udata, size_t size, struct list_head *lh, const char **ns)
 
 	return 0;
 
+fail_profile:
+	aa_put_profile(profile);
+
 fail:
 	list_for_each_entry_safe(ent, tmp, lh, list) {
 		list_del_init(&ent->list);
-- 
cgit v1.2.3


From 5265fc6219ddbf8dfc9b18223448a4997fb06eae Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 20 Aug 2013 15:33:20 +0930
Subject: module/lsm: Have apparmor module parameters work with no args

The apparmor module parameters for param_ops_aabool and
param_ops_aalockpolicy are both based off of the param_ops_bool,
and can handle a NULL value passed in as val. Have it enable the
new KERNEL_PARAM_FL_NOARGS flag to allow the parameters to be set
without having to state "=y" or "=1".

Cc: John Johansen <john.johansen@canonical.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 security/apparmor/lsm.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'security')

diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 2e2a0dd4a73f..e3a704c75ef6 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -666,6 +666,7 @@ static int param_set_aabool(const char *val, const struct kernel_param *kp);
 static int param_get_aabool(char *buffer, const struct kernel_param *kp);
 #define param_check_aabool param_check_bool
 static struct kernel_param_ops param_ops_aabool = {
+	.flags = KERNEL_PARAM_FL_NOARG,
 	.set = param_set_aabool,
 	.get = param_get_aabool
 };
@@ -682,6 +683,7 @@ static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp
 static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp);
 #define param_check_aalockpolicy param_check_bool
 static struct kernel_param_ops param_ops_aalockpolicy = {
+	.flags = KERNEL_PARAM_FL_NOARG,
 	.set = param_set_aalockpolicy,
 	.get = param_get_aalockpolicy
 };
-- 
cgit v1.2.3


From 160da84dbb39443fdade7151bc63a88f8e953077 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 2 Jul 2013 10:04:54 -0700
Subject: userns: Allow PR_CAPBSET_DROP in a user namespace.

As the capabilites and capability bounding set are per user namespace
properties it is safe to allow changing them with just CAP_SETPCAP
permission in the user namespace.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Tested-by: Richard Weinberger <richard@nod.at>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 security/commoncap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'security')

diff --git a/security/commoncap.c b/security/commoncap.c
index c44b6fe6648e..9fccf71b2b62 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -824,7 +824,7 @@ int cap_task_setnice(struct task_struct *p, int nice)
  */
 static long cap_prctl_drop(struct cred *new, unsigned long cap)
 {
-	if (!capable(CAP_SETPCAP))
+	if (!ns_capable(current_user_ns(), CAP_SETPCAP))
 		return -EPERM;
 	if (!cap_valid(cap))
 		return -EINVAL;
-- 
cgit v1.2.3


From f54fb863c6bbcbafdfc332b4a4260abb5a002137 Mon Sep 17 00:00:00 2001
From: Serge Hallyn <serge.hallyn@ubuntu.com>
Date: Tue, 23 Jul 2013 13:18:53 -0500
Subject: capabilities: allow nice if we are privileged

We allow task A to change B's nice level if it has a supserset of
B's privileges, or of it has CAP_SYS_NICE.  Also allow it if A has
CAP_SYS_NICE with respect to B - meaning it is root in the same
namespace, or it created B's namespace.

Signed-off-by: Serge Hallyn <serge.hallyn@canonical.com>
Reviewed-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 security/commoncap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'security')

diff --git a/security/commoncap.c b/security/commoncap.c
index 9fccf71b2b62..b9d613e0ef14 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -768,16 +768,16 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
  */
 static int cap_safe_nice(struct task_struct *p)
 {
-	int is_subset;
+	int is_subset, ret = 0;
 
 	rcu_read_lock();
 	is_subset = cap_issubset(__task_cred(p)->cap_permitted,
 				 current_cred()->cap_permitted);
+	if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))
+		ret = -EPERM;
 	rcu_read_unlock();
 
-	if (!is_subset && !capable(CAP_SYS_NICE))
-		return -EPERM;
-	return 0;
+	return ret;
 }
 
 /**
-- 
cgit v1.2.3


From 71ac7f6255c560716c20da8ee2c964bbd96e941f Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Sun, 29 Sep 2013 08:39:21 -0700
Subject: apparmor: Use shash crypto API interface for profile hashes

Use the shash interface, rather than the hash interface, when hashing
AppArmor profiles. The shash interface does not use scatterlists and it
is a better fit for what AppArmor needs.

This fixes a kernel paging BUG when aa_calc_profile_hash() is passed a
buffer from vmalloc(). The hash interface requires callers to handle
vmalloc() buffers differently than what AppArmor was doing. Due to
vmalloc() memory not being physically contiguous, each individual page
behind the buffer must be assigned to a scatterlist with sg_set_page()
and then the scatterlist passed to crypto_hash_update().

The shash interface does not have that limitation and allows vmalloc()
and kmalloc() buffers to be handled in the same manner.

BugLink: https://launchpad.net/bugs/1216294/
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=62261

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 security/apparmor/crypto.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

(limited to 'security')

diff --git a/security/apparmor/crypto.c b/security/apparmor/crypto.c
index d6222ba4e919..532471d0b3a0 100644
--- a/security/apparmor/crypto.c
+++ b/security/apparmor/crypto.c
@@ -15,14 +15,14 @@
  * it should be.
  */
 
-#include <linux/crypto.h>
+#include <crypto/hash.h>
 
 #include "include/apparmor.h"
 #include "include/crypto.h"
 
 static unsigned int apparmor_hash_size;
 
-static struct crypto_hash *apparmor_tfm;
+static struct crypto_shash *apparmor_tfm;
 
 unsigned int aa_hash_size(void)
 {
@@ -32,35 +32,33 @@ unsigned int aa_hash_size(void)
 int aa_calc_profile_hash(struct aa_profile *profile, u32 version, void *start,
 			 size_t len)
 {
-	struct scatterlist sg[2];
-	struct hash_desc desc = {
-		.tfm = apparmor_tfm,
-		.flags = 0
-	};
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(apparmor_tfm)];
+	} desc;
 	int error = -ENOMEM;
 	u32 le32_version = cpu_to_le32(version);
 
 	if (!apparmor_tfm)
 		return 0;
 
-	sg_init_table(sg, 2);
-	sg_set_buf(&sg[0], &le32_version, 4);
-	sg_set_buf(&sg[1], (u8 *) start, len);
-
 	profile->hash = kzalloc(apparmor_hash_size, GFP_KERNEL);
 	if (!profile->hash)
 		goto fail;
 
-	error = crypto_hash_init(&desc);
+	desc.shash.tfm = apparmor_tfm;
+	desc.shash.flags = 0;
+
+	error = crypto_shash_init(&desc.shash);
 	if (error)
 		goto fail;
-	error = crypto_hash_update(&desc, &sg[0], 4);
+	error = crypto_shash_update(&desc.shash, (u8 *) &le32_version, 4);
 	if (error)
 		goto fail;
-	error = crypto_hash_update(&desc, &sg[1], len);
+	error = crypto_shash_update(&desc.shash, (u8 *) start, len);
 	if (error)
 		goto fail;
-	error = crypto_hash_final(&desc, profile->hash);
+	error = crypto_shash_final(&desc.shash, profile->hash);
 	if (error)
 		goto fail;
 
@@ -75,19 +73,19 @@ fail:
 
 static int __init init_profile_hash(void)
 {
-	struct crypto_hash *tfm;
+	struct crypto_shash *tfm;
 
 	if (!apparmor_initialized)
 		return 0;
 
-	tfm = crypto_alloc_hash("sha1", 0, CRYPTO_ALG_ASYNC);
+	tfm = crypto_alloc_shash("sha1", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(tfm)) {
 		int error = PTR_ERR(tfm);
 		AA_ERROR("failed to setup profile sha1 hashing: %d\n", error);
 		return error;
 	}
 	apparmor_tfm = tfm;
-	apparmor_hash_size = crypto_hash_digestsize(apparmor_tfm);
+	apparmor_hash_size = crypto_shash_digestsize(apparmor_tfm);
 
 	aa_info_message("AppArmor sha1 policy hashing enabled");
 
-- 
cgit v1.2.3


From 4cd4fc77032dca46fe7475d81461e29145db247a Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sun, 29 Sep 2013 08:39:22 -0700
Subject: apparmor: fix suspicious RCU usage warning in policy.c/policy.h

The recent 3.12 pull request for apparmor was missing a couple rcu _protected
access modifiers. Resulting in the follow suspicious RCU usage

 [   29.804534] [ INFO: suspicious RCU usage. ]
 [   29.804539] 3.11.0+ #5 Not tainted
 [   29.804541] -------------------------------
 [   29.804545] security/apparmor/include/policy.h:363 suspicious rcu_dereference_check() usage!
 [   29.804548]
 [   29.804548] other info that might help us debug this:
 [   29.804548]
 [   29.804553]
 [   29.804553] rcu_scheduler_active = 1, debug_locks = 1
 [   29.804558] 2 locks held by apparmor_parser/1268:
 [   29.804560]  #0:  (sb_writers#9){.+.+.+}, at: [<ffffffff81120a4c>] file_start_write+0x27/0x29
 [   29.804576]  #1:  (&ns->lock){+.+.+.}, at: [<ffffffff811f5d88>] aa_replace_profiles+0x166/0x57c
 [   29.804589]
 [   29.804589] stack backtrace:
 [   29.804595] CPU: 0 PID: 1268 Comm: apparmor_parser Not tainted 3.11.0+ #5
 [   29.804599] Hardware name: ASUSTeK Computer Inc.         UL50VT          /UL50VT    , BIOS 217     03/01/2010
 [   29.804602]  0000000000000000 ffff8800b95a1d90 ffffffff8144eb9b ffff8800b94db540
 [   29.804611]  ffff8800b95a1dc0 ffffffff81087439 ffff880138cc3a18 ffff880138cc3a18
 [   29.804619]  ffff8800b9464a90 ffff880138cc3a38 ffff8800b95a1df0 ffffffff811f5084
 [   29.804628] Call Trace:
 [   29.804636]  [<ffffffff8144eb9b>] dump_stack+0x4e/0x82
 [   29.804642]  [<ffffffff81087439>] lockdep_rcu_suspicious+0xfc/0x105
 [   29.804649]  [<ffffffff811f5084>] __aa_update_replacedby+0x53/0x7f
 [   29.804655]  [<ffffffff811f5408>] __replace_profile+0x11f/0x1ed
 [   29.804661]  [<ffffffff811f6032>] aa_replace_profiles+0x410/0x57c
 [   29.804668]  [<ffffffff811f16d4>] profile_replace+0x35/0x4c
 [   29.804674]  [<ffffffff81120fa3>] vfs_write+0xad/0x113
 [   29.804680]  [<ffffffff81121609>] SyS_write+0x44/0x7a
 [   29.804687]  [<ffffffff8145bfd2>] system_call_fastpath+0x16/0x1b
 [   29.804691]
 [   29.804694] ===============================
 [   29.804697] [ INFO: suspicious RCU usage. ]
 [   29.804700] 3.11.0+ #5 Not tainted
 [   29.804703] -------------------------------
 [   29.804706] security/apparmor/policy.c:566 suspicious rcu_dereference_check() usage!
 [   29.804709]
 [   29.804709] other info that might help us debug this:
 [   29.804709]
 [   29.804714]
 [   29.804714] rcu_scheduler_active = 1, debug_locks = 1
 [   29.804718] 2 locks held by apparmor_parser/1268:
 [   29.804721]  #0:  (sb_writers#9){.+.+.+}, at: [<ffffffff81120a4c>] file_start_write+0x27/0x29
 [   29.804733]  #1:  (&ns->lock){+.+.+.}, at: [<ffffffff811f5d88>] aa_replace_profiles+0x166/0x57c
 [   29.804744]
 [   29.804744] stack backtrace:
 [   29.804750] CPU: 0 PID: 1268 Comm: apparmor_parser Not tainted 3.11.0+ #5
 [   29.804753] Hardware name: ASUSTeK Computer Inc.         UL50VT          /UL50VT    , BIOS 217     03/01/2010
 [   29.804756]  0000000000000000 ffff8800b95a1d80 ffffffff8144eb9b ffff8800b94db540
 [   29.804764]  ffff8800b95a1db0 ffffffff81087439 ffff8800b95b02b0 0000000000000000
 [   29.804772]  ffff8800b9efba08 ffff880138cc3a38 ffff8800b95a1dd0 ffffffff811f4f94
 [   29.804779] Call Trace:
 [   29.804786]  [<ffffffff8144eb9b>] dump_stack+0x4e/0x82
 [   29.804791]  [<ffffffff81087439>] lockdep_rcu_suspicious+0xfc/0x105
 [   29.804798]  [<ffffffff811f4f94>] aa_free_replacedby_kref+0x4d/0x62
 [   29.804804]  [<ffffffff811f4f47>] ? aa_put_namespace+0x17/0x17
 [   29.804810]  [<ffffffff811f4f0b>] kref_put+0x36/0x40
 [   29.804816]  [<ffffffff811f5423>] __replace_profile+0x13a/0x1ed
 [   29.804822]  [<ffffffff811f6032>] aa_replace_profiles+0x410/0x57c
 [   29.804829]  [<ffffffff811f16d4>] profile_replace+0x35/0x4c
 [   29.804835]  [<ffffffff81120fa3>] vfs_write+0xad/0x113
 [   29.804840]  [<ffffffff81121609>] SyS_write+0x44/0x7a
 [   29.804847]  [<ffffffff8145bfd2>] system_call_fastpath+0x16/0x1b

Reported-by: miles.lane@gmail.com
CC: paulmck@linux.vnet.ibm.com
Signed-off-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 security/apparmor/include/policy.h | 4 +++-
 security/apparmor/policy.c         | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'security')

diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index f2d4b6348cbc..c28b0f20ab53 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -360,7 +360,9 @@ static inline void aa_put_replacedby(struct aa_replacedby *p)
 static inline void __aa_update_replacedby(struct aa_profile *orig,
 					  struct aa_profile *new)
 {
-	struct aa_profile *tmp = rcu_dereference(orig->replacedby->profile);
+	struct aa_profile *tmp;
+	tmp = rcu_dereference_protected(orig->replacedby->profile,
+					mutex_is_locked(&orig->ns->lock));
 	rcu_assign_pointer(orig->replacedby->profile, aa_get_profile(new));
 	orig->flags |= PFLAG_INVALID;
 	aa_put_profile(tmp);
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 6172509fa2b7..345bec07a27d 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -563,7 +563,8 @@ void __init aa_free_root_ns(void)
 static void free_replacedby(struct aa_replacedby *r)
 {
 	if (r) {
-		aa_put_profile(rcu_dereference(r->profile));
+		/* r->profile will not be updated any more as r is dead */
+		aa_put_profile(rcu_dereference_protected(r->profile, true));
 		kzfree(r);
 	}
 }
-- 
cgit v1.2.3


From 19e49834d22c2271ed1f4a03aaa4b74986447fb4 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 4 Oct 2013 12:54:11 -0700
Subject: selinux: remove 'flags' parameter from inode_has_perm

Every single user passes in '0'.  I think we had non-zero users back in
some stone age when selinux_inode_permission() was implemented in terms
of inode_has_perm(), but that complicated case got split up into a
totally separate code-path so that we could optimize the much simpler
special cases.

See commit 2e33405785d3 ("SELinux: delay initialization of audit data in
selinux_inode_permission") for example.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/selinux/hooks.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'security')

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index a5091ec06aa6..967823212d7d 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1525,8 +1525,7 @@ static int task_has_system(struct task_struct *tsk,
 static int inode_has_perm(const struct cred *cred,
 			  struct inode *inode,
 			  u32 perms,
-			  struct common_audit_data *adp,
-			  unsigned flags)
+			  struct common_audit_data *adp)
 {
 	struct inode_security_struct *isec;
 	u32 sid;
@@ -1539,7 +1538,7 @@ static int inode_has_perm(const struct cred *cred,
 	sid = cred_sid(cred);
 	isec = inode->i_security;
 
-	return avc_has_perm_flags(sid, isec->sid, isec->sclass, perms, adp, flags);
+	return avc_has_perm(sid, isec->sid, isec->sclass, perms, adp);
 }
 
 /* Same as inode_has_perm, but pass explicit audit data containing
@@ -1554,7 +1553,7 @@ static inline int dentry_has_perm(const struct cred *cred,
 
 	ad.type = LSM_AUDIT_DATA_DENTRY;
 	ad.u.dentry = dentry;
-	return inode_has_perm(cred, inode, av, &ad, 0);
+	return inode_has_perm(cred, inode, av, &ad);
 }
 
 /* Same as inode_has_perm, but pass explicit audit data containing
@@ -1569,7 +1568,7 @@ static inline int path_has_perm(const struct cred *cred,
 
 	ad.type = LSM_AUDIT_DATA_PATH;
 	ad.u.path = *path;
-	return inode_has_perm(cred, inode, av, &ad, 0);
+	return inode_has_perm(cred, inode, av, &ad);
 }
 
 /* Same as path_has_perm, but uses the inode from the file struct. */
@@ -1581,7 +1580,7 @@ static inline int file_path_has_perm(const struct cred *cred,
 
 	ad.type = LSM_AUDIT_DATA_PATH;
 	ad.u.path = file->f_path;
-	return inode_has_perm(cred, file_inode(file), av, &ad, 0);
+	return inode_has_perm(cred, file_inode(file), av, &ad);
 }
 
 /* Check whether a task can use an open file descriptor to
@@ -1617,7 +1616,7 @@ static int file_has_perm(const struct cred *cred,
 	/* av is zero if only checking access to the descriptor. */
 	rc = 0;
 	if (av)
-		rc = inode_has_perm(cred, inode, av, &ad, 0);
+		rc = inode_has_perm(cred, inode, av, &ad);
 
 out:
 	return rc;
-- 
cgit v1.2.3


From cb4fbe5703be51f8a2dff4052b1901941ab99e12 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 4 Oct 2013 12:57:22 -0700
Subject: selinux: avc_has_perm_flags has no more users

.. so get rid of it.  The only indirect users were all the
avc_has_perm() callers which just expanded to have a zero flags
argument.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/selinux/avc.c         |  9 +++------
 security/selinux/include/avc.h | 14 +++-----------
 2 files changed, 6 insertions(+), 17 deletions(-)

(limited to 'security')

diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index dad36a6ab45f..e720f72fcb87 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -746,7 +746,6 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid,
  * @tclass: target security class
  * @requested: requested permissions, interpreted based on @tclass
  * @auditdata: auxiliary audit data
- * @flags: VFS walk flags
  *
  * Check the AVC to determine whether the @requested permissions are granted
  * for the SID pair (@ssid, @tsid), interpreting the permissions
@@ -756,17 +755,15 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid,
  * permissions are granted, -%EACCES if any permissions are denied, or
  * another -errno upon other errors.
  */
-int avc_has_perm_flags(u32 ssid, u32 tsid, u16 tclass,
-		       u32 requested, struct common_audit_data *auditdata,
-		       unsigned flags)
+int avc_has_perm(u32 ssid, u32 tsid, u16 tclass,
+		 u32 requested, struct common_audit_data *auditdata)
 {
 	struct av_decision avd;
 	int rc, rc2;
 
 	rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd);
 
-	rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata,
-			flags);
+	rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata, 0);
 	if (rc2)
 		return rc2;
 	return rc;
diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h
index 92d0ab561db8..e30657b59cb3 100644
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h
@@ -147,17 +147,9 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
 			 unsigned flags,
 			 struct av_decision *avd);
 
-int avc_has_perm_flags(u32 ssid, u32 tsid,
-		       u16 tclass, u32 requested,
-		       struct common_audit_data *auditdata,
-		       unsigned);
-
-static inline int avc_has_perm(u32 ssid, u32 tsid,
-			       u16 tclass, u32 requested,
-			       struct common_audit_data *auditdata)
-{
-	return avc_has_perm_flags(ssid, tsid, tclass, requested, auditdata, 0);
-}
+int avc_has_perm(u32 ssid, u32 tsid,
+		 u16 tclass, u32 requested,
+		 struct common_audit_data *auditdata);
 
 u32 avc_policy_seqno(void);
 
-- 
cgit v1.2.3


From ab3540626435c01e08fe58ce544311a78430f112 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 4 Oct 2013 14:05:38 -0700
Subject: selinux: remove 'flags' parameter from avc_audit()

Now avc_audit() has no more users with that parameter. Remove it.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/selinux/avc.c         | 2 +-
 security/selinux/hooks.c       | 2 +-
 security/selinux/include/avc.h | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'security')

diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index e720f72fcb87..fc3e6628a864 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -763,7 +763,7 @@ int avc_has_perm(u32 ssid, u32 tsid, u16 tclass,
 
 	rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd);
 
-	rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata, 0);
+	rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata);
 	if (rc2)
 		return rc2;
 	return rc;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 967823212d7d..5b5231068516 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1502,7 +1502,7 @@ static int cred_has_capability(const struct cred *cred,
 
 	rc = avc_has_perm_noaudit(sid, sid, sclass, av, 0, &avd);
 	if (audit == SECURITY_CAP_AUDIT) {
-		int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad, 0);
+		int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad);
 		if (rc2)
 			return rc2;
 	}
diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h
index e30657b59cb3..f53ee3c58d0f 100644
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h
@@ -130,7 +130,7 @@ static inline int avc_audit(u32 ssid, u32 tsid,
 			    u16 tclass, u32 requested,
 			    struct av_decision *avd,
 			    int result,
-			    struct common_audit_data *a, unsigned flags)
+			    struct common_audit_data *a)
 {
 	u32 audited, denied;
 	audited = avc_audit_required(requested, avd, result, 0, &denied);
@@ -138,7 +138,7 @@ static inline int avc_audit(u32 ssid, u32 tsid,
 		return 0;
 	return slow_avc_audit(ssid, tsid, tclass,
 			      requested, audited, denied,
-			      a, flags);
+			      a, 0);
 }
 
 #define AVC_STRICT 1 /* Ignore permissive mode. */
-- 
cgit v1.2.3


From 5cb3e91ebd0405519795f243adbfc4ed2a6fe53f Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 14 Oct 2013 11:44:34 -0700
Subject: apparmor: fix memleak of the profile hash

BugLink: http://bugs.launchpad.net/bugs/1235523

This fixes the following kmemleak trace:
unreferenced object 0xffff8801e8c35680 (size 32):
  comm "apparmor_parser", pid 691, jiffies 4294895667 (age 13230.876s)
  hex dump (first 32 bytes):
    e0 d3 4e b5 ac 6d f4 ed 3f cb ee 48 1c fd 40 cf  ..N..m..?..H..@.
    5b cc e9 93 00 00 00 00 00 00 00 00 00 00 00 00  [...............
  backtrace:
    [<ffffffff817a97ee>] kmemleak_alloc+0x4e/0xb0
    [<ffffffff811ca9f3>] __kmalloc+0x103/0x290
    [<ffffffff8138acbc>] aa_calc_profile_hash+0x6c/0x150
    [<ffffffff8138074d>] aa_unpack+0x39d/0xd50
    [<ffffffff8137eced>] aa_replace_profiles+0x3d/0xd80
    [<ffffffff81376937>] profile_replace+0x37/0x50
    [<ffffffff811e9f2d>] vfs_write+0xbd/0x1e0
    [<ffffffff811ea96c>] SyS_write+0x4c/0xa0
    [<ffffffff817ccb1d>] system_call_fastpath+0x1a/0x1f
    [<ffffffffffffffff>] 0xffffffffffffffff

Signed-off-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 security/apparmor/policy.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'security')

diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 345bec07a27d..705c2879d3a9 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -610,6 +610,7 @@ void aa_free_profile(struct aa_profile *profile)
 	aa_put_dfa(profile->policy.dfa);
 	aa_put_replacedby(profile->replacedby);
 
+	kzfree(profile->hash);
 	kzfree(profile);
 }
 
-- 
cgit v1.2.3


From ed2c7da3a40c58410508fe24e12d03e508d7ec01 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 14 Oct 2013 11:46:27 -0700
Subject: apparmor: fix bad lock balance when introspecting policy

BugLink: http://bugs.launchpad.net/bugs/1235977

The profile introspection seq file has a locking bug when policy is viewed
from a virtual root (task in a policy namespace), introspection from the
real root is not affected.

The test for root
    while (parent) {
is correct for the real root, but incorrect for tasks in a policy namespace.
This allows the task to walk backup the policy tree past its virtual root
causing it to be unlocked before the virtual root should be in the p_stop
fn.

This results in the following lockdep back trace:
[   78.479744] [ BUG: bad unlock balance detected! ]
[   78.479792] 3.11.0-11-generic #17 Not tainted
[   78.479838] -------------------------------------
[   78.479885] grep/2223 is trying to release lock (&ns->lock) at:
[   78.479952] [<ffffffff817bf3be>] mutex_unlock+0xe/0x10
[   78.480002] but there are no more locks to release!
[   78.480037]
[   78.480037] other info that might help us debug this:
[   78.480037] 1 lock held by grep/2223:
[   78.480037]  #0:  (&p->lock){+.+.+.}, at: [<ffffffff812111bd>] seq_read+0x3d/0x3d0
[   78.480037]
[   78.480037] stack backtrace:
[   78.480037] CPU: 0 PID: 2223 Comm: grep Not tainted 3.11.0-11-generic #17
[   78.480037] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[   78.480037]  ffffffff817bf3be ffff880007763d60 ffffffff817b97ef ffff8800189d2190
[   78.480037]  ffff880007763d88 ffffffff810e1c6e ffff88001f044730 ffff8800189d2190
[   78.480037]  ffffffff817bf3be ffff880007763e00 ffffffff810e5bd6 0000000724fe56b7
[   78.480037] Call Trace:
[   78.480037]  [<ffffffff817bf3be>] ? mutex_unlock+0xe/0x10
[   78.480037]  [<ffffffff817b97ef>] dump_stack+0x54/0x74
[   78.480037]  [<ffffffff810e1c6e>] print_unlock_imbalance_bug+0xee/0x100
[   78.480037]  [<ffffffff817bf3be>] ? mutex_unlock+0xe/0x10
[   78.480037]  [<ffffffff810e5bd6>] lock_release_non_nested+0x226/0x300
[   78.480037]  [<ffffffff817bf2fe>] ? __mutex_unlock_slowpath+0xce/0x180
[   78.480037]  [<ffffffff817bf3be>] ? mutex_unlock+0xe/0x10
[   78.480037]  [<ffffffff810e5d5c>] lock_release+0xac/0x310
[   78.480037]  [<ffffffff817bf2b3>] __mutex_unlock_slowpath+0x83/0x180
[   78.480037]  [<ffffffff817bf3be>] mutex_unlock+0xe/0x10
[   78.480037]  [<ffffffff81376c91>] p_stop+0x51/0x90
[   78.480037]  [<ffffffff81211408>] seq_read+0x288/0x3d0
[   78.480037]  [<ffffffff811e9d9e>] vfs_read+0x9e/0x170
[   78.480037]  [<ffffffff811ea8cc>] SyS_read+0x4c/0xa0
[   78.480037]  [<ffffffff817ccc9d>] system_call_fastpath+0x1a/0x1f

Signed-off-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 security/apparmor/apparmorfs.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'security')

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 95c2b2689a03..7db9954f1af2 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -580,15 +580,13 @@ static struct aa_namespace *__next_namespace(struct aa_namespace *root,
 
 	/* check if the next ns is a sibling, parent, gp, .. */
 	parent = ns->parent;
-	while (parent) {
+	while (ns != root) {
 		mutex_unlock(&ns->lock);
 		next = list_entry_next(ns, base.list);
 		if (!list_entry_is_head(next, &parent->sub_ns, base.list)) {
 			mutex_lock(&next->lock);
 			return next;
 		}
-		if (parent == root)
-			return NULL;
 		ns = parent;
 		parent = parent->parent;
 	}
-- 
cgit v1.2.3