LWN.net Logo

XFRM: input callback mechanism.

From:  Evgeniy Polyakov <johnpol@2ka.mipt.ru>
To:  linux-crypto@vger.kernel.org
Subject:  [RFC] XFRM: input callback mechanism.
Date:  Mon, 3 Oct 2005 16:25:32 +0400
Cc:  netdev@vger.kernel.org

Hello, developers.

This patch adds the ability to stack input xfrm processing
in a way similar to dst_entry processing, which allows
very easy offload of the input IPsec crypto processing path
into hardware.

The idea is to create an object shared between all XFRM input
layers and to call the next processing callback either directly
from the current one, or indirectly from a hardware interrupt
or similar path. The shared object caches the skb, the XFRM state and
several other variables.

Since the packet input path does not care about the skb and relies
completely on the network stack to free it or queue it up, it is
possible to return from net_protocol->handler() after the packet
has been queued somewhere (or even linked to a timer) but not
yet processed.

The patch was tested in IPsec tunnel mode with only one ESP4
transformation.

Please review and comment.

Thank you.

Signed-off-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>

diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -15,6 +15,18 @@
 #include <net/ip.h>
 #include <net/xfrm.h>
 
+struct xfrm4_input_shared	/* per-packet state shared by the input steps */
+{
+	struct sk_buff 		*skb;	/* packet being processed */
+	int 			xfrm_nr, first;	/* entries used in xfrm_vec; 1 until the first SPI parse */
+	struct sec_decap_state 	xfrm_vec[XFRM_MAX_DEPTH];	/* states applied so far */
+	__u16 			encap_type;	/* given to the first transform, then zeroed */
+	int 			decaps;	/* set to 1 by the props.mode (IPIP) branch */
+	u32			seq;	/* sequence number from the last SPI parse */
+	struct xfrm_state 	*x;	/* state of the current step */
+	int 			(*callback)(struct xfrm4_input_shared *sh);	/* next step to call */
+};
+
 int xfrm4_rcv(struct sk_buff *skb)
 {
 	return xfrm4_rcv_encap(skb, 0);
@@ -45,81 +57,66 @@ static int xfrm4_parse_spi(struct sk_buf
 	return xfrm_parse_spi(skb, nexthdr, spi, seq);
 }
 
-int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
+static int xfrm4_rcv_encap_input_finish(struct xfrm4_input_shared *sh)
 {
-	int err;
-	u32 spi, seq;
-	struct sec_decap_state xfrm_vec[XFRM_MAX_DEPTH];
-	struct xfrm_state *x;
-	int xfrm_nr = 0;
-	int decaps = 0;
-
-	if ((err = xfrm4_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) != 0)
-		goto drop;
+	struct iphdr *iph = sh->skb->nh.iph;
 
-	do {
-		struct iphdr *iph = skb->nh.iph;
+	/* With props.mode set: make the inner IPIP header current; -1 = drop. */
 
-		if (xfrm_nr == XFRM_MAX_DEPTH)
+	if (sh->x->props.mode) {
+		if (iph->protocol != IPPROTO_IPIP)
 			goto drop;
-
-		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, iph->protocol, AF_INET);
-		if (x == NULL)
+		if (!pskb_may_pull(sh->skb, sizeof(struct iphdr)))
+			goto drop;
+		if (skb_cloned(sh->skb) &&
+		    pskb_expand_head(sh->skb, 0, 0, GFP_ATOMIC))
 			goto drop;
+		if (sh->x->props.flags & XFRM_STATE_DECAP_DSCP)
+			ipv4_copy_dscp(iph, sh->skb->h.ipiph);
+		if (!(sh->x->props.flags & XFRM_STATE_NOECN))
+			ipip_ecn_decapsulate(sh->skb);
+		sh->skb->mac.raw = memmove(sh->skb->data - sh->skb->mac_len,
+				       sh->skb->mac.raw, sh->skb->mac_len);
+		sh->skb->nh.raw = sh->skb->data;
+		memset(&(IPCB(sh->skb)->opt), 0, sizeof(struct ip_options));
+		sh->decaps = 1;
+	}
 
-		spin_lock(&x->lock);
-		if (unlikely(x->km.state != XFRM_STATE_VALID))
-			goto drop_unlock;
-
-		if (x->props.replay_window && xfrm_replay_check(x, seq))
-			goto drop_unlock;
-
-		if (xfrm_state_check_expire(x))
-			goto drop_unlock;
-
-		xfrm_vec[xfrm_nr].decap.decap_type = encap_type;
-		if (x->type->input(x, &(xfrm_vec[xfrm_nr].decap), skb))
-			goto drop_unlock;
-
-		/* only the first xfrm gets the encap type */
-		encap_type = 0;
-
-		if (x->props.replay_window)
-			xfrm_replay_advance(x, seq);
-
-		x->curlft.bytes += skb->len;
-		x->curlft.packets++;
-
-		spin_unlock(&x->lock);
-
-		xfrm_vec[xfrm_nr++].xvec = x;
-
-		iph = skb->nh.iph;
-
-		if (x->props.mode) {
-			if (iph->protocol != IPPROTO_IPIP)
-				goto drop;
-			if (!pskb_may_pull(skb, sizeof(struct iphdr)))
-				goto drop;
-			if (skb_cloned(skb) &&
-			    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-				goto drop;
-			if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-				ipv4_copy_dscp(iph, skb->h.ipiph);
-			if (!(x->props.flags & XFRM_STATE_NOECN))
-				ipip_ecn_decapsulate(skb);
-			skb->mac.raw = memmove(skb->data - skb->mac_len,
-					       skb->mac.raw, skb->mac_len);
-			skb->nh.raw = skb->data;
-			memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
-			decaps = 1;
-			break;
-		}
+	return 0;
 
-		if ((err = xfrm_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) < 0)
-			goto drop;
-	} while (!err);
+drop:
+	return -1;
+}
 
+/*
+ * Apply sh->x's input transform to sh->skb and record the state in xfrm_vec[].
+ * Returns 0 on success; on failure undoes the lifetime accounting, returns -1.
+ */
+static int xfrm4_run_input(struct xfrm4_input_shared *sh)
+{
+	struct xfrm_state *x = sh->x;
+	int recorded = 0;
+
+	if (!x->type->input(x, &(sh->xfrm_vec[sh->xfrm_nr].decap), sh->skb)) {
+		sh->xfrm_vec[sh->xfrm_nr++].xvec = x;
+		recorded = 1;
+		if (!xfrm4_rcv_encap_input_finish(sh))
+			return 0;
+	}
+
+	spin_lock(&x->lock);
+	x->curlft.bytes -= sh->skb->len;
+	x->curlft.packets--;
+	spin_unlock(&x->lock);
+	if (!recorded)	/* not in xfrm_vec[], so the caller's drop loop won't put it */
+		xfrm_state_put(x);
+	return -1;
+}
+
+static int xfrm4_rcv_encap_finish(struct xfrm4_input_shared *sh)
+{
+	struct sk_buff *skb = sh->skb;
+	
 	/* Allocate new secpath or COW existing one. */
 
 	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
@@ -131,13 +128,13 @@ int xfrm4_rcv_encap(struct sk_buff *skb,
 			secpath_put(skb->sp);
 		skb->sp = sp;
 	}
-	if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
+	if (sh->xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
 		goto drop;
 
-	memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
-	skb->sp->len += xfrm_nr;
+	memcpy(skb->sp->x+skb->sp->len, sh->xfrm_vec, sh->xfrm_nr*sizeof(struct sec_decap_state));
+	skb->sp->len += sh->xfrm_nr;
 
-	if (decaps) {
+	if (sh->decaps) {
 		if (!(skb->dev->flags&IFF_LOOPBACK)) {
 			dst_release(skb->dst);
 			skb->dst = NULL;
@@ -148,13 +145,117 @@ int xfrm4_rcv_encap(struct sk_buff *skb,
 		return -skb->nh.iph->protocol;
 	}
 
-drop_unlock:
-	spin_unlock(&x->lock);
-	xfrm_state_put(x);
 drop:
-	while (--xfrm_nr >= 0)
-		xfrm_state_put(xfrm_vec[xfrm_nr].xvec);
+	while (--sh->xfrm_nr >= 0)
+		xfrm_state_put(sh->xfrm_vec[sh->xfrm_nr].xvec);
 
 	kfree_skb(skb);
 	return 0;
 }
+
+/*
+ * Process one IPsec header of sh->skb: parse the SPI, look up and validate
+ * the state, update its replay/lifetime data and run its input transform.
+ * Returns 0 after a transform ran, > 0 once sh->callback was switched to
+ * xfrm4_rcv_encap_finish(), or -1 after the skb was dropped and freed.
+ */
+static int xfrm4_run_input_init(struct xfrm4_input_shared *sh)
+{
+	struct iphdr *iph = sh->skb->nh.iph;
+	u32 spi, seq;
+	int err;
+	struct xfrm_state *x;
+
+	if (sh->xfrm_nr == XFRM_MAX_DEPTH)
+		goto drop;
+
+	if (sh->first) {
+		err = xfrm4_parse_spi(sh->skb, sh->skb->nh.iph->protocol, &spi, &seq);
+		if (err != 0)
+			err = -1;
+		sh->first = 0;
+	} else
+		err = xfrm_parse_spi(sh->skb, sh->skb->nh.iph->protocol, &spi, &seq);
+
+	if (err < 0)
+		goto drop;
+	else if (err > 0) {
+		sh->callback = &xfrm4_rcv_encap_finish;
+		goto out;
+	}
+
+	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, iph->protocol, AF_INET);
+	if (x == NULL)
+		goto drop;
+
+	spin_lock(&x->lock);
+	if (unlikely(x->km.state != XFRM_STATE_VALID))
+		goto drop_unlock;
+	if (x->props.replay_window && xfrm_replay_check(x, seq))
+		goto drop_unlock;
+	if (xfrm_state_check_expire(x))
+		goto drop_unlock;
+
+	/* only the first xfrm gets the encap type */
+	sh->xfrm_vec[sh->xfrm_nr].decap.decap_type = sh->encap_type;
+	sh->encap_type = 0;
+	if (x->props.replay_window)
+		xfrm_replay_advance(x, seq);
+	x->curlft.bytes += sh->skb->len;
+	x->curlft.packets++;
+	spin_unlock(&x->lock);
+
+	sh->seq = seq;
+	sh->x = x;
+	err = xfrm4_run_input(sh);
+	if (err)
+		goto drop;
+out:
+	return err;
+
+drop_unlock:
+	/* sh->x is not assigned yet on this path; use the local lookup result. */
+	spin_unlock(&x->lock);
+	xfrm_state_put(x);
+drop:
+	while (--sh->xfrm_nr >= 0)
+		xfrm_state_put(sh->xfrm_vec[sh->xfrm_nr].xvec);
+	kfree_skb(sh->skb);
+	return -1;
+}
+
+/*
+ * Run one input step, then sh->callback.  A drop returns 0 (skb is freed);
+ * negative values are reserved for the "-protocol" result of the finish step.
+ */
+static int xfrm4_run_next(struct xfrm4_input_shared *sh)
+{
+	if (xfrm4_run_input_init(sh) < 0)
+		return 0;
+	return sh->callback(sh);
+}
+
+/*
+ * Entry point: allocate the zeroed per-packet shared object (xfrm_nr and
+ * decaps start at 0) and run the chain.  Returns the chain's result, or 0
+ * when the allocation fails and skb is freed.
+ */
+int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
+{
+	int err;
+	struct xfrm4_input_shared *sh;
+
+	sh = kzalloc(sizeof(struct xfrm4_input_shared), GFP_ATOMIC);
+	if (!sh) {
+		kfree_skb(skb);
+		return 0;	/* like the finish step's drop path: freed skb => 0 */
+	}
+
+	sh->skb = skb;
+	sh->first = 1;
+	sh->encap_type = encap_type;
+	sh->callback = &xfrm4_run_next;
+	err = xfrm4_run_next(sh);
+	kfree(sh);
+	return err;
+}


-- 
	Evgeniy Polyakov
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Copyright © 2005, Eklektix, Inc.
Comments and public postings are copyrighted by their creators.
Linux is a registered trademark of Linus Torvalds