Merge branch 'skb_frags'

Michael S. Tsirkin says:

====================
skbuff: fix skb_segment with zero copy skbs

This fixes a bug in skb_segment where it moves frags
between skbs without orphaning them.
This causes userspace to assume it's safe to
reuse the buffer, and receiver gets corrupted data.
This further might leak information from the
transmitter on the wire.

To fix track which skb does a copied frag belong
to, and orphan frags when copying them.

As we are tracking multiple skbs here, using
short names (skb,nskb,fskb,skb_frag,frag) becomes confusing.
So before adding another one, I refactor these names
slightly.

Patch is split out to make it easier to
verify that all trasformations are trivially correct.

The problem was observed in the field,
so I think that the patch is necessary on stable
as well.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
David S. Miller 2014-03-11 16:26:46 -04:00
commit c7b76f854e

View File

@ -2838,81 +2838,84 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
/**
* skb_segment - Perform protocol segmentation on skb.
* @skb: buffer to segment
* @head_skb: buffer to segment
* @features: features for the output path (see dev->features)
*
* This function performs segmentation on the given skb. It returns
* a pointer to the first in a list of new skbs for the segments.
* In case of error it returns ERR_PTR(err).
*/
struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
struct sk_buff *skb_segment(struct sk_buff *head_skb,
netdev_features_t features)
{
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
struct sk_buff *fskb = skb_shinfo(skb)->frag_list;
skb_frag_t *skb_frag = skb_shinfo(skb)->frags;
unsigned int mss = skb_shinfo(skb)->gso_size;
unsigned int doffset = skb->data - skb_mac_header(skb);
struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
skb_frag_t *frag = skb_shinfo(head_skb)->frags;
unsigned int mss = skb_shinfo(head_skb)->gso_size;
unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
struct sk_buff *frag_skb = head_skb;
unsigned int offset = doffset;
unsigned int tnl_hlen = skb_tnl_header_len(skb);
unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
unsigned int headroom;
unsigned int len;
__be16 proto;
bool csum;
int sg = !!(features & NETIF_F_SG);
int nfrags = skb_shinfo(skb)->nr_frags;
int nfrags = skb_shinfo(head_skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
int pos;
proto = skb_network_protocol(skb);
proto = skb_network_protocol(head_skb);
if (unlikely(!proto))
return ERR_PTR(-EINVAL);
csum = !!can_checksum_protocol(features, proto);
__skb_push(skb, doffset);
headroom = skb_headroom(skb);
pos = skb_headlen(skb);
__skb_push(head_skb, doffset);
headroom = skb_headroom(head_skb);
pos = skb_headlen(head_skb);
do {
struct sk_buff *nskb;
skb_frag_t *frag;
skb_frag_t *nskb_frag;
int hsize;
int size;
len = skb->len - offset;
len = head_skb->len - offset;
if (len > mss)
len = mss;
hsize = skb_headlen(skb) - offset;
hsize = skb_headlen(head_skb) - offset;
if (hsize < 0)
hsize = 0;
if (hsize > len || !sg)
hsize = len;
if (!hsize && i >= nfrags && skb_headlen(fskb) &&
(skb_headlen(fskb) == len || sg)) {
BUG_ON(skb_headlen(fskb) > len);
if (!hsize && i >= nfrags && skb_headlen(list_skb) &&
(skb_headlen(list_skb) == len || sg)) {
BUG_ON(skb_headlen(list_skb) > len);
i = 0;
nfrags = skb_shinfo(fskb)->nr_frags;
skb_frag = skb_shinfo(fskb)->frags;
pos += skb_headlen(fskb);
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
frag_skb = list_skb;
pos += skb_headlen(list_skb);
while (pos < offset + len) {
BUG_ON(i >= nfrags);
size = skb_frag_size(skb_frag);
size = skb_frag_size(frag);
if (pos + size > offset + len)
break;
i++;
pos += size;
skb_frag++;
frag++;
}
nskb = skb_clone(fskb, GFP_ATOMIC);
fskb = fskb->next;
nskb = skb_clone(list_skb, GFP_ATOMIC);
list_skb = list_skb->next;
if (unlikely(!nskb))
goto err;
@ -2933,7 +2936,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
__skb_push(nskb, doffset);
} else {
nskb = __alloc_skb(hsize + doffset + headroom,
GFP_ATOMIC, skb_alloc_rx_flag(skb),
GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
NUMA_NO_NODE);
if (unlikely(!nskb))
@ -2949,12 +2952,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
segs = nskb;
tail = nskb;
__copy_skb_header(nskb, skb);
nskb->mac_len = skb->mac_len;
__copy_skb_header(nskb, head_skb);
nskb->mac_len = head_skb->mac_len;
skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
skb_copy_from_linear_data_offset(skb, -tnl_hlen,
skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
nskb->data - tnl_hlen,
doffset + tnl_hlen);
@ -2963,30 +2966,32 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
if (!sg) {
nskb->ip_summed = CHECKSUM_NONE;
nskb->csum = skb_copy_and_csum_bits(skb, offset,
nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
skb_put(nskb, len),
len, 0);
continue;
}
frag = skb_shinfo(nskb)->frags;
nskb_frag = skb_shinfo(nskb)->frags;
skb_copy_from_linear_data_offset(skb, offset,
skb_copy_from_linear_data_offset(head_skb, offset,
skb_put(nskb, hsize), hsize);
skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags &
SKBTX_SHARED_FRAG;
while (pos < offset + len) {
if (i >= nfrags) {
BUG_ON(skb_headlen(fskb));
BUG_ON(skb_headlen(list_skb));
i = 0;
nfrags = skb_shinfo(fskb)->nr_frags;
skb_frag = skb_shinfo(fskb)->frags;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
frag_skb = list_skb;
BUG_ON(!nfrags);
fskb = fskb->next;
list_skb = list_skb->next;
}
if (unlikely(skb_shinfo(nskb)->nr_frags >=
@ -2997,27 +3002,30 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
goto err;
}
*frag = *skb_frag;
__skb_frag_ref(frag);
size = skb_frag_size(frag);
if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
goto err;
*nskb_frag = *frag;
__skb_frag_ref(nskb_frag);
size = skb_frag_size(nskb_frag);
if (pos < offset) {
frag->page_offset += offset - pos;
skb_frag_size_sub(frag, offset - pos);
nskb_frag->page_offset += offset - pos;
skb_frag_size_sub(nskb_frag, offset - pos);
}
skb_shinfo(nskb)->nr_frags++;
if (pos + size <= offset + len) {
i++;
skb_frag++;
frag++;
pos += size;
} else {
skb_frag_size_sub(frag, pos + size - (offset + len));
skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
goto skip_fraglist;
}
frag++;
nskb_frag++;
}
skip_fraglist:
@ -3031,7 +3039,7 @@ perform_csum_check:
nskb->len - doffset, 0);
nskb->ip_summed = CHECKSUM_NONE;
}
} while ((offset += len) < skb->len);
} while ((offset += len) < head_skb->len);
return segs;