summaryrefslogtreecommitdiff
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
authorScott Wood <scottwood@freescale.com>2013-12-14 01:15:24 (GMT)
committerScott Wood <scottwood@freescale.com>2013-12-14 01:15:24 (GMT)
commitb7c81aa3ab2ac2c140e278b6d0e9a0b95112cf0b (patch)
tree87828aaf5f82c7042bfc0307bc4ac499e10f93fb /net/ipv4/route.c
parent22c782a4b14773fab7eab3c1db54ad7ad077e9b8 (diff)
parent78fd82238d0e5716578c326404184a27ba67fd6e (diff)
downloadlinux-fsl-qoriq-b7c81aa3ab2ac2c140e278b6d0e9a0b95112cf0b.tar.xz
Merge remote-tracking branch 'linus/master' into merge
Conflicts: Documentation/hwmon/ina2xx arch/powerpc/Kconfig arch/powerpc/boot/dts/b4860emu.dts arch/powerpc/boot/dts/b4qds.dtsi arch/powerpc/boot/dts/fsl/b4si-post.dtsi arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi arch/powerpc/boot/dts/p1023rdb.dts arch/powerpc/boot/dts/t4240emu.dts arch/powerpc/boot/dts/t4240qds.dts arch/powerpc/configs/85xx/p1023_defconfig arch/powerpc/configs/corenet32_smp_defconfig arch/powerpc/configs/corenet64_smp_defconfig arch/powerpc/configs/mpc85xx_smp_defconfig arch/powerpc/include/asm/cputable.h arch/powerpc/include/asm/device.h arch/powerpc/include/asm/epapr_hcalls.h arch/powerpc/include/asm/kvm_host.h arch/powerpc/include/asm/mpic.h arch/powerpc/include/asm/pci.h arch/powerpc/include/asm/ppc-opcode.h arch/powerpc/include/asm/ppc_asm.h arch/powerpc/include/asm/reg_booke.h arch/powerpc/kernel/epapr_paravirt.c arch/powerpc/kernel/process.c arch/powerpc/kernel/prom.c arch/powerpc/kernel/setup-common.c arch/powerpc/kernel/setup_32.c arch/powerpc/kernel/setup_64.c arch/powerpc/kernel/smp.c arch/powerpc/kernel/swsusp_asm64.S arch/powerpc/kernel/swsusp_booke.S arch/powerpc/kvm/book3s_pr.c arch/powerpc/kvm/booke.c arch/powerpc/kvm/booke.h arch/powerpc/kvm/e500.c arch/powerpc/kvm/e500.h arch/powerpc/kvm/e500_emulate.c arch/powerpc/kvm/e500mc.c arch/powerpc/kvm/powerpc.c arch/powerpc/perf/e6500-pmu.c arch/powerpc/platforms/85xx/Kconfig arch/powerpc/platforms/85xx/Makefile arch/powerpc/platforms/85xx/b4_qds.c arch/powerpc/platforms/85xx/c293pcie.c arch/powerpc/platforms/85xx/corenet_ds.c arch/powerpc/platforms/85xx/corenet_ds.h arch/powerpc/platforms/85xx/p1023_rds.c arch/powerpc/platforms/85xx/p2041_rdb.c arch/powerpc/platforms/85xx/p3041_ds.c arch/powerpc/platforms/85xx/p4080_ds.c arch/powerpc/platforms/85xx/p5020_ds.c arch/powerpc/platforms/85xx/p5040_ds.c arch/powerpc/platforms/85xx/smp.c arch/powerpc/platforms/85xx/t4240_qds.c arch/powerpc/platforms/Kconfig arch/powerpc/sysdev/Makefile arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c arch/powerpc/sysdev/fsl_msi.c arch/powerpc/sysdev/fsl_pci.c arch/powerpc/sysdev/fsl_pci.h arch/powerpc/sysdev/fsl_soc.h arch/powerpc/sysdev/mpic.c arch/powerpc/sysdev/mpic_timer.c drivers/Kconfig drivers/clk/Kconfig drivers/clk/clk-ppc-corenet.c drivers/cpufreq/Kconfig.powerpc drivers/cpufreq/Makefile drivers/cpufreq/ppc-corenet-cpufreq.c drivers/crypto/caam/Kconfig drivers/crypto/caam/Makefile drivers/crypto/caam/ctrl.c drivers/crypto/caam/desc_constr.h drivers/crypto/caam/intern.h drivers/crypto/caam/jr.c drivers/crypto/caam/regs.h drivers/dma/fsldma.c drivers/hwmon/ina2xx.c drivers/iommu/Kconfig drivers/iommu/fsl_pamu.c drivers/iommu/fsl_pamu.h drivers/iommu/fsl_pamu_domain.c drivers/iommu/fsl_pamu_domain.h drivers/misc/Makefile drivers/mmc/card/block.c drivers/mmc/core/core.c drivers/mmc/host/sdhci-esdhc.h drivers/mmc/host/sdhci-pltfm.c drivers/mtd/nand/fsl_ifc_nand.c drivers/net/ethernet/freescale/gianfar.c drivers/net/ethernet/freescale/gianfar.h drivers/net/ethernet/freescale/gianfar_ethtool.c drivers/net/phy/at803x.c drivers/net/phy/phy_device.c drivers/net/phy/vitesse.c drivers/pci/msi.c drivers/staging/Kconfig drivers/staging/Makefile drivers/uio/Kconfig drivers/uio/Makefile drivers/uio/uio.c drivers/usb/host/ehci-fsl.c drivers/vfio/Kconfig drivers/vfio/Makefile include/crypto/algapi.h include/linux/iommu.h include/linux/mmc/sdhci.h include/linux/msi.h include/linux/netdev_features.h include/linux/phy.h include/linux/skbuff.h include/net/ip.h include/uapi/linux/vfio.h net/core/ethtool.c net/ipv4/route.c net/ipv6/route.c
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c198
1 files changed, 131 insertions, 67 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 961a718..1417d01 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,7 +112,8 @@
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
-#define IP_MAX_MTU 0xFFF0
+/* IPv4 datagram length is stored into 16bit field (tot_len) */
+#define IP_MAX_MTU 0xFFFF
#define RT_GC_TIMEOUT (300*HZ)
@@ -298,7 +299,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
" %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
dst_entries_get_slow(&ipv4_dst_ops),
- st->in_hit,
+ 0, /* st->in_hit */
st->in_slow_tot,
st->in_slow_mc,
st->in_no_route,
@@ -306,16 +307,16 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
st->in_martian_dst,
st->in_martian_src,
- st->out_hit,
+ 0, /* st->out_hit */
st->out_slow_tot,
st->out_slow_mc,
- st->gc_total,
- st->gc_ignored,
- st->gc_goal_miss,
- st->gc_dst_overflow,
- st->in_hlist_search,
- st->out_hlist_search
+ 0, /* st->gc_total */
+ 0, /* st->gc_ignored */
+ 0, /* st->gc_goal_miss */
+ 0, /* st->gc_dst_overflow */
+ 0, /* st->in_hlist_search */
+ 0 /* st->out_hlist_search */
);
return 0;
}
@@ -439,12 +440,12 @@ static inline int ip_rt_proc_init(void)
static inline bool rt_is_expired(const struct rtable *rth)
{
- return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
+ return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
}
void rt_cache_flush(struct net *net)
{
- rt_genid_bump(net);
+ rt_genid_bump_ipv4(net);
#ifdef CONFIG_AS_FASTPATH
if (route_flush_fn)
route_flush_fn();
@@ -573,10 +574,25 @@ static inline void rt_free(struct rtable *rt)
static DEFINE_SPINLOCK(fnhe_lock);
+static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
+{
+ struct rtable *rt;
+
+ rt = rcu_dereference(fnhe->fnhe_rth_input);
+ if (rt) {
+ RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
+ rt_free(rt);
+ }
+ rt = rcu_dereference(fnhe->fnhe_rth_output);
+ if (rt) {
+ RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
+ rt_free(rt);
+ }
+}
+
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
struct fib_nh_exception *fnhe, *oldest;
- struct rtable *orig;
oldest = rcu_dereference(hash->chain);
for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
@@ -584,11 +600,7 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
oldest = fnhe;
}
- orig = rcu_dereference(oldest->fnhe_rth);
- if (orig) {
- RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
- rt_free(orig);
- }
+ fnhe_flush_routes(oldest);
return oldest;
}
@@ -602,11 +614,25 @@ static inline u32 fnhe_hashfun(__be32 daddr)
return hval & (FNHE_HASH_SIZE - 1);
}
+static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
+{
+ rt->rt_pmtu = fnhe->fnhe_pmtu;
+ rt->dst.expires = fnhe->fnhe_expires;
+
+ if (fnhe->fnhe_gw) {
+ rt->rt_flags |= RTCF_REDIRECTED;
+ rt->rt_gateway = fnhe->fnhe_gw;
+ rt->rt_uses_gateway = 1;
+ }
+}
+
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
u32 pmtu, unsigned long expires)
{
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe;
+ struct rtable *rt;
+ unsigned int i;
int depth;
u32 hval = fnhe_hashfun(daddr);
@@ -635,8 +661,15 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_gw = gw;
if (pmtu) {
fnhe->fnhe_pmtu = pmtu;
- fnhe->fnhe_expires = expires;
+ fnhe->fnhe_expires = max(1UL, expires);
}
+ /* Update all cached dsts too */
+ rt = rcu_dereference(fnhe->fnhe_rth_input);
+ if (rt)
+ fill_route_from_fnhe(rt, fnhe);
+ rt = rcu_dereference(fnhe->fnhe_rth_output);
+ if (rt)
+ fill_route_from_fnhe(rt, fnhe);
} else {
if (depth > FNHE_RECLAIM_DEPTH)
fnhe = fnhe_oldest(hash);
@@ -648,10 +681,27 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_next = hash->chain;
rcu_assign_pointer(hash->chain, fnhe);
}
+ fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
fnhe->fnhe_pmtu = pmtu;
fnhe->fnhe_expires = expires;
+
+ /* Exception created; mark the cached routes for the nexthop
+ * stale, so anyone caching it rechecks if this exception
+ * applies to them.
+ */
+ rt = rcu_dereference(nh->nh_rth_input);
+ if (rt)
+ rt->dst.obsolete = DST_OBSOLETE_KILL;
+
+ for_each_possible_cpu(i) {
+ struct rtable __rcu **prt;
+ prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
+ rt = rcu_dereference(*prt);
+ if (rt)
+ rt->dst.obsolete = DST_OBSOLETE_KILL;
+ }
}
fnhe->fnhe_stamp = jiffies;
@@ -930,12 +980,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
if (mtu < ip_rt_min_pmtu)
mtu = ip_rt_min_pmtu;
- if (!rt->rt_pmtu) {
- dst->obsolete = DST_OBSOLETE_KILL;
- } else {
- rt->rt_pmtu = mtu;
- dst->expires = max(1UL, jiffies + ip_rt_mtu_expires);
- }
+ if (rt->rt_pmtu == mtu &&
+ time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
+ return;
rcu_read_lock();
if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
@@ -997,6 +1044,10 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
bool new = false;
bh_lock_sock(sk);
+
+ if (!ip_sk_accept_pmtu(sk))
+ goto out;
+
rt = (struct rtable *) __sk_dst_get(sk);
if (sock_owned_by_user(sk) || !rt) {
@@ -1076,11 +1127,11 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
* DST_OBSOLETE_FORCE_CHK which forces validation calls down
* into this function always.
*
- * When a PMTU/redirect information update invalidates a
- * route, this is indicated by setting obsolete to
- * DST_OBSOLETE_KILL.
+ * When a PMTU/redirect information update invalidates a route,
+ * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
+ * DST_OBSOLETE_DEAD by dst_free().
*/
- if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt))
+ if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
return NULL;
return dst;
}
@@ -1189,10 +1240,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = 576;
}
- if (mtu > IP_MAX_MTU)
- mtu = IP_MAX_MTU;
-
- return mtu;
+ return min_t(unsigned int, mtu, IP_MAX_MTU);
}
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
@@ -1222,34 +1270,36 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
spin_lock_bh(&fnhe_lock);
if (daddr == fnhe->fnhe_daddr) {
- struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);
- if (orig && rt_is_expired(orig)) {
+ struct rtable __rcu **porig;
+ struct rtable *orig;
+ int genid = fnhe_genid(dev_net(rt->dst.dev));
+
+ if (rt_is_input_route(rt))
+ porig = &fnhe->fnhe_rth_input;
+ else
+ porig = &fnhe->fnhe_rth_output;
+ orig = rcu_dereference(*porig);
+
+ if (fnhe->fnhe_genid != genid) {
+ fnhe->fnhe_genid = genid;
fnhe->fnhe_gw = 0;
fnhe->fnhe_pmtu = 0;
fnhe->fnhe_expires = 0;
+ fnhe_flush_routes(fnhe);
+ orig = NULL;
}
- if (fnhe->fnhe_pmtu) {
- unsigned long expires = fnhe->fnhe_expires;
- unsigned long diff = expires - jiffies;
-
- if (time_before(jiffies, expires)) {
- rt->rt_pmtu = fnhe->fnhe_pmtu;
- dst_set_expires(&rt->dst, diff);
- }
- }
- if (fnhe->fnhe_gw) {
- rt->rt_flags |= RTCF_REDIRECTED;
- rt->rt_gateway = fnhe->fnhe_gw;
- rt->rt_uses_gateway = 1;
- } else if (!rt->rt_gateway)
+ fill_route_from_fnhe(rt, fnhe);
+ if (!rt->rt_gateway)
rt->rt_gateway = daddr;
- rcu_assign_pointer(fnhe->fnhe_rth, rt);
- if (orig)
- rt_free(orig);
+ if (!(rt->dst.flags & DST_NOCACHE)) {
+ rcu_assign_pointer(*porig, rt);
+ if (orig)
+ rt_free(orig);
+ ret = true;
+ }
fnhe->fnhe_stamp = jiffies;
- ret = true;
}
spin_unlock_bh(&fnhe_lock);
@@ -1418,7 +1468,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
#endif
rth->dst.output = ip_rt_bug;
- rth->rt_genid = rt_genid(dev_net(dev));
+ rth->rt_genid = rt_genid_ipv4(dev_net(dev));
rth->rt_flags = RTCF_MULTICAST;
rth->rt_type = RTN_MULTICAST;
rth->rt_is_input= 1;
@@ -1481,6 +1531,7 @@ static int __mkroute_input(struct sk_buff *skb,
struct in_device *in_dev,
__be32 daddr, __be32 saddr, u32 tos)
{
+ struct fib_nh_exception *fnhe;
struct rtable *rth;
int err;
struct in_device *out_dev;
@@ -1527,8 +1578,13 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
+ fnhe = find_exception(&FIB_RES_NH(*res), daddr);
if (do_cache) {
- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+ if (fnhe != NULL)
+ rth = rcu_dereference(fnhe->fnhe_rth_input);
+ else
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
goto out;
@@ -1543,7 +1599,7 @@ static int __mkroute_input(struct sk_buff *skb,
goto cleanup;
}
- rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
+ rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev));
rth->rt_flags = flags;
rth->rt_type = res->type;
rth->rt_is_input = 1;
@@ -1556,7 +1612,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
- rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
+ rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
skb_dst_set(skb, &rth->dst);
out:
err = 0;
@@ -1714,7 +1770,7 @@ local_input:
rth->dst.tclassid = itag;
#endif
- rth->rt_genid = rt_genid(net);
+ rth->rt_genid = rt_genid_ipv4(net);
rth->rt_flags = flags|RTCF_LOCAL;
rth->rt_type = res.type;
rth->rt_is_input = 1;
@@ -1728,8 +1784,12 @@ local_input:
rth->dst.error= -err;
rth->rt_flags &= ~RTCF_LOCAL;
}
- if (do_cache)
- rt_cache_route(&FIB_RES_NH(res), rth);
+ if (do_cache) {
+ if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) {
+ rth->dst.flags |= DST_NOCACHE;
+ rt_add_uncached_list(rth);
+ }
+ }
skb_dst_set(skb, &rth->dst);
err = 0;
goto out;
@@ -1871,7 +1931,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
fnhe = find_exception(nh, fl4->daddr);
if (fnhe)
- prth = &fnhe->fnhe_rth;
+ prth = &fnhe->fnhe_rth_output;
else {
if (unlikely(fl4->flowi4_flags &
FLOWI_FLAG_KNOWN_NH &&
@@ -1899,7 +1959,7 @@ add:
rth->dst.output = ip_output;
- rth->rt_genid = rt_genid(dev_net(dev_out));
+ rth->rt_genid = rt_genid_ipv4(dev_net(dev_out));
rth->rt_flags = flags;
rth->rt_type = type;
rth->rt_is_input = 0;
@@ -2028,7 +2088,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
RT_SCOPE_LINK);
goto make_route;
}
- if (fl4->saddr) {
+ if (!fl4->saddr) {
if (ipv4_is_multicast(fl4->daddr))
fl4->saddr = inet_select_addr(dev_out, 0,
fl4->flowi4_scope);
@@ -2189,7 +2249,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_iif = ort->rt_iif;
rt->rt_pmtu = ort->rt_pmtu;
- rt->rt_genid = rt_genid(net);
+ rt->rt_genid = rt_genid_ipv4(net);
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
rt->rt_gateway = ort->rt_gateway;
@@ -2445,19 +2505,22 @@ static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;
-static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
+static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
+ struct net *net = (struct net *)__ctl->extra1;
+
if (write) {
- rt_cache_flush((struct net *)__ctl->extra1);
+ rt_cache_flush(net);
+ fnhe_genid_bump(net);
return 0;
}
return -EINVAL;
}
-static ctl_table ipv4_route_table[] = {
+static struct ctl_table ipv4_route_table[] = {
{
.procname = "gc_thresh",
.data = &ipv4_dst_ops.gc_thresh,
@@ -2624,7 +2687,8 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
static __net_init int rt_genid_init(struct net *net)
{
- atomic_set(&net->rt_genid, 0);
+ atomic_set(&net->ipv4.rt_genid, 0);
+ atomic_set(&net->fnhe_genid, 0);
get_random_bytes(&net->ipv4.dev_addr_genid,
sizeof(net->ipv4.dev_addr_genid));
return 0;